aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/dma/atmel-dma.txt14
-rw-r--r--Documentation/dmaengine.txt8
-rw-r--r--MAINTAINERS3
-rw-r--r--arch/arm/include/asm/kprobes.h1
-rw-r--r--arch/arm/include/asm/ptrace.h5
-rw-r--r--arch/arm/include/asm/thread_info.h6
-rw-r--r--arch/arm/kernel/entry-common.S4
-rw-r--r--arch/arm/kernel/ptrace.c16
-rw-r--r--arch/arm/mach-ep93xx/include/mach/dma.h6
-rw-r--r--arch/arm/mach-shmobile/setup-sh7372.c15
-rw-r--r--arch/arm/plat-mxc/include/mach/mx3fb.h15
-rw-r--r--arch/arm/plat-nomadik/include/plat/ste_dma40.h11
-rw-r--r--arch/arm/plat-samsung/dma-ops.c4
-rw-r--r--arch/arm/plat-samsung/include/plat/dma-ops.h4
-rw-r--r--arch/ia64/include/asm/ptrace.h13
-rw-r--r--arch/ia64/kernel/ptrace.c18
-rw-r--r--arch/microblaze/include/asm/ptrace.h5
-rw-r--r--arch/microblaze/kernel/ptrace.c9
-rw-r--r--arch/microblaze/kernel/setup.c21
-rw-r--r--arch/mips/include/asm/ptrace.h14
-rw-r--r--arch/mips/kernel/ptrace.c11
-rw-r--r--arch/powerpc/include/asm/ptrace.h13
-rw-r--r--arch/powerpc/kernel/ptrace.c30
-rw-r--r--arch/s390/include/asm/ptrace.h6
-rw-r--r--arch/s390/kernel/ptrace.c15
-rw-r--r--arch/sh/include/asm/ptrace_32.h5
-rw-r--r--arch/sh/include/asm/ptrace_64.h5
-rw-r--r--arch/sh/kernel/ptrace_32.c11
-rw-r--r--arch/sh/kernel/ptrace_64.c11
-rw-r--r--arch/sparc/include/asm/ptrace.h10
-rw-r--r--arch/sparc/kernel/ptrace_64.c28
-rw-r--r--arch/um/kernel/ptrace.c20
-rw-r--r--arch/x86/ia32/ia32entry.S14
-rw-r--r--arch/x86/kernel/entry_32.S10
-rw-r--r--arch/x86/kernel/entry_64.S14
-rw-r--r--arch/x86/kernel/ptrace.c25
-rw-r--r--arch/x86/kernel/vm86_32.c6
-rw-r--r--arch/x86/um/shared/sysdep/ptrace.h5
-rw-r--r--arch/xtensa/kernel/ptrace.c3
-rw-r--r--block/cfq-iosched.c7
-rw-r--r--drivers/ata/ata_piix.c7
-rw-r--r--drivers/ata/libata-core.c2
-rw-r--r--drivers/ata/libata-transport.c1
-rw-r--r--drivers/ata/pata_bf54x.c167
-rw-r--r--drivers/ata/sata_fsl.c11
-rw-r--r--drivers/dma/Kconfig27
-rw-r--r--drivers/dma/Makefile1
-rw-r--r--drivers/dma/amba-pl08x.c41
-rw-r--r--drivers/dma/at_hdmac.c103
-rw-r--r--drivers/dma/at_hdmac_regs.h1
-rw-r--r--drivers/dma/coh901318.c12
-rw-r--r--drivers/dma/coh901318_lli.c23
-rw-r--r--drivers/dma/coh901318_lli.h4
-rw-r--r--drivers/dma/dmaengine.c4
-rw-r--r--drivers/dma/dw_dmac.c83
-rw-r--r--drivers/dma/dw_dmac_regs.h1
-rw-r--r--drivers/dma/ep93xx_dma.c90
-rw-r--r--drivers/dma/fsldma.c4
-rw-r--r--drivers/dma/imx-dma.c10
-rw-r--r--drivers/dma/imx-sdma.c27
-rw-r--r--drivers/dma/intel_mid_dma.c39
-rw-r--r--drivers/dma/intel_mid_dma_regs.h4
-rw-r--r--drivers/dma/iop-adma.c16
-rw-r--r--drivers/dma/ipu/ipu_idmac.c29
-rw-r--r--drivers/dma/mpc512x_dma.c12
-rw-r--r--drivers/dma/mxs-dma.c53
-rw-r--r--drivers/dma/pch_dma.c20
-rw-r--r--drivers/dma/pl330.c31
-rw-r--r--drivers/dma/shdma.c72
-rw-r--r--drivers/dma/sirf-dma.c707
-rw-r--r--drivers/dma/ste_dma40.c441
-rw-r--r--drivers/dma/ste_dma40_ll.h11
-rw-r--r--drivers/dma/timb_dma.c30
-rw-r--r--drivers/dma/txx9dmac.c12
-rw-r--r--drivers/media/video/mx3_camera.c2
-rw-r--r--drivers/media/video/timblogiw.c2
-rw-r--r--drivers/misc/carma/carma-fpga-program.c2
-rw-r--r--drivers/mmc/host/atmel-mci.c10
-rw-r--r--drivers/mmc/host/mmci.c11
-rw-r--r--drivers/mmc/host/mxcmmc.c10
-rw-r--r--drivers/mmc/host/mxs-mmc.c10
-rw-r--r--drivers/mmc/host/sh_mmcif.c4
-rw-r--r--drivers/mmc/host/tmio_mmc_dma.c4
-rw-r--r--drivers/mtd/nand/gpmi-nand/gpmi-lib.c22
-rw-r--r--drivers/net/ethernet/micrel/ks8842.c4
-rw-r--r--drivers/spi/spi-dw-mid.c8
-rw-r--r--drivers/spi/spi-ep93xx.c9
-rw-r--r--drivers/spi/spi-pl022.c8
-rw-r--r--drivers/spi/spi-topcliff-pch.c4
-rw-r--r--drivers/tty/serial/amba-pl011.c8
-rw-r--r--drivers/tty/serial/pch_uart.c4
-rw-r--r--drivers/tty/serial/sh-sci.c4
-rw-r--r--drivers/usb/host/ehci-xilinx-of.c2
-rw-r--r--drivers/usb/musb/ux500_dma.c4
-rw-r--r--drivers/usb/renesas_usbhs/fifo.c4
-rw-r--r--drivers/video/mx3fb.c65
-rw-r--r--drivers/xen/xen-balloon.c2
-rw-r--r--fs/btrfs/Kconfig19
-rw-r--r--fs/btrfs/Makefile3
-rw-r--r--fs/btrfs/backref.c1131
-rw-r--r--fs/btrfs/backref.h5
-rw-r--r--fs/btrfs/btrfs_inode.h3
-rw-r--r--fs/btrfs/check-integrity.c3068
-rw-r--r--fs/btrfs/check-integrity.h36
-rw-r--r--fs/btrfs/ctree.c42
-rw-r--r--fs/btrfs/ctree.h239
-rw-r--r--fs/btrfs/delayed-inode.c45
-rw-r--r--fs/btrfs/delayed-ref.c153
-rw-r--r--fs/btrfs/delayed-ref.h104
-rw-r--r--fs/btrfs/disk-io.c119
-rw-r--r--fs/btrfs/disk-io.h6
-rw-r--r--fs/btrfs/export.c2
-rw-r--r--fs/btrfs/extent-tree.c465
-rw-r--r--fs/btrfs/extent_io.c6
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/file.c11
-rw-r--r--fs/btrfs/free-space-cache.c417
-rw-r--r--fs/btrfs/inode-map.c4
-rw-r--r--fs/btrfs/inode.c66
-rw-r--r--fs/btrfs/ioctl.c268
-rw-r--r--fs/btrfs/ioctl.h54
-rw-r--r--fs/btrfs/locking.c53
-rw-r--r--fs/btrfs/relocation.c20
-rw-r--r--fs/btrfs/scrub.c12
-rw-r--r--fs/btrfs/super.c190
-rw-r--r--fs/btrfs/transaction.c20
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/btrfs/ulist.c220
-rw-r--r--fs/btrfs/ulist.h68
-rw-r--r--fs/btrfs/volumes.c993
-rw-r--r--fs/btrfs/volumes.h54
-rw-r--r--fs/btrfs/xattr.c2
-rw-r--r--fs/namei.c28
-rw-r--r--fs/proc/base.c150
-rw-r--r--fs/xfs/xfs_aops.c29
-rw-r--r--fs/xfs/xfs_attr.c4
-rw-r--r--fs/xfs/xfs_attr_leaf.c9
-rw-r--r--fs/xfs/xfs_bmap.c116
-rw-r--r--fs/xfs/xfs_dfrag.c43
-rw-r--r--fs/xfs/xfs_file.c184
-rw-r--r--fs/xfs/xfs_fs_subr.c2
-rw-r--r--fs/xfs/xfs_iget.c24
-rw-r--r--fs/xfs/xfs_inode.c193
-rw-r--r--fs/xfs/xfs_inode.h114
-rw-r--r--fs/xfs/xfs_inode_item.c8
-rw-r--r--fs/xfs/xfs_iomap.c46
-rw-r--r--fs/xfs/xfs_iops.c46
-rw-r--r--fs/xfs/xfs_qm_syscalls.c8
-rw-r--r--fs/xfs/xfs_super.c8
-rw-r--r--fs/xfs/xfs_sync.c9
-rw-r--r--fs/xfs/xfs_trace.h29
-rw-r--r--fs/xfs/xfs_vnodeops.c44
-rw-r--r--include/linux/amba/pl08x.h4
-rw-r--r--include/linux/audit.h116
-rw-r--r--include/linux/digsig.h4
-rw-r--r--include/linux/dmaengine.h99
-rw-r--r--include/linux/dw_dmac.h2
-rw-r--r--include/linux/key.h3
-rw-r--r--include/linux/kref.h1
-rw-r--r--include/linux/mtd/gpmi-nand.h68
-rw-r--r--include/linux/ptrace.h10
-rw-r--r--include/linux/sh_dma.h4
-rw-r--r--include/linux/sirfsoc_dma.h6
-rw-r--r--include/linux/tty_driver.h1
-rw-r--r--include/trace/events/btrfs.h203
-rw-r--r--init/Kconfig16
-rw-r--r--kernel/audit.c4
-rw-r--r--kernel/audit.h6
-rw-r--r--kernel/auditfilter.c17
-rw-r--r--kernel/auditsc.c735
-rw-r--r--kernel/capability.c2
-rw-r--r--kernel/exit.c3
-rw-r--r--kernel/fork.c2
-rw-r--r--kernel/seccomp.c2
-rw-r--r--lib/Kconfig19
-rw-r--r--lib/Makefile2
-rw-r--r--security/integrity/Kconfig4
-rw-r--r--security/integrity/Makefile2
-rw-r--r--security/integrity/ima/ima_audit.c8
-rw-r--r--security/integrity/integrity.h4
-rw-r--r--security/keys/encrypted-keys/encrypted.c6
-rw-r--r--security/keys/encrypted-keys/masterkey_trusted.c4
-rw-r--r--security/keys/gc.c4
-rw-r--r--security/keys/keyring.c22
-rw-r--r--security/keys/trusted.c4
-rw-r--r--security/lsm_audit.c27
-rw-r--r--security/tomoyo/util.c6
-rw-r--r--sound/atmel/abdac.c2
-rw-r--r--sound/atmel/ac97c.c10
-rw-r--r--sound/core/Kconfig13
-rw-r--r--sound/pci/au88x0/au88x0.c13
-rw-r--r--sound/pci/au88x0/au88x0.h1
-rw-r--r--sound/pci/au88x0/au88x0_pcm.c1
-rw-r--r--sound/pci/hda/hda_intel.c1
-rw-r--r--sound/pci/hda/patch_sigmatel.c2
-rw-r--r--sound/pci/oxygen/xonar_wm87x6.c1
-rw-r--r--sound/soc/ep93xx/ep93xx-pcm.c4
-rw-r--r--sound/soc/imx/imx-pcm-dma-mx2.c6
-rw-r--r--sound/soc/mxs/mxs-pcm.c2
-rw-r--r--sound/soc/samsung/dma.c4
-rw-r--r--sound/soc/sh/siu_pcm.c4
-rw-r--r--sound/soc/txx9/txx9aclc.c2
202 files changed, 10357 insertions, 2865 deletions
diff --git a/Documentation/devicetree/bindings/dma/atmel-dma.txt b/Documentation/devicetree/bindings/dma/atmel-dma.txt
new file mode 100644
index 00000000000..3c046ee6e8b
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/atmel-dma.txt
@@ -0,0 +1,14 @@
+* Atmel Direct Memory Access Controller (DMA)
+
+Required properties:
+- compatible: Should be "atmel,<chip>-dma"
+- reg: Should contain DMA registers location and length
+- interrupts: Should contain DMA interrupt
+
+Examples:
+
+dma@ffffec00 {
+ compatible = "atmel,at91sam9g45-dma";
+ reg = <0xffffec00 0x200>;
+ interrupts = <21>;
+};
diff --git a/Documentation/dmaengine.txt b/Documentation/dmaengine.txt
index 94b7e0f96b3..bbe6cb3d185 100644
--- a/Documentation/dmaengine.txt
+++ b/Documentation/dmaengine.txt
@@ -75,6 +75,10 @@ The slave DMA usage consists of following steps:
slave_sg - DMA a list of scatter gather buffers from/to a peripheral
dma_cyclic - Perform a cyclic DMA operation from/to a peripheral till the
operation is explicitly stopped.
+ interleaved_dma - This is common to Slave as well as M2M clients. For slave
+ address of devices' fifo could be already known to the driver.
+ Various types of operations could be expressed by setting
+ appropriate values to the 'dma_interleaved_template' members.
A non-NULL return of this transfer API represents a "descriptor" for
the given transaction.
@@ -89,6 +93,10 @@ The slave DMA usage consists of following steps:
struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
size_t period_len, enum dma_data_direction direction);
+ struct dma_async_tx_descriptor *(*device_prep_interleaved_dma)(
+ struct dma_chan *chan, struct dma_interleaved_template *xt,
+ unsigned long flags);
+
The peripheral driver is expected to have mapped the scatterlist for
the DMA operation prior to calling device_prep_slave_sg, and must
keep the scatterlist mapped until the DMA operation has completed.
diff --git a/MAINTAINERS b/MAINTAINERS
index 2a90101309d..341dee3b02c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -745,6 +745,7 @@ M: Barry Song <baohua.song@csr.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
F: arch/arm/mach-prima2/
+F: drivers/dma/sirf-dma*
ARM/EBSA110 MACHINE SUPPORT
M: Russell King <linux@arm.linux.org.uk>
@@ -5846,7 +5847,7 @@ F: drivers/mmc/host/sdhci-spear.c
SECURITY SUBSYSTEM
M: James Morris <jmorris@namei.org>
L: linux-security-module@vger.kernel.org (suggested Cc:)
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/security-testing-2.6.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security.git
W: http://security.wiki.kernel.org/
S: Supported
F: security/
diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h
index feec86768f9..f82ec22eeb1 100644
--- a/arch/arm/include/asm/kprobes.h
+++ b/arch/arm/include/asm/kprobes.h
@@ -24,7 +24,6 @@
#define MAX_INSN_SIZE 2
#define MAX_STACK_SIZE 64 /* 32 would probably be OK */
-#define regs_return_value(regs) ((regs)->ARM_r0)
#define flush_insn_slot(p) do { } while (0)
#define kretprobe_blacklist_size 0
diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h
index 96187ff58c2..451808ba121 100644
--- a/arch/arm/include/asm/ptrace.h
+++ b/arch/arm/include/asm/ptrace.h
@@ -189,6 +189,11 @@ static inline int valid_user_regs(struct pt_regs *regs)
return 0;
}
+static inline long regs_return_value(struct pt_regs *regs)
+{
+ return regs->ARM_r0;
+}
+
#define instruction_pointer(regs) (regs)->ARM_pc
#ifdef CONFIG_SMP
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 0f30c3a78fc..d4c24d412a8 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -129,6 +129,7 @@ extern void vfp_flush_hwstate(struct thread_info *);
/*
* thread information flags:
* TIF_SYSCALL_TRACE - syscall trace active
+ * TIF_SYSCAL_AUDIT - syscall auditing active
* TIF_SIGPENDING - signal pending
* TIF_NEED_RESCHED - rescheduling necessary
* TIF_NOTIFY_RESUME - callback before returning to user
@@ -139,6 +140,7 @@ extern void vfp_flush_hwstate(struct thread_info *);
#define TIF_NEED_RESCHED 1
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
#define TIF_SYSCALL_TRACE 8
+#define TIF_SYSCALL_AUDIT 9
#define TIF_POLLING_NRFLAG 16
#define TIF_USING_IWMMXT 17
#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
@@ -149,11 +151,15 @@ extern void vfp_flush_hwstate(struct thread_info *);
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
+#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
#define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT)
#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
+/* Checks for any syscall work in entry-common.S */
+#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT)
+
/*
* Change these and you break ASM code in entry-common.S
*/
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index b2a27b6b004..520889cf1b5 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -87,7 +87,7 @@ ENTRY(ret_from_fork)
get_thread_info tsk
ldr r1, [tsk, #TI_FLAGS] @ check for syscall tracing
mov why, #1
- tst r1, #_TIF_SYSCALL_TRACE @ are we tracing syscalls?
+ tst r1, #_TIF_SYSCALL_WORK @ are we tracing syscalls?
beq ret_slow_syscall
mov r1, sp
mov r0, #1 @ trace exit [IP = 1]
@@ -443,7 +443,7 @@ ENTRY(vector_swi)
1:
#endif
- tst r10, #_TIF_SYSCALL_TRACE @ are we tracing syscalls?
+ tst r10, #_TIF_SYSCALL_WORK @ are we tracing syscalls?
bne __sys_trace
cmp scno, #NR_syscalls @ check upper syscall limit
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 483727ad689..e1d5e1929fb 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -906,11 +906,6 @@ asmlinkage int syscall_trace(int why, struct pt_regs *regs, int scno)
{
unsigned long ip;
- if (!test_thread_flag(TIF_SYSCALL_TRACE))
- return scno;
- if (!(current->ptrace & PT_PTRACED))
- return scno;
-
/*
* Save IP. IP is used to denote syscall entry/exit:
* IP = 0 -> entry, = 1 -> exit
@@ -918,6 +913,17 @@ asmlinkage int syscall_trace(int why, struct pt_regs *regs, int scno)
ip = regs->ARM_ip;
regs->ARM_ip = why;
+ if (!ip)
+ audit_syscall_exit(regs);
+ else
+ audit_syscall_entry(AUDIT_ARCH_ARMEB, scno, regs->ARM_r0,
+ regs->ARM_r1, regs->ARM_r2, regs->ARM_r3);
+
+ if (!test_thread_flag(TIF_SYSCALL_TRACE))
+ return scno;
+ if (!(current->ptrace & PT_PTRACED))
+ return scno;
+
current_thread_info()->syscall = scno;
/* the 0x80 provides a way for the tracing parent to distinguish
diff --git a/arch/arm/mach-ep93xx/include/mach/dma.h b/arch/arm/mach-ep93xx/include/mach/dma.h
index 46d4d876e6f..e82c642fa53 100644
--- a/arch/arm/mach-ep93xx/include/mach/dma.h
+++ b/arch/arm/mach-ep93xx/include/mach/dma.h
@@ -37,7 +37,7 @@
*/
struct ep93xx_dma_data {
int port;
- enum dma_data_direction direction;
+ enum dma_transfer_direction direction;
const char *name;
};
@@ -80,14 +80,14 @@ static inline bool ep93xx_dma_chan_is_m2p(struct dma_chan *chan)
* channel supports given DMA direction. Only M2P channels have such
* limitation, for M2M channels the direction is configurable.
*/
-static inline enum dma_data_direction
+static inline enum dma_transfer_direction
ep93xx_dma_chan_direction(struct dma_chan *chan)
{
if (!ep93xx_dma_chan_is_m2p(chan))
return DMA_NONE;
/* even channels are for TX, odd for RX */
- return (chan->chan_id % 2 == 0) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
+ return (chan->chan_id % 2 == 0) ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM;
}
#endif /* __ASM_ARCH_DMA_H */
diff --git a/arch/arm/mach-shmobile/setup-sh7372.c b/arch/arm/mach-shmobile/setup-sh7372.c
index 1ea89be63e2..6fcf304d3cd 100644
--- a/arch/arm/mach-shmobile/setup-sh7372.c
+++ b/arch/arm/mach-shmobile/setup-sh7372.c
@@ -445,31 +445,39 @@ static const struct sh_dmae_slave_config sh7372_dmae_slaves[] = {
},
};
+#define SH7372_CHCLR 0x220
+
static const struct sh_dmae_channel sh7372_dmae_channels[] = {
{
.offset = 0,
.dmars = 0,
.dmars_bit = 0,
+ .chclr_offset = SH7372_CHCLR + 0,
}, {
.offset = 0x10,
.dmars = 0,
.dmars_bit = 8,
+ .chclr_offset = SH7372_CHCLR + 0x10,
}, {
.offset = 0x20,
.dmars = 4,
.dmars_bit = 0,
+ .chclr_offset = SH7372_CHCLR + 0x20,
}, {
.offset = 0x30,
.dmars = 4,
.dmars_bit = 8,
+ .chclr_offset = SH7372_CHCLR + 0x30,
}, {
.offset = 0x50,
.dmars = 8,
.dmars_bit = 0,
+ .chclr_offset = SH7372_CHCLR + 0x50,
}, {
.offset = 0x60,
.dmars = 8,
.dmars_bit = 8,
+ .chclr_offset = SH7372_CHCLR + 0x60,
}
};
@@ -487,6 +495,7 @@ static struct sh_dmae_pdata dma_platform_data = {
.ts_shift = ts_shift,
.ts_shift_num = ARRAY_SIZE(ts_shift),
.dmaor_init = DMAOR_DME,
+ .chclr_present = 1,
};
/* Resource order important! */
@@ -494,7 +503,7 @@ static struct resource sh7372_dmae0_resources[] = {
{
/* Channel registers and DMAOR */
.start = 0xfe008020,
- .end = 0xfe00808f,
+ .end = 0xfe00828f,
.flags = IORESOURCE_MEM,
},
{
@@ -522,7 +531,7 @@ static struct resource sh7372_dmae1_resources[] = {
{
/* Channel registers and DMAOR */
.start = 0xfe018020,
- .end = 0xfe01808f,
+ .end = 0xfe01828f,
.flags = IORESOURCE_MEM,
},
{
@@ -550,7 +559,7 @@ static struct resource sh7372_dmae2_resources[] = {
{
/* Channel registers and DMAOR */
.start = 0xfe028020,
- .end = 0xfe02808f,
+ .end = 0xfe02828f,
.flags = IORESOURCE_MEM,
},
{
diff --git a/arch/arm/plat-mxc/include/mach/mx3fb.h b/arch/arm/plat-mxc/include/mach/mx3fb.h
index ac24c5c4bc8..fdbe6000154 100644
--- a/arch/arm/plat-mxc/include/mach/mx3fb.h
+++ b/arch/arm/plat-mxc/include/mach/mx3fb.h
@@ -22,6 +22,20 @@
#define FB_SYNC_SWAP_RGB 0x04000000
#define FB_SYNC_CLK_SEL_EN 0x02000000
+/*
+ * Specify the way your display is connected. The IPU can arbitrarily
+ * map the internal colors to the external data lines. We only support
+ * the following mappings at the moment.
+ */
+enum disp_data_mapping {
+ /* blue -> d[0..5], green -> d[6..11], red -> d[12..17] */
+ IPU_DISP_DATA_MAPPING_RGB666,
+ /* blue -> d[0..4], green -> d[5..10], red -> d[11..15] */
+ IPU_DISP_DATA_MAPPING_RGB565,
+ /* blue -> d[0..7], green -> d[8..15], red -> d[16..23] */
+ IPU_DISP_DATA_MAPPING_RGB888,
+};
+
/**
* struct mx3fb_platform_data - mx3fb platform data
*
@@ -33,6 +47,7 @@ struct mx3fb_platform_data {
const char *name;
const struct fb_videomode *mode;
int num_modes;
+ enum disp_data_mapping disp_data_fmt;
};
#endif
diff --git a/arch/arm/plat-nomadik/include/plat/ste_dma40.h b/arch/arm/plat-nomadik/include/plat/ste_dma40.h
index 685c78716d9..fd0ee84c45d 100644
--- a/arch/arm/plat-nomadik/include/plat/ste_dma40.h
+++ b/arch/arm/plat-nomadik/include/plat/ste_dma40.h
@@ -113,7 +113,8 @@ struct stedma40_half_channel_info {
* @dst_dev_type: Dst device type
* @src_info: Parameters for dst half channel
* @dst_info: Parameters for dst half channel
- *
+ * @use_fixed_channel: if true, use physical channel specified by phy_channel
+ * @phy_channel: physical channel to use, only if use_fixed_channel is true
*
* This structure has to be filled by the client drivers.
* It is recommended to do all dma configurations for clients in the machine.
@@ -129,6 +130,9 @@ struct stedma40_chan_cfg {
int dst_dev_type;
struct stedma40_half_channel_info src_info;
struct stedma40_half_channel_info dst_info;
+
+ bool use_fixed_channel;
+ int phy_channel;
};
/**
@@ -153,6 +157,7 @@ struct stedma40_platform_data {
struct stedma40_chan_cfg *memcpy_conf_phy;
struct stedma40_chan_cfg *memcpy_conf_log;
int disabled_channels[STEDMA40_MAX_PHYS];
+ bool use_esram_lcla;
};
#ifdef CONFIG_STE_DMA40
@@ -187,7 +192,7 @@ static inline struct
dma_async_tx_descriptor *stedma40_slave_mem(struct dma_chan *chan,
dma_addr_t addr,
unsigned int size,
- enum dma_data_direction direction,
+ enum dma_transfer_direction direction,
unsigned long flags)
{
struct scatterlist sg;
@@ -209,7 +214,7 @@ static inline struct
dma_async_tx_descriptor *stedma40_slave_mem(struct dma_chan *chan,
dma_addr_t addr,
unsigned int size,
- enum dma_data_direction direction,
+ enum dma_transfer_direction direction,
unsigned long flags)
{
return NULL;
diff --git a/arch/arm/plat-samsung/dma-ops.c b/arch/arm/plat-samsung/dma-ops.c
index 2cded872f22..0747c77a2fd 100644
--- a/arch/arm/plat-samsung/dma-ops.c
+++ b/arch/arm/plat-samsung/dma-ops.c
@@ -37,14 +37,14 @@ static unsigned samsung_dmadev_request(enum dma_ch dma_ch,
(void *)dma_ch;
chan = dma_request_channel(mask, pl330_filter, filter_param);
- if (info->direction == DMA_FROM_DEVICE) {
+ if (info->direction == DMA_DEV_TO_MEM) {
memset(&slave_config, 0, sizeof(struct dma_slave_config));
slave_config.direction = info->direction;
slave_config.src_addr = info->fifo;
slave_config.src_addr_width = info->width;
slave_config.src_maxburst = 1;
dmaengine_slave_config(chan, &slave_config);
- } else if (info->direction == DMA_TO_DEVICE) {
+ } else if (info->direction == DMA_MEM_TO_DEV) {
memset(&slave_config, 0, sizeof(struct dma_slave_config));
slave_config.direction = info->direction;
slave_config.dst_addr = info->fifo;
diff --git a/arch/arm/plat-samsung/include/plat/dma-ops.h b/arch/arm/plat-samsung/include/plat/dma-ops.h
index 7c6c2a8dd55..71a6827c770 100644
--- a/arch/arm/plat-samsung/include/plat/dma-ops.h
+++ b/arch/arm/plat-samsung/include/plat/dma-ops.h
@@ -18,7 +18,7 @@
struct samsung_dma_prep_info {
enum dma_transaction_type cap;
- enum dma_data_direction direction;
+ enum dma_transfer_direction direction;
dma_addr_t buf;
unsigned long period;
unsigned long len;
@@ -28,7 +28,7 @@ struct samsung_dma_prep_info {
struct samsung_dma_info {
enum dma_transaction_type cap;
- enum dma_data_direction direction;
+ enum dma_transfer_direction direction;
enum dma_slave_buswidth width;
dma_addr_t fifo;
struct s3c2410_dma_client *client;
diff --git a/arch/ia64/include/asm/ptrace.h b/arch/ia64/include/asm/ptrace.h
index f5cb27614e3..68c98f5b3ca 100644
--- a/arch/ia64/include/asm/ptrace.h
+++ b/arch/ia64/include/asm/ptrace.h
@@ -246,7 +246,18 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs)
return regs->ar_bspstore;
}
-#define regs_return_value(regs) ((regs)->r8)
+static inline int is_syscall_success(struct pt_regs *regs)
+{
+ return regs->r10 != -1;
+}
+
+static inline long regs_return_value(struct pt_regs *regs)
+{
+ if (is_syscall_success(regs))
+ return regs->r8;
+ else
+ return -regs->r8;
+}
/* Conserve space in histogram by encoding slot bits in address
* bits 2 and 3 rather than bits 0 and 1.
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 8848f43d819..dad91661ddf 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -1246,15 +1246,8 @@ syscall_trace_enter (long arg0, long arg1, long arg2, long arg3,
if (test_thread_flag(TIF_RESTORE_RSE))
ia64_sync_krbs();
- if (unlikely(current->audit_context)) {
- long syscall;
- int arch;
- syscall = regs.r15;
- arch = AUDIT_ARCH_IA64;
-
- audit_syscall_entry(arch, syscall, arg0, arg1, arg2, arg3);
- }
+ audit_syscall_entry(AUDIT_ARCH_IA64, regs.r15, arg0, arg1, arg2, arg3);
return 0;
}
@@ -1268,14 +1261,7 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
{
int step;
- if (unlikely(current->audit_context)) {
- int success = AUDITSC_RESULT(regs.r10);
- long result = regs.r8;
-
- if (success != AUDITSC_SUCCESS)
- result = -result;
- audit_syscall_exit(success, result);
- }
+ audit_syscall_exit(&regs);
step = test_thread_flag(TIF_SINGLESTEP);
if (step || test_thread_flag(TIF_SYSCALL_TRACE))
diff --git a/arch/microblaze/include/asm/ptrace.h b/arch/microblaze/include/asm/ptrace.h
index 816bee64b19..94e92c80585 100644
--- a/arch/microblaze/include/asm/ptrace.h
+++ b/arch/microblaze/include/asm/ptrace.h
@@ -61,6 +61,11 @@ struct pt_regs {
#define instruction_pointer(regs) ((regs)->pc)
#define profile_pc(regs) instruction_pointer(regs)
+static inline long regs_return_value(struct pt_regs *regs)
+{
+ return regs->r3;
+}
+
#else /* __KERNEL__ */
/* pt_regs offsets used by gdbserver etc in ptrace syscalls */
diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c
index 043cb58f9c4..6eb2aa927d8 100644
--- a/arch/microblaze/kernel/ptrace.c
+++ b/arch/microblaze/kernel/ptrace.c
@@ -147,10 +147,8 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
*/
ret = -1L;
- if (unlikely(current->audit_context))
- audit_syscall_entry(EM_MICROBLAZE, regs->r12,
- regs->r5, regs->r6,
- regs->r7, regs->r8);
+ audit_syscall_entry(EM_MICROBLAZE, regs->r12, regs->r5, regs->r6,
+ regs->r7, regs->r8);
return ret ?: regs->r12;
}
@@ -159,8 +157,7 @@ asmlinkage void do_syscall_trace_leave(struct pt_regs *regs)
{
int step;
- if (unlikely(current->audit_context))
- audit_syscall_exit(AUDITSC_RESULT(regs->r3), regs->r3);
+ audit_syscall_exit(regs);
step = test_thread_flag(TIF_SINGLESTEP);
if (step || test_thread_flag(TIF_SYSCALL_TRACE))
diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c
index 604cd9dd133..d4fc1a97177 100644
--- a/arch/microblaze/kernel/setup.c
+++ b/arch/microblaze/kernel/setup.c
@@ -26,6 +26,7 @@
#include <linux/cache.h>
#include <linux/of_platform.h>
#include <linux/dma-mapping.h>
+#include <linux/cpu.h>
#include <asm/cacheflush.h>
#include <asm/entry.h>
#include <asm/cpuinfo.h>
@@ -226,5 +227,23 @@ static int __init setup_bus_notifier(void)
return 0;
}
-
arch_initcall(setup_bus_notifier);
+
+static DEFINE_PER_CPU(struct cpu, cpu_devices);
+
+static int __init topology_init(void)
+{
+ int i, ret;
+
+ for_each_present_cpu(i) {
+ struct cpu *c = &per_cpu(cpu_devices, i);
+
+ ret = register_cpu(c, i);
+ if (ret)
+ printk(KERN_WARNING "topology_init: register_cpu %d "
+ "failed (%d)\n", i, ret);
+ }
+
+ return 0;
+}
+subsys_initcall(topology_init);
diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h
index 7b99c670e47..4b7f5252d2f 100644
--- a/arch/mips/include/asm/ptrace.h
+++ b/arch/mips/include/asm/ptrace.h
@@ -137,7 +137,19 @@ extern int ptrace_set_watch_regs(struct task_struct *child,
*/
#define user_mode(regs) (((regs)->cp0_status & KU_MASK) == KU_USER)
-#define regs_return_value(_regs) ((_regs)->regs[2])
+static inline int is_syscall_success(struct pt_regs *regs)
+{
+ return !regs->regs[7];
+}
+
+static inline long regs_return_value(struct pt_regs *regs)
+{
+ if (is_syscall_success(regs))
+ return regs->regs[2];
+ else
+ return -regs->regs[2];
+}
+
#define instruction_pointer(regs) ((regs)->cp0_epc)
#define profile_pc(regs) instruction_pointer(regs)
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 4e6ea1ffad4..7786b608d93 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -560,10 +560,9 @@ asmlinkage void syscall_trace_enter(struct pt_regs *regs)
}
out:
- if (unlikely(current->audit_context))
- audit_syscall_entry(audit_arch(), regs->regs[2],
- regs->regs[4], regs->regs[5],
- regs->regs[6], regs->regs[7]);
+ audit_syscall_entry(audit_arch(), regs->regs[2],
+ regs->regs[4], regs->regs[5],
+ regs->regs[6], regs->regs[7]);
}
/*
@@ -572,9 +571,7 @@ out:
*/
asmlinkage void syscall_trace_leave(struct pt_regs *regs)
{
- if (unlikely(current->audit_context))
- audit_syscall_exit(AUDITSC_RESULT(regs->regs[7]),
- -regs->regs[2]);
+ audit_syscall_exit(regs);
if (!(current->ptrace & PT_PTRACED))
return;
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 48223f9b872..78a205162fd 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -86,7 +86,18 @@ struct pt_regs {
#define instruction_pointer(regs) ((regs)->nip)
#define user_stack_pointer(regs) ((regs)->gpr[1])
#define kernel_stack_pointer(regs) ((regs)->gpr[1])
-#define regs_return_value(regs) ((regs)->gpr[3])
+static inline int is_syscall_success(struct pt_regs *regs)
+{
+ return !(regs->ccr & 0x10000000);
+}
+
+static inline long regs_return_value(struct pt_regs *regs)
+{
+ if (is_syscall_success(regs))
+ return regs->gpr[3];
+ else
+ return -regs->gpr[3];
+}
#ifdef CONFIG_SMP
extern unsigned long profile_pc(struct pt_regs *regs);
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 5de73dbd15c..5b43325402b 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -1724,22 +1724,20 @@ long do_syscall_trace_enter(struct pt_regs *regs)
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs->gpr[0]);
- if (unlikely(current->audit_context)) {
#ifdef CONFIG_PPC64
- if (!is_32bit_task())
- audit_syscall_entry(AUDIT_ARCH_PPC64,
- regs->gpr[0],
- regs->gpr[3], regs->gpr[4],
- regs->gpr[5], regs->gpr[6]);
- else
+ if (!is_32bit_task())
+ audit_syscall_entry(AUDIT_ARCH_PPC64,
+ regs->gpr[0],
+ regs->gpr[3], regs->gpr[4],
+ regs->gpr[5], regs->gpr[6]);
+ else
#endif
- audit_syscall_entry(AUDIT_ARCH_PPC,
- regs->gpr[0],
- regs->gpr[3] & 0xffffffff,
- regs->gpr[4] & 0xffffffff,
- regs->gpr[5] & 0xffffffff,
- regs->gpr[6] & 0xffffffff);
- }
+ audit_syscall_entry(AUDIT_ARCH_PPC,
+ regs->gpr[0],
+ regs->gpr[3] & 0xffffffff,
+ regs->gpr[4] & 0xffffffff,
+ regs->gpr[5] & 0xffffffff,
+ regs->gpr[6] & 0xffffffff);
return ret ?: regs->gpr[0];
}
@@ -1748,9 +1746,7 @@ void do_syscall_trace_leave(struct pt_regs *regs)
{
int step;
- if (unlikely(current->audit_context))
- audit_syscall_exit((regs->ccr&0x10000000)?AUDITSC_FAILURE:AUDITSC_SUCCESS,
- regs->result);
+ audit_syscall_exit(regs);
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_exit(regs, regs->result);
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 56da355678f..aeb77f01798 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -541,9 +541,13 @@ struct user_regs_struct
#define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0)
#define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN)
#define user_stack_pointer(regs)((regs)->gprs[15])
-#define regs_return_value(regs)((regs)->gprs[2])
#define profile_pc(regs) instruction_pointer(regs)
+static inline long regs_return_value(struct pt_regs *regs)
+{
+ return regs->gprs[2];
+}
+
int regs_query_register_offset(const char *name);
const char *regs_query_register_name(unsigned int offset);
unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset);
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 573bc29551e..9d82ed4bcb2 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -740,20 +740,17 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs->gprs[2]);
- if (unlikely(current->audit_context))
- audit_syscall_entry(is_compat_task() ?
- AUDIT_ARCH_S390 : AUDIT_ARCH_S390X,
- regs->gprs[2], regs->orig_gpr2,
- regs->gprs[3], regs->gprs[4],
- regs->gprs[5]);
+ audit_syscall_entry(is_compat_task() ?
+ AUDIT_ARCH_S390 : AUDIT_ARCH_S390X,
+ regs->gprs[2], regs->orig_gpr2,
+ regs->gprs[3], regs->gprs[4],
+ regs->gprs[5]);
return ret ?: regs->gprs[2];
}
asmlinkage void do_syscall_trace_exit(struct pt_regs *regs)
{
- if (unlikely(current->audit_context))
- audit_syscall_exit(AUDITSC_RESULT(regs->gprs[2]),
- regs->gprs[2]);
+ audit_syscall_exit(regs);
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_exit(regs, regs->gprs[2]);
diff --git a/arch/sh/include/asm/ptrace_32.h b/arch/sh/include/asm/ptrace_32.h
index 6c2239cca1a..2d3e906aa72 100644
--- a/arch/sh/include/asm/ptrace_32.h
+++ b/arch/sh/include/asm/ptrace_32.h
@@ -76,7 +76,10 @@ struct pt_dspregs {
#ifdef __KERNEL__
#define MAX_REG_OFFSET offsetof(struct pt_regs, tra)
-#define regs_return_value(_regs) ((_regs)->regs[0])
+static inline long regs_return_value(struct pt_regs *regs)
+{
+ return regs->regs[0];
+}
#endif /* __KERNEL__ */
diff --git a/arch/sh/include/asm/ptrace_64.h b/arch/sh/include/asm/ptrace_64.h
index bf9be7764d6..eb3fcceaf64 100644
--- a/arch/sh/include/asm/ptrace_64.h
+++ b/arch/sh/include/asm/ptrace_64.h
@@ -13,7 +13,10 @@ struct pt_regs {
#ifdef __KERNEL__
#define MAX_REG_OFFSET offsetof(struct pt_regs, tregs[7])
-#define regs_return_value(_regs) ((_regs)->regs[3])
+static inline long regs_return_value(struct pt_regs *regs)
+{
+ return regs->regs[3];
+}
#endif /* __KERNEL__ */
diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index 92b3c276339..a3e65156376 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -518,10 +518,9 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs->regs[0]);
- if (unlikely(current->audit_context))
- audit_syscall_entry(audit_arch(), regs->regs[3],
- regs->regs[4], regs->regs[5],
- regs->regs[6], regs->regs[7]);
+ audit_syscall_entry(audit_arch(), regs->regs[3],
+ regs->regs[4], regs->regs[5],
+ regs->regs[6], regs->regs[7]);
return ret ?: regs->regs[0];
}
@@ -530,9 +529,7 @@ asmlinkage void do_syscall_trace_leave(struct pt_regs *regs)
{
int step;
- if (unlikely(current->audit_context))
- audit_syscall_exit(AUDITSC_RESULT(regs->regs[0]),
- regs->regs[0]);
+ audit_syscall_exit(regs);
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_exit(regs, regs->regs[0]);
diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c
index c8f97649f35..3d0080b5c97 100644
--- a/arch/sh/kernel/ptrace_64.c
+++ b/arch/sh/kernel/ptrace_64.c
@@ -536,10 +536,9 @@ asmlinkage long long do_syscall_trace_enter(struct pt_regs *regs)
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs->regs[9]);
- if (unlikely(current->audit_context))
- audit_syscall_entry(audit_arch(), regs->regs[1],
- regs->regs[2], regs->regs[3],
- regs->regs[4], regs->regs[5]);
+ audit_syscall_entry(audit_arch(), regs->regs[1],
+ regs->regs[2], regs->regs[3],
+ regs->regs[4], regs->regs[5]);
return ret ?: regs->regs[9];
}
@@ -548,9 +547,7 @@ asmlinkage void do_syscall_trace_leave(struct pt_regs *regs)
{
int step;
- if (unlikely(current->audit_context))
- audit_syscall_exit(AUDITSC_RESULT(regs->regs[9]),
- regs->regs[9]);
+ audit_syscall_exit(regs);
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_exit(regs, regs->regs[9]);
diff --git a/arch/sparc/include/asm/ptrace.h b/arch/sparc/include/asm/ptrace.h
index a0e1bcf843a..c00c3b5c280 100644
--- a/arch/sparc/include/asm/ptrace.h
+++ b/arch/sparc/include/asm/ptrace.h
@@ -207,7 +207,15 @@ do { current_thread_info()->syscall_noerror = 1; \
#define instruction_pointer(regs) ((regs)->tpc)
#define instruction_pointer_set(regs, val) ((regs)->tpc = (val))
#define user_stack_pointer(regs) ((regs)->u_regs[UREG_FP])
-#define regs_return_value(regs) ((regs)->u_regs[UREG_I0])
+static inline int is_syscall_success(struct pt_regs *regs)
+{
+ return !(regs->tstate & (TSTATE_XCARRY | TSTATE_ICARRY));
+}
+
+static inline long regs_return_value(struct pt_regs *regs)
+{
+ return regs->u_regs[UREG_I0];
+}
#ifdef CONFIG_SMP
extern unsigned long profile_pc(struct pt_regs *);
#else
diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c
index 96ee50a8066..9388844cd88 100644
--- a/arch/sparc/kernel/ptrace_64.c
+++ b/arch/sparc/kernel/ptrace_64.c
@@ -1071,32 +1071,22 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs)
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs->u_regs[UREG_G1]);
- if (unlikely(current->audit_context) && !ret)
- audit_syscall_entry((test_thread_flag(TIF_32BIT) ?
- AUDIT_ARCH_SPARC :
- AUDIT_ARCH_SPARC64),
- regs->u_regs[UREG_G1],
- regs->u_regs[UREG_I0],
- regs->u_regs[UREG_I1],
- regs->u_regs[UREG_I2],
- regs->u_regs[UREG_I3]);
+ audit_syscall_entry((test_thread_flag(TIF_32BIT) ?
+ AUDIT_ARCH_SPARC :
+ AUDIT_ARCH_SPARC64),
+ regs->u_regs[UREG_G1],
+ regs->u_regs[UREG_I0],
+ regs->u_regs[UREG_I1],
+ regs->u_regs[UREG_I2],
+ regs->u_regs[UREG_I3]);
return ret;
}
asmlinkage void syscall_trace_leave(struct pt_regs *regs)
{
-#ifdef CONFIG_AUDITSYSCALL
- if (unlikely(current->audit_context)) {
- unsigned long tstate = regs->tstate;
- int result = AUDITSC_SUCCESS;
+ audit_syscall_exit(regs);
- if (unlikely(tstate & (TSTATE_XCARRY | TSTATE_ICARRY)))
- result = AUDITSC_FAILURE;
-
- audit_syscall_exit(result, regs->u_regs[UREG_I0]);
- }
-#endif
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_exit(regs, regs->u_regs[UREG_G1]);
diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
index c9da32b0c70..06b19039050 100644
--- a/arch/um/kernel/ptrace.c
+++ b/arch/um/kernel/ptrace.c
@@ -167,17 +167,15 @@ void syscall_trace(struct uml_pt_regs *regs, int entryexit)
int is_singlestep = (current->ptrace & PT_DTRACE) && entryexit;
int tracesysgood;
- if (unlikely(current->audit_context)) {
- if (!entryexit)
- audit_syscall_entry(HOST_AUDIT_ARCH,
- UPT_SYSCALL_NR(regs),
- UPT_SYSCALL_ARG1(regs),
- UPT_SYSCALL_ARG2(regs),
- UPT_SYSCALL_ARG3(regs),
- UPT_SYSCALL_ARG4(regs));
- else audit_syscall_exit(AUDITSC_RESULT(UPT_SYSCALL_RET(regs)),
- UPT_SYSCALL_RET(regs));
- }
+ if (!entryexit)
+ audit_syscall_entry(HOST_AUDIT_ARCH,
+ UPT_SYSCALL_NR(regs),
+ UPT_SYSCALL_ARG1(regs),
+ UPT_SYSCALL_ARG2(regs),
+ UPT_SYSCALL_ARG3(regs),
+ UPT_SYSCALL_ARG4(regs));
+ else
+ audit_syscall_exit(regs);
/* Fake a debug trap */
if (is_singlestep)
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 1106261856c..e3e734005e1 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -14,6 +14,7 @@
#include <asm/segment.h>
#include <asm/irqflags.h>
#include <linux/linkage.h>
+#include <linux/err.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
#include <linux/elf-em.h>
@@ -189,7 +190,7 @@ sysexit_from_sys_call:
movl %ebx,%edx /* 3rd arg: 1st syscall arg */
movl %eax,%esi /* 2nd arg: syscall number */
movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */
- call audit_syscall_entry
+ call __audit_syscall_entry
movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */
cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
@@ -206,12 +207,13 @@ sysexit_from_sys_call:
TRACE_IRQS_ON
sti
movl %eax,%esi /* second arg, syscall return value */
- cmpl $0,%eax /* is it < 0? */
- setl %al /* 1 if so, 0 if not */
+ cmpl $-MAX_ERRNO,%eax /* is it an error ? */
+ jbe 1f
+ movslq %eax, %rsi /* if error sign extend to 64 bits */
+1: setbe %al /* 1 if error, 0 if not */
movzbl %al,%edi /* zero-extend that into %edi */
- inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
- call audit_syscall_exit
- movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */
+ call __audit_syscall_exit
+ movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */
movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
cli
TRACE_IRQS_OFF
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 4af9fd2450a..79d97e68f04 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -42,6 +42,7 @@
*/
#include <linux/linkage.h>
+#include <linux/err.h>
#include <asm/thread_info.h>
#include <asm/irqflags.h>
#include <asm/errno.h>
@@ -453,7 +454,7 @@ sysenter_audit:
movl %ebx,%ecx /* 3rd arg: 1st syscall arg */
movl %eax,%edx /* 2nd arg: syscall number */
movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */
- call audit_syscall_entry
+ call __audit_syscall_entry
pushl_cfi %ebx
movl PT_EAX(%esp),%eax /* reload syscall number */
jmp sysenter_do_call
@@ -464,11 +465,10 @@ sysexit_audit:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_ANY)
movl %eax,%edx /* second arg, syscall return value */
- cmpl $0,%eax /* is it < 0? */
- setl %al /* 1 if so, 0 if not */
+ cmpl $-MAX_ERRNO,%eax /* is it an error ? */
+ setbe %al /* 1 if so, 0 if not */
movzbl %al,%eax /* zero-extend that */
- inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
- call audit_syscall_exit
+ call __audit_syscall_exit
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 940ba711fc2..3fe8239fd8f 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -55,6 +55,7 @@
#include <asm/paravirt.h>
#include <asm/ftrace.h>
#include <asm/percpu.h>
+#include <linux/err.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
#include <linux/elf-em.h>
@@ -548,7 +549,7 @@ badsys:
#ifdef CONFIG_AUDITSYSCALL
/*
* Fast path for syscall audit without full syscall trace.
- * We just call audit_syscall_entry() directly, and then
+ * We just call __audit_syscall_entry() directly, and then
* jump back to the normal fast path.
*/
auditsys:
@@ -558,22 +559,21 @@ auditsys:
movq %rdi,%rdx /* 3rd arg: 1st syscall arg */
movq %rax,%rsi /* 2nd arg: syscall number */
movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */
- call audit_syscall_entry
+ call __audit_syscall_entry
LOAD_ARGS 0 /* reload call-clobbered registers */
jmp system_call_fastpath
/*
- * Return fast path for syscall audit. Call audit_syscall_exit()
+ * Return fast path for syscall audit. Call __audit_syscall_exit()
* directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
* masked off.
*/
sysret_audit:
movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */
- cmpq $0,%rsi /* is it < 0? */
- setl %al /* 1 if so, 0 if not */
+ cmpq $-MAX_ERRNO,%rsi /* is it < -MAX_ERRNO? */
+ setbe %al /* 1 if so, 0 if not */
movzbl %al,%edi /* zero-extend that into %edi */
- inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
- call audit_syscall_exit
+ call __audit_syscall_exit
movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
jmp sysret_check
#endif /* CONFIG_AUDITSYSCALL */
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 89a04c7b5bb..50267386b76 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1392,20 +1392,18 @@ long syscall_trace_enter(struct pt_regs *regs)
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs->orig_ax);
- if (unlikely(current->audit_context)) {
- if (IS_IA32)
- audit_syscall_entry(AUDIT_ARCH_I386,
- regs->orig_ax,
- regs->bx, regs->cx,
- regs->dx, regs->si);
+ if (IS_IA32)
+ audit_syscall_entry(AUDIT_ARCH_I386,
+ regs->orig_ax,
+ regs->bx, regs->cx,
+ regs->dx, regs->si);
#ifdef CONFIG_X86_64
- else
- audit_syscall_entry(AUDIT_ARCH_X86_64,
- regs->orig_ax,
- regs->di, regs->si,
- regs->dx, regs->r10);
+ else
+ audit_syscall_entry(AUDIT_ARCH_X86_64,
+ regs->orig_ax,
+ regs->di, regs->si,
+ regs->dx, regs->r10);
#endif
- }
return ret ?: regs->orig_ax;
}
@@ -1414,8 +1412,7 @@ void syscall_trace_leave(struct pt_regs *regs)
{
bool step;
- if (unlikely(current->audit_context))
- audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
+ audit_syscall_exit(regs);
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_exit(regs, regs->ax);
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 863f8753ab0..b466cab5ba1 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -335,9 +335,11 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
if (info->flags & VM86_SCREEN_BITMAP)
mark_screen_rdonly(tsk->mm);
- /*call audit_syscall_exit since we do not exit via the normal paths */
+ /*call __audit_syscall_exit since we do not exit via the normal paths */
+#ifdef CONFIG_AUDITSYSCALL
if (unlikely(current->audit_context))
- audit_syscall_exit(AUDITSC_RESULT(0), 0);
+ __audit_syscall_exit(1, 0);
+#endif
__asm__ __volatile__(
"movl %0,%%esp\n\t"
diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h
index 711b1621747..5ef9344a8b2 100644
--- a/arch/x86/um/shared/sysdep/ptrace.h
+++ b/arch/x86/um/shared/sysdep/ptrace.h
@@ -3,3 +3,8 @@
#else
#include "ptrace_64.h"
#endif
+
+static inline long regs_return_value(struct uml_pt_regs *regs)
+{
+ return UPT_SYSCALL_RET(regs);
+}
diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c
index a0d042aa296..2dff698ab02 100644
--- a/arch/xtensa/kernel/ptrace.c
+++ b/arch/xtensa/kernel/ptrace.c
@@ -334,8 +334,7 @@ void do_syscall_trace_enter(struct pt_regs *regs)
do_syscall_trace();
#if 0
- if (unlikely(current->audit_context))
- audit_syscall_entry(current, AUDIT_ARCH_XTENSA..);
+ audit_syscall_entry(current, AUDIT_ARCH_XTENSA..);
#endif
}
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 163263ddd38..ee55019066a 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3117,18 +3117,17 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
*/
static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
- struct cfq_queue *old_cfqq = cfqd->active_queue;
-
cfq_log_cfqq(cfqd, cfqq, "preempt");
- cfq_slice_expired(cfqd, 1);
/*
* workload type is changed, don't save slice, otherwise preempt
* doesn't happen
*/
- if (cfqq_type(old_cfqq) != cfqq_type(cfqq))
+ if (cfqq_type(cfqd->active_queue) != cfqq_type(cfqq))
cfqq->cfqg->saved_workload_slice = 0;
+ cfq_slice_expired(cfqd, 1);
+
/*
* Put the new queue at the front of the of the current list,
* so we know that it will be selected next.
diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index 69ac373c72a..fdf27b9fce4 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -1117,6 +1117,13 @@ static int piix_broken_suspend(void)
},
},
{
+ .ident = "Satellite Pro A120",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Satellite Pro A120"),
+ },
+ },
+ {
.ident = "Portege M500",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 11c9aea4f4f..c06e0ec1155 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4125,6 +4125,8 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
* device and controller are SATA.
*/
{ "PIONEER DVD-RW DVRTD08", NULL, ATA_HORKAGE_NOSETXFER },
+ { "PIONEER DVD-RW DVRTD08A", NULL, ATA_HORKAGE_NOSETXFER },
+ { "PIONEER DVD-RW DVR-215", NULL, ATA_HORKAGE_NOSETXFER },
{ "PIONEER DVD-RW DVR-212D", NULL, ATA_HORKAGE_NOSETXFER },
{ "PIONEER DVD-RW DVR-216D", NULL, ATA_HORKAGE_NOSETXFER },
diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c
index 9a7f0ea565d..74aaee30e26 100644
--- a/drivers/ata/libata-transport.c
+++ b/drivers/ata/libata-transport.c
@@ -291,6 +291,7 @@ int ata_tport_add(struct device *parent,
goto tport_err;
}
+ device_enable_async_suspend(dev);
pm_runtime_set_active(dev);
pm_runtime_enable(dev);
diff --git a/drivers/ata/pata_bf54x.c b/drivers/ata/pata_bf54x.c
index d6a4677fdf7..1e65842e2ca 100644
--- a/drivers/ata/pata_bf54x.c
+++ b/drivers/ata/pata_bf54x.c
@@ -251,6 +251,8 @@ static const u32 udma_tenvmin = 20;
static const u32 udma_tackmin = 20;
static const u32 udma_tssmin = 50;
+#define BFIN_MAX_SG_SEGMENTS 4
+
/**
*
* Function: num_clocks_min
@@ -829,79 +831,61 @@ static void bfin_set_devctl(struct ata_port *ap, u8 ctl)
static void bfin_bmdma_setup(struct ata_queued_cmd *qc)
{
- unsigned short config = WDSIZE_16;
+ struct ata_port *ap = qc->ap;
+ struct dma_desc_array *dma_desc_cpu = (struct dma_desc_array *)ap->bmdma_prd;
+ void __iomem *base = (void __iomem *)ap->ioaddr.ctl_addr;
+ unsigned short config = DMAFLOW_ARRAY | NDSIZE_5 | RESTART | WDSIZE_16 | DMAEN;
struct scatterlist *sg;
unsigned int si;
+ unsigned int channel;
+ unsigned int dir;
+ unsigned int size = 0;
dev_dbg(qc->ap->dev, "in atapi dma setup\n");
/* Program the ATA_CTRL register with dir */
if (qc->tf.flags & ATA_TFLAG_WRITE) {
- /* fill the ATAPI DMA controller */
- set_dma_config(CH_ATAPI_TX, config);
- set_dma_x_modify(CH_ATAPI_TX, 2);
- for_each_sg(qc->sg, sg, qc->n_elem, si) {
- set_dma_start_addr(CH_ATAPI_TX, sg_dma_address(sg));
- set_dma_x_count(CH_ATAPI_TX, sg_dma_len(sg) >> 1);
- }
+ channel = CH_ATAPI_TX;
+ dir = DMA_TO_DEVICE;
} else {
+ channel = CH_ATAPI_RX;
+ dir = DMA_FROM_DEVICE;
config |= WNR;
- /* fill the ATAPI DMA controller */
- set_dma_config(CH_ATAPI_RX, config);
- set_dma_x_modify(CH_ATAPI_RX, 2);
- for_each_sg(qc->sg, sg, qc->n_elem, si) {
- set_dma_start_addr(CH_ATAPI_RX, sg_dma_address(sg));
- set_dma_x_count(CH_ATAPI_RX, sg_dma_len(sg) >> 1);
- }
}
-}
-/**
- * bfin_bmdma_start - Start an IDE DMA transaction
- * @qc: Info associated with this ATA transaction.
- *
- * Note: Original code is ata_bmdma_start().
- */
+ dma_map_sg(ap->dev, qc->sg, qc->n_elem, dir);
-static void bfin_bmdma_start(struct ata_queued_cmd *qc)
-{
- struct ata_port *ap = qc->ap;
- void __iomem *base = (void __iomem *)ap->ioaddr.ctl_addr;
- struct scatterlist *sg;
- unsigned int si;
+ /* fill the ATAPI DMA controller */
+ for_each_sg(qc->sg, sg, qc->n_elem, si) {
+ dma_desc_cpu[si].start_addr = sg_dma_address(sg);
+ dma_desc_cpu[si].cfg = config;
+ dma_desc_cpu[si].x_count = sg_dma_len(sg) >> 1;
+ dma_desc_cpu[si].x_modify = 2;
+ size += sg_dma_len(sg);
+ }
- dev_dbg(qc->ap->dev, "in atapi dma start\n");
- if (!(ap->udma_mask || ap->mwdma_mask))
- return;
+ /* Set the last descriptor to stop mode */
+ dma_desc_cpu[qc->n_elem - 1].cfg &= ~(DMAFLOW | NDSIZE);
- /* start ATAPI DMA controller*/
- if (qc->tf.flags & ATA_TFLAG_WRITE) {
- /*
- * On blackfin arch, uncacheable memory is not
- * allocated with flag GFP_DMA. DMA buffer from
- * common kenel code should be flushed if WB
- * data cache is enabled. Otherwise, this loop
- * is an empty loop and optimized out.
- */
- for_each_sg(qc->sg, sg, qc->n_elem, si) {
- flush_dcache_range(sg_dma_address(sg),
- sg_dma_address(sg) + sg_dma_len(sg));
- }
- enable_dma(CH_ATAPI_TX);
- dev_dbg(qc->ap->dev, "enable udma write\n");
+ flush_dcache_range((unsigned int)dma_desc_cpu,
+ (unsigned int)dma_desc_cpu +
+ qc->n_elem * sizeof(struct dma_desc_array));
- /* Send ATA DMA write command */
- bfin_exec_command(ap, &qc->tf);
+ /* Enable ATA DMA operation*/
+ set_dma_curr_desc_addr(channel, (unsigned long *)ap->bmdma_prd_dma);
+ set_dma_x_count(channel, 0);
+ set_dma_x_modify(channel, 0);
+ set_dma_config(channel, config);
+
+ SSYNC();
+
+ /* Send ATA DMA command */
+ bfin_exec_command(ap, &qc->tf);
+ if (qc->tf.flags & ATA_TFLAG_WRITE) {
/* set ATA DMA write direction */
ATAPI_SET_CONTROL(base, (ATAPI_GET_CONTROL(base)
| XFER_DIR));
} else {
- enable_dma(CH_ATAPI_RX);
- dev_dbg(qc->ap->dev, "enable udma read\n");
-
- /* Send ATA DMA read command */
- bfin_exec_command(ap, &qc->tf);
-
/* set ATA DMA read direction */
ATAPI_SET_CONTROL(base, (ATAPI_GET_CONTROL(base)
& ~XFER_DIR));
@@ -913,12 +897,28 @@ static void bfin_bmdma_start(struct ata_queued_cmd *qc)
/* Set ATAPI state machine contorl in terminate sequence */
ATAPI_SET_CONTROL(base, ATAPI_GET_CONTROL(base) | END_ON_TERM);
- /* Set transfer length to buffer len */
- for_each_sg(qc->sg, sg, qc->n_elem, si) {
- ATAPI_SET_XFER_LEN(base, (sg_dma_len(sg) >> 1));
- }
+ /* Set transfer length to the total size of sg buffers */
+ ATAPI_SET_XFER_LEN(base, size >> 1);
+}
- /* Enable ATA DMA operation*/
+/**
+ * bfin_bmdma_start - Start an IDE DMA transaction
+ * @qc: Info associated with this ATA transaction.
+ *
+ * Note: Original code is ata_bmdma_start().
+ */
+
+static void bfin_bmdma_start(struct ata_queued_cmd *qc)
+{
+ struct ata_port *ap = qc->ap;
+ void __iomem *base = (void __iomem *)ap->ioaddr.ctl_addr;
+
+ dev_dbg(qc->ap->dev, "in atapi dma start\n");
+
+ if (!(ap->udma_mask || ap->mwdma_mask))
+ return;
+
+ /* start ATAPI transfer*/
if (ap->udma_mask)
ATAPI_SET_CONTROL(base, ATAPI_GET_CONTROL(base)
| ULTRA_START);
@@ -935,34 +935,23 @@ static void bfin_bmdma_start(struct ata_queued_cmd *qc)
static void bfin_bmdma_stop(struct ata_queued_cmd *qc)
{
struct ata_port *ap = qc->ap;
- struct scatterlist *sg;
- unsigned int si;
+ unsigned int dir;
dev_dbg(qc->ap->dev, "in atapi dma stop\n");
+
if (!(ap->udma_mask || ap->mwdma_mask))
return;
/* stop ATAPI DMA controller*/
- if (qc->tf.flags & ATA_TFLAG_WRITE)
+ if (qc->tf.flags & ATA_TFLAG_WRITE) {
+ dir = DMA_TO_DEVICE;
disable_dma(CH_ATAPI_TX);
- else {
+ } else {
+ dir = DMA_FROM_DEVICE;
disable_dma(CH_ATAPI_RX);
- if (ap->hsm_task_state & HSM_ST_LAST) {
- /*
- * On blackfin arch, uncacheable memory is not
- * allocated with flag GFP_DMA. DMA buffer from
- * common kenel code should be invalidated if
- * data cache is enabled. Otherwise, this loop
- * is an empty loop and optimized out.
- */
- for_each_sg(qc->sg, sg, qc->n_elem, si) {
- invalidate_dcache_range(
- sg_dma_address(sg),
- sg_dma_address(sg)
- + sg_dma_len(sg));
- }
- }
}
+
+ dma_unmap_sg(ap->dev, qc->sg, qc->n_elem, dir);
}
/**
@@ -1260,6 +1249,11 @@ static void bfin_port_stop(struct ata_port *ap)
{
dev_dbg(ap->dev, "in atapi port stop\n");
if (ap->udma_mask != 0 || ap->mwdma_mask != 0) {
+ dma_free_coherent(ap->dev,
+ BFIN_MAX_SG_SEGMENTS * sizeof(struct dma_desc_array),
+ ap->bmdma_prd,
+ ap->bmdma_prd_dma);
+
free_dma(CH_ATAPI_RX);
free_dma(CH_ATAPI_TX);
}
@@ -1271,14 +1265,29 @@ static int bfin_port_start(struct ata_port *ap)
if (!(ap->udma_mask || ap->mwdma_mask))
return 0;
+ ap->bmdma_prd = dma_alloc_coherent(ap->dev,
+ BFIN_MAX_SG_SEGMENTS * sizeof(struct dma_desc_array),
+ &ap->bmdma_prd_dma,
+ GFP_KERNEL);
+
+ if (ap->bmdma_prd == NULL) {
+ dev_info(ap->dev, "Unable to allocate DMA descriptor array.\n");
+ goto out;
+ }
+
if (request_dma(CH_ATAPI_RX, "BFIN ATAPI RX DMA") >= 0) {
if (request_dma(CH_ATAPI_TX,
"BFIN ATAPI TX DMA") >= 0)
return 0;
free_dma(CH_ATAPI_RX);
+ dma_free_coherent(ap->dev,
+ BFIN_MAX_SG_SEGMENTS * sizeof(struct dma_desc_array),
+ ap->bmdma_prd,
+ ap->bmdma_prd_dma);
}
+out:
ap->udma_mask = 0;
ap->mwdma_mask = 0;
dev_err(ap->dev, "Unable to request ATAPI DMA!"
@@ -1400,7 +1409,7 @@ static irqreturn_t bfin_ata_interrupt(int irq, void *dev_instance)
static struct scsi_host_template bfin_sht = {
ATA_BASE_SHT(DRV_NAME),
- .sg_tablesize = SG_NONE,
+ .sg_tablesize = BFIN_MAX_SG_SEGMENTS,
.dma_boundary = ATA_DMA_BOUNDARY,
};
diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c
index 5a2c95ba050..0120b0d1e9a 100644
--- a/drivers/ata/sata_fsl.c
+++ b/drivers/ata/sata_fsl.c
@@ -140,6 +140,7 @@ enum {
*/
HCONTROL_ONLINE_PHY_RST = (1 << 31),
HCONTROL_FORCE_OFFLINE = (1 << 30),
+ HCONTROL_LEGACY = (1 << 28),
HCONTROL_PARITY_PROT_MOD = (1 << 14),
HCONTROL_DPATH_PARITY = (1 << 12),
HCONTROL_SNOOP_ENABLE = (1 << 10),
@@ -1223,6 +1224,10 @@ static int sata_fsl_init_controller(struct ata_host *host)
* part of the port_start() callback
*/
+ /* sata controller to operate in enterprise mode */
+ temp = ioread32(hcr_base + HCONTROL);
+ iowrite32(temp & ~HCONTROL_LEGACY, hcr_base + HCONTROL);
+
/* ack. any pending IRQs for this controller/port */
temp = ioread32(hcr_base + HSTATUS);
if (temp & 0x3F)
@@ -1421,6 +1426,12 @@ static int sata_fsl_resume(struct platform_device *op)
/* Recovery the CHBA register in host controller cmd register set */
iowrite32(pp->cmdslot_paddr & 0xffffffff, hcr_base + CHBA);
+ iowrite32((ioread32(hcr_base + HCONTROL)
+ | HCONTROL_ONLINE_PHY_RST
+ | HCONTROL_SNOOP_ENABLE
+ | HCONTROL_PMP_ATTACHED),
+ hcr_base + HCONTROL);
+
ata_host_resume(host);
return 0;
}
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 5a99bb3f255..f1a274994bb 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -124,7 +124,7 @@ config MV_XOR
config MX3_IPU
bool "MX3x Image Processing Unit support"
- depends on SOC_IMX31 || SOC_IMX35
+ depends on ARCH_MXC
select DMA_ENGINE
default y
help
@@ -187,6 +187,13 @@ config TIMB_DMA
help
Enable support for the Timberdale FPGA DMA engine.
+config SIRF_DMA
+ tristate "CSR SiRFprimaII DMA support"
+ depends on ARCH_PRIMA2
+ select DMA_ENGINE
+ help
+ Enable support for the CSR SiRFprimaII DMA engine.
+
config ARCH_HAS_ASYNC_TX_FIND_CHANNEL
bool
@@ -201,26 +208,26 @@ config PL330_DMA
platform_data for a dma-pl330 device.
config PCH_DMA
- tristate "Intel EG20T PCH / OKI Semi IOH(ML7213/ML7223) DMA support"
+ tristate "Intel EG20T PCH / LAPIS Semicon IOH(ML7213/ML7223/ML7831) DMA"
depends on PCI && X86
select DMA_ENGINE
help
Enable support for Intel EG20T PCH DMA engine.
- This driver also can be used for OKI SEMICONDUCTOR IOH(Input/
- Output Hub), ML7213 and ML7223.
- ML7213 IOH is for IVI(In-Vehicle Infotainment) use and ML7223 IOH is
- for MP(Media Phone) use.
- ML7213/ML7223 is companion chip for Intel Atom E6xx series.
- ML7213/ML7223 is completely compatible for Intel EG20T PCH.
+ This driver also can be used for LAPIS Semiconductor IOH(Input/
+ Output Hub), ML7213, ML7223 and ML7831.
+ ML7213 IOH is for IVI(In-Vehicle Infotainment) use, ML7223 IOH is
+ for MP(Media Phone) use and ML7831 IOH is for general purpose use.
+ ML7213/ML7223/ML7831 is companion chip for Intel Atom E6xx series.
+ ML7213/ML7223/ML7831 is completely compatible for Intel EG20T PCH.
config IMX_SDMA
tristate "i.MX SDMA support"
- depends on ARCH_MX25 || SOC_IMX31 || SOC_IMX35 || ARCH_MX5
+ depends on ARCH_MXC
select DMA_ENGINE
help
Support the i.MX SDMA engine. This engine is integrated into
- Freescale i.MX25/31/35/51 chips.
+ Freescale i.MX25/31/35/51/53 chips.
config IMX_DMA
tristate "i.MX DMA support"
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 30cf3b1f0c5..009a222e828 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_IMX_SDMA) += imx-sdma.o
obj-$(CONFIG_IMX_DMA) += imx-dma.o
obj-$(CONFIG_MXS_DMA) += mxs-dma.o
obj-$(CONFIG_TIMB_DMA) += timb_dma.o
+obj-$(CONFIG_SIRF_DMA) += sirf-dma.o
obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o
obj-$(CONFIG_PL330_DMA) += pl330.o
obj-$(CONFIG_PCH_DMA) += pch_dma.o
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index 0698695e8bf..8a281584458 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -854,8 +854,10 @@ static int prep_phy_channel(struct pl08x_dma_chan *plchan,
int ret;
/* Check if we already have a channel */
- if (plchan->phychan)
- return 0;
+ if (plchan->phychan) {
+ ch = plchan->phychan;
+ goto got_channel;
+ }
ch = pl08x_get_phy_channel(pl08x, plchan);
if (!ch) {
@@ -880,21 +882,22 @@ static int prep_phy_channel(struct pl08x_dma_chan *plchan,
return -EBUSY;
}
ch->signal = ret;
-
- /* Assign the flow control signal to this channel */
- if (txd->direction == DMA_TO_DEVICE)
- txd->ccfg |= ch->signal << PL080_CONFIG_DST_SEL_SHIFT;
- else if (txd->direction == DMA_FROM_DEVICE)
- txd->ccfg |= ch->signal << PL080_CONFIG_SRC_SEL_SHIFT;
}
+ plchan->phychan = ch;
dev_dbg(&pl08x->adev->dev, "allocated physical channel %d and signal %d for xfer on %s\n",
ch->id,
ch->signal,
plchan->name);
+got_channel:
+ /* Assign the flow control signal to this channel */
+ if (txd->direction == DMA_MEM_TO_DEV)
+ txd->ccfg |= ch->signal << PL080_CONFIG_DST_SEL_SHIFT;
+ else if (txd->direction == DMA_DEV_TO_MEM)
+ txd->ccfg |= ch->signal << PL080_CONFIG_SRC_SEL_SHIFT;
+
plchan->phychan_hold++;
- plchan->phychan = ch;
return 0;
}
@@ -1102,10 +1105,10 @@ static int dma_set_runtime_config(struct dma_chan *chan,
/* Transfer direction */
plchan->runtime_direction = config->direction;
- if (config->direction == DMA_TO_DEVICE) {
+ if (config->direction == DMA_MEM_TO_DEV) {
addr_width = config->dst_addr_width;
maxburst = config->dst_maxburst;
- } else if (config->direction == DMA_FROM_DEVICE) {
+ } else if (config->direction == DMA_DEV_TO_MEM) {
addr_width = config->src_addr_width;
maxburst = config->src_maxburst;
} else {
@@ -1136,7 +1139,7 @@ static int dma_set_runtime_config(struct dma_chan *chan,
cctl |= burst << PL080_CONTROL_SB_SIZE_SHIFT;
cctl |= burst << PL080_CONTROL_DB_SIZE_SHIFT;
- if (plchan->runtime_direction == DMA_FROM_DEVICE) {
+ if (plchan->runtime_direction == DMA_DEV_TO_MEM) {
plchan->src_addr = config->src_addr;
plchan->src_cctl = pl08x_cctl(cctl) | PL080_CONTROL_DST_INCR |
pl08x_select_bus(plchan->cd->periph_buses,
@@ -1152,7 +1155,7 @@ static int dma_set_runtime_config(struct dma_chan *chan,
"configured channel %s (%s) for %s, data width %d, "
"maxburst %d words, LE, CCTL=0x%08x\n",
dma_chan_name(chan), plchan->name,
- (config->direction == DMA_FROM_DEVICE) ? "RX" : "TX",
+ (config->direction == DMA_DEV_TO_MEM) ? "RX" : "TX",
addr_width,
maxburst,
cctl);
@@ -1322,7 +1325,7 @@ static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy(
static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
struct dma_chan *chan, struct scatterlist *sgl,
- unsigned int sg_len, enum dma_data_direction direction,
+ unsigned int sg_len, enum dma_transfer_direction direction,
unsigned long flags)
{
struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
@@ -1354,10 +1357,10 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
*/
txd->direction = direction;
- if (direction == DMA_TO_DEVICE) {
+ if (direction == DMA_MEM_TO_DEV) {
txd->cctl = plchan->dst_cctl;
slave_addr = plchan->dst_addr;
- } else if (direction == DMA_FROM_DEVICE) {
+ } else if (direction == DMA_DEV_TO_MEM) {
txd->cctl = plchan->src_cctl;
slave_addr = plchan->src_addr;
} else {
@@ -1368,10 +1371,10 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
}
if (plchan->cd->device_fc)
- tmp = (direction == DMA_TO_DEVICE) ? PL080_FLOW_MEM2PER_PER :
+ tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER_PER :
PL080_FLOW_PER2MEM_PER;
else
- tmp = (direction == DMA_TO_DEVICE) ? PL080_FLOW_MEM2PER :
+ tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER :
PL080_FLOW_PER2MEM;
txd->ccfg |= tmp << PL080_CONFIG_FLOW_CONTROL_SHIFT;
@@ -1387,7 +1390,7 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
list_add_tail(&dsg->node, &txd->dsg_list);
dsg->len = sg_dma_len(sg);
- if (direction == DMA_TO_DEVICE) {
+ if (direction == DMA_MEM_TO_DEV) {
dsg->src_addr = sg_phys(sg);
dsg->dst_addr = slave_addr;
} else {
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index fcfa0a8b5c5..97f87b29b9f 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -23,6 +23,8 @@
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
#include "at_hdmac_regs.h"
@@ -660,7 +662,7 @@ err_desc_get:
*/
static struct dma_async_tx_descriptor *
atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
- unsigned int sg_len, enum dma_data_direction direction,
+ unsigned int sg_len, enum dma_transfer_direction direction,
unsigned long flags)
{
struct at_dma_chan *atchan = to_at_dma_chan(chan);
@@ -678,7 +680,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
dev_vdbg(chan2dev(chan), "prep_slave_sg (%d): %s f0x%lx\n",
sg_len,
- direction == DMA_TO_DEVICE ? "TO DEVICE" : "FROM DEVICE",
+ direction == DMA_MEM_TO_DEV ? "TO DEVICE" : "FROM DEVICE",
flags);
if (unlikely(!atslave || !sg_len)) {
@@ -692,7 +694,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
ctrlb = ATC_IEN;
switch (direction) {
- case DMA_TO_DEVICE:
+ case DMA_MEM_TO_DEV:
ctrla |= ATC_DST_WIDTH(reg_width);
ctrlb |= ATC_DST_ADDR_MODE_FIXED
| ATC_SRC_ADDR_MODE_INCR
@@ -725,7 +727,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
total_len += len;
}
break;
- case DMA_FROM_DEVICE:
+ case DMA_DEV_TO_MEM:
ctrla |= ATC_SRC_WIDTH(reg_width);
ctrlb |= ATC_DST_ADDR_MODE_INCR
| ATC_SRC_ADDR_MODE_FIXED
@@ -787,7 +789,7 @@ err_desc_get:
*/
static int
atc_dma_cyclic_check_values(unsigned int reg_width, dma_addr_t buf_addr,
- size_t period_len, enum dma_data_direction direction)
+ size_t period_len, enum dma_transfer_direction direction)
{
if (period_len > (ATC_BTSIZE_MAX << reg_width))
goto err_out;
@@ -795,7 +797,7 @@ atc_dma_cyclic_check_values(unsigned int reg_width, dma_addr_t buf_addr,
goto err_out;
if (unlikely(buf_addr & ((1 << reg_width) - 1)))
goto err_out;
- if (unlikely(!(direction & (DMA_TO_DEVICE | DMA_FROM_DEVICE))))
+ if (unlikely(!(direction & (DMA_DEV_TO_MEM | DMA_MEM_TO_DEV))))
goto err_out;
return 0;
@@ -810,7 +812,7 @@ err_out:
static int
atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc,
unsigned int period_index, dma_addr_t buf_addr,
- size_t period_len, enum dma_data_direction direction)
+ size_t period_len, enum dma_transfer_direction direction)
{
u32 ctrla;
unsigned int reg_width = atslave->reg_width;
@@ -822,7 +824,7 @@ atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc,
| period_len >> reg_width;
switch (direction) {
- case DMA_TO_DEVICE:
+ case DMA_MEM_TO_DEV:
desc->lli.saddr = buf_addr + (period_len * period_index);
desc->lli.daddr = atslave->tx_reg;
desc->lli.ctrla = ctrla;
@@ -833,7 +835,7 @@ atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc,
| ATC_DIF(AT_DMA_PER_IF);
break;
- case DMA_FROM_DEVICE:
+ case DMA_DEV_TO_MEM:
desc->lli.saddr = atslave->rx_reg;
desc->lli.daddr = buf_addr + (period_len * period_index);
desc->lli.ctrla = ctrla;
@@ -861,7 +863,7 @@ atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc,
*/
static struct dma_async_tx_descriptor *
atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
- size_t period_len, enum dma_data_direction direction)
+ size_t period_len, enum dma_transfer_direction direction)
{
struct at_dma_chan *atchan = to_at_dma_chan(chan);
struct at_dma_slave *atslave = chan->private;
@@ -872,7 +874,7 @@ atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
unsigned int i;
dev_vdbg(chan2dev(chan), "prep_dma_cyclic: %s buf@0x%08x - %d (%d/%d)\n",
- direction == DMA_TO_DEVICE ? "TO DEVICE" : "FROM DEVICE",
+ direction == DMA_MEM_TO_DEV ? "TO DEVICE" : "FROM DEVICE",
buf_addr,
periods, buf_len, period_len);
@@ -1175,6 +1177,56 @@ static void atc_free_chan_resources(struct dma_chan *chan)
/*-- Module Management -----------------------------------------------*/
+/* cap_mask is a multi-u32 bitfield, fill it with proper C code. */
+static struct at_dma_platform_data at91sam9rl_config = {
+ .nr_channels = 2,
+};
+static struct at_dma_platform_data at91sam9g45_config = {
+ .nr_channels = 8,
+};
+
+#if defined(CONFIG_OF)
+static const struct of_device_id atmel_dma_dt_ids[] = {
+ {
+ .compatible = "atmel,at91sam9rl-dma",
+ .data = &at91sam9rl_config,
+ }, {
+ .compatible = "atmel,at91sam9g45-dma",
+ .data = &at91sam9g45_config,
+ }, {
+ /* sentinel */
+ }
+};
+
+MODULE_DEVICE_TABLE(of, atmel_dma_dt_ids);
+#endif
+
+static const struct platform_device_id atdma_devtypes[] = {
+ {
+ .name = "at91sam9rl_dma",
+ .driver_data = (unsigned long) &at91sam9rl_config,
+ }, {
+ .name = "at91sam9g45_dma",
+ .driver_data = (unsigned long) &at91sam9g45_config,
+ }, {
+ /* sentinel */
+ }
+};
+
+static inline struct at_dma_platform_data * __init at_dma_get_driver_data(
+ struct platform_device *pdev)
+{
+ if (pdev->dev.of_node) {
+ const struct of_device_id *match;
+ match = of_match_node(atmel_dma_dt_ids, pdev->dev.of_node);
+ if (match == NULL)
+ return NULL;
+ return match->data;
+ }
+ return (struct at_dma_platform_data *)
+ platform_get_device_id(pdev)->driver_data;
+}
+
/**
* at_dma_off - disable DMA controller
* @atdma: the Atmel HDAMC device
@@ -1193,18 +1245,23 @@ static void at_dma_off(struct at_dma *atdma)
static int __init at_dma_probe(struct platform_device *pdev)
{
- struct at_dma_platform_data *pdata;
struct resource *io;
struct at_dma *atdma;
size_t size;
int irq;
int err;
int i;
+ struct at_dma_platform_data *plat_dat;
- /* get DMA Controller parameters from platform */
- pdata = pdev->dev.platform_data;
- if (!pdata || pdata->nr_channels > AT_DMA_MAX_NR_CHANNELS)
- return -EINVAL;
+ /* setup platform data for each SoC */
+ dma_cap_set(DMA_MEMCPY, at91sam9rl_config.cap_mask);
+ dma_cap_set(DMA_MEMCPY, at91sam9g45_config.cap_mask);
+ dma_cap_set(DMA_SLAVE, at91sam9g45_config.cap_mask);
+
+ /* get DMA parameters from controller type */
+ plat_dat = at_dma_get_driver_data(pdev);
+ if (!plat_dat)
+ return -ENODEV;
io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!io)
@@ -1215,14 +1272,14 @@ static int __init at_dma_probe(struct platform_device *pdev)
return irq;
size = sizeof(struct at_dma);
- size += pdata->nr_channels * sizeof(struct at_dma_chan);
+ size += plat_dat->nr_channels * sizeof(struct at_dma_chan);
atdma = kzalloc(size, GFP_KERNEL);
if (!atdma)
return -ENOMEM;
- /* discover transaction capabilites from the platform data */
- atdma->dma_common.cap_mask = pdata->cap_mask;
- atdma->all_chan_mask = (1 << pdata->nr_channels) - 1;
+ /* discover transaction capabilities */
+ atdma->dma_common.cap_mask = plat_dat->cap_mask;
+ atdma->all_chan_mask = (1 << plat_dat->nr_channels) - 1;
size = resource_size(io);
if (!request_mem_region(io->start, size, pdev->dev.driver->name)) {
@@ -1268,7 +1325,7 @@ static int __init at_dma_probe(struct platform_device *pdev)
/* initialize channels related values */
INIT_LIST_HEAD(&atdma->dma_common.channels);
- for (i = 0; i < pdata->nr_channels; i++) {
+ for (i = 0; i < plat_dat->nr_channels; i++) {
struct at_dma_chan *atchan = &atdma->chan[i];
atchan->chan_common.device = &atdma->dma_common;
@@ -1313,7 +1370,7 @@ static int __init at_dma_probe(struct platform_device *pdev)
dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s), %d channels\n",
dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask) ? "cpy " : "",
dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask) ? "slave " : "",
- pdata->nr_channels);
+ plat_dat->nr_channels);
dma_async_device_register(&atdma->dma_common);
@@ -1495,9 +1552,11 @@ static const struct dev_pm_ops at_dma_dev_pm_ops = {
static struct platform_driver at_dma_driver = {
.remove = __exit_p(at_dma_remove),
.shutdown = at_dma_shutdown,
+ .id_table = atdma_devtypes,
.driver = {
.name = "at_hdmac",
.pm = &at_dma_dev_pm_ops,
+ .of_match_table = of_match_ptr(atmel_dma_dt_ids),
},
};
diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h
index aa4c9aebab7..dcaedfc181c 100644
--- a/drivers/dma/at_hdmac_regs.h
+++ b/drivers/dma/at_hdmac_regs.h
@@ -251,6 +251,7 @@ static inline struct at_dma_chan *to_at_dma_chan(struct dma_chan *dchan)
/**
* struct at_dma - internal representation of an Atmel HDMA Controller
* @chan_common: common dmaengine dma_device object members
+ * @atdma_devtype: identifier of DMA controller compatibility
* @ch_regs: memory mapped register base
* @clk: dma controller clock
* @save_imr: interrupt mask register that is saved on suspend/resume cycle
diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c
index 4234f416ef1..d65a718c0f9 100644
--- a/drivers/dma/coh901318.c
+++ b/drivers/dma/coh901318.c
@@ -39,7 +39,7 @@ struct coh901318_desc {
struct scatterlist *sg;
unsigned int sg_len;
struct coh901318_lli *lli;
- enum dma_data_direction dir;
+ enum dma_transfer_direction dir;
unsigned long flags;
u32 head_config;
u32 head_ctrl;
@@ -1034,7 +1034,7 @@ coh901318_prep_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
static struct dma_async_tx_descriptor *
coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
- unsigned int sg_len, enum dma_data_direction direction,
+ unsigned int sg_len, enum dma_transfer_direction direction,
unsigned long flags)
{
struct coh901318_chan *cohc = to_coh901318_chan(chan);
@@ -1077,7 +1077,7 @@ coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
ctrl_last |= cohc->runtime_ctrl;
ctrl |= cohc->runtime_ctrl;
- if (direction == DMA_TO_DEVICE) {
+ if (direction == DMA_MEM_TO_DEV) {
u32 tx_flags = COH901318_CX_CTRL_PRDD_SOURCE |
COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE;
@@ -1085,7 +1085,7 @@ coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
ctrl_chained |= tx_flags;
ctrl_last |= tx_flags;
ctrl |= tx_flags;
- } else if (direction == DMA_FROM_DEVICE) {
+ } else if (direction == DMA_DEV_TO_MEM) {
u32 rx_flags = COH901318_CX_CTRL_PRDD_DEST |
COH901318_CX_CTRL_DST_ADDR_INC_ENABLE;
@@ -1274,11 +1274,11 @@ static void coh901318_dma_set_runtimeconfig(struct dma_chan *chan,
int i = 0;
/* We only support mem to per or per to mem transfers */
- if (config->direction == DMA_FROM_DEVICE) {
+ if (config->direction == DMA_DEV_TO_MEM) {
addr = config->src_addr;
addr_width = config->src_addr_width;
maxburst = config->src_maxburst;
- } else if (config->direction == DMA_TO_DEVICE) {
+ } else if (config->direction == DMA_MEM_TO_DEV) {
addr = config->dst_addr;
addr_width = config->dst_addr_width;
maxburst = config->dst_maxburst;
diff --git a/drivers/dma/coh901318_lli.c b/drivers/dma/coh901318_lli.c
index 9f7e0e6a7ee..6c0e2d4c668 100644
--- a/drivers/dma/coh901318_lli.c
+++ b/drivers/dma/coh901318_lli.c
@@ -7,11 +7,10 @@
* Author: Per Friden <per.friden@stericsson.com>
*/
-#include <linux/dma-mapping.h>
#include <linux/spinlock.h>
-#include <linux/dmapool.h>
#include <linux/memory.h>
#include <linux/gfp.h>
+#include <linux/dmapool.h>
#include <mach/coh901318.h>
#include "coh901318_lli.h"
@@ -177,18 +176,18 @@ coh901318_lli_fill_single(struct coh901318_pool *pool,
struct coh901318_lli *lli,
dma_addr_t buf, unsigned int size,
dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl_eom,
- enum dma_data_direction dir)
+ enum dma_transfer_direction dir)
{
int s = size;
dma_addr_t src;
dma_addr_t dst;
- if (dir == DMA_TO_DEVICE) {
+ if (dir == DMA_MEM_TO_DEV) {
src = buf;
dst = dev_addr;
- } else if (dir == DMA_FROM_DEVICE) {
+ } else if (dir == DMA_DEV_TO_MEM) {
src = dev_addr;
dst = buf;
@@ -215,9 +214,9 @@ coh901318_lli_fill_single(struct coh901318_pool *pool,
lli = coh901318_lli_next(lli);
- if (dir == DMA_TO_DEVICE)
+ if (dir == DMA_MEM_TO_DEV)
src += block_size;
- else if (dir == DMA_FROM_DEVICE)
+ else if (dir == DMA_DEV_TO_MEM)
dst += block_size;
}
@@ -234,7 +233,7 @@ coh901318_lli_fill_sg(struct coh901318_pool *pool,
struct scatterlist *sgl, unsigned int nents,
dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl,
u32 ctrl_last,
- enum dma_data_direction dir, u32 ctrl_irq_mask)
+ enum dma_transfer_direction dir, u32 ctrl_irq_mask)
{
int i;
struct scatterlist *sg;
@@ -249,9 +248,9 @@ coh901318_lli_fill_sg(struct coh901318_pool *pool,
spin_lock(&pool->lock);
- if (dir == DMA_TO_DEVICE)
+ if (dir == DMA_MEM_TO_DEV)
dst = dev_addr;
- else if (dir == DMA_FROM_DEVICE)
+ else if (dir == DMA_DEV_TO_MEM)
src = dev_addr;
else
goto err;
@@ -269,7 +268,7 @@ coh901318_lli_fill_sg(struct coh901318_pool *pool,
ctrl_sg = ctrl ? ctrl : ctrl_last;
- if (dir == DMA_TO_DEVICE)
+ if (dir == DMA_MEM_TO_DEV)
/* increment source address */
src = sg_phys(sg);
else
@@ -293,7 +292,7 @@ coh901318_lli_fill_sg(struct coh901318_pool *pool,
lli->src_addr = src;
lli->dst_addr = dst;
- if (dir == DMA_FROM_DEVICE)
+ if (dir == DMA_DEV_TO_MEM)
dst += elem_size;
else
src += elem_size;
diff --git a/drivers/dma/coh901318_lli.h b/drivers/dma/coh901318_lli.h
index 7a5c80990e9..abff3714fdd 100644
--- a/drivers/dma/coh901318_lli.h
+++ b/drivers/dma/coh901318_lli.h
@@ -97,7 +97,7 @@ coh901318_lli_fill_single(struct coh901318_pool *pool,
struct coh901318_lli *lli,
dma_addr_t buf, unsigned int size,
dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl_last,
- enum dma_data_direction dir);
+ enum dma_transfer_direction dir);
/**
* coh901318_lli_fill_single() - Prepares the lli:s for dma scatter list transfer
@@ -119,6 +119,6 @@ coh901318_lli_fill_sg(struct coh901318_pool *pool,
struct scatterlist *sg, unsigned int nents,
dma_addr_t dev_addr, u32 ctrl_chained,
u32 ctrl, u32 ctrl_last,
- enum dma_data_direction dir, u32 ctrl_irq_mask);
+ enum dma_transfer_direction dir, u32 ctrl_irq_mask);
#endif /* COH901318_LLI_H */
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index b48967b499d..a6c6051ec85 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -693,12 +693,12 @@ int dma_async_device_register(struct dma_device *device)
!device->device_prep_dma_interrupt);
BUG_ON(dma_has_cap(DMA_SG, device->cap_mask) &&
!device->device_prep_dma_sg);
- BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
- !device->device_prep_slave_sg);
BUG_ON(dma_has_cap(DMA_CYCLIC, device->cap_mask) &&
!device->device_prep_dma_cyclic);
BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
!device->device_control);
+ BUG_ON(dma_has_cap(DMA_INTERLEAVE, device->cap_mask) &&
+ !device->device_prep_interleaved_dma);
BUG_ON(!device->device_alloc_chan_resources);
BUG_ON(!device->device_free_chan_resources);
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index 9bfd6d36071..9b592b02b5f 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -166,6 +166,38 @@ dwc_assign_cookie(struct dw_dma_chan *dwc, struct dw_desc *desc)
return cookie;
}
+static void dwc_initialize(struct dw_dma_chan *dwc)
+{
+ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+ struct dw_dma_slave *dws = dwc->chan.private;
+ u32 cfghi = DWC_CFGH_FIFO_MODE;
+ u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority);
+
+ if (dwc->initialized == true)
+ return;
+
+ if (dws) {
+ /*
+ * We need controller-specific data to set up slave
+ * transfers.
+ */
+ BUG_ON(!dws->dma_dev || dws->dma_dev != dw->dma.dev);
+
+ cfghi = dws->cfg_hi;
+ cfglo |= dws->cfg_lo & ~DWC_CFGL_CH_PRIOR_MASK;
+ }
+
+ channel_writel(dwc, CFG_LO, cfglo);
+ channel_writel(dwc, CFG_HI, cfghi);
+
+ /* Enable interrupts */
+ channel_set_bit(dw, MASK.XFER, dwc->mask);
+ channel_set_bit(dw, MASK.BLOCK, dwc->mask);
+ channel_set_bit(dw, MASK.ERROR, dwc->mask);
+
+ dwc->initialized = true;
+}
+
/*----------------------------------------------------------------------*/
/* Called with dwc->lock held and bh disabled */
@@ -189,6 +221,8 @@ static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first)
return;
}
+ dwc_initialize(dwc);
+
channel_writel(dwc, LLP, first->txd.phys);
channel_writel(dwc, CTL_LO,
DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
@@ -696,7 +730,7 @@ err_desc_get:
static struct dma_async_tx_descriptor *
dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
- unsigned int sg_len, enum dma_data_direction direction,
+ unsigned int sg_len, enum dma_transfer_direction direction,
unsigned long flags)
{
struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
@@ -720,7 +754,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
prev = first = NULL;
switch (direction) {
- case DMA_TO_DEVICE:
+ case DMA_MEM_TO_DEV:
ctllo = (DWC_DEFAULT_CTLLO(chan->private)
| DWC_CTLL_DST_WIDTH(reg_width)
| DWC_CTLL_DST_FIX
@@ -777,7 +811,7 @@ slave_sg_todev_fill_desc:
goto slave_sg_todev_fill_desc;
}
break;
- case DMA_FROM_DEVICE:
+ case DMA_DEV_TO_MEM:
ctllo = (DWC_DEFAULT_CTLLO(chan->private)
| DWC_CTLL_SRC_WIDTH(reg_width)
| DWC_CTLL_DST_INC
@@ -959,10 +993,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan)
struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
struct dw_dma *dw = to_dw_dma(chan->device);
struct dw_desc *desc;
- struct dw_dma_slave *dws;
int i;
- u32 cfghi;
- u32 cfglo;
unsigned long flags;
dev_vdbg(chan2dev(chan), "alloc_chan_resources\n");
@@ -975,26 +1006,6 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan)
dwc->completed = chan->cookie = 1;
- cfghi = DWC_CFGH_FIFO_MODE;
- cfglo = 0;
-
- dws = chan->private;
- if (dws) {
- /*
- * We need controller-specific data to set up slave
- * transfers.
- */
- BUG_ON(!dws->dma_dev || dws->dma_dev != dw->dma.dev);
-
- cfghi = dws->cfg_hi;
- cfglo = dws->cfg_lo & ~DWC_CFGL_CH_PRIOR_MASK;
- }
-
- cfglo |= DWC_CFGL_CH_PRIOR(dwc->priority);
-
- channel_writel(dwc, CFG_LO, cfglo);
- channel_writel(dwc, CFG_HI, cfghi);
-
/*
* NOTE: some controllers may have additional features that we
* need to initialize here, like "scatter-gather" (which
@@ -1026,11 +1037,6 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan)
i = ++dwc->descs_allocated;
}
- /* Enable interrupts */
- channel_set_bit(dw, MASK.XFER, dwc->mask);
- channel_set_bit(dw, MASK.BLOCK, dwc->mask);
- channel_set_bit(dw, MASK.ERROR, dwc->mask);
-
spin_unlock_irqrestore(&dwc->lock, flags);
dev_dbg(chan2dev(chan),
@@ -1058,6 +1064,7 @@ static void dwc_free_chan_resources(struct dma_chan *chan)
spin_lock_irqsave(&dwc->lock, flags);
list_splice_init(&dwc->free_list, &list);
dwc->descs_allocated = 0;
+ dwc->initialized = false;
/* Disable interrupts */
channel_clear_bit(dw, MASK.XFER, dwc->mask);
@@ -1165,7 +1172,7 @@ EXPORT_SYMBOL(dw_dma_cyclic_stop);
*/
struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
dma_addr_t buf_addr, size_t buf_len, size_t period_len,
- enum dma_data_direction direction)
+ enum dma_transfer_direction direction)
{
struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
struct dw_cyclic_desc *cdesc;
@@ -1206,7 +1213,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
goto out_err;
if (unlikely(buf_addr & ((1 << reg_width) - 1)))
goto out_err;
- if (unlikely(!(direction & (DMA_TO_DEVICE | DMA_FROM_DEVICE))))
+ if (unlikely(!(direction & (DMA_MEM_TO_DEV | DMA_DEV_TO_MEM))))
goto out_err;
retval = ERR_PTR(-ENOMEM);
@@ -1228,7 +1235,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
goto out_err_desc_get;
switch (direction) {
- case DMA_TO_DEVICE:
+ case DMA_MEM_TO_DEV:
desc->lli.dar = dws->tx_reg;
desc->lli.sar = buf_addr + (period_len * i);
desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan->private)
@@ -1239,7 +1246,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
| DWC_CTLL_FC(dws->fc)
| DWC_CTLL_INT_EN);
break;
- case DMA_FROM_DEVICE:
+ case DMA_DEV_TO_MEM:
desc->lli.dar = buf_addr + (period_len * i);
desc->lli.sar = dws->rx_reg;
desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan->private)
@@ -1335,6 +1342,8 @@ EXPORT_SYMBOL(dw_dma_cyclic_free);
static void dw_dma_off(struct dw_dma *dw)
{
+ int i;
+
dma_writel(dw, CFG, 0);
channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
@@ -1345,6 +1354,9 @@ static void dw_dma_off(struct dw_dma *dw)
while (dma_readl(dw, CFG) & DW_CFG_DMA_EN)
cpu_relax();
+
+ for (i = 0; i < dw->dma.chancnt; i++)
+ dw->chan[i].initialized = false;
}
static int __init dw_probe(struct platform_device *pdev)
@@ -1533,6 +1545,7 @@ static int dw_suspend_noirq(struct device *dev)
dw_dma_off(platform_get_drvdata(pdev));
clk_disable(dw->clk);
+
return 0;
}
diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h
index c3419518d70..5eef6946a36 100644
--- a/drivers/dma/dw_dmac_regs.h
+++ b/drivers/dma/dw_dmac_regs.h
@@ -140,6 +140,7 @@ struct dw_dma_chan {
u8 mask;
u8 priority;
bool paused;
+ bool initialized;
spinlock_t lock;
diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c
index b47e2b803fa..59e7a965772 100644
--- a/drivers/dma/ep93xx_dma.c
+++ b/drivers/dma/ep93xx_dma.c
@@ -246,6 +246,9 @@ static void ep93xx_dma_set_active(struct ep93xx_dma_chan *edmac,
static struct ep93xx_dma_desc *
ep93xx_dma_get_active(struct ep93xx_dma_chan *edmac)
{
+ if (list_empty(&edmac->active))
+ return NULL;
+
return list_first_entry(&edmac->active, struct ep93xx_dma_desc, node);
}
@@ -263,16 +266,22 @@ ep93xx_dma_get_active(struct ep93xx_dma_chan *edmac)
*/
static bool ep93xx_dma_advance_active(struct ep93xx_dma_chan *edmac)
{
+ struct ep93xx_dma_desc *desc;
+
list_rotate_left(&edmac->active);
if (test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags))
return true;
+ desc = ep93xx_dma_get_active(edmac);
+ if (!desc)
+ return false;
+
/*
* If txd.cookie is set it means that we are back in the first
* descriptor in the chain and hence done with it.
*/
- return !ep93xx_dma_get_active(edmac)->txd.cookie;
+ return !desc->txd.cookie;
}
/*
@@ -327,10 +336,16 @@ static void m2p_hw_shutdown(struct ep93xx_dma_chan *edmac)
static void m2p_fill_desc(struct ep93xx_dma_chan *edmac)
{
- struct ep93xx_dma_desc *desc = ep93xx_dma_get_active(edmac);
+ struct ep93xx_dma_desc *desc;
u32 bus_addr;
- if (ep93xx_dma_chan_direction(&edmac->chan) == DMA_TO_DEVICE)
+ desc = ep93xx_dma_get_active(edmac);
+ if (!desc) {
+ dev_warn(chan2dev(edmac), "M2P: empty descriptor list\n");
+ return;
+ }
+
+ if (ep93xx_dma_chan_direction(&edmac->chan) == DMA_MEM_TO_DEV)
bus_addr = desc->src_addr;
else
bus_addr = desc->dst_addr;
@@ -443,7 +458,7 @@ static int m2m_hw_setup(struct ep93xx_dma_chan *edmac)
control = (5 << M2M_CONTROL_PWSC_SHIFT);
control |= M2M_CONTROL_NO_HDSK;
- if (data->direction == DMA_TO_DEVICE) {
+ if (data->direction == DMA_MEM_TO_DEV) {
control |= M2M_CONTROL_DAH;
control |= M2M_CONTROL_TM_TX;
control |= M2M_CONTROL_RSS_SSPTX;
@@ -459,11 +474,7 @@ static int m2m_hw_setup(struct ep93xx_dma_chan *edmac)
* This IDE part is totally untested. Values below are taken
* from the EP93xx Users's Guide and might not be correct.
*/
- control |= M2M_CONTROL_NO_HDSK;
- control |= M2M_CONTROL_RSS_IDE;
- control |= M2M_CONTROL_PW_16;
-
- if (data->direction == DMA_TO_DEVICE) {
+ if (data->direction == DMA_MEM_TO_DEV) {
/* Worst case from the UG */
control = (3 << M2M_CONTROL_PWSC_SHIFT);
control |= M2M_CONTROL_DAH;
@@ -473,6 +484,10 @@ static int m2m_hw_setup(struct ep93xx_dma_chan *edmac)
control |= M2M_CONTROL_SAH;
control |= M2M_CONTROL_TM_RX;
}
+
+ control |= M2M_CONTROL_NO_HDSK;
+ control |= M2M_CONTROL_RSS_IDE;
+ control |= M2M_CONTROL_PW_16;
break;
default:
@@ -491,7 +506,13 @@ static void m2m_hw_shutdown(struct ep93xx_dma_chan *edmac)
static void m2m_fill_desc(struct ep93xx_dma_chan *edmac)
{
- struct ep93xx_dma_desc *desc = ep93xx_dma_get_active(edmac);
+ struct ep93xx_dma_desc *desc;
+
+ desc = ep93xx_dma_get_active(edmac);
+ if (!desc) {
+ dev_warn(chan2dev(edmac), "M2M: empty descriptor list\n");
+ return;
+ }
if (edmac->buffer == 0) {
writel(desc->src_addr, edmac->regs + M2M_SAR_BASE0);
@@ -669,24 +690,30 @@ static void ep93xx_dma_tasklet(unsigned long data)
{
struct ep93xx_dma_chan *edmac = (struct ep93xx_dma_chan *)data;
struct ep93xx_dma_desc *desc, *d;
- dma_async_tx_callback callback;
- void *callback_param;
+ dma_async_tx_callback callback = NULL;
+ void *callback_param = NULL;
LIST_HEAD(list);
spin_lock_irq(&edmac->lock);
+ /*
+ * If dma_terminate_all() was called before we get to run, the active
+ * list has become empty. If that happens we aren't supposed to do
+ * anything more than call ep93xx_dma_advance_work().
+ */
desc = ep93xx_dma_get_active(edmac);
- if (desc->complete) {
- edmac->last_completed = desc->txd.cookie;
- list_splice_init(&edmac->active, &list);
+ if (desc) {
+ if (desc->complete) {
+ edmac->last_completed = desc->txd.cookie;
+ list_splice_init(&edmac->active, &list);
+ }
+ callback = desc->txd.callback;
+ callback_param = desc->txd.callback_param;
}
spin_unlock_irq(&edmac->lock);
/* Pick up the next descriptor from the queue */
ep93xx_dma_advance_work(edmac);
- callback = desc->txd.callback;
- callback_param = desc->txd.callback_param;
-
/* Now we can release all the chained descriptors */
list_for_each_entry_safe(desc, d, &list, node) {
/*
@@ -706,13 +733,22 @@ static void ep93xx_dma_tasklet(unsigned long data)
static irqreturn_t ep93xx_dma_interrupt(int irq, void *dev_id)
{
struct ep93xx_dma_chan *edmac = dev_id;
+ struct ep93xx_dma_desc *desc;
irqreturn_t ret = IRQ_HANDLED;
spin_lock(&edmac->lock);
+ desc = ep93xx_dma_get_active(edmac);
+ if (!desc) {
+ dev_warn(chan2dev(edmac),
+ "got interrupt while active list is empty\n");
+ spin_unlock(&edmac->lock);
+ return IRQ_NONE;
+ }
+
switch (edmac->edma->hw_interrupt(edmac)) {
case INTERRUPT_DONE:
- ep93xx_dma_get_active(edmac)->complete = true;
+ desc->complete = true;
tasklet_schedule(&edmac->tasklet);
break;
@@ -803,8 +839,8 @@ static int ep93xx_dma_alloc_chan_resources(struct dma_chan *chan)
switch (data->port) {
case EP93XX_DMA_SSP:
case EP93XX_DMA_IDE:
- if (data->direction != DMA_TO_DEVICE &&
- data->direction != DMA_FROM_DEVICE)
+ if (data->direction != DMA_MEM_TO_DEV &&
+ data->direction != DMA_DEV_TO_MEM)
return -EINVAL;
break;
default:
@@ -952,7 +988,7 @@ fail:
*/
static struct dma_async_tx_descriptor *
ep93xx_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
- unsigned int sg_len, enum dma_data_direction dir,
+ unsigned int sg_len, enum dma_transfer_direction dir,
unsigned long flags)
{
struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
@@ -988,7 +1024,7 @@ ep93xx_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
goto fail;
}
- if (dir == DMA_TO_DEVICE) {
+ if (dir == DMA_MEM_TO_DEV) {
desc->src_addr = sg_dma_address(sg);
desc->dst_addr = edmac->runtime_addr;
} else {
@@ -1032,7 +1068,7 @@ fail:
static struct dma_async_tx_descriptor *
ep93xx_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
size_t buf_len, size_t period_len,
- enum dma_data_direction dir)
+ enum dma_transfer_direction dir)
{
struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
struct ep93xx_dma_desc *desc, *first;
@@ -1065,7 +1101,7 @@ ep93xx_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
goto fail;
}
- if (dir == DMA_TO_DEVICE) {
+ if (dir == DMA_MEM_TO_DEV) {
desc->src_addr = dma_addr + offset;
desc->dst_addr = edmac->runtime_addr;
} else {
@@ -1133,12 +1169,12 @@ static int ep93xx_dma_slave_config(struct ep93xx_dma_chan *edmac,
return -EINVAL;
switch (config->direction) {
- case DMA_FROM_DEVICE:
+ case DMA_DEV_TO_MEM:
width = config->src_addr_width;
addr = config->src_addr;
break;
- case DMA_TO_DEVICE:
+ case DMA_MEM_TO_DEV:
width = config->dst_addr_width;
addr = config->dst_addr;
break;
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 8a781540590..b98070c33ca 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -772,7 +772,7 @@ fail:
*/
static struct dma_async_tx_descriptor *fsl_dma_prep_slave_sg(
struct dma_chan *dchan, struct scatterlist *sgl, unsigned int sg_len,
- enum dma_data_direction direction, unsigned long flags)
+ enum dma_transfer_direction direction, unsigned long flags)
{
/*
* This operation is not supported on the Freescale DMA controller
@@ -819,7 +819,7 @@ static int fsl_dma_device_control(struct dma_chan *dchan,
return -ENXIO;
/* we set the controller burst size depending on direction */
- if (config->direction == DMA_TO_DEVICE)
+ if (config->direction == DMA_MEM_TO_DEV)
size = config->dst_addr_width * config->dst_maxburst;
else
size = config->src_addr_width * config->src_maxburst;
diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c
index 4be55f9bb6c..e4383ee2c9a 100644
--- a/drivers/dma/imx-dma.c
+++ b/drivers/dma/imx-dma.c
@@ -107,7 +107,7 @@ static int imxdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
imx_dma_disable(imxdmac->imxdma_channel);
return 0;
case DMA_SLAVE_CONFIG:
- if (dmaengine_cfg->direction == DMA_FROM_DEVICE) {
+ if (dmaengine_cfg->direction == DMA_DEV_TO_MEM) {
imxdmac->per_address = dmaengine_cfg->src_addr;
imxdmac->watermark_level = dmaengine_cfg->src_maxburst;
imxdmac->word_size = dmaengine_cfg->src_addr_width;
@@ -224,7 +224,7 @@ static void imxdma_free_chan_resources(struct dma_chan *chan)
static struct dma_async_tx_descriptor *imxdma_prep_slave_sg(
struct dma_chan *chan, struct scatterlist *sgl,
- unsigned int sg_len, enum dma_data_direction direction,
+ unsigned int sg_len, enum dma_transfer_direction direction,
unsigned long flags)
{
struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
@@ -241,7 +241,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_slave_sg(
dma_length += sg->length;
}
- if (direction == DMA_FROM_DEVICE)
+ if (direction == DMA_DEV_TO_MEM)
dmamode = DMA_MODE_READ;
else
dmamode = DMA_MODE_WRITE;
@@ -271,7 +271,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_slave_sg(
static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
- size_t period_len, enum dma_data_direction direction)
+ size_t period_len, enum dma_transfer_direction direction)
{
struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
struct imxdma_engine *imxdma = imxdmac->imxdma;
@@ -317,7 +317,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
imxdmac->sg_list[periods].page_link =
((unsigned long)imxdmac->sg_list | 0x01) & ~0x02;
- if (direction == DMA_FROM_DEVICE)
+ if (direction == DMA_DEV_TO_MEM)
dmamode = DMA_MODE_READ;
else
dmamode = DMA_MODE_WRITE;
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index f993955a640..a8af379680c 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -247,7 +247,7 @@ struct sdma_engine;
struct sdma_channel {
struct sdma_engine *sdma;
unsigned int channel;
- enum dma_data_direction direction;
+ enum dma_transfer_direction direction;
enum sdma_peripheral_type peripheral_type;
unsigned int event_id0;
unsigned int event_id1;
@@ -268,6 +268,8 @@ struct sdma_channel {
struct dma_async_tx_descriptor desc;
dma_cookie_t last_completed;
enum dma_status status;
+ unsigned int chn_count;
+ unsigned int chn_real_count;
};
#define IMX_DMA_SG_LOOP (1 << 0)
@@ -503,6 +505,7 @@ static void mxc_sdma_handle_channel_normal(struct sdma_channel *sdmac)
struct sdma_buffer_descriptor *bd;
int i, error = 0;
+ sdmac->chn_real_count = 0;
/*
* non loop mode. Iterate over all descriptors, collect
* errors and call callback function
@@ -512,6 +515,7 @@ static void mxc_sdma_handle_channel_normal(struct sdma_channel *sdmac)
if (bd->mode.status & (BD_DONE | BD_RROR))
error = -EIO;
+ sdmac->chn_real_count += bd->mode.count;
}
if (error)
@@ -519,9 +523,9 @@ static void mxc_sdma_handle_channel_normal(struct sdma_channel *sdmac)
else
sdmac->status = DMA_SUCCESS;
+ sdmac->last_completed = sdmac->desc.cookie;
if (sdmac->desc.callback)
sdmac->desc.callback(sdmac->desc.callback_param);
- sdmac->last_completed = sdmac->desc.cookie;
}
static void mxc_sdma_handle_channel(struct sdma_channel *sdmac)
@@ -650,7 +654,7 @@ static int sdma_load_context(struct sdma_channel *sdmac)
struct sdma_buffer_descriptor *bd0 = sdma->channel[0].bd;
int ret;
- if (sdmac->direction == DMA_FROM_DEVICE) {
+ if (sdmac->direction == DMA_DEV_TO_MEM) {
load_address = sdmac->pc_from_device;
} else {
load_address = sdmac->pc_to_device;
@@ -832,17 +836,18 @@ static struct sdma_channel *to_sdma_chan(struct dma_chan *chan)
static dma_cookie_t sdma_tx_submit(struct dma_async_tx_descriptor *tx)
{
+ unsigned long flags;
struct sdma_channel *sdmac = to_sdma_chan(tx->chan);
struct sdma_engine *sdma = sdmac->sdma;
dma_cookie_t cookie;
- spin_lock_irq(&sdmac->lock);
+ spin_lock_irqsave(&sdmac->lock, flags);
cookie = sdma_assign_cookie(sdmac);
sdma_enable_channel(sdma, sdmac->channel);
- spin_unlock_irq(&sdmac->lock);
+ spin_unlock_irqrestore(&sdmac->lock, flags);
return cookie;
}
@@ -911,7 +916,7 @@ static void sdma_free_chan_resources(struct dma_chan *chan)
static struct dma_async_tx_descriptor *sdma_prep_slave_sg(
struct dma_chan *chan, struct scatterlist *sgl,
- unsigned int sg_len, enum dma_data_direction direction,
+ unsigned int sg_len, enum dma_transfer_direction direction,
unsigned long flags)
{
struct sdma_channel *sdmac = to_sdma_chan(chan);
@@ -941,6 +946,7 @@ static struct dma_async_tx_descriptor *sdma_prep_slave_sg(
goto err_out;
}
+ sdmac->chn_count = 0;
for_each_sg(sgl, sg, sg_len, i) {
struct sdma_buffer_descriptor *bd = &sdmac->bd[i];
int param;
@@ -957,6 +963,7 @@ static struct dma_async_tx_descriptor *sdma_prep_slave_sg(
}
bd->mode.count = count;
+ sdmac->chn_count += count;
if (sdmac->word_size > DMA_SLAVE_BUSWIDTH_4_BYTES) {
ret = -EINVAL;
@@ -1008,7 +1015,7 @@ err_out:
static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic(
struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
- size_t period_len, enum dma_data_direction direction)
+ size_t period_len, enum dma_transfer_direction direction)
{
struct sdma_channel *sdmac = to_sdma_chan(chan);
struct sdma_engine *sdma = sdmac->sdma;
@@ -1093,7 +1100,7 @@ static int sdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
sdma_disable_channel(sdmac);
return 0;
case DMA_SLAVE_CONFIG:
- if (dmaengine_cfg->direction == DMA_FROM_DEVICE) {
+ if (dmaengine_cfg->direction == DMA_DEV_TO_MEM) {
sdmac->per_address = dmaengine_cfg->src_addr;
sdmac->watermark_level = dmaengine_cfg->src_maxburst;
sdmac->word_size = dmaengine_cfg->src_addr_width;
@@ -1102,6 +1109,7 @@ static int sdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
sdmac->watermark_level = dmaengine_cfg->dst_maxburst;
sdmac->word_size = dmaengine_cfg->dst_addr_width;
}
+ sdmac->direction = dmaengine_cfg->direction;
return sdma_config_channel(sdmac);
default:
return -ENOSYS;
@@ -1119,7 +1127,8 @@ static enum dma_status sdma_tx_status(struct dma_chan *chan,
last_used = chan->cookie;
- dma_set_tx_state(txstate, sdmac->last_completed, last_used, 0);
+ dma_set_tx_state(txstate, sdmac->last_completed, last_used,
+ sdmac->chn_count - sdmac->chn_real_count);
return sdmac->status;
}
diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c
index 19a0c64d45d..74f70aadf9e 100644
--- a/drivers/dma/intel_mid_dma.c
+++ b/drivers/dma/intel_mid_dma.c
@@ -280,7 +280,8 @@ static void midc_dostart(struct intel_mid_dma_chan *midc,
* callbacks but must be called with the lock held.
*/
static void midc_descriptor_complete(struct intel_mid_dma_chan *midc,
- struct intel_mid_dma_desc *desc)
+ struct intel_mid_dma_desc *desc)
+ __releases(&midc->lock) __acquires(&midc->lock)
{
struct dma_async_tx_descriptor *txd = &desc->txd;
dma_async_tx_callback callback_txd = NULL;
@@ -311,6 +312,7 @@ static void midc_descriptor_complete(struct intel_mid_dma_chan *midc,
pci_pool_free(desc->lli_pool, desc->lli,
desc->lli_phys);
pci_pool_destroy(desc->lli_pool);
+ desc->lli = NULL;
}
list_move(&desc->desc_node, &midc->free_list);
midc->busy = false;
@@ -395,10 +397,10 @@ static int midc_lli_fill_sg(struct intel_mid_dma_chan *midc,
midc->dma->block_size);
/*Populate SAR and DAR values*/
sg_phy_addr = sg_phys(sg);
- if (desc->dirn == DMA_TO_DEVICE) {
+ if (desc->dirn == DMA_MEM_TO_DEV) {
lli_bloc_desc->sar = sg_phy_addr;
lli_bloc_desc->dar = mids->dma_slave.dst_addr;
- } else if (desc->dirn == DMA_FROM_DEVICE) {
+ } else if (desc->dirn == DMA_DEV_TO_MEM) {
lli_bloc_desc->sar = mids->dma_slave.src_addr;
lli_bloc_desc->dar = sg_phy_addr;
}
@@ -490,7 +492,9 @@ static enum dma_status intel_mid_dma_tx_status(struct dma_chan *chan,
ret = dma_async_is_complete(cookie, last_complete, last_used);
if (ret != DMA_SUCCESS) {
+ spin_lock_bh(&midc->lock);
midc_scan_descriptors(to_middma_device(chan->device), midc);
+ spin_unlock_bh(&midc->lock);
last_complete = midc->completed;
last_used = chan->cookie;
@@ -566,6 +570,7 @@ static int intel_mid_dma_device_control(struct dma_chan *chan,
pci_pool_free(desc->lli_pool, desc->lli,
desc->lli_phys);
pci_pool_destroy(desc->lli_pool);
+ desc->lli = NULL;
}
list_move(&desc->desc_node, &midc->free_list);
}
@@ -632,13 +637,13 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy(
if (midc->dma->pimr_mask) {
cfg_hi.cfgx.protctl = 0x0; /*default value*/
cfg_hi.cfgx.fifo_mode = 1;
- if (mids->dma_slave.direction == DMA_TO_DEVICE) {
+ if (mids->dma_slave.direction == DMA_MEM_TO_DEV) {
cfg_hi.cfgx.src_per = 0;
if (mids->device_instance == 0)
cfg_hi.cfgx.dst_per = 3;
if (mids->device_instance == 1)
cfg_hi.cfgx.dst_per = 1;
- } else if (mids->dma_slave.direction == DMA_FROM_DEVICE) {
+ } else if (mids->dma_slave.direction == DMA_DEV_TO_MEM) {
if (mids->device_instance == 0)
cfg_hi.cfgx.src_per = 2;
if (mids->device_instance == 1)
@@ -682,11 +687,11 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy(
ctl_lo.ctlx.sinc = 0;
ctl_lo.ctlx.dinc = 0;
} else {
- if (mids->dma_slave.direction == DMA_TO_DEVICE) {
+ if (mids->dma_slave.direction == DMA_MEM_TO_DEV) {
ctl_lo.ctlx.sinc = 0;
ctl_lo.ctlx.dinc = 2;
ctl_lo.ctlx.tt_fc = 1;
- } else if (mids->dma_slave.direction == DMA_FROM_DEVICE) {
+ } else if (mids->dma_slave.direction == DMA_DEV_TO_MEM) {
ctl_lo.ctlx.sinc = 2;
ctl_lo.ctlx.dinc = 0;
ctl_lo.ctlx.tt_fc = 2;
@@ -732,7 +737,7 @@ err_desc_get:
*/
static struct dma_async_tx_descriptor *intel_mid_dma_prep_slave_sg(
struct dma_chan *chan, struct scatterlist *sgl,
- unsigned int sg_len, enum dma_data_direction direction,
+ unsigned int sg_len, enum dma_transfer_direction direction,
unsigned long flags)
{
struct intel_mid_dma_chan *midc = NULL;
@@ -868,7 +873,7 @@ static int intel_mid_dma_alloc_chan_resources(struct dma_chan *chan)
pm_runtime_get_sync(&mid->pdev->dev);
if (mid->state == SUSPENDED) {
- if (dma_resume(mid->pdev)) {
+ if (dma_resume(&mid->pdev->dev)) {
pr_err("ERR_MDMA: resume failed");
return -EFAULT;
}
@@ -1099,7 +1104,8 @@ static int mid_setup_dma(struct pci_dev *pdev)
LNW_PERIPHRAL_MASK_SIZE);
if (dma->mask_reg == NULL) {
pr_err("ERR_MDMA:Can't map periphral intr space !!\n");
- return -ENOMEM;
+ err = -ENOMEM;
+ goto err_ioremap;
}
} else
dma->mask_reg = NULL;
@@ -1196,6 +1202,9 @@ static int mid_setup_dma(struct pci_dev *pdev)
err_engine:
free_irq(pdev->irq, dma);
err_irq:
+ if (dma->mask_reg)
+ iounmap(dma->mask_reg);
+err_ioremap:
pci_pool_destroy(dma->dma_pool);
err_dma_pool:
pr_err("ERR_MDMA:setup_dma failed: %d\n", err);
@@ -1337,8 +1346,9 @@ static void __devexit intel_mid_dma_remove(struct pci_dev *pdev)
*
* This function is called by OS when a power event occurs
*/
-int dma_suspend(struct pci_dev *pci, pm_message_t state)
+static int dma_suspend(struct device *dev)
{
+ struct pci_dev *pci = to_pci_dev(dev);
int i;
struct middma_device *device = pci_get_drvdata(pci);
pr_debug("MDMA: dma_suspend called\n");
@@ -1362,8 +1372,9 @@ int dma_suspend(struct pci_dev *pci, pm_message_t state)
*
* This function is called by OS when a power event occurs
*/
-int dma_resume(struct pci_dev *pci)
+int dma_resume(struct device *dev)
{
+ struct pci_dev *pci = to_pci_dev(dev);
int ret;
struct middma_device *device = pci_get_drvdata(pci);
@@ -1429,6 +1440,8 @@ static const struct dev_pm_ops intel_mid_dma_pm = {
.runtime_suspend = dma_runtime_suspend,
.runtime_resume = dma_runtime_resume,
.runtime_idle = dma_runtime_idle,
+ .suspend = dma_suspend,
+ .resume = dma_resume,
};
static struct pci_driver intel_mid_dma_pci_driver = {
@@ -1437,8 +1450,6 @@ static struct pci_driver intel_mid_dma_pci_driver = {
.probe = intel_mid_dma_probe,
.remove = __devexit_p(intel_mid_dma_remove),
#ifdef CONFIG_PM
- .suspend = dma_suspend,
- .resume = dma_resume,
.driver = {
.pm = &intel_mid_dma_pm,
},
diff --git a/drivers/dma/intel_mid_dma_regs.h b/drivers/dma/intel_mid_dma_regs.h
index aea5ee88ce0..c83d35b97bd 100644
--- a/drivers/dma/intel_mid_dma_regs.h
+++ b/drivers/dma/intel_mid_dma_regs.h
@@ -262,7 +262,7 @@ struct intel_mid_dma_desc {
unsigned int lli_length;
unsigned int current_lli;
dma_addr_t next;
- enum dma_data_direction dirn;
+ enum dma_transfer_direction dirn;
enum dma_status status;
enum dma_slave_buswidth width; /*width of DMA txn*/
enum intel_mid_dma_mode cfg_mode; /*mode configuration*/
@@ -296,6 +296,6 @@ static inline struct intel_mid_dma_slave *to_intel_mid_dma_slave
}
-int dma_resume(struct pci_dev *pci);
+int dma_resume(struct device *dev);
#endif /*__INTEL_MID_DMAC_REGS_H__*/
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index e03f811a83d..04be90b645b 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -1735,8 +1735,6 @@ static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan)
spin_unlock_bh(&iop_chan->lock);
}
-MODULE_ALIAS("platform:iop-adma");
-
static struct platform_driver iop_adma_driver = {
.probe = iop_adma_probe,
.remove = __devexit_p(iop_adma_remove),
@@ -1746,19 +1744,9 @@ static struct platform_driver iop_adma_driver = {
},
};
-static int __init iop_adma_init (void)
-{
- return platform_driver_register(&iop_adma_driver);
-}
-
-static void __exit iop_adma_exit (void)
-{
- platform_driver_unregister(&iop_adma_driver);
- return;
-}
-module_exit(iop_adma_exit);
-module_init(iop_adma_init);
+module_platform_driver(iop_adma_driver);
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("IOP ADMA Engine Driver");
MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:iop-adma");
diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c
index 0e5ef33f90a..6212b16e8cf 100644
--- a/drivers/dma/ipu/ipu_idmac.c
+++ b/drivers/dma/ipu/ipu_idmac.c
@@ -312,7 +312,7 @@ static void ipu_ch_param_set_size(union chan_param_mem *params,
case IPU_PIX_FMT_RGB565:
params->ip.bpp = 2;
params->ip.pfs = 4;
- params->ip.npb = 7;
+ params->ip.npb = 15;
params->ip.sat = 2; /* SAT = 32-bit access */
params->ip.ofs0 = 0; /* Red bit offset */
params->ip.ofs1 = 5; /* Green bit offset */
@@ -422,12 +422,6 @@ static void ipu_ch_param_set_size(union chan_param_mem *params,
params->pp.nsb = 1;
}
-static void ipu_ch_param_set_burst_size(union chan_param_mem *params,
- uint16_t burst_pixels)
-{
- params->pp.npb = burst_pixels - 1;
-}
-
static void ipu_ch_param_set_buffer(union chan_param_mem *params,
dma_addr_t buf0, dma_addr_t buf1)
{
@@ -690,23 +684,6 @@ static int ipu_init_channel_buffer(struct idmac_channel *ichan,
ipu_ch_param_set_size(&params, pixel_fmt, width, height, stride_bytes);
ipu_ch_param_set_buffer(&params, phyaddr_0, phyaddr_1);
ipu_ch_param_set_rotation(&params, rot_mode);
- /* Some channels (rotation) have restriction on burst length */
- switch (channel) {
- case IDMAC_IC_7: /* Hangs with burst 8, 16, other values
- invalid - Table 44-30 */
-/*
- ipu_ch_param_set_burst_size(&params, 8);
- */
- break;
- case IDMAC_SDC_0:
- case IDMAC_SDC_1:
- /* In original code only IPU_PIX_FMT_RGB565 was setting burst */
- ipu_ch_param_set_burst_size(&params, 16);
- break;
- case IDMAC_IC_0:
- default:
- break;
- }
spin_lock_irqsave(&ipu->lock, flags);
@@ -1364,7 +1341,7 @@ static void ipu_gc_tasklet(unsigned long arg)
/* Allocate and initialise a transfer descriptor. */
static struct dma_async_tx_descriptor *idmac_prep_slave_sg(struct dma_chan *chan,
struct scatterlist *sgl, unsigned int sg_len,
- enum dma_data_direction direction, unsigned long tx_flags)
+ enum dma_transfer_direction direction, unsigned long tx_flags)
{
struct idmac_channel *ichan = to_idmac_chan(chan);
struct idmac_tx_desc *desc = NULL;
@@ -1376,7 +1353,7 @@ static struct dma_async_tx_descriptor *idmac_prep_slave_sg(struct dma_chan *chan
chan->chan_id != IDMAC_IC_7)
return NULL;
- if (direction != DMA_FROM_DEVICE && direction != DMA_TO_DEVICE) {
+ if (direction != DMA_DEV_TO_MEM && direction != DMA_MEM_TO_DEV) {
dev_err(chan->device->dev, "Invalid DMA direction %d!\n", direction);
return NULL;
}
diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c
index 8ba4edc6185..4d6d4cf6694 100644
--- a/drivers/dma/mpc512x_dma.c
+++ b/drivers/dma/mpc512x_dma.c
@@ -835,17 +835,7 @@ static struct platform_driver mpc_dma_driver = {
},
};
-static int __init mpc_dma_init(void)
-{
- return platform_driver_register(&mpc_dma_driver);
-}
-module_init(mpc_dma_init);
-
-static void __exit mpc_dma_exit(void)
-{
- platform_driver_unregister(&mpc_dma_driver);
-}
-module_exit(mpc_dma_exit);
+module_platform_driver(mpc_dma_driver);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Piotr Ziecik <kosmo@semihalf.com>");
diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
index fc903c0ed23..b06cd4ca626 100644
--- a/drivers/dma/mxs-dma.c
+++ b/drivers/dma/mxs-dma.c
@@ -44,7 +44,6 @@
#define HW_APBHX_CTRL0 0x000
#define BM_APBH_CTRL0_APB_BURST8_EN (1 << 29)
#define BM_APBH_CTRL0_APB_BURST_EN (1 << 28)
-#define BP_APBH_CTRL0_CLKGATE_CHANNEL 8
#define BP_APBH_CTRL0_RESET_CHANNEL 16
#define HW_APBHX_CTRL1 0x010
#define HW_APBHX_CTRL2 0x020
@@ -111,6 +110,7 @@ struct mxs_dma_chan {
int chan_irq;
struct mxs_dma_ccw *ccw;
dma_addr_t ccw_phys;
+ int desc_count;
dma_cookie_t last_completed;
enum dma_status status;
unsigned int flags;
@@ -130,23 +130,6 @@ struct mxs_dma_engine {
struct mxs_dma_chan mxs_chans[MXS_DMA_CHANNELS];
};
-static inline void mxs_dma_clkgate(struct mxs_dma_chan *mxs_chan, int enable)
-{
- struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
- int chan_id = mxs_chan->chan.chan_id;
- int set_clr = enable ? MXS_CLR_ADDR : MXS_SET_ADDR;
-
- /* enable apbh channel clock */
- if (dma_is_apbh()) {
- if (apbh_is_old())
- writel(1 << (chan_id + BP_APBH_CTRL0_CLKGATE_CHANNEL),
- mxs_dma->base + HW_APBHX_CTRL0 + set_clr);
- else
- writel(1 << chan_id,
- mxs_dma->base + HW_APBHX_CTRL0 + set_clr);
- }
-}
-
static void mxs_dma_reset_chan(struct mxs_dma_chan *mxs_chan)
{
struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
@@ -165,9 +148,6 @@ static void mxs_dma_enable_chan(struct mxs_dma_chan *mxs_chan)
struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
int chan_id = mxs_chan->chan.chan_id;
- /* clkgate needs to be enabled before writing other registers */
- mxs_dma_clkgate(mxs_chan, 1);
-
/* set cmd_addr up */
writel(mxs_chan->ccw_phys,
mxs_dma->base + HW_APBHX_CHn_NXTCMDAR(chan_id));
@@ -178,9 +158,6 @@ static void mxs_dma_enable_chan(struct mxs_dma_chan *mxs_chan)
static void mxs_dma_disable_chan(struct mxs_dma_chan *mxs_chan)
{
- /* disable apbh channel clock */
- mxs_dma_clkgate(mxs_chan, 0);
-
mxs_chan->status = DMA_SUCCESS;
}
@@ -268,7 +245,7 @@ static irqreturn_t mxs_dma_int_handler(int irq, void *dev_id)
/*
* When both completion and error of termination bits set at the
* same time, we do not take it as an error. IOW, it only becomes
- * an error we need to handler here in case of ether it's (1) an bus
+ * an error we need to handle here in case of either it's (1) a bus
* error or (2) a termination error with no completion.
*/
stat2 = ((stat2 >> MXS_DMA_CHANNELS) & stat2) | /* (1) */
@@ -338,10 +315,7 @@ static int mxs_dma_alloc_chan_resources(struct dma_chan *chan)
if (ret)
goto err_clk;
- /* clkgate needs to be enabled for reset to finish */
- mxs_dma_clkgate(mxs_chan, 1);
mxs_dma_reset_chan(mxs_chan);
- mxs_dma_clkgate(mxs_chan, 0);
dma_async_tx_descriptor_init(&mxs_chan->desc, chan);
mxs_chan->desc.tx_submit = mxs_dma_tx_submit;
@@ -377,7 +351,7 @@ static void mxs_dma_free_chan_resources(struct dma_chan *chan)
static struct dma_async_tx_descriptor *mxs_dma_prep_slave_sg(
struct dma_chan *chan, struct scatterlist *sgl,
- unsigned int sg_len, enum dma_data_direction direction,
+ unsigned int sg_len, enum dma_transfer_direction direction,
unsigned long append)
{
struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
@@ -386,7 +360,7 @@ static struct dma_async_tx_descriptor *mxs_dma_prep_slave_sg(
struct scatterlist *sg;
int i, j;
u32 *pio;
- static int idx;
+ int idx = append ? mxs_chan->desc_count : 0;
if (mxs_chan->status == DMA_IN_PROGRESS && !append)
return NULL;
@@ -417,7 +391,7 @@ static struct dma_async_tx_descriptor *mxs_dma_prep_slave_sg(
idx = 0;
}
- if (direction == DMA_NONE) {
+ if (direction == DMA_TRANS_NONE) {
ccw = &mxs_chan->ccw[idx++];
pio = (u32 *) sgl;
@@ -450,7 +424,7 @@ static struct dma_async_tx_descriptor *mxs_dma_prep_slave_sg(
ccw->bits |= CCW_CHAIN;
ccw->bits |= CCW_HALT_ON_TERM;
ccw->bits |= CCW_TERM_FLUSH;
- ccw->bits |= BF_CCW(direction == DMA_FROM_DEVICE ?
+ ccw->bits |= BF_CCW(direction == DMA_DEV_TO_MEM ?
MXS_DMA_CMD_WRITE : MXS_DMA_CMD_READ,
COMMAND);
@@ -462,6 +436,7 @@ static struct dma_async_tx_descriptor *mxs_dma_prep_slave_sg(
}
}
}
+ mxs_chan->desc_count = idx;
return &mxs_chan->desc;
@@ -472,7 +447,7 @@ err_out:
static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic(
struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
- size_t period_len, enum dma_data_direction direction)
+ size_t period_len, enum dma_transfer_direction direction)
{
struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
@@ -515,7 +490,7 @@ static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic(
ccw->bits |= CCW_IRQ;
ccw->bits |= CCW_HALT_ON_TERM;
ccw->bits |= CCW_TERM_FLUSH;
- ccw->bits |= BF_CCW(direction == DMA_FROM_DEVICE ?
+ ccw->bits |= BF_CCW(direction == DMA_DEV_TO_MEM ?
MXS_DMA_CMD_WRITE : MXS_DMA_CMD_READ, COMMAND);
dma_addr += period_len;
@@ -523,6 +498,7 @@ static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic(
i++;
}
+ mxs_chan->desc_count = i;
return &mxs_chan->desc;
@@ -539,8 +515,8 @@ static int mxs_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
switch (cmd) {
case DMA_TERMINATE_ALL:
- mxs_dma_disable_chan(mxs_chan);
mxs_dma_reset_chan(mxs_chan);
+ mxs_dma_disable_chan(mxs_chan);
break;
case DMA_PAUSE:
mxs_dma_pause_chan(mxs_chan);
@@ -580,7 +556,7 @@ static int __init mxs_dma_init(struct mxs_dma_engine *mxs_dma)
ret = clk_prepare_enable(mxs_dma->clk);
if (ret)
- goto err_out;
+ return ret;
ret = mxs_reset_block(mxs_dma->base);
if (ret)
@@ -604,11 +580,8 @@ static int __init mxs_dma_init(struct mxs_dma_engine *mxs_dma)
writel(MXS_DMA_CHANNELS_MASK << MXS_DMA_CHANNELS,
mxs_dma->base + HW_APBHX_CTRL1 + MXS_SET_ADDR);
- clk_disable_unprepare(mxs_dma->clk);
-
- return 0;
-
err_out:
+ clk_disable_unprepare(mxs_dma->clk);
return ret;
}
diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c
index a6d0e3dbed0..823f58179f9 100644
--- a/drivers/dma/pch_dma.c
+++ b/drivers/dma/pch_dma.c
@@ -1,7 +1,7 @@
/*
* Topcliff PCH DMA controller driver
* Copyright (c) 2010 Intel Corporation
- * Copyright (C) 2011 OKI SEMICONDUCTOR CO., LTD.
+ * Copyright (C) 2011 LAPIS Semiconductor Co., Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -99,7 +99,7 @@ struct pch_dma_desc {
struct pch_dma_chan {
struct dma_chan chan;
void __iomem *membase;
- enum dma_data_direction dir;
+ enum dma_transfer_direction dir;
struct tasklet_struct tasklet;
unsigned long err_status;
@@ -224,7 +224,7 @@ static void pdc_set_dir(struct dma_chan *chan)
mask_ctl = DMA_MASK_CTL0_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
(DMA_CTL0_BITS_PER_CH * chan->chan_id));
val &= mask_mode;
- if (pd_chan->dir == DMA_TO_DEVICE)
+ if (pd_chan->dir == DMA_MEM_TO_DEV)
val |= 0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +
DMA_CTL0_DIR_SHIFT_BITS);
else
@@ -242,7 +242,7 @@ static void pdc_set_dir(struct dma_chan *chan)
mask_ctl = DMA_MASK_CTL2_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
(DMA_CTL0_BITS_PER_CH * ch));
val &= mask_mode;
- if (pd_chan->dir == DMA_TO_DEVICE)
+ if (pd_chan->dir == DMA_MEM_TO_DEV)
val |= 0x1 << (DMA_CTL0_BITS_PER_CH * ch +
DMA_CTL0_DIR_SHIFT_BITS);
else
@@ -607,7 +607,7 @@ static void pd_issue_pending(struct dma_chan *chan)
static struct dma_async_tx_descriptor *pd_prep_slave_sg(struct dma_chan *chan,
struct scatterlist *sgl, unsigned int sg_len,
- enum dma_data_direction direction, unsigned long flags)
+ enum dma_transfer_direction direction, unsigned long flags)
{
struct pch_dma_chan *pd_chan = to_pd_chan(chan);
struct pch_dma_slave *pd_slave = chan->private;
@@ -623,9 +623,9 @@ static struct dma_async_tx_descriptor *pd_prep_slave_sg(struct dma_chan *chan,
return NULL;
}
- if (direction == DMA_FROM_DEVICE)
+ if (direction == DMA_DEV_TO_MEM)
reg = pd_slave->rx_reg;
- else if (direction == DMA_TO_DEVICE)
+ else if (direction == DMA_MEM_TO_DEV)
reg = pd_slave->tx_reg;
else
return NULL;
@@ -1018,6 +1018,8 @@ static void __devexit pch_dma_remove(struct pci_dev *pdev)
#define PCI_DEVICE_ID_ML7223_DMA2_4CH 0x800E
#define PCI_DEVICE_ID_ML7223_DMA3_4CH 0x8017
#define PCI_DEVICE_ID_ML7223_DMA4_4CH 0x803B
+#define PCI_DEVICE_ID_ML7831_DMA1_8CH 0x8810
+#define PCI_DEVICE_ID_ML7831_DMA2_4CH 0x8815
DEFINE_PCI_DEVICE_TABLE(pch_dma_id_table) = {
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_8CH), 8 },
@@ -1030,6 +1032,8 @@ DEFINE_PCI_DEVICE_TABLE(pch_dma_id_table) = {
{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA2_4CH), 4}, /* Video SPI */
{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA3_4CH), 4}, /* Security */
{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA4_4CH), 4}, /* FPGA */
+ { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7831_DMA1_8CH), 8}, /* UART */
+ { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7831_DMA2_4CH), 4}, /* SPI */
{ 0, },
};
@@ -1057,7 +1061,7 @@ static void __exit pch_dma_exit(void)
module_init(pch_dma_init);
module_exit(pch_dma_exit);
-MODULE_DESCRIPTION("Intel EG20T PCH / OKI SEMICONDUCTOR ML7213 IOH "
+MODULE_DESCRIPTION("Intel EG20T PCH / LAPIS Semicon ML7213/ML7223/ML7831 IOH "
"DMA controller driver");
MODULE_AUTHOR("Yong Wang <yong.y.wang@intel.com>");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 09adcfcd953..b8ec03ee8e2 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -350,14 +350,14 @@ static int pl330_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, unsigned
case DMA_SLAVE_CONFIG:
slave_config = (struct dma_slave_config *)arg;
- if (slave_config->direction == DMA_TO_DEVICE) {
+ if (slave_config->direction == DMA_MEM_TO_DEV) {
if (slave_config->dst_addr)
pch->fifo_addr = slave_config->dst_addr;
if (slave_config->dst_addr_width)
pch->burst_sz = __ffs(slave_config->dst_addr_width);
if (slave_config->dst_maxburst)
pch->burst_len = slave_config->dst_maxburst;
- } else if (slave_config->direction == DMA_FROM_DEVICE) {
+ } else if (slave_config->direction == DMA_DEV_TO_MEM) {
if (slave_config->src_addr)
pch->fifo_addr = slave_config->src_addr;
if (slave_config->src_addr_width)
@@ -621,7 +621,7 @@ static inline int get_burst_len(struct dma_pl330_desc *desc, size_t len)
static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
struct dma_chan *chan, dma_addr_t dma_addr, size_t len,
- size_t period_len, enum dma_data_direction direction)
+ size_t period_len, enum dma_transfer_direction direction)
{
struct dma_pl330_desc *desc;
struct dma_pl330_chan *pch = to_pchan(chan);
@@ -636,14 +636,14 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
}
switch (direction) {
- case DMA_TO_DEVICE:
+ case DMA_MEM_TO_DEV:
desc->rqcfg.src_inc = 1;
desc->rqcfg.dst_inc = 0;
desc->req.rqtype = MEMTODEV;
src = dma_addr;
dst = pch->fifo_addr;
break;
- case DMA_FROM_DEVICE:
+ case DMA_DEV_TO_MEM:
desc->rqcfg.src_inc = 0;
desc->rqcfg.dst_inc = 1;
desc->req.rqtype = DEVTOMEM;
@@ -710,7 +710,7 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst,
static struct dma_async_tx_descriptor *
pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
- unsigned int sg_len, enum dma_data_direction direction,
+ unsigned int sg_len, enum dma_transfer_direction direction,
unsigned long flg)
{
struct dma_pl330_desc *first, *desc = NULL;
@@ -759,7 +759,7 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
else
list_add_tail(&desc->node, &first->node);
- if (direction == DMA_TO_DEVICE) {
+ if (direction == DMA_MEM_TO_DEV) {
desc->rqcfg.src_inc = 1;
desc->rqcfg.dst_inc = 0;
desc->req.rqtype = MEMTODEV;
@@ -834,17 +834,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
amba_set_drvdata(adev, pdmac);
-#ifdef CONFIG_PM_RUNTIME
- /* to use the runtime PM helper functions */
- pm_runtime_enable(&adev->dev);
-
- /* enable the power domain */
- if (pm_runtime_get_sync(&adev->dev)) {
- dev_err(&adev->dev, "failed to get runtime pm\n");
- ret = -ENODEV;
- goto probe_err1;
- }
-#else
+#ifndef CONFIG_PM_RUNTIME
/* enable dma clk */
clk_enable(pdmac->clk);
#endif
@@ -977,10 +967,7 @@ static int __devexit pl330_remove(struct amba_device *adev)
res = &adev->res;
release_mem_region(res->start, resource_size(res));
-#ifdef CONFIG_PM_RUNTIME
- pm_runtime_put(&adev->dev);
- pm_runtime_disable(&adev->dev);
-#else
+#ifndef CONFIG_PM_RUNTIME
clk_disable(pdmac->clk);
#endif
diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c
index 81809c2b46a..54043cd831c 100644
--- a/drivers/dma/shdma.c
+++ b/drivers/dma/shdma.c
@@ -23,7 +23,6 @@
#include <linux/interrupt.h>
#include <linux/dmaengine.h>
#include <linux/delay.h>
-#include <linux/dma-mapping.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/sh_dma.h>
@@ -57,6 +56,15 @@ static LIST_HEAD(sh_dmae_devices);
static unsigned long sh_dmae_slave_used[BITS_TO_LONGS(SH_DMA_SLAVE_NUMBER)];
static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan, bool all);
+static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan);
+
+static void chclr_write(struct sh_dmae_chan *sh_dc, u32 data)
+{
+ struct sh_dmae_device *shdev = to_sh_dev(sh_dc);
+
+ __raw_writel(data, shdev->chan_reg +
+ shdev->pdata->channel[sh_dc->id].chclr_offset);
+}
static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg)
{
@@ -129,6 +137,15 @@ static int sh_dmae_rst(struct sh_dmae_device *shdev)
dmaor = dmaor_read(shdev) & ~(DMAOR_NMIF | DMAOR_AE | DMAOR_DME);
+ if (shdev->pdata->chclr_present) {
+ int i;
+ for (i = 0; i < shdev->pdata->channel_num; i++) {
+ struct sh_dmae_chan *sh_chan = shdev->chan[i];
+ if (sh_chan)
+ chclr_write(sh_chan, 0);
+ }
+ }
+
dmaor_write(shdev, dmaor | shdev->pdata->dmaor_init);
dmaor = dmaor_read(shdev);
@@ -139,6 +156,10 @@ static int sh_dmae_rst(struct sh_dmae_device *shdev)
dev_warn(shdev->common.dev, "Can't initialize DMAOR.\n");
return -EIO;
}
+ if (shdev->pdata->dmaor_init & ~dmaor)
+ dev_warn(shdev->common.dev,
+ "DMAOR=0x%x hasn't latched the initial value 0x%x.\n",
+ dmaor, shdev->pdata->dmaor_init);
return 0;
}
@@ -259,8 +280,6 @@ static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val)
return 0;
}
-static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan);
-
static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx)
{
struct sh_desc *desc = tx_to_sh_desc(tx), *chunk, *last = desc, *c;
@@ -340,6 +359,8 @@ static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx)
sh_chan_xfer_ld_queue(sh_chan);
sh_chan->pm_state = DMAE_PM_ESTABLISHED;
}
+ } else {
+ sh_chan->pm_state = DMAE_PM_PENDING;
}
spin_unlock_irq(&sh_chan->desc_lock);
@@ -479,19 +500,19 @@ static void sh_dmae_free_chan_resources(struct dma_chan *chan)
* @sh_chan: DMA channel
* @flags: DMA transfer flags
* @dest: destination DMA address, incremented when direction equals
- * DMA_FROM_DEVICE or DMA_BIDIRECTIONAL
+ * DMA_DEV_TO_MEM
* @src: source DMA address, incremented when direction equals
- * DMA_TO_DEVICE or DMA_BIDIRECTIONAL
+ * DMA_MEM_TO_DEV
* @len: DMA transfer length
* @first: if NULL, set to the current descriptor and cookie set to -EBUSY
* @direction: needed for slave DMA to decide which address to keep constant,
- * equals DMA_BIDIRECTIONAL for MEMCPY
+ * equals DMA_MEM_TO_MEM for MEMCPY
* Returns 0 or an error
* Locks: called with desc_lock held
*/
static struct sh_desc *sh_dmae_add_desc(struct sh_dmae_chan *sh_chan,
unsigned long flags, dma_addr_t *dest, dma_addr_t *src, size_t *len,
- struct sh_desc **first, enum dma_data_direction direction)
+ struct sh_desc **first, enum dma_transfer_direction direction)
{
struct sh_desc *new;
size_t copy_size;
@@ -531,9 +552,9 @@ static struct sh_desc *sh_dmae_add_desc(struct sh_dmae_chan *sh_chan,
new->direction = direction;
*len -= copy_size;
- if (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE)
+ if (direction == DMA_MEM_TO_MEM || direction == DMA_MEM_TO_DEV)
*src += copy_size;
- if (direction == DMA_BIDIRECTIONAL || direction == DMA_FROM_DEVICE)
+ if (direction == DMA_MEM_TO_MEM || direction == DMA_DEV_TO_MEM)
*dest += copy_size;
return new;
@@ -546,12 +567,12 @@ static struct sh_desc *sh_dmae_add_desc(struct sh_dmae_chan *sh_chan,
* converted to scatter-gather to guarantee consistent locking and a correct
* list manipulation. For slave DMA direction carries the usual meaning, and,
* logically, the SG list is RAM and the addr variable contains slave address,
- * e.g., the FIFO I/O register. For MEMCPY direction equals DMA_BIDIRECTIONAL
+ * e.g., the FIFO I/O register. For MEMCPY direction equals DMA_MEM_TO_MEM
* and the SG list contains only one element and points at the source buffer.
*/
static struct dma_async_tx_descriptor *sh_dmae_prep_sg(struct sh_dmae_chan *sh_chan,
struct scatterlist *sgl, unsigned int sg_len, dma_addr_t *addr,
- enum dma_data_direction direction, unsigned long flags)
+ enum dma_transfer_direction direction, unsigned long flags)
{
struct scatterlist *sg;
struct sh_desc *first = NULL, *new = NULL /* compiler... */;
@@ -592,7 +613,7 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_sg(struct sh_dmae_chan *sh_c
dev_dbg(sh_chan->dev, "Add SG #%d@%p[%d], dma %llx\n",
i, sg, len, (unsigned long long)sg_addr);
- if (direction == DMA_FROM_DEVICE)
+ if (direction == DMA_DEV_TO_MEM)
new = sh_dmae_add_desc(sh_chan, flags,
&sg_addr, addr, &len, &first,
direction);
@@ -646,13 +667,13 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy(
sg_dma_address(&sg) = dma_src;
sg_dma_len(&sg) = len;
- return sh_dmae_prep_sg(sh_chan, &sg, 1, &dma_dest, DMA_BIDIRECTIONAL,
+ return sh_dmae_prep_sg(sh_chan, &sg, 1, &dma_dest, DMA_MEM_TO_MEM,
flags);
}
static struct dma_async_tx_descriptor *sh_dmae_prep_slave_sg(
struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
- enum dma_data_direction direction, unsigned long flags)
+ enum dma_transfer_direction direction, unsigned long flags)
{
struct sh_dmae_slave *param;
struct sh_dmae_chan *sh_chan;
@@ -996,7 +1017,7 @@ static void dmae_do_tasklet(unsigned long data)
spin_lock_irq(&sh_chan->desc_lock);
list_for_each_entry(desc, &sh_chan->ld_queue, node) {
if (desc->mark == DESC_SUBMITTED &&
- ((desc->direction == DMA_FROM_DEVICE &&
+ ((desc->direction == DMA_DEV_TO_MEM &&
(desc->hw.dar + desc->hw.tcr) == dar_buf) ||
(desc->hw.sar + desc->hw.tcr) == sar_buf)) {
dev_dbg(sh_chan->dev, "done #%d@%p dst %u\n",
@@ -1225,6 +1246,8 @@ static int __init sh_dmae_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, shdev);
+ shdev->common.dev = &pdev->dev;
+
pm_runtime_enable(&pdev->dev);
pm_runtime_get_sync(&pdev->dev);
@@ -1254,7 +1277,6 @@ static int __init sh_dmae_probe(struct platform_device *pdev)
shdev->common.device_prep_slave_sg = sh_dmae_prep_slave_sg;
shdev->common.device_control = sh_dmae_control;
- shdev->common.dev = &pdev->dev;
/* Default transfer size of 32 bytes requires 32-byte alignment */
shdev->common.copy_align = LOG2_DEFAULT_XFER_SIZE;
@@ -1435,22 +1457,17 @@ static int sh_dmae_runtime_resume(struct device *dev)
#ifdef CONFIG_PM
static int sh_dmae_suspend(struct device *dev)
{
- struct sh_dmae_device *shdev = dev_get_drvdata(dev);
- int i;
-
- for (i = 0; i < shdev->pdata->channel_num; i++) {
- struct sh_dmae_chan *sh_chan = shdev->chan[i];
- if (sh_chan->descs_allocated)
- sh_chan->pm_error = pm_runtime_put_sync(dev);
- }
-
return 0;
}
static int sh_dmae_resume(struct device *dev)
{
struct sh_dmae_device *shdev = dev_get_drvdata(dev);
- int i;
+ int i, ret;
+
+ ret = sh_dmae_rst(shdev);
+ if (ret < 0)
+ dev_err(dev, "Failed to reset!\n");
for (i = 0; i < shdev->pdata->channel_num; i++) {
struct sh_dmae_chan *sh_chan = shdev->chan[i];
@@ -1459,9 +1476,6 @@ static int sh_dmae_resume(struct device *dev)
if (!sh_chan->descs_allocated)
continue;
- if (!sh_chan->pm_error)
- pm_runtime_get_sync(dev);
-
if (param) {
const struct sh_dmae_slave_config *cfg = param->config;
dmae_set_dmars(sh_chan, cfg->mid_rid);
diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c
new file mode 100644
index 00000000000..2333810d168
--- /dev/null
+++ b/drivers/dma/sirf-dma.c
@@ -0,0 +1,707 @@
+/*
+ * DMA controller driver for CSR SiRFprimaII
+ *
+ * Copyright (c) 2011 Cambridge Silicon Radio Limited, a CSR plc group company.
+ *
+ * Licensed under GPLv2 or later.
+ */
+
+#include <linux/module.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/sirfsoc_dma.h>
+
+#define SIRFSOC_DMA_DESCRIPTORS 16
+#define SIRFSOC_DMA_CHANNELS 16
+
+#define SIRFSOC_DMA_CH_ADDR 0x00
+#define SIRFSOC_DMA_CH_XLEN 0x04
+#define SIRFSOC_DMA_CH_YLEN 0x08
+#define SIRFSOC_DMA_CH_CTRL 0x0C
+
+#define SIRFSOC_DMA_WIDTH_0 0x100
+#define SIRFSOC_DMA_CH_VALID 0x140
+#define SIRFSOC_DMA_CH_INT 0x144
+#define SIRFSOC_DMA_INT_EN 0x148
+#define SIRFSOC_DMA_CH_LOOP_CTRL 0x150
+
+#define SIRFSOC_DMA_MODE_CTRL_BIT 4
+#define SIRFSOC_DMA_DIR_CTRL_BIT 5
+
+/* xlen and dma_width register is in 4 bytes boundary */
+#define SIRFSOC_DMA_WORD_LEN 4
+
+struct sirfsoc_dma_desc {
+ struct dma_async_tx_descriptor desc;
+ struct list_head node;
+
+ /* SiRFprimaII 2D-DMA parameters */
+
+ int xlen; /* DMA xlen */
+ int ylen; /* DMA ylen */
+ int width; /* DMA width */
+ int dir;
+ bool cyclic; /* is loop DMA? */
+ u32 addr; /* DMA buffer address */
+};
+
+struct sirfsoc_dma_chan {
+ struct dma_chan chan;
+ struct list_head free;
+ struct list_head prepared;
+ struct list_head queued;
+ struct list_head active;
+ struct list_head completed;
+ dma_cookie_t completed_cookie;
+ unsigned long happened_cyclic;
+ unsigned long completed_cyclic;
+
+ /* Lock for this structure */
+ spinlock_t lock;
+
+ int mode;
+};
+
+struct sirfsoc_dma {
+ struct dma_device dma;
+ struct tasklet_struct tasklet;
+ struct sirfsoc_dma_chan channels[SIRFSOC_DMA_CHANNELS];
+ void __iomem *base;
+ int irq;
+};
+
+#define DRV_NAME "sirfsoc_dma"
+
+/* Convert struct dma_chan to struct sirfsoc_dma_chan */
+static inline
+struct sirfsoc_dma_chan *dma_chan_to_sirfsoc_dma_chan(struct dma_chan *c)
+{
+ return container_of(c, struct sirfsoc_dma_chan, chan);
+}
+
+/* Convert struct dma_chan to struct sirfsoc_dma */
+static inline struct sirfsoc_dma *dma_chan_to_sirfsoc_dma(struct dma_chan *c)
+{
+ struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(c);
+ return container_of(schan, struct sirfsoc_dma, channels[c->chan_id]);
+}
+
+/* Execute all queued DMA descriptors */
+static void sirfsoc_dma_execute(struct sirfsoc_dma_chan *schan)
+{
+ struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(&schan->chan);
+ int cid = schan->chan.chan_id;
+ struct sirfsoc_dma_desc *sdesc = NULL;
+
+ /*
+ * lock has been held by functions calling this, so we don't hold
+ * lock again
+ */
+
+ sdesc = list_first_entry(&schan->queued, struct sirfsoc_dma_desc,
+ node);
+ /* Move the first queued descriptor to active list */
+ list_move_tail(&schan->queued, &schan->active);
+
+ /* Start the DMA transfer */
+ writel_relaxed(sdesc->width, sdma->base + SIRFSOC_DMA_WIDTH_0 +
+ cid * 4);
+ writel_relaxed(cid | (schan->mode << SIRFSOC_DMA_MODE_CTRL_BIT) |
+ (sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT),
+ sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL);
+ writel_relaxed(sdesc->xlen, sdma->base + cid * 0x10 +
+ SIRFSOC_DMA_CH_XLEN);
+ writel_relaxed(sdesc->ylen, sdma->base + cid * 0x10 +
+ SIRFSOC_DMA_CH_YLEN);
+ writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) |
+ (1 << cid), sdma->base + SIRFSOC_DMA_INT_EN);
+
+ /*
+ * writel has an implict memory write barrier to make sure data is
+ * flushed into memory before starting DMA
+ */
+ writel(sdesc->addr >> 2, sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR);
+
+ if (sdesc->cyclic) {
+ writel((1 << cid) | 1 << (cid + 16) |
+ readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL),
+ sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL);
+ schan->happened_cyclic = schan->completed_cyclic = 0;
+ }
+}
+
+/* Interrupt handler */
+static irqreturn_t sirfsoc_dma_irq(int irq, void *data)
+{
+ struct sirfsoc_dma *sdma = data;
+ struct sirfsoc_dma_chan *schan;
+ struct sirfsoc_dma_desc *sdesc = NULL;
+ u32 is;
+ int ch;
+
+ is = readl(sdma->base + SIRFSOC_DMA_CH_INT);
+ while ((ch = fls(is) - 1) >= 0) {
+ is &= ~(1 << ch);
+ writel_relaxed(1 << ch, sdma->base + SIRFSOC_DMA_CH_INT);
+ schan = &sdma->channels[ch];
+
+ spin_lock(&schan->lock);
+
+ sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc,
+ node);
+ if (!sdesc->cyclic) {
+ /* Execute queued descriptors */
+ list_splice_tail_init(&schan->active, &schan->completed);
+ if (!list_empty(&schan->queued))
+ sirfsoc_dma_execute(schan);
+ } else
+ schan->happened_cyclic++;
+
+ spin_unlock(&schan->lock);
+ }
+
+ /* Schedule tasklet */
+ tasklet_schedule(&sdma->tasklet);
+
+ return IRQ_HANDLED;
+}
+
+/* process completed descriptors */
+static void sirfsoc_dma_process_completed(struct sirfsoc_dma *sdma)
+{
+ dma_cookie_t last_cookie = 0;
+ struct sirfsoc_dma_chan *schan;
+ struct sirfsoc_dma_desc *sdesc;
+ struct dma_async_tx_descriptor *desc;
+ unsigned long flags;
+ unsigned long happened_cyclic;
+ LIST_HEAD(list);
+ int i;
+
+ for (i = 0; i < sdma->dma.chancnt; i++) {
+ schan = &sdma->channels[i];
+
+ /* Get all completed descriptors */
+ spin_lock_irqsave(&schan->lock, flags);
+ if (!list_empty(&schan->completed)) {
+ list_splice_tail_init(&schan->completed, &list);
+ spin_unlock_irqrestore(&schan->lock, flags);
+
+ /* Execute callbacks and run dependencies */
+ list_for_each_entry(sdesc, &list, node) {
+ desc = &sdesc->desc;
+
+ if (desc->callback)
+ desc->callback(desc->callback_param);
+
+ last_cookie = desc->cookie;
+ dma_run_dependencies(desc);
+ }
+
+ /* Free descriptors */
+ spin_lock_irqsave(&schan->lock, flags);
+ list_splice_tail_init(&list, &schan->free);
+ schan->completed_cookie = last_cookie;
+ spin_unlock_irqrestore(&schan->lock, flags);
+ } else {
+ /* for cyclic channel, desc is always in active list */
+ sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc,
+ node);
+
+ if (!sdesc || (sdesc && !sdesc->cyclic)) {
+ /* without active cyclic DMA */
+ spin_unlock_irqrestore(&schan->lock, flags);
+ continue;
+ }
+
+ /* cyclic DMA */
+ happened_cyclic = schan->happened_cyclic;
+ spin_unlock_irqrestore(&schan->lock, flags);
+
+ desc = &sdesc->desc;
+ while (happened_cyclic != schan->completed_cyclic) {
+ if (desc->callback)
+ desc->callback(desc->callback_param);
+ schan->completed_cyclic++;
+ }
+ }
+ }
+}
+
+/* DMA Tasklet */
+static void sirfsoc_dma_tasklet(unsigned long data)
+{
+ struct sirfsoc_dma *sdma = (void *)data;
+
+ sirfsoc_dma_process_completed(sdma);
+}
+
+/* Submit descriptor to hardware */
+static dma_cookie_t sirfsoc_dma_tx_submit(struct dma_async_tx_descriptor *txd)
+{
+ struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(txd->chan);
+ struct sirfsoc_dma_desc *sdesc;
+ unsigned long flags;
+ dma_cookie_t cookie;
+
+ sdesc = container_of(txd, struct sirfsoc_dma_desc, desc);
+
+ spin_lock_irqsave(&schan->lock, flags);
+
+ /* Move descriptor to queue */
+ list_move_tail(&sdesc->node, &schan->queued);
+
+ /* Update cookie */
+ cookie = schan->chan.cookie + 1;
+ if (cookie <= 0)
+ cookie = 1;
+
+ schan->chan.cookie = cookie;
+ sdesc->desc.cookie = cookie;
+
+ spin_unlock_irqrestore(&schan->lock, flags);
+
+ return cookie;
+}
+
+static int sirfsoc_dma_slave_config(struct sirfsoc_dma_chan *schan,
+ struct dma_slave_config *config)
+{
+ unsigned long flags;
+
+ if ((config->src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) ||
+ (config->dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES))
+ return -EINVAL;
+
+ spin_lock_irqsave(&schan->lock, flags);
+ schan->mode = (config->src_maxburst == 4 ? 1 : 0);
+ spin_unlock_irqrestore(&schan->lock, flags);
+
+ return 0;
+}
+
+static int sirfsoc_dma_terminate_all(struct sirfsoc_dma_chan *schan)
+{
+ struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(&schan->chan);
+ int cid = schan->chan.chan_id;
+ unsigned long flags;
+
+ writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) &
+ ~(1 << cid), sdma->base + SIRFSOC_DMA_INT_EN);
+ writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID);
+
+ writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL)
+ & ~((1 << cid) | 1 << (cid + 16)),
+ sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL);
+
+ spin_lock_irqsave(&schan->lock, flags);
+ list_splice_tail_init(&schan->active, &schan->free);
+ list_splice_tail_init(&schan->queued, &schan->free);
+ spin_unlock_irqrestore(&schan->lock, flags);
+
+ return 0;
+}
+
+static int sirfsoc_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+ unsigned long arg)
+{
+ struct dma_slave_config *config;
+ struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+
+ switch (cmd) {
+ case DMA_TERMINATE_ALL:
+ return sirfsoc_dma_terminate_all(schan);
+ case DMA_SLAVE_CONFIG:
+ config = (struct dma_slave_config *)arg;
+ return sirfsoc_dma_slave_config(schan, config);
+
+ default:
+ break;
+ }
+
+ return -ENOSYS;
+}
+
+/* Alloc channel resources */
+static int sirfsoc_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(chan);
+ struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+ struct sirfsoc_dma_desc *sdesc;
+ unsigned long flags;
+ LIST_HEAD(descs);
+ int i;
+
+ /* Alloc descriptors for this channel */
+ for (i = 0; i < SIRFSOC_DMA_DESCRIPTORS; i++) {
+ sdesc = kzalloc(sizeof(*sdesc), GFP_KERNEL);
+ if (!sdesc) {
+ dev_notice(sdma->dma.dev, "Memory allocation error. "
+ "Allocated only %u descriptors\n", i);
+ break;
+ }
+
+ dma_async_tx_descriptor_init(&sdesc->desc, chan);
+ sdesc->desc.flags = DMA_CTRL_ACK;
+ sdesc->desc.tx_submit = sirfsoc_dma_tx_submit;
+
+ list_add_tail(&sdesc->node, &descs);
+ }
+
+ /* Return error only if no descriptors were allocated */
+ if (i == 0)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&schan->lock, flags);
+
+ list_splice_tail_init(&descs, &schan->free);
+ spin_unlock_irqrestore(&schan->lock, flags);
+
+ return i;
+}
+
+/* Free channel resources */
+static void sirfsoc_dma_free_chan_resources(struct dma_chan *chan)
+{
+ struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+ struct sirfsoc_dma_desc *sdesc, *tmp;
+ unsigned long flags;
+ LIST_HEAD(descs);
+
+ spin_lock_irqsave(&schan->lock, flags);
+
+ /* Channel must be idle */
+ BUG_ON(!list_empty(&schan->prepared));
+ BUG_ON(!list_empty(&schan->queued));
+ BUG_ON(!list_empty(&schan->active));
+ BUG_ON(!list_empty(&schan->completed));
+
+ /* Move data */
+ list_splice_tail_init(&schan->free, &descs);
+
+ spin_unlock_irqrestore(&schan->lock, flags);
+
+ /* Free descriptors */
+ list_for_each_entry_safe(sdesc, tmp, &descs, node)
+ kfree(sdesc);
+}
+
+/* Send pending descriptor to hardware */
+static void sirfsoc_dma_issue_pending(struct dma_chan *chan)
+{
+ struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&schan->lock, flags);
+
+ if (list_empty(&schan->active) && !list_empty(&schan->queued))
+ sirfsoc_dma_execute(schan);
+
+ spin_unlock_irqrestore(&schan->lock, flags);
+}
+
+/* Check request completion status */
+static enum dma_status
+sirfsoc_dma_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+ unsigned long flags;
+ dma_cookie_t last_used;
+ dma_cookie_t last_complete;
+
+ spin_lock_irqsave(&schan->lock, flags);
+ last_used = schan->chan.cookie;
+ last_complete = schan->completed_cookie;
+ spin_unlock_irqrestore(&schan->lock, flags);
+
+ dma_set_tx_state(txstate, last_complete, last_used, 0);
+ return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static struct dma_async_tx_descriptor *sirfsoc_dma_prep_interleaved(
+ struct dma_chan *chan, struct dma_interleaved_template *xt,
+ unsigned long flags)
+{
+ struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(chan);
+ struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+ struct sirfsoc_dma_desc *sdesc = NULL;
+ unsigned long iflags;
+ int ret;
+
+ if ((xt->dir != DMA_MEM_TO_DEV) || (xt->dir != DMA_DEV_TO_MEM)) {
+ ret = -EINVAL;
+ goto err_dir;
+ }
+
+ /* Get free descriptor */
+ spin_lock_irqsave(&schan->lock, iflags);
+ if (!list_empty(&schan->free)) {
+ sdesc = list_first_entry(&schan->free, struct sirfsoc_dma_desc,
+ node);
+ list_del(&sdesc->node);
+ }
+ spin_unlock_irqrestore(&schan->lock, iflags);
+
+ if (!sdesc) {
+ /* try to free completed descriptors */
+ sirfsoc_dma_process_completed(sdma);
+ ret = 0;
+ goto no_desc;
+ }
+
+ /* Place descriptor in prepared list */
+ spin_lock_irqsave(&schan->lock, iflags);
+
+ /*
+ * Number of chunks in a frame can only be 1 for prima2
+ * and ylen (number of frame - 1) must be at least 0
+ */
+ if ((xt->frame_size == 1) && (xt->numf > 0)) {
+ sdesc->cyclic = 0;
+ sdesc->xlen = xt->sgl[0].size / SIRFSOC_DMA_WORD_LEN;
+ sdesc->width = (xt->sgl[0].size + xt->sgl[0].icg) /
+ SIRFSOC_DMA_WORD_LEN;
+ sdesc->ylen = xt->numf - 1;
+ if (xt->dir == DMA_MEM_TO_DEV) {
+ sdesc->addr = xt->src_start;
+ sdesc->dir = 1;
+ } else {
+ sdesc->addr = xt->dst_start;
+ sdesc->dir = 0;
+ }
+
+ list_add_tail(&sdesc->node, &schan->prepared);
+ } else {
+ pr_err("sirfsoc DMA Invalid xfer\n");
+ ret = -EINVAL;
+ goto err_xfer;
+ }
+ spin_unlock_irqrestore(&schan->lock, iflags);
+
+ return &sdesc->desc;
+err_xfer:
+ spin_unlock_irqrestore(&schan->lock, iflags);
+no_desc:
+err_dir:
+ return ERR_PTR(ret);
+}
+
+static struct dma_async_tx_descriptor *
+sirfsoc_dma_prep_cyclic(struct dma_chan *chan, dma_addr_t addr,
+ size_t buf_len, size_t period_len,
+ enum dma_transfer_direction direction)
+{
+ struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+ struct sirfsoc_dma_desc *sdesc = NULL;
+ unsigned long iflags;
+
+ /*
+ * we only support cycle transfer with 2 period
+ * If the X-length is set to 0, it would be the loop mode.
+ * The DMA address keeps increasing until reaching the end of a loop
+ * area whose size is defined by (DMA_WIDTH x (Y_LENGTH + 1)). Then
+ * the DMA address goes back to the beginning of this area.
+ * In loop mode, the DMA data region is divided into two parts, BUFA
+ * and BUFB. DMA controller generates interrupts twice in each loop:
+ * when the DMA address reaches the end of BUFA or the end of the
+ * BUFB
+ */
+ if (buf_len != 2 * period_len)
+ return ERR_PTR(-EINVAL);
+
+ /* Get free descriptor */
+ spin_lock_irqsave(&schan->lock, iflags);
+ if (!list_empty(&schan->free)) {
+ sdesc = list_first_entry(&schan->free, struct sirfsoc_dma_desc,
+ node);
+ list_del(&sdesc->node);
+ }
+ spin_unlock_irqrestore(&schan->lock, iflags);
+
+ if (!sdesc)
+ return 0;
+
+ /* Place descriptor in prepared list */
+ spin_lock_irqsave(&schan->lock, iflags);
+ sdesc->addr = addr;
+ sdesc->cyclic = 1;
+ sdesc->xlen = 0;
+ sdesc->ylen = buf_len / SIRFSOC_DMA_WORD_LEN - 1;
+ sdesc->width = 1;
+ list_add_tail(&sdesc->node, &schan->prepared);
+ spin_unlock_irqrestore(&schan->lock, iflags);
+
+ return &sdesc->desc;
+}
+
+/*
+ * The DMA controller consists of 16 independent DMA channels.
+ * Each channel is allocated to a different function
+ */
+bool sirfsoc_dma_filter_id(struct dma_chan *chan, void *chan_id)
+{
+ unsigned int ch_nr = (unsigned int) chan_id;
+
+ if (ch_nr == chan->chan_id +
+ chan->device->dev_id * SIRFSOC_DMA_CHANNELS)
+ return true;
+
+ return false;
+}
+EXPORT_SYMBOL(sirfsoc_dma_filter_id);
+
+static int __devinit sirfsoc_dma_probe(struct platform_device *op)
+{
+ struct device_node *dn = op->dev.of_node;
+ struct device *dev = &op->dev;
+ struct dma_device *dma;
+ struct sirfsoc_dma *sdma;
+ struct sirfsoc_dma_chan *schan;
+ struct resource res;
+ ulong regs_start, regs_size;
+ u32 id;
+ int ret, i;
+
+ sdma = devm_kzalloc(dev, sizeof(*sdma), GFP_KERNEL);
+ if (!sdma) {
+ dev_err(dev, "Memory exhausted!\n");
+ return -ENOMEM;
+ }
+
+ if (of_property_read_u32(dn, "cell-index", &id)) {
+ dev_err(dev, "Fail to get DMAC index\n");
+ ret = -ENODEV;
+ goto free_mem;
+ }
+
+ sdma->irq = irq_of_parse_and_map(dn, 0);
+ if (sdma->irq == NO_IRQ) {
+ dev_err(dev, "Error mapping IRQ!\n");
+ ret = -EINVAL;
+ goto free_mem;
+ }
+
+ ret = of_address_to_resource(dn, 0, &res);
+ if (ret) {
+ dev_err(dev, "Error parsing memory region!\n");
+ goto free_mem;
+ }
+
+ regs_start = res.start;
+ regs_size = resource_size(&res);
+
+ sdma->base = devm_ioremap(dev, regs_start, regs_size);
+ if (!sdma->base) {
+ dev_err(dev, "Error mapping memory region!\n");
+ ret = -ENOMEM;
+ goto irq_dispose;
+ }
+
+ ret = devm_request_irq(dev, sdma->irq, &sirfsoc_dma_irq, 0, DRV_NAME,
+ sdma);
+ if (ret) {
+ dev_err(dev, "Error requesting IRQ!\n");
+ ret = -EINVAL;
+ goto unmap_mem;
+ }
+
+ dma = &sdma->dma;
+ dma->dev = dev;
+ dma->chancnt = SIRFSOC_DMA_CHANNELS;
+
+ dma->device_alloc_chan_resources = sirfsoc_dma_alloc_chan_resources;
+ dma->device_free_chan_resources = sirfsoc_dma_free_chan_resources;
+ dma->device_issue_pending = sirfsoc_dma_issue_pending;
+ dma->device_control = sirfsoc_dma_control;
+ dma->device_tx_status = sirfsoc_dma_tx_status;
+ dma->device_prep_interleaved_dma = sirfsoc_dma_prep_interleaved;
+ dma->device_prep_dma_cyclic = sirfsoc_dma_prep_cyclic;
+
+ INIT_LIST_HEAD(&dma->channels);
+ dma_cap_set(DMA_SLAVE, dma->cap_mask);
+ dma_cap_set(DMA_CYCLIC, dma->cap_mask);
+ dma_cap_set(DMA_INTERLEAVE, dma->cap_mask);
+ dma_cap_set(DMA_PRIVATE, dma->cap_mask);
+
+ for (i = 0; i < dma->chancnt; i++) {
+ schan = &sdma->channels[i];
+
+ schan->chan.device = dma;
+ schan->chan.cookie = 1;
+ schan->completed_cookie = schan->chan.cookie;
+
+ INIT_LIST_HEAD(&schan->free);
+ INIT_LIST_HEAD(&schan->prepared);
+ INIT_LIST_HEAD(&schan->queued);
+ INIT_LIST_HEAD(&schan->active);
+ INIT_LIST_HEAD(&schan->completed);
+
+ spin_lock_init(&schan->lock);
+ list_add_tail(&schan->chan.device_node, &dma->channels);
+ }
+
+ tasklet_init(&sdma->tasklet, sirfsoc_dma_tasklet, (unsigned long)sdma);
+
+ /* Register DMA engine */
+ dev_set_drvdata(dev, sdma);
+ ret = dma_async_device_register(dma);
+ if (ret)
+ goto free_irq;
+
+ dev_info(dev, "initialized SIRFSOC DMAC driver\n");
+
+ return 0;
+
+free_irq:
+ devm_free_irq(dev, sdma->irq, sdma);
+irq_dispose:
+ irq_dispose_mapping(sdma->irq);
+unmap_mem:
+ iounmap(sdma->base);
+free_mem:
+ devm_kfree(dev, sdma);
+ return ret;
+}
+
+static int __devexit sirfsoc_dma_remove(struct platform_device *op)
+{
+ struct device *dev = &op->dev;
+ struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
+
+ dma_async_device_unregister(&sdma->dma);
+ devm_free_irq(dev, sdma->irq, sdma);
+ irq_dispose_mapping(sdma->irq);
+ iounmap(sdma->base);
+ devm_kfree(dev, sdma);
+ return 0;
+}
+
+static struct of_device_id sirfsoc_dma_match[] = {
+ { .compatible = "sirf,prima2-dmac", },
+ {},
+};
+
+static struct platform_driver sirfsoc_dma_driver = {
+ .probe = sirfsoc_dma_probe,
+ .remove = __devexit_p(sirfsoc_dma_remove),
+ .driver = {
+ .name = DRV_NAME,
+ .owner = THIS_MODULE,
+ .of_match_table = sirfsoc_dma_match,
+ },
+};
+
+module_platform_driver(sirfsoc_dma_driver);
+
+MODULE_AUTHOR("Rongjun Ying <rongjun.ying@csr.com>, "
+ "Barry Song <baohua.song@csr.com>");
+MODULE_DESCRIPTION("SIRFSOC DMA control driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index 13259cad0ce..cc5ecbc067a 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -14,6 +14,8 @@
#include <linux/platform_device.h>
#include <linux/clk.h>
#include <linux/delay.h>
+#include <linux/pm.h>
+#include <linux/pm_runtime.h>
#include <linux/err.h>
#include <linux/amba/bus.h>
@@ -32,6 +34,9 @@
/* Maximum iterations taken before giving up suspending a channel */
#define D40_SUSPEND_MAX_IT 500
+/* Milliseconds */
+#define DMA40_AUTOSUSPEND_DELAY 100
+
/* Hardware requirement on LCLA alignment */
#define LCLA_ALIGNMENT 0x40000
@@ -62,6 +67,55 @@ enum d40_command {
D40_DMA_SUSPENDED = 3
};
+/*
+ * These are the registers that has to be saved and later restored
+ * when the DMA hw is powered off.
+ * TODO: Add save/restore of D40_DREG_GCC on dma40 v3 or later, if that works.
+ */
+static u32 d40_backup_regs[] = {
+ D40_DREG_LCPA,
+ D40_DREG_LCLA,
+ D40_DREG_PRMSE,
+ D40_DREG_PRMSO,
+ D40_DREG_PRMOE,
+ D40_DREG_PRMOO,
+};
+
+#define BACKUP_REGS_SZ ARRAY_SIZE(d40_backup_regs)
+
+/* TODO: Check if all these registers have to be saved/restored on dma40 v3 */
+static u32 d40_backup_regs_v3[] = {
+ D40_DREG_PSEG1,
+ D40_DREG_PSEG2,
+ D40_DREG_PSEG3,
+ D40_DREG_PSEG4,
+ D40_DREG_PCEG1,
+ D40_DREG_PCEG2,
+ D40_DREG_PCEG3,
+ D40_DREG_PCEG4,
+ D40_DREG_RSEG1,
+ D40_DREG_RSEG2,
+ D40_DREG_RSEG3,
+ D40_DREG_RSEG4,
+ D40_DREG_RCEG1,
+ D40_DREG_RCEG2,
+ D40_DREG_RCEG3,
+ D40_DREG_RCEG4,
+};
+
+#define BACKUP_REGS_SZ_V3 ARRAY_SIZE(d40_backup_regs_v3)
+
+static u32 d40_backup_regs_chan[] = {
+ D40_CHAN_REG_SSCFG,
+ D40_CHAN_REG_SSELT,
+ D40_CHAN_REG_SSPTR,
+ D40_CHAN_REG_SSLNK,
+ D40_CHAN_REG_SDCFG,
+ D40_CHAN_REG_SDELT,
+ D40_CHAN_REG_SDPTR,
+ D40_CHAN_REG_SDLNK,
+};
+
/**
* struct d40_lli_pool - Structure for keeping LLIs in memory
*
@@ -96,7 +150,7 @@ struct d40_lli_pool {
* during a transfer.
* @node: List entry.
* @is_in_client_list: true if the client owns this descriptor.
- * the previous one.
+ * @cyclic: true if this is a cyclic job
*
* This descriptor is used for both logical and physical transfers.
*/
@@ -143,6 +197,7 @@ struct d40_lcla_pool {
* channels.
*
* @lock: A lock protection this entity.
+ * @reserved: True if used by secure world or otherwise.
* @num: The physical channel number of this entity.
* @allocated_src: Bit mapped to show which src event line's are mapped to
* this physical channel. Can also be free or physically allocated.
@@ -152,6 +207,7 @@ struct d40_lcla_pool {
*/
struct d40_phy_res {
spinlock_t lock;
+ bool reserved;
int num;
u32 allocated_src;
u32 allocated_dst;
@@ -185,7 +241,6 @@ struct d40_base;
* @src_def_cfg: Default cfg register setting for src.
* @dst_def_cfg: Default cfg register setting for dst.
* @log_def: Default logical channel settings.
- * @lcla: Space for one dst src pair for logical channel transfers.
* @lcpa: Pointer to dst and src lcpa settings.
* @runtime_addr: runtime configured address.
* @runtime_direction: runtime configured direction.
@@ -217,7 +272,7 @@ struct d40_chan {
struct d40_log_lli_full *lcpa;
/* Runtime reconfiguration */
dma_addr_t runtime_addr;
- enum dma_data_direction runtime_direction;
+ enum dma_transfer_direction runtime_direction;
};
/**
@@ -241,6 +296,7 @@ struct d40_chan {
* @dma_both: dma_device channels that can do both memcpy and slave transfers.
* @dma_slave: dma_device channels that can do only do slave transfers.
* @dma_memcpy: dma_device channels that can do only do memcpy transfers.
+ * @phy_chans: Room for all possible physical channels in system.
* @log_chans: Room for all possible logical channels in system.
* @lookup_log_chans: Used to map interrupt number to logical channel. Points
* to log_chans entries.
@@ -248,12 +304,20 @@ struct d40_chan {
* to phy_chans entries.
* @plat_data: Pointer to provided platform_data which is the driver
* configuration.
+ * @lcpa_regulator: Pointer to hold the regulator for the esram bank for lcla.
* @phy_res: Vector containing all physical channels.
* @lcla_pool: lcla pool settings and data.
* @lcpa_base: The virtual mapped address of LCPA.
* @phy_lcpa: The physical address of the LCPA.
* @lcpa_size: The size of the LCPA area.
* @desc_slab: cache for descriptors.
+ * @reg_val_backup: Here the values of some hardware registers are stored
+ * before the DMA is powered off. They are restored when the power is back on.
+ * @reg_val_backup_v3: Backup of registers that only exits on dma40 v3 and
+ * later.
+ * @reg_val_backup_chan: Backup data for standard channel parameter registers.
+ * @gcc_pwr_off_mask: Mask to maintain the channels that can be turned off.
+ * @initialized: true if the dma has been initialized
*/
struct d40_base {
spinlock_t interrupt_lock;
@@ -275,6 +339,7 @@ struct d40_base {
struct d40_chan **lookup_log_chans;
struct d40_chan **lookup_phy_chans;
struct stedma40_platform_data *plat_data;
+ struct regulator *lcpa_regulator;
/* Physical half channels */
struct d40_phy_res *phy_res;
struct d40_lcla_pool lcla_pool;
@@ -282,6 +347,11 @@ struct d40_base {
dma_addr_t phy_lcpa;
resource_size_t lcpa_size;
struct kmem_cache *desc_slab;
+ u32 reg_val_backup[BACKUP_REGS_SZ];
+ u32 reg_val_backup_v3[BACKUP_REGS_SZ_V3];
+ u32 *reg_val_backup_chan;
+ u16 gcc_pwr_off_mask;
+ bool initialized;
};
/**
@@ -479,13 +549,14 @@ static struct d40_desc *d40_desc_get(struct d40_chan *d40c)
struct d40_desc *d;
struct d40_desc *_d;
- list_for_each_entry_safe(d, _d, &d40c->client, node)
+ list_for_each_entry_safe(d, _d, &d40c->client, node) {
if (async_tx_test_ack(&d->txd)) {
d40_desc_remove(d);
desc = d;
memset(desc, 0, sizeof(*desc));
break;
}
+ }
}
if (!desc)
@@ -536,6 +607,7 @@ static void d40_log_lli_to_lcxa(struct d40_chan *chan, struct d40_desc *desc)
bool cyclic = desc->cyclic;
int curr_lcla = -EINVAL;
int first_lcla = 0;
+ bool use_esram_lcla = chan->base->plat_data->use_esram_lcla;
bool linkback;
/*
@@ -608,11 +680,16 @@ static void d40_log_lli_to_lcxa(struct d40_chan *chan, struct d40_desc *desc)
&lli->src[lli_current],
next_lcla, flags);
- dma_sync_single_range_for_device(chan->base->dev,
- pool->dma_addr, lcla_offset,
- 2 * sizeof(struct d40_log_lli),
- DMA_TO_DEVICE);
-
+ /*
+ * Cache maintenance is not needed if lcla is
+ * mapped in esram
+ */
+ if (!use_esram_lcla) {
+ dma_sync_single_range_for_device(chan->base->dev,
+ pool->dma_addr, lcla_offset,
+ 2 * sizeof(struct d40_log_lli),
+ DMA_TO_DEVICE);
+ }
curr_lcla = next_lcla;
if (curr_lcla == -EINVAL || curr_lcla == first_lcla) {
@@ -740,7 +817,61 @@ static int d40_sg_2_dmalen(struct scatterlist *sgl, int sg_len,
return len;
}
-/* Support functions for logical channels */
+
+#ifdef CONFIG_PM
+static void dma40_backup(void __iomem *baseaddr, u32 *backup,
+ u32 *regaddr, int num, bool save)
+{
+ int i;
+
+ for (i = 0; i < num; i++) {
+ void __iomem *addr = baseaddr + regaddr[i];
+
+ if (save)
+ backup[i] = readl_relaxed(addr);
+ else
+ writel_relaxed(backup[i], addr);
+ }
+}
+
+static void d40_save_restore_registers(struct d40_base *base, bool save)
+{
+ int i;
+
+ /* Save/Restore channel specific registers */
+ for (i = 0; i < base->num_phy_chans; i++) {
+ void __iomem *addr;
+ int idx;
+
+ if (base->phy_res[i].reserved)
+ continue;
+
+ addr = base->virtbase + D40_DREG_PCBASE + i * D40_DREG_PCDELTA;
+ idx = i * ARRAY_SIZE(d40_backup_regs_chan);
+
+ dma40_backup(addr, &base->reg_val_backup_chan[idx],
+ d40_backup_regs_chan,
+ ARRAY_SIZE(d40_backup_regs_chan),
+ save);
+ }
+
+ /* Save/Restore global registers */
+ dma40_backup(base->virtbase, base->reg_val_backup,
+ d40_backup_regs, ARRAY_SIZE(d40_backup_regs),
+ save);
+
+ /* Save/Restore registers only existing on dma40 v3 and later */
+ if (base->rev >= 3)
+ dma40_backup(base->virtbase, base->reg_val_backup_v3,
+ d40_backup_regs_v3,
+ ARRAY_SIZE(d40_backup_regs_v3),
+ save);
+}
+#else
+static void d40_save_restore_registers(struct d40_base *base, bool save)
+{
+}
+#endif
static int d40_channel_execute_command(struct d40_chan *d40c,
enum d40_command command)
@@ -973,6 +1104,10 @@ static void d40_config_write(struct d40_chan *d40c)
/* Set LIDX for lcla */
writel(lidx, chanbase + D40_CHAN_REG_SSELT);
writel(lidx, chanbase + D40_CHAN_REG_SDELT);
+
+ /* Clear LNK which will be used by d40_chan_has_events() */
+ writel(0, chanbase + D40_CHAN_REG_SSLNK);
+ writel(0, chanbase + D40_CHAN_REG_SDLNK);
}
}
@@ -1013,6 +1148,7 @@ static int d40_pause(struct d40_chan *d40c)
if (!d40c->busy)
return 0;
+ pm_runtime_get_sync(d40c->base->dev);
spin_lock_irqsave(&d40c->lock, flags);
res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ);
@@ -1025,7 +1161,8 @@ static int d40_pause(struct d40_chan *d40c)
D40_DMA_RUN);
}
}
-
+ pm_runtime_mark_last_busy(d40c->base->dev);
+ pm_runtime_put_autosuspend(d40c->base->dev);
spin_unlock_irqrestore(&d40c->lock, flags);
return res;
}
@@ -1039,7 +1176,7 @@ static int d40_resume(struct d40_chan *d40c)
return 0;
spin_lock_irqsave(&d40c->lock, flags);
-
+ pm_runtime_get_sync(d40c->base->dev);
if (d40c->base->rev == 0)
if (chan_is_logical(d40c)) {
res = d40_channel_execute_command(d40c,
@@ -1057,6 +1194,8 @@ static int d40_resume(struct d40_chan *d40c)
}
no_suspend:
+ pm_runtime_mark_last_busy(d40c->base->dev);
+ pm_runtime_put_autosuspend(d40c->base->dev);
spin_unlock_irqrestore(&d40c->lock, flags);
return res;
}
@@ -1129,7 +1268,10 @@ static struct d40_desc *d40_queue_start(struct d40_chan *d40c)
d40d = d40_first_queued(d40c);
if (d40d != NULL) {
- d40c->busy = true;
+ if (!d40c->busy)
+ d40c->busy = true;
+
+ pm_runtime_get_sync(d40c->base->dev);
/* Remove from queue */
d40_desc_remove(d40d);
@@ -1190,6 +1332,8 @@ static void dma_tc_handle(struct d40_chan *d40c)
if (d40_queue_start(d40c) == NULL)
d40c->busy = false;
+ pm_runtime_mark_last_busy(d40c->base->dev);
+ pm_runtime_put_autosuspend(d40c->base->dev);
}
d40c->pending_tx++;
@@ -1405,11 +1549,16 @@ static int d40_validate_conf(struct d40_chan *d40c,
return res;
}
-static bool d40_alloc_mask_set(struct d40_phy_res *phy, bool is_src,
- int log_event_line, bool is_log)
+static bool d40_alloc_mask_set(struct d40_phy_res *phy,
+ bool is_src, int log_event_line, bool is_log,
+ bool *first_user)
{
unsigned long flags;
spin_lock_irqsave(&phy->lock, flags);
+
+ *first_user = ((phy->allocated_src | phy->allocated_dst)
+ == D40_ALLOC_FREE);
+
if (!is_log) {
/* Physical interrupts are masked per physical full channel */
if (phy->allocated_src == D40_ALLOC_FREE &&
@@ -1490,7 +1639,7 @@ out:
return is_free;
}
-static int d40_allocate_channel(struct d40_chan *d40c)
+static int d40_allocate_channel(struct d40_chan *d40c, bool *first_phy_user)
{
int dev_type;
int event_group;
@@ -1526,7 +1675,8 @@ static int d40_allocate_channel(struct d40_chan *d40c)
for (i = 0; i < d40c->base->num_phy_chans; i++) {
if (d40_alloc_mask_set(&phys[i], is_src,
- 0, is_log))
+ 0, is_log,
+ first_phy_user))
goto found_phy;
}
} else
@@ -1536,7 +1686,8 @@ static int d40_allocate_channel(struct d40_chan *d40c)
if (d40_alloc_mask_set(&phys[i],
is_src,
0,
- is_log))
+ is_log,
+ first_phy_user))
goto found_phy;
}
}
@@ -1552,6 +1703,25 @@ found_phy:
/* Find logical channel */
for (j = 0; j < d40c->base->num_phy_chans; j += 8) {
int phy_num = j + event_group * 2;
+
+ if (d40c->dma_cfg.use_fixed_channel) {
+ i = d40c->dma_cfg.phy_channel;
+
+ if ((i != phy_num) && (i != phy_num + 1)) {
+ dev_err(chan2dev(d40c),
+ "invalid fixed phy channel %d\n", i);
+ return -EINVAL;
+ }
+
+ if (d40_alloc_mask_set(&phys[i], is_src, event_line,
+ is_log, first_phy_user))
+ goto found_log;
+
+ dev_err(chan2dev(d40c),
+ "could not allocate fixed phy channel %d\n", i);
+ return -EINVAL;
+ }
+
/*
* Spread logical channels across all available physical rather
* than pack every logical channel at the first available phy
@@ -1560,13 +1730,15 @@ found_phy:
if (is_src) {
for (i = phy_num; i < phy_num + 2; i++) {
if (d40_alloc_mask_set(&phys[i], is_src,
- event_line, is_log))
+ event_line, is_log,
+ first_phy_user))
goto found_log;
}
} else {
for (i = phy_num + 1; i >= phy_num; i--) {
if (d40_alloc_mask_set(&phys[i], is_src,
- event_line, is_log))
+ event_line, is_log,
+ first_phy_user))
goto found_log;
}
}
@@ -1643,10 +1815,11 @@ static int d40_free_dma(struct d40_chan *d40c)
return -EINVAL;
}
+ pm_runtime_get_sync(d40c->base->dev);
res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ);
if (res) {
chan_err(d40c, "suspend failed\n");
- return res;
+ goto out;
}
if (chan_is_logical(d40c)) {
@@ -1664,13 +1837,11 @@ static int d40_free_dma(struct d40_chan *d40c)
if (d40_chan_has_events(d40c)) {
res = d40_channel_execute_command(d40c,
D40_DMA_RUN);
- if (res) {
+ if (res)
chan_err(d40c,
"Executing RUN command\n");
- return res;
- }
}
- return 0;
+ goto out;
}
} else {
(void) d40_alloc_mask_free(phy, is_src, 0);
@@ -1680,13 +1851,23 @@ static int d40_free_dma(struct d40_chan *d40c)
res = d40_channel_execute_command(d40c, D40_DMA_STOP);
if (res) {
chan_err(d40c, "Failed to stop channel\n");
- return res;
+ goto out;
}
+
+ if (d40c->busy) {
+ pm_runtime_mark_last_busy(d40c->base->dev);
+ pm_runtime_put_autosuspend(d40c->base->dev);
+ }
+
+ d40c->busy = false;
d40c->phy_chan = NULL;
d40c->configured = false;
d40c->base->lookup_phy_chans[phy->num] = NULL;
+out:
- return 0;
+ pm_runtime_mark_last_busy(d40c->base->dev);
+ pm_runtime_put_autosuspend(d40c->base->dev);
+ return res;
}
static bool d40_is_paused(struct d40_chan *d40c)
@@ -1855,7 +2036,7 @@ err:
}
static dma_addr_t
-d40_get_dev_addr(struct d40_chan *chan, enum dma_data_direction direction)
+d40_get_dev_addr(struct d40_chan *chan, enum dma_transfer_direction direction)
{
struct stedma40_platform_data *plat = chan->base->plat_data;
struct stedma40_chan_cfg *cfg = &chan->dma_cfg;
@@ -1864,9 +2045,9 @@ d40_get_dev_addr(struct d40_chan *chan, enum dma_data_direction direction)
if (chan->runtime_addr)
return chan->runtime_addr;
- if (direction == DMA_FROM_DEVICE)
+ if (direction == DMA_DEV_TO_MEM)
addr = plat->dev_rx[cfg->src_dev_type];
- else if (direction == DMA_TO_DEVICE)
+ else if (direction == DMA_MEM_TO_DEV)
addr = plat->dev_tx[cfg->dst_dev_type];
return addr;
@@ -1875,7 +2056,7 @@ d40_get_dev_addr(struct d40_chan *chan, enum dma_data_direction direction)
static struct dma_async_tx_descriptor *
d40_prep_sg(struct dma_chan *dchan, struct scatterlist *sg_src,
struct scatterlist *sg_dst, unsigned int sg_len,
- enum dma_data_direction direction, unsigned long dma_flags)
+ enum dma_transfer_direction direction, unsigned long dma_flags)
{
struct d40_chan *chan = container_of(dchan, struct d40_chan, chan);
dma_addr_t src_dev_addr = 0;
@@ -1902,9 +2083,9 @@ d40_prep_sg(struct dma_chan *dchan, struct scatterlist *sg_src,
if (direction != DMA_NONE) {
dma_addr_t dev_addr = d40_get_dev_addr(chan, direction);
- if (direction == DMA_FROM_DEVICE)
+ if (direction == DMA_DEV_TO_MEM)
src_dev_addr = dev_addr;
- else if (direction == DMA_TO_DEVICE)
+ else if (direction == DMA_MEM_TO_DEV)
dst_dev_addr = dev_addr;
}
@@ -2011,14 +2192,15 @@ static int d40_alloc_chan_resources(struct dma_chan *chan)
goto fail;
}
}
- is_free_phy = (d40c->phy_chan == NULL);
- err = d40_allocate_channel(d40c);
+ err = d40_allocate_channel(d40c, &is_free_phy);
if (err) {
chan_err(d40c, "Failed to allocate channel\n");
+ d40c->configured = false;
goto fail;
}
+ pm_runtime_get_sync(d40c->base->dev);
/* Fill in basic CFG register values */
d40_phy_cfg(&d40c->dma_cfg, &d40c->src_def_cfg,
&d40c->dst_def_cfg, chan_is_logical(d40c));
@@ -2038,6 +2220,12 @@ static int d40_alloc_chan_resources(struct dma_chan *chan)
D40_LCPA_CHAN_SIZE + D40_LCPA_CHAN_DST_DELTA;
}
+ dev_dbg(chan2dev(d40c), "allocated %s channel (phy %d%s)\n",
+ chan_is_logical(d40c) ? "logical" : "physical",
+ d40c->phy_chan->num,
+ d40c->dma_cfg.use_fixed_channel ? ", fixed" : "");
+
+
/*
* Only write channel configuration to the DMA if the physical
* resource is free. In case of multiple logical channels
@@ -2046,6 +2234,8 @@ static int d40_alloc_chan_resources(struct dma_chan *chan)
if (is_free_phy)
d40_config_write(d40c);
fail:
+ pm_runtime_mark_last_busy(d40c->base->dev);
+ pm_runtime_put_autosuspend(d40c->base->dev);
spin_unlock_irqrestore(&d40c->lock, flags);
return err;
}
@@ -2108,10 +2298,10 @@ d40_prep_memcpy_sg(struct dma_chan *chan,
static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan,
struct scatterlist *sgl,
unsigned int sg_len,
- enum dma_data_direction direction,
+ enum dma_transfer_direction direction,
unsigned long dma_flags)
{
- if (direction != DMA_FROM_DEVICE && direction != DMA_TO_DEVICE)
+ if (direction != DMA_DEV_TO_MEM && direction != DMA_MEM_TO_DEV)
return NULL;
return d40_prep_sg(chan, sgl, sgl, sg_len, direction, dma_flags);
@@ -2120,7 +2310,7 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan,
static struct dma_async_tx_descriptor *
dma40_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
size_t buf_len, size_t period_len,
- enum dma_data_direction direction)
+ enum dma_transfer_direction direction)
{
unsigned int periods = buf_len / period_len;
struct dma_async_tx_descriptor *txd;
@@ -2269,7 +2459,7 @@ static int d40_set_runtime_config(struct dma_chan *chan,
dst_addr_width = config->dst_addr_width;
dst_maxburst = config->dst_maxburst;
- if (config->direction == DMA_FROM_DEVICE) {
+ if (config->direction == DMA_DEV_TO_MEM) {
dma_addr_t dev_addr_rx =
d40c->base->plat_data->dev_rx[cfg->src_dev_type];
@@ -2292,7 +2482,7 @@ static int d40_set_runtime_config(struct dma_chan *chan,
if (dst_maxburst == 0)
dst_maxburst = src_maxburst;
- } else if (config->direction == DMA_TO_DEVICE) {
+ } else if (config->direction == DMA_MEM_TO_DEV) {
dma_addr_t dev_addr_tx =
d40c->base->plat_data->dev_tx[cfg->dst_dev_type];
@@ -2357,7 +2547,7 @@ static int d40_set_runtime_config(struct dma_chan *chan,
"configured channel %s for %s, data width %d/%d, "
"maxburst %d/%d elements, LE, no flow control\n",
dma_chan_name(chan),
- (config->direction == DMA_FROM_DEVICE) ? "RX" : "TX",
+ (config->direction == DMA_DEV_TO_MEM) ? "RX" : "TX",
src_addr_width, dst_addr_width,
src_maxburst, dst_maxburst);
@@ -2519,6 +2709,72 @@ failure1:
return err;
}
+/* Suspend resume functionality */
+#ifdef CONFIG_PM
+static int dma40_pm_suspend(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct d40_base *base = platform_get_drvdata(pdev);
+ int ret = 0;
+ if (!pm_runtime_suspended(dev))
+ return -EBUSY;
+
+ if (base->lcpa_regulator)
+ ret = regulator_disable(base->lcpa_regulator);
+ return ret;
+}
+
+static int dma40_runtime_suspend(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct d40_base *base = platform_get_drvdata(pdev);
+
+ d40_save_restore_registers(base, true);
+
+ /* Don't disable/enable clocks for v1 due to HW bugs */
+ if (base->rev != 1)
+ writel_relaxed(base->gcc_pwr_off_mask,
+ base->virtbase + D40_DREG_GCC);
+
+ return 0;
+}
+
+static int dma40_runtime_resume(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct d40_base *base = platform_get_drvdata(pdev);
+
+ if (base->initialized)
+ d40_save_restore_registers(base, false);
+
+ writel_relaxed(D40_DREG_GCC_ENABLE_ALL,
+ base->virtbase + D40_DREG_GCC);
+ return 0;
+}
+
+static int dma40_resume(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct d40_base *base = platform_get_drvdata(pdev);
+ int ret = 0;
+
+ if (base->lcpa_regulator)
+ ret = regulator_enable(base->lcpa_regulator);
+
+ return ret;
+}
+
+static const struct dev_pm_ops dma40_pm_ops = {
+ .suspend = dma40_pm_suspend,
+ .runtime_suspend = dma40_runtime_suspend,
+ .runtime_resume = dma40_runtime_resume,
+ .resume = dma40_resume,
+};
+#define DMA40_PM_OPS (&dma40_pm_ops)
+#else
+#define DMA40_PM_OPS NULL
+#endif
+
/* Initialization functions. */
static int __init d40_phy_res_init(struct d40_base *base)
@@ -2527,6 +2783,7 @@ static int __init d40_phy_res_init(struct d40_base *base)
int num_phy_chans_avail = 0;
u32 val[2];
int odd_even_bit = -2;
+ int gcc = D40_DREG_GCC_ENA;
val[0] = readl(base->virtbase + D40_DREG_PRSME);
val[1] = readl(base->virtbase + D40_DREG_PRSMO);
@@ -2538,9 +2795,17 @@ static int __init d40_phy_res_init(struct d40_base *base)
/* Mark security only channels as occupied */
base->phy_res[i].allocated_src = D40_ALLOC_PHY;
base->phy_res[i].allocated_dst = D40_ALLOC_PHY;
+ base->phy_res[i].reserved = true;
+ gcc |= D40_DREG_GCC_EVTGRP_ENA(D40_PHYS_TO_GROUP(i),
+ D40_DREG_GCC_SRC);
+ gcc |= D40_DREG_GCC_EVTGRP_ENA(D40_PHYS_TO_GROUP(i),
+ D40_DREG_GCC_DST);
+
+
} else {
base->phy_res[i].allocated_src = D40_ALLOC_FREE;
base->phy_res[i].allocated_dst = D40_ALLOC_FREE;
+ base->phy_res[i].reserved = false;
num_phy_chans_avail++;
}
spin_lock_init(&base->phy_res[i].lock);
@@ -2552,6 +2817,11 @@ static int __init d40_phy_res_init(struct d40_base *base)
base->phy_res[chan].allocated_src = D40_ALLOC_PHY;
base->phy_res[chan].allocated_dst = D40_ALLOC_PHY;
+ base->phy_res[chan].reserved = true;
+ gcc |= D40_DREG_GCC_EVTGRP_ENA(D40_PHYS_TO_GROUP(chan),
+ D40_DREG_GCC_SRC);
+ gcc |= D40_DREG_GCC_EVTGRP_ENA(D40_PHYS_TO_GROUP(chan),
+ D40_DREG_GCC_DST);
num_phy_chans_avail--;
}
@@ -2572,6 +2842,15 @@ static int __init d40_phy_res_init(struct d40_base *base)
val[0] = val[0] >> 2;
}
+ /*
+ * To keep things simple, Enable all clocks initially.
+ * The clocks will get managed later post channel allocation.
+ * The clocks for the event lines on which reserved channels exists
+ * are not managed here.
+ */
+ writel(D40_DREG_GCC_ENABLE_ALL, base->virtbase + D40_DREG_GCC);
+ base->gcc_pwr_off_mask = gcc;
+
return num_phy_chans_avail;
}
@@ -2699,10 +2978,15 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev)
goto failure;
}
- base->lcla_pool.alloc_map = kzalloc(num_phy_chans *
- sizeof(struct d40_desc *) *
- D40_LCLA_LINK_PER_EVENT_GRP,
+ base->reg_val_backup_chan = kmalloc(base->num_phy_chans *
+ sizeof(d40_backup_regs_chan),
GFP_KERNEL);
+ if (!base->reg_val_backup_chan)
+ goto failure;
+
+ base->lcla_pool.alloc_map =
+ kzalloc(num_phy_chans * sizeof(struct d40_desc *)
+ * D40_LCLA_LINK_PER_EVENT_GRP, GFP_KERNEL);
if (!base->lcla_pool.alloc_map)
goto failure;
@@ -2741,9 +3025,9 @@ failure:
static void __init d40_hw_init(struct d40_base *base)
{
- static const struct d40_reg_val dma_init_reg[] = {
+ static struct d40_reg_val dma_init_reg[] = {
/* Clock every part of the DMA block from start */
- { .reg = D40_DREG_GCC, .val = 0x0000ff01},
+ { .reg = D40_DREG_GCC, .val = D40_DREG_GCC_ENABLE_ALL},
/* Interrupts on all logical channels */
{ .reg = D40_DREG_LCMIS0, .val = 0xFFFFFFFF},
@@ -2943,11 +3227,31 @@ static int __init d40_probe(struct platform_device *pdev)
d40_err(&pdev->dev, "Failed to ioremap LCPA region\n");
goto failure;
}
+ /* If lcla has to be located in ESRAM we don't need to allocate */
+ if (base->plat_data->use_esram_lcla) {
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+ "lcla_esram");
+ if (!res) {
+ ret = -ENOENT;
+ d40_err(&pdev->dev,
+ "No \"lcla_esram\" memory resource\n");
+ goto failure;
+ }
+ base->lcla_pool.base = ioremap(res->start,
+ resource_size(res));
+ if (!base->lcla_pool.base) {
+ ret = -ENOMEM;
+ d40_err(&pdev->dev, "Failed to ioremap LCLA region\n");
+ goto failure;
+ }
+ writel(res->start, base->virtbase + D40_DREG_LCLA);
- ret = d40_lcla_allocate(base);
- if (ret) {
- d40_err(&pdev->dev, "Failed to allocate LCLA area\n");
- goto failure;
+ } else {
+ ret = d40_lcla_allocate(base);
+ if (ret) {
+ d40_err(&pdev->dev, "Failed to allocate LCLA area\n");
+ goto failure;
+ }
}
spin_lock_init(&base->lcla_pool.lock);
@@ -2960,6 +3264,32 @@ static int __init d40_probe(struct platform_device *pdev)
goto failure;
}
+ pm_runtime_irq_safe(base->dev);
+ pm_runtime_set_autosuspend_delay(base->dev, DMA40_AUTOSUSPEND_DELAY);
+ pm_runtime_use_autosuspend(base->dev);
+ pm_runtime_enable(base->dev);
+ pm_runtime_resume(base->dev);
+
+ if (base->plat_data->use_esram_lcla) {
+
+ base->lcpa_regulator = regulator_get(base->dev, "lcla_esram");
+ if (IS_ERR(base->lcpa_regulator)) {
+ d40_err(&pdev->dev, "Failed to get lcpa_regulator\n");
+ base->lcpa_regulator = NULL;
+ goto failure;
+ }
+
+ ret = regulator_enable(base->lcpa_regulator);
+ if (ret) {
+ d40_err(&pdev->dev,
+ "Failed to enable lcpa_regulator\n");
+ regulator_put(base->lcpa_regulator);
+ base->lcpa_regulator = NULL;
+ goto failure;
+ }
+ }
+
+ base->initialized = true;
err = d40_dmaengine_init(base, num_reserved_chans);
if (err)
goto failure;
@@ -2976,6 +3306,11 @@ failure:
if (base->virtbase)
iounmap(base->virtbase);
+ if (base->lcla_pool.base && base->plat_data->use_esram_lcla) {
+ iounmap(base->lcla_pool.base);
+ base->lcla_pool.base = NULL;
+ }
+
if (base->lcla_pool.dma_addr)
dma_unmap_single(base->dev, base->lcla_pool.dma_addr,
SZ_1K * base->num_phy_chans,
@@ -2998,6 +3333,11 @@ failure:
clk_put(base->clk);
}
+ if (base->lcpa_regulator) {
+ regulator_disable(base->lcpa_regulator);
+ regulator_put(base->lcpa_regulator);
+ }
+
kfree(base->lcla_pool.alloc_map);
kfree(base->lookup_log_chans);
kfree(base->lookup_phy_chans);
@@ -3013,6 +3353,7 @@ static struct platform_driver d40_driver = {
.driver = {
.owner = THIS_MODULE,
.name = D40_NAME,
+ .pm = DMA40_PM_OPS,
},
};
diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h
index b44c455158d..8d3d490968a 100644
--- a/drivers/dma/ste_dma40_ll.h
+++ b/drivers/dma/ste_dma40_ll.h
@@ -16,6 +16,8 @@
#define D40_TYPE_TO_GROUP(type) (type / 16)
#define D40_TYPE_TO_EVENT(type) (type % 16)
+#define D40_GROUP_SIZE 8
+#define D40_PHYS_TO_GROUP(phys) ((phys & (D40_GROUP_SIZE - 1)) / 2)
/* Most bits of the CFG register are the same in log as in phy mode */
#define D40_SREG_CFG_MST_POS 15
@@ -123,6 +125,15 @@
/* DMA Register Offsets */
#define D40_DREG_GCC 0x000
+#define D40_DREG_GCC_ENA 0x1
+/* This assumes that there are only 4 event groups */
+#define D40_DREG_GCC_ENABLE_ALL 0xff01
+#define D40_DREG_GCC_EVTGRP_POS 8
+#define D40_DREG_GCC_SRC 0
+#define D40_DREG_GCC_DST 1
+#define D40_DREG_GCC_EVTGRP_ENA(x, y) \
+ (1 << (D40_DREG_GCC_EVTGRP_POS + 2 * x + y))
+
#define D40_DREG_PRTYP 0x004
#define D40_DREG_PRSME 0x008
#define D40_DREG_PRSMO 0x00C
diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c
index a4a398f2ef6..a6f9c1684a0 100644
--- a/drivers/dma/timb_dma.c
+++ b/drivers/dma/timb_dma.c
@@ -90,7 +90,7 @@ struct timb_dma_chan {
struct list_head queue;
struct list_head free_list;
unsigned int bytes_per_line;
- enum dma_data_direction direction;
+ enum dma_transfer_direction direction;
unsigned int descs; /* Descriptors to allocate */
unsigned int desc_elems; /* number of elems per descriptor */
};
@@ -166,10 +166,10 @@ static void __td_unmap_desc(struct timb_dma_chan *td_chan, const u8 *dma_desc,
if (single)
dma_unmap_single(chan2dev(&td_chan->chan), addr, len,
- td_chan->direction);
+ DMA_TO_DEVICE);
else
dma_unmap_page(chan2dev(&td_chan->chan), addr, len,
- td_chan->direction);
+ DMA_TO_DEVICE);
}
static void __td_unmap_descs(struct timb_dma_desc *td_desc, bool single)
@@ -235,7 +235,7 @@ static void __td_start_dma(struct timb_dma_chan *td_chan)
"td_chan: %p, chan: %d, membase: %p\n",
td_chan, td_chan->chan.chan_id, td_chan->membase);
- if (td_chan->direction == DMA_FROM_DEVICE) {
+ if (td_chan->direction == DMA_DEV_TO_MEM) {
/* descriptor address */
iowrite32(0, td_chan->membase + TIMBDMA_OFFS_RX_DHAR);
@@ -278,7 +278,7 @@ static void __td_finish(struct timb_dma_chan *td_chan)
txd->cookie);
/* make sure to stop the transfer */
- if (td_chan->direction == DMA_FROM_DEVICE)
+ if (td_chan->direction == DMA_DEV_TO_MEM)
iowrite32(0, td_chan->membase + TIMBDMA_OFFS_RX_ER);
/* Currently no support for stopping DMA transfers
else
@@ -558,7 +558,7 @@ static void td_issue_pending(struct dma_chan *chan)
static struct dma_async_tx_descriptor *td_prep_slave_sg(struct dma_chan *chan,
struct scatterlist *sgl, unsigned int sg_len,
- enum dma_data_direction direction, unsigned long flags)
+ enum dma_transfer_direction direction, unsigned long flags)
{
struct timb_dma_chan *td_chan =
container_of(chan, struct timb_dma_chan, chan);
@@ -606,7 +606,7 @@ static struct dma_async_tx_descriptor *td_prep_slave_sg(struct dma_chan *chan,
}
dma_sync_single_for_device(chan2dmadev(chan), td_desc->txd.phys,
- td_desc->desc_list_len, DMA_TO_DEVICE);
+ td_desc->desc_list_len, DMA_MEM_TO_DEV);
return &td_desc->txd;
}
@@ -775,8 +775,8 @@ static int __devinit td_probe(struct platform_device *pdev)
td_chan->descs = pchan->descriptors;
td_chan->desc_elems = pchan->descriptor_elements;
td_chan->bytes_per_line = pchan->bytes_per_line;
- td_chan->direction = pchan->rx ? DMA_FROM_DEVICE :
- DMA_TO_DEVICE;
+ td_chan->direction = pchan->rx ? DMA_DEV_TO_MEM :
+ DMA_MEM_TO_DEV;
td_chan->membase = td->membase +
(i / 2) * TIMBDMA_INSTANCE_OFFSET +
@@ -841,17 +841,7 @@ static struct platform_driver td_driver = {
.remove = __exit_p(td_remove),
};
-static int __init td_init(void)
-{
- return platform_driver_register(&td_driver);
-}
-module_init(td_init);
-
-static void __exit td_exit(void)
-{
- platform_driver_unregister(&td_driver);
-}
-module_exit(td_exit);
+module_platform_driver(td_driver);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Timberdale DMA controller driver");
diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c
index cbd83e362b5..6122c364cf1 100644
--- a/drivers/dma/txx9dmac.c
+++ b/drivers/dma/txx9dmac.c
@@ -845,7 +845,7 @@ txx9dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
static struct dma_async_tx_descriptor *
txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
- unsigned int sg_len, enum dma_data_direction direction,
+ unsigned int sg_len, enum dma_transfer_direction direction,
unsigned long flags)
{
struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
@@ -860,9 +860,9 @@ txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
BUG_ON(!ds || !ds->reg_width);
if (ds->tx_reg)
- BUG_ON(direction != DMA_TO_DEVICE);
+ BUG_ON(direction != DMA_MEM_TO_DEV);
else
- BUG_ON(direction != DMA_FROM_DEVICE);
+ BUG_ON(direction != DMA_DEV_TO_MEM);
if (unlikely(!sg_len))
return NULL;
@@ -882,7 +882,7 @@ txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
mem = sg_dma_address(sg);
if (__is_dmac64(ddev)) {
- if (direction == DMA_TO_DEVICE) {
+ if (direction == DMA_MEM_TO_DEV) {
desc->hwdesc.SAR = mem;
desc->hwdesc.DAR = ds->tx_reg;
} else {
@@ -891,7 +891,7 @@ txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
}
desc->hwdesc.CNTR = sg_dma_len(sg);
} else {
- if (direction == DMA_TO_DEVICE) {
+ if (direction == DMA_MEM_TO_DEV) {
desc->hwdesc32.SAR = mem;
desc->hwdesc32.DAR = ds->tx_reg;
} else {
@@ -900,7 +900,7 @@ txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
}
desc->hwdesc32.CNTR = sg_dma_len(sg);
}
- if (direction == DMA_TO_DEVICE) {
+ if (direction == DMA_MEM_TO_DEV) {
sai = ds->reg_width;
dai = 0;
} else {
diff --git a/drivers/media/video/mx3_camera.c b/drivers/media/video/mx3_camera.c
index 0cb461dd396..74522773e93 100644
--- a/drivers/media/video/mx3_camera.c
+++ b/drivers/media/video/mx3_camera.c
@@ -287,7 +287,7 @@ static void mx3_videobuf_queue(struct vb2_buffer *vb)
sg_dma_len(sg) = new_size;
txd = ichan->dma_chan.device->device_prep_slave_sg(
- &ichan->dma_chan, sg, 1, DMA_FROM_DEVICE,
+ &ichan->dma_chan, sg, 1, DMA_DEV_TO_MEM,
DMA_PREP_INTERRUPT);
if (!txd)
goto error;
diff --git a/drivers/media/video/timblogiw.c b/drivers/media/video/timblogiw.c
index 0a2d75f0406..4ed1c7c28ae 100644
--- a/drivers/media/video/timblogiw.c
+++ b/drivers/media/video/timblogiw.c
@@ -565,7 +565,7 @@ static void buffer_queue(struct videobuf_queue *vq, struct videobuf_buffer *vb)
spin_unlock_irq(&fh->queue_lock);
desc = fh->chan->device->device_prep_slave_sg(fh->chan,
- buf->sg, sg_elems, DMA_FROM_DEVICE,
+ buf->sg, sg_elems, DMA_DEV_TO_MEM,
DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP);
if (!desc) {
spin_lock_irq(&fh->queue_lock);
diff --git a/drivers/misc/carma/carma-fpga-program.c b/drivers/misc/carma/carma-fpga-program.c
index eb5cd28bc6d..a2d25e4857e 100644
--- a/drivers/misc/carma/carma-fpga-program.c
+++ b/drivers/misc/carma/carma-fpga-program.c
@@ -513,7 +513,7 @@ static noinline int fpga_program_dma(struct fpga_dev *priv)
* transaction, and then put it under external control
*/
memset(&config, 0, sizeof(config));
- config.direction = DMA_TO_DEVICE;
+ config.direction = DMA_MEM_TO_DEV;
config.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
config.dst_maxburst = fpga_fifo_size(priv->regs) / 2 / 4;
ret = chan->device->device_control(chan, DMA_SLAVE_CONFIG,
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index a7ee5027146..fcfe1eb5acc 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -823,6 +823,7 @@ atmci_prepare_data_dma(struct atmel_mci *host, struct mmc_data *data)
struct scatterlist *sg;
unsigned int i;
enum dma_data_direction direction;
+ enum dma_transfer_direction slave_dirn;
unsigned int sglen;
u32 iflags;
@@ -860,16 +861,19 @@ atmci_prepare_data_dma(struct atmel_mci *host, struct mmc_data *data)
if (host->caps.has_dma)
atmci_writel(host, ATMCI_DMA, ATMCI_DMA_CHKSIZE(3) | ATMCI_DMAEN);
- if (data->flags & MMC_DATA_READ)
+ if (data->flags & MMC_DATA_READ) {
direction = DMA_FROM_DEVICE;
- else
+ slave_dirn = DMA_DEV_TO_MEM;
+ } else {
direction = DMA_TO_DEVICE;
+ slave_dirn = DMA_MEM_TO_DEV;
+ }
sglen = dma_map_sg(chan->device->dev, data->sg,
data->sg_len, direction);
desc = chan->device->device_prep_slave_sg(chan,
- data->sg, sglen, direction,
+ data->sg, sglen, slave_dirn,
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
if (!desc)
goto unmap_exit;
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index ece03b491c7..0d955ffaf44 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -374,6 +374,7 @@ static int mmci_dma_prep_data(struct mmci_host *host, struct mmc_data *data,
struct dma_chan *chan;
struct dma_device *device;
struct dma_async_tx_descriptor *desc;
+ enum dma_data_direction buffer_dirn;
int nr_sg;
/* Check if next job is already prepared */
@@ -387,10 +388,12 @@ static int mmci_dma_prep_data(struct mmci_host *host, struct mmc_data *data,
}
if (data->flags & MMC_DATA_READ) {
- conf.direction = DMA_FROM_DEVICE;
+ conf.direction = DMA_DEV_TO_MEM;
+ buffer_dirn = DMA_FROM_DEVICE;
chan = host->dma_rx_channel;
} else {
- conf.direction = DMA_TO_DEVICE;
+ conf.direction = DMA_MEM_TO_DEV;
+ buffer_dirn = DMA_TO_DEVICE;
chan = host->dma_tx_channel;
}
@@ -403,7 +406,7 @@ static int mmci_dma_prep_data(struct mmci_host *host, struct mmc_data *data,
return -EINVAL;
device = chan->device;
- nr_sg = dma_map_sg(device->dev, data->sg, data->sg_len, conf.direction);
+ nr_sg = dma_map_sg(device->dev, data->sg, data->sg_len, buffer_dirn);
if (nr_sg == 0)
return -EINVAL;
@@ -426,7 +429,7 @@ static int mmci_dma_prep_data(struct mmci_host *host, struct mmc_data *data,
unmap_exit:
if (!next)
dmaengine_terminate_all(chan);
- dma_unmap_sg(device->dev, data->sg, data->sg_len, conf.direction);
+ dma_unmap_sg(device->dev, data->sg, data->sg_len, buffer_dirn);
return -ENOMEM;
}
diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
index 7088b40f957..4184b7946bb 100644
--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c
@@ -218,6 +218,7 @@ static int mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data)
unsigned int blksz = data->blksz;
unsigned int datasize = nob * blksz;
struct scatterlist *sg;
+ enum dma_transfer_direction slave_dirn;
int i, nents;
if (data->flags & MMC_DATA_STREAM)
@@ -240,10 +241,13 @@ static int mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data)
}
}
- if (data->flags & MMC_DATA_READ)
+ if (data->flags & MMC_DATA_READ) {
host->dma_dir = DMA_FROM_DEVICE;
- else
+ slave_dirn = DMA_DEV_TO_MEM;
+ } else {
host->dma_dir = DMA_TO_DEVICE;
+ slave_dirn = DMA_MEM_TO_DEV;
+ }
nents = dma_map_sg(host->dma->device->dev, data->sg,
data->sg_len, host->dma_dir);
@@ -251,7 +255,7 @@ static int mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data)
return -EINVAL;
host->desc = host->dma->device->device_prep_slave_sg(host->dma,
- data->sg, data->sg_len, host->dma_dir,
+ data->sg, data->sg_len, slave_dirn,
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
if (!host->desc) {
diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c
index 4e2e019dd5c..382c835d217 100644
--- a/drivers/mmc/host/mxs-mmc.c
+++ b/drivers/mmc/host/mxs-mmc.c
@@ -154,6 +154,7 @@ struct mxs_mmc_host {
struct dma_chan *dmach;
struct mxs_dma_data dma_data;
unsigned int dma_dir;
+ enum dma_transfer_direction slave_dirn;
u32 ssp_pio_words[SSP_PIO_NUM];
unsigned int version;
@@ -324,7 +325,7 @@ static struct dma_async_tx_descriptor *mxs_mmc_prep_dma(
}
desc = host->dmach->device->device_prep_slave_sg(host->dmach,
- sgl, sg_len, host->dma_dir, append);
+ sgl, sg_len, host->slave_dirn, append);
if (desc) {
desc->callback = mxs_mmc_dma_irq_callback;
desc->callback_param = host;
@@ -356,6 +357,7 @@ static void mxs_mmc_bc(struct mxs_mmc_host *host)
host->ssp_pio_words[1] = cmd0;
host->ssp_pio_words[2] = cmd1;
host->dma_dir = DMA_NONE;
+ host->slave_dirn = DMA_TRANS_NONE;
desc = mxs_mmc_prep_dma(host, 0);
if (!desc)
goto out;
@@ -395,6 +397,7 @@ static void mxs_mmc_ac(struct mxs_mmc_host *host)
host->ssp_pio_words[1] = cmd0;
host->ssp_pio_words[2] = cmd1;
host->dma_dir = DMA_NONE;
+ host->slave_dirn = DMA_TRANS_NONE;
desc = mxs_mmc_prep_dma(host, 0);
if (!desc)
goto out;
@@ -433,6 +436,7 @@ static void mxs_mmc_adtc(struct mxs_mmc_host *host)
int i;
unsigned short dma_data_dir, timeout;
+ enum dma_transfer_direction slave_dirn;
unsigned int data_size = 0, log2_blksz;
unsigned int blocks = data->blocks;
@@ -448,9 +452,11 @@ static void mxs_mmc_adtc(struct mxs_mmc_host *host)
if (data->flags & MMC_DATA_WRITE) {
dma_data_dir = DMA_TO_DEVICE;
+ slave_dirn = DMA_MEM_TO_DEV;
read = 0;
} else {
dma_data_dir = DMA_FROM_DEVICE;
+ slave_dirn = DMA_DEV_TO_MEM;
read = BM_SSP_CTRL0_READ;
}
@@ -510,6 +516,7 @@ static void mxs_mmc_adtc(struct mxs_mmc_host *host)
host->ssp_pio_words[1] = cmd0;
host->ssp_pio_words[2] = cmd1;
host->dma_dir = DMA_NONE;
+ host->slave_dirn = DMA_TRANS_NONE;
desc = mxs_mmc_prep_dma(host, 0);
if (!desc)
goto out;
@@ -518,6 +525,7 @@ static void mxs_mmc_adtc(struct mxs_mmc_host *host)
WARN_ON(host->data != NULL);
host->data = data;
host->dma_dir = dma_data_dir;
+ host->slave_dirn = slave_dirn;
desc = mxs_mmc_prep_dma(host, 1);
if (!desc)
goto out;
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index 4a2c5b2355f..f5d8b53be33 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -286,7 +286,7 @@ static void sh_mmcif_start_dma_rx(struct sh_mmcif_host *host)
if (ret > 0) {
host->dma_active = true;
desc = chan->device->device_prep_slave_sg(chan, sg, ret,
- DMA_FROM_DEVICE, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
}
if (desc) {
@@ -335,7 +335,7 @@ static void sh_mmcif_start_dma_tx(struct sh_mmcif_host *host)
if (ret > 0) {
host->dma_active = true;
desc = chan->device->device_prep_slave_sg(chan, sg, ret,
- DMA_TO_DEVICE, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
}
if (desc) {
diff --git a/drivers/mmc/host/tmio_mmc_dma.c b/drivers/mmc/host/tmio_mmc_dma.c
index 86f259cdfcb..7a6e6cc8f8b 100644
--- a/drivers/mmc/host/tmio_mmc_dma.c
+++ b/drivers/mmc/host/tmio_mmc_dma.c
@@ -77,7 +77,7 @@ static void tmio_mmc_start_dma_rx(struct tmio_mmc_host *host)
ret = dma_map_sg(chan->device->dev, sg, host->sg_len, DMA_FROM_DEVICE);
if (ret > 0)
desc = chan->device->device_prep_slave_sg(chan, sg, ret,
- DMA_FROM_DEVICE, DMA_CTRL_ACK);
+ DMA_DEV_TO_MEM, DMA_CTRL_ACK);
if (desc) {
cookie = dmaengine_submit(desc);
@@ -158,7 +158,7 @@ static void tmio_mmc_start_dma_tx(struct tmio_mmc_host *host)
ret = dma_map_sg(chan->device->dev, sg, host->sg_len, DMA_TO_DEVICE);
if (ret > 0)
desc = chan->device->device_prep_slave_sg(chan, sg, ret,
- DMA_TO_DEVICE, DMA_CTRL_ACK);
+ DMA_MEM_TO_DEV, DMA_CTRL_ACK);
if (desc) {
cookie = dmaengine_submit(desc);
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
index 2a56fc6f399..7f680420bfa 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
@@ -827,7 +827,7 @@ int gpmi_send_command(struct gpmi_nand_data *this)
pio[1] = pio[2] = 0;
desc = channel->device->device_prep_slave_sg(channel,
(struct scatterlist *)pio,
- ARRAY_SIZE(pio), DMA_NONE, 0);
+ ARRAY_SIZE(pio), DMA_TRANS_NONE, 0);
if (!desc) {
pr_err("step 1 error\n");
return -1;
@@ -839,7 +839,7 @@ int gpmi_send_command(struct gpmi_nand_data *this)
sg_init_one(sgl, this->cmd_buffer, this->command_length);
dma_map_sg(this->dev, sgl, 1, DMA_TO_DEVICE);
desc = channel->device->device_prep_slave_sg(channel,
- sgl, 1, DMA_TO_DEVICE, 1);
+ sgl, 1, DMA_MEM_TO_DEV, 1);
if (!desc) {
pr_err("step 2 error\n");
return -1;
@@ -872,7 +872,7 @@ int gpmi_send_data(struct gpmi_nand_data *this)
pio[1] = 0;
desc = channel->device->device_prep_slave_sg(channel,
(struct scatterlist *)pio,
- ARRAY_SIZE(pio), DMA_NONE, 0);
+ ARRAY_SIZE(pio), DMA_TRANS_NONE, 0);
if (!desc) {
pr_err("step 1 error\n");
return -1;
@@ -881,7 +881,7 @@ int gpmi_send_data(struct gpmi_nand_data *this)
/* [2] send DMA request */
prepare_data_dma(this, DMA_TO_DEVICE);
desc = channel->device->device_prep_slave_sg(channel, &this->data_sgl,
- 1, DMA_TO_DEVICE, 1);
+ 1, DMA_MEM_TO_DEV, 1);
if (!desc) {
pr_err("step 2 error\n");
return -1;
@@ -908,7 +908,7 @@ int gpmi_read_data(struct gpmi_nand_data *this)
pio[1] = 0;
desc = channel->device->device_prep_slave_sg(channel,
(struct scatterlist *)pio,
- ARRAY_SIZE(pio), DMA_NONE, 0);
+ ARRAY_SIZE(pio), DMA_TRANS_NONE, 0);
if (!desc) {
pr_err("step 1 error\n");
return -1;
@@ -917,7 +917,7 @@ int gpmi_read_data(struct gpmi_nand_data *this)
/* [2] : send DMA request */
prepare_data_dma(this, DMA_FROM_DEVICE);
desc = channel->device->device_prep_slave_sg(channel, &this->data_sgl,
- 1, DMA_FROM_DEVICE, 1);
+ 1, DMA_DEV_TO_MEM, 1);
if (!desc) {
pr_err("step 2 error\n");
return -1;
@@ -964,7 +964,7 @@ int gpmi_send_page(struct gpmi_nand_data *this,
desc = channel->device->device_prep_slave_sg(channel,
(struct scatterlist *)pio,
- ARRAY_SIZE(pio), DMA_NONE, 0);
+ ARRAY_SIZE(pio), DMA_TRANS_NONE, 0);
if (!desc) {
pr_err("step 2 error\n");
return -1;
@@ -998,7 +998,8 @@ int gpmi_read_page(struct gpmi_nand_data *this,
| BF_GPMI_CTRL0_XFER_COUNT(0);
pio[1] = 0;
desc = channel->device->device_prep_slave_sg(channel,
- (struct scatterlist *)pio, 2, DMA_NONE, 0);
+ (struct scatterlist *)pio, 2,
+ DMA_TRANS_NONE, 0);
if (!desc) {
pr_err("step 1 error\n");
return -1;
@@ -1027,7 +1028,7 @@ int gpmi_read_page(struct gpmi_nand_data *this,
pio[5] = auxiliary;
desc = channel->device->device_prep_slave_sg(channel,
(struct scatterlist *)pio,
- ARRAY_SIZE(pio), DMA_NONE, 1);
+ ARRAY_SIZE(pio), DMA_TRANS_NONE, 1);
if (!desc) {
pr_err("step 2 error\n");
return -1;
@@ -1045,7 +1046,8 @@ int gpmi_read_page(struct gpmi_nand_data *this,
| BF_GPMI_CTRL0_XFER_COUNT(geo->page_size);
pio[1] = 0;
desc = channel->device->device_prep_slave_sg(channel,
- (struct scatterlist *)pio, 2, DMA_NONE, 1);
+ (struct scatterlist *)pio, 2,
+ DMA_TRANS_NONE, 1);
if (!desc) {
pr_err("step 3 error\n");
return -1;
diff --git a/drivers/net/ethernet/micrel/ks8842.c b/drivers/net/ethernet/micrel/ks8842.c
index 75ec87a822b..0a85690a132 100644
--- a/drivers/net/ethernet/micrel/ks8842.c
+++ b/drivers/net/ethernet/micrel/ks8842.c
@@ -459,7 +459,7 @@ static int ks8842_tx_frame_dma(struct sk_buff *skb, struct net_device *netdev)
sg_dma_len(&ctl->sg) += 4 - sg_dma_len(&ctl->sg) % 4;
ctl->adesc = ctl->chan->device->device_prep_slave_sg(ctl->chan,
- &ctl->sg, 1, DMA_TO_DEVICE,
+ &ctl->sg, 1, DMA_MEM_TO_DEV,
DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP);
if (!ctl->adesc)
return NETDEV_TX_BUSY;
@@ -571,7 +571,7 @@ static int __ks8842_start_new_rx_dma(struct net_device *netdev)
sg_dma_len(sg) = DMA_BUFFER_SIZE;
ctl->adesc = ctl->chan->device->device_prep_slave_sg(ctl->chan,
- sg, 1, DMA_FROM_DEVICE,
+ sg, 1, DMA_DEV_TO_MEM,
DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP);
if (!ctl->adesc)
diff --git a/drivers/spi/spi-dw-mid.c b/drivers/spi/spi-dw-mid.c
index e743a45ee92..8418eb03665 100644
--- a/drivers/spi/spi-dw-mid.c
+++ b/drivers/spi/spi-dw-mid.c
@@ -131,7 +131,7 @@ static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change)
rxchan = dws->rxchan;
/* 2. Prepare the TX dma transfer */
- txconf.direction = DMA_TO_DEVICE;
+ txconf.direction = DMA_MEM_TO_DEV;
txconf.dst_addr = dws->dma_addr;
txconf.dst_maxburst = LNW_DMA_MSIZE_16;
txconf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
@@ -147,13 +147,13 @@ static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change)
txdesc = txchan->device->device_prep_slave_sg(txchan,
&dws->tx_sgl,
1,
- DMA_TO_DEVICE,
+ DMA_MEM_TO_DEV,
DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_DEST_UNMAP);
txdesc->callback = dw_spi_dma_done;
txdesc->callback_param = dws;
/* 3. Prepare the RX dma transfer */
- rxconf.direction = DMA_FROM_DEVICE;
+ rxconf.direction = DMA_DEV_TO_MEM;
rxconf.src_addr = dws->dma_addr;
rxconf.src_maxburst = LNW_DMA_MSIZE_16;
rxconf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
@@ -169,7 +169,7 @@ static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change)
rxdesc = rxchan->device->device_prep_slave_sg(rxchan,
&dws->rx_sgl,
1,
- DMA_FROM_DEVICE,
+ DMA_DEV_TO_MEM,
DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_DEST_UNMAP);
rxdesc->callback = dw_spi_dma_done;
rxdesc->callback_param = dws;
diff --git a/drivers/spi/spi-ep93xx.c b/drivers/spi/spi-ep93xx.c
index 0a282e5fcc9..d46e55c720b 100644
--- a/drivers/spi/spi-ep93xx.c
+++ b/drivers/spi/spi-ep93xx.c
@@ -551,6 +551,7 @@ ep93xx_spi_dma_prepare(struct ep93xx_spi *espi, enum dma_data_direction dir)
struct dma_async_tx_descriptor *txd;
enum dma_slave_buswidth buswidth;
struct dma_slave_config conf;
+ enum dma_transfer_direction slave_dirn;
struct scatterlist *sg;
struct sg_table *sgt;
struct dma_chan *chan;
@@ -573,6 +574,7 @@ ep93xx_spi_dma_prepare(struct ep93xx_spi *espi, enum dma_data_direction dir)
conf.src_addr = espi->sspdr_phys;
conf.src_addr_width = buswidth;
+ slave_dirn = DMA_DEV_TO_MEM;
} else {
chan = espi->dma_tx;
buf = t->tx_buf;
@@ -580,6 +582,7 @@ ep93xx_spi_dma_prepare(struct ep93xx_spi *espi, enum dma_data_direction dir)
conf.dst_addr = espi->sspdr_phys;
conf.dst_addr_width = buswidth;
+ slave_dirn = DMA_MEM_TO_DEV;
}
ret = dmaengine_slave_config(chan, &conf);
@@ -631,7 +634,7 @@ ep93xx_spi_dma_prepare(struct ep93xx_spi *espi, enum dma_data_direction dir)
return ERR_PTR(-ENOMEM);
txd = chan->device->device_prep_slave_sg(chan, sgt->sgl, nents,
- dir, DMA_CTRL_ACK);
+ slave_dirn, DMA_CTRL_ACK);
if (!txd) {
dma_unmap_sg(chan->device->dev, sgt->sgl, sgt->nents, dir);
return ERR_PTR(-ENOMEM);
@@ -979,7 +982,7 @@ static int ep93xx_spi_setup_dma(struct ep93xx_spi *espi)
dma_cap_set(DMA_SLAVE, mask);
espi->dma_rx_data.port = EP93XX_DMA_SSP;
- espi->dma_rx_data.direction = DMA_FROM_DEVICE;
+ espi->dma_rx_data.direction = DMA_DEV_TO_MEM;
espi->dma_rx_data.name = "ep93xx-spi-rx";
espi->dma_rx = dma_request_channel(mask, ep93xx_spi_dma_filter,
@@ -990,7 +993,7 @@ static int ep93xx_spi_setup_dma(struct ep93xx_spi *espi)
}
espi->dma_tx_data.port = EP93XX_DMA_SSP;
- espi->dma_tx_data.direction = DMA_TO_DEVICE;
+ espi->dma_tx_data.direction = DMA_MEM_TO_DEV;
espi->dma_tx_data.name = "ep93xx-spi-tx";
espi->dma_tx = dma_request_channel(mask, ep93xx_spi_dma_filter,
diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c
index f1f5efbc340..2f9cb43a239 100644
--- a/drivers/spi/spi-pl022.c
+++ b/drivers/spi/spi-pl022.c
@@ -900,11 +900,11 @@ static int configure_dma(struct pl022 *pl022)
{
struct dma_slave_config rx_conf = {
.src_addr = SSP_DR(pl022->phybase),
- .direction = DMA_FROM_DEVICE,
+ .direction = DMA_DEV_TO_MEM,
};
struct dma_slave_config tx_conf = {
.dst_addr = SSP_DR(pl022->phybase),
- .direction = DMA_TO_DEVICE,
+ .direction = DMA_MEM_TO_DEV,
};
unsigned int pages;
int ret;
@@ -1041,7 +1041,7 @@ static int configure_dma(struct pl022 *pl022)
rxdesc = rxchan->device->device_prep_slave_sg(rxchan,
pl022->sgt_rx.sgl,
rx_sglen,
- DMA_FROM_DEVICE,
+ DMA_DEV_TO_MEM,
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
if (!rxdesc)
goto err_rxdesc;
@@ -1049,7 +1049,7 @@ static int configure_dma(struct pl022 *pl022)
txdesc = txchan->device->device_prep_slave_sg(txchan,
pl022->sgt_tx.sgl,
tx_sglen,
- DMA_TO_DEVICE,
+ DMA_MEM_TO_DEV,
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
if (!txdesc)
goto err_txdesc;
diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c
index 7086583b910..2a6429d8c36 100644
--- a/drivers/spi/spi-topcliff-pch.c
+++ b/drivers/spi/spi-topcliff-pch.c
@@ -1079,7 +1079,7 @@ static void pch_spi_handle_dma(struct pch_spi_data *data, int *bpw)
}
sg = dma->sg_rx_p;
desc_rx = dma->chan_rx->device->device_prep_slave_sg(dma->chan_rx, sg,
- num, DMA_FROM_DEVICE,
+ num, DMA_DEV_TO_MEM,
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
if (!desc_rx) {
dev_err(&data->master->dev, "%s:device_prep_slave_sg Failed\n",
@@ -1124,7 +1124,7 @@ static void pch_spi_handle_dma(struct pch_spi_data *data, int *bpw)
}
sg = dma->sg_tx_p;
desc_tx = dma->chan_tx->device->device_prep_slave_sg(dma->chan_tx,
- sg, num, DMA_TO_DEVICE,
+ sg, num, DMA_MEM_TO_DEV,
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
if (!desc_tx) {
dev_err(&data->master->dev, "%s:device_prep_slave_sg Failed\n",
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index 6958594f2fc..9ae024025ff 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -268,7 +268,7 @@ static void pl011_dma_probe_initcall(struct uart_amba_port *uap)
struct dma_slave_config tx_conf = {
.dst_addr = uap->port.mapbase + UART01x_DR,
.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE,
- .direction = DMA_TO_DEVICE,
+ .direction = DMA_MEM_TO_DEV,
.dst_maxburst = uap->fifosize >> 1,
};
struct dma_chan *chan;
@@ -301,7 +301,7 @@ static void pl011_dma_probe_initcall(struct uart_amba_port *uap)
struct dma_slave_config rx_conf = {
.src_addr = uap->port.mapbase + UART01x_DR,
.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE,
- .direction = DMA_FROM_DEVICE,
+ .direction = DMA_DEV_TO_MEM,
.src_maxburst = uap->fifosize >> 1,
};
@@ -480,7 +480,7 @@ static int pl011_dma_tx_refill(struct uart_amba_port *uap)
return -EBUSY;
}
- desc = dma_dev->device_prep_slave_sg(chan, &dmatx->sg, 1, DMA_TO_DEVICE,
+ desc = dma_dev->device_prep_slave_sg(chan, &dmatx->sg, 1, DMA_MEM_TO_DEV,
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
if (!desc) {
dma_unmap_sg(dma_dev->dev, &dmatx->sg, 1, DMA_TO_DEVICE);
@@ -676,7 +676,7 @@ static int pl011_dma_rx_trigger_dma(struct uart_amba_port *uap)
&uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a;
dma_dev = rxchan->device;
desc = rxchan->device->device_prep_slave_sg(rxchan, &sgbuf->sg, 1,
- DMA_FROM_DEVICE,
+ DMA_DEV_TO_MEM,
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
/*
* If the DMA engine is busy and cannot prepare a
diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c
index de0f613ed6f..17ae65762d1 100644
--- a/drivers/tty/serial/pch_uart.c
+++ b/drivers/tty/serial/pch_uart.c
@@ -764,7 +764,7 @@ static int dma_handle_rx(struct eg20t_port *priv)
sg_dma_address(sg) = priv->rx_buf_dma;
desc = priv->chan_rx->device->device_prep_slave_sg(priv->chan_rx,
- sg, 1, DMA_FROM_DEVICE,
+ sg, 1, DMA_DEV_TO_MEM,
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
if (!desc)
@@ -923,7 +923,7 @@ static unsigned int dma_handle_tx(struct eg20t_port *priv)
}
desc = priv->chan_tx->device->device_prep_slave_sg(priv->chan_tx,
- priv->sg_tx_p, nent, DMA_TO_DEVICE,
+ priv->sg_tx_p, nent, DMA_MEM_TO_DEV,
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
if (!desc) {
dev_err(priv->port.dev, "%s:device_prep_slave_sg Failed\n",
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 9e62349b3d9..75085795528 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -1339,7 +1339,7 @@ static void sci_submit_rx(struct sci_port *s)
struct dma_async_tx_descriptor *desc;
desc = chan->device->device_prep_slave_sg(chan,
- sg, 1, DMA_FROM_DEVICE, DMA_PREP_INTERRUPT);
+ sg, 1, DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
if (desc) {
s->desc_rx[i] = desc;
@@ -1454,7 +1454,7 @@ static void work_fn_tx(struct work_struct *work)
BUG_ON(!sg_dma_len(sg));
desc = chan->device->device_prep_slave_sg(chan,
- sg, s->sg_len_tx, DMA_TO_DEVICE,
+ sg, s->sg_len_tx, DMA_MEM_TO_DEV,
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
if (!desc) {
/* switch to PIO */
diff --git a/drivers/usb/host/ehci-xilinx-of.c b/drivers/usb/host/ehci-xilinx-of.c
index 32793ce3d9e..9c2cc463389 100644
--- a/drivers/usb/host/ehci-xilinx-of.c
+++ b/drivers/usb/host/ehci-xilinx-of.c
@@ -183,7 +183,7 @@ static int __devinit ehci_hcd_xilinx_of_probe(struct platform_device *op)
}
irq = irq_of_parse_and_map(dn, 0);
- if (irq == NO_IRQ) {
+ if (!irq) {
printk(KERN_ERR "%s: irq_of_parse_and_map failed\n", __FILE__);
rv = -EBUSY;
goto err_irq;
diff --git a/drivers/usb/musb/ux500_dma.c b/drivers/usb/musb/ux500_dma.c
index a163632877a..97cb45916c4 100644
--- a/drivers/usb/musb/ux500_dma.c
+++ b/drivers/usb/musb/ux500_dma.c
@@ -84,7 +84,7 @@ static bool ux500_configure_channel(struct dma_channel *channel,
struct musb_hw_ep *hw_ep = ux500_channel->hw_ep;
struct dma_chan *dma_chan = ux500_channel->dma_chan;
struct dma_async_tx_descriptor *dma_desc;
- enum dma_data_direction direction;
+ enum dma_transfer_direction direction;
struct scatterlist sg;
struct dma_slave_config slave_conf;
enum dma_slave_buswidth addr_width;
@@ -104,7 +104,7 @@ static bool ux500_configure_channel(struct dma_channel *channel,
sg_dma_address(&sg) = dma_addr;
sg_dma_len(&sg) = len;
- direction = ux500_channel->is_tx ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
+ direction = ux500_channel->is_tx ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM;
addr_width = (len & 0x3) ? DMA_SLAVE_BUSWIDTH_1_BYTE :
DMA_SLAVE_BUSWIDTH_4_BYTES;
diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c
index b51fcd80d24..72339bd6fca 100644
--- a/drivers/usb/renesas_usbhs/fifo.c
+++ b/drivers/usb/renesas_usbhs/fifo.c
@@ -772,10 +772,10 @@ static void usbhsf_dma_prepare_tasklet(unsigned long data)
struct dma_async_tx_descriptor *desc;
struct dma_chan *chan = usbhsf_dma_chan_get(fifo, pkt);
struct device *dev = usbhs_priv_to_dev(priv);
- enum dma_data_direction dir;
+ enum dma_transfer_direction dir;
dma_cookie_t cookie;
- dir = usbhs_pipe_is_dir_in(pipe) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+ dir = usbhs_pipe_is_dir_in(pipe) ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV;
sg_init_table(&sg, 1);
sg_set_page(&sg, virt_to_page(pkt->dma),
diff --git a/drivers/video/mx3fb.c b/drivers/video/mx3fb.c
index e3406ab3130..727a5149d81 100644
--- a/drivers/video/mx3fb.c
+++ b/drivers/video/mx3fb.c
@@ -245,6 +245,7 @@ struct mx3fb_data {
uint32_t h_start_width;
uint32_t v_start_width;
+ enum disp_data_mapping disp_data_fmt;
};
struct dma_chan_request {
@@ -287,11 +288,14 @@ static void mx3fb_write_reg(struct mx3fb_data *mx3fb, u32 value, unsigned long r
__raw_writel(value, mx3fb->reg_base + reg);
}
-static const uint32_t di_mappings[] = {
- 0x1600AAAA, 0x00E05555, 0x00070000, 3, /* RGB888 */
- 0x0005000F, 0x000B000F, 0x0011000F, 1, /* RGB666 */
- 0x0011000F, 0x000B000F, 0x0005000F, 1, /* BGR666 */
- 0x0004003F, 0x000A000F, 0x000F003F, 1 /* RGB565 */
+struct di_mapping {
+ uint32_t b0, b1, b2;
+};
+
+static const struct di_mapping di_mappings[] = {
+ [IPU_DISP_DATA_MAPPING_RGB666] = { 0x0005000f, 0x000b000f, 0x0011000f },
+ [IPU_DISP_DATA_MAPPING_RGB565] = { 0x0004003f, 0x000a000f, 0x000f003f },
+ [IPU_DISP_DATA_MAPPING_RGB888] = { 0x00070000, 0x000f0000, 0x00170000 },
};
static void sdc_fb_init(struct mx3fb_info *fbi)
@@ -334,7 +338,7 @@ static void sdc_enable_channel(struct mx3fb_info *mx3_fbi)
/* This enables the channel */
if (mx3_fbi->cookie < 0) {
mx3_fbi->txd = dma_chan->device->device_prep_slave_sg(dma_chan,
- &mx3_fbi->sg[0], 1, DMA_TO_DEVICE, DMA_PREP_INTERRUPT);
+ &mx3_fbi->sg[0], 1, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
if (!mx3_fbi->txd) {
dev_err(mx3fb->dev, "Cannot allocate descriptor on %d\n",
dma_chan->chan_id);
@@ -425,7 +429,6 @@ static int sdc_set_window_pos(struct mx3fb_data *mx3fb, enum ipu_channel channel
* @pixel_clk: desired pixel clock frequency in Hz.
* @width: width of panel in pixels.
* @height: height of panel in pixels.
- * @pixel_fmt: pixel format of buffer as FOURCC ASCII code.
* @h_start_width: number of pixel clocks between the HSYNC signal pulse
* and the start of valid data.
* @h_sync_width: width of the HSYNC signal in units of pixel clocks.
@@ -442,7 +445,6 @@ static int sdc_set_window_pos(struct mx3fb_data *mx3fb, enum ipu_channel channel
static int sdc_init_panel(struct mx3fb_data *mx3fb, enum ipu_panel panel,
uint32_t pixel_clk,
uint16_t width, uint16_t height,
- enum pixel_fmt pixel_fmt,
uint16_t h_start_width, uint16_t h_sync_width,
uint16_t h_end_width, uint16_t v_start_width,
uint16_t v_sync_width, uint16_t v_end_width,
@@ -453,6 +455,7 @@ static int sdc_init_panel(struct mx3fb_data *mx3fb, enum ipu_panel panel,
uint32_t old_conf;
uint32_t div;
struct clk *ipu_clk;
+ const struct di_mapping *map;
dev_dbg(mx3fb->dev, "panel size = %d x %d", width, height);
@@ -540,36 +543,10 @@ static int sdc_init_panel(struct mx3fb_data *mx3fb, enum ipu_panel panel,
sig.Vsync_pol << DI_D3_VSYNC_POL_SHIFT;
mx3fb_write_reg(mx3fb, old_conf, DI_DISP_SIG_POL);
- switch (pixel_fmt) {
- case IPU_PIX_FMT_RGB24:
- mx3fb_write_reg(mx3fb, di_mappings[0], DI_DISP3_B0_MAP);
- mx3fb_write_reg(mx3fb, di_mappings[1], DI_DISP3_B1_MAP);
- mx3fb_write_reg(mx3fb, di_mappings[2], DI_DISP3_B2_MAP);
- mx3fb_write_reg(mx3fb, mx3fb_read_reg(mx3fb, DI_DISP_ACC_CC) |
- ((di_mappings[3] - 1) << 12), DI_DISP_ACC_CC);
- break;
- case IPU_PIX_FMT_RGB666:
- mx3fb_write_reg(mx3fb, di_mappings[4], DI_DISP3_B0_MAP);
- mx3fb_write_reg(mx3fb, di_mappings[5], DI_DISP3_B1_MAP);
- mx3fb_write_reg(mx3fb, di_mappings[6], DI_DISP3_B2_MAP);
- mx3fb_write_reg(mx3fb, mx3fb_read_reg(mx3fb, DI_DISP_ACC_CC) |
- ((di_mappings[7] - 1) << 12), DI_DISP_ACC_CC);
- break;
- case IPU_PIX_FMT_BGR666:
- mx3fb_write_reg(mx3fb, di_mappings[8], DI_DISP3_B0_MAP);
- mx3fb_write_reg(mx3fb, di_mappings[9], DI_DISP3_B1_MAP);
- mx3fb_write_reg(mx3fb, di_mappings[10], DI_DISP3_B2_MAP);
- mx3fb_write_reg(mx3fb, mx3fb_read_reg(mx3fb, DI_DISP_ACC_CC) |
- ((di_mappings[11] - 1) << 12), DI_DISP_ACC_CC);
- break;
- default:
- mx3fb_write_reg(mx3fb, di_mappings[12], DI_DISP3_B0_MAP);
- mx3fb_write_reg(mx3fb, di_mappings[13], DI_DISP3_B1_MAP);
- mx3fb_write_reg(mx3fb, di_mappings[14], DI_DISP3_B2_MAP);
- mx3fb_write_reg(mx3fb, mx3fb_read_reg(mx3fb, DI_DISP_ACC_CC) |
- ((di_mappings[15] - 1) << 12), DI_DISP_ACC_CC);
- break;
- }
+ map = &di_mappings[mx3fb->disp_data_fmt];
+ mx3fb_write_reg(mx3fb, map->b0, DI_DISP3_B0_MAP);
+ mx3fb_write_reg(mx3fb, map->b1, DI_DISP3_B1_MAP);
+ mx3fb_write_reg(mx3fb, map->b2, DI_DISP3_B2_MAP);
spin_unlock_irqrestore(&mx3fb->lock, lock_flags);
@@ -780,8 +757,6 @@ static int __set_par(struct fb_info *fbi, bool lock)
if (sdc_init_panel(mx3fb, mode,
(PICOS2KHZ(fbi->var.pixclock)) * 1000UL,
fbi->var.xres, fbi->var.yres,
- (fbi->var.sync & FB_SYNC_SWAP_RGB) ?
- IPU_PIX_FMT_BGR666 : IPU_PIX_FMT_RGB666,
fbi->var.left_margin,
fbi->var.hsync_len,
fbi->var.right_margin +
@@ -1117,7 +1092,7 @@ static int mx3fb_pan_display(struct fb_var_screeninfo *var,
async_tx_ack(mx3_fbi->txd);
txd = dma_chan->device->device_prep_slave_sg(dma_chan, sg +
- mx3_fbi->cur_ipu_buf, 1, DMA_TO_DEVICE, DMA_PREP_INTERRUPT);
+ mx3_fbi->cur_ipu_buf, 1, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
if (!txd) {
dev_err(fbi->device,
"Error preparing a DMA transaction descriptor.\n");
@@ -1349,6 +1324,12 @@ static int init_fb_chan(struct mx3fb_data *mx3fb, struct idmac_channel *ichan)
const struct fb_videomode *mode;
int ret, num_modes;
+ if (mx3fb_pdata->disp_data_fmt >= ARRAY_SIZE(di_mappings)) {
+ dev_err(dev, "Illegal display data format %d\n",
+ mx3fb_pdata->disp_data_fmt);
+ return -EINVAL;
+ }
+
ichan->client = mx3fb;
irq = ichan->eof_irq;
@@ -1402,6 +1383,8 @@ static int init_fb_chan(struct mx3fb_data *mx3fb, struct idmac_channel *ichan)
mx3fbi->mx3fb = mx3fb;
mx3fbi->blank = FB_BLANK_NORMAL;
+ mx3fb->disp_data_fmt = mx3fb_pdata->disp_data_fmt;
+
init_completion(&mx3fbi->flip_cmpl);
disable_irq(ichan->eof_irq);
dev_dbg(mx3fb->dev, "disabling irq %d\n", ichan->eof_irq);
diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c
index 3832e303c33..596e6a7b17d 100644
--- a/drivers/xen/xen-balloon.c
+++ b/drivers/xen/xen-balloon.c
@@ -221,7 +221,7 @@ static int register_balloon(struct device *dev)
{
int i, error;
- error = bus_register(&balloon_subsys);
+ error = subsys_system_register(&balloon_subsys, NULL);
if (error)
return error;
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index ecb9fd3be14..d33f01c08b6 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -31,3 +31,22 @@ config BTRFS_FS_POSIX_ACL
Linux website <http://acl.bestbits.at/>.
If you don't know what Access Control Lists are, say N
+
+config BTRFS_FS_CHECK_INTEGRITY
+ bool "Btrfs with integrity check tool compiled in (DANGEROUS)"
+ depends on BTRFS_FS
+ help
+ Adds code that examines all block write requests (including
+ writes of the super block). The goal is to verify that the
+ state of the filesystem on disk is always consistent, i.e.,
+ after a power-loss or kernel panic event the filesystem is
+ in a consistent state.
+
+ If the integrity check tool is included and activated in
+ the mount options, plenty of kernel memory is used, and
+ plenty of additional CPU cycles are spent. Enabling this
+ functionality is not intended for normal use.
+
+ In most cases, unless you are a btrfs developer who needs
+ to verify the integrity of (super)-block write requests
+ during the run of a regression test, say N
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index c0ddfd29c5e..0c4fa2befae 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -8,6 +8,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
- reada.o backref.o
+ reada.o backref.o ulist.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
+btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 22c64fff1bd..b9a843226de 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -19,18 +19,789 @@
#include "ctree.h"
#include "disk-io.h"
#include "backref.h"
+#include "ulist.h"
+#include "transaction.h"
+#include "delayed-ref.h"
-struct __data_ref {
+/*
+ * this structure records all encountered refs on the way up to the root
+ */
+struct __prelim_ref {
struct list_head list;
- u64 inum;
- u64 root;
- u64 extent_data_item_offset;
+ u64 root_id;
+ struct btrfs_key key;
+ int level;
+ int count;
+ u64 parent;
+ u64 wanted_disk_byte;
};
-struct __shared_ref {
- struct list_head list;
+static int __add_prelim_ref(struct list_head *head, u64 root_id,
+ struct btrfs_key *key, int level, u64 parent,
+ u64 wanted_disk_byte, int count)
+{
+ struct __prelim_ref *ref;
+
+ /* in case we're adding delayed refs, we're holding the refs spinlock */
+ ref = kmalloc(sizeof(*ref), GFP_ATOMIC);
+ if (!ref)
+ return -ENOMEM;
+
+ ref->root_id = root_id;
+ if (key)
+ ref->key = *key;
+ else
+ memset(&ref->key, 0, sizeof(ref->key));
+
+ ref->level = level;
+ ref->count = count;
+ ref->parent = parent;
+ ref->wanted_disk_byte = wanted_disk_byte;
+ list_add_tail(&ref->list, head);
+
+ return 0;
+}
+
+static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
+ struct ulist *parents,
+ struct extent_buffer *eb, int level,
+ u64 wanted_objectid, u64 wanted_disk_byte)
+{
+ int ret;
+ int slot;
+ struct btrfs_file_extent_item *fi;
+ struct btrfs_key key;
u64 disk_byte;
-};
+
+add_parent:
+ ret = ulist_add(parents, eb->start, 0, GFP_NOFS);
+ if (ret < 0)
+ return ret;
+
+ if (level != 0)
+ return 0;
+
+ /*
+ * if the current leaf is full with EXTENT_DATA items, we must
+ * check the next one if that holds a reference as well.
+ * ref->count cannot be used to skip this check.
+ * repeat this until we don't find any additional EXTENT_DATA items.
+ */
+ while (1) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ return ret;
+ if (ret)
+ return 0;
+
+ eb = path->nodes[0];
+ for (slot = 0; slot < btrfs_header_nritems(eb); ++slot) {
+ btrfs_item_key_to_cpu(eb, &key, slot);
+ if (key.objectid != wanted_objectid ||
+ key.type != BTRFS_EXTENT_DATA_KEY)
+ return 0;
+ fi = btrfs_item_ptr(eb, slot,
+ struct btrfs_file_extent_item);
+ disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
+ if (disk_byte == wanted_disk_byte)
+ goto add_parent;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * resolve an indirect backref in the form (root_id, key, level)
+ * to a logical address
+ */
+static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
+ struct __prelim_ref *ref,
+ struct ulist *parents)
+{
+ struct btrfs_path *path;
+ struct btrfs_root *root;
+ struct btrfs_key root_key;
+ struct btrfs_key key = {0};
+ struct extent_buffer *eb;
+ int ret = 0;
+ int root_level;
+ int level = ref->level;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ root_key.objectid = ref->root_id;
+ root_key.type = BTRFS_ROOT_ITEM_KEY;
+ root_key.offset = (u64)-1;
+ root = btrfs_read_fs_root_no_name(fs_info, &root_key);
+ if (IS_ERR(root)) {
+ ret = PTR_ERR(root);
+ goto out;
+ }
+
+ rcu_read_lock();
+ root_level = btrfs_header_level(root->node);
+ rcu_read_unlock();
+
+ if (root_level + 1 == level)
+ goto out;
+
+ path->lowest_level = level;
+ ret = btrfs_search_slot(NULL, root, &ref->key, path, 0, 0);
+ pr_debug("search slot in root %llu (level %d, ref count %d) returned "
+ "%d for key (%llu %u %llu)\n",
+ (unsigned long long)ref->root_id, level, ref->count, ret,
+ (unsigned long long)ref->key.objectid, ref->key.type,
+ (unsigned long long)ref->key.offset);
+ if (ret < 0)
+ goto out;
+
+ eb = path->nodes[level];
+ if (!eb) {
+ WARN_ON(1);
+ ret = 1;
+ goto out;
+ }
+
+ if (level == 0) {
+ if (ret == 1 && path->slots[0] >= btrfs_header_nritems(eb)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret)
+ goto out;
+ eb = path->nodes[0];
+ }
+
+ btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
+ }
+
+ /* the last two parameters will only be used for level == 0 */
+ ret = add_all_parents(root, path, parents, eb, level, key.objectid,
+ ref->wanted_disk_byte);
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+/*
+ * resolve all indirect backrefs from the list
+ */
+static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
+ struct list_head *head)
+{
+ int err;
+ int ret = 0;
+ struct __prelim_ref *ref;
+ struct __prelim_ref *ref_safe;
+ struct __prelim_ref *new_ref;
+ struct ulist *parents;
+ struct ulist_node *node;
+
+ parents = ulist_alloc(GFP_NOFS);
+ if (!parents)
+ return -ENOMEM;
+
+ /*
+ * _safe allows us to insert directly after the current item without
+ * iterating over the newly inserted items.
+ * we're also allowed to re-assign ref during iteration.
+ */
+ list_for_each_entry_safe(ref, ref_safe, head, list) {
+ if (ref->parent) /* already direct */
+ continue;
+ if (ref->count == 0)
+ continue;
+ err = __resolve_indirect_ref(fs_info, ref, parents);
+ if (err) {
+ if (ret == 0)
+ ret = err;
+ continue;
+ }
+
+ /* we put the first parent into the ref at hand */
+ node = ulist_next(parents, NULL);
+ ref->parent = node ? node->val : 0;
+
+ /* additional parents require new refs being added here */
+ while ((node = ulist_next(parents, node))) {
+ new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS);
+ if (!new_ref) {
+ ret = -ENOMEM;
+ break;
+ }
+ memcpy(new_ref, ref, sizeof(*ref));
+ new_ref->parent = node->val;
+ list_add(&new_ref->list, &ref->list);
+ }
+ ulist_reinit(parents);
+ }
+
+ ulist_free(parents);
+ return ret;
+}
+
+/*
+ * merge two lists of backrefs and adjust counts accordingly
+ *
+ * mode = 1: merge identical keys, if key is set
+ * mode = 2: merge identical parents
+ */
+static int __merge_refs(struct list_head *head, int mode)
+{
+ struct list_head *pos1;
+
+ list_for_each(pos1, head) {
+ struct list_head *n2;
+ struct list_head *pos2;
+ struct __prelim_ref *ref1;
+
+ ref1 = list_entry(pos1, struct __prelim_ref, list);
+
+ if (mode == 1 && ref1->key.type == 0)
+ continue;
+ for (pos2 = pos1->next, n2 = pos2->next; pos2 != head;
+ pos2 = n2, n2 = pos2->next) {
+ struct __prelim_ref *ref2;
+
+ ref2 = list_entry(pos2, struct __prelim_ref, list);
+
+ if (mode == 1) {
+ if (memcmp(&ref1->key, &ref2->key,
+ sizeof(ref1->key)) ||
+ ref1->level != ref2->level ||
+ ref1->root_id != ref2->root_id)
+ continue;
+ ref1->count += ref2->count;
+ } else {
+ if (ref1->parent != ref2->parent)
+ continue;
+ ref1->count += ref2->count;
+ }
+ list_del(&ref2->list);
+ kfree(ref2);
+ }
+
+ }
+ return 0;
+}
+
+/*
+ * add all currently queued delayed refs from this head whose seq nr is
+ * smaller or equal that seq to the list
+ */
+static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
+ struct btrfs_key *info_key,
+ struct list_head *prefs)
+{
+ struct btrfs_delayed_extent_op *extent_op = head->extent_op;
+ struct rb_node *n = &head->node.rb_node;
+ int sgn;
+ int ret;
+
+ if (extent_op && extent_op->update_key)
+ btrfs_disk_key_to_cpu(info_key, &extent_op->key);
+
+ while ((n = rb_prev(n))) {
+ struct btrfs_delayed_ref_node *node;
+ node = rb_entry(n, struct btrfs_delayed_ref_node,
+ rb_node);
+ if (node->bytenr != head->node.bytenr)
+ break;
+ WARN_ON(node->is_head);
+
+ if (node->seq > seq)
+ continue;
+
+ switch (node->action) {
+ case BTRFS_ADD_DELAYED_EXTENT:
+ case BTRFS_UPDATE_DELAYED_HEAD:
+ WARN_ON(1);
+ continue;
+ case BTRFS_ADD_DELAYED_REF:
+ sgn = 1;
+ break;
+ case BTRFS_DROP_DELAYED_REF:
+ sgn = -1;
+ break;
+ default:
+ BUG_ON(1);
+ }
+ switch (node->type) {
+ case BTRFS_TREE_BLOCK_REF_KEY: {
+ struct btrfs_delayed_tree_ref *ref;
+
+ ref = btrfs_delayed_node_to_tree_ref(node);
+ ret = __add_prelim_ref(prefs, ref->root, info_key,
+ ref->level + 1, 0, node->bytenr,
+ node->ref_mod * sgn);
+ break;
+ }
+ case BTRFS_SHARED_BLOCK_REF_KEY: {
+ struct btrfs_delayed_tree_ref *ref;
+
+ ref = btrfs_delayed_node_to_tree_ref(node);
+ ret = __add_prelim_ref(prefs, ref->root, info_key,
+ ref->level + 1, ref->parent,
+ node->bytenr,
+ node->ref_mod * sgn);
+ break;
+ }
+ case BTRFS_EXTENT_DATA_REF_KEY: {
+ struct btrfs_delayed_data_ref *ref;
+ struct btrfs_key key;
+
+ ref = btrfs_delayed_node_to_data_ref(node);
+
+ key.objectid = ref->objectid;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = ref->offset;
+ ret = __add_prelim_ref(prefs, ref->root, &key, 0, 0,
+ node->bytenr,
+ node->ref_mod * sgn);
+ break;
+ }
+ case BTRFS_SHARED_DATA_REF_KEY: {
+ struct btrfs_delayed_data_ref *ref;
+ struct btrfs_key key;
+
+ ref = btrfs_delayed_node_to_data_ref(node);
+
+ key.objectid = ref->objectid;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = ref->offset;
+ ret = __add_prelim_ref(prefs, ref->root, &key, 0,
+ ref->parent, node->bytenr,
+ node->ref_mod * sgn);
+ break;
+ }
+ default:
+ WARN_ON(1);
+ }
+ BUG_ON(ret);
+ }
+
+ return 0;
+}
+
+/*
+ * add all inline backrefs for bytenr to the list
+ */
+static int __add_inline_refs(struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path, u64 bytenr,
+ struct btrfs_key *info_key, int *info_level,
+ struct list_head *prefs)
+{
+ int ret;
+ int slot;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+ unsigned long ptr;
+ unsigned long end;
+ struct btrfs_extent_item *ei;
+ u64 flags;
+ u64 item_size;
+
+ /*
+ * enumerate all inline refs
+ */
+ leaf = path->nodes[0];
+ slot = path->slots[0] - 1;
+
+ item_size = btrfs_item_size_nr(leaf, slot);
+ BUG_ON(item_size < sizeof(*ei));
+
+ ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
+ flags = btrfs_extent_flags(leaf, ei);
+
+ ptr = (unsigned long)(ei + 1);
+ end = (unsigned long)ei + item_size;
+
+ if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+ struct btrfs_tree_block_info *info;
+ struct btrfs_disk_key disk_key;
+
+ info = (struct btrfs_tree_block_info *)ptr;
+ *info_level = btrfs_tree_block_level(leaf, info);
+ btrfs_tree_block_key(leaf, info, &disk_key);
+ btrfs_disk_key_to_cpu(info_key, &disk_key);
+ ptr += sizeof(struct btrfs_tree_block_info);
+ BUG_ON(ptr > end);
+ } else {
+ BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
+ }
+
+ while (ptr < end) {
+ struct btrfs_extent_inline_ref *iref;
+ u64 offset;
+ int type;
+
+ iref = (struct btrfs_extent_inline_ref *)ptr;
+ type = btrfs_extent_inline_ref_type(leaf, iref);
+ offset = btrfs_extent_inline_ref_offset(leaf, iref);
+
+ switch (type) {
+ case BTRFS_SHARED_BLOCK_REF_KEY:
+ ret = __add_prelim_ref(prefs, 0, info_key,
+ *info_level + 1, offset,
+ bytenr, 1);
+ break;
+ case BTRFS_SHARED_DATA_REF_KEY: {
+ struct btrfs_shared_data_ref *sdref;
+ int count;
+
+ sdref = (struct btrfs_shared_data_ref *)(iref + 1);
+ count = btrfs_shared_data_ref_count(leaf, sdref);
+ ret = __add_prelim_ref(prefs, 0, NULL, 0, offset,
+ bytenr, count);
+ break;
+ }
+ case BTRFS_TREE_BLOCK_REF_KEY:
+ ret = __add_prelim_ref(prefs, offset, info_key,
+ *info_level + 1, 0, bytenr, 1);
+ break;
+ case BTRFS_EXTENT_DATA_REF_KEY: {
+ struct btrfs_extent_data_ref *dref;
+ int count;
+ u64 root;
+
+ dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+ count = btrfs_extent_data_ref_count(leaf, dref);
+ key.objectid = btrfs_extent_data_ref_objectid(leaf,
+ dref);
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = btrfs_extent_data_ref_offset(leaf, dref);
+ root = btrfs_extent_data_ref_root(leaf, dref);
+ ret = __add_prelim_ref(prefs, root, &key, 0, 0, bytenr,
+ count);
+ break;
+ }
+ default:
+ WARN_ON(1);
+ }
+ BUG_ON(ret);
+ ptr += btrfs_extent_inline_ref_size(type);
+ }
+
+ return 0;
+}
+
+/*
+ * add all non-inline backrefs for bytenr to the list
+ */
+static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path, u64 bytenr,
+ struct btrfs_key *info_key, int info_level,
+ struct list_head *prefs)
+{
+ struct btrfs_root *extent_root = fs_info->extent_root;
+ int ret;
+ int slot;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+
+ while (1) {
+ ret = btrfs_next_item(extent_root, path);
+ if (ret < 0)
+ break;
+ if (ret) {
+ ret = 0;
+ break;
+ }
+
+ slot = path->slots[0];
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+
+ if (key.objectid != bytenr)
+ break;
+ if (key.type < BTRFS_TREE_BLOCK_REF_KEY)
+ continue;
+ if (key.type > BTRFS_SHARED_DATA_REF_KEY)
+ break;
+
+ switch (key.type) {
+ case BTRFS_SHARED_BLOCK_REF_KEY:
+ ret = __add_prelim_ref(prefs, 0, info_key,
+ info_level + 1, key.offset,
+ bytenr, 1);
+ break;
+ case BTRFS_SHARED_DATA_REF_KEY: {
+ struct btrfs_shared_data_ref *sdref;
+ int count;
+
+ sdref = btrfs_item_ptr(leaf, slot,
+ struct btrfs_shared_data_ref);
+ count = btrfs_shared_data_ref_count(leaf, sdref);
+ ret = __add_prelim_ref(prefs, 0, NULL, 0, key.offset,
+ bytenr, count);
+ break;
+ }
+ case BTRFS_TREE_BLOCK_REF_KEY:
+ ret = __add_prelim_ref(prefs, key.offset, info_key,
+ info_level + 1, 0, bytenr, 1);
+ break;
+ case BTRFS_EXTENT_DATA_REF_KEY: {
+ struct btrfs_extent_data_ref *dref;
+ int count;
+ u64 root;
+
+ dref = btrfs_item_ptr(leaf, slot,
+ struct btrfs_extent_data_ref);
+ count = btrfs_extent_data_ref_count(leaf, dref);
+ key.objectid = btrfs_extent_data_ref_objectid(leaf,
+ dref);
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = btrfs_extent_data_ref_offset(leaf, dref);
+ root = btrfs_extent_data_ref_root(leaf, dref);
+ ret = __add_prelim_ref(prefs, root, &key, 0, 0,
+ bytenr, count);
+ break;
+ }
+ default:
+ WARN_ON(1);
+ }
+ BUG_ON(ret);
+ }
+
+ return ret;
+}
+
+/*
+ * this adds all existing backrefs (inline backrefs, backrefs and delayed
+ * refs) for the given bytenr to the refs list, merges duplicates and resolves
+ * indirect refs to their parent bytenr.
+ * When roots are found, they're added to the roots list
+ *
+ * FIXME some caching might speed things up
+ */
+static int find_parent_nodes(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 bytenr,
+ u64 seq, struct ulist *refs, struct ulist *roots)
+{
+ struct btrfs_key key;
+ struct btrfs_path *path;
+ struct btrfs_key info_key = { 0 };
+ struct btrfs_delayed_ref_root *delayed_refs = NULL;
+ struct btrfs_delayed_ref_head *head = NULL;
+ int info_level = 0;
+ int ret;
+ struct list_head prefs_delayed;
+ struct list_head prefs;
+ struct __prelim_ref *ref;
+
+ INIT_LIST_HEAD(&prefs);
+ INIT_LIST_HEAD(&prefs_delayed);
+
+ key.objectid = bytenr;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.offset = (u64)-1;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ /*
+ * grab both a lock on the path and a lock on the delayed ref head.
+ * We need both to get a consistent picture of how the refs look
+ * at a specified point in time
+ */
+again:
+ ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ BUG_ON(ret == 0);
+
+ /*
+ * look if there are updates for this ref queued and lock the head
+ */
+ delayed_refs = &trans->transaction->delayed_refs;
+ spin_lock(&delayed_refs->lock);
+ head = btrfs_find_delayed_ref_head(trans, bytenr);
+ if (head) {
+ if (!mutex_trylock(&head->mutex)) {
+ atomic_inc(&head->node.refs);
+ spin_unlock(&delayed_refs->lock);
+
+ btrfs_release_path(path);
+
+ /*
+ * Mutex was contended, block until it's
+ * released and try again
+ */
+ mutex_lock(&head->mutex);
+ mutex_unlock(&head->mutex);
+ btrfs_put_delayed_ref(&head->node);
+ goto again;
+ }
+ ret = __add_delayed_refs(head, seq, &info_key, &prefs_delayed);
+ if (ret)
+ goto out;
+ }
+ spin_unlock(&delayed_refs->lock);
+
+ if (path->slots[0]) {
+ struct extent_buffer *leaf;
+ int slot;
+
+ leaf = path->nodes[0];
+ slot = path->slots[0] - 1;
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (key.objectid == bytenr &&
+ key.type == BTRFS_EXTENT_ITEM_KEY) {
+ ret = __add_inline_refs(fs_info, path, bytenr,
+ &info_key, &info_level, &prefs);
+ if (ret)
+ goto out;
+ ret = __add_keyed_refs(fs_info, path, bytenr, &info_key,
+ info_level, &prefs);
+ if (ret)
+ goto out;
+ }
+ }
+ btrfs_release_path(path);
+
+ /*
+ * when adding the delayed refs above, the info_key might not have
+ * been known yet. Go over the list and replace the missing keys
+ */
+ list_for_each_entry(ref, &prefs_delayed, list) {
+ if ((ref->key.offset | ref->key.type | ref->key.objectid) == 0)
+ memcpy(&ref->key, &info_key, sizeof(ref->key));
+ }
+ list_splice_init(&prefs_delayed, &prefs);
+
+ ret = __merge_refs(&prefs, 1);
+ if (ret)
+ goto out;
+
+ ret = __resolve_indirect_refs(fs_info, &prefs);
+ if (ret)
+ goto out;
+
+ ret = __merge_refs(&prefs, 2);
+ if (ret)
+ goto out;
+
+ while (!list_empty(&prefs)) {
+ ref = list_first_entry(&prefs, struct __prelim_ref, list);
+ list_del(&ref->list);
+ if (ref->count < 0)
+ WARN_ON(1);
+ if (ref->count && ref->root_id && ref->parent == 0) {
+ /* no parent == root of tree */
+ ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS);
+ BUG_ON(ret < 0);
+ }
+ if (ref->count && ref->parent) {
+ ret = ulist_add(refs, ref->parent, 0, GFP_NOFS);
+ BUG_ON(ret < 0);
+ }
+ kfree(ref);
+ }
+
+out:
+ if (head)
+ mutex_unlock(&head->mutex);
+ btrfs_free_path(path);
+ while (!list_empty(&prefs)) {
+ ref = list_first_entry(&prefs, struct __prelim_ref, list);
+ list_del(&ref->list);
+ kfree(ref);
+ }
+ while (!list_empty(&prefs_delayed)) {
+ ref = list_first_entry(&prefs_delayed, struct __prelim_ref,
+ list);
+ list_del(&ref->list);
+ kfree(ref);
+ }
+
+ return ret;
+}
+
+/*
+ * Finds all leafs with a reference to the specified combination of bytenr and
+ * offset. key_list_head will point to a list of corresponding keys (caller must
+ * free each list element). The leafs will be stored in the leafs ulist, which
+ * must be freed with ulist_free.
+ *
+ * returns 0 on success, <0 on error
+ */
+static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 bytenr,
+ u64 num_bytes, u64 seq, struct ulist **leafs)
+{
+ struct ulist *tmp;
+ int ret;
+
+ tmp = ulist_alloc(GFP_NOFS);
+ if (!tmp)
+ return -ENOMEM;
+ *leafs = ulist_alloc(GFP_NOFS);
+ if (!*leafs) {
+ ulist_free(tmp);
+ return -ENOMEM;
+ }
+
+ ret = find_parent_nodes(trans, fs_info, bytenr, seq, *leafs, tmp);
+ ulist_free(tmp);
+
+ if (ret < 0 && ret != -ENOENT) {
+ ulist_free(*leafs);
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * walk all backrefs for a given extent to find all roots that reference this
+ * extent. Walking a backref means finding all extents that reference this
+ * extent and in turn walk the backrefs of those, too. Naturally this is a
+ * recursive process, but here it is implemented in an iterative fashion: We
+ * find all referencing extents for the extent in question and put them on a
+ * list. In turn, we find all referencing extents for those, further appending
+ * to the list. The way we iterate the list allows adding more elements after
+ * the current while iterating. The process stops when we reach the end of the
+ * list. Found roots are added to the roots list.
+ *
+ * returns 0 on success, < 0 on error.
+ */
+int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 bytenr,
+ u64 num_bytes, u64 seq, struct ulist **roots)
+{
+ struct ulist *tmp;
+ struct ulist_node *node = NULL;
+ int ret;
+
+ tmp = ulist_alloc(GFP_NOFS);
+ if (!tmp)
+ return -ENOMEM;
+ *roots = ulist_alloc(GFP_NOFS);
+ if (!*roots) {
+ ulist_free(tmp);
+ return -ENOMEM;
+ }
+
+ while (1) {
+ ret = find_parent_nodes(trans, fs_info, bytenr, seq,
+ tmp, *roots);
+ if (ret < 0 && ret != -ENOENT) {
+ ulist_free(tmp);
+ ulist_free(*roots);
+ return ret;
+ }
+ node = ulist_next(tmp, node);
+ if (!node)
+ break;
+ bytenr = node->val;
+ }
+
+ ulist_free(tmp);
+ return 0;
+}
+
static int __inode_info(u64 inum, u64 ioff, u8 key_type,
struct btrfs_root *fs_root, struct btrfs_path *path,
@@ -181,8 +952,11 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]);
if (found_key->type != BTRFS_EXTENT_ITEM_KEY ||
found_key->objectid > logical ||
- found_key->objectid + found_key->offset <= logical)
+ found_key->objectid + found_key->offset <= logical) {
+ pr_debug("logical %llu is not within any extent\n",
+ (unsigned long long)logical);
return -ENOENT;
+ }
eb = path->nodes[0];
item_size = btrfs_item_size_nr(eb, path->slots[0]);
@@ -191,6 +965,13 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
flags = btrfs_extent_flags(eb, ei);
+ pr_debug("logical %llu is at position %llu within the extent (%llu "
+ "EXTENT_ITEM %llu) flags %#llx size %u\n",
+ (unsigned long long)logical,
+ (unsigned long long)(logical - found_key->objectid),
+ (unsigned long long)found_key->objectid,
+ (unsigned long long)found_key->offset,
+ (unsigned long long)flags, item_size);
if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
return BTRFS_EXTENT_FLAG_TREE_BLOCK;
if (flags & BTRFS_EXTENT_FLAG_DATA)
@@ -287,128 +1068,11 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
return 0;
}
-static int __data_list_add(struct list_head *head, u64 inum,
- u64 extent_data_item_offset, u64 root)
-{
- struct __data_ref *ref;
-
- ref = kmalloc(sizeof(*ref), GFP_NOFS);
- if (!ref)
- return -ENOMEM;
-
- ref->inum = inum;
- ref->extent_data_item_offset = extent_data_item_offset;
- ref->root = root;
- list_add_tail(&ref->list, head);
-
- return 0;
-}
-
-static int __data_list_add_eb(struct list_head *head, struct extent_buffer *eb,
- struct btrfs_extent_data_ref *dref)
-{
- return __data_list_add(head, btrfs_extent_data_ref_objectid(eb, dref),
- btrfs_extent_data_ref_offset(eb, dref),
- btrfs_extent_data_ref_root(eb, dref));
-}
-
-static int __shared_list_add(struct list_head *head, u64 disk_byte)
-{
- struct __shared_ref *ref;
-
- ref = kmalloc(sizeof(*ref), GFP_NOFS);
- if (!ref)
- return -ENOMEM;
-
- ref->disk_byte = disk_byte;
- list_add_tail(&ref->list, head);
-
- return 0;
-}
-
-static int __iter_shared_inline_ref_inodes(struct btrfs_fs_info *fs_info,
- u64 logical, u64 inum,
- u64 extent_data_item_offset,
- u64 extent_offset,
- struct btrfs_path *path,
- struct list_head *data_refs,
- iterate_extent_inodes_t *iterate,
- void *ctx)
-{
- u64 ref_root;
- u32 item_size;
- struct btrfs_key key;
- struct extent_buffer *eb;
- struct btrfs_extent_item *ei;
- struct btrfs_extent_inline_ref *eiref;
- struct __data_ref *ref;
- int ret;
- int type;
- int last;
- unsigned long ptr = 0;
-
- WARN_ON(!list_empty(data_refs));
- ret = extent_from_logical(fs_info, logical, path, &key);
- if (ret & BTRFS_EXTENT_FLAG_DATA)
- ret = -EIO;
- if (ret < 0)
- goto out;
-
- eb = path->nodes[0];
- ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
- item_size = btrfs_item_size_nr(eb, path->slots[0]);
-
- ret = 0;
- ref_root = 0;
- /*
- * as done in iterate_extent_inodes, we first build a list of refs to
- * iterate, then free the path and then iterate them to avoid deadlocks.
- */
- do {
- last = __get_extent_inline_ref(&ptr, eb, ei, item_size,
- &eiref, &type);
- if (last < 0) {
- ret = last;
- goto out;
- }
- if (type == BTRFS_TREE_BLOCK_REF_KEY ||
- type == BTRFS_SHARED_BLOCK_REF_KEY) {
- ref_root = btrfs_extent_inline_ref_offset(eb, eiref);
- ret = __data_list_add(data_refs, inum,
- extent_data_item_offset,
- ref_root);
- }
- } while (!ret && !last);
-
- btrfs_release_path(path);
-
- if (ref_root == 0) {
- printk(KERN_ERR "btrfs: failed to find tree block ref "
- "for shared data backref %llu\n", logical);
- WARN_ON(1);
- ret = -EIO;
- }
-
-out:
- while (!list_empty(data_refs)) {
- ref = list_first_entry(data_refs, struct __data_ref, list);
- list_del(&ref->list);
- if (!ret)
- ret = iterate(ref->inum, extent_offset +
- ref->extent_data_item_offset,
- ref->root, ctx);
- kfree(ref);
- }
-
- return ret;
-}
-
-static int __iter_shared_inline_ref(struct btrfs_fs_info *fs_info,
- u64 logical, u64 orig_extent_item_objectid,
- u64 extent_offset, struct btrfs_path *path,
- struct list_head *data_refs,
- iterate_extent_inodes_t *iterate,
- void *ctx)
+static int iterate_leaf_refs(struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path, u64 logical,
+ u64 orig_extent_item_objectid,
+ u64 extent_item_pos, u64 root,
+ iterate_extent_inodes_t *iterate, void *ctx)
{
u64 disk_byte;
struct btrfs_key key;
@@ -416,8 +1080,10 @@ static int __iter_shared_inline_ref(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb;
int slot;
int nritems;
- int ret;
- int found = 0;
+ int ret = 0;
+ int extent_type;
+ u64 data_offset;
+ u64 data_len;
eb = read_tree_block(fs_info->tree_root, logical,
fs_info->tree_root->leafsize, 0);
@@ -435,149 +1101,99 @@ static int __iter_shared_inline_ref(struct btrfs_fs_info *fs_info,
if (key.type != BTRFS_EXTENT_DATA_KEY)
continue;
fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
- if (!fi) {
- free_extent_buffer(eb);
- return -EIO;
- }
+ extent_type = btrfs_file_extent_type(eb, fi);
+ if (extent_type == BTRFS_FILE_EXTENT_INLINE)
+ continue;
+ /* don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that */
disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
- if (disk_byte != orig_extent_item_objectid) {
- if (found)
- break;
- else
- continue;
- }
- ++found;
- ret = __iter_shared_inline_ref_inodes(fs_info, logical,
- key.objectid,
- key.offset,
- extent_offset, path,
- data_refs,
- iterate, ctx);
- if (ret)
- break;
- }
+ if (disk_byte != orig_extent_item_objectid)
+ continue;
- if (!found) {
- printk(KERN_ERR "btrfs: failed to follow shared data backref "
- "to parent %llu\n", logical);
- WARN_ON(1);
- ret = -EIO;
+ data_offset = btrfs_file_extent_offset(eb, fi);
+ data_len = btrfs_file_extent_num_bytes(eb, fi);
+
+ if (extent_item_pos < data_offset ||
+ extent_item_pos >= data_offset + data_len)
+ continue;
+
+ pr_debug("ref for %llu resolved, key (%llu EXTEND_DATA %llu), "
+ "root %llu\n", orig_extent_item_objectid,
+ key.objectid, key.offset, root);
+ ret = iterate(key.objectid,
+ key.offset + (extent_item_pos - data_offset),
+ root, ctx);
+ if (ret) {
+ pr_debug("stopping iteration because ret=%d\n", ret);
+ break;
+ }
}
free_extent_buffer(eb);
+
return ret;
}
/*
* calls iterate() for every inode that references the extent identified by
- * the given parameters. will use the path given as a parameter and return it
- * released.
+ * the given parameters.
* when the iterator function returns a non-zero value, iteration stops.
+ * path is guaranteed to be in released state when iterate() is called.
*/
int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
- u64 extent_item_objectid,
- u64 extent_offset,
+ u64 extent_item_objectid, u64 extent_item_pos,
iterate_extent_inodes_t *iterate, void *ctx)
{
- unsigned long ptr = 0;
- int last;
int ret;
- int type;
- u64 logical;
- u32 item_size;
- struct btrfs_extent_inline_ref *eiref;
- struct btrfs_extent_data_ref *dref;
- struct extent_buffer *eb;
- struct btrfs_extent_item *ei;
- struct btrfs_key key;
struct list_head data_refs = LIST_HEAD_INIT(data_refs);
struct list_head shared_refs = LIST_HEAD_INIT(shared_refs);
- struct __data_ref *ref_d;
- struct __shared_ref *ref_s;
-
- eb = path->nodes[0];
- ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
- item_size = btrfs_item_size_nr(eb, path->slots[0]);
-
- /* first we iterate the inline refs, ... */
- do {
- last = __get_extent_inline_ref(&ptr, eb, ei, item_size,
- &eiref, &type);
- if (last == -ENOENT) {
- ret = 0;
- break;
- }
- if (last < 0) {
- ret = last;
- break;
- }
+ struct btrfs_trans_handle *trans;
+ struct ulist *refs;
+ struct ulist *roots;
+ struct ulist_node *ref_node = NULL;
+ struct ulist_node *root_node = NULL;
+ struct seq_list seq_elem;
+ struct btrfs_delayed_ref_root *delayed_refs;
+
+ trans = btrfs_join_transaction(fs_info->extent_root);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ pr_debug("resolving all inodes for extent %llu\n",
+ extent_item_objectid);
+
+ delayed_refs = &trans->transaction->delayed_refs;
+ spin_lock(&delayed_refs->lock);
+ btrfs_get_delayed_seq(delayed_refs, &seq_elem);
+ spin_unlock(&delayed_refs->lock);
+
+ ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
+ extent_item_pos, seq_elem.seq,
+ &refs);
- if (type == BTRFS_EXTENT_DATA_REF_KEY) {
- dref = (struct btrfs_extent_data_ref *)(&eiref->offset);
- ret = __data_list_add_eb(&data_refs, eb, dref);
- } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
- logical = btrfs_extent_inline_ref_offset(eb, eiref);
- ret = __shared_list_add(&shared_refs, logical);
- }
- } while (!ret && !last);
+ if (ret)
+ goto out;
- /* ... then we proceed to in-tree references and ... */
- while (!ret) {
- ++path->slots[0];
- if (path->slots[0] > btrfs_header_nritems(eb)) {
- ret = btrfs_next_leaf(fs_info->extent_root, path);
- if (ret) {
- if (ret == 1)
- ret = 0; /* we're done */
- break;
- }
- eb = path->nodes[0];
- }
- btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
- if (key.objectid != extent_item_objectid)
+ while (!ret && (ref_node = ulist_next(refs, ref_node))) {
+ ret = btrfs_find_all_roots(trans, fs_info, ref_node->val, -1,
+ seq_elem.seq, &roots);
+ if (ret)
break;
- if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
- dref = btrfs_item_ptr(eb, path->slots[0],
- struct btrfs_extent_data_ref);
- ret = __data_list_add_eb(&data_refs, eb, dref);
- } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
- ret = __shared_list_add(&shared_refs, key.offset);
+ while (!ret && (root_node = ulist_next(roots, root_node))) {
+ pr_debug("root %llu references leaf %llu\n",
+ root_node->val, ref_node->val);
+ ret = iterate_leaf_refs(fs_info, path, ref_node->val,
+ extent_item_objectid,
+ extent_item_pos, root_node->val,
+ iterate, ctx);
}
}
- btrfs_release_path(path);
-
- /*
- * ... only at the very end we can process the refs we found. this is
- * because the iterator function we call is allowed to make tree lookups
- * and we have to avoid deadlocks. additionally, we need more tree
- * lookups ourselves for shared data refs.
- */
- while (!list_empty(&data_refs)) {
- ref_d = list_first_entry(&data_refs, struct __data_ref, list);
- list_del(&ref_d->list);
- if (!ret)
- ret = iterate(ref_d->inum, extent_offset +
- ref_d->extent_data_item_offset,
- ref_d->root, ctx);
- kfree(ref_d);
- }
-
- while (!list_empty(&shared_refs)) {
- ref_s = list_first_entry(&shared_refs, struct __shared_ref,
- list);
- list_del(&ref_s->list);
- if (!ret)
- ret = __iter_shared_inline_ref(fs_info,
- ref_s->disk_byte,
- extent_item_objectid,
- extent_offset, path,
- &data_refs,
- iterate, ctx);
- kfree(ref_s);
- }
-
+ ulist_free(refs);
+ ulist_free(roots);
+out:
+ btrfs_put_delayed_seq(delayed_refs, &seq_elem);
+ btrfs_end_transaction(trans, fs_info->extent_root);
return ret;
}
@@ -586,19 +1202,20 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
iterate_extent_inodes_t *iterate, void *ctx)
{
int ret;
- u64 offset;
+ u64 extent_item_pos;
struct btrfs_key found_key;
ret = extent_from_logical(fs_info, logical, path,
&found_key);
+ btrfs_release_path(path);
if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
ret = -EINVAL;
if (ret < 0)
return ret;
- offset = logical - found_key.objectid;
+ extent_item_pos = logical - found_key.objectid;
ret = iterate_extent_inodes(fs_info, path, found_key.objectid,
- offset, iterate, ctx);
+ extent_item_pos, iterate, ctx);
return ret;
}
@@ -643,6 +1260,10 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
for (cur = 0; cur < btrfs_item_size(eb, item); cur += len) {
name_len = btrfs_inode_ref_name_len(eb, iref);
/* path must be released before calling iterate()! */
+ pr_debug("following ref at offset %u for inode %llu in "
+ "tree %llu\n", cur,
+ (unsigned long long)found_key.objectid,
+ (unsigned long long)fs_root->objectid);
ret = iterate(parent, iref, eb, ctx);
if (ret) {
free_extent_buffer(eb);
@@ -683,10 +1304,14 @@ static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref,
return PTR_ERR(fspath);
if (fspath > fspath_min) {
+ pr_debug("path resolved: %s\n", fspath);
ipath->fspath->val[i] = (u64)(unsigned long)fspath;
++ipath->fspath->elem_cnt;
ipath->fspath->bytes_left = fspath - fspath_min;
} else {
+ pr_debug("missed path, not enough space. missing bytes: %lu, "
+ "constructed so far: %s\n",
+ (unsigned long)(fspath_min - fspath), fspath_min);
++ipath->fspath->elem_missed;
ipath->fspath->bytes_missing += fspath_min - fspath;
ipath->fspath->bytes_left = 0;
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 92618837cb8..d00dfa9ca93 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -20,6 +20,7 @@
#define __BTRFS_BACKREF__
#include "ioctl.h"
+#include "ulist.h"
struct inode_fs_paths {
struct btrfs_path *btrfs_path;
@@ -54,6 +55,10 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
+int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 bytenr,
+ u64 num_bytes, u64 seq, struct ulist **roots);
+
struct btrfs_data_container *init_data_container(u32 total_bytes);
struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
struct btrfs_path *path);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 634608d2a6d..9b9b15fd520 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -51,6 +51,9 @@ struct btrfs_inode {
/* held while logging the inode in tree-log.c */
struct mutex log_mutex;
+ /* held while doing delalloc reservations */
+ struct mutex delalloc_mutex;
+
/* used to order data wrt metadata */
struct btrfs_ordered_inode_tree ordered_tree;
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
new file mode 100644
index 00000000000..ad0b3ba735b
--- /dev/null
+++ b/fs/btrfs/check-integrity.c
@@ -0,0 +1,3068 @@
+/*
+ * Copyright (C) STRATO AG 2011. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+/*
+ * This module can be used to catch cases when the btrfs kernel
+ * code executes write requests to the disk that bring the file
+ * system in an inconsistent state. In such a state, a power-loss
+ * or kernel panic event would cause that the data on disk is
+ * lost or at least damaged.
+ *
+ * Code is added that examines all block write requests during
+ * runtime (including writes of the super block). Three rules
+ * are verified and an error is printed on violation of the
+ * rules:
+ * 1. It is not allowed to write a disk block which is
+ * currently referenced by the super block (either directly
+ * or indirectly).
+ * 2. When a super block is written, it is verified that all
+ * referenced (directly or indirectly) blocks fulfill the
+ * following requirements:
+ * 2a. All referenced blocks have either been present when
+ * the file system was mounted, (i.e., they have been
+ * referenced by the super block) or they have been
+ * written since then and the write completion callback
+ * was called and a FLUSH request to the device where
+ * these blocks are located was received and completed.
+ * 2b. All referenced blocks need to have a generation
+ * number which is equal to the parent's number.
+ *
+ * One issue that was found using this module was that the log
+ * tree on disk became temporarily corrupted because disk blocks
+ * that had been in use for the log tree had been freed and
+ * reused too early, while being referenced by the written super
+ * block.
+ *
+ * The search term in the kernel log that can be used to filter
+ * on the existence of detected integrity issues is
+ * "btrfs: attempt".
+ *
+ * The integrity check is enabled via mount options. These
+ * mount options are only supported if the integrity check
+ * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY.
+ *
+ * Example #1, apply integrity checks to all metadata:
+ * mount /dev/sdb1 /mnt -o check_int
+ *
+ * Example #2, apply integrity checks to all metadata and
+ * to data extents:
+ * mount /dev/sdb1 /mnt -o check_int_data
+ *
+ * Example #3, apply integrity checks to all metadata and dump
+ * the tree that the super block references to kernel messages
+ * each time after a super block was written:
+ * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263
+ *
+ * If the integrity check tool is included and activated in
+ * the mount options, plenty of kernel memory is used, and
+ * plenty of additional CPU cycles are spent. Enabling this
+ * functionality is not intended for normal use. In most
+ * cases, unless you are a btrfs developer who needs to verify
+ * the integrity of (super)-block write requests, do not
+ * enable the config option BTRFS_FS_CHECK_INTEGRITY to
+ * include and compile the integrity check tool.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/buffer_head.h>
+#include <linux/mutex.h>
+#include <linux/crc32c.h>
+#include <linux/genhd.h>
+#include <linux/blkdev.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "extent_io.h"
+#include "disk-io.h"
+#include "volumes.h"
+#include "print-tree.h"
+#include "locking.h"
+#include "check-integrity.h"
+
+#define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
+#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
+#define BTRFSIC_DEV2STATE_HASHTABLE_SIZE 0x100
+#define BTRFSIC_BLOCK_MAGIC_NUMBER 0x14491051
+#define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER 0x11070807
+#define BTRFSIC_DEV2STATE_MAGIC_NUMBER 0x20111530
+#define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300
+#define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6) /* in characters,
+ * excluding " [...]" */
+#define BTRFSIC_BLOCK_SIZE PAGE_SIZE
+
+#define BTRFSIC_GENERATION_UNKNOWN ((u64)-1)
+
+/*
+ * The definition of the bitmask fields for the print_mask.
+ * They are specified with the mount option check_integrity_print_mask.
+ */
+#define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE 0x00000001
+#define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION 0x00000002
+#define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE 0x00000004
+#define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE 0x00000008
+#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH 0x00000010
+#define BTRFSIC_PRINT_MASK_END_IO_BIO_BH 0x00000020
+#define BTRFSIC_PRINT_MASK_VERBOSE 0x00000040
+#define BTRFSIC_PRINT_MASK_VERY_VERBOSE 0x00000080
+#define BTRFSIC_PRINT_MASK_INITIAL_TREE 0x00000100
+#define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES 0x00000200
+#define BTRFSIC_PRINT_MASK_INITIAL_DATABASE 0x00000400
+#define BTRFSIC_PRINT_MASK_NUM_COPIES 0x00000800
+#define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS 0x00001000
+
+struct btrfsic_dev_state;
+struct btrfsic_state;
+
+struct btrfsic_block {
+ u32 magic_num; /* only used for debug purposes */
+ unsigned int is_metadata:1; /* if it is meta-data, not data-data */
+ unsigned int is_superblock:1; /* if it is one of the superblocks */
+ unsigned int is_iodone:1; /* if is done by lower subsystem */
+ unsigned int iodone_w_error:1; /* error was indicated to endio */
+ unsigned int never_written:1; /* block was added because it was
+ * referenced, not because it was
+ * written */
+ unsigned int mirror_num:2; /* large enough to hold
+ * BTRFS_SUPER_MIRROR_MAX */
+ struct btrfsic_dev_state *dev_state;
+ u64 dev_bytenr; /* key, physical byte num on disk */
+ u64 logical_bytenr; /* logical byte num on disk */
+ u64 generation;
+ struct btrfs_disk_key disk_key; /* extra info to print in case of
+ * issues, will not always be correct */
+ struct list_head collision_resolving_node; /* list node */
+ struct list_head all_blocks_node; /* list node */
+
+ /* the following two lists contain block_link items */
+ struct list_head ref_to_list; /* list */
+ struct list_head ref_from_list; /* list */
+ struct btrfsic_block *next_in_same_bio;
+ void *orig_bio_bh_private;
+ union {
+ bio_end_io_t *bio;
+ bh_end_io_t *bh;
+ } orig_bio_bh_end_io;
+ int submit_bio_bh_rw;
+ u64 flush_gen; /* only valid if !never_written */
+};
+
+/*
+ * Elements of this type are allocated dynamically and required because
+ * each block object can refer to and can be ref from multiple blocks.
+ * The key to lookup them in the hashtable is the dev_bytenr of
+ * the block ref to plus the one from the block refered from.
+ * The fact that they are searchable via a hashtable and that a
+ * ref_cnt is maintained is not required for the btrfs integrity
+ * check algorithm itself, it is only used to make the output more
+ * beautiful in case that an error is detected (an error is defined
+ * as a write operation to a block while that block is still referenced).
+ */
+struct btrfsic_block_link {
+ u32 magic_num; /* only used for debug purposes */
+ u32 ref_cnt;
+ struct list_head node_ref_to; /* list node */
+ struct list_head node_ref_from; /* list node */
+ struct list_head collision_resolving_node; /* list node */
+ struct btrfsic_block *block_ref_to;
+ struct btrfsic_block *block_ref_from;
+ u64 parent_generation;
+};
+
+struct btrfsic_dev_state {
+ u32 magic_num; /* only used for debug purposes */
+ struct block_device *bdev;
+ struct btrfsic_state *state;
+ struct list_head collision_resolving_node; /* list node */
+ struct btrfsic_block dummy_block_for_bio_bh_flush;
+ u64 last_flush_gen;
+ char name[BDEVNAME_SIZE];
+};
+
+struct btrfsic_block_hashtable {
+ struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE];
+};
+
+struct btrfsic_block_link_hashtable {
+ struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE];
+};
+
+struct btrfsic_dev_state_hashtable {
+ struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE];
+};
+
+struct btrfsic_block_data_ctx {
+ u64 start; /* virtual bytenr */
+ u64 dev_bytenr; /* physical bytenr on device */
+ u32 len;
+ struct btrfsic_dev_state *dev;
+ char *data;
+ struct buffer_head *bh; /* do not use if set to NULL */
+};
+
+/* This structure is used to implement recursion without occupying
+ * any stack space, refer to btrfsic_process_metablock() */
+struct btrfsic_stack_frame {
+ u32 magic;
+ u32 nr;
+ int error;
+ int i;
+ int limit_nesting;
+ int num_copies;
+ int mirror_num;
+ struct btrfsic_block *block;
+ struct btrfsic_block_data_ctx *block_ctx;
+ struct btrfsic_block *next_block;
+ struct btrfsic_block_data_ctx next_block_ctx;
+ struct btrfs_header *hdr;
+ struct btrfsic_stack_frame *prev;
+};
+
+/* Some state per mounted filesystem */
+struct btrfsic_state {
+ u32 print_mask;
+ int include_extent_data;
+ int csum_size;
+ struct list_head all_blocks_list;
+ struct btrfsic_block_hashtable block_hashtable;
+ struct btrfsic_block_link_hashtable block_link_hashtable;
+ struct btrfs_root *root;
+ u64 max_superblock_generation;
+ struct btrfsic_block *latest_superblock;
+};
+
+static void btrfsic_block_init(struct btrfsic_block *b);
+static struct btrfsic_block *btrfsic_block_alloc(void);
+static void btrfsic_block_free(struct btrfsic_block *b);
+static void btrfsic_block_link_init(struct btrfsic_block_link *n);
+static struct btrfsic_block_link *btrfsic_block_link_alloc(void);
+static void btrfsic_block_link_free(struct btrfsic_block_link *n);
+static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds);
+static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void);
+static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds);
+static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h);
+static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
+ struct btrfsic_block_hashtable *h);
+static void btrfsic_block_hashtable_remove(struct btrfsic_block *b);
+static struct btrfsic_block *btrfsic_block_hashtable_lookup(
+ struct block_device *bdev,
+ u64 dev_bytenr,
+ struct btrfsic_block_hashtable *h);
+static void btrfsic_block_link_hashtable_init(
+ struct btrfsic_block_link_hashtable *h);
+static void btrfsic_block_link_hashtable_add(
+ struct btrfsic_block_link *l,
+ struct btrfsic_block_link_hashtable *h);
+static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l);
+static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
+ struct block_device *bdev_ref_to,
+ u64 dev_bytenr_ref_to,
+ struct block_device *bdev_ref_from,
+ u64 dev_bytenr_ref_from,
+ struct btrfsic_block_link_hashtable *h);
+static void btrfsic_dev_state_hashtable_init(
+ struct btrfsic_dev_state_hashtable *h);
+static void btrfsic_dev_state_hashtable_add(
+ struct btrfsic_dev_state *ds,
+ struct btrfsic_dev_state_hashtable *h);
+static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds);
+static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
+ struct block_device *bdev,
+ struct btrfsic_dev_state_hashtable *h);
+static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void);
+static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf);
+static int btrfsic_process_superblock(struct btrfsic_state *state,
+ struct btrfs_fs_devices *fs_devices);
+static int btrfsic_process_metablock(struct btrfsic_state *state,
+ struct btrfsic_block *block,
+ struct btrfsic_block_data_ctx *block_ctx,
+ struct btrfs_header *hdr,
+ int limit_nesting, int force_iodone_flag);
+static int btrfsic_create_link_to_next_block(
+ struct btrfsic_state *state,
+ struct btrfsic_block *block,
+ struct btrfsic_block_data_ctx
+ *block_ctx, u64 next_bytenr,
+ int limit_nesting,
+ struct btrfsic_block_data_ctx *next_block_ctx,
+ struct btrfsic_block **next_blockp,
+ int force_iodone_flag,
+ int *num_copiesp, int *mirror_nump,
+ struct btrfs_disk_key *disk_key,
+ u64 parent_generation);
+static int btrfsic_handle_extent_data(struct btrfsic_state *state,
+ struct btrfsic_block *block,
+ struct btrfsic_block_data_ctx *block_ctx,
+ u32 item_offset, int force_iodone_flag);
+static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
+ struct btrfsic_block_data_ctx *block_ctx_out,
+ int mirror_num);
+static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
+ u32 len, struct block_device *bdev,
+ struct btrfsic_block_data_ctx *block_ctx_out);
+static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
+static int btrfsic_read_block(struct btrfsic_state *state,
+ struct btrfsic_block_data_ctx *block_ctx);
+static void btrfsic_dump_database(struct btrfsic_state *state);
+static int btrfsic_test_for_metadata(struct btrfsic_state *state,
+ const u8 *data, unsigned int size);
+static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
+ u64 dev_bytenr, u8 *mapped_data,
+ unsigned int len, struct bio *bio,
+ int *bio_is_patched,
+ struct buffer_head *bh,
+ int submit_bio_bh_rw);
+static int btrfsic_process_written_superblock(
+ struct btrfsic_state *state,
+ struct btrfsic_block *const block,
+ struct btrfs_super_block *const super_hdr);
+static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status);
+static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate);
+static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state,
+ const struct btrfsic_block *block,
+ int recursion_level);
+static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
+ struct btrfsic_block *const block,
+ int recursion_level);
+static void btrfsic_print_add_link(const struct btrfsic_state *state,
+ const struct btrfsic_block_link *l);
+static void btrfsic_print_rem_link(const struct btrfsic_state *state,
+ const struct btrfsic_block_link *l);
+static char btrfsic_get_block_type(const struct btrfsic_state *state,
+ const struct btrfsic_block *block);
+static void btrfsic_dump_tree(const struct btrfsic_state *state);
+static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
+ const struct btrfsic_block *block,
+ int indent_level);
+static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
+ struct btrfsic_state *state,
+ struct btrfsic_block_data_ctx *next_block_ctx,
+ struct btrfsic_block *next_block,
+ struct btrfsic_block *from_block,
+ u64 parent_generation);
+static struct btrfsic_block *btrfsic_block_lookup_or_add(
+ struct btrfsic_state *state,
+ struct btrfsic_block_data_ctx *block_ctx,
+ const char *additional_string,
+ int is_metadata,
+ int is_iodone,
+ int never_written,
+ int mirror_num,
+ int *was_created);
+static int btrfsic_process_superblock_dev_mirror(
+ struct btrfsic_state *state,
+ struct btrfsic_dev_state *dev_state,
+ struct btrfs_device *device,
+ int superblock_mirror_num,
+ struct btrfsic_dev_state **selected_dev_state,
+ struct btrfs_super_block *selected_super);
+static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
+ struct block_device *bdev);
+static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
+ u64 bytenr,
+ struct btrfsic_dev_state *dev_state,
+ u64 dev_bytenr, char *data);
+
+static struct mutex btrfsic_mutex;
+static int btrfsic_is_initialized;
+static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable;
+
+
+static void btrfsic_block_init(struct btrfsic_block *b)
+{
+ b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER;
+ b->dev_state = NULL;
+ b->dev_bytenr = 0;
+ b->logical_bytenr = 0;
+ b->generation = BTRFSIC_GENERATION_UNKNOWN;
+ b->disk_key.objectid = 0;
+ b->disk_key.type = 0;
+ b->disk_key.offset = 0;
+ b->is_metadata = 0;
+ b->is_superblock = 0;
+ b->is_iodone = 0;
+ b->iodone_w_error = 0;
+ b->never_written = 0;
+ b->mirror_num = 0;
+ b->next_in_same_bio = NULL;
+ b->orig_bio_bh_private = NULL;
+ b->orig_bio_bh_end_io.bio = NULL;
+ INIT_LIST_HEAD(&b->collision_resolving_node);
+ INIT_LIST_HEAD(&b->all_blocks_node);
+ INIT_LIST_HEAD(&b->ref_to_list);
+ INIT_LIST_HEAD(&b->ref_from_list);
+ b->submit_bio_bh_rw = 0;
+ b->flush_gen = 0;
+}
+
+static struct btrfsic_block *btrfsic_block_alloc(void)
+{
+ struct btrfsic_block *b;
+
+ b = kzalloc(sizeof(*b), GFP_NOFS);
+ if (NULL != b)
+ btrfsic_block_init(b);
+
+ return b;
+}
+
+static void btrfsic_block_free(struct btrfsic_block *b)
+{
+ BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num));
+ kfree(b);
+}
+
+static void btrfsic_block_link_init(struct btrfsic_block_link *l)
+{
+ l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER;
+ l->ref_cnt = 1;
+ INIT_LIST_HEAD(&l->node_ref_to);
+ INIT_LIST_HEAD(&l->node_ref_from);
+ INIT_LIST_HEAD(&l->collision_resolving_node);
+ l->block_ref_to = NULL;
+ l->block_ref_from = NULL;
+}
+
+static struct btrfsic_block_link *btrfsic_block_link_alloc(void)
+{
+ struct btrfsic_block_link *l;
+
+ l = kzalloc(sizeof(*l), GFP_NOFS);
+ if (NULL != l)
+ btrfsic_block_link_init(l);
+
+ return l;
+}
+
+static void btrfsic_block_link_free(struct btrfsic_block_link *l)
+{
+ BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num));
+ kfree(l);
+}
+
+static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds)
+{
+ ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER;
+ ds->bdev = NULL;
+ ds->state = NULL;
+ ds->name[0] = '\0';
+ INIT_LIST_HEAD(&ds->collision_resolving_node);
+ ds->last_flush_gen = 0;
+ btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush);
+ ds->dummy_block_for_bio_bh_flush.is_iodone = 1;
+ ds->dummy_block_for_bio_bh_flush.dev_state = ds;
+}
+
+static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void)
+{
+ struct btrfsic_dev_state *ds;
+
+ ds = kzalloc(sizeof(*ds), GFP_NOFS);
+ if (NULL != ds)
+ btrfsic_dev_state_init(ds);
+
+ return ds;
+}
+
+static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds)
+{
+ BUG_ON(!(NULL == ds ||
+ BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num));
+ kfree(ds);
+}
+
+static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h)
+{
+ int i;
+
+ for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++)
+ INIT_LIST_HEAD(h->table + i);
+}
+
+static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
+ struct btrfsic_block_hashtable *h)
+{
+ const unsigned int hashval =
+ (((unsigned int)(b->dev_bytenr >> 16)) ^
+ ((unsigned int)((uintptr_t)b->dev_state->bdev))) &
+ (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
+
+ list_add(&b->collision_resolving_node, h->table + hashval);
+}
+
+static void btrfsic_block_hashtable_remove(struct btrfsic_block *b)
+{
+ list_del(&b->collision_resolving_node);
+}
+
+static struct btrfsic_block *btrfsic_block_hashtable_lookup(
+ struct block_device *bdev,
+ u64 dev_bytenr,
+ struct btrfsic_block_hashtable *h)
+{
+ const unsigned int hashval =
+ (((unsigned int)(dev_bytenr >> 16)) ^
+ ((unsigned int)((uintptr_t)bdev))) &
+ (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
+ struct list_head *elem;
+
+ list_for_each(elem, h->table + hashval) {
+ struct btrfsic_block *const b =
+ list_entry(elem, struct btrfsic_block,
+ collision_resolving_node);
+
+ if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr)
+ return b;
+ }
+
+ return NULL;
+}
+
+static void btrfsic_block_link_hashtable_init(
+ struct btrfsic_block_link_hashtable *h)
+{
+ int i;
+
+ for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; i++)
+ INIT_LIST_HEAD(h->table + i);
+}
+
+static void btrfsic_block_link_hashtable_add(
+ struct btrfsic_block_link *l,
+ struct btrfsic_block_link_hashtable *h)
+{
+ const unsigned int hashval =
+ (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^
+ ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^
+ ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^
+ ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev)))
+ & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
+
+ BUG_ON(NULL == l->block_ref_to);
+ BUG_ON(NULL == l->block_ref_from);
+ list_add(&l->collision_resolving_node, h->table + hashval);
+}
+
+static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l)
+{
+ list_del(&l->collision_resolving_node);
+}
+
+static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
+ struct block_device *bdev_ref_to,
+ u64 dev_bytenr_ref_to,
+ struct block_device *bdev_ref_from,
+ u64 dev_bytenr_ref_from,
+ struct btrfsic_block_link_hashtable *h)
+{
+ const unsigned int hashval =
+ (((unsigned int)(dev_bytenr_ref_to >> 16)) ^
+ ((unsigned int)(dev_bytenr_ref_from >> 16)) ^
+ ((unsigned int)((uintptr_t)bdev_ref_to)) ^
+ ((unsigned int)((uintptr_t)bdev_ref_from))) &
+ (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
+ struct list_head *elem;
+
+ list_for_each(elem, h->table + hashval) {
+ struct btrfsic_block_link *const l =
+ list_entry(elem, struct btrfsic_block_link,
+ collision_resolving_node);
+
+ BUG_ON(NULL == l->block_ref_to);
+ BUG_ON(NULL == l->block_ref_from);
+ if (l->block_ref_to->dev_state->bdev == bdev_ref_to &&
+ l->block_ref_to->dev_bytenr == dev_bytenr_ref_to &&
+ l->block_ref_from->dev_state->bdev == bdev_ref_from &&
+ l->block_ref_from->dev_bytenr == dev_bytenr_ref_from)
+ return l;
+ }
+
+ return NULL;
+}
+
+static void btrfsic_dev_state_hashtable_init(
+ struct btrfsic_dev_state_hashtable *h)
+{
+ int i;
+
+ for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++)
+ INIT_LIST_HEAD(h->table + i);
+}
+
+static void btrfsic_dev_state_hashtable_add(
+ struct btrfsic_dev_state *ds,
+ struct btrfsic_dev_state_hashtable *h)
+{
+ const unsigned int hashval =
+ (((unsigned int)((uintptr_t)ds->bdev)) &
+ (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
+
+ list_add(&ds->collision_resolving_node, h->table + hashval);
+}
+
+static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds)
+{
+ list_del(&ds->collision_resolving_node);
+}
+
+static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
+ struct block_device *bdev,
+ struct btrfsic_dev_state_hashtable *h)
+{
+ const unsigned int hashval =
+ (((unsigned int)((uintptr_t)bdev)) &
+ (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
+ struct list_head *elem;
+
+ list_for_each(elem, h->table + hashval) {
+ struct btrfsic_dev_state *const ds =
+ list_entry(elem, struct btrfsic_dev_state,
+ collision_resolving_node);
+
+ if (ds->bdev == bdev)
+ return ds;
+ }
+
+ return NULL;
+}
+
+static int btrfsic_process_superblock(struct btrfsic_state *state,
+ struct btrfs_fs_devices *fs_devices)
+{
+ int ret;
+ struct btrfs_super_block *selected_super;
+ struct list_head *dev_head = &fs_devices->devices;
+ struct btrfs_device *device;
+ struct btrfsic_dev_state *selected_dev_state = NULL;
+ int pass;
+
+ BUG_ON(NULL == state);
+ selected_super = kmalloc(sizeof(*selected_super), GFP_NOFS);
+ if (NULL == selected_super) {
+ printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
+ return -1;
+ }
+
+ list_for_each_entry(device, dev_head, dev_list) {
+ int i;
+ struct btrfsic_dev_state *dev_state;
+
+ if (!device->bdev || !device->name)
+ continue;
+
+ dev_state = btrfsic_dev_state_lookup(device->bdev);
+ BUG_ON(NULL == dev_state);
+ for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+ ret = btrfsic_process_superblock_dev_mirror(
+ state, dev_state, device, i,
+ &selected_dev_state, selected_super);
+ if (0 != ret && 0 == i) {
+ kfree(selected_super);
+ return ret;
+ }
+ }
+ }
+
+ if (NULL == state->latest_superblock) {
+ printk(KERN_INFO "btrfsic: no superblock found!\n");
+ kfree(selected_super);
+ return -1;
+ }
+
+ state->csum_size = btrfs_super_csum_size(selected_super);
+
+ for (pass = 0; pass < 3; pass++) {
+ int num_copies;
+ int mirror_num;
+ u64 next_bytenr;
+
+ switch (pass) {
+ case 0:
+ next_bytenr = btrfs_super_root(selected_super);
+ if (state->print_mask &
+ BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
+ printk(KERN_INFO "root@%llu\n",
+ (unsigned long long)next_bytenr);
+ break;
+ case 1:
+ next_bytenr = btrfs_super_chunk_root(selected_super);
+ if (state->print_mask &
+ BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
+ printk(KERN_INFO "chunk@%llu\n",
+ (unsigned long long)next_bytenr);
+ break;
+ case 2:
+ next_bytenr = btrfs_super_log_root(selected_super);
+ if (0 == next_bytenr)
+ continue;
+ if (state->print_mask &
+ BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
+ printk(KERN_INFO "log@%llu\n",
+ (unsigned long long)next_bytenr);
+ break;
+ }
+
+ num_copies =
+ btrfs_num_copies(&state->root->fs_info->mapping_tree,
+ next_bytenr, PAGE_SIZE);
+ if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
+ printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
+ (unsigned long long)next_bytenr, num_copies);
+
+ for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
+ struct btrfsic_block *next_block;
+ struct btrfsic_block_data_ctx tmp_next_block_ctx;
+ struct btrfsic_block_link *l;
+ struct btrfs_header *hdr;
+
+ ret = btrfsic_map_block(state, next_bytenr, PAGE_SIZE,
+ &tmp_next_block_ctx,
+ mirror_num);
+ if (ret) {
+ printk(KERN_INFO "btrfsic:"
+ " btrfsic_map_block(root @%llu,"
+ " mirror %d) failed!\n",
+ (unsigned long long)next_bytenr,
+ mirror_num);
+ kfree(selected_super);
+ return -1;
+ }
+
+ next_block = btrfsic_block_hashtable_lookup(
+ tmp_next_block_ctx.dev->bdev,
+ tmp_next_block_ctx.dev_bytenr,
+ &state->block_hashtable);
+ BUG_ON(NULL == next_block);
+
+ l = btrfsic_block_link_hashtable_lookup(
+ tmp_next_block_ctx.dev->bdev,
+ tmp_next_block_ctx.dev_bytenr,
+ state->latest_superblock->dev_state->
+ bdev,
+ state->latest_superblock->dev_bytenr,
+ &state->block_link_hashtable);
+ BUG_ON(NULL == l);
+
+ ret = btrfsic_read_block(state, &tmp_next_block_ctx);
+ if (ret < (int)BTRFSIC_BLOCK_SIZE) {
+ printk(KERN_INFO
+ "btrfsic: read @logical %llu failed!\n",
+ (unsigned long long)
+ tmp_next_block_ctx.start);
+ btrfsic_release_block_ctx(&tmp_next_block_ctx);
+ kfree(selected_super);
+ return -1;
+ }
+
+ hdr = (struct btrfs_header *)tmp_next_block_ctx.data;
+ ret = btrfsic_process_metablock(state,
+ next_block,
+ &tmp_next_block_ctx,
+ hdr,
+ BTRFS_MAX_LEVEL + 3, 1);
+ btrfsic_release_block_ctx(&tmp_next_block_ctx);
+ }
+ }
+
+ kfree(selected_super);
+ return ret;
+}
+
+static int btrfsic_process_superblock_dev_mirror(
+ struct btrfsic_state *state,
+ struct btrfsic_dev_state *dev_state,
+ struct btrfs_device *device,
+ int superblock_mirror_num,
+ struct btrfsic_dev_state **selected_dev_state,
+ struct btrfs_super_block *selected_super)
+{
+ struct btrfs_super_block *super_tmp;
+ u64 dev_bytenr;
+ struct buffer_head *bh;
+ struct btrfsic_block *superblock_tmp;
+ int pass;
+ struct block_device *const superblock_bdev = device->bdev;
+
+ /* super block bytenr is always the unmapped device bytenr */
+ dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
+ bh = __bread(superblock_bdev, dev_bytenr / 4096, 4096);
+ if (NULL == bh)
+ return -1;
+ super_tmp = (struct btrfs_super_block *)
+ (bh->b_data + (dev_bytenr & 4095));
+
+ if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
+ strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC,
+ sizeof(super_tmp->magic)) ||
+ memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE)) {
+ brelse(bh);
+ return 0;
+ }
+
+ superblock_tmp =
+ btrfsic_block_hashtable_lookup(superblock_bdev,
+ dev_bytenr,
+ &state->block_hashtable);
+ if (NULL == superblock_tmp) {
+ superblock_tmp = btrfsic_block_alloc();
+ if (NULL == superblock_tmp) {
+ printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
+ brelse(bh);
+ return -1;
+ }
+ /* for superblock, only the dev_bytenr makes sense */
+ superblock_tmp->dev_bytenr = dev_bytenr;
+ superblock_tmp->dev_state = dev_state;
+ superblock_tmp->logical_bytenr = dev_bytenr;
+ superblock_tmp->generation = btrfs_super_generation(super_tmp);
+ superblock_tmp->is_metadata = 1;
+ superblock_tmp->is_superblock = 1;
+ superblock_tmp->is_iodone = 1;
+ superblock_tmp->never_written = 0;
+ superblock_tmp->mirror_num = 1 + superblock_mirror_num;
+ if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
+ printk(KERN_INFO "New initial S-block (bdev %p, %s)"
+ " @%llu (%s/%llu/%d)\n",
+ superblock_bdev, device->name,
+ (unsigned long long)dev_bytenr,
+ dev_state->name,
+ (unsigned long long)dev_bytenr,
+ superblock_mirror_num);
+ list_add(&superblock_tmp->all_blocks_node,
+ &state->all_blocks_list);
+ btrfsic_block_hashtable_add(superblock_tmp,
+ &state->block_hashtable);
+ }
+
+ /* select the one with the highest generation field */
+ if (btrfs_super_generation(super_tmp) >
+ state->max_superblock_generation ||
+ 0 == state->max_superblock_generation) {
+ memcpy(selected_super, super_tmp, sizeof(*selected_super));
+ *selected_dev_state = dev_state;
+ state->max_superblock_generation =
+ btrfs_super_generation(super_tmp);
+ state->latest_superblock = superblock_tmp;
+ }
+
+ for (pass = 0; pass < 3; pass++) {
+ u64 next_bytenr;
+ int num_copies;
+ int mirror_num;
+ const char *additional_string = NULL;
+ struct btrfs_disk_key tmp_disk_key;
+
+ tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
+ tmp_disk_key.offset = 0;
+ switch (pass) {
+ case 0:
+ tmp_disk_key.objectid =
+ cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
+ additional_string = "initial root ";
+ next_bytenr = btrfs_super_root(super_tmp);
+ break;
+ case 1:
+ tmp_disk_key.objectid =
+ cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
+ additional_string = "initial chunk ";
+ next_bytenr = btrfs_super_chunk_root(super_tmp);
+ break;
+ case 2:
+ tmp_disk_key.objectid =
+ cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
+ additional_string = "initial log ";
+ next_bytenr = btrfs_super_log_root(super_tmp);
+ if (0 == next_bytenr)
+ continue;
+ break;
+ }
+
+ num_copies =
+ btrfs_num_copies(&state->root->fs_info->mapping_tree,
+ next_bytenr, PAGE_SIZE);
+ if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
+ printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
+ (unsigned long long)next_bytenr, num_copies);
+ for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
+ struct btrfsic_block *next_block;
+ struct btrfsic_block_data_ctx tmp_next_block_ctx;
+ struct btrfsic_block_link *l;
+
+ if (btrfsic_map_block(state, next_bytenr, PAGE_SIZE,
+ &tmp_next_block_ctx,
+ mirror_num)) {
+ printk(KERN_INFO "btrfsic: btrfsic_map_block("
+ "bytenr @%llu, mirror %d) failed!\n",
+ (unsigned long long)next_bytenr,
+ mirror_num);
+ brelse(bh);
+ return -1;
+ }
+
+ next_block = btrfsic_block_lookup_or_add(
+ state, &tmp_next_block_ctx,
+ additional_string, 1, 1, 0,
+ mirror_num, NULL);
+ if (NULL == next_block) {
+ btrfsic_release_block_ctx(&tmp_next_block_ctx);
+ brelse(bh);
+ return -1;
+ }
+
+ next_block->disk_key = tmp_disk_key;
+ next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
+ l = btrfsic_block_link_lookup_or_add(
+ state, &tmp_next_block_ctx,
+ next_block, superblock_tmp,
+ BTRFSIC_GENERATION_UNKNOWN);
+ btrfsic_release_block_ctx(&tmp_next_block_ctx);
+ if (NULL == l) {
+ brelse(bh);
+ return -1;
+ }
+ }
+ }
+ if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES)
+ btrfsic_dump_tree_sub(state, superblock_tmp, 0);
+
+ brelse(bh);
+ return 0;
+}
+
+static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
+{
+ struct btrfsic_stack_frame *sf;
+
+ sf = kzalloc(sizeof(*sf), GFP_NOFS);
+ if (NULL == sf)
+ printk(KERN_INFO "btrfsic: alloc memory failed!\n");
+ else
+ sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER;
+ return sf;
+}
+
+static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf)
+{
+ BUG_ON(!(NULL == sf ||
+ BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic));
+ kfree(sf);
+}
+
+static int btrfsic_process_metablock(
+ struct btrfsic_state *state,
+ struct btrfsic_block *const first_block,
+ struct btrfsic_block_data_ctx *const first_block_ctx,
+ struct btrfs_header *const first_hdr,
+ int first_limit_nesting, int force_iodone_flag)
+{
+ struct btrfsic_stack_frame initial_stack_frame = { 0 };
+ struct btrfsic_stack_frame *sf;
+ struct btrfsic_stack_frame *next_stack;
+
+ sf = &initial_stack_frame;
+ sf->error = 0;
+ sf->i = -1;
+ sf->limit_nesting = first_limit_nesting;
+ sf->block = first_block;
+ sf->block_ctx = first_block_ctx;
+ sf->next_block = NULL;
+ sf->hdr = first_hdr;
+ sf->prev = NULL;
+
+continue_with_new_stack_frame:
+ sf->block->generation = le64_to_cpu(sf->hdr->generation);
+ if (0 == sf->hdr->level) {
+ struct btrfs_leaf *const leafhdr =
+ (struct btrfs_leaf *)sf->hdr;
+
+ if (-1 == sf->i) {
+ sf->nr = le32_to_cpu(leafhdr->header.nritems);
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "leaf %llu items %d generation %llu"
+ " owner %llu\n",
+ (unsigned long long)
+ sf->block_ctx->start,
+ sf->nr,
+ (unsigned long long)
+ le64_to_cpu(leafhdr->header.generation),
+ (unsigned long long)
+ le64_to_cpu(leafhdr->header.owner));
+ }
+
+continue_with_current_leaf_stack_frame:
+ if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
+ sf->i++;
+ sf->num_copies = 0;
+ }
+
+ if (sf->i < sf->nr) {
+ struct btrfs_item *disk_item = leafhdr->items + sf->i;
+ struct btrfs_disk_key *disk_key = &disk_item->key;
+ u8 type;
+ const u32 item_offset = le32_to_cpu(disk_item->offset);
+
+ type = disk_key->type;
+
+ if (BTRFS_ROOT_ITEM_KEY == type) {
+ const struct btrfs_root_item *const root_item =
+ (struct btrfs_root_item *)
+ (sf->block_ctx->data +
+ offsetof(struct btrfs_leaf, items) +
+ item_offset);
+ const u64 next_bytenr =
+ le64_to_cpu(root_item->bytenr);
+
+ sf->error =
+ btrfsic_create_link_to_next_block(
+ state,
+ sf->block,
+ sf->block_ctx,
+ next_bytenr,
+ sf->limit_nesting,
+ &sf->next_block_ctx,
+ &sf->next_block,
+ force_iodone_flag,
+ &sf->num_copies,
+ &sf->mirror_num,
+ disk_key,
+ le64_to_cpu(root_item->
+ generation));
+ if (sf->error)
+ goto one_stack_frame_backwards;
+
+ if (NULL != sf->next_block) {
+ struct btrfs_header *const next_hdr =
+ (struct btrfs_header *)
+ sf->next_block_ctx.data;
+
+ next_stack =
+ btrfsic_stack_frame_alloc();
+ if (NULL == next_stack) {
+ btrfsic_release_block_ctx(
+ &sf->
+ next_block_ctx);
+ goto one_stack_frame_backwards;
+ }
+
+ next_stack->i = -1;
+ next_stack->block = sf->next_block;
+ next_stack->block_ctx =
+ &sf->next_block_ctx;
+ next_stack->next_block = NULL;
+ next_stack->hdr = next_hdr;
+ next_stack->limit_nesting =
+ sf->limit_nesting - 1;
+ next_stack->prev = sf;
+ sf = next_stack;
+ goto continue_with_new_stack_frame;
+ }
+ } else if (BTRFS_EXTENT_DATA_KEY == type &&
+ state->include_extent_data) {
+ sf->error = btrfsic_handle_extent_data(
+ state,
+ sf->block,
+ sf->block_ctx,
+ item_offset,
+ force_iodone_flag);
+ if (sf->error)
+ goto one_stack_frame_backwards;
+ }
+
+ goto continue_with_current_leaf_stack_frame;
+ }
+ } else {
+ struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr;
+
+ if (-1 == sf->i) {
+ sf->nr = le32_to_cpu(nodehdr->header.nritems);
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO "node %llu level %d items %d"
+ " generation %llu owner %llu\n",
+ (unsigned long long)
+ sf->block_ctx->start,
+ nodehdr->header.level, sf->nr,
+ (unsigned long long)
+ le64_to_cpu(nodehdr->header.generation),
+ (unsigned long long)
+ le64_to_cpu(nodehdr->header.owner));
+ }
+
+continue_with_current_node_stack_frame:
+ if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
+ sf->i++;
+ sf->num_copies = 0;
+ }
+
+ if (sf->i < sf->nr) {
+ struct btrfs_key_ptr *disk_key_ptr =
+ nodehdr->ptrs + sf->i;
+ const u64 next_bytenr =
+ le64_to_cpu(disk_key_ptr->blockptr);
+
+ sf->error = btrfsic_create_link_to_next_block(
+ state,
+ sf->block,
+ sf->block_ctx,
+ next_bytenr,
+ sf->limit_nesting,
+ &sf->next_block_ctx,
+ &sf->next_block,
+ force_iodone_flag,
+ &sf->num_copies,
+ &sf->mirror_num,
+ &disk_key_ptr->key,
+ le64_to_cpu(disk_key_ptr->generation));
+ if (sf->error)
+ goto one_stack_frame_backwards;
+
+ if (NULL != sf->next_block) {
+ struct btrfs_header *const next_hdr =
+ (struct btrfs_header *)
+ sf->next_block_ctx.data;
+
+ next_stack = btrfsic_stack_frame_alloc();
+ if (NULL == next_stack)
+ goto one_stack_frame_backwards;
+
+ next_stack->i = -1;
+ next_stack->block = sf->next_block;
+ next_stack->block_ctx = &sf->next_block_ctx;
+ next_stack->next_block = NULL;
+ next_stack->hdr = next_hdr;
+ next_stack->limit_nesting =
+ sf->limit_nesting - 1;
+ next_stack->prev = sf;
+ sf = next_stack;
+ goto continue_with_new_stack_frame;
+ }
+
+ goto continue_with_current_node_stack_frame;
+ }
+ }
+
+one_stack_frame_backwards:
+ if (NULL != sf->prev) {
+ struct btrfsic_stack_frame *const prev = sf->prev;
+
+ /* the one for the initial block is freed in the caller */
+ btrfsic_release_block_ctx(sf->block_ctx);
+
+ if (sf->error) {
+ prev->error = sf->error;
+ btrfsic_stack_frame_free(sf);
+ sf = prev;
+ goto one_stack_frame_backwards;
+ }
+
+ btrfsic_stack_frame_free(sf);
+ sf = prev;
+ goto continue_with_new_stack_frame;
+ } else {
+ BUG_ON(&initial_stack_frame != sf);
+ }
+
+ return sf->error;
+}
+
+static int btrfsic_create_link_to_next_block(
+ struct btrfsic_state *state,
+ struct btrfsic_block *block,
+ struct btrfsic_block_data_ctx *block_ctx,
+ u64 next_bytenr,
+ int limit_nesting,
+ struct btrfsic_block_data_ctx *next_block_ctx,
+ struct btrfsic_block **next_blockp,
+ int force_iodone_flag,
+ int *num_copiesp, int *mirror_nump,
+ struct btrfs_disk_key *disk_key,
+ u64 parent_generation)
+{
+ struct btrfsic_block *next_block = NULL;
+ int ret;
+ struct btrfsic_block_link *l;
+ int did_alloc_block_link;
+ int block_was_created;
+
+ *next_blockp = NULL;
+ if (0 == *num_copiesp) {
+ *num_copiesp =
+ btrfs_num_copies(&state->root->fs_info->mapping_tree,
+ next_bytenr, PAGE_SIZE);
+ if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
+ printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
+ (unsigned long long)next_bytenr, *num_copiesp);
+ *mirror_nump = 1;
+ }
+
+ if (*mirror_nump > *num_copiesp)
+ return 0;
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "btrfsic_create_link_to_next_block(mirror_num=%d)\n",
+ *mirror_nump);
+ ret = btrfsic_map_block(state, next_bytenr,
+ BTRFSIC_BLOCK_SIZE,
+ next_block_ctx, *mirror_nump);
+ if (ret) {
+ printk(KERN_INFO
+ "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
+ (unsigned long long)next_bytenr, *mirror_nump);
+ btrfsic_release_block_ctx(next_block_ctx);
+ *next_blockp = NULL;
+ return -1;
+ }
+
+ next_block = btrfsic_block_lookup_or_add(state,
+ next_block_ctx, "referenced ",
+ 1, force_iodone_flag,
+ !force_iodone_flag,
+ *mirror_nump,
+ &block_was_created);
+ if (NULL == next_block) {
+ btrfsic_release_block_ctx(next_block_ctx);
+ *next_blockp = NULL;
+ return -1;
+ }
+ if (block_was_created) {
+ l = NULL;
+ next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
+ } else {
+ if (next_block->logical_bytenr != next_bytenr &&
+ !(!next_block->is_metadata &&
+ 0 == next_block->logical_bytenr)) {
+ printk(KERN_INFO
+ "Referenced block @%llu (%s/%llu/%d)"
+ " found in hash table, %c,"
+ " bytenr mismatch (!= stored %llu).\n",
+ (unsigned long long)next_bytenr,
+ next_block_ctx->dev->name,
+ (unsigned long long)next_block_ctx->dev_bytenr,
+ *mirror_nump,
+ btrfsic_get_block_type(state, next_block),
+ (unsigned long long)next_block->logical_bytenr);
+ } else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "Referenced block @%llu (%s/%llu/%d)"
+ " found in hash table, %c.\n",
+ (unsigned long long)next_bytenr,
+ next_block_ctx->dev->name,
+ (unsigned long long)next_block_ctx->dev_bytenr,
+ *mirror_nump,
+ btrfsic_get_block_type(state, next_block));
+ next_block->logical_bytenr = next_bytenr;
+
+ next_block->mirror_num = *mirror_nump;
+ l = btrfsic_block_link_hashtable_lookup(
+ next_block_ctx->dev->bdev,
+ next_block_ctx->dev_bytenr,
+ block_ctx->dev->bdev,
+ block_ctx->dev_bytenr,
+ &state->block_link_hashtable);
+ }
+
+ next_block->disk_key = *disk_key;
+ if (NULL == l) {
+ l = btrfsic_block_link_alloc();
+ if (NULL == l) {
+ printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
+ btrfsic_release_block_ctx(next_block_ctx);
+ *next_blockp = NULL;
+ return -1;
+ }
+
+ did_alloc_block_link = 1;
+ l->block_ref_to = next_block;
+ l->block_ref_from = block;
+ l->ref_cnt = 1;
+ l->parent_generation = parent_generation;
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ btrfsic_print_add_link(state, l);
+
+ list_add(&l->node_ref_to, &block->ref_to_list);
+ list_add(&l->node_ref_from, &next_block->ref_from_list);
+
+ btrfsic_block_link_hashtable_add(l,
+ &state->block_link_hashtable);
+ } else {
+ did_alloc_block_link = 0;
+ if (0 == limit_nesting) {
+ l->ref_cnt++;
+ l->parent_generation = parent_generation;
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ btrfsic_print_add_link(state, l);
+ }
+ }
+
+ if (limit_nesting > 0 && did_alloc_block_link) {
+ ret = btrfsic_read_block(state, next_block_ctx);
+ if (ret < (int)BTRFSIC_BLOCK_SIZE) {
+ printk(KERN_INFO
+ "btrfsic: read block @logical %llu failed!\n",
+ (unsigned long long)next_bytenr);
+ btrfsic_release_block_ctx(next_block_ctx);
+ *next_blockp = NULL;
+ return -1;
+ }
+
+ *next_blockp = next_block;
+ } else {
+ *next_blockp = NULL;
+ }
+ (*mirror_nump)++;
+
+ return 0;
+}
+
+static int btrfsic_handle_extent_data(
+ struct btrfsic_state *state,
+ struct btrfsic_block *block,
+ struct btrfsic_block_data_ctx *block_ctx,
+ u32 item_offset, int force_iodone_flag)
+{
+ int ret;
+ struct btrfs_file_extent_item *file_extent_item =
+ (struct btrfs_file_extent_item *)(block_ctx->data +
+ offsetof(struct btrfs_leaf,
+ items) + item_offset);
+ u64 next_bytenr =
+ le64_to_cpu(file_extent_item->disk_bytenr) +
+ le64_to_cpu(file_extent_item->offset);
+ u64 num_bytes = le64_to_cpu(file_extent_item->num_bytes);
+ u64 generation = le64_to_cpu(file_extent_item->generation);
+ struct btrfsic_block_link *l;
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
+ printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu,"
+ " offset = %llu, num_bytes = %llu\n",
+ file_extent_item->type,
+ (unsigned long long)
+ le64_to_cpu(file_extent_item->disk_bytenr),
+ (unsigned long long)
+ le64_to_cpu(file_extent_item->offset),
+ (unsigned long long)
+ le64_to_cpu(file_extent_item->num_bytes));
+ if (BTRFS_FILE_EXTENT_REG != file_extent_item->type ||
+ ((u64)0) == le64_to_cpu(file_extent_item->disk_bytenr))
+ return 0;
+ while (num_bytes > 0) {
+ u32 chunk_len;
+ int num_copies;
+ int mirror_num;
+
+ if (num_bytes > BTRFSIC_BLOCK_SIZE)
+ chunk_len = BTRFSIC_BLOCK_SIZE;
+ else
+ chunk_len = num_bytes;
+
+ num_copies =
+ btrfs_num_copies(&state->root->fs_info->mapping_tree,
+ next_bytenr, PAGE_SIZE);
+ if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
+ printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
+ (unsigned long long)next_bytenr, num_copies);
+ for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
+ struct btrfsic_block_data_ctx next_block_ctx;
+ struct btrfsic_block *next_block;
+ int block_was_created;
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO "btrfsic_handle_extent_data("
+ "mirror_num=%d)\n", mirror_num);
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
+ printk(KERN_INFO
+ "\tdisk_bytenr = %llu, num_bytes %u\n",
+ (unsigned long long)next_bytenr,
+ chunk_len);
+ ret = btrfsic_map_block(state, next_bytenr,
+ chunk_len, &next_block_ctx,
+ mirror_num);
+ if (ret) {
+ printk(KERN_INFO
+ "btrfsic: btrfsic_map_block(@%llu,"
+ " mirror=%d) failed!\n",
+ (unsigned long long)next_bytenr,
+ mirror_num);
+ return -1;
+ }
+
+ next_block = btrfsic_block_lookup_or_add(
+ state,
+ &next_block_ctx,
+ "referenced ",
+ 0,
+ force_iodone_flag,
+ !force_iodone_flag,
+ mirror_num,
+ &block_was_created);
+ if (NULL == next_block) {
+ printk(KERN_INFO
+ "btrfsic: error, kmalloc failed!\n");
+ btrfsic_release_block_ctx(&next_block_ctx);
+ return -1;
+ }
+ if (!block_was_created) {
+ if (next_block->logical_bytenr != next_bytenr &&
+ !(!next_block->is_metadata &&
+ 0 == next_block->logical_bytenr)) {
+ printk(KERN_INFO
+ "Referenced block"
+ " @%llu (%s/%llu/%d)"
+ " found in hash table, D,"
+ " bytenr mismatch"
+ " (!= stored %llu).\n",
+ (unsigned long long)next_bytenr,
+ next_block_ctx.dev->name,
+ (unsigned long long)
+ next_block_ctx.dev_bytenr,
+ mirror_num,
+ (unsigned long long)
+ next_block->logical_bytenr);
+ }
+ next_block->logical_bytenr = next_bytenr;
+ next_block->mirror_num = mirror_num;
+ }
+
+ l = btrfsic_block_link_lookup_or_add(state,
+ &next_block_ctx,
+ next_block, block,
+ generation);
+ btrfsic_release_block_ctx(&next_block_ctx);
+ if (NULL == l)
+ return -1;
+ }
+
+ next_bytenr += chunk_len;
+ num_bytes -= chunk_len;
+ }
+
+ return 0;
+}
+
+static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
+ struct btrfsic_block_data_ctx *block_ctx_out,
+ int mirror_num)
+{
+ int ret;
+ u64 length;
+ struct btrfs_bio *multi = NULL;
+ struct btrfs_device *device;
+
+ length = len;
+ ret = btrfs_map_block(&state->root->fs_info->mapping_tree, READ,
+ bytenr, &length, &multi, mirror_num);
+
+ device = multi->stripes[0].dev;
+ block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev);
+ block_ctx_out->dev_bytenr = multi->stripes[0].physical;
+ block_ctx_out->start = bytenr;
+ block_ctx_out->len = len;
+ block_ctx_out->data = NULL;
+ block_ctx_out->bh = NULL;
+
+ if (0 == ret)
+ kfree(multi);
+ if (NULL == block_ctx_out->dev) {
+ ret = -ENXIO;
+ printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n");
+ }
+
+ return ret;
+}
+
+static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
+ u32 len, struct block_device *bdev,
+ struct btrfsic_block_data_ctx *block_ctx_out)
+{
+ block_ctx_out->dev = btrfsic_dev_state_lookup(bdev);
+ block_ctx_out->dev_bytenr = bytenr;
+ block_ctx_out->start = bytenr;
+ block_ctx_out->len = len;
+ block_ctx_out->data = NULL;
+ block_ctx_out->bh = NULL;
+ if (NULL != block_ctx_out->dev) {
+ return 0;
+ } else {
+ printk(KERN_INFO "btrfsic: error, cannot lookup dev (#2)!\n");
+ return -ENXIO;
+ }
+}
+
+static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
+{
+ if (NULL != block_ctx->bh) {
+ brelse(block_ctx->bh);
+ block_ctx->bh = NULL;
+ }
+}
+
+static int btrfsic_read_block(struct btrfsic_state *state,
+ struct btrfsic_block_data_ctx *block_ctx)
+{
+ block_ctx->bh = NULL;
+ if (block_ctx->dev_bytenr & 4095) {
+ printk(KERN_INFO
+ "btrfsic: read_block() with unaligned bytenr %llu\n",
+ (unsigned long long)block_ctx->dev_bytenr);
+ return -1;
+ }
+ if (block_ctx->len > 4096) {
+ printk(KERN_INFO
+ "btrfsic: read_block() with too huge size %d\n",
+ block_ctx->len);
+ return -1;
+ }
+
+ block_ctx->bh = __bread(block_ctx->dev->bdev,
+ block_ctx->dev_bytenr >> 12, 4096);
+ if (NULL == block_ctx->bh)
+ return -1;
+ block_ctx->data = block_ctx->bh->b_data;
+
+ return block_ctx->len;
+}
+
+static void btrfsic_dump_database(struct btrfsic_state *state)
+{
+ struct list_head *elem_all;
+
+ BUG_ON(NULL == state);
+
+ printk(KERN_INFO "all_blocks_list:\n");
+ list_for_each(elem_all, &state->all_blocks_list) {
+ const struct btrfsic_block *const b_all =
+ list_entry(elem_all, struct btrfsic_block,
+ all_blocks_node);
+ struct list_head *elem_ref_to;
+ struct list_head *elem_ref_from;
+
+ printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n",
+ btrfsic_get_block_type(state, b_all),
+ (unsigned long long)b_all->logical_bytenr,
+ b_all->dev_state->name,
+ (unsigned long long)b_all->dev_bytenr,
+ b_all->mirror_num);
+
+ list_for_each(elem_ref_to, &b_all->ref_to_list) {
+ const struct btrfsic_block_link *const l =
+ list_entry(elem_ref_to,
+ struct btrfsic_block_link,
+ node_ref_to);
+
+ printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
+ " refers %u* to"
+ " %c @%llu (%s/%llu/%d)\n",
+ btrfsic_get_block_type(state, b_all),
+ (unsigned long long)b_all->logical_bytenr,
+ b_all->dev_state->name,
+ (unsigned long long)b_all->dev_bytenr,
+ b_all->mirror_num,
+ l->ref_cnt,
+ btrfsic_get_block_type(state, l->block_ref_to),
+ (unsigned long long)
+ l->block_ref_to->logical_bytenr,
+ l->block_ref_to->dev_state->name,
+ (unsigned long long)l->block_ref_to->dev_bytenr,
+ l->block_ref_to->mirror_num);
+ }
+
+ list_for_each(elem_ref_from, &b_all->ref_from_list) {
+ const struct btrfsic_block_link *const l =
+ list_entry(elem_ref_from,
+ struct btrfsic_block_link,
+ node_ref_from);
+
+ printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
+ " is ref %u* from"
+ " %c @%llu (%s/%llu/%d)\n",
+ btrfsic_get_block_type(state, b_all),
+ (unsigned long long)b_all->logical_bytenr,
+ b_all->dev_state->name,
+ (unsigned long long)b_all->dev_bytenr,
+ b_all->mirror_num,
+ l->ref_cnt,
+ btrfsic_get_block_type(state, l->block_ref_from),
+ (unsigned long long)
+ l->block_ref_from->logical_bytenr,
+ l->block_ref_from->dev_state->name,
+ (unsigned long long)
+ l->block_ref_from->dev_bytenr,
+ l->block_ref_from->mirror_num);
+ }
+
+ printk(KERN_INFO "\n");
+ }
+}
+
+/*
+ * Test whether the disk block contains a tree block (leaf or node)
+ * (note that this test fails for the super block)
+ */
+static int btrfsic_test_for_metadata(struct btrfsic_state *state,
+ const u8 *data, unsigned int size)
+{
+ struct btrfs_header *h;
+ u8 csum[BTRFS_CSUM_SIZE];
+ u32 crc = ~(u32)0;
+ int fail = 0;
+ int crc_fail = 0;
+
+ h = (struct btrfs_header *)data;
+
+ if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE))
+ fail++;
+
+ crc = crc32c(crc, data + BTRFS_CSUM_SIZE, PAGE_SIZE - BTRFS_CSUM_SIZE);
+ btrfs_csum_final(crc, csum);
+ if (memcmp(csum, h->csum, state->csum_size))
+ crc_fail++;
+
+ return fail || crc_fail;
+}
+
+static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
+ u64 dev_bytenr,
+ u8 *mapped_data, unsigned int len,
+ struct bio *bio,
+ int *bio_is_patched,
+ struct buffer_head *bh,
+ int submit_bio_bh_rw)
+{
+ int is_metadata;
+ struct btrfsic_block *block;
+ struct btrfsic_block_data_ctx block_ctx;
+ int ret;
+ struct btrfsic_state *state = dev_state->state;
+ struct block_device *bdev = dev_state->bdev;
+
+ WARN_ON(len > PAGE_SIZE);
+ is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_data, len));
+ if (NULL != bio_is_patched)
+ *bio_is_patched = 0;
+
+ block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr,
+ &state->block_hashtable);
+ if (NULL != block) {
+ u64 bytenr;
+ struct list_head *elem_ref_to;
+ struct list_head *tmp_ref_to;
+
+ if (block->is_superblock) {
+ bytenr = le64_to_cpu(((struct btrfs_super_block *)
+ mapped_data)->bytenr);
+ is_metadata = 1;
+ if (state->print_mask &
+ BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) {
+ printk(KERN_INFO
+ "[before new superblock is written]:\n");
+ btrfsic_dump_tree_sub(state, block, 0);
+ }
+ }
+ if (is_metadata) {
+ if (!block->is_superblock) {
+ bytenr = le64_to_cpu(((struct btrfs_header *)
+ mapped_data)->bytenr);
+ btrfsic_cmp_log_and_dev_bytenr(state, bytenr,
+ dev_state,
+ dev_bytenr,
+ mapped_data);
+ }
+ if (block->logical_bytenr != bytenr) {
+ printk(KERN_INFO
+ "Written block @%llu (%s/%llu/%d)"
+ " found in hash table, %c,"
+ " bytenr mismatch"
+ " (!= stored %llu).\n",
+ (unsigned long long)bytenr,
+ dev_state->name,
+ (unsigned long long)dev_bytenr,
+ block->mirror_num,
+ btrfsic_get_block_type(state, block),
+ (unsigned long long)
+ block->logical_bytenr);
+ block->logical_bytenr = bytenr;
+ } else if (state->print_mask &
+ BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "Written block @%llu (%s/%llu/%d)"
+ " found in hash table, %c.\n",
+ (unsigned long long)bytenr,
+ dev_state->name,
+ (unsigned long long)dev_bytenr,
+ block->mirror_num,
+ btrfsic_get_block_type(state, block));
+ } else {
+ bytenr = block->logical_bytenr;
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "Written block @%llu (%s/%llu/%d)"
+ " found in hash table, %c.\n",
+ (unsigned long long)bytenr,
+ dev_state->name,
+ (unsigned long long)dev_bytenr,
+ block->mirror_num,
+ btrfsic_get_block_type(state, block));
+ }
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "ref_to_list: %cE, ref_from_list: %cE\n",
+ list_empty(&block->ref_to_list) ? ' ' : '!',
+ list_empty(&block->ref_from_list) ? ' ' : '!');
+ if (btrfsic_is_block_ref_by_superblock(state, block, 0)) {
+ printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
+ " @%llu (%s/%llu/%d), old(gen=%llu,"
+ " objectid=%llu, type=%d, offset=%llu),"
+ " new(gen=%llu),"
+ " which is referenced by most recent superblock"
+ " (superblockgen=%llu)!\n",
+ btrfsic_get_block_type(state, block),
+ (unsigned long long)bytenr,
+ dev_state->name,
+ (unsigned long long)dev_bytenr,
+ block->mirror_num,
+ (unsigned long long)block->generation,
+ (unsigned long long)
+ le64_to_cpu(block->disk_key.objectid),
+ block->disk_key.type,
+ (unsigned long long)
+ le64_to_cpu(block->disk_key.offset),
+ (unsigned long long)
+ le64_to_cpu(((struct btrfs_header *)
+ mapped_data)->generation),
+ (unsigned long long)
+ state->max_superblock_generation);
+ btrfsic_dump_tree(state);
+ }
+
+ if (!block->is_iodone && !block->never_written) {
+ printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
+ " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu,"
+ " which is not yet iodone!\n",
+ btrfsic_get_block_type(state, block),
+ (unsigned long long)bytenr,
+ dev_state->name,
+ (unsigned long long)dev_bytenr,
+ block->mirror_num,
+ (unsigned long long)block->generation,
+ (unsigned long long)
+ le64_to_cpu(((struct btrfs_header *)
+ mapped_data)->generation));
+ /* it would not be safe to go on */
+ btrfsic_dump_tree(state);
+ return;
+ }
+
+ /*
+ * Clear all references of this block. Do not free
+ * the block itself even if is not referenced anymore
+ * because it still carries valueable information
+ * like whether it was ever written and IO completed.
+ */
+ list_for_each_safe(elem_ref_to, tmp_ref_to,
+ &block->ref_to_list) {
+ struct btrfsic_block_link *const l =
+ list_entry(elem_ref_to,
+ struct btrfsic_block_link,
+ node_ref_to);
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ btrfsic_print_rem_link(state, l);
+ l->ref_cnt--;
+ if (0 == l->ref_cnt) {
+ list_del(&l->node_ref_to);
+ list_del(&l->node_ref_from);
+ btrfsic_block_link_hashtable_remove(l);
+ btrfsic_block_link_free(l);
+ }
+ }
+
+ if (block->is_superblock)
+ ret = btrfsic_map_superblock(state, bytenr, len,
+ bdev, &block_ctx);
+ else
+ ret = btrfsic_map_block(state, bytenr, len,
+ &block_ctx, 0);
+ if (ret) {
+ printk(KERN_INFO
+ "btrfsic: btrfsic_map_block(root @%llu)"
+ " failed!\n", (unsigned long long)bytenr);
+ return;
+ }
+ block_ctx.data = mapped_data;
+ /* the following is required in case of writes to mirrors,
+ * use the same that was used for the lookup */
+ block_ctx.dev = dev_state;
+ block_ctx.dev_bytenr = dev_bytenr;
+
+ if (is_metadata || state->include_extent_data) {
+ block->never_written = 0;
+ block->iodone_w_error = 0;
+ if (NULL != bio) {
+ block->is_iodone = 0;
+ BUG_ON(NULL == bio_is_patched);
+ if (!*bio_is_patched) {
+ block->orig_bio_bh_private =
+ bio->bi_private;
+ block->orig_bio_bh_end_io.bio =
+ bio->bi_end_io;
+ block->next_in_same_bio = NULL;
+ bio->bi_private = block;
+ bio->bi_end_io = btrfsic_bio_end_io;
+ *bio_is_patched = 1;
+ } else {
+ struct btrfsic_block *chained_block =
+ (struct btrfsic_block *)
+ bio->bi_private;
+
+ BUG_ON(NULL == chained_block);
+ block->orig_bio_bh_private =
+ chained_block->orig_bio_bh_private;
+ block->orig_bio_bh_end_io.bio =
+ chained_block->orig_bio_bh_end_io.
+ bio;
+ block->next_in_same_bio = chained_block;
+ bio->bi_private = block;
+ }
+ } else if (NULL != bh) {
+ block->is_iodone = 0;
+ block->orig_bio_bh_private = bh->b_private;
+ block->orig_bio_bh_end_io.bh = bh->b_end_io;
+ block->next_in_same_bio = NULL;
+ bh->b_private = block;
+ bh->b_end_io = btrfsic_bh_end_io;
+ } else {
+ block->is_iodone = 1;
+ block->orig_bio_bh_private = NULL;
+ block->orig_bio_bh_end_io.bio = NULL;
+ block->next_in_same_bio = NULL;
+ }
+ }
+
+ block->flush_gen = dev_state->last_flush_gen + 1;
+ block->submit_bio_bh_rw = submit_bio_bh_rw;
+ if (is_metadata) {
+ block->logical_bytenr = bytenr;
+ block->is_metadata = 1;
+ if (block->is_superblock) {
+ ret = btrfsic_process_written_superblock(
+ state,
+ block,
+ (struct btrfs_super_block *)
+ mapped_data);
+ if (state->print_mask &
+ BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) {
+ printk(KERN_INFO
+ "[after new superblock is written]:\n");
+ btrfsic_dump_tree_sub(state, block, 0);
+ }
+ } else {
+ block->mirror_num = 0; /* unknown */
+ ret = btrfsic_process_metablock(
+ state,
+ block,
+ &block_ctx,
+ (struct btrfs_header *)
+ block_ctx.data,
+ 0, 0);
+ }
+ if (ret)
+ printk(KERN_INFO
+ "btrfsic: btrfsic_process_metablock"
+ "(root @%llu) failed!\n",
+ (unsigned long long)dev_bytenr);
+ } else {
+ block->is_metadata = 0;
+ block->mirror_num = 0; /* unknown */
+ block->generation = BTRFSIC_GENERATION_UNKNOWN;
+ if (!state->include_extent_data
+ && list_empty(&block->ref_from_list)) {
+ /*
+ * disk block is overwritten with extent
+ * data (not meta data) and we are configured
+ * to not include extent data: take the
+ * chance and free the block's memory
+ */
+ btrfsic_block_hashtable_remove(block);
+ list_del(&block->all_blocks_node);
+ btrfsic_block_free(block);
+ }
+ }
+ btrfsic_release_block_ctx(&block_ctx);
+ } else {
+ /* block has not been found in hash table */
+ u64 bytenr;
+
+ if (!is_metadata) {
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO "Written block (%s/%llu/?)"
+ " !found in hash table, D.\n",
+ dev_state->name,
+ (unsigned long long)dev_bytenr);
+ if (!state->include_extent_data)
+ return; /* ignore that written D block */
+
+ /* this is getting ugly for the
+ * include_extent_data case... */
+ bytenr = 0; /* unknown */
+ block_ctx.start = bytenr;
+ block_ctx.len = len;
+ block_ctx.bh = NULL;
+ } else {
+ bytenr = le64_to_cpu(((struct btrfs_header *)
+ mapped_data)->bytenr);
+ btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
+ dev_bytenr,
+ mapped_data);
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "Written block @%llu (%s/%llu/?)"
+ " !found in hash table, M.\n",
+ (unsigned long long)bytenr,
+ dev_state->name,
+ (unsigned long long)dev_bytenr);
+
+ ret = btrfsic_map_block(state, bytenr, len, &block_ctx,
+ 0);
+ if (ret) {
+ printk(KERN_INFO
+ "btrfsic: btrfsic_map_block(root @%llu)"
+ " failed!\n",
+ (unsigned long long)dev_bytenr);
+ return;
+ }
+ }
+ block_ctx.data = mapped_data;
+ /* the following is required in case of writes to mirrors,
+ * use the same that was used for the lookup */
+ block_ctx.dev = dev_state;
+ block_ctx.dev_bytenr = dev_bytenr;
+
+ block = btrfsic_block_alloc();
+ if (NULL == block) {
+ printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
+ btrfsic_release_block_ctx(&block_ctx);
+ return;
+ }
+ block->dev_state = dev_state;
+ block->dev_bytenr = dev_bytenr;
+ block->logical_bytenr = bytenr;
+ block->is_metadata = is_metadata;
+ block->never_written = 0;
+ block->iodone_w_error = 0;
+ block->mirror_num = 0; /* unknown */
+ block->flush_gen = dev_state->last_flush_gen + 1;
+ block->submit_bio_bh_rw = submit_bio_bh_rw;
+ if (NULL != bio) {
+ block->is_iodone = 0;
+ BUG_ON(NULL == bio_is_patched);
+ if (!*bio_is_patched) {
+ block->orig_bio_bh_private = bio->bi_private;
+ block->orig_bio_bh_end_io.bio = bio->bi_end_io;
+ block->next_in_same_bio = NULL;
+ bio->bi_private = block;
+ bio->bi_end_io = btrfsic_bio_end_io;
+ *bio_is_patched = 1;
+ } else {
+ struct btrfsic_block *chained_block =
+ (struct btrfsic_block *)
+ bio->bi_private;
+
+ BUG_ON(NULL == chained_block);
+ block->orig_bio_bh_private =
+ chained_block->orig_bio_bh_private;
+ block->orig_bio_bh_end_io.bio =
+ chained_block->orig_bio_bh_end_io.bio;
+ block->next_in_same_bio = chained_block;
+ bio->bi_private = block;
+ }
+ } else if (NULL != bh) {
+ block->is_iodone = 0;
+ block->orig_bio_bh_private = bh->b_private;
+ block->orig_bio_bh_end_io.bh = bh->b_end_io;
+ block->next_in_same_bio = NULL;
+ bh->b_private = block;
+ bh->b_end_io = btrfsic_bh_end_io;
+ } else {
+ block->is_iodone = 1;
+ block->orig_bio_bh_private = NULL;
+ block->orig_bio_bh_end_io.bio = NULL;
+ block->next_in_same_bio = NULL;
+ }
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "New written %c-block @%llu (%s/%llu/%d)\n",
+ is_metadata ? 'M' : 'D',
+ (unsigned long long)block->logical_bytenr,
+ block->dev_state->name,
+ (unsigned long long)block->dev_bytenr,
+ block->mirror_num);
+ list_add(&block->all_blocks_node, &state->all_blocks_list);
+ btrfsic_block_hashtable_add(block, &state->block_hashtable);
+
+ if (is_metadata) {
+ ret = btrfsic_process_metablock(state, block,
+ &block_ctx,
+ (struct btrfs_header *)
+ block_ctx.data, 0, 0);
+ if (ret)
+ printk(KERN_INFO
+ "btrfsic: process_metablock(root @%llu)"
+ " failed!\n",
+ (unsigned long long)dev_bytenr);
+ }
+ btrfsic_release_block_ctx(&block_ctx);
+ }
+}
+
+static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
+{
+ struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private;
+ int iodone_w_error;
+
+ /* mutex is not held! This is not save if IO is not yet completed
+ * on umount */
+ iodone_w_error = 0;
+ if (bio_error_status)
+ iodone_w_error = 1;
+
+ BUG_ON(NULL == block);
+ bp->bi_private = block->orig_bio_bh_private;
+ bp->bi_end_io = block->orig_bio_bh_end_io.bio;
+
+ do {
+ struct btrfsic_block *next_block;
+ struct btrfsic_dev_state *const dev_state = block->dev_state;
+
+ if ((dev_state->state->print_mask &
+ BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
+ printk(KERN_INFO
+ "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
+ bio_error_status,
+ btrfsic_get_block_type(dev_state->state, block),
+ (unsigned long long)block->logical_bytenr,
+ dev_state->name,
+ (unsigned long long)block->dev_bytenr,
+ block->mirror_num);
+ next_block = block->next_in_same_bio;
+ block->iodone_w_error = iodone_w_error;
+ if (block->submit_bio_bh_rw & REQ_FLUSH) {
+ dev_state->last_flush_gen++;
+ if ((dev_state->state->print_mask &
+ BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
+ printk(KERN_INFO
+ "bio_end_io() new %s flush_gen=%llu\n",
+ dev_state->name,
+ (unsigned long long)
+ dev_state->last_flush_gen);
+ }
+ if (block->submit_bio_bh_rw & REQ_FUA)
+ block->flush_gen = 0; /* FUA completed means block is
+ * on disk */
+ block->is_iodone = 1; /* for FLUSH, this releases the block */
+ block = next_block;
+ } while (NULL != block);
+
+ bp->bi_end_io(bp, bio_error_status);
+}
+
+static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
+{
+ struct btrfsic_block *block = (struct btrfsic_block *)bh->b_private;
+ int iodone_w_error = !uptodate;
+ struct btrfsic_dev_state *dev_state;
+
+ BUG_ON(NULL == block);
+ dev_state = block->dev_state;
+ if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
+ printk(KERN_INFO
+ "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n",
+ iodone_w_error,
+ btrfsic_get_block_type(dev_state->state, block),
+ (unsigned long long)block->logical_bytenr,
+ block->dev_state->name,
+ (unsigned long long)block->dev_bytenr,
+ block->mirror_num);
+
+ block->iodone_w_error = iodone_w_error;
+ if (block->submit_bio_bh_rw & REQ_FLUSH) {
+ dev_state->last_flush_gen++;
+ if ((dev_state->state->print_mask &
+ BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
+ printk(KERN_INFO
+ "bh_end_io() new %s flush_gen=%llu\n",
+ dev_state->name,
+ (unsigned long long)dev_state->last_flush_gen);
+ }
+ if (block->submit_bio_bh_rw & REQ_FUA)
+ block->flush_gen = 0; /* FUA completed means block is on disk */
+
+ bh->b_private = block->orig_bio_bh_private;
+ bh->b_end_io = block->orig_bio_bh_end_io.bh;
+ block->is_iodone = 1; /* for FLUSH, this releases the block */
+ bh->b_end_io(bh, uptodate);
+}
+
+static int btrfsic_process_written_superblock(
+ struct btrfsic_state *state,
+ struct btrfsic_block *const superblock,
+ struct btrfs_super_block *const super_hdr)
+{
+ int pass;
+
+ superblock->generation = btrfs_super_generation(super_hdr);
+ if (!(superblock->generation > state->max_superblock_generation ||
+ 0 == state->max_superblock_generation)) {
+ if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
+ printk(KERN_INFO
+ "btrfsic: superblock @%llu (%s/%llu/%d)"
+ " with old gen %llu <= %llu\n",
+ (unsigned long long)superblock->logical_bytenr,
+ superblock->dev_state->name,
+ (unsigned long long)superblock->dev_bytenr,
+ superblock->mirror_num,
+ (unsigned long long)
+ btrfs_super_generation(super_hdr),
+ (unsigned long long)
+ state->max_superblock_generation);
+ } else {
+ if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
+ printk(KERN_INFO
+ "btrfsic: got new superblock @%llu (%s/%llu/%d)"
+ " with new gen %llu > %llu\n",
+ (unsigned long long)superblock->logical_bytenr,
+ superblock->dev_state->name,
+ (unsigned long long)superblock->dev_bytenr,
+ superblock->mirror_num,
+ (unsigned long long)
+ btrfs_super_generation(super_hdr),
+ (unsigned long long)
+ state->max_superblock_generation);
+
+ state->max_superblock_generation =
+ btrfs_super_generation(super_hdr);
+ state->latest_superblock = superblock;
+ }
+
+ for (pass = 0; pass < 3; pass++) {
+ int ret;
+ u64 next_bytenr;
+ struct btrfsic_block *next_block;
+ struct btrfsic_block_data_ctx tmp_next_block_ctx;
+ struct btrfsic_block_link *l;
+ int num_copies;
+ int mirror_num;
+ const char *additional_string = NULL;
+ struct btrfs_disk_key tmp_disk_key;
+
+ tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
+ tmp_disk_key.offset = 0;
+
+ switch (pass) {
+ case 0:
+ tmp_disk_key.objectid =
+ cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
+ additional_string = "root ";
+ next_bytenr = btrfs_super_root(super_hdr);
+ if (state->print_mask &
+ BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
+ printk(KERN_INFO "root@%llu\n",
+ (unsigned long long)next_bytenr);
+ break;
+ case 1:
+ tmp_disk_key.objectid =
+ cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
+ additional_string = "chunk ";
+ next_bytenr = btrfs_super_chunk_root(super_hdr);
+ if (state->print_mask &
+ BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
+ printk(KERN_INFO "chunk@%llu\n",
+ (unsigned long long)next_bytenr);
+ break;
+ case 2:
+ tmp_disk_key.objectid =
+ cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
+ additional_string = "log ";
+ next_bytenr = btrfs_super_log_root(super_hdr);
+ if (0 == next_bytenr)
+ continue;
+ if (state->print_mask &
+ BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
+ printk(KERN_INFO "log@%llu\n",
+ (unsigned long long)next_bytenr);
+ break;
+ }
+
+ num_copies =
+ btrfs_num_copies(&state->root->fs_info->mapping_tree,
+ next_bytenr, PAGE_SIZE);
+ if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
+ printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
+ (unsigned long long)next_bytenr, num_copies);
+ for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
+ int was_created;
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "btrfsic_process_written_superblock("
+ "mirror_num=%d)\n", mirror_num);
+ ret = btrfsic_map_block(state, next_bytenr, PAGE_SIZE,
+ &tmp_next_block_ctx,
+ mirror_num);
+ if (ret) {
+ printk(KERN_INFO
+ "btrfsic: btrfsic_map_block(@%llu,"
+ " mirror=%d) failed!\n",
+ (unsigned long long)next_bytenr,
+ mirror_num);
+ return -1;
+ }
+
+ next_block = btrfsic_block_lookup_or_add(
+ state,
+ &tmp_next_block_ctx,
+ additional_string,
+ 1, 0, 1,
+ mirror_num,
+ &was_created);
+ if (NULL == next_block) {
+ printk(KERN_INFO
+ "btrfsic: error, kmalloc failed!\n");
+ btrfsic_release_block_ctx(&tmp_next_block_ctx);
+ return -1;
+ }
+
+ next_block->disk_key = tmp_disk_key;
+ if (was_created)
+ next_block->generation =
+ BTRFSIC_GENERATION_UNKNOWN;
+ l = btrfsic_block_link_lookup_or_add(
+ state,
+ &tmp_next_block_ctx,
+ next_block,
+ superblock,
+ BTRFSIC_GENERATION_UNKNOWN);
+ btrfsic_release_block_ctx(&tmp_next_block_ctx);
+ if (NULL == l)
+ return -1;
+ }
+ }
+
+ if (-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)) {
+ WARN_ON(1);
+ btrfsic_dump_tree(state);
+ }
+
+ return 0;
+}
+
+static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
+ struct btrfsic_block *const block,
+ int recursion_level)
+{
+ struct list_head *elem_ref_to;
+ int ret = 0;
+
+ if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
+ /*
+ * Note that this situation can happen and does not
+ * indicate an error in regular cases. It happens
+ * when disk blocks are freed and later reused.
+ * The check-integrity module is not aware of any
+ * block free operations, it just recognizes block
+ * write operations. Therefore it keeps the linkage
+ * information for a block until a block is
+ * rewritten. This can temporarily cause incorrect
+ * and even circular linkage informations. This
+ * causes no harm unless such blocks are referenced
+ * by the most recent super block.
+ */
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "btrfsic: abort cyclic linkage (case 1).\n");
+
+ return ret;
+ }
+
+ /*
+ * This algorithm is recursive because the amount of used stack
+ * space is very small and the max recursion depth is limited.
+ */
+ list_for_each(elem_ref_to, &block->ref_to_list) {
+ const struct btrfsic_block_link *const l =
+ list_entry(elem_ref_to, struct btrfsic_block_link,
+ node_ref_to);
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "rl=%d, %c @%llu (%s/%llu/%d)"
+ " %u* refers to %c @%llu (%s/%llu/%d)\n",
+ recursion_level,
+ btrfsic_get_block_type(state, block),
+ (unsigned long long)block->logical_bytenr,
+ block->dev_state->name,
+ (unsigned long long)block->dev_bytenr,
+ block->mirror_num,
+ l->ref_cnt,
+ btrfsic_get_block_type(state, l->block_ref_to),
+ (unsigned long long)
+ l->block_ref_to->logical_bytenr,
+ l->block_ref_to->dev_state->name,
+ (unsigned long long)l->block_ref_to->dev_bytenr,
+ l->block_ref_to->mirror_num);
+ if (l->block_ref_to->never_written) {
+ printk(KERN_INFO "btrfs: attempt to write superblock"
+ " which references block %c @%llu (%s/%llu/%d)"
+ " which is never written!\n",
+ btrfsic_get_block_type(state, l->block_ref_to),
+ (unsigned long long)
+ l->block_ref_to->logical_bytenr,
+ l->block_ref_to->dev_state->name,
+ (unsigned long long)l->block_ref_to->dev_bytenr,
+ l->block_ref_to->mirror_num);
+ ret = -1;
+ } else if (!l->block_ref_to->is_iodone) {
+ printk(KERN_INFO "btrfs: attempt to write superblock"
+ " which references block %c @%llu (%s/%llu/%d)"
+ " which is not yet iodone!\n",
+ btrfsic_get_block_type(state, l->block_ref_to),
+ (unsigned long long)
+ l->block_ref_to->logical_bytenr,
+ l->block_ref_to->dev_state->name,
+ (unsigned long long)l->block_ref_to->dev_bytenr,
+ l->block_ref_to->mirror_num);
+ ret = -1;
+ } else if (l->parent_generation !=
+ l->block_ref_to->generation &&
+ BTRFSIC_GENERATION_UNKNOWN !=
+ l->parent_generation &&
+ BTRFSIC_GENERATION_UNKNOWN !=
+ l->block_ref_to->generation) {
+ printk(KERN_INFO "btrfs: attempt to write superblock"
+ " which references block %c @%llu (%s/%llu/%d)"
+ " with generation %llu !="
+ " parent generation %llu!\n",
+ btrfsic_get_block_type(state, l->block_ref_to),
+ (unsigned long long)
+ l->block_ref_to->logical_bytenr,
+ l->block_ref_to->dev_state->name,
+ (unsigned long long)l->block_ref_to->dev_bytenr,
+ l->block_ref_to->mirror_num,
+ (unsigned long long)l->block_ref_to->generation,
+ (unsigned long long)l->parent_generation);
+ ret = -1;
+ } else if (l->block_ref_to->flush_gen >
+ l->block_ref_to->dev_state->last_flush_gen) {
+ printk(KERN_INFO "btrfs: attempt to write superblock"
+ " which references block %c @%llu (%s/%llu/%d)"
+ " which is not flushed out of disk's write cache"
+ " (block flush_gen=%llu,"
+ " dev->flush_gen=%llu)!\n",
+ btrfsic_get_block_type(state, l->block_ref_to),
+ (unsigned long long)
+ l->block_ref_to->logical_bytenr,
+ l->block_ref_to->dev_state->name,
+ (unsigned long long)l->block_ref_to->dev_bytenr,
+ l->block_ref_to->mirror_num,
+ (unsigned long long)block->flush_gen,
+ (unsigned long long)
+ l->block_ref_to->dev_state->last_flush_gen);
+ ret = -1;
+ } else if (-1 == btrfsic_check_all_ref_blocks(state,
+ l->block_ref_to,
+ recursion_level +
+ 1)) {
+ ret = -1;
+ }
+ }
+
+ return ret;
+}
+
+static int btrfsic_is_block_ref_by_superblock(
+ const struct btrfsic_state *state,
+ const struct btrfsic_block *block,
+ int recursion_level)
+{
+ struct list_head *elem_ref_from;
+
+ if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
+ /* refer to comment at "abort cyclic linkage (case 1)" */
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "btrfsic: abort cyclic linkage (case 2).\n");
+
+ return 0;
+ }
+
+ /*
+ * This algorithm is recursive because the amount of used stack space
+ * is very small and the max recursion depth is limited.
+ */
+ list_for_each(elem_ref_from, &block->ref_from_list) {
+ const struct btrfsic_block_link *const l =
+ list_entry(elem_ref_from, struct btrfsic_block_link,
+ node_ref_from);
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "rl=%d, %c @%llu (%s/%llu/%d)"
+ " is ref %u* from %c @%llu (%s/%llu/%d)\n",
+ recursion_level,
+ btrfsic_get_block_type(state, block),
+ (unsigned long long)block->logical_bytenr,
+ block->dev_state->name,
+ (unsigned long long)block->dev_bytenr,
+ block->mirror_num,
+ l->ref_cnt,
+ btrfsic_get_block_type(state, l->block_ref_from),
+ (unsigned long long)
+ l->block_ref_from->logical_bytenr,
+ l->block_ref_from->dev_state->name,
+ (unsigned long long)
+ l->block_ref_from->dev_bytenr,
+ l->block_ref_from->mirror_num);
+ if (l->block_ref_from->is_superblock &&
+ state->latest_superblock->dev_bytenr ==
+ l->block_ref_from->dev_bytenr &&
+ state->latest_superblock->dev_state->bdev ==
+ l->block_ref_from->dev_state->bdev)
+ return 1;
+ else if (btrfsic_is_block_ref_by_superblock(state,
+ l->block_ref_from,
+ recursion_level +
+ 1))
+ return 1;
+ }
+
+ return 0;
+}
+
+static void btrfsic_print_add_link(const struct btrfsic_state *state,
+ const struct btrfsic_block_link *l)
+{
+ printk(KERN_INFO
+ "Add %u* link from %c @%llu (%s/%llu/%d)"
+ " to %c @%llu (%s/%llu/%d).\n",
+ l->ref_cnt,
+ btrfsic_get_block_type(state, l->block_ref_from),
+ (unsigned long long)l->block_ref_from->logical_bytenr,
+ l->block_ref_from->dev_state->name,
+ (unsigned long long)l->block_ref_from->dev_bytenr,
+ l->block_ref_from->mirror_num,
+ btrfsic_get_block_type(state, l->block_ref_to),
+ (unsigned long long)l->block_ref_to->logical_bytenr,
+ l->block_ref_to->dev_state->name,
+ (unsigned long long)l->block_ref_to->dev_bytenr,
+ l->block_ref_to->mirror_num);
+}
+
+static void btrfsic_print_rem_link(const struct btrfsic_state *state,
+ const struct btrfsic_block_link *l)
+{
+ printk(KERN_INFO
+ "Rem %u* link from %c @%llu (%s/%llu/%d)"
+ " to %c @%llu (%s/%llu/%d).\n",
+ l->ref_cnt,
+ btrfsic_get_block_type(state, l->block_ref_from),
+ (unsigned long long)l->block_ref_from->logical_bytenr,
+ l->block_ref_from->dev_state->name,
+ (unsigned long long)l->block_ref_from->dev_bytenr,
+ l->block_ref_from->mirror_num,
+ btrfsic_get_block_type(state, l->block_ref_to),
+ (unsigned long long)l->block_ref_to->logical_bytenr,
+ l->block_ref_to->dev_state->name,
+ (unsigned long long)l->block_ref_to->dev_bytenr,
+ l->block_ref_to->mirror_num);
+}
+
+static char btrfsic_get_block_type(const struct btrfsic_state *state,
+ const struct btrfsic_block *block)
+{
+ if (block->is_superblock &&
+ state->latest_superblock->dev_bytenr == block->dev_bytenr &&
+ state->latest_superblock->dev_state->bdev == block->dev_state->bdev)
+ return 'S';
+ else if (block->is_superblock)
+ return 's';
+ else if (block->is_metadata)
+ return 'M';
+ else
+ return 'D';
+}
+
+static void btrfsic_dump_tree(const struct btrfsic_state *state)
+{
+ btrfsic_dump_tree_sub(state, state->latest_superblock, 0);
+}
+
+static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
+ const struct btrfsic_block *block,
+ int indent_level)
+{
+ struct list_head *elem_ref_to;
+ int indent_add;
+ static char buf[80];
+ int cursor_position;
+
+ /*
+ * Should better fill an on-stack buffer with a complete line and
+ * dump it at once when it is time to print a newline character.
+ */
+
+ /*
+ * This algorithm is recursive because the amount of used stack space
+ * is very small and the max recursion depth is limited.
+ */
+ indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)",
+ btrfsic_get_block_type(state, block),
+ (unsigned long long)block->logical_bytenr,
+ block->dev_state->name,
+ (unsigned long long)block->dev_bytenr,
+ block->mirror_num);
+ if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
+ printk("[...]\n");
+ return;
+ }
+ printk(buf);
+ indent_level += indent_add;
+ if (list_empty(&block->ref_to_list)) {
+ printk("\n");
+ return;
+ }
+ if (block->mirror_num > 1 &&
+ !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) {
+ printk(" [...]\n");
+ return;
+ }
+
+ cursor_position = indent_level;
+ list_for_each(elem_ref_to, &block->ref_to_list) {
+ const struct btrfsic_block_link *const l =
+ list_entry(elem_ref_to, struct btrfsic_block_link,
+ node_ref_to);
+
+ while (cursor_position < indent_level) {
+ printk(" ");
+ cursor_position++;
+ }
+ if (l->ref_cnt > 1)
+ indent_add = sprintf(buf, " %d*--> ", l->ref_cnt);
+ else
+ indent_add = sprintf(buf, " --> ");
+ if (indent_level + indent_add >
+ BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
+ printk("[...]\n");
+ cursor_position = 0;
+ continue;
+ }
+
+ printk(buf);
+
+ btrfsic_dump_tree_sub(state, l->block_ref_to,
+ indent_level + indent_add);
+ cursor_position = 0;
+ }
+}
+
+static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
+ struct btrfsic_state *state,
+ struct btrfsic_block_data_ctx *next_block_ctx,
+ struct btrfsic_block *next_block,
+ struct btrfsic_block *from_block,
+ u64 parent_generation)
+{
+ struct btrfsic_block_link *l;
+
+ l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev,
+ next_block_ctx->dev_bytenr,
+ from_block->dev_state->bdev,
+ from_block->dev_bytenr,
+ &state->block_link_hashtable);
+ if (NULL == l) {
+ l = btrfsic_block_link_alloc();
+ if (NULL == l) {
+ printk(KERN_INFO
+ "btrfsic: error, kmalloc" " failed!\n");
+ return NULL;
+ }
+
+ l->block_ref_to = next_block;
+ l->block_ref_from = from_block;
+ l->ref_cnt = 1;
+ l->parent_generation = parent_generation;
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ btrfsic_print_add_link(state, l);
+
+ list_add(&l->node_ref_to, &from_block->ref_to_list);
+ list_add(&l->node_ref_from, &next_block->ref_from_list);
+
+ btrfsic_block_link_hashtable_add(l,
+ &state->block_link_hashtable);
+ } else {
+ l->ref_cnt++;
+ l->parent_generation = parent_generation;
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ btrfsic_print_add_link(state, l);
+ }
+
+ return l;
+}
+
+static struct btrfsic_block *btrfsic_block_lookup_or_add(
+ struct btrfsic_state *state,
+ struct btrfsic_block_data_ctx *block_ctx,
+ const char *additional_string,
+ int is_metadata,
+ int is_iodone,
+ int never_written,
+ int mirror_num,
+ int *was_created)
+{
+ struct btrfsic_block *block;
+
+ block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev,
+ block_ctx->dev_bytenr,
+ &state->block_hashtable);
+ if (NULL == block) {
+ struct btrfsic_dev_state *dev_state;
+
+ block = btrfsic_block_alloc();
+ if (NULL == block) {
+ printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
+ return NULL;
+ }
+ dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev);
+ if (NULL == dev_state) {
+ printk(KERN_INFO
+ "btrfsic: error, lookup dev_state failed!\n");
+ btrfsic_block_free(block);
+ return NULL;
+ }
+ block->dev_state = dev_state;
+ block->dev_bytenr = block_ctx->dev_bytenr;
+ block->logical_bytenr = block_ctx->start;
+ block->is_metadata = is_metadata;
+ block->is_iodone = is_iodone;
+ block->never_written = never_written;
+ block->mirror_num = mirror_num;
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "New %s%c-block @%llu (%s/%llu/%d)\n",
+ additional_string,
+ btrfsic_get_block_type(state, block),
+ (unsigned long long)block->logical_bytenr,
+ dev_state->name,
+ (unsigned long long)block->dev_bytenr,
+ mirror_num);
+ list_add(&block->all_blocks_node, &state->all_blocks_list);
+ btrfsic_block_hashtable_add(block, &state->block_hashtable);
+ if (NULL != was_created)
+ *was_created = 1;
+ } else {
+ if (NULL != was_created)
+ *was_created = 0;
+ }
+
+ return block;
+}
+
+static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
+ u64 bytenr,
+ struct btrfsic_dev_state *dev_state,
+ u64 dev_bytenr, char *data)
+{
+ int num_copies;
+ int mirror_num;
+ int ret;
+ struct btrfsic_block_data_ctx block_ctx;
+ int match = 0;
+
+ num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree,
+ bytenr, PAGE_SIZE);
+
+ for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
+ ret = btrfsic_map_block(state, bytenr, PAGE_SIZE,
+ &block_ctx, mirror_num);
+ if (ret) {
+ printk(KERN_INFO "btrfsic:"
+ " btrfsic_map_block(logical @%llu,"
+ " mirror %d) failed!\n",
+ (unsigned long long)bytenr, mirror_num);
+ continue;
+ }
+
+ if (dev_state->bdev == block_ctx.dev->bdev &&
+ dev_bytenr == block_ctx.dev_bytenr) {
+ match++;
+ btrfsic_release_block_ctx(&block_ctx);
+ break;
+ }
+ btrfsic_release_block_ctx(&block_ctx);
+ }
+
+ if (!match) {
+ printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio,"
+ " buffer->log_bytenr=%llu, submit_bio(bdev=%s,"
+ " phys_bytenr=%llu)!\n",
+ (unsigned long long)bytenr, dev_state->name,
+ (unsigned long long)dev_bytenr);
+ for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
+ ret = btrfsic_map_block(state, bytenr, PAGE_SIZE,
+ &block_ctx, mirror_num);
+ if (ret)
+ continue;
+
+ printk(KERN_INFO "Read logical bytenr @%llu maps to"
+ " (%s/%llu/%d)\n",
+ (unsigned long long)bytenr,
+ block_ctx.dev->name,
+ (unsigned long long)block_ctx.dev_bytenr,
+ mirror_num);
+ }
+ WARN_ON(1);
+ }
+}
+
+static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
+ struct block_device *bdev)
+{
+ struct btrfsic_dev_state *ds;
+
+ ds = btrfsic_dev_state_hashtable_lookup(bdev,
+ &btrfsic_dev_state_hashtable);
+ return ds;
+}
+
+int btrfsic_submit_bh(int rw, struct buffer_head *bh)
+{
+ struct btrfsic_dev_state *dev_state;
+
+ if (!btrfsic_is_initialized)
+ return submit_bh(rw, bh);
+
+ mutex_lock(&btrfsic_mutex);
+ /* since btrfsic_submit_bh() might also be called before
+ * btrfsic_mount(), this might return NULL */
+ dev_state = btrfsic_dev_state_lookup(bh->b_bdev);
+
+ /* Only called to write the superblock (incl. FLUSH/FUA) */
+ if (NULL != dev_state &&
+ (rw & WRITE) && bh->b_size > 0) {
+ u64 dev_bytenr;
+
+ dev_bytenr = 4096 * bh->b_blocknr;
+ if (dev_state->state->print_mask &
+ BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
+ printk(KERN_INFO
+ "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu),"
+ " size=%lu, data=%p, bdev=%p)\n",
+ rw, bh->b_blocknr,
+ (unsigned long long)dev_bytenr, bh->b_size,
+ bh->b_data, bh->b_bdev);
+ btrfsic_process_written_block(dev_state, dev_bytenr,
+ bh->b_data, bh->b_size, NULL,
+ NULL, bh, rw);
+ } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
+ if (dev_state->state->print_mask &
+ BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
+ printk(KERN_INFO
+ "submit_bh(rw=0x%x) FLUSH, bdev=%p)\n",
+ rw, bh->b_bdev);
+ if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
+ if ((dev_state->state->print_mask &
+ (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
+ BTRFSIC_PRINT_MASK_VERBOSE)))
+ printk(KERN_INFO
+ "btrfsic_submit_bh(%s) with FLUSH"
+ " but dummy block already in use"
+ " (ignored)!\n",
+ dev_state->name);
+ } else {
+ struct btrfsic_block *const block =
+ &dev_state->dummy_block_for_bio_bh_flush;
+
+ block->is_iodone = 0;
+ block->never_written = 0;
+ block->iodone_w_error = 0;
+ block->flush_gen = dev_state->last_flush_gen + 1;
+ block->submit_bio_bh_rw = rw;
+ block->orig_bio_bh_private = bh->b_private;
+ block->orig_bio_bh_end_io.bh = bh->b_end_io;
+ block->next_in_same_bio = NULL;
+ bh->b_private = block;
+ bh->b_end_io = btrfsic_bh_end_io;
+ }
+ }
+ mutex_unlock(&btrfsic_mutex);
+ return submit_bh(rw, bh);
+}
+
+void btrfsic_submit_bio(int rw, struct bio *bio)
+{
+ struct btrfsic_dev_state *dev_state;
+
+ if (!btrfsic_is_initialized) {
+ submit_bio(rw, bio);
+ return;
+ }
+
+ mutex_lock(&btrfsic_mutex);
+ /* since btrfsic_submit_bio() is also called before
+ * btrfsic_mount(), this might return NULL */
+ dev_state = btrfsic_dev_state_lookup(bio->bi_bdev);
+ if (NULL != dev_state &&
+ (rw & WRITE) && NULL != bio->bi_io_vec) {
+ unsigned int i;
+ u64 dev_bytenr;
+ int bio_is_patched;
+
+ dev_bytenr = 512 * bio->bi_sector;
+ bio_is_patched = 0;
+ if (dev_state->state->print_mask &
+ BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
+ printk(KERN_INFO
+ "submit_bio(rw=0x%x, bi_vcnt=%u,"
+ " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n",
+ rw, bio->bi_vcnt, bio->bi_sector,
+ (unsigned long long)dev_bytenr,
+ bio->bi_bdev);
+
+ for (i = 0; i < bio->bi_vcnt; i++) {
+ u8 *mapped_data;
+
+ mapped_data = kmap(bio->bi_io_vec[i].bv_page);
+ if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
+ BTRFSIC_PRINT_MASK_VERBOSE) ==
+ (dev_state->state->print_mask &
+ (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
+ BTRFSIC_PRINT_MASK_VERBOSE)))
+ printk(KERN_INFO
+ "#%u: page=%p, mapped=%p, len=%u,"
+ " offset=%u\n",
+ i, bio->bi_io_vec[i].bv_page,
+ mapped_data,
+ bio->bi_io_vec[i].bv_len,
+ bio->bi_io_vec[i].bv_offset);
+ btrfsic_process_written_block(dev_state, dev_bytenr,
+ mapped_data,
+ bio->bi_io_vec[i].bv_len,
+ bio, &bio_is_patched,
+ NULL, rw);
+ kunmap(bio->bi_io_vec[i].bv_page);
+ dev_bytenr += bio->bi_io_vec[i].bv_len;
+ }
+ } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
+ if (dev_state->state->print_mask &
+ BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
+ printk(KERN_INFO
+ "submit_bio(rw=0x%x) FLUSH, bdev=%p)\n",
+ rw, bio->bi_bdev);
+ if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
+ if ((dev_state->state->print_mask &
+ (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
+ BTRFSIC_PRINT_MASK_VERBOSE)))
+ printk(KERN_INFO
+ "btrfsic_submit_bio(%s) with FLUSH"
+ " but dummy block already in use"
+ " (ignored)!\n",
+ dev_state->name);
+ } else {
+ struct btrfsic_block *const block =
+ &dev_state->dummy_block_for_bio_bh_flush;
+
+ block->is_iodone = 0;
+ block->never_written = 0;
+ block->iodone_w_error = 0;
+ block->flush_gen = dev_state->last_flush_gen + 1;
+ block->submit_bio_bh_rw = rw;
+ block->orig_bio_bh_private = bio->bi_private;
+ block->orig_bio_bh_end_io.bio = bio->bi_end_io;
+ block->next_in_same_bio = NULL;
+ bio->bi_private = block;
+ bio->bi_end_io = btrfsic_bio_end_io;
+ }
+ }
+ mutex_unlock(&btrfsic_mutex);
+
+ submit_bio(rw, bio);
+}
+
+int btrfsic_mount(struct btrfs_root *root,
+ struct btrfs_fs_devices *fs_devices,
+ int including_extent_data, u32 print_mask)
+{
+ int ret;
+ struct btrfsic_state *state;
+ struct list_head *dev_head = &fs_devices->devices;
+ struct btrfs_device *device;
+
+ state = kzalloc(sizeof(*state), GFP_NOFS);
+ if (NULL == state) {
+ printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n");
+ return -1;
+ }
+
+ if (!btrfsic_is_initialized) {
+ mutex_init(&btrfsic_mutex);
+ btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable);
+ btrfsic_is_initialized = 1;
+ }
+ mutex_lock(&btrfsic_mutex);
+ state->root = root;
+ state->print_mask = print_mask;
+ state->include_extent_data = including_extent_data;
+ state->csum_size = 0;
+ INIT_LIST_HEAD(&state->all_blocks_list);
+ btrfsic_block_hashtable_init(&state->block_hashtable);
+ btrfsic_block_link_hashtable_init(&state->block_link_hashtable);
+ state->max_superblock_generation = 0;
+ state->latest_superblock = NULL;
+
+ list_for_each_entry(device, dev_head, dev_list) {
+ struct btrfsic_dev_state *ds;
+ char *p;
+
+ if (!device->bdev || !device->name)
+ continue;
+
+ ds = btrfsic_dev_state_alloc();
+ if (NULL == ds) {
+ printk(KERN_INFO
+ "btrfs check-integrity: kmalloc() failed!\n");
+ mutex_unlock(&btrfsic_mutex);
+ return -1;
+ }
+ ds->bdev = device->bdev;
+ ds->state = state;
+ bdevname(ds->bdev, ds->name);
+ ds->name[BDEVNAME_SIZE - 1] = '\0';
+ for (p = ds->name; *p != '\0'; p++);
+ while (p > ds->name && *p != '/')
+ p--;
+ if (*p == '/')
+ p++;
+ strlcpy(ds->name, p, sizeof(ds->name));
+ btrfsic_dev_state_hashtable_add(ds,
+ &btrfsic_dev_state_hashtable);
+ }
+
+ ret = btrfsic_process_superblock(state, fs_devices);
+ if (0 != ret) {
+ mutex_unlock(&btrfsic_mutex);
+ btrfsic_unmount(root, fs_devices);
+ return ret;
+ }
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE)
+ btrfsic_dump_database(state);
+ if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE)
+ btrfsic_dump_tree(state);
+
+ mutex_unlock(&btrfsic_mutex);
+ return 0;
+}
+
+void btrfsic_unmount(struct btrfs_root *root,
+ struct btrfs_fs_devices *fs_devices)
+{
+ struct list_head *elem_all;
+ struct list_head *tmp_all;
+ struct btrfsic_state *state;
+ struct list_head *dev_head = &fs_devices->devices;
+ struct btrfs_device *device;
+
+ if (!btrfsic_is_initialized)
+ return;
+
+ mutex_lock(&btrfsic_mutex);
+
+ state = NULL;
+ list_for_each_entry(device, dev_head, dev_list) {
+ struct btrfsic_dev_state *ds;
+
+ if (!device->bdev || !device->name)
+ continue;
+
+ ds = btrfsic_dev_state_hashtable_lookup(
+ device->bdev,
+ &btrfsic_dev_state_hashtable);
+ if (NULL != ds) {
+ state = ds->state;
+ btrfsic_dev_state_hashtable_remove(ds);
+ btrfsic_dev_state_free(ds);
+ }
+ }
+
+ if (NULL == state) {
+ printk(KERN_INFO
+ "btrfsic: error, cannot find state information"
+ " on umount!\n");
+ mutex_unlock(&btrfsic_mutex);
+ return;
+ }
+
+ /*
+ * Don't care about keeping the lists' state up to date,
+ * just free all memory that was allocated dynamically.
+ * Free the blocks and the block_links.
+ */
+ list_for_each_safe(elem_all, tmp_all, &state->all_blocks_list) {
+ struct btrfsic_block *const b_all =
+ list_entry(elem_all, struct btrfsic_block,
+ all_blocks_node);
+ struct list_head *elem_ref_to;
+ struct list_head *tmp_ref_to;
+
+ list_for_each_safe(elem_ref_to, tmp_ref_to,
+ &b_all->ref_to_list) {
+ struct btrfsic_block_link *const l =
+ list_entry(elem_ref_to,
+ struct btrfsic_block_link,
+ node_ref_to);
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ btrfsic_print_rem_link(state, l);
+
+ l->ref_cnt--;
+ if (0 == l->ref_cnt)
+ btrfsic_block_link_free(l);
+ }
+
+ if (b_all->is_iodone)
+ btrfsic_block_free(b_all);
+ else
+ printk(KERN_INFO "btrfs: attempt to free %c-block"
+ " @%llu (%s/%llu/%d) on umount which is"
+ " not yet iodone!\n",
+ btrfsic_get_block_type(state, b_all),
+ (unsigned long long)b_all->logical_bytenr,
+ b_all->dev_state->name,
+ (unsigned long long)b_all->dev_bytenr,
+ b_all->mirror_num);
+ }
+
+ mutex_unlock(&btrfsic_mutex);
+
+ kfree(state);
+}
diff --git a/fs/btrfs/check-integrity.h b/fs/btrfs/check-integrity.h
new file mode 100644
index 00000000000..8b59175cc50
--- /dev/null
+++ b/fs/btrfs/check-integrity.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) STRATO AG 2011. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#if !defined(__BTRFS_CHECK_INTEGRITY__)
+#define __BTRFS_CHECK_INTEGRITY__
+
+#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
+int btrfsic_submit_bh(int rw, struct buffer_head *bh);
+void btrfsic_submit_bio(int rw, struct bio *bio);
+#else
+#define btrfsic_submit_bh submit_bh
+#define btrfsic_submit_bio submit_bio
+#endif
+
+int btrfsic_mount(struct btrfs_root *root,
+ struct btrfs_fs_devices *fs_devices,
+ int including_extent_data, u32 print_mask);
+void btrfsic_unmount(struct btrfs_root *root,
+ struct btrfs_fs_devices *fs_devices);
+
+#endif
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index dede441bdee..0639a555e16 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -240,7 +240,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
cow = btrfs_alloc_free_block(trans, root, buf->len, 0,
new_root_objectid, &disk_key, level,
- buf->start, 0);
+ buf->start, 0, 1);
if (IS_ERR(cow))
return PTR_ERR(cow);
@@ -261,9 +261,9 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
WARN_ON(btrfs_header_generation(buf) > trans->transid);
if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
- ret = btrfs_inc_ref(trans, root, cow, 1);
+ ret = btrfs_inc_ref(trans, root, cow, 1, 1);
else
- ret = btrfs_inc_ref(trans, root, cow, 0);
+ ret = btrfs_inc_ref(trans, root, cow, 0, 1);
if (ret)
return ret;
@@ -350,14 +350,14 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if ((owner == root->root_key.objectid ||
root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
- ret = btrfs_inc_ref(trans, root, buf, 1);
+ ret = btrfs_inc_ref(trans, root, buf, 1, 1);
BUG_ON(ret);
if (root->root_key.objectid ==
BTRFS_TREE_RELOC_OBJECTID) {
- ret = btrfs_dec_ref(trans, root, buf, 0);
+ ret = btrfs_dec_ref(trans, root, buf, 0, 1);
BUG_ON(ret);
- ret = btrfs_inc_ref(trans, root, cow, 1);
+ ret = btrfs_inc_ref(trans, root, cow, 1, 1);
BUG_ON(ret);
}
new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
@@ -365,9 +365,9 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if (root->root_key.objectid ==
BTRFS_TREE_RELOC_OBJECTID)
- ret = btrfs_inc_ref(trans, root, cow, 1);
+ ret = btrfs_inc_ref(trans, root, cow, 1, 1);
else
- ret = btrfs_inc_ref(trans, root, cow, 0);
+ ret = btrfs_inc_ref(trans, root, cow, 0, 1);
BUG_ON(ret);
}
if (new_flags != 0) {
@@ -381,11 +381,11 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
if (root->root_key.objectid ==
BTRFS_TREE_RELOC_OBJECTID)
- ret = btrfs_inc_ref(trans, root, cow, 1);
+ ret = btrfs_inc_ref(trans, root, cow, 1, 1);
else
- ret = btrfs_inc_ref(trans, root, cow, 0);
+ ret = btrfs_inc_ref(trans, root, cow, 0, 1);
BUG_ON(ret);
- ret = btrfs_dec_ref(trans, root, buf, 1);
+ ret = btrfs_dec_ref(trans, root, buf, 1, 1);
BUG_ON(ret);
}
clean_tree_block(trans, root, buf);
@@ -446,7 +446,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
root->root_key.objectid, &disk_key,
- level, search_start, empty_size);
+ level, search_start, empty_size, 1);
if (IS_ERR(cow))
return PTR_ERR(cow);
@@ -484,7 +484,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
rcu_assign_pointer(root->node, cow);
btrfs_free_tree_block(trans, root, buf, parent_start,
- last_ref);
+ last_ref, 1);
free_extent_buffer(buf);
add_root_to_dirty_list(root);
} else {
@@ -500,7 +500,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
trans->transid);
btrfs_mark_buffer_dirty(parent);
btrfs_free_tree_block(trans, root, buf, parent_start,
- last_ref);
+ last_ref, 1);
}
if (unlock_orig)
btrfs_tree_unlock(buf);
@@ -957,7 +957,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
free_extent_buffer(mid);
root_sub_used(root, mid->len);
- btrfs_free_tree_block(trans, root, mid, 0, 1);
+ btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
/* once for the root ptr */
free_extent_buffer(mid);
return 0;
@@ -1015,7 +1015,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (wret)
ret = wret;
root_sub_used(root, right->len);
- btrfs_free_tree_block(trans, root, right, 0, 1);
+ btrfs_free_tree_block(trans, root, right, 0, 1, 0);
free_extent_buffer(right);
right = NULL;
} else {
@@ -1055,7 +1055,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (wret)
ret = wret;
root_sub_used(root, mid->len);
- btrfs_free_tree_block(trans, root, mid, 0, 1);
+ btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
free_extent_buffer(mid);
mid = NULL;
} else {
@@ -2089,7 +2089,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
root->root_key.objectid, &lower_key,
- level, root->node->start, 0);
+ level, root->node->start, 0, 0);
if (IS_ERR(c))
return PTR_ERR(c);
@@ -2216,7 +2216,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
split = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
root->root_key.objectid,
- &disk_key, level, c->start, 0);
+ &disk_key, level, c->start, 0, 0);
if (IS_ERR(split))
return PTR_ERR(split);
@@ -2970,7 +2970,7 @@ again:
right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
root->root_key.objectid,
- &disk_key, 0, l->start, 0);
+ &disk_key, 0, l->start, 0, 0);
if (IS_ERR(right))
return PTR_ERR(right);
@@ -3781,7 +3781,7 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
root_sub_used(root, leaf->len);
- btrfs_free_tree_block(trans, root, leaf, 0, 1);
+ btrfs_free_tree_block(trans, root, leaf, 0, 1, 0);
return 0;
}
/*
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 67385033323..27ebe61d3cc 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -86,6 +86,9 @@ struct btrfs_ordered_sum;
/* holds checksums of all the data extents */
#define BTRFS_CSUM_TREE_OBJECTID 7ULL
+/* for storing balance parameters in the root tree */
+#define BTRFS_BALANCE_OBJECTID -4ULL
+
/* orhpan objectid for tracking unlinked/truncated files */
#define BTRFS_ORPHAN_OBJECTID -5ULL
@@ -692,6 +695,54 @@ struct btrfs_root_ref {
__le16 name_len;
} __attribute__ ((__packed__));
+struct btrfs_disk_balance_args {
+ /*
+ * profiles to operate on, single is denoted by
+ * BTRFS_AVAIL_ALLOC_BIT_SINGLE
+ */
+ __le64 profiles;
+
+ /* usage filter */
+ __le64 usage;
+
+ /* devid filter */
+ __le64 devid;
+
+ /* devid subset filter [pstart..pend) */
+ __le64 pstart;
+ __le64 pend;
+
+ /* btrfs virtual address space subset filter [vstart..vend) */
+ __le64 vstart;
+ __le64 vend;
+
+ /*
+ * profile to convert to, single is denoted by
+ * BTRFS_AVAIL_ALLOC_BIT_SINGLE
+ */
+ __le64 target;
+
+ /* BTRFS_BALANCE_ARGS_* */
+ __le64 flags;
+
+ __le64 unused[8];
+} __attribute__ ((__packed__));
+
+/*
+ * store balance parameters to disk so that balance can be properly
+ * resumed after crash or unmount
+ */
+struct btrfs_balance_item {
+ /* BTRFS_BALANCE_* */
+ __le64 flags;
+
+ struct btrfs_disk_balance_args data;
+ struct btrfs_disk_balance_args meta;
+ struct btrfs_disk_balance_args sys;
+
+ __le64 unused[4];
+} __attribute__ ((__packed__));
+
#define BTRFS_FILE_EXTENT_INLINE 0
#define BTRFS_FILE_EXTENT_REG 1
#define BTRFS_FILE_EXTENT_PREALLOC 2
@@ -751,14 +802,32 @@ struct btrfs_csum_item {
} __attribute__ ((__packed__));
/* different types of block groups (and chunks) */
-#define BTRFS_BLOCK_GROUP_DATA (1 << 0)
-#define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1)
-#define BTRFS_BLOCK_GROUP_METADATA (1 << 2)
-#define BTRFS_BLOCK_GROUP_RAID0 (1 << 3)
-#define BTRFS_BLOCK_GROUP_RAID1 (1 << 4)
-#define BTRFS_BLOCK_GROUP_DUP (1 << 5)
-#define BTRFS_BLOCK_GROUP_RAID10 (1 << 6)
-#define BTRFS_NR_RAID_TYPES 5
+#define BTRFS_BLOCK_GROUP_DATA (1ULL << 0)
+#define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1)
+#define BTRFS_BLOCK_GROUP_METADATA (1ULL << 2)
+#define BTRFS_BLOCK_GROUP_RAID0 (1ULL << 3)
+#define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4)
+#define BTRFS_BLOCK_GROUP_DUP (1ULL << 5)
+#define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6)
+#define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE
+#define BTRFS_NR_RAID_TYPES 5
+
+#define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \
+ BTRFS_BLOCK_GROUP_SYSTEM | \
+ BTRFS_BLOCK_GROUP_METADATA)
+
+#define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \
+ BTRFS_BLOCK_GROUP_RAID1 | \
+ BTRFS_BLOCK_GROUP_DUP | \
+ BTRFS_BLOCK_GROUP_RAID10)
+/*
+ * We need a bit for restriper to be able to tell when chunks of type
+ * SINGLE are available. This "extended" profile format is used in
+ * fs_info->avail_*_alloc_bits (in-memory) and balance item fields
+ * (on-disk). The corresponding on-disk bit in chunk.type is reserved
+ * to avoid remappings between two formats in future.
+ */
+#define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48)
struct btrfs_block_group_item {
__le64 used;
@@ -916,6 +985,7 @@ struct btrfs_block_group_cache {
struct reloc_control;
struct btrfs_device;
struct btrfs_fs_devices;
+struct btrfs_balance_control;
struct btrfs_delayed_root;
struct btrfs_fs_info {
u8 fsid[BTRFS_FSID_SIZE];
@@ -971,7 +1041,7 @@ struct btrfs_fs_info {
* is required instead of the faster short fsync log commits
*/
u64 last_trans_log_full_commit;
- unsigned long mount_opt:20;
+ unsigned long mount_opt:21;
unsigned long compress_type:4;
u64 max_inline;
u64 alloc_start;
@@ -1132,12 +1202,23 @@ struct btrfs_fs_info {
spinlock_t ref_cache_lock;
u64 total_ref_cache_size;
+ /*
+ * these three are in extended format (availability of single
+ * chunks is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other
+ * types are denoted by corresponding BTRFS_BLOCK_GROUP_* bits)
+ */
u64 avail_data_alloc_bits;
u64 avail_metadata_alloc_bits;
u64 avail_system_alloc_bits;
- u64 data_alloc_profile;
- u64 metadata_alloc_profile;
- u64 system_alloc_profile;
+
+ /* restriper state */
+ spinlock_t balance_lock;
+ struct mutex balance_mutex;
+ atomic_t balance_running;
+ atomic_t balance_pause_req;
+ atomic_t balance_cancel_req;
+ struct btrfs_balance_control *balance_ctl;
+ wait_queue_head_t balance_wait_q;
unsigned data_chunk_allocations;
unsigned metadata_ratio;
@@ -1155,6 +1236,10 @@ struct btrfs_fs_info {
int scrub_workers_refcnt;
struct btrfs_workers scrub_workers;
+#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
+ u32 check_integrity_print_mask;
+#endif
+
/* filesystem state */
u64 fs_state;
@@ -1383,6 +1468,8 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_DEV_ITEM_KEY 216
#define BTRFS_CHUNK_ITEM_KEY 228
+#define BTRFS_BALANCE_ITEM_KEY 248
+
/*
* string items are for debugging. They just store a short string of
* data in the FS
@@ -1413,6 +1500,9 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16)
#define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17)
#define BTRFS_MOUNT_RECOVERY (1 << 18)
+#define BTRFS_MOUNT_SKIP_BALANCE (1 << 19)
+#define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20)
+#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -2077,8 +2167,86 @@ BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup,
BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup,
num_devices, 64);
-/* struct btrfs_super_block */
+/* struct btrfs_balance_item */
+BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64);
+static inline void btrfs_balance_data(struct extent_buffer *eb,
+ struct btrfs_balance_item *bi,
+ struct btrfs_disk_balance_args *ba)
+{
+ read_eb_member(eb, bi, struct btrfs_balance_item, data, ba);
+}
+
+static inline void btrfs_set_balance_data(struct extent_buffer *eb,
+ struct btrfs_balance_item *bi,
+ struct btrfs_disk_balance_args *ba)
+{
+ write_eb_member(eb, bi, struct btrfs_balance_item, data, ba);
+}
+
+static inline void btrfs_balance_meta(struct extent_buffer *eb,
+ struct btrfs_balance_item *bi,
+ struct btrfs_disk_balance_args *ba)
+{
+ read_eb_member(eb, bi, struct btrfs_balance_item, meta, ba);
+}
+
+static inline void btrfs_set_balance_meta(struct extent_buffer *eb,
+ struct btrfs_balance_item *bi,
+ struct btrfs_disk_balance_args *ba)
+{
+ write_eb_member(eb, bi, struct btrfs_balance_item, meta, ba);
+}
+
+static inline void btrfs_balance_sys(struct extent_buffer *eb,
+ struct btrfs_balance_item *bi,
+ struct btrfs_disk_balance_args *ba)
+{
+ read_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
+}
+
+static inline void btrfs_set_balance_sys(struct extent_buffer *eb,
+ struct btrfs_balance_item *bi,
+ struct btrfs_disk_balance_args *ba)
+{
+ write_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
+}
+
+static inline void
+btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu,
+ struct btrfs_disk_balance_args *disk)
+{
+ memset(cpu, 0, sizeof(*cpu));
+
+ cpu->profiles = le64_to_cpu(disk->profiles);
+ cpu->usage = le64_to_cpu(disk->usage);
+ cpu->devid = le64_to_cpu(disk->devid);
+ cpu->pstart = le64_to_cpu(disk->pstart);
+ cpu->pend = le64_to_cpu(disk->pend);
+ cpu->vstart = le64_to_cpu(disk->vstart);
+ cpu->vend = le64_to_cpu(disk->vend);
+ cpu->target = le64_to_cpu(disk->target);
+ cpu->flags = le64_to_cpu(disk->flags);
+}
+
+static inline void
+btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk,
+ struct btrfs_balance_args *cpu)
+{
+ memset(disk, 0, sizeof(*disk));
+
+ disk->profiles = cpu_to_le64(cpu->profiles);
+ disk->usage = cpu_to_le64(cpu->usage);
+ disk->devid = cpu_to_le64(cpu->devid);
+ disk->pstart = cpu_to_le64(cpu->pstart);
+ disk->pend = cpu_to_le64(cpu->pend);
+ disk->vstart = cpu_to_le64(cpu->vstart);
+ disk->vend = cpu_to_le64(cpu->vend);
+ disk->target = cpu_to_le64(cpu->target);
+ disk->flags = cpu_to_le64(cpu->flags);
+}
+
+/* struct btrfs_super_block */
BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64);
BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block,
@@ -2196,7 +2364,7 @@ static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb,
return btrfs_item_size(eb, e) - offset;
}
-static inline struct btrfs_root *btrfs_sb(struct super_block *sb)
+static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
{
return sb->s_fs_info;
}
@@ -2277,11 +2445,11 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u32 blocksize,
u64 parent, u64 root_objectid,
struct btrfs_disk_key *key, int level,
- u64 hint, u64 empty_size);
+ u64 hint, u64 empty_size, int for_cow);
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
- u64 parent, int last_ref);
+ u64 parent, int last_ref, int for_cow);
struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u32 blocksize,
@@ -2301,17 +2469,17 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
u64 search_end, struct btrfs_key *ins,
u64 data);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref);
+ struct extent_buffer *buf, int full_backref, int for_cow);
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref);
+ struct extent_buffer *buf, int full_backref, int for_cow);
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 flags,
int is_data);
int btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, u64 parent,
- u64 root_objectid, u64 owner, u64 offset);
+ u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
+ u64 owner, u64 offset, int for_cow);
int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
@@ -2323,7 +2491,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
- u64 root_objectid, u64 owner, u64 offset);
+ u64 root_objectid, u64 owner, u64 offset, int for_cow);
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
@@ -2482,10 +2650,18 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
}
int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
+static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
+{
+ ++p->slots[0];
+ if (p->slots[0] >= btrfs_header_nritems(p->nodes[0]))
+ return btrfs_next_leaf(root, p);
+ return 0;
+}
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
void btrfs_drop_snapshot(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv, int update_ref);
+ struct btrfs_block_rsv *block_rsv, int update_ref,
+ int for_reloc);
int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *node,
@@ -2500,6 +2676,7 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
}
static inline void free_fs_info(struct btrfs_fs_info *fs_info)
{
+ kfree(fs_info->balance_ctl);
kfree(fs_info->delayed_root);
kfree(fs_info->extent_root);
kfree(fs_info->tree_root);
@@ -2510,6 +2687,24 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
kfree(fs_info->super_for_commit);
kfree(fs_info);
}
+/**
+ * profile_is_valid - tests whether a given profile is valid and reduced
+ * @flags: profile to validate
+ * @extended: if true @flags is treated as an extended profile
+ */
+static inline int profile_is_valid(u64 flags, int extended)
+{
+ u64 mask = ~BTRFS_BLOCK_GROUP_PROFILE_MASK;
+
+ flags &= ~BTRFS_BLOCK_GROUP_TYPE_MASK;
+ if (extended)
+ mask &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
+
+ if (flags & mask)
+ return 0;
+ /* true if zero or exactly one bit set */
+ return (flags & (~flags + 1)) == flags;
+}
/* root-item.c */
int btrfs_find_root_ref(struct btrfs_root *tree_root,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 9c1eccc2c50..fe4cd0f1cef 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -595,8 +595,12 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
num_bytes = btrfs_calc_trans_metadata_size(root, 1);
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
- if (!ret)
+ if (!ret) {
+ trace_btrfs_space_reservation(root->fs_info, "delayed_item",
+ item->key.objectid,
+ num_bytes, 1);
item->bytes_reserved = num_bytes;
+ }
return ret;
}
@@ -610,6 +614,9 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
return;
rsv = &root->fs_info->delayed_block_rsv;
+ trace_btrfs_space_reservation(root->fs_info, "delayed_item",
+ item->key.objectid, item->bytes_reserved,
+ 0);
btrfs_block_rsv_release(root, rsv,
item->bytes_reserved);
}
@@ -624,7 +631,7 @@ static int btrfs_delayed_inode_reserve_metadata(
struct btrfs_block_rsv *dst_rsv;
u64 num_bytes;
int ret;
- int release = false;
+ bool release = false;
src_rsv = trans->block_rsv;
dst_rsv = &root->fs_info->delayed_block_rsv;
@@ -651,8 +658,13 @@ static int btrfs_delayed_inode_reserve_metadata(
*/
if (ret == -EAGAIN)
ret = -ENOSPC;
- if (!ret)
+ if (!ret) {
node->bytes_reserved = num_bytes;
+ trace_btrfs_space_reservation(root->fs_info,
+ "delayed_inode",
+ btrfs_ino(inode),
+ num_bytes, 1);
+ }
return ret;
} else if (src_rsv == &root->fs_info->delalloc_block_rsv) {
spin_lock(&BTRFS_I(inode)->lock);
@@ -707,11 +719,17 @@ out:
* reservation here. I think it may be time for a documentation page on
* how block rsvs. work.
*/
- if (!ret)
+ if (!ret) {
+ trace_btrfs_space_reservation(root->fs_info, "delayed_inode",
+ btrfs_ino(inode), num_bytes, 1);
node->bytes_reserved = num_bytes;
+ }
- if (release)
+ if (release) {
+ trace_btrfs_space_reservation(root->fs_info, "delalloc",
+ btrfs_ino(inode), num_bytes, 0);
btrfs_block_rsv_release(root, src_rsv, num_bytes);
+ }
return ret;
}
@@ -725,6 +743,8 @@ static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root,
return;
rsv = &root->fs_info->delayed_block_rsv;
+ trace_btrfs_space_reservation(root->fs_info, "delayed_inode",
+ node->inode_id, node->bytes_reserved, 0);
btrfs_block_rsv_release(root, rsv,
node->bytes_reserved);
node->bytes_reserved = 0;
@@ -1372,13 +1392,6 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
goto release_node;
}
- ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item);
- /*
- * we have reserved enough space when we start a new transaction,
- * so reserving metadata failure is impossible
- */
- BUG_ON(ret);
-
delayed_item->key.objectid = btrfs_ino(dir);
btrfs_set_key_type(&delayed_item->key, BTRFS_DIR_INDEX_KEY);
delayed_item->key.offset = index;
@@ -1391,6 +1404,14 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
dir_item->type = type;
memcpy((char *)(dir_item + 1), name, name_len);
+ ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item);
+ /*
+ * we have reserved enough space when we start a new transaction,
+ * so reserving metadata failure is impossible
+ */
+ BUG_ON(ret);
+
+
mutex_lock(&delayed_node->mutex);
ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
if (unlikely(ret)) {
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 125cf76fcd0..66e4f29505a 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -101,6 +101,11 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2,
return -1;
if (ref1->type > ref2->type)
return 1;
+ /* merging of sequenced refs is not allowed */
+ if (ref1->seq < ref2->seq)
+ return -1;
+ if (ref1->seq > ref2->seq)
+ return 1;
if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
@@ -150,16 +155,22 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
/*
* find an head entry based on bytenr. This returns the delayed ref
- * head if it was able to find one, or NULL if nothing was in that spot
+ * head if it was able to find one, or NULL if nothing was in that spot.
+ * If return_bigger is given, the next bigger entry is returned if no exact
+ * match is found.
*/
static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root,
u64 bytenr,
- struct btrfs_delayed_ref_node **last)
+ struct btrfs_delayed_ref_node **last,
+ int return_bigger)
{
- struct rb_node *n = root->rb_node;
+ struct rb_node *n;
struct btrfs_delayed_ref_node *entry;
- int cmp;
+ int cmp = 0;
+again:
+ n = root->rb_node;
+ entry = NULL;
while (n) {
entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
WARN_ON(!entry->in_tree);
@@ -182,6 +193,19 @@ static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root,
else
return entry;
}
+ if (entry && return_bigger) {
+ if (cmp > 0) {
+ n = rb_next(&entry->rb_node);
+ if (!n)
+ n = rb_first(root);
+ entry = rb_entry(n, struct btrfs_delayed_ref_node,
+ rb_node);
+ bytenr = entry->bytenr;
+ return_bigger = 0;
+ goto again;
+ }
+ return entry;
+ }
return NULL;
}
@@ -209,6 +233,24 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
return 0;
}
+int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
+ u64 seq)
+{
+ struct seq_list *elem;
+
+ assert_spin_locked(&delayed_refs->lock);
+ if (list_empty(&delayed_refs->seq_head))
+ return 0;
+
+ elem = list_first_entry(&delayed_refs->seq_head, struct seq_list, list);
+ if (seq >= elem->seq) {
+ pr_debug("holding back delayed_ref %llu, lowest is %llu (%p)\n",
+ seq, elem->seq, delayed_refs);
+ return 1;
+ }
+ return 0;
+}
+
int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
struct list_head *cluster, u64 start)
{
@@ -223,20 +265,8 @@ int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
node = rb_first(&delayed_refs->root);
} else {
ref = NULL;
- find_ref_head(&delayed_refs->root, start, &ref);
+ find_ref_head(&delayed_refs->root, start + 1, &ref, 1);
if (ref) {
- struct btrfs_delayed_ref_node *tmp;
-
- node = rb_prev(&ref->rb_node);
- while (node) {
- tmp = rb_entry(node,
- struct btrfs_delayed_ref_node,
- rb_node);
- if (tmp->bytenr < start)
- break;
- ref = tmp;
- node = rb_prev(&ref->rb_node);
- }
node = &ref->rb_node;
} else
node = rb_first(&delayed_refs->root);
@@ -390,7 +420,8 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
* this does all the dirty work in terms of maintaining the correct
* overall modification count.
*/
-static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans,
+static noinline int add_delayed_ref_head(struct btrfs_fs_info *fs_info,
+ struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref,
u64 bytenr, u64 num_bytes,
int action, int is_data)
@@ -437,6 +468,7 @@ static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans,
ref->action = 0;
ref->is_head = 1;
ref->in_tree = 1;
+ ref->seq = 0;
head_ref = btrfs_delayed_node_to_head(ref);
head_ref->must_insert_reserved = must_insert_reserved;
@@ -468,14 +500,17 @@ static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans,
/*
* helper to insert a delayed tree ref into the rbtree.
*/
-static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans,
+static noinline int add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
+ struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref,
u64 bytenr, u64 num_bytes, u64 parent,
- u64 ref_root, int level, int action)
+ u64 ref_root, int level, int action,
+ int for_cow)
{
struct btrfs_delayed_ref_node *existing;
struct btrfs_delayed_tree_ref *full_ref;
struct btrfs_delayed_ref_root *delayed_refs;
+ u64 seq = 0;
if (action == BTRFS_ADD_DELAYED_EXTENT)
action = BTRFS_ADD_DELAYED_REF;
@@ -491,14 +526,17 @@ static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans,
ref->is_head = 0;
ref->in_tree = 1;
+ if (need_ref_seq(for_cow, ref_root))
+ seq = inc_delayed_seq(delayed_refs);
+ ref->seq = seq;
+
full_ref = btrfs_delayed_node_to_tree_ref(ref);
- if (parent) {
- full_ref->parent = parent;
+ full_ref->parent = parent;
+ full_ref->root = ref_root;
+ if (parent)
ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
- } else {
- full_ref->root = ref_root;
+ else
ref->type = BTRFS_TREE_BLOCK_REF_KEY;
- }
full_ref->level = level;
trace_btrfs_delayed_tree_ref(ref, full_ref, action);
@@ -522,15 +560,17 @@ static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans,
/*
* helper to insert a delayed data ref into the rbtree.
*/
-static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans,
+static noinline int add_delayed_data_ref(struct btrfs_fs_info *fs_info,
+ struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, u64 owner, u64 offset,
- int action)
+ int action, int for_cow)
{
struct btrfs_delayed_ref_node *existing;
struct btrfs_delayed_data_ref *full_ref;
struct btrfs_delayed_ref_root *delayed_refs;
+ u64 seq = 0;
if (action == BTRFS_ADD_DELAYED_EXTENT)
action = BTRFS_ADD_DELAYED_REF;
@@ -546,14 +586,18 @@ static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans,
ref->is_head = 0;
ref->in_tree = 1;
+ if (need_ref_seq(for_cow, ref_root))
+ seq = inc_delayed_seq(delayed_refs);
+ ref->seq = seq;
+
full_ref = btrfs_delayed_node_to_data_ref(ref);
- if (parent) {
- full_ref->parent = parent;
+ full_ref->parent = parent;
+ full_ref->root = ref_root;
+ if (parent)
ref->type = BTRFS_SHARED_DATA_REF_KEY;
- } else {
- full_ref->root = ref_root;
+ else
ref->type = BTRFS_EXTENT_DATA_REF_KEY;
- }
+
full_ref->objectid = owner;
full_ref->offset = offset;
@@ -580,10 +624,12 @@ static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans,
* to make sure the delayed ref is eventually processed before this
* transaction commits.
*/
-int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
+int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
+ struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
- struct btrfs_delayed_extent_op *extent_op)
+ struct btrfs_delayed_extent_op *extent_op,
+ int for_cow)
{
struct btrfs_delayed_tree_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
@@ -610,13 +656,17 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
* insert both the head node and the new ref without dropping
* the spin lock
*/
- ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes,
- action, 0);
+ ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
+ num_bytes, action, 0);
BUG_ON(ret);
- ret = add_delayed_tree_ref(trans, &ref->node, bytenr, num_bytes,
- parent, ref_root, level, action);
+ ret = add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
+ num_bytes, parent, ref_root, level, action,
+ for_cow);
BUG_ON(ret);
+ if (!need_ref_seq(for_cow, ref_root) &&
+ waitqueue_active(&delayed_refs->seq_wait))
+ wake_up(&delayed_refs->seq_wait);
spin_unlock(&delayed_refs->lock);
return 0;
}
@@ -624,11 +674,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
/*
* add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
*/
-int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
+int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
+ struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
u64 owner, u64 offset, int action,
- struct btrfs_delayed_extent_op *extent_op)
+ struct btrfs_delayed_extent_op *extent_op,
+ int for_cow)
{
struct btrfs_delayed_data_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
@@ -655,18 +707,23 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
* insert both the head node and the new ref without dropping
* the spin lock
*/
- ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes,
- action, 1);
+ ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
+ num_bytes, action, 1);
BUG_ON(ret);
- ret = add_delayed_data_ref(trans, &ref->node, bytenr, num_bytes,
- parent, ref_root, owner, offset, action);
+ ret = add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
+ num_bytes, parent, ref_root, owner, offset,
+ action, for_cow);
BUG_ON(ret);
+ if (!need_ref_seq(for_cow, ref_root) &&
+ waitqueue_active(&delayed_refs->seq_wait))
+ wake_up(&delayed_refs->seq_wait);
spin_unlock(&delayed_refs->lock);
return 0;
}
-int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
+int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
+ struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
struct btrfs_delayed_extent_op *extent_op)
{
@@ -683,11 +740,13 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
- ret = add_delayed_ref_head(trans, &head_ref->node, bytenr,
+ ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
extent_op->is_data);
BUG_ON(ret);
+ if (waitqueue_active(&delayed_refs->seq_wait))
+ wake_up(&delayed_refs->seq_wait);
spin_unlock(&delayed_refs->lock);
return 0;
}
@@ -704,7 +763,7 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
struct btrfs_delayed_ref_root *delayed_refs;
delayed_refs = &trans->transaction->delayed_refs;
- ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
+ ref = find_ref_head(&delayed_refs->root, bytenr, NULL, 0);
if (ref)
return btrfs_delayed_node_to_head(ref);
return NULL;
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index e287e3b0eab..d8f244d9492 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -33,6 +33,9 @@ struct btrfs_delayed_ref_node {
/* the size of the extent */
u64 num_bytes;
+ /* seq number to keep track of insertion order */
+ u64 seq;
+
/* ref count on this data structure */
atomic_t refs;
@@ -98,19 +101,15 @@ struct btrfs_delayed_ref_head {
struct btrfs_delayed_tree_ref {
struct btrfs_delayed_ref_node node;
- union {
- u64 root;
- u64 parent;
- };
+ u64 root;
+ u64 parent;
int level;
};
struct btrfs_delayed_data_ref {
struct btrfs_delayed_ref_node node;
- union {
- u64 root;
- u64 parent;
- };
+ u64 root;
+ u64 parent;
u64 objectid;
u64 offset;
};
@@ -140,6 +139,26 @@ struct btrfs_delayed_ref_root {
int flushing;
u64 run_delayed_start;
+
+ /*
+ * seq number of delayed refs. We need to know if a backref was being
+ * added before the currently processed ref or afterwards.
+ */
+ u64 seq;
+
+ /*
+ * seq_list holds a list of all seq numbers that are currently being
+ * added to the list. While walking backrefs (btrfs_find_all_roots,
+ * qgroups), which might take some time, no newer ref must be processed,
+ * as it might influence the outcome of the walk.
+ */
+ struct list_head seq_head;
+
+ /*
+ * when the only refs we have in the list must not be processed, we want
+ * to wait for more refs to show up or for the end of backref walking.
+ */
+ wait_queue_head_t seq_wait;
};
static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
@@ -151,16 +170,21 @@ static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
}
}
-int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
+int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
+ struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
- struct btrfs_delayed_extent_op *extent_op);
-int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_extent_op *extent_op,
+ int for_cow);
+int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
+ struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
u64 owner, u64 offset, int action,
- struct btrfs_delayed_extent_op *extent_op);
-int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_extent_op *extent_op,
+ int for_cow);
+int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
+ struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
struct btrfs_delayed_extent_op *extent_op);
@@ -170,6 +194,60 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head);
int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
struct list_head *cluster, u64 search_start);
+
+struct seq_list {
+ struct list_head list;
+ u64 seq;
+};
+
+static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs)
+{
+ assert_spin_locked(&delayed_refs->lock);
+ ++delayed_refs->seq;
+ return delayed_refs->seq;
+}
+
+static inline void
+btrfs_get_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
+ struct seq_list *elem)
+{
+ assert_spin_locked(&delayed_refs->lock);
+ elem->seq = delayed_refs->seq;
+ list_add_tail(&elem->list, &delayed_refs->seq_head);
+}
+
+static inline void
+btrfs_put_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
+ struct seq_list *elem)
+{
+ spin_lock(&delayed_refs->lock);
+ list_del(&elem->list);
+ wake_up(&delayed_refs->seq_wait);
+ spin_unlock(&delayed_refs->lock);
+}
+
+int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
+ u64 seq);
+
+/*
+ * delayed refs with a ref_seq > 0 must be held back during backref walking.
+ * this only applies to items in one of the fs-trees. for_cow items never need
+ * to be held back, so they won't get a ref_seq number.
+ */
+static inline int need_ref_seq(int for_cow, u64 rootid)
+{
+ if (for_cow)
+ return 0;
+
+ if (rootid == BTRFS_FS_TREE_OBJECTID)
+ return 1;
+
+ if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID)
+ return 1;
+
+ return 0;
+}
+
/*
* a node might live in a head or a regular ref, this lets you
* test for the proper type to use.
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index d8525662ca7..7aa9cd36bf1 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -43,6 +43,7 @@
#include "tree-log.h"
#include "free-space-cache.h"
#include "inode-map.h"
+#include "check-integrity.h"
static struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
@@ -1143,7 +1144,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->orphan_item_inserted = 0;
root->orphan_cleanup_state = 0;
- root->fs_info = fs_info;
root->objectid = objectid;
root->last_trans = 0;
root->highest_objectid = 0;
@@ -1217,6 +1217,14 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
return 0;
}
+static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_root *root = kzalloc(sizeof(*root), GFP_NOFS);
+ if (root)
+ root->fs_info = fs_info;
+ return root;
+}
+
static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
@@ -1224,7 +1232,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *tree_root = fs_info->tree_root;
struct extent_buffer *leaf;
- root = kzalloc(sizeof(*root), GFP_NOFS);
+ root = btrfs_alloc_root(fs_info);
if (!root)
return ERR_PTR(-ENOMEM);
@@ -1244,7 +1252,8 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
root->ref_cows = 0;
leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
- BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0);
+ BTRFS_TREE_LOG_OBJECTID, NULL,
+ 0, 0, 0, 0);
if (IS_ERR(leaf)) {
kfree(root);
return ERR_CAST(leaf);
@@ -1318,7 +1327,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
u32 blocksize;
int ret = 0;
- root = kzalloc(sizeof(*root), GFP_NOFS);
+ root = btrfs_alloc_root(fs_info);
if (!root)
return ERR_PTR(-ENOMEM);
if (location->offset == (u64)-1) {
@@ -1874,9 +1883,9 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
}
-struct btrfs_root *open_ctree(struct super_block *sb,
- struct btrfs_fs_devices *fs_devices,
- char *options)
+int open_ctree(struct super_block *sb,
+ struct btrfs_fs_devices *fs_devices,
+ char *options)
{
u32 sectorsize;
u32 nodesize;
@@ -1888,8 +1897,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
struct btrfs_key location;
struct buffer_head *bh;
struct btrfs_super_block *disk_super;
- struct btrfs_root *tree_root = btrfs_sb(sb);
- struct btrfs_fs_info *fs_info = tree_root->fs_info;
+ struct btrfs_fs_info *fs_info = btrfs_sb(sb);
+ struct btrfs_root *tree_root;
struct btrfs_root *extent_root;
struct btrfs_root *csum_root;
struct btrfs_root *chunk_root;
@@ -1900,16 +1909,14 @@ struct btrfs_root *open_ctree(struct super_block *sb,
int num_backups_tried = 0;
int backup_index = 0;
- extent_root = fs_info->extent_root =
- kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
- csum_root = fs_info->csum_root =
- kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
- chunk_root = fs_info->chunk_root =
- kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
- dev_root = fs_info->dev_root =
- kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+ tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info);
+ extent_root = fs_info->extent_root = btrfs_alloc_root(fs_info);
+ csum_root = fs_info->csum_root = btrfs_alloc_root(fs_info);
+ chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info);
+ dev_root = fs_info->dev_root = btrfs_alloc_root(fs_info);
- if (!extent_root || !csum_root || !chunk_root || !dev_root) {
+ if (!tree_root || !extent_root || !csum_root ||
+ !chunk_root || !dev_root) {
err = -ENOMEM;
goto fail;
}
@@ -1998,6 +2005,17 @@ struct btrfs_root *open_ctree(struct super_block *sb,
init_waitqueue_head(&fs_info->scrub_pause_wait);
init_rwsem(&fs_info->scrub_super_lock);
fs_info->scrub_workers_refcnt = 0;
+#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
+ fs_info->check_integrity_print_mask = 0;
+#endif
+
+ spin_lock_init(&fs_info->balance_lock);
+ mutex_init(&fs_info->balance_mutex);
+ atomic_set(&fs_info->balance_running, 0);
+ atomic_set(&fs_info->balance_pause_req, 0);
+ atomic_set(&fs_info->balance_cancel_req, 0);
+ fs_info->balance_ctl = NULL;
+ init_waitqueue_head(&fs_info->balance_wait_q);
sb->s_blocksize = 4096;
sb->s_blocksize_bits = blksize_bits(4096);
@@ -2267,9 +2285,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
(unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node),
BTRFS_UUID_SIZE);
- mutex_lock(&fs_info->chunk_mutex);
ret = btrfs_read_chunk_tree(chunk_root);
- mutex_unlock(&fs_info->chunk_mutex);
if (ret) {
printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n",
sb->s_id);
@@ -2318,9 +2334,6 @@ retry_root_backup:
fs_info->generation = generation;
fs_info->last_trans_committed = generation;
- fs_info->data_alloc_profile = (u64)-1;
- fs_info->metadata_alloc_profile = (u64)-1;
- fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
ret = btrfs_init_space_info(fs_info);
if (ret) {
@@ -2353,6 +2366,19 @@ retry_root_backup:
btrfs_set_opt(fs_info->mount_opt, SSD);
}
+#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
+ if (btrfs_test_opt(tree_root, CHECK_INTEGRITY)) {
+ ret = btrfsic_mount(tree_root, fs_devices,
+ btrfs_test_opt(tree_root,
+ CHECK_INTEGRITY_INCLUDING_EXTENT_DATA) ?
+ 1 : 0,
+ fs_info->check_integrity_print_mask);
+ if (ret)
+ printk(KERN_WARNING "btrfs: failed to initialize"
+ " integrity check module %s\n", sb->s_id);
+ }
+#endif
+
/* do not make disk changes in broken FS */
if (btrfs_super_log_root(disk_super) != 0 &&
!(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) {
@@ -2368,7 +2394,7 @@ retry_root_backup:
btrfs_level_size(tree_root,
btrfs_super_log_root_level(disk_super));
- log_tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+ log_tree_root = btrfs_alloc_root(fs_info);
if (!log_tree_root) {
err = -ENOMEM;
goto fail_trans_kthread;
@@ -2423,13 +2449,17 @@ retry_root_backup:
if (!err)
err = btrfs_orphan_cleanup(fs_info->tree_root);
up_read(&fs_info->cleanup_work_sem);
+
+ if (!err)
+ err = btrfs_recover_balance(fs_info->tree_root);
+
if (err) {
close_ctree(tree_root);
- return ERR_PTR(err);
+ return err;
}
}
- return tree_root;
+ return 0;
fail_trans_kthread:
kthread_stop(fs_info->transaction_kthread);
@@ -2475,8 +2505,7 @@ fail_srcu:
cleanup_srcu_struct(&fs_info->subvol_srcu);
fail:
btrfs_close_devices(fs_info->fs_devices);
- free_fs_info(fs_info);
- return ERR_PTR(err);
+ return err;
recovery_tree_root:
if (!btrfs_test_opt(tree_root, RECOVERY))
@@ -2631,7 +2660,7 @@ static int write_dev_supers(struct btrfs_device *device,
* we fua the first super. The others we allow
* to go down lazy.
*/
- ret = submit_bh(WRITE_FUA, bh);
+ ret = btrfsic_submit_bh(WRITE_FUA, bh);
if (ret)
errors++;
}
@@ -2708,7 +2737,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
device->flush_bio = bio;
bio_get(bio);
- submit_bio(WRITE_FLUSH, bio);
+ btrfsic_submit_bio(WRITE_FLUSH, bio);
return 0;
}
@@ -2972,6 +3001,9 @@ int close_ctree(struct btrfs_root *root)
fs_info->closing = 1;
smp_mb();
+ /* pause restriper - we want to resume on mount */
+ btrfs_pause_balance(root->fs_info);
+
btrfs_scrub_cancel(root);
/* wait for any defraggers to finish */
@@ -2979,7 +3011,7 @@ int close_ctree(struct btrfs_root *root)
(atomic_read(&fs_info->defrag_running) == 0));
/* clear out the rbtree of defraggable inodes */
- btrfs_run_defrag_inodes(root->fs_info);
+ btrfs_run_defrag_inodes(fs_info);
/*
* Here come 2 situations when btrfs is broken to flip readonly:
@@ -3008,8 +3040,8 @@ int close_ctree(struct btrfs_root *root)
btrfs_put_block_group_cache(fs_info);
- kthread_stop(root->fs_info->transaction_kthread);
- kthread_stop(root->fs_info->cleaner_kthread);
+ kthread_stop(fs_info->transaction_kthread);
+ kthread_stop(fs_info->cleaner_kthread);
fs_info->closing = 2;
smp_mb();
@@ -3027,14 +3059,14 @@ int close_ctree(struct btrfs_root *root)
free_extent_buffer(fs_info->extent_root->commit_root);
free_extent_buffer(fs_info->tree_root->node);
free_extent_buffer(fs_info->tree_root->commit_root);
- free_extent_buffer(root->fs_info->chunk_root->node);
- free_extent_buffer(root->fs_info->chunk_root->commit_root);
- free_extent_buffer(root->fs_info->dev_root->node);
- free_extent_buffer(root->fs_info->dev_root->commit_root);
- free_extent_buffer(root->fs_info->csum_root->node);
- free_extent_buffer(root->fs_info->csum_root->commit_root);
+ free_extent_buffer(fs_info->chunk_root->node);
+ free_extent_buffer(fs_info->chunk_root->commit_root);
+ free_extent_buffer(fs_info->dev_root->node);
+ free_extent_buffer(fs_info->dev_root->commit_root);
+ free_extent_buffer(fs_info->csum_root->node);
+ free_extent_buffer(fs_info->csum_root->commit_root);
- btrfs_free_block_groups(root->fs_info);
+ btrfs_free_block_groups(fs_info);
del_fs_roots(fs_info);
@@ -3054,14 +3086,17 @@ int close_ctree(struct btrfs_root *root)
btrfs_stop_workers(&fs_info->caching_workers);
btrfs_stop_workers(&fs_info->readahead_workers);
+#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
+ if (btrfs_test_opt(root, CHECK_INTEGRITY))
+ btrfsic_unmount(root, fs_info->fs_devices);
+#endif
+
btrfs_close_devices(fs_info->fs_devices);
btrfs_mapping_tree_free(&fs_info->mapping_tree);
bdi_destroy(&fs_info->bdi);
cleanup_srcu_struct(&fs_info->subvol_srcu);
- free_fs_info(fs_info);
-
return 0;
}
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index c99d0a8f13f..e4bc4741319 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -46,9 +46,9 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
u64 bytenr, u32 blocksize);
int clean_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *buf);
-struct btrfs_root *open_ctree(struct super_block *sb,
- struct btrfs_fs_devices *fs_devices,
- char *options);
+int open_ctree(struct super_block *sb,
+ struct btrfs_fs_devices *fs_devices,
+ char *options);
int close_ctree(struct btrfs_root *root);
int write_ctree_super(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int max_mirrors);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 1b8dc33778f..5f77166fd01 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -67,7 +67,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
u64 root_objectid, u32 generation,
int check_generation)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(sb)->fs_info;
+ struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_root *root;
struct inode *inode;
struct btrfs_key key;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f5fbe576d2b..700879ed64c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -618,8 +618,7 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
struct list_head *head = &info->space_info;
struct btrfs_space_info *found;
- flags &= BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_SYSTEM |
- BTRFS_BLOCK_GROUP_METADATA;
+ flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
rcu_read_lock();
list_for_each_entry_rcu(found, head, list) {
@@ -1872,20 +1871,24 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
- u64 root_objectid, u64 owner, u64 offset)
+ u64 root_objectid, u64 owner, u64 offset, int for_cow)
{
int ret;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+
BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
root_objectid == BTRFS_TREE_LOG_OBJECTID);
if (owner < BTRFS_FIRST_FREE_OBJECTID) {
- ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
+ ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
+ num_bytes,
parent, root_objectid, (int)owner,
- BTRFS_ADD_DELAYED_REF, NULL);
+ BTRFS_ADD_DELAYED_REF, NULL, for_cow);
} else {
- ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
+ ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
+ num_bytes,
parent, root_objectid, owner, offset,
- BTRFS_ADD_DELAYED_REF, NULL);
+ BTRFS_ADD_DELAYED_REF, NULL, for_cow);
}
return ret;
}
@@ -2233,6 +2236,28 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
}
/*
+ * locked_ref is the head node, so we have to go one
+ * node back for any delayed ref updates
+ */
+ ref = select_delayed_ref(locked_ref);
+
+ if (ref && ref->seq &&
+ btrfs_check_delayed_seq(delayed_refs, ref->seq)) {
+ /*
+ * there are still refs with lower seq numbers in the
+ * process of being added. Don't run this ref yet.
+ */
+ list_del_init(&locked_ref->cluster);
+ mutex_unlock(&locked_ref->mutex);
+ locked_ref = NULL;
+ delayed_refs->num_heads_ready++;
+ spin_unlock(&delayed_refs->lock);
+ cond_resched();
+ spin_lock(&delayed_refs->lock);
+ continue;
+ }
+
+ /*
* record the must insert reserved flag before we
* drop the spin lock.
*/
@@ -2242,11 +2267,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
extent_op = locked_ref->extent_op;
locked_ref->extent_op = NULL;
- /*
- * locked_ref is the head node, so we have to go one
- * node back for any delayed ref updates
- */
- ref = select_delayed_ref(locked_ref);
if (!ref) {
/* All delayed refs have been processed, Go ahead
* and send the head node to run_one_delayed_ref,
@@ -2267,9 +2287,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
BUG_ON(ret);
kfree(extent_op);
- cond_resched();
- spin_lock(&delayed_refs->lock);
- continue;
+ goto next;
}
list_del_init(&locked_ref->cluster);
@@ -2279,7 +2297,12 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
ref->in_tree = 0;
rb_erase(&ref->rb_node, &delayed_refs->root);
delayed_refs->num_entries--;
-
+ /*
+ * we modified num_entries, but as we're currently running
+ * delayed refs, skip
+ * wake_up(&delayed_refs->seq_wait);
+ * here.
+ */
spin_unlock(&delayed_refs->lock);
ret = run_one_delayed_ref(trans, root, ref, extent_op,
@@ -2289,13 +2312,34 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
btrfs_put_delayed_ref(ref);
kfree(extent_op);
count++;
-
+next:
+ do_chunk_alloc(trans, root->fs_info->extent_root,
+ 2 * 1024 * 1024,
+ btrfs_get_alloc_profile(root, 0),
+ CHUNK_ALLOC_NO_FORCE);
cond_resched();
spin_lock(&delayed_refs->lock);
}
return count;
}
+
+static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
+ unsigned long num_refs)
+{
+ struct list_head *first_seq = delayed_refs->seq_head.next;
+
+ spin_unlock(&delayed_refs->lock);
+ pr_debug("waiting for more refs (num %ld, first %p)\n",
+ num_refs, first_seq);
+ wait_event(delayed_refs->seq_wait,
+ num_refs != delayed_refs->num_entries ||
+ delayed_refs->seq_head.next != first_seq);
+ pr_debug("done waiting for more refs (num %ld, first %p)\n",
+ delayed_refs->num_entries, delayed_refs->seq_head.next);
+ spin_lock(&delayed_refs->lock);
+}
+
/*
* this starts processing the delayed reference count updates and
* extent insertions we have queued up so far. count can be
@@ -2311,15 +2355,23 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref;
struct list_head cluster;
int ret;
+ u64 delayed_start;
int run_all = count == (unsigned long)-1;
int run_most = 0;
+ unsigned long num_refs = 0;
+ int consider_waiting;
if (root == root->fs_info->extent_root)
root = root->fs_info->tree_root;
+ do_chunk_alloc(trans, root->fs_info->extent_root,
+ 2 * 1024 * 1024, btrfs_get_alloc_profile(root, 0),
+ CHUNK_ALLOC_NO_FORCE);
+
delayed_refs = &trans->transaction->delayed_refs;
INIT_LIST_HEAD(&cluster);
again:
+ consider_waiting = 0;
spin_lock(&delayed_refs->lock);
if (count == 0) {
count = delayed_refs->num_entries * 2;
@@ -2336,11 +2388,35 @@ again:
* of refs to process starting at the first one we are able to
* lock
*/
+ delayed_start = delayed_refs->run_delayed_start;
ret = btrfs_find_ref_cluster(trans, &cluster,
delayed_refs->run_delayed_start);
if (ret)
break;
+ if (delayed_start >= delayed_refs->run_delayed_start) {
+ if (consider_waiting == 0) {
+ /*
+ * btrfs_find_ref_cluster looped. let's do one
+ * more cycle. if we don't run any delayed ref
+ * during that cycle (because we can't because
+ * all of them are blocked) and if the number of
+ * refs doesn't change, we avoid busy waiting.
+ */
+ consider_waiting = 1;
+ num_refs = delayed_refs->num_entries;
+ } else {
+ wait_for_more_refs(delayed_refs, num_refs);
+ /*
+ * after waiting, things have changed. we
+ * dropped the lock and someone else might have
+ * run some refs, built new clusters and so on.
+ * therefore, we restart staleness detection.
+ */
+ consider_waiting = 0;
+ }
+ }
+
ret = run_clustered_refs(trans, root, &cluster);
BUG_ON(ret < 0);
@@ -2348,6 +2424,11 @@ again:
if (count == 0)
break;
+
+ if (ret || delayed_refs->run_delayed_start == 0) {
+ /* refs were run, let's reset staleness detection */
+ consider_waiting = 0;
+ }
}
if (run_all) {
@@ -2405,7 +2486,8 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
extent_op->update_key = 0;
extent_op->is_data = is_data ? 1 : 0;
- ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op);
+ ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
+ num_bytes, extent_op);
if (ret)
kfree(extent_op);
return ret;
@@ -2590,7 +2672,7 @@ out:
static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
- int full_backref, int inc)
+ int full_backref, int inc, int for_cow)
{
u64 bytenr;
u64 num_bytes;
@@ -2603,7 +2685,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
int level;
int ret = 0;
int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
- u64, u64, u64, u64, u64, u64);
+ u64, u64, u64, u64, u64, u64, int);
ref_root = btrfs_header_owner(buf);
nritems = btrfs_header_nritems(buf);
@@ -2640,14 +2722,15 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
key.offset -= btrfs_file_extent_offset(buf, fi);
ret = process_func(trans, root, bytenr, num_bytes,
parent, ref_root, key.objectid,
- key.offset);
+ key.offset, for_cow);
if (ret)
goto fail;
} else {
bytenr = btrfs_node_blockptr(buf, i);
num_bytes = btrfs_level_size(root, level - 1);
ret = process_func(trans, root, bytenr, num_bytes,
- parent, ref_root, level - 1, 0);
+ parent, ref_root, level - 1, 0,
+ for_cow);
if (ret)
goto fail;
}
@@ -2659,15 +2742,15 @@ fail:
}
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref)
+ struct extent_buffer *buf, int full_backref, int for_cow)
{
- return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
+ return __btrfs_mod_ref(trans, root, buf, full_backref, 1, for_cow);
}
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref)
+ struct extent_buffer *buf, int full_backref, int for_cow)
{
- return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
+ return __btrfs_mod_ref(trans, root, buf, full_backref, 0, for_cow);
}
static int write_one_cache_group(struct btrfs_trans_handle *trans,
@@ -2993,9 +3076,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
INIT_LIST_HEAD(&found->block_groups[i]);
init_rwsem(&found->groups_sem);
spin_lock_init(&found->lock);
- found->flags = flags & (BTRFS_BLOCK_GROUP_DATA |
- BTRFS_BLOCK_GROUP_SYSTEM |
- BTRFS_BLOCK_GROUP_METADATA);
+ found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
found->total_bytes = total_bytes;
found->disk_total = total_bytes * factor;
found->bytes_used = bytes_used;
@@ -3016,20 +3097,27 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
{
- u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
- BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10 |
- BTRFS_BLOCK_GROUP_DUP);
- if (extra_flags) {
- if (flags & BTRFS_BLOCK_GROUP_DATA)
- fs_info->avail_data_alloc_bits |= extra_flags;
- if (flags & BTRFS_BLOCK_GROUP_METADATA)
- fs_info->avail_metadata_alloc_bits |= extra_flags;
- if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
- fs_info->avail_system_alloc_bits |= extra_flags;
- }
+ u64 extra_flags = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK;
+
+ /* chunk -> extended profile */
+ if (extra_flags == 0)
+ extra_flags = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
+
+ if (flags & BTRFS_BLOCK_GROUP_DATA)
+ fs_info->avail_data_alloc_bits |= extra_flags;
+ if (flags & BTRFS_BLOCK_GROUP_METADATA)
+ fs_info->avail_metadata_alloc_bits |= extra_flags;
+ if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
+ fs_info->avail_system_alloc_bits |= extra_flags;
}
+/*
+ * @flags: available profiles in extended format (see ctree.h)
+ *
+ * Returns reduced profile in chunk format. If profile changing is in
+ * progress (either running or paused) picks the target profile (if it's
+ * already available), otherwise falls back to plain reducing.
+ */
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
{
/*
@@ -3040,6 +3128,34 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
u64 num_devices = root->fs_info->fs_devices->rw_devices +
root->fs_info->fs_devices->missing_devices;
+ /* pick restriper's target profile if it's available */
+ spin_lock(&root->fs_info->balance_lock);
+ if (root->fs_info->balance_ctl) {
+ struct btrfs_balance_control *bctl = root->fs_info->balance_ctl;
+ u64 tgt = 0;
+
+ if ((flags & BTRFS_BLOCK_GROUP_DATA) &&
+ (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
+ (flags & bctl->data.target)) {
+ tgt = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
+ } else if ((flags & BTRFS_BLOCK_GROUP_SYSTEM) &&
+ (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
+ (flags & bctl->sys.target)) {
+ tgt = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
+ } else if ((flags & BTRFS_BLOCK_GROUP_METADATA) &&
+ (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
+ (flags & bctl->meta.target)) {
+ tgt = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
+ }
+
+ if (tgt) {
+ spin_unlock(&root->fs_info->balance_lock);
+ flags = tgt;
+ goto out;
+ }
+ }
+ spin_unlock(&root->fs_info->balance_lock);
+
if (num_devices == 1)
flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
if (num_devices < 4)
@@ -3059,22 +3175,25 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
((flags & BTRFS_BLOCK_GROUP_RAID1) |
(flags & BTRFS_BLOCK_GROUP_RAID10) |
- (flags & BTRFS_BLOCK_GROUP_DUP)))
+ (flags & BTRFS_BLOCK_GROUP_DUP))) {
flags &= ~BTRFS_BLOCK_GROUP_RAID0;
+ }
+
+out:
+ /* extended -> chunk profile */
+ flags &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
return flags;
}
static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
{
if (flags & BTRFS_BLOCK_GROUP_DATA)
- flags |= root->fs_info->avail_data_alloc_bits &
- root->fs_info->data_alloc_profile;
+ flags |= root->fs_info->avail_data_alloc_bits;
else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
- flags |= root->fs_info->avail_system_alloc_bits &
- root->fs_info->system_alloc_profile;
+ flags |= root->fs_info->avail_system_alloc_bits;
else if (flags & BTRFS_BLOCK_GROUP_METADATA)
- flags |= root->fs_info->avail_metadata_alloc_bits &
- root->fs_info->metadata_alloc_profile;
+ flags |= root->fs_info->avail_metadata_alloc_bits;
+
return btrfs_reduce_alloc_profile(root, flags);
}
@@ -3191,6 +3310,8 @@ commit_trans:
return -ENOSPC;
}
data_sinfo->bytes_may_use += bytes;
+ trace_btrfs_space_reservation(root->fs_info, "space_info",
+ (u64)data_sinfo, bytes, 1);
spin_unlock(&data_sinfo->lock);
return 0;
@@ -3210,6 +3331,8 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
data_sinfo = BTRFS_I(inode)->space_info;
spin_lock(&data_sinfo->lock);
data_sinfo->bytes_may_use -= bytes;
+ trace_btrfs_space_reservation(root->fs_info, "space_info",
+ (u64)data_sinfo, bytes, 0);
spin_unlock(&data_sinfo->lock);
}
@@ -3257,27 +3380,15 @@ static int should_alloc_chunk(struct btrfs_root *root,
if (num_bytes - num_allocated < thresh)
return 1;
}
-
- /*
- * we have two similar checks here, one based on percentage
- * and once based on a hard number of 256MB. The idea
- * is that if we have a good amount of free
- * room, don't allocate a chunk. A good mount is
- * less than 80% utilized of the chunks we have allocated,
- * or more than 256MB free
- */
- if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes)
- return 0;
-
- if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
- return 0;
-
thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
- /* 256MB or 5% of the FS */
- thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
+ /* 256MB or 2% of the FS */
+ thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 2));
+ /* system chunks need a much small threshold */
+ if (sinfo->flags & BTRFS_BLOCK_GROUP_SYSTEM)
+ thresh = 32 * 1024 * 1024;
- if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
+ if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 8))
return 0;
return 1;
}
@@ -3291,7 +3402,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
int wait_for_alloc = 0;
int ret = 0;
- flags = btrfs_reduce_alloc_profile(extent_root, flags);
+ BUG_ON(!profile_is_valid(flags, 0));
space_info = __find_space_info(extent_root->fs_info, flags);
if (!space_info) {
@@ -3582,6 +3693,10 @@ again:
if (used <= space_info->total_bytes) {
if (used + orig_bytes <= space_info->total_bytes) {
space_info->bytes_may_use += orig_bytes;
+ trace_btrfs_space_reservation(root->fs_info,
+ "space_info",
+ (u64)space_info,
+ orig_bytes, 1);
ret = 0;
} else {
/*
@@ -3649,6 +3764,10 @@ again:
if (used + num_bytes < space_info->total_bytes + avail) {
space_info->bytes_may_use += orig_bytes;
+ trace_btrfs_space_reservation(root->fs_info,
+ "space_info",
+ (u64)space_info,
+ orig_bytes, 1);
ret = 0;
} else {
wait_ordered = true;
@@ -3755,7 +3874,8 @@ static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
spin_unlock(&block_rsv->lock);
}
-static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
+static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *block_rsv,
struct btrfs_block_rsv *dest, u64 num_bytes)
{
struct btrfs_space_info *space_info = block_rsv->space_info;
@@ -3791,6 +3911,9 @@ static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
if (num_bytes) {
spin_lock(&space_info->lock);
space_info->bytes_may_use -= num_bytes;
+ trace_btrfs_space_reservation(fs_info, "space_info",
+ (u64)space_info,
+ num_bytes, 0);
space_info->reservation_progress++;
spin_unlock(&space_info->lock);
}
@@ -3947,7 +4070,8 @@ void btrfs_block_rsv_release(struct btrfs_root *root,
if (global_rsv->full || global_rsv == block_rsv ||
block_rsv->space_info != global_rsv->space_info)
global_rsv = NULL;
- block_rsv_release_bytes(block_rsv, global_rsv, num_bytes);
+ block_rsv_release_bytes(root->fs_info, block_rsv, global_rsv,
+ num_bytes);
}
/*
@@ -4006,11 +4130,15 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
num_bytes = sinfo->total_bytes - num_bytes;
block_rsv->reserved += num_bytes;
sinfo->bytes_may_use += num_bytes;
+ trace_btrfs_space_reservation(fs_info, "space_info",
+ (u64)sinfo, num_bytes, 1);
}
if (block_rsv->reserved >= block_rsv->size) {
num_bytes = block_rsv->reserved - block_rsv->size;
sinfo->bytes_may_use -= num_bytes;
+ trace_btrfs_space_reservation(fs_info, "space_info",
+ (u64)sinfo, num_bytes, 0);
sinfo->reservation_progress++;
block_rsv->reserved = block_rsv->size;
block_rsv->full = 1;
@@ -4045,7 +4173,8 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
{
- block_rsv_release_bytes(&fs_info->global_block_rsv, NULL, (u64)-1);
+ block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
+ (u64)-1);
WARN_ON(fs_info->delalloc_block_rsv.size > 0);
WARN_ON(fs_info->delalloc_block_rsv.reserved > 0);
WARN_ON(fs_info->trans_block_rsv.size > 0);
@@ -4062,6 +4191,8 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
if (!trans->bytes_reserved)
return;
+ trace_btrfs_space_reservation(root->fs_info, "transaction", (u64)trans,
+ trans->bytes_reserved, 0);
btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
trans->bytes_reserved = 0;
}
@@ -4079,6 +4210,8 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
* when we are truly done with the orphan item.
*/
u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
+ trace_btrfs_space_reservation(root->fs_info, "orphan",
+ btrfs_ino(inode), num_bytes, 1);
return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
}
@@ -4086,6 +4219,8 @@ void btrfs_orphan_release_metadata(struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
+ trace_btrfs_space_reservation(root->fs_info, "orphan",
+ btrfs_ino(inode), num_bytes, 0);
btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
}
@@ -4213,12 +4348,11 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
/* Need to be holding the i_mutex here if we aren't free space cache */
if (btrfs_is_free_space_inode(root, inode))
flush = 0;
- else
- WARN_ON(!mutex_is_locked(&inode->i_mutex));
if (flush && btrfs_transaction_in_commit(root->fs_info))
schedule_timeout(1);
+ mutex_lock(&BTRFS_I(inode)->delalloc_mutex);
num_bytes = ALIGN(num_bytes, root->sectorsize);
spin_lock(&BTRFS_I(inode)->lock);
@@ -4266,8 +4400,14 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
if (dropped)
to_free += btrfs_calc_trans_metadata_size(root, dropped);
- if (to_free)
+ if (to_free) {
btrfs_block_rsv_release(root, block_rsv, to_free);
+ trace_btrfs_space_reservation(root->fs_info,
+ "delalloc",
+ btrfs_ino(inode),
+ to_free, 0);
+ }
+ mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
return ret;
}
@@ -4278,7 +4418,11 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
}
BTRFS_I(inode)->reserved_extents += nr_extents;
spin_unlock(&BTRFS_I(inode)->lock);
+ mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
+ if (to_reserve)
+ trace_btrfs_space_reservation(root->fs_info,"delalloc",
+ btrfs_ino(inode), to_reserve, 1);
block_rsv_add_bytes(block_rsv, to_reserve, 1);
return 0;
@@ -4308,6 +4452,8 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
if (dropped > 0)
to_free += btrfs_calc_trans_metadata_size(root, dropped);
+ trace_btrfs_space_reservation(root->fs_info, "delalloc",
+ btrfs_ino(inode), to_free, 0);
btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
to_free);
}
@@ -4562,7 +4708,10 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
cache->reserved += num_bytes;
space_info->bytes_reserved += num_bytes;
if (reserve == RESERVE_ALLOC) {
- BUG_ON(space_info->bytes_may_use < num_bytes);
+ trace_btrfs_space_reservation(cache->fs_info,
+ "space_info",
+ (u64)space_info,
+ num_bytes, 0);
space_info->bytes_may_use -= num_bytes;
}
}
@@ -4928,6 +5077,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
rb_erase(&head->node.rb_node, &delayed_refs->root);
delayed_refs->num_entries--;
+ if (waitqueue_active(&delayed_refs->seq_wait))
+ wake_up(&delayed_refs->seq_wait);
/*
* we don't take a ref on the node because we're removing it from the
@@ -4955,16 +5106,17 @@ out:
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
- u64 parent, int last_ref)
+ u64 parent, int last_ref, int for_cow)
{
struct btrfs_block_group_cache *cache = NULL;
int ret;
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
- ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len,
- parent, root->root_key.objectid,
- btrfs_header_level(buf),
- BTRFS_DROP_DELAYED_REF, NULL);
+ ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
+ buf->start, buf->len,
+ parent, root->root_key.objectid,
+ btrfs_header_level(buf),
+ BTRFS_DROP_DELAYED_REF, NULL, for_cow);
BUG_ON(ret);
}
@@ -4999,12 +5151,12 @@ out:
btrfs_put_block_group(cache);
}
-int btrfs_free_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, u64 parent,
- u64 root_objectid, u64 owner, u64 offset)
+int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
+ u64 owner, u64 offset, int for_cow)
{
int ret;
+ struct btrfs_fs_info *fs_info = root->fs_info;
/*
* tree log blocks never actually go into the extent allocation
@@ -5016,14 +5168,17 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
btrfs_pin_extent(root, bytenr, num_bytes, 1);
ret = 0;
} else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
- ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
+ ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
+ num_bytes,
parent, root_objectid, (int)owner,
- BTRFS_DROP_DELAYED_REF, NULL);
+ BTRFS_DROP_DELAYED_REF, NULL, for_cow);
BUG_ON(ret);
} else {
- ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
- parent, root_objectid, owner,
- offset, BTRFS_DROP_DELAYED_REF, NULL);
+ ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
+ num_bytes,
+ parent, root_objectid, owner,
+ offset, BTRFS_DROP_DELAYED_REF,
+ NULL, for_cow);
BUG_ON(ret);
}
return ret;
@@ -5146,6 +5301,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
ins->objectid = 0;
ins->offset = 0;
+ trace_find_free_extent(orig_root, num_bytes, empty_size, data);
+
space_info = __find_space_info(root->fs_info, data);
if (!space_info) {
printk(KERN_ERR "No space info for %llu\n", data);
@@ -5295,15 +5452,6 @@ alloc:
if (unlikely(block_group->ro))
goto loop;
- spin_lock(&block_group->free_space_ctl->tree_lock);
- if (cached &&
- block_group->free_space_ctl->free_space <
- num_bytes + empty_cluster + empty_size) {
- spin_unlock(&block_group->free_space_ctl->tree_lock);
- goto loop;
- }
- spin_unlock(&block_group->free_space_ctl->tree_lock);
-
/*
* Ok we want to try and use the cluster allocator, so
* lets look there
@@ -5331,6 +5479,8 @@ alloc:
if (offset) {
/* we have a block, we're done */
spin_unlock(&last_ptr->refill_lock);
+ trace_btrfs_reserve_extent_cluster(root,
+ block_group, search_start, num_bytes);
goto checks;
}
@@ -5349,8 +5499,15 @@ refill_cluster:
* plenty of times and not have found
* anything, so we are likely way too
* fragmented for the clustering stuff to find
- * anything. */
- if (loop >= LOOP_NO_EMPTY_SIZE) {
+ * anything.
+ *
+ * However, if the cluster is taken from the
+ * current block group, release the cluster
+ * first, so that we stand a better chance of
+ * succeeding in the unclustered
+ * allocation. */
+ if (loop >= LOOP_NO_EMPTY_SIZE &&
+ last_ptr->block_group != block_group) {
spin_unlock(&last_ptr->refill_lock);
goto unclustered_alloc;
}
@@ -5361,6 +5518,11 @@ refill_cluster:
*/
btrfs_return_cluster_to_free_space(NULL, last_ptr);
+ if (loop >= LOOP_NO_EMPTY_SIZE) {
+ spin_unlock(&last_ptr->refill_lock);
+ goto unclustered_alloc;
+ }
+
/* allocate a cluster in this block group */
ret = btrfs_find_space_cluster(trans, root,
block_group, last_ptr,
@@ -5377,6 +5539,9 @@ refill_cluster:
if (offset) {
/* we found one, proceed */
spin_unlock(&last_ptr->refill_lock);
+ trace_btrfs_reserve_extent_cluster(root,
+ block_group, search_start,
+ num_bytes);
goto checks;
}
} else if (!cached && loop > LOOP_CACHING_NOWAIT
@@ -5401,6 +5566,15 @@ refill_cluster:
}
unclustered_alloc:
+ spin_lock(&block_group->free_space_ctl->tree_lock);
+ if (cached &&
+ block_group->free_space_ctl->free_space <
+ num_bytes + empty_cluster + empty_size) {
+ spin_unlock(&block_group->free_space_ctl->tree_lock);
+ goto loop;
+ }
+ spin_unlock(&block_group->free_space_ctl->tree_lock);
+
offset = btrfs_find_space_for_alloc(block_group, search_start,
num_bytes, empty_size);
/*
@@ -5438,9 +5612,6 @@ checks:
goto loop;
}
- ins->objectid = search_start;
- ins->offset = num_bytes;
-
if (offset < search_start)
btrfs_add_free_space(used_block_group, offset,
search_start - offset);
@@ -5457,6 +5628,8 @@ checks:
ins->objectid = search_start;
ins->offset = num_bytes;
+ trace_btrfs_reserve_extent(orig_root, block_group,
+ search_start, num_bytes);
if (offset < search_start)
btrfs_add_free_space(used_block_group, offset,
search_start - offset);
@@ -5842,9 +6015,10 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
- ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset,
- 0, root_objectid, owner, offset,
- BTRFS_ADD_DELAYED_EXTENT, NULL);
+ ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid,
+ ins->offset, 0,
+ root_objectid, owner, offset,
+ BTRFS_ADD_DELAYED_EXTENT, NULL, 0);
return ret;
}
@@ -5997,10 +6171,11 @@ use_block_rsv(struct btrfs_trans_handle *trans,
return ERR_PTR(-ENOSPC);
}
-static void unuse_block_rsv(struct btrfs_block_rsv *block_rsv, u32 blocksize)
+static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *block_rsv, u32 blocksize)
{
block_rsv_add_bytes(block_rsv, blocksize, 0);
- block_rsv_release_bytes(block_rsv, NULL, 0);
+ block_rsv_release_bytes(fs_info, block_rsv, NULL, 0);
}
/*
@@ -6014,7 +6189,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u32 blocksize,
u64 parent, u64 root_objectid,
struct btrfs_disk_key *key, int level,
- u64 hint, u64 empty_size)
+ u64 hint, u64 empty_size, int for_cow)
{
struct btrfs_key ins;
struct btrfs_block_rsv *block_rsv;
@@ -6030,7 +6205,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
ret = btrfs_reserve_extent(trans, root, blocksize, blocksize,
empty_size, hint, (u64)-1, &ins, 0);
if (ret) {
- unuse_block_rsv(block_rsv, blocksize);
+ unuse_block_rsv(root->fs_info, block_rsv, blocksize);
return ERR_PTR(ret);
}
@@ -6058,10 +6233,11 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
extent_op->update_flags = 1;
extent_op->is_data = 0;
- ret = btrfs_add_delayed_tree_ref(trans, ins.objectid,
+ ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
+ ins.objectid,
ins.offset, parent, root_objectid,
level, BTRFS_ADD_DELAYED_EXTENT,
- extent_op);
+ extent_op, for_cow);
BUG_ON(ret);
}
return buf;
@@ -6078,6 +6254,7 @@ struct walk_control {
int keep_locks;
int reada_slot;
int reada_count;
+ int for_reloc;
};
#define DROP_REFERENCE 1
@@ -6216,9 +6393,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
/* wc->stage == UPDATE_BACKREF */
if (!(wc->flags[level] & flag)) {
BUG_ON(!path->locks[level]);
- ret = btrfs_inc_ref(trans, root, eb, 1);
+ ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc);
BUG_ON(ret);
- ret = btrfs_dec_ref(trans, root, eb, 0);
+ ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc);
BUG_ON(ret);
ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
eb->len, flag, 0);
@@ -6362,7 +6539,7 @@ skip:
}
ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
- root->root_key.objectid, level - 1, 0);
+ root->root_key.objectid, level - 1, 0, 0);
BUG_ON(ret);
}
btrfs_tree_unlock(next);
@@ -6436,9 +6613,11 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
if (wc->refs[level] == 1) {
if (level == 0) {
if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
- ret = btrfs_dec_ref(trans, root, eb, 1);
+ ret = btrfs_dec_ref(trans, root, eb, 1,
+ wc->for_reloc);
else
- ret = btrfs_dec_ref(trans, root, eb, 0);
+ ret = btrfs_dec_ref(trans, root, eb, 0,
+ wc->for_reloc);
BUG_ON(ret);
}
/* make block locked assertion in clean_tree_block happy */
@@ -6465,7 +6644,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
btrfs_header_owner(path->nodes[level + 1]));
}
- btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
+ btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1, 0);
out:
wc->refs[level] = 0;
wc->flags[level] = 0;
@@ -6549,7 +6728,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
* blocks are properly updated.
*/
void btrfs_drop_snapshot(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv, int update_ref)
+ struct btrfs_block_rsv *block_rsv, int update_ref,
+ int for_reloc)
{
struct btrfs_path *path;
struct btrfs_trans_handle *trans;
@@ -6637,6 +6817,7 @@ void btrfs_drop_snapshot(struct btrfs_root *root,
wc->stage = DROP_REFERENCE;
wc->update_ref = update_ref;
wc->keep_locks = 0;
+ wc->for_reloc = for_reloc;
wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
while (1) {
@@ -6721,6 +6902,7 @@ out:
* drop subtree rooted at tree block 'node'.
*
* NOTE: this function will unlock and release tree block 'node'
+ * only used by relocation code
*/
int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -6765,6 +6947,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
wc->stage = DROP_REFERENCE;
wc->update_ref = 0;
wc->keep_locks = 1;
+ wc->for_reloc = 1;
wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
while (1) {
@@ -6792,6 +6975,29 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
+ if (root->fs_info->balance_ctl) {
+ struct btrfs_balance_control *bctl = root->fs_info->balance_ctl;
+ u64 tgt = 0;
+
+ /* pick restriper's target profile and return */
+ if (flags & BTRFS_BLOCK_GROUP_DATA &&
+ bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
+ tgt = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
+ } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
+ bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
+ tgt = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
+ } else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
+ bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
+ tgt = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
+ }
+
+ if (tgt) {
+ /* extended -> chunk profile */
+ tgt &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
+ return tgt;
+ }
+ }
+
/*
* we add in the count of missing devices because we want
* to make sure that any RAID levels on a degraded FS
@@ -7085,7 +7291,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
* space to fit our block group in.
*/
if (device->total_bytes > device->bytes_used + min_free) {
- ret = find_free_dev_extent(NULL, device, min_free,
+ ret = find_free_dev_extent(device, min_free,
&dev_offset, NULL);
if (!ret)
dev_nr++;
@@ -7447,6 +7653,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
&cache->space_info);
BUG_ON(ret);
+ update_global_block_rsv(root->fs_info);
spin_lock(&cache->space_info->lock);
cache->space_info->bytes_readonly += cache->bytes_super;
@@ -7466,6 +7673,22 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
return 0;
}
+static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
+{
+ u64 extra_flags = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK;
+
+ /* chunk -> extended profile */
+ if (extra_flags == 0)
+ extra_flags = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
+
+ if (flags & BTRFS_BLOCK_GROUP_DATA)
+ fs_info->avail_data_alloc_bits &= ~extra_flags;
+ if (flags & BTRFS_BLOCK_GROUP_METADATA)
+ fs_info->avail_metadata_alloc_bits &= ~extra_flags;
+ if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
+ fs_info->avail_system_alloc_bits &= ~extra_flags;
+}
+
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 group_start)
{
@@ -7476,6 +7699,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_key key;
struct inode *inode;
int ret;
+ int index;
int factor;
root = root->fs_info->extent_root;
@@ -7491,6 +7715,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
free_excluded_extents(root, block_group);
memcpy(&key, &block_group->key, sizeof(key));
+ index = get_block_group_index(block_group);
if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
BTRFS_BLOCK_GROUP_RAID1 |
BTRFS_BLOCK_GROUP_RAID10))
@@ -7565,6 +7790,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
* are still on the list after taking the semaphore
*/
list_del_init(&block_group->list);
+ if (list_empty(&block_group->space_info->block_groups[index]))
+ clear_avail_alloc_bits(root->fs_info, block_group->flags);
up_write(&block_group->space_info->groups_sem);
if (block_group->cached == BTRFS_CACHE_STARTED)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 49f3c9dc09f..9d09a4f8187 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -18,6 +18,7 @@
#include "ctree.h"
#include "btrfs_inode.h"
#include "volumes.h"
+#include "check-integrity.h"
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
@@ -1895,7 +1896,7 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
}
bio->bi_bdev = dev->bdev;
bio_add_page(bio, page, length, start-page_offset(page));
- submit_bio(WRITE_SYNC, bio);
+ btrfsic_submit_bio(WRITE_SYNC, bio);
wait_for_completion(&compl);
if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
@@ -2393,7 +2394,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
mirror_num, bio_flags, start);
else
- submit_bio(rw, bio);
+ btrfsic_submit_bio(rw, bio);
if (bio_flagged(bio, BIO_EOPNOTSUPP))
ret = -EOPNOTSUPP;
@@ -3579,6 +3580,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
atomic_set(&eb->blocking_writers, 0);
atomic_set(&eb->spinning_readers, 0);
atomic_set(&eb->spinning_writers, 0);
+ eb->lock_nested = 0;
init_waitqueue_head(&eb->write_lock_wq);
init_waitqueue_head(&eb->read_lock_wq);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 7604c300132..bc6a042cb6f 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -129,6 +129,7 @@ struct extent_buffer {
struct list_head leak_list;
struct rcu_head rcu_head;
atomic_t refs;
+ pid_t lock_owner;
/* count of read lock holders on the extent buffer */
atomic_t write_locks;
@@ -137,6 +138,7 @@ struct extent_buffer {
atomic_t blocking_readers;
atomic_t spinning_readers;
atomic_t spinning_writers;
+ int lock_nested;
/* protects write locks */
rwlock_t lock;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 034d9850322..859ba2dd889 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -678,7 +678,7 @@ next_slot:
disk_bytenr, num_bytes, 0,
root->root_key.objectid,
new_key.objectid,
- start - extent_offset);
+ start - extent_offset, 0);
BUG_ON(ret);
*hint_byte = disk_bytenr;
}
@@ -753,7 +753,7 @@ next_slot:
disk_bytenr, num_bytes, 0,
root->root_key.objectid,
key.objectid, key.offset -
- extent_offset);
+ extent_offset, 0);
BUG_ON(ret);
inode_sub_bytes(inode,
extent_end - key.offset);
@@ -962,7 +962,7 @@ again:
ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
root->root_key.objectid,
- ino, orig_offset);
+ ino, orig_offset, 0);
BUG_ON(ret);
if (split == start) {
@@ -989,7 +989,7 @@ again:
del_nr++;
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
0, root->root_key.objectid,
- ino, orig_offset);
+ ino, orig_offset, 0);
BUG_ON(ret);
}
other_start = 0;
@@ -1006,7 +1006,7 @@ again:
del_nr++;
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
0, root->root_key.objectid,
- ino, orig_offset);
+ ino, orig_offset, 0);
BUG_ON(ret);
}
if (del_nr == 0) {
@@ -1274,7 +1274,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
dirty_pages);
if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
btrfs_btree_balance_dirty(root, 1);
- btrfs_throttle(root);
pos += copied;
num_written += copied;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 9a897bf7953..d20ff87ca60 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -319,9 +319,11 @@ static void io_ctl_drop_pages(struct io_ctl *io_ctl)
io_ctl_unmap_page(io_ctl);
for (i = 0; i < io_ctl->num_pages; i++) {
- ClearPageChecked(io_ctl->pages[i]);
- unlock_page(io_ctl->pages[i]);
- page_cache_release(io_ctl->pages[i]);
+ if (io_ctl->pages[i]) {
+ ClearPageChecked(io_ctl->pages[i]);
+ unlock_page(io_ctl->pages[i]);
+ page_cache_release(io_ctl->pages[i]);
+ }
}
}
@@ -635,7 +637,10 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
if (!num_entries)
return 0;
- io_ctl_init(&io_ctl, inode, root);
+ ret = io_ctl_init(&io_ctl, inode, root);
+ if (ret)
+ return ret;
+
ret = readahead_cache(inode);
if (ret)
goto out;
@@ -838,7 +843,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
struct io_ctl io_ctl;
struct list_head bitmap_list;
struct btrfs_key key;
- u64 start, end, len;
+ u64 start, extent_start, extent_end, len;
int entries = 0;
int bitmaps = 0;
int ret;
@@ -849,7 +854,9 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
if (!i_size_read(inode))
return -1;
- io_ctl_init(&io_ctl, inode, root);
+ ret = io_ctl_init(&io_ctl, inode, root);
+ if (ret)
+ return -1;
/* Get the cluster for this block_group if it exists */
if (block_group && !list_empty(&block_group->cluster_list))
@@ -857,25 +864,12 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
struct btrfs_free_cluster,
block_group_list);
- /*
- * We shouldn't have switched the pinned extents yet so this is the
- * right one
- */
- unpin = root->fs_info->pinned_extents;
-
/* Lock all pages first so we can lock the extent safely. */
io_ctl_prepare_pages(&io_ctl, inode, 0);
lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
0, &cached_state, GFP_NOFS);
- /*
- * When searching for pinned extents, we need to start at our start
- * offset.
- */
- if (block_group)
- start = block_group->key.objectid;
-
node = rb_first(&ctl->free_space_offset);
if (!node && cluster) {
node = rb_first(&cluster->root);
@@ -918,9 +912,20 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
* We want to add any pinned extents to our free space cache
* so we don't leak the space
*/
+
+ /*
+ * We shouldn't have switched the pinned extents yet so this is the
+ * right one
+ */
+ unpin = root->fs_info->pinned_extents;
+
+ if (block_group)
+ start = block_group->key.objectid;
+
while (block_group && (start < block_group->key.objectid +
block_group->key.offset)) {
- ret = find_first_extent_bit(unpin, start, &start, &end,
+ ret = find_first_extent_bit(unpin, start,
+ &extent_start, &extent_end,
EXTENT_DIRTY);
if (ret) {
ret = 0;
@@ -928,20 +933,21 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
}
/* This pinned extent is out of our range */
- if (start >= block_group->key.objectid +
+ if (extent_start >= block_group->key.objectid +
block_group->key.offset)
break;
- len = block_group->key.objectid +
- block_group->key.offset - start;
- len = min(len, end + 1 - start);
+ extent_start = max(extent_start, start);
+ extent_end = min(block_group->key.objectid +
+ block_group->key.offset, extent_end + 1);
+ len = extent_end - extent_start;
entries++;
- ret = io_ctl_add_entry(&io_ctl, start, len, NULL);
+ ret = io_ctl_add_entry(&io_ctl, extent_start, len, NULL);
if (ret)
goto out_nospc;
- start = end + 1;
+ start = extent_end;
}
/* Write out the bitmaps */
@@ -2283,23 +2289,23 @@ out:
static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
struct btrfs_free_space *entry,
struct btrfs_free_cluster *cluster,
- u64 offset, u64 bytes, u64 min_bytes)
+ u64 offset, u64 bytes,
+ u64 cont1_bytes, u64 min_bytes)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
unsigned long next_zero;
unsigned long i;
- unsigned long search_bits;
- unsigned long total_bits;
+ unsigned long want_bits;
+ unsigned long min_bits;
unsigned long found_bits;
unsigned long start = 0;
unsigned long total_found = 0;
int ret;
- bool found = false;
i = offset_to_bit(entry->offset, block_group->sectorsize,
max_t(u64, offset, entry->offset));
- search_bits = bytes_to_bits(bytes, block_group->sectorsize);
- total_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
+ want_bits = bytes_to_bits(bytes, block_group->sectorsize);
+ min_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
again:
found_bits = 0;
@@ -2308,7 +2314,7 @@ again:
i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
next_zero = find_next_zero_bit(entry->bitmap,
BITS_PER_BITMAP, i);
- if (next_zero - i >= search_bits) {
+ if (next_zero - i >= min_bits) {
found_bits = next_zero - i;
break;
}
@@ -2318,10 +2324,9 @@ again:
if (!found_bits)
return -ENOSPC;
- if (!found) {
+ if (!total_found) {
start = i;
cluster->max_size = 0;
- found = true;
}
total_found += found_bits;
@@ -2329,13 +2334,8 @@ again:
if (cluster->max_size < found_bits * block_group->sectorsize)
cluster->max_size = found_bits * block_group->sectorsize;
- if (total_found < total_bits) {
- i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
- if (i - start > total_bits * 2) {
- total_found = 0;
- cluster->max_size = 0;
- found = false;
- }
+ if (total_found < want_bits || cluster->max_size < cont1_bytes) {
+ i = next_zero + 1;
goto again;
}
@@ -2346,28 +2346,31 @@ again:
&entry->offset_index, 1);
BUG_ON(ret);
+ trace_btrfs_setup_cluster(block_group, cluster,
+ total_found * block_group->sectorsize, 1);
return 0;
}
/*
* This searches the block group for just extents to fill the cluster with.
+ * Try to find a cluster with at least bytes total bytes, at least one
+ * extent of cont1_bytes, and other clusters of at least min_bytes.
*/
static noinline int
setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster,
struct list_head *bitmaps, u64 offset, u64 bytes,
- u64 min_bytes)
+ u64 cont1_bytes, u64 min_bytes)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *first = NULL;
struct btrfs_free_space *entry = NULL;
- struct btrfs_free_space *prev = NULL;
struct btrfs_free_space *last;
struct rb_node *node;
u64 window_start;
u64 window_free;
u64 max_extent;
- u64 max_gap = 128 * 1024;
+ u64 total_size = 0;
entry = tree_search_offset(ctl, offset, 0, 1);
if (!entry)
@@ -2377,8 +2380,8 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
* We don't want bitmaps, so just move along until we find a normal
* extent entry.
*/
- while (entry->bitmap) {
- if (list_empty(&entry->list))
+ while (entry->bitmap || entry->bytes < min_bytes) {
+ if (entry->bitmap && list_empty(&entry->list))
list_add_tail(&entry->list, bitmaps);
node = rb_next(&entry->offset_index);
if (!node)
@@ -2391,12 +2394,9 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
max_extent = entry->bytes;
first = entry;
last = entry;
- prev = entry;
- while (window_free <= min_bytes) {
- node = rb_next(&entry->offset_index);
- if (!node)
- return -ENOSPC;
+ for (node = rb_next(&entry->offset_index); node;
+ node = rb_next(&entry->offset_index)) {
entry = rb_entry(node, struct btrfs_free_space, offset_index);
if (entry->bitmap) {
@@ -2405,26 +2405,18 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
continue;
}
- /*
- * we haven't filled the empty size and the window is
- * very large. reset and try again
- */
- if (entry->offset - (prev->offset + prev->bytes) > max_gap ||
- entry->offset - window_start > (min_bytes * 2)) {
- first = entry;
- window_start = entry->offset;
- window_free = entry->bytes;
- last = entry;
+ if (entry->bytes < min_bytes)
+ continue;
+
+ last = entry;
+ window_free += entry->bytes;
+ if (entry->bytes > max_extent)
max_extent = entry->bytes;
- } else {
- last = entry;
- window_free += entry->bytes;
- if (entry->bytes > max_extent)
- max_extent = entry->bytes;
- }
- prev = entry;
}
+ if (window_free < bytes || max_extent < cont1_bytes)
+ return -ENOSPC;
+
cluster->window_start = first->offset;
node = &first->offset_index;
@@ -2438,17 +2430,18 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
entry = rb_entry(node, struct btrfs_free_space, offset_index);
node = rb_next(&entry->offset_index);
- if (entry->bitmap)
+ if (entry->bitmap || entry->bytes < min_bytes)
continue;
rb_erase(&entry->offset_index, &ctl->free_space_offset);
ret = tree_insert_offset(&cluster->root, entry->offset,
&entry->offset_index, 0);
+ total_size += entry->bytes;
BUG_ON(ret);
} while (node && entry != last);
cluster->max_size = max_extent;
-
+ trace_btrfs_setup_cluster(block_group, cluster, total_size, 0);
return 0;
}
@@ -2460,7 +2453,7 @@ static noinline int
setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster,
struct list_head *bitmaps, u64 offset, u64 bytes,
- u64 min_bytes)
+ u64 cont1_bytes, u64 min_bytes)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *entry;
@@ -2485,7 +2478,7 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
if (entry->bytes < min_bytes)
continue;
ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
- bytes, min_bytes);
+ bytes, cont1_bytes, min_bytes);
if (!ret)
return 0;
}
@@ -2499,7 +2492,7 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
/*
* here we try to find a cluster of blocks in a block group. The goal
- * is to find at least bytes free and up to empty_size + bytes free.
+ * is to find at least bytes+empty_size.
* We might not find them all in one contiguous area.
*
* returns zero and sets up cluster if things worked out, otherwise
@@ -2515,23 +2508,24 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
struct btrfs_free_space *entry, *tmp;
LIST_HEAD(bitmaps);
u64 min_bytes;
+ u64 cont1_bytes;
int ret;
- /* for metadata, allow allocates with more holes */
+ /*
+ * Choose the minimum extent size we'll require for this
+ * cluster. For SSD_SPREAD, don't allow any fragmentation.
+ * For metadata, allow allocates with smaller extents. For
+ * data, keep it dense.
+ */
if (btrfs_test_opt(root, SSD_SPREAD)) {
- min_bytes = bytes + empty_size;
+ cont1_bytes = min_bytes = bytes + empty_size;
} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
- /*
- * we want to do larger allocations when we are
- * flushing out the delayed refs, it helps prevent
- * making more work as we go along.
- */
- if (trans->transaction->delayed_refs.flushing)
- min_bytes = max(bytes, (bytes + empty_size) >> 1);
- else
- min_bytes = max(bytes, (bytes + empty_size) >> 4);
- } else
- min_bytes = max(bytes, (bytes + empty_size) >> 2);
+ cont1_bytes = bytes;
+ min_bytes = block_group->sectorsize;
+ } else {
+ cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
+ min_bytes = block_group->sectorsize;
+ }
spin_lock(&ctl->tree_lock);
@@ -2539,7 +2533,7 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
* If we know we don't have enough space to make a cluster don't even
* bother doing all the work to try and find one.
*/
- if (ctl->free_space < min_bytes) {
+ if (ctl->free_space < bytes) {
spin_unlock(&ctl->tree_lock);
return -ENOSPC;
}
@@ -2552,11 +2546,17 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
goto out;
}
+ trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
+ min_bytes);
+
+ INIT_LIST_HEAD(&bitmaps);
ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
- bytes, min_bytes);
+ bytes + empty_size,
+ cont1_bytes, min_bytes);
if (ret)
ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
- offset, bytes, min_bytes);
+ offset, bytes + empty_size,
+ cont1_bytes, min_bytes);
/* Clear our temporary list */
list_for_each_entry_safe(entry, tmp, &bitmaps, list)
@@ -2567,6 +2567,8 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
list_add_tail(&cluster->block_group_list,
&block_group->cluster_list);
cluster->block_group = block_group;
+ } else {
+ trace_btrfs_failed_cluster_setup(block_group);
}
out:
spin_unlock(&cluster->lock);
@@ -2588,17 +2590,57 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
cluster->block_group = NULL;
}
-int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
- u64 *trimmed, u64 start, u64 end, u64 minlen)
+static int do_trimming(struct btrfs_block_group_cache *block_group,
+ u64 *total_trimmed, u64 start, u64 bytes,
+ u64 reserved_start, u64 reserved_bytes)
{
- struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- struct btrfs_free_space *entry = NULL;
+ struct btrfs_space_info *space_info = block_group->space_info;
struct btrfs_fs_info *fs_info = block_group->fs_info;
- u64 bytes = 0;
- u64 actually_trimmed;
- int ret = 0;
+ int ret;
+ int update = 0;
+ u64 trimmed = 0;
- *trimmed = 0;
+ spin_lock(&space_info->lock);
+ spin_lock(&block_group->lock);
+ if (!block_group->ro) {
+ block_group->reserved += reserved_bytes;
+ space_info->bytes_reserved += reserved_bytes;
+ update = 1;
+ }
+ spin_unlock(&block_group->lock);
+ spin_unlock(&space_info->lock);
+
+ ret = btrfs_error_discard_extent(fs_info->extent_root,
+ start, bytes, &trimmed);
+ if (!ret)
+ *total_trimmed += trimmed;
+
+ btrfs_add_free_space(block_group, reserved_start, reserved_bytes);
+
+ if (update) {
+ spin_lock(&space_info->lock);
+ spin_lock(&block_group->lock);
+ if (block_group->ro)
+ space_info->bytes_readonly += reserved_bytes;
+ block_group->reserved -= reserved_bytes;
+ space_info->bytes_reserved -= reserved_bytes;
+ spin_unlock(&space_info->lock);
+ spin_unlock(&block_group->lock);
+ }
+
+ return ret;
+}
+
+static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
+ u64 *total_trimmed, u64 start, u64 end, u64 minlen)
+{
+ struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
+ struct btrfs_free_space *entry;
+ struct rb_node *node;
+ int ret = 0;
+ u64 extent_start;
+ u64 extent_bytes;
+ u64 bytes;
while (start < end) {
spin_lock(&ctl->tree_lock);
@@ -2609,81 +2651,118 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
}
entry = tree_search_offset(ctl, start, 0, 1);
- if (!entry)
- entry = tree_search_offset(ctl,
- offset_to_bitmap(ctl, start),
- 1, 1);
-
- if (!entry || entry->offset >= end) {
+ if (!entry) {
spin_unlock(&ctl->tree_lock);
break;
}
- if (entry->bitmap) {
- ret = search_bitmap(ctl, entry, &start, &bytes);
- if (!ret) {
- if (start >= end) {
- spin_unlock(&ctl->tree_lock);
- break;
- }
- bytes = min(bytes, end - start);
- bitmap_clear_bits(ctl, entry, start, bytes);
- if (entry->bytes == 0)
- free_bitmap(ctl, entry);
- } else {
- start = entry->offset + BITS_PER_BITMAP *
- block_group->sectorsize;
+ /* skip bitmaps */
+ while (entry->bitmap) {
+ node = rb_next(&entry->offset_index);
+ if (!node) {
spin_unlock(&ctl->tree_lock);
- ret = 0;
- continue;
+ goto out;
}
- } else {
- start = entry->offset;
- bytes = min(entry->bytes, end - start);
- unlink_free_space(ctl, entry);
- kmem_cache_free(btrfs_free_space_cachep, entry);
+ entry = rb_entry(node, struct btrfs_free_space,
+ offset_index);
}
+ if (entry->offset >= end) {
+ spin_unlock(&ctl->tree_lock);
+ break;
+ }
+
+ extent_start = entry->offset;
+ extent_bytes = entry->bytes;
+ start = max(start, extent_start);
+ bytes = min(extent_start + extent_bytes, end) - start;
+ if (bytes < minlen) {
+ spin_unlock(&ctl->tree_lock);
+ goto next;
+ }
+
+ unlink_free_space(ctl, entry);
+ kmem_cache_free(btrfs_free_space_cachep, entry);
+
spin_unlock(&ctl->tree_lock);
- if (bytes >= minlen) {
- struct btrfs_space_info *space_info;
- int update = 0;
-
- space_info = block_group->space_info;
- spin_lock(&space_info->lock);
- spin_lock(&block_group->lock);
- if (!block_group->ro) {
- block_group->reserved += bytes;
- space_info->bytes_reserved += bytes;
- update = 1;
- }
- spin_unlock(&block_group->lock);
- spin_unlock(&space_info->lock);
-
- ret = btrfs_error_discard_extent(fs_info->extent_root,
- start,
- bytes,
- &actually_trimmed);
-
- btrfs_add_free_space(block_group, start, bytes);
- if (update) {
- spin_lock(&space_info->lock);
- spin_lock(&block_group->lock);
- if (block_group->ro)
- space_info->bytes_readonly += bytes;
- block_group->reserved -= bytes;
- space_info->bytes_reserved -= bytes;
- spin_unlock(&space_info->lock);
- spin_unlock(&block_group->lock);
- }
+ ret = do_trimming(block_group, total_trimmed, start, bytes,
+ extent_start, extent_bytes);
+ if (ret)
+ break;
+next:
+ start += bytes;
- if (ret)
- break;
- *trimmed += actually_trimmed;
+ if (fatal_signal_pending(current)) {
+ ret = -ERESTARTSYS;
+ break;
+ }
+
+ cond_resched();
+ }
+out:
+ return ret;
+}
+
+static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
+ u64 *total_trimmed, u64 start, u64 end, u64 minlen)
+{
+ struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
+ struct btrfs_free_space *entry;
+ int ret = 0;
+ int ret2;
+ u64 bytes;
+ u64 offset = offset_to_bitmap(ctl, start);
+
+ while (offset < end) {
+ bool next_bitmap = false;
+
+ spin_lock(&ctl->tree_lock);
+
+ if (ctl->free_space < minlen) {
+ spin_unlock(&ctl->tree_lock);
+ break;
+ }
+
+ entry = tree_search_offset(ctl, offset, 1, 0);
+ if (!entry) {
+ spin_unlock(&ctl->tree_lock);
+ next_bitmap = true;
+ goto next;
+ }
+
+ bytes = minlen;
+ ret2 = search_bitmap(ctl, entry, &start, &bytes);
+ if (ret2 || start >= end) {
+ spin_unlock(&ctl->tree_lock);
+ next_bitmap = true;
+ goto next;
+ }
+
+ bytes = min(bytes, end - start);
+ if (bytes < minlen) {
+ spin_unlock(&ctl->tree_lock);
+ goto next;
+ }
+
+ bitmap_clear_bits(ctl, entry, start, bytes);
+ if (entry->bytes == 0)
+ free_bitmap(ctl, entry);
+
+ spin_unlock(&ctl->tree_lock);
+
+ ret = do_trimming(block_group, total_trimmed, start, bytes,
+ start, bytes);
+ if (ret)
+ break;
+next:
+ if (next_bitmap) {
+ offset += BITS_PER_BITMAP * ctl->unit;
+ } else {
+ start += bytes;
+ if (start >= offset + BITS_PER_BITMAP * ctl->unit)
+ offset += BITS_PER_BITMAP * ctl->unit;
}
- start += bytes;
- bytes = 0;
if (fatal_signal_pending(current)) {
ret = -ERESTARTSYS;
@@ -2696,6 +2775,22 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
return ret;
}
+int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
+ u64 *trimmed, u64 start, u64 end, u64 minlen)
+{
+ int ret;
+
+ *trimmed = 0;
+
+ ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
+ if (ret)
+ return ret;
+
+ ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
+
+ return ret;
+}
+
/*
* Find the left-most item in the cache tree, and then return the
* smallest inode number in the item.
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index f8962a957d6..213ffa86ce1 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -438,6 +438,8 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
trans->bytes_reserved);
if (ret)
goto out;
+ trace_btrfs_space_reservation(root->fs_info, "ino_cache", (u64)trans,
+ trans->bytes_reserved, 1);
again:
inode = lookup_free_ino_inode(root, path);
if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
@@ -498,6 +500,8 @@ again:
out_put:
iput(inode);
out_release:
+ trace_btrfs_space_reservation(root->fs_info, "ino_cache", (u64)trans,
+ trans->bytes_reserved, 0);
btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
out:
trans->block_rsv = rsv;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 81b235a61f8..0da19a0ea00 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1951,12 +1951,28 @@ enum btrfs_orphan_cleanup_state {
void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
+ struct btrfs_block_rsv *block_rsv;
int ret;
if (!list_empty(&root->orphan_list) ||
root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE)
return;
+ spin_lock(&root->orphan_lock);
+ if (!list_empty(&root->orphan_list)) {
+ spin_unlock(&root->orphan_lock);
+ return;
+ }
+
+ if (root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) {
+ spin_unlock(&root->orphan_lock);
+ return;
+ }
+
+ block_rsv = root->orphan_block_rsv;
+ root->orphan_block_rsv = NULL;
+ spin_unlock(&root->orphan_lock);
+
if (root->orphan_item_inserted &&
btrfs_root_refs(&root->root_item) > 0) {
ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root,
@@ -1965,10 +1981,9 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
root->orphan_item_inserted = 0;
}
- if (root->orphan_block_rsv) {
- WARN_ON(root->orphan_block_rsv->size > 0);
- btrfs_free_block_rsv(root, root->orphan_block_rsv);
- root->orphan_block_rsv = NULL;
+ if (block_rsv) {
+ WARN_ON(block_rsv->size > 0);
+ btrfs_free_block_rsv(root, block_rsv);
}
}
@@ -2224,14 +2239,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
continue;
}
nr_truncate++;
- /*
- * Need to hold the imutex for reservation purposes, not
- * a huge deal here but I have a WARN_ON in
- * btrfs_delalloc_reserve_space to catch offenders.
- */
- mutex_lock(&inode->i_mutex);
ret = btrfs_truncate(inode);
- mutex_unlock(&inode->i_mutex);
} else {
nr_unlink++;
}
@@ -2845,7 +2853,7 @@ static void __unlink_end_trans(struct btrfs_trans_handle *trans,
BUG_ON(!root->fs_info->enospc_unlink);
root->fs_info->enospc_unlink = 0;
}
- btrfs_end_transaction_throttle(trans, root);
+ btrfs_end_transaction(trans, root);
}
static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
@@ -3009,7 +3017,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
int pending_del_nr = 0;
int pending_del_slot = 0;
int extent_type = -1;
- int encoding;
int ret;
int err = 0;
u64 ino = btrfs_ino(inode);
@@ -3059,7 +3066,6 @@ search_again:
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
found_type = btrfs_key_type(&found_key);
- encoding = 0;
if (found_key.objectid != ino)
break;
@@ -3072,10 +3078,6 @@ search_again:
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
extent_type = btrfs_file_extent_type(leaf, fi);
- encoding = btrfs_file_extent_compression(leaf, fi);
- encoding |= btrfs_file_extent_encryption(leaf, fi);
- encoding |= btrfs_file_extent_other_encoding(leaf, fi);
-
if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
item_end +=
btrfs_file_extent_num_bytes(leaf, fi);
@@ -3103,7 +3105,7 @@ search_again:
if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
u64 num_dec;
extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
- if (!del_item && !encoding) {
+ if (!del_item) {
u64 orig_num_bytes =
btrfs_file_extent_num_bytes(leaf, fi);
extent_num_bytes = new_size -
@@ -3179,7 +3181,7 @@ delete:
ret = btrfs_free_extent(trans, root, extent_start,
extent_num_bytes, 0,
btrfs_header_owner(leaf),
- ino, extent_offset);
+ ino, extent_offset, 0);
BUG_ON(ret);
}
@@ -3434,7 +3436,7 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize)
i_size_write(inode, newsize);
btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
ret = btrfs_update_inode(trans, root, inode);
- btrfs_end_transaction_throttle(trans, root);
+ btrfs_end_transaction(trans, root);
} else {
/*
@@ -4655,7 +4657,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
}
out_unlock:
nr = trans->blocks_used;
- btrfs_end_transaction_throttle(trans, root);
+ btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root, nr);
if (drop_inode) {
inode_dec_link_count(inode);
@@ -4723,7 +4725,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
}
out_unlock:
nr = trans->blocks_used;
- btrfs_end_transaction_throttle(trans, root);
+ btrfs_end_transaction(trans, root);
if (drop_inode) {
inode_dec_link_count(inode);
iput(inode);
@@ -4782,7 +4784,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
}
nr = trans->blocks_used;
- btrfs_end_transaction_throttle(trans, root);
+ btrfs_end_transaction(trans, root);
fail:
if (drop_inode) {
inode_dec_link_count(inode);
@@ -4848,7 +4850,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
out_fail:
nr = trans->blocks_used;
- btrfs_end_transaction_throttle(trans, root);
+ btrfs_end_transaction(trans, root);
if (drop_on_err)
iput(inode);
btrfs_btree_balance_dirty(root, nr);
@@ -5121,7 +5123,7 @@ again:
}
flush_dcache_page(page);
} else if (create && PageUptodate(page)) {
- WARN_ON(1);
+ BUG();
if (!trans) {
kunmap(page);
free_extent_map(em);
@@ -6402,10 +6404,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
u64 page_start;
u64 page_end;
- /* Need this to keep space reservations serialized */
- mutex_lock(&inode->i_mutex);
ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
- mutex_unlock(&inode->i_mutex);
if (!ret)
ret = btrfs_update_time(vma->vm_file);
if (ret) {
@@ -6494,8 +6493,8 @@ out_unlock:
if (!ret)
return VM_FAULT_LOCKED;
unlock_page(page);
- btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
out:
+ btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
return ret;
}
@@ -6668,7 +6667,7 @@ end_trans:
err = ret;
nr = trans->blocks_used;
- ret = btrfs_end_transaction_throttle(trans, root);
+ ret = btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root, nr);
}
@@ -6749,6 +6748,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
extent_io_tree_init(&ei->io_tree, &inode->i_data);
extent_io_tree_init(&ei->io_failure_tree, &inode->i_data);
mutex_init(&ei->log_mutex);
+ mutex_init(&ei->delalloc_mutex);
btrfs_ordered_inode_tree_init(&ei->ordered_tree);
INIT_LIST_HEAD(&ei->i_orphan);
INIT_LIST_HEAD(&ei->delalloc_inodes);
@@ -7074,7 +7074,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
btrfs_end_log_trans(root);
}
out_fail:
- btrfs_end_transaction_throttle(trans, root);
+ btrfs_end_transaction(trans, root);
out_notrans:
if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
up_read(&root->fs_info->subvol_sem);
@@ -7246,7 +7246,7 @@ out_unlock:
if (!err)
d_instantiate(dentry, inode);
nr = trans->blocks_used;
- btrfs_end_transaction_throttle(trans, root);
+ btrfs_end_transaction(trans, root);
if (drop_inode) {
inode_dec_link_count(inode);
iput(inode);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 5441ff1480f..ab620014bcc 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -176,6 +176,8 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
struct btrfs_trans_handle *trans;
unsigned int flags, oldflags;
int ret;
+ u64 ip_oldflags;
+ unsigned int i_oldflags;
if (btrfs_root_readonly(root))
return -EROFS;
@@ -192,6 +194,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
mutex_lock(&inode->i_mutex);
+ ip_oldflags = ip->flags;
+ i_oldflags = inode->i_flags;
+
flags = btrfs_mask_flags(inode->i_mode, flags);
oldflags = btrfs_flags_to_ioctl(ip->flags);
if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
@@ -249,19 +254,24 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
}
- trans = btrfs_join_transaction(root);
- BUG_ON(IS_ERR(trans));
+ trans = btrfs_start_transaction(root, 1);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out_drop;
+ }
btrfs_update_iflags(inode);
inode->i_ctime = CURRENT_TIME;
ret = btrfs_update_inode(trans, root, inode);
- BUG_ON(ret);
btrfs_end_transaction(trans, root);
+ out_drop:
+ if (ret) {
+ ip->flags = ip_oldflags;
+ inode->i_flags = i_oldflags;
+ }
mnt_drop_write_file(file);
-
- ret = 0;
out_unlock:
mutex_unlock(&inode->i_mutex);
return ret;
@@ -276,14 +286,13 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
{
- struct btrfs_root *root = fdentry(file)->d_sb->s_fs_info;
- struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_fs_info *fs_info = btrfs_sb(fdentry(file)->d_sb);
struct btrfs_device *device;
struct request_queue *q;
struct fstrim_range range;
u64 minlen = ULLONG_MAX;
u64 num_devices = 0;
- u64 total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
+ u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
int ret;
if (!capable(CAP_SYS_ADMIN))
@@ -312,7 +321,7 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
range.len = min(range.len, total_bytes - range.start);
range.minlen = max(range.minlen, minlen);
- ret = btrfs_trim_fs(root, &range);
+ ret = btrfs_trim_fs(fs_info->tree_root, &range);
if (ret < 0)
return ret;
@@ -358,7 +367,7 @@ static noinline int create_subvol(struct btrfs_root *root,
return PTR_ERR(trans);
leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
- 0, objectid, NULL, 0, 0, 0);
+ 0, objectid, NULL, 0, 0, 0, 0);
if (IS_ERR(leaf)) {
ret = PTR_ERR(leaf);
goto fail;
@@ -858,10 +867,8 @@ static int cluster_pages_for_defrag(struct inode *inode,
return 0;
file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
- mutex_lock(&inode->i_mutex);
ret = btrfs_delalloc_reserve_space(inode,
num_pages << PAGE_CACHE_SHIFT);
- mutex_unlock(&inode->i_mutex);
if (ret)
return ret;
again:
@@ -1203,13 +1210,21 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ mutex_lock(&root->fs_info->volume_mutex);
+ if (root->fs_info->balance_ctl) {
+ printk(KERN_INFO "btrfs: balance in progress\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
vol_args = memdup_user(arg, sizeof(*vol_args));
- if (IS_ERR(vol_args))
- return PTR_ERR(vol_args);
+ if (IS_ERR(vol_args)) {
+ ret = PTR_ERR(vol_args);
+ goto out;
+ }
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
- mutex_lock(&root->fs_info->volume_mutex);
sizestr = vol_args->name;
devstr = strchr(sizestr, ':');
if (devstr) {
@@ -1226,7 +1241,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
(unsigned long long)devid);
ret = -EINVAL;
- goto out_unlock;
+ goto out_free;
}
if (!strcmp(sizestr, "max"))
new_size = device->bdev->bd_inode->i_size;
@@ -1241,7 +1256,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
new_size = memparse(sizestr, NULL);
if (new_size == 0) {
ret = -EINVAL;
- goto out_unlock;
+ goto out_free;
}
}
@@ -1250,7 +1265,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
if (mod < 0) {
if (new_size > old_size) {
ret = -EINVAL;
- goto out_unlock;
+ goto out_free;
}
new_size = old_size - new_size;
} else if (mod > 0) {
@@ -1259,11 +1274,11 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
if (new_size < 256 * 1024 * 1024) {
ret = -EINVAL;
- goto out_unlock;
+ goto out_free;
}
if (new_size > device->bdev->bd_inode->i_size) {
ret = -EFBIG;
- goto out_unlock;
+ goto out_free;
}
do_div(new_size, root->sectorsize);
@@ -1276,7 +1291,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
- goto out_unlock;
+ goto out_free;
}
ret = btrfs_grow_device(trans, device, new_size);
btrfs_commit_transaction(trans, root);
@@ -1284,9 +1299,10 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
ret = btrfs_shrink_device(device, new_size);
}
-out_unlock:
- mutex_unlock(&root->fs_info->volume_mutex);
+out_free:
kfree(vol_args);
+out:
+ mutex_unlock(&root->fs_info->volume_mutex);
return ret;
}
@@ -2052,14 +2068,25 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ mutex_lock(&root->fs_info->volume_mutex);
+ if (root->fs_info->balance_ctl) {
+ printk(KERN_INFO "btrfs: balance in progress\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
vol_args = memdup_user(arg, sizeof(*vol_args));
- if (IS_ERR(vol_args))
- return PTR_ERR(vol_args);
+ if (IS_ERR(vol_args)) {
+ ret = PTR_ERR(vol_args);
+ goto out;
+ }
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
ret = btrfs_init_new_device(root, vol_args->name);
kfree(vol_args);
+out:
+ mutex_unlock(&root->fs_info->volume_mutex);
return ret;
}
@@ -2074,14 +2101,25 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
if (root->fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
+ mutex_lock(&root->fs_info->volume_mutex);
+ if (root->fs_info->balance_ctl) {
+ printk(KERN_INFO "btrfs: balance in progress\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
vol_args = memdup_user(arg, sizeof(*vol_args));
- if (IS_ERR(vol_args))
- return PTR_ERR(vol_args);
+ if (IS_ERR(vol_args)) {
+ ret = PTR_ERR(vol_args);
+ goto out;
+ }
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
ret = btrfs_rm_device(root, vol_args->name);
kfree(vol_args);
+out:
+ mutex_unlock(&root->fs_info->volume_mutex);
return ret;
}
@@ -2427,7 +2465,8 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
disko, diskl, 0,
root->root_key.objectid,
btrfs_ino(inode),
- new_key.offset - datao);
+ new_key.offset - datao,
+ 0);
BUG_ON(ret);
}
} else if (type == BTRFS_FILE_EXTENT_INLINE) {
@@ -2977,7 +3016,7 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root,
{
int ret = 0;
int size;
- u64 extent_offset;
+ u64 extent_item_pos;
struct btrfs_ioctl_logical_ino_args *loi;
struct btrfs_data_container *inodes = NULL;
struct btrfs_path *path = NULL;
@@ -3008,15 +3047,17 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root,
}
ret = extent_from_logical(root->fs_info, loi->logical, path, &key);
+ btrfs_release_path(path);
if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
ret = -ENOENT;
if (ret < 0)
goto out;
- extent_offset = loi->logical - key.objectid;
+ extent_item_pos = loi->logical - key.objectid;
ret = iterate_extent_inodes(root->fs_info, path, key.objectid,
- extent_offset, build_ino_list, inodes);
+ extent_item_pos, build_ino_list,
+ inodes);
if (ret < 0)
goto out;
@@ -3034,6 +3075,163 @@ out:
return ret;
}
+void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
+ struct btrfs_ioctl_balance_args *bargs)
+{
+ struct btrfs_balance_control *bctl = fs_info->balance_ctl;
+
+ bargs->flags = bctl->flags;
+
+ if (atomic_read(&fs_info->balance_running))
+ bargs->state |= BTRFS_BALANCE_STATE_RUNNING;
+ if (atomic_read(&fs_info->balance_pause_req))
+ bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ;
+ if (atomic_read(&fs_info->balance_cancel_req))
+ bargs->state |= BTRFS_BALANCE_STATE_CANCEL_REQ;
+
+ memcpy(&bargs->data, &bctl->data, sizeof(bargs->data));
+ memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));
+ memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys));
+
+ if (lock) {
+ spin_lock(&fs_info->balance_lock);
+ memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
+ spin_unlock(&fs_info->balance_lock);
+ } else {
+ memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
+ }
+}
+
+static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg)
+{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_ioctl_balance_args *bargs;
+ struct btrfs_balance_control *bctl;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (fs_info->sb->s_flags & MS_RDONLY)
+ return -EROFS;
+
+ mutex_lock(&fs_info->volume_mutex);
+ mutex_lock(&fs_info->balance_mutex);
+
+ if (arg) {
+ bargs = memdup_user(arg, sizeof(*bargs));
+ if (IS_ERR(bargs)) {
+ ret = PTR_ERR(bargs);
+ goto out;
+ }
+
+ if (bargs->flags & BTRFS_BALANCE_RESUME) {
+ if (!fs_info->balance_ctl) {
+ ret = -ENOTCONN;
+ goto out_bargs;
+ }
+
+ bctl = fs_info->balance_ctl;
+ spin_lock(&fs_info->balance_lock);
+ bctl->flags |= BTRFS_BALANCE_RESUME;
+ spin_unlock(&fs_info->balance_lock);
+
+ goto do_balance;
+ }
+ } else {
+ bargs = NULL;
+ }
+
+ if (fs_info->balance_ctl) {
+ ret = -EINPROGRESS;
+ goto out_bargs;
+ }
+
+ bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
+ if (!bctl) {
+ ret = -ENOMEM;
+ goto out_bargs;
+ }
+
+ bctl->fs_info = fs_info;
+ if (arg) {
+ memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
+ memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta));
+ memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys));
+
+ bctl->flags = bargs->flags;
+ } else {
+ /* balance everything - no filters */
+ bctl->flags |= BTRFS_BALANCE_TYPE_MASK;
+ }
+
+do_balance:
+ ret = btrfs_balance(bctl, bargs);
+ /*
+ * bctl is freed in __cancel_balance or in free_fs_info if
+ * restriper was paused all the way until unmount
+ */
+ if (arg) {
+ if (copy_to_user(arg, bargs, sizeof(*bargs)))
+ ret = -EFAULT;
+ }
+
+out_bargs:
+ kfree(bargs);
+out:
+ mutex_unlock(&fs_info->balance_mutex);
+ mutex_unlock(&fs_info->volume_mutex);
+ return ret;
+}
+
+static long btrfs_ioctl_balance_ctl(struct btrfs_root *root, int cmd)
+{
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case BTRFS_BALANCE_CTL_PAUSE:
+ return btrfs_pause_balance(root->fs_info);
+ case BTRFS_BALANCE_CTL_CANCEL:
+ return btrfs_cancel_balance(root->fs_info);
+ }
+
+ return -EINVAL;
+}
+
+static long btrfs_ioctl_balance_progress(struct btrfs_root *root,
+ void __user *arg)
+{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_ioctl_balance_args *bargs;
+ int ret = 0;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ mutex_lock(&fs_info->balance_mutex);
+ if (!fs_info->balance_ctl) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+ bargs = kzalloc(sizeof(*bargs), GFP_NOFS);
+ if (!bargs) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ update_ioctl_balance_args(fs_info, 1, bargs);
+
+ if (copy_to_user(arg, bargs, sizeof(*bargs)))
+ ret = -EFAULT;
+
+ kfree(bargs);
+out:
+ mutex_unlock(&fs_info->balance_mutex);
+ return ret;
+}
+
long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg)
{
@@ -3078,7 +3276,7 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_DEV_INFO:
return btrfs_ioctl_dev_info(root, argp);
case BTRFS_IOC_BALANCE:
- return btrfs_balance(root->fs_info->dev_root);
+ return btrfs_ioctl_balance(root, NULL);
case BTRFS_IOC_CLONE:
return btrfs_ioctl_clone(file, arg, 0, 0, 0);
case BTRFS_IOC_CLONE_RANGE:
@@ -3110,6 +3308,12 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_scrub_cancel(root, argp);
case BTRFS_IOC_SCRUB_PROGRESS:
return btrfs_ioctl_scrub_progress(root, argp);
+ case BTRFS_IOC_BALANCE_V2:
+ return btrfs_ioctl_balance(root, argp);
+ case BTRFS_IOC_BALANCE_CTL:
+ return btrfs_ioctl_balance_ctl(root, arg);
+ case BTRFS_IOC_BALANCE_PROGRESS:
+ return btrfs_ioctl_balance_progress(root, argp);
}
return -ENOTTY;
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 252ae9915de..4f69028a68c 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -109,6 +109,55 @@ struct btrfs_ioctl_fs_info_args {
__u64 reserved[124]; /* pad to 1k */
};
+/* balance control ioctl modes */
+#define BTRFS_BALANCE_CTL_PAUSE 1
+#define BTRFS_BALANCE_CTL_CANCEL 2
+
+/*
+ * this is packed, because it should be exactly the same as its disk
+ * byte order counterpart (struct btrfs_disk_balance_args)
+ */
+struct btrfs_balance_args {
+ __u64 profiles;
+ __u64 usage;
+ __u64 devid;
+ __u64 pstart;
+ __u64 pend;
+ __u64 vstart;
+ __u64 vend;
+
+ __u64 target;
+
+ __u64 flags;
+
+ __u64 unused[8];
+} __attribute__ ((__packed__));
+
+/* report balance progress to userspace */
+struct btrfs_balance_progress {
+ __u64 expected; /* estimated # of chunks that will be
+ * relocated to fulfill the request */
+ __u64 considered; /* # of chunks we have considered so far */
+ __u64 completed; /* # of chunks relocated so far */
+};
+
+#define BTRFS_BALANCE_STATE_RUNNING (1ULL << 0)
+#define BTRFS_BALANCE_STATE_PAUSE_REQ (1ULL << 1)
+#define BTRFS_BALANCE_STATE_CANCEL_REQ (1ULL << 2)
+
+struct btrfs_ioctl_balance_args {
+ __u64 flags; /* in/out */
+ __u64 state; /* out */
+
+ struct btrfs_balance_args data; /* in/out */
+ struct btrfs_balance_args meta; /* in/out */
+ struct btrfs_balance_args sys; /* in/out */
+
+ struct btrfs_balance_progress stat; /* out */
+
+ __u64 unused[72]; /* pad to 1k */
+};
+
#define BTRFS_INO_LOOKUP_PATH_MAX 4080
struct btrfs_ioctl_ino_lookup_args {
__u64 treeid;
@@ -272,6 +321,11 @@ struct btrfs_ioctl_logical_ino_args {
struct btrfs_ioctl_dev_info_args)
#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \
struct btrfs_ioctl_fs_info_args)
+#define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \
+ struct btrfs_ioctl_balance_args)
+#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int)
+#define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 34, \
+ struct btrfs_ioctl_balance_args)
#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \
struct btrfs_ioctl_ino_path_args)
#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index d77b67c4b27..5e178d8f716 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -33,6 +33,14 @@ void btrfs_assert_tree_read_locked(struct extent_buffer *eb);
*/
void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw)
{
+ if (eb->lock_nested) {
+ read_lock(&eb->lock);
+ if (eb->lock_nested && current->pid == eb->lock_owner) {
+ read_unlock(&eb->lock);
+ return;
+ }
+ read_unlock(&eb->lock);
+ }
if (rw == BTRFS_WRITE_LOCK) {
if (atomic_read(&eb->blocking_writers) == 0) {
WARN_ON(atomic_read(&eb->spinning_writers) != 1);
@@ -57,6 +65,14 @@ void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw)
*/
void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
{
+ if (eb->lock_nested) {
+ read_lock(&eb->lock);
+ if (&eb->lock_nested && current->pid == eb->lock_owner) {
+ read_unlock(&eb->lock);
+ return;
+ }
+ read_unlock(&eb->lock);
+ }
if (rw == BTRFS_WRITE_LOCK_BLOCKING) {
BUG_ON(atomic_read(&eb->blocking_writers) != 1);
write_lock(&eb->lock);
@@ -81,12 +97,25 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
void btrfs_tree_read_lock(struct extent_buffer *eb)
{
again:
+ read_lock(&eb->lock);
+ if (atomic_read(&eb->blocking_writers) &&
+ current->pid == eb->lock_owner) {
+ /*
+ * This extent is already write-locked by our thread. We allow
+ * an additional read lock to be added because it's for the same
+ * thread. btrfs_find_all_roots() depends on this as it may be
+ * called on a partly (write-)locked tree.
+ */
+ BUG_ON(eb->lock_nested);
+ eb->lock_nested = 1;
+ read_unlock(&eb->lock);
+ return;
+ }
+ read_unlock(&eb->lock);
wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
read_lock(&eb->lock);
if (atomic_read(&eb->blocking_writers)) {
read_unlock(&eb->lock);
- wait_event(eb->write_lock_wq,
- atomic_read(&eb->blocking_writers) == 0);
goto again;
}
atomic_inc(&eb->read_locks);
@@ -129,6 +158,7 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
}
atomic_inc(&eb->write_locks);
atomic_inc(&eb->spinning_writers);
+ eb->lock_owner = current->pid;
return 1;
}
@@ -137,6 +167,15 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
*/
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
+ if (eb->lock_nested) {
+ read_lock(&eb->lock);
+ if (eb->lock_nested && current->pid == eb->lock_owner) {
+ eb->lock_nested = 0;
+ read_unlock(&eb->lock);
+ return;
+ }
+ read_unlock(&eb->lock);
+ }
btrfs_assert_tree_read_locked(eb);
WARN_ON(atomic_read(&eb->spinning_readers) == 0);
atomic_dec(&eb->spinning_readers);
@@ -149,6 +188,15 @@ void btrfs_tree_read_unlock(struct extent_buffer *eb)
*/
void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
{
+ if (eb->lock_nested) {
+ read_lock(&eb->lock);
+ if (eb->lock_nested && current->pid == eb->lock_owner) {
+ eb->lock_nested = 0;
+ read_unlock(&eb->lock);
+ return;
+ }
+ read_unlock(&eb->lock);
+ }
btrfs_assert_tree_read_locked(eb);
WARN_ON(atomic_read(&eb->blocking_readers) == 0);
if (atomic_dec_and_test(&eb->blocking_readers))
@@ -181,6 +229,7 @@ again:
WARN_ON(atomic_read(&eb->spinning_writers));
atomic_inc(&eb->spinning_writers);
atomic_inc(&eb->write_locks);
+ eb->lock_owner = current->pid;
return 0;
}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index cfb55434a46..8c1aae2c845 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1604,12 +1604,12 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
num_bytes, parent,
btrfs_header_owner(leaf),
- key.objectid, key.offset);
+ key.objectid, key.offset, 1);
BUG_ON(ret);
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
parent, btrfs_header_owner(leaf),
- key.objectid, key.offset);
+ key.objectid, key.offset, 1);
BUG_ON(ret);
}
if (dirty)
@@ -1778,21 +1778,23 @@ again:
ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize,
path->nodes[level]->start,
- src->root_key.objectid, level - 1, 0);
+ src->root_key.objectid, level - 1, 0,
+ 1);
BUG_ON(ret);
ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize,
0, dest->root_key.objectid, level - 1,
- 0);
+ 0, 1);
BUG_ON(ret);
ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
path->nodes[level]->start,
- src->root_key.objectid, level - 1, 0);
+ src->root_key.objectid, level - 1, 0,
+ 1);
BUG_ON(ret);
ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
0, dest->root_key.objectid, level - 1,
- 0);
+ 0, 1);
BUG_ON(ret);
btrfs_unlock_up_safe(path, 0);
@@ -2244,7 +2246,7 @@ again:
} else {
list_del_init(&reloc_root->root_list);
}
- btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0);
+ btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1);
}
if (found) {
@@ -2558,7 +2560,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
node->eb->start, blocksize,
upper->eb->start,
btrfs_header_owner(upper->eb),
- node->level, 0);
+ node->level, 0, 1);
BUG_ON(ret);
ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
@@ -2947,9 +2949,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
while (index <= last_index) {
- mutex_lock(&inode->i_mutex);
ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE);
- mutex_unlock(&inode->i_mutex);
if (ret)
goto out;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index ddf2c90d3fc..9770cc5bfb7 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -25,6 +25,7 @@
#include "transaction.h"
#include "backref.h"
#include "extent_io.h"
+#include "check-integrity.h"
/*
* This is only the first step towards a full-features scrub. It reads all
@@ -309,7 +310,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio,
u8 ref_level;
unsigned long ptr = 0;
const int bufsize = 4096;
- u64 extent_offset;
+ u64 extent_item_pos;
path = btrfs_alloc_path();
@@ -329,12 +330,13 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio,
if (ret < 0)
goto out;
- extent_offset = swarn.logical - found_key.objectid;
+ extent_item_pos = swarn.logical - found_key.objectid;
swarn.extent_item_size = found_key.offset;
eb = path->nodes[0];
ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
item_size = btrfs_item_size_nr(eb, path->slots[0]);
+ btrfs_release_path(path);
if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
do {
@@ -351,7 +353,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio,
} else {
swarn.path = path;
iterate_extent_inodes(fs_info, path, found_key.objectid,
- extent_offset,
+ extent_item_pos,
scrub_print_warning_inode, &swarn);
}
@@ -732,7 +734,7 @@ static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
bio_add_page(bio, page, PAGE_SIZE, 0);
bio->bi_end_io = scrub_fixup_end_io;
bio->bi_private = &complete;
- submit_bio(rw, bio);
+ btrfsic_submit_bio(rw, bio);
/* this will also unplug the queue */
wait_for_completion(&complete);
@@ -958,7 +960,7 @@ static int scrub_submit(struct scrub_dev *sdev)
sdev->curr = -1;
atomic_inc(&sdev->in_flight);
- submit_bio(READ, sbio->bio);
+ btrfsic_submit_bio(READ, sbio->bio);
return 0;
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index ae488aa1966..3ce97b217cb 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -147,13 +147,13 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
static void btrfs_put_super(struct super_block *sb)
{
- struct btrfs_root *root = btrfs_sb(sb);
- int ret;
-
- ret = close_ctree(root);
- sb->s_fs_info = NULL;
-
- (void)ret; /* FIXME: need to fix VFS to return error? */
+ (void)close_ctree(btrfs_sb(sb)->tree_root);
+ /* FIXME: need to fix VFS to return error? */
+ /* AV: return it _where_? ->put_super() can be triggered by any number
+ * of async events, up to and including delivery of SIGKILL to the
+ * last process that kept it busy. Or segfault in the aforementioned
+ * process... Whom would you report that to?
+ */
}
enum {
@@ -163,8 +163,11 @@ enum {
Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
- Opt_enospc_debug, Opt_subvolrootid, Opt_defrag,
- Opt_inode_cache, Opt_no_space_cache, Opt_recovery, Opt_err,
+ Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache,
+ Opt_no_space_cache, Opt_recovery, Opt_skip_balance,
+ Opt_check_integrity, Opt_check_integrity_including_extent_data,
+ Opt_check_integrity_print_mask,
+ Opt_err,
};
static match_table_t tokens = {
@@ -199,6 +202,10 @@ static match_table_t tokens = {
{Opt_inode_cache, "inode_cache"},
{Opt_no_space_cache, "nospace_cache"},
{Opt_recovery, "recovery"},
+ {Opt_skip_balance, "skip_balance"},
+ {Opt_check_integrity, "check_int"},
+ {Opt_check_integrity_including_extent_data, "check_int_data"},
+ {Opt_check_integrity_print_mask, "check_int_print_mask=%d"},
{Opt_err, NULL},
};
@@ -397,6 +404,40 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
printk(KERN_INFO "btrfs: enabling auto recovery");
btrfs_set_opt(info->mount_opt, RECOVERY);
break;
+ case Opt_skip_balance:
+ btrfs_set_opt(info->mount_opt, SKIP_BALANCE);
+ break;
+#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
+ case Opt_check_integrity_including_extent_data:
+ printk(KERN_INFO "btrfs: enabling check integrity"
+ " including extent data\n");
+ btrfs_set_opt(info->mount_opt,
+ CHECK_INTEGRITY_INCLUDING_EXTENT_DATA);
+ btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
+ break;
+ case Opt_check_integrity:
+ printk(KERN_INFO "btrfs: enabling check integrity\n");
+ btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
+ break;
+ case Opt_check_integrity_print_mask:
+ intarg = 0;
+ match_int(&args[0], &intarg);
+ if (intarg) {
+ info->check_integrity_print_mask = intarg;
+ printk(KERN_INFO "btrfs:"
+ " check_integrity_print_mask 0x%x\n",
+ info->check_integrity_print_mask);
+ }
+ break;
+#else
+ case Opt_check_integrity_including_extent_data:
+ case Opt_check_integrity:
+ case Opt_check_integrity_print_mask:
+ printk(KERN_ERR "btrfs: support for check_integrity*"
+ " not compiled in!\n");
+ ret = -EINVAL;
+ goto out;
+#endif
case Opt_err:
printk(KERN_INFO "btrfs: unrecognized mount option "
"'%s'\n", p);
@@ -500,7 +541,8 @@ out:
static struct dentry *get_default_root(struct super_block *sb,
u64 subvol_objectid)
{
- struct btrfs_root *root = sb->s_fs_info;
+ struct btrfs_fs_info *fs_info = btrfs_sb(sb);
+ struct btrfs_root *root = fs_info->tree_root;
struct btrfs_root *new_root;
struct btrfs_dir_item *di;
struct btrfs_path *path;
@@ -530,7 +572,7 @@ static struct dentry *get_default_root(struct super_block *sb,
* will mount by default if we haven't been given a specific subvolume
* to mount.
*/
- dir_id = btrfs_super_root_dir(root->fs_info->super_copy);
+ dir_id = btrfs_super_root_dir(fs_info->super_copy);
di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
if (IS_ERR(di)) {
btrfs_free_path(path);
@@ -544,7 +586,7 @@ static struct dentry *get_default_root(struct super_block *sb,
*/
btrfs_free_path(path);
dir_id = BTRFS_FIRST_FREE_OBJECTID;
- new_root = root->fs_info->fs_root;
+ new_root = fs_info->fs_root;
goto setup_root;
}
@@ -552,7 +594,7 @@ static struct dentry *get_default_root(struct super_block *sb,
btrfs_free_path(path);
find_root:
- new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
+ new_root = btrfs_read_fs_root_no_name(fs_info, &location);
if (IS_ERR(new_root))
return ERR_CAST(new_root);
@@ -588,7 +630,7 @@ static int btrfs_fill_super(struct super_block *sb,
{
struct inode *inode;
struct dentry *root_dentry;
- struct btrfs_root *tree_root;
+ struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_key key;
int err;
@@ -603,18 +645,16 @@ static int btrfs_fill_super(struct super_block *sb,
sb->s_flags |= MS_POSIXACL;
#endif
- tree_root = open_ctree(sb, fs_devices, (char *)data);
-
- if (IS_ERR(tree_root)) {
+ err = open_ctree(sb, fs_devices, (char *)data);
+ if (err) {
printk("btrfs: open_ctree failed\n");
- return PTR_ERR(tree_root);
+ return err;
}
- sb->s_fs_info = tree_root;
key.objectid = BTRFS_FIRST_FREE_OBJECTID;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
- inode = btrfs_iget(sb, &key, tree_root->fs_info->fs_root, NULL);
+ inode = btrfs_iget(sb, &key, fs_info->fs_root, NULL);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto fail_close;
@@ -631,23 +671,25 @@ static int btrfs_fill_super(struct super_block *sb,
save_mount_options(sb, data);
cleancache_init_fs(sb);
+ sb->s_flags |= MS_ACTIVE;
return 0;
fail_close:
- close_ctree(tree_root);
+ close_ctree(fs_info->tree_root);
return err;
}
int btrfs_sync_fs(struct super_block *sb, int wait)
{
struct btrfs_trans_handle *trans;
- struct btrfs_root *root = btrfs_sb(sb);
+ struct btrfs_fs_info *fs_info = btrfs_sb(sb);
+ struct btrfs_root *root = fs_info->tree_root;
int ret;
trace_btrfs_sync_fs(wait);
if (!wait) {
- filemap_flush(root->fs_info->btree_inode->i_mapping);
+ filemap_flush(fs_info->btree_inode->i_mapping);
return 0;
}
@@ -663,8 +705,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
{
- struct btrfs_root *root = btrfs_sb(dentry->d_sb);
- struct btrfs_fs_info *info = root->fs_info;
+ struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb);
+ struct btrfs_root *root = info->tree_root;
char *compress_type;
if (btrfs_test_opt(root, DEGRADED))
@@ -722,28 +764,25 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
seq_puts(seq, ",autodefrag");
if (btrfs_test_opt(root, INODE_MAP_CACHE))
seq_puts(seq, ",inode_cache");
+ if (btrfs_test_opt(root, SKIP_BALANCE))
+ seq_puts(seq, ",skip_balance");
return 0;
}
static int btrfs_test_super(struct super_block *s, void *data)
{
- struct btrfs_root *test_root = data;
- struct btrfs_root *root = btrfs_sb(s);
+ struct btrfs_fs_info *p = data;
+ struct btrfs_fs_info *fs_info = btrfs_sb(s);
- /*
- * If this super block is going away, return false as it
- * can't match as an existing super block.
- */
- if (!atomic_read(&s->s_active))
- return 0;
- return root->fs_info->fs_devices == test_root->fs_info->fs_devices;
+ return fs_info->fs_devices == p->fs_devices;
}
static int btrfs_set_super(struct super_block *s, void *data)
{
- s->s_fs_info = data;
-
- return set_anon_super(s, data);
+ int err = set_anon_super(s, data);
+ if (!err)
+ s->s_fs_info = data;
+ return err;
}
/*
@@ -903,12 +942,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
if (!fs_info)
return ERR_PTR(-ENOMEM);
- fs_info->tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
- if (!fs_info->tree_root) {
- error = -ENOMEM;
- goto error_fs_info;
- }
- fs_info->tree_root->fs_info = fs_info;
fs_info->fs_devices = fs_devices;
fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS);
@@ -928,43 +961,30 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
}
bdev = fs_devices->latest_bdev;
- s = sget(fs_type, btrfs_test_super, btrfs_set_super,
- fs_info->tree_root);
+ s = sget(fs_type, btrfs_test_super, btrfs_set_super, fs_info);
if (IS_ERR(s)) {
error = PTR_ERR(s);
goto error_close_devices;
}
if (s->s_root) {
- if ((flags ^ s->s_flags) & MS_RDONLY) {
- deactivate_locked_super(s);
- error = -EBUSY;
- goto error_close_devices;
- }
-
btrfs_close_devices(fs_devices);
free_fs_info(fs_info);
+ if ((flags ^ s->s_flags) & MS_RDONLY)
+ error = -EBUSY;
} else {
char b[BDEVNAME_SIZE];
s->s_flags = flags | MS_NOSEC;
strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
- btrfs_sb(s)->fs_info->bdev_holder = fs_type;
+ btrfs_sb(s)->bdev_holder = fs_type;
error = btrfs_fill_super(s, fs_devices, data,
flags & MS_SILENT ? 1 : 0);
- if (error) {
- deactivate_locked_super(s);
- return ERR_PTR(error);
- }
-
- s->s_flags |= MS_ACTIVE;
}
- root = get_default_root(s, subvol_objectid);
- if (IS_ERR(root)) {
+ root = !error ? get_default_root(s, subvol_objectid) : ERR_PTR(error);
+ if (IS_ERR(root))
deactivate_locked_super(s);
- return root;
- }
return root;
@@ -977,7 +997,8 @@ error_fs_info:
static int btrfs_remount(struct super_block *sb, int *flags, char *data)
{
- struct btrfs_root *root = btrfs_sb(sb);
+ struct btrfs_fs_info *fs_info = btrfs_sb(sb);
+ struct btrfs_root *root = fs_info->tree_root;
int ret;
ret = btrfs_parse_options(root, data);
@@ -993,13 +1014,13 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
ret = btrfs_commit_super(root);
WARN_ON(ret);
} else {
- if (root->fs_info->fs_devices->rw_devices == 0)
+ if (fs_info->fs_devices->rw_devices == 0)
return -EACCES;
- if (btrfs_super_log_root(root->fs_info->super_copy) != 0)
+ if (btrfs_super_log_root(fs_info->super_copy) != 0)
return -EINVAL;
- ret = btrfs_cleanup_fs_roots(root->fs_info);
+ ret = btrfs_cleanup_fs_roots(fs_info);
WARN_ON(ret);
/* recover relocation */
@@ -1168,18 +1189,18 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
- struct btrfs_root *root = btrfs_sb(dentry->d_sb);
- struct btrfs_super_block *disk_super = root->fs_info->super_copy;
- struct list_head *head = &root->fs_info->space_info;
+ struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
+ struct btrfs_super_block *disk_super = fs_info->super_copy;
+ struct list_head *head = &fs_info->space_info;
struct btrfs_space_info *found;
u64 total_used = 0;
u64 total_free_data = 0;
int bits = dentry->d_sb->s_blocksize_bits;
- __be32 *fsid = (__be32 *)root->fs_info->fsid;
+ __be32 *fsid = (__be32 *)fs_info->fsid;
int ret;
/* holding chunk_muext to avoid allocating new chunks */
- mutex_lock(&root->fs_info->chunk_mutex);
+ mutex_lock(&fs_info->chunk_mutex);
rcu_read_lock();
list_for_each_entry_rcu(found, head, list) {
if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
@@ -1198,14 +1219,14 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_bsize = dentry->d_sb->s_blocksize;
buf->f_type = BTRFS_SUPER_MAGIC;
buf->f_bavail = total_free_data;
- ret = btrfs_calc_avail_data_space(root, &total_free_data);
+ ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data);
if (ret) {
- mutex_unlock(&root->fs_info->chunk_mutex);
+ mutex_unlock(&fs_info->chunk_mutex);
return ret;
}
buf->f_bavail += total_free_data;
buf->f_bavail = buf->f_bavail >> bits;
- mutex_unlock(&root->fs_info->chunk_mutex);
+ mutex_unlock(&fs_info->chunk_mutex);
/* We treat it as constant endianness (it doesn't matter _which_)
because we want the fsid to come out the same whether mounted
@@ -1219,11 +1240,18 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
+static void btrfs_kill_super(struct super_block *sb)
+{
+ struct btrfs_fs_info *fs_info = btrfs_sb(sb);
+ kill_anon_super(sb);
+ free_fs_info(fs_info);
+}
+
static struct file_system_type btrfs_fs_type = {
.owner = THIS_MODULE,
.name = "btrfs",
.mount = btrfs_mount,
- .kill_sb = kill_anon_super,
+ .kill_sb = btrfs_kill_super,
.fs_flags = FS_REQUIRES_DEV,
};
@@ -1257,17 +1285,17 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
static int btrfs_freeze(struct super_block *sb)
{
- struct btrfs_root *root = btrfs_sb(sb);
- mutex_lock(&root->fs_info->transaction_kthread_mutex);
- mutex_lock(&root->fs_info->cleaner_mutex);
+ struct btrfs_fs_info *fs_info = btrfs_sb(sb);
+ mutex_lock(&fs_info->transaction_kthread_mutex);
+ mutex_lock(&fs_info->cleaner_mutex);
return 0;
}
static int btrfs_unfreeze(struct super_block *sb)
{
- struct btrfs_root *root = btrfs_sb(sb);
- mutex_unlock(&root->fs_info->cleaner_mutex);
- mutex_unlock(&root->fs_info->transaction_kthread_mutex);
+ struct btrfs_fs_info *fs_info = btrfs_sb(sb);
+ mutex_unlock(&fs_info->cleaner_mutex);
+ mutex_unlock(&fs_info->transaction_kthread_mutex);
return 0;
}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 81376d94cd3..287a6728b1a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -36,6 +36,8 @@ static noinline void put_transaction(struct btrfs_transaction *transaction)
WARN_ON(atomic_read(&transaction->use_count) == 0);
if (atomic_dec_and_test(&transaction->use_count)) {
BUG_ON(!list_empty(&transaction->list));
+ WARN_ON(transaction->delayed_refs.root.rb_node);
+ WARN_ON(!list_empty(&transaction->delayed_refs.seq_head));
memset(transaction, 0, sizeof(*transaction));
kmem_cache_free(btrfs_transaction_cachep, transaction);
}
@@ -108,8 +110,11 @@ loop:
cur_trans->delayed_refs.num_heads = 0;
cur_trans->delayed_refs.flushing = 0;
cur_trans->delayed_refs.run_delayed_start = 0;
+ cur_trans->delayed_refs.seq = 1;
+ init_waitqueue_head(&cur_trans->delayed_refs.seq_wait);
spin_lock_init(&cur_trans->commit_lock);
spin_lock_init(&cur_trans->delayed_refs.lock);
+ INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head);
INIT_LIST_HEAD(&cur_trans->pending_snapshots);
list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
@@ -321,6 +326,8 @@ again:
}
if (num_bytes) {
+ trace_btrfs_space_reservation(root->fs_info, "transaction",
+ (u64)h, num_bytes, 1);
h->block_rsv = &root->fs_info->trans_block_rsv;
h->bytes_reserved = num_bytes;
}
@@ -467,19 +474,12 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
btrfs_trans_release_metadata(trans, root);
trans->block_rsv = NULL;
- while (count < 4) {
+ while (count < 2) {
unsigned long cur = trans->delayed_ref_updates;
trans->delayed_ref_updates = 0;
if (cur &&
trans->transaction->delayed_refs.num_heads_ready > 64) {
trans->delayed_ref_updates = 0;
-
- /*
- * do a full flush if the transaction is trying
- * to close
- */
- if (trans->transaction->delayed_refs.flushing)
- cur = 0;
btrfs_run_delayed_refs(trans, root, cur);
} else {
break;
@@ -1393,9 +1393,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
if (btrfs_header_backref_rev(root->node) <
BTRFS_MIXED_BACKREF_REV)
- btrfs_drop_snapshot(root, NULL, 0);
+ btrfs_drop_snapshot(root, NULL, 0, 0);
else
- btrfs_drop_snapshot(root, NULL, 1);
+ btrfs_drop_snapshot(root, NULL, 1, 0);
}
return 0;
}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 3568374d419..cb877e0886a 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -589,7 +589,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
ret = btrfs_inc_extent_ref(trans, root,
ins.objectid, ins.offset,
0, root->root_key.objectid,
- key->objectid, offset);
+ key->objectid, offset, 0);
BUG_ON(ret);
} else {
/*
diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c
new file mode 100644
index 00000000000..12f5147bd2b
--- /dev/null
+++ b/fs/btrfs/ulist.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright (C) 2011 STRATO AG
+ * written by Arne Jansen <sensille@gmx.net>
+ * Distributed under the GNU GPL license version 2.
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include "ulist.h"
+
+/*
+ * ulist is a generic data structure to hold a collection of unique u64
+ * values. The only operations it supports is adding to the list and
+ * enumerating it.
+ * It is possible to store an auxiliary value along with the key.
+ *
+ * The implementation is preliminary and can probably be sped up
+ * significantly. A first step would be to store the values in an rbtree
+ * as soon as ULIST_SIZE is exceeded.
+ *
+ * A sample usage for ulists is the enumeration of directed graphs without
+ * visiting a node twice. The pseudo-code could look like this:
+ *
+ * ulist = ulist_alloc();
+ * ulist_add(ulist, root);
+ * elem = NULL;
+ *
+ * while ((elem = ulist_next(ulist, elem)) {
+ * for (all child nodes n in elem)
+ * ulist_add(ulist, n);
+ * do something useful with the node;
+ * }
+ * ulist_free(ulist);
+ *
+ * This assumes the graph nodes are adressable by u64. This stems from the
+ * usage for tree enumeration in btrfs, where the logical addresses are
+ * 64 bit.
+ *
+ * It is also useful for tree enumeration which could be done elegantly
+ * recursively, but is not possible due to kernel stack limitations. The
+ * loop would be similar to the above.
+ */
+
+/**
+ * ulist_init - freshly initialize a ulist
+ * @ulist: the ulist to initialize
+ *
+ * Note: don't use this function to init an already used ulist, use
+ * ulist_reinit instead.
+ */
+void ulist_init(struct ulist *ulist)
+{
+ ulist->nnodes = 0;
+ ulist->nodes = ulist->int_nodes;
+ ulist->nodes_alloced = ULIST_SIZE;
+}
+EXPORT_SYMBOL(ulist_init);
+
+/**
+ * ulist_fini - free up additionally allocated memory for the ulist
+ * @ulist: the ulist from which to free the additional memory
+ *
+ * This is useful in cases where the base 'struct ulist' has been statically
+ * allocated.
+ */
+void ulist_fini(struct ulist *ulist)
+{
+ /*
+ * The first ULIST_SIZE elements are stored inline in struct ulist.
+ * Only if more elements are alocated they need to be freed.
+ */
+ if (ulist->nodes_alloced > ULIST_SIZE)
+ kfree(ulist->nodes);
+ ulist->nodes_alloced = 0; /* in case ulist_fini is called twice */
+}
+EXPORT_SYMBOL(ulist_fini);
+
+/**
+ * ulist_reinit - prepare a ulist for reuse
+ * @ulist: ulist to be reused
+ *
+ * Free up all additional memory allocated for the list elements and reinit
+ * the ulist.
+ */
+void ulist_reinit(struct ulist *ulist)
+{
+ ulist_fini(ulist);
+ ulist_init(ulist);
+}
+EXPORT_SYMBOL(ulist_reinit);
+
+/**
+ * ulist_alloc - dynamically allocate a ulist
+ * @gfp_mask: allocation flags to for base allocation
+ *
+ * The allocated ulist will be returned in an initialized state.
+ */
+struct ulist *ulist_alloc(unsigned long gfp_mask)
+{
+ struct ulist *ulist = kmalloc(sizeof(*ulist), gfp_mask);
+
+ if (!ulist)
+ return NULL;
+
+ ulist_init(ulist);
+
+ return ulist;
+}
+EXPORT_SYMBOL(ulist_alloc);
+
+/**
+ * ulist_free - free dynamically allocated ulist
+ * @ulist: ulist to free
+ *
+ * It is not necessary to call ulist_fini before.
+ */
+void ulist_free(struct ulist *ulist)
+{
+ if (!ulist)
+ return;
+ ulist_fini(ulist);
+ kfree(ulist);
+}
+EXPORT_SYMBOL(ulist_free);
+
+/**
+ * ulist_add - add an element to the ulist
+ * @ulist: ulist to add the element to
+ * @val: value to add to ulist
+ * @aux: auxiliary value to store along with val
+ * @gfp_mask: flags to use for allocation
+ *
+ * Note: locking must be provided by the caller. In case of rwlocks write
+ * locking is needed
+ *
+ * Add an element to a ulist. The @val will only be added if it doesn't
+ * already exist. If it is added, the auxiliary value @aux is stored along with
+ * it. In case @val already exists in the ulist, @aux is ignored, even if
+ * it differs from the already stored value.
+ *
+ * ulist_add returns 0 if @val already exists in ulist and 1 if @val has been
+ * inserted.
+ * In case of allocation failure -ENOMEM is returned and the ulist stays
+ * unaltered.
+ */
+int ulist_add(struct ulist *ulist, u64 val, unsigned long aux,
+ unsigned long gfp_mask)
+{
+ int i;
+
+ for (i = 0; i < ulist->nnodes; ++i) {
+ if (ulist->nodes[i].val == val)
+ return 0;
+ }
+
+ if (ulist->nnodes >= ulist->nodes_alloced) {
+ u64 new_alloced = ulist->nodes_alloced + 128;
+ struct ulist_node *new_nodes;
+ void *old = NULL;
+
+ /*
+ * if nodes_alloced == ULIST_SIZE no memory has been allocated
+ * yet, so pass NULL to krealloc
+ */
+ if (ulist->nodes_alloced > ULIST_SIZE)
+ old = ulist->nodes;
+
+ new_nodes = krealloc(old, sizeof(*new_nodes) * new_alloced,
+ gfp_mask);
+ if (!new_nodes)
+ return -ENOMEM;
+
+ if (!old)
+ memcpy(new_nodes, ulist->int_nodes,
+ sizeof(ulist->int_nodes));
+
+ ulist->nodes = new_nodes;
+ ulist->nodes_alloced = new_alloced;
+ }
+ ulist->nodes[ulist->nnodes].val = val;
+ ulist->nodes[ulist->nnodes].aux = aux;
+ ++ulist->nnodes;
+
+ return 1;
+}
+EXPORT_SYMBOL(ulist_add);
+
+/**
+ * ulist_next - iterate ulist
+ * @ulist: ulist to iterate
+ * @prev: previously returned element or %NULL to start iteration
+ *
+ * Note: locking must be provided by the caller. In case of rwlocks only read
+ * locking is needed
+ *
+ * This function is used to iterate an ulist. The iteration is started with
+ * @prev = %NULL. It returns the next element from the ulist or %NULL when the
+ * end is reached. No guarantee is made with respect to the order in which
+ * the elements are returned. They might neither be returned in order of
+ * addition nor in ascending order.
+ * It is allowed to call ulist_add during an enumeration. Newly added items
+ * are guaranteed to show up in the running enumeration.
+ */
+struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev)
+{
+ int next;
+
+ if (ulist->nnodes == 0)
+ return NULL;
+
+ if (!prev)
+ return &ulist->nodes[0];
+
+ next = (prev - ulist->nodes) + 1;
+ if (next < 0 || next >= ulist->nnodes)
+ return NULL;
+
+ return &ulist->nodes[next];
+}
+EXPORT_SYMBOL(ulist_next);
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h
new file mode 100644
index 00000000000..2e25dec58ec
--- /dev/null
+++ b/fs/btrfs/ulist.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2011 STRATO AG
+ * written by Arne Jansen <sensille@gmx.net>
+ * Distributed under the GNU GPL license version 2.
+ *
+ */
+
+#ifndef __ULIST__
+#define __ULIST__
+
+/*
+ * ulist is a generic data structure to hold a collection of unique u64
+ * values. The only operations it supports is adding to the list and
+ * enumerating it.
+ * It is possible to store an auxiliary value along with the key.
+ *
+ * The implementation is preliminary and can probably be sped up
+ * significantly. A first step would be to store the values in an rbtree
+ * as soon as ULIST_SIZE is exceeded.
+ */
+
+/*
+ * number of elements statically allocated inside struct ulist
+ */
+#define ULIST_SIZE 16
+
+/*
+ * element of the list
+ */
+struct ulist_node {
+ u64 val; /* value to store */
+ unsigned long aux; /* auxiliary value saved along with the val */
+};
+
+struct ulist {
+ /*
+ * number of elements stored in list
+ */
+ unsigned long nnodes;
+
+ /*
+ * number of nodes we already have room for
+ */
+ unsigned long nodes_alloced;
+
+ /*
+ * pointer to the array storing the elements. The first ULIST_SIZE
+ * elements are stored inline. In this case the it points to int_nodes.
+ * After exceeding ULIST_SIZE, dynamic memory is allocated.
+ */
+ struct ulist_node *nodes;
+
+ /*
+ * inline storage space for the first ULIST_SIZE entries
+ */
+ struct ulist_node int_nodes[ULIST_SIZE];
+};
+
+void ulist_init(struct ulist *ulist);
+void ulist_fini(struct ulist *ulist);
+void ulist_reinit(struct ulist *ulist);
+struct ulist *ulist_alloc(unsigned long gfp_mask);
+void ulist_free(struct ulist *ulist);
+int ulist_add(struct ulist *ulist, u64 val, unsigned long aux,
+ unsigned long gfp_mask);
+struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev);
+
+#endif
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index f4b839fd3c9..0b4e2af7954 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -23,6 +23,7 @@
#include <linux/random.h>
#include <linux/iocontext.h>
#include <linux/capability.h>
+#include <linux/kthread.h>
#include <asm/div64.h>
#include "compat.h"
#include "ctree.h"
@@ -32,6 +33,7 @@
#include "print-tree.h"
#include "volumes.h"
#include "async-thread.h"
+#include "check-integrity.h"
static int init_first_rw_device(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -246,7 +248,7 @@ loop_lock:
sync_pending = 0;
}
- submit_bio(cur->bi_rw, cur);
+ btrfsic_submit_bio(cur->bi_rw, cur);
num_run++;
batch_run++;
if (need_resched())
@@ -706,8 +708,6 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
u64 devid;
u64 transid;
- mutex_lock(&uuid_mutex);
-
flags |= FMODE_EXCL;
bdev = blkdev_get_by_path(path, flags, holder);
@@ -716,6 +716,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
goto error;
}
+ mutex_lock(&uuid_mutex);
ret = set_blocksize(bdev, 4096);
if (ret)
goto error_close;
@@ -737,9 +738,9 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
brelse(bh);
error_close:
+ mutex_unlock(&uuid_mutex);
blkdev_put(bdev, flags);
error:
- mutex_unlock(&uuid_mutex);
return ret;
}
@@ -829,7 +830,6 @@ out:
/*
* find_free_dev_extent - find free space in the specified device
- * @trans: transaction handler
* @device: the device which we search the free space in
* @num_bytes: the size of the free space that we need
* @start: store the start of the free space.
@@ -848,8 +848,7 @@ out:
* But if we don't find suitable free space, it is used to store the size of
* the max free space.
*/
-int find_free_dev_extent(struct btrfs_trans_handle *trans,
- struct btrfs_device *device, u64 num_bytes,
+int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
u64 *start, u64 *len)
{
struct btrfs_key key;
@@ -893,7 +892,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
key.offset = search_start;
key.type = BTRFS_DEV_EXTENT_KEY;
- ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto out;
if (ret > 0) {
@@ -1282,7 +1281,6 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
bool clear_super = false;
mutex_lock(&uuid_mutex);
- mutex_lock(&root->fs_info->volume_mutex);
all_avail = root->fs_info->avail_data_alloc_bits |
root->fs_info->avail_system_alloc_bits |
@@ -1452,7 +1450,6 @@ error_close:
if (bdev)
blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
out:
- mutex_unlock(&root->fs_info->volume_mutex);
mutex_unlock(&uuid_mutex);
return ret;
error_undo:
@@ -1469,8 +1466,7 @@ error_undo:
/*
* does all the dirty work required for changing file system's UUID.
*/
-static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
+static int btrfs_prepare_sprout(struct btrfs_root *root)
{
struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
struct btrfs_fs_devices *old_devices;
@@ -1629,7 +1625,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
}
filemap_write_and_wait(bdev->bd_inode->i_mapping);
- mutex_lock(&root->fs_info->volume_mutex);
devices = &root->fs_info->fs_devices->devices;
/*
@@ -1695,7 +1690,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
if (seeding_dev) {
sb->s_flags &= ~MS_RDONLY;
- ret = btrfs_prepare_sprout(trans, root);
+ ret = btrfs_prepare_sprout(root);
BUG_ON(ret);
}
@@ -1757,8 +1752,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
ret = btrfs_relocate_sys_chunks(root);
BUG_ON(ret);
}
-out:
- mutex_unlock(&root->fs_info->volume_mutex);
+
return ret;
error:
blkdev_put(bdev, FMODE_EXCL);
@@ -1766,7 +1760,7 @@ error:
mutex_unlock(&uuid_mutex);
up_write(&sb->s_umount);
}
- goto out;
+ return ret;
}
static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
@@ -2077,6 +2071,362 @@ error:
return ret;
}
+static int insert_balance_item(struct btrfs_root *root,
+ struct btrfs_balance_control *bctl)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_balance_item *item;
+ struct btrfs_disk_balance_args disk_bargs;
+ struct btrfs_path *path;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+ int ret, err;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans)) {
+ btrfs_free_path(path);
+ return PTR_ERR(trans);
+ }
+
+ key.objectid = BTRFS_BALANCE_OBJECTID;
+ key.type = BTRFS_BALANCE_ITEM_KEY;
+ key.offset = 0;
+
+ ret = btrfs_insert_empty_item(trans, root, path, &key,
+ sizeof(*item));
+ if (ret)
+ goto out;
+
+ leaf = path->nodes[0];
+ item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
+
+ memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
+
+ btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->data);
+ btrfs_set_balance_data(leaf, item, &disk_bargs);
+ btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->meta);
+ btrfs_set_balance_meta(leaf, item, &disk_bargs);
+ btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->sys);
+ btrfs_set_balance_sys(leaf, item, &disk_bargs);
+
+ btrfs_set_balance_flags(leaf, item, bctl->flags);
+
+ btrfs_mark_buffer_dirty(leaf);
+out:
+ btrfs_free_path(path);
+ err = btrfs_commit_transaction(trans, root);
+ if (err && !ret)
+ ret = err;
+ return ret;
+}
+
+static int del_balance_item(struct btrfs_root *root)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ int ret, err;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans)) {
+ btrfs_free_path(path);
+ return PTR_ERR(trans);
+ }
+
+ key.objectid = BTRFS_BALANCE_OBJECTID;
+ key.type = BTRFS_BALANCE_ITEM_KEY;
+ key.offset = 0;
+
+ ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+ if (ret < 0)
+ goto out;
+ if (ret > 0) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ ret = btrfs_del_item(trans, root, path);
+out:
+ btrfs_free_path(path);
+ err = btrfs_commit_transaction(trans, root);
+ if (err && !ret)
+ ret = err;
+ return ret;
+}
+
+/*
+ * This is a heuristic used to reduce the number of chunks balanced on
+ * resume after balance was interrupted.
+ */
+static void update_balance_args(struct btrfs_balance_control *bctl)
+{
+ /*
+ * Turn on soft mode for chunk types that were being converted.
+ */
+ if (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)
+ bctl->data.flags |= BTRFS_BALANCE_ARGS_SOFT;
+ if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)
+ bctl->sys.flags |= BTRFS_BALANCE_ARGS_SOFT;
+ if (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)
+ bctl->meta.flags |= BTRFS_BALANCE_ARGS_SOFT;
+
+ /*
+ * Turn on usage filter if is not already used. The idea is
+ * that chunks that we have already balanced should be
+ * reasonably full. Don't do it for chunks that are being
+ * converted - that will keep us from relocating unconverted
+ * (albeit full) chunks.
+ */
+ if (!(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE) &&
+ !(bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
+ bctl->data.flags |= BTRFS_BALANCE_ARGS_USAGE;
+ bctl->data.usage = 90;
+ }
+ if (!(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE) &&
+ !(bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
+ bctl->sys.flags |= BTRFS_BALANCE_ARGS_USAGE;
+ bctl->sys.usage = 90;
+ }
+ if (!(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE) &&
+ !(bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
+ bctl->meta.flags |= BTRFS_BALANCE_ARGS_USAGE;
+ bctl->meta.usage = 90;
+ }
+}
+
+/*
+ * Should be called with both balance and volume mutexes held to
+ * serialize other volume operations (add_dev/rm_dev/resize) with
+ * restriper. Same goes for unset_balance_control.
+ */
+static void set_balance_control(struct btrfs_balance_control *bctl)
+{
+ struct btrfs_fs_info *fs_info = bctl->fs_info;
+
+ BUG_ON(fs_info->balance_ctl);
+
+ spin_lock(&fs_info->balance_lock);
+ fs_info->balance_ctl = bctl;
+ spin_unlock(&fs_info->balance_lock);
+}
+
+static void unset_balance_control(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_balance_control *bctl = fs_info->balance_ctl;
+
+ BUG_ON(!fs_info->balance_ctl);
+
+ spin_lock(&fs_info->balance_lock);
+ fs_info->balance_ctl = NULL;
+ spin_unlock(&fs_info->balance_lock);
+
+ kfree(bctl);
+}
+
+/*
+ * Balance filters. Return 1 if chunk should be filtered out
+ * (should not be balanced).
+ */
+static int chunk_profiles_filter(u64 chunk_profile,
+ struct btrfs_balance_args *bargs)
+{
+ chunk_profile &= BTRFS_BLOCK_GROUP_PROFILE_MASK;
+
+ if (chunk_profile == 0)
+ chunk_profile = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
+
+ if (bargs->profiles & chunk_profile)
+ return 0;
+
+ return 1;
+}
+
+static u64 div_factor_fine(u64 num, int factor)
+{
+ if (factor <= 0)
+ return 0;
+ if (factor >= 100)
+ return num;
+
+ num *= factor;
+ do_div(num, 100);
+ return num;
+}
+
+static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
+ struct btrfs_balance_args *bargs)
+{
+ struct btrfs_block_group_cache *cache;
+ u64 chunk_used, user_thresh;
+ int ret = 1;
+
+ cache = btrfs_lookup_block_group(fs_info, chunk_offset);
+ chunk_used = btrfs_block_group_used(&cache->item);
+
+ user_thresh = div_factor_fine(cache->key.offset, bargs->usage);
+ if (chunk_used < user_thresh)
+ ret = 0;
+
+ btrfs_put_block_group(cache);
+ return ret;
+}
+
+static int chunk_devid_filter(struct extent_buffer *leaf,
+ struct btrfs_chunk *chunk,
+ struct btrfs_balance_args *bargs)
+{
+ struct btrfs_stripe *stripe;
+ int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+ int i;
+
+ for (i = 0; i < num_stripes; i++) {
+ stripe = btrfs_stripe_nr(chunk, i);
+ if (btrfs_stripe_devid(leaf, stripe) == bargs->devid)
+ return 0;
+ }
+
+ return 1;
+}
+
+/* [pstart, pend) */
+static int chunk_drange_filter(struct extent_buffer *leaf,
+ struct btrfs_chunk *chunk,
+ u64 chunk_offset,
+ struct btrfs_balance_args *bargs)
+{
+ struct btrfs_stripe *stripe;
+ int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+ u64 stripe_offset;
+ u64 stripe_length;
+ int factor;
+ int i;
+
+ if (!(bargs->flags & BTRFS_BALANCE_ARGS_DEVID))
+ return 0;
+
+ if (btrfs_chunk_type(leaf, chunk) & (BTRFS_BLOCK_GROUP_DUP |
+ BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10))
+ factor = 2;
+ else
+ factor = 1;
+ factor = num_stripes / factor;
+
+ for (i = 0; i < num_stripes; i++) {
+ stripe = btrfs_stripe_nr(chunk, i);
+ if (btrfs_stripe_devid(leaf, stripe) != bargs->devid)
+ continue;
+
+ stripe_offset = btrfs_stripe_offset(leaf, stripe);
+ stripe_length = btrfs_chunk_length(leaf, chunk);
+ do_div(stripe_length, factor);
+
+ if (stripe_offset < bargs->pend &&
+ stripe_offset + stripe_length > bargs->pstart)
+ return 0;
+ }
+
+ return 1;
+}
+
+/* [vstart, vend) */
+static int chunk_vrange_filter(struct extent_buffer *leaf,
+ struct btrfs_chunk *chunk,
+ u64 chunk_offset,
+ struct btrfs_balance_args *bargs)
+{
+ if (chunk_offset < bargs->vend &&
+ chunk_offset + btrfs_chunk_length(leaf, chunk) > bargs->vstart)
+ /* at least part of the chunk is inside this vrange */
+ return 0;
+
+ return 1;
+}
+
+static int chunk_soft_convert_filter(u64 chunk_profile,
+ struct btrfs_balance_args *bargs)
+{
+ if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
+ return 0;
+
+ chunk_profile &= BTRFS_BLOCK_GROUP_PROFILE_MASK;
+
+ if (chunk_profile == 0)
+ chunk_profile = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
+
+ if (bargs->target & chunk_profile)
+ return 1;
+
+ return 0;
+}
+
+static int should_balance_chunk(struct btrfs_root *root,
+ struct extent_buffer *leaf,
+ struct btrfs_chunk *chunk, u64 chunk_offset)
+{
+ struct btrfs_balance_control *bctl = root->fs_info->balance_ctl;
+ struct btrfs_balance_args *bargs = NULL;
+ u64 chunk_type = btrfs_chunk_type(leaf, chunk);
+
+ /* type filter */
+ if (!((chunk_type & BTRFS_BLOCK_GROUP_TYPE_MASK) &
+ (bctl->flags & BTRFS_BALANCE_TYPE_MASK))) {
+ return 0;
+ }
+
+ if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
+ bargs = &bctl->data;
+ else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
+ bargs = &bctl->sys;
+ else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
+ bargs = &bctl->meta;
+
+ /* profiles filter */
+ if ((bargs->flags & BTRFS_BALANCE_ARGS_PROFILES) &&
+ chunk_profiles_filter(chunk_type, bargs)) {
+ return 0;
+ }
+
+ /* usage filter */
+ if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE) &&
+ chunk_usage_filter(bctl->fs_info, chunk_offset, bargs)) {
+ return 0;
+ }
+
+ /* devid filter */
+ if ((bargs->flags & BTRFS_BALANCE_ARGS_DEVID) &&
+ chunk_devid_filter(leaf, chunk, bargs)) {
+ return 0;
+ }
+
+ /* drange filter, makes sense only with devid filter */
+ if ((bargs->flags & BTRFS_BALANCE_ARGS_DRANGE) &&
+ chunk_drange_filter(leaf, chunk, chunk_offset, bargs)) {
+ return 0;
+ }
+
+ /* vrange filter */
+ if ((bargs->flags & BTRFS_BALANCE_ARGS_VRANGE) &&
+ chunk_vrange_filter(leaf, chunk, chunk_offset, bargs)) {
+ return 0;
+ }
+
+ /* soft profile changing mode */
+ if ((bargs->flags & BTRFS_BALANCE_ARGS_SOFT) &&
+ chunk_soft_convert_filter(chunk_type, bargs)) {
+ return 0;
+ }
+
+ return 1;
+}
+
static u64 div_factor(u64 num, int factor)
{
if (factor == 10)
@@ -2086,29 +2436,28 @@ static u64 div_factor(u64 num, int factor)
return num;
}
-int btrfs_balance(struct btrfs_root *dev_root)
+static int __btrfs_balance(struct btrfs_fs_info *fs_info)
{
- int ret;
- struct list_head *devices = &dev_root->fs_info->fs_devices->devices;
+ struct btrfs_balance_control *bctl = fs_info->balance_ctl;
+ struct btrfs_root *chunk_root = fs_info->chunk_root;
+ struct btrfs_root *dev_root = fs_info->dev_root;
+ struct list_head *devices;
struct btrfs_device *device;
u64 old_size;
u64 size_to_free;
+ struct btrfs_chunk *chunk;
struct btrfs_path *path;
struct btrfs_key key;
- struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root;
- struct btrfs_trans_handle *trans;
struct btrfs_key found_key;
-
- if (dev_root->fs_info->sb->s_flags & MS_RDONLY)
- return -EROFS;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- mutex_lock(&dev_root->fs_info->volume_mutex);
- dev_root = dev_root->fs_info->dev_root;
+ struct btrfs_trans_handle *trans;
+ struct extent_buffer *leaf;
+ int slot;
+ int ret;
+ int enospc_errors = 0;
+ bool counting = true;
/* step one make some room on all the devices */
+ devices = &fs_info->fs_devices->devices;
list_for_each_entry(device, devices, dev_list) {
old_size = device->total_bytes;
size_to_free = div_factor(old_size, 1);
@@ -2137,11 +2486,23 @@ int btrfs_balance(struct btrfs_root *dev_root)
ret = -ENOMEM;
goto error;
}
+
+ /* zero out stat counters */
+ spin_lock(&fs_info->balance_lock);
+ memset(&bctl->stat, 0, sizeof(bctl->stat));
+ spin_unlock(&fs_info->balance_lock);
+again:
key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
key.offset = (u64)-1;
key.type = BTRFS_CHUNK_ITEM_KEY;
while (1) {
+ if ((!counting && atomic_read(&fs_info->balance_pause_req)) ||
+ atomic_read(&fs_info->balance_cancel_req)) {
+ ret = -ECANCELED;
+ goto error;
+ }
+
ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
if (ret < 0)
goto error;
@@ -2151,15 +2512,19 @@ int btrfs_balance(struct btrfs_root *dev_root)
* failed
*/
if (ret == 0)
- break;
+ BUG(); /* FIXME break ? */
ret = btrfs_previous_item(chunk_root, path, 0,
BTRFS_CHUNK_ITEM_KEY);
- if (ret)
+ if (ret) {
+ ret = 0;
break;
+ }
+
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ btrfs_item_key_to_cpu(leaf, &found_key, slot);
- btrfs_item_key_to_cpu(path->nodes[0], &found_key,
- path->slots[0]);
if (found_key.objectid != key.objectid)
break;
@@ -2167,22 +2532,375 @@ int btrfs_balance(struct btrfs_root *dev_root)
if (found_key.offset == 0)
break;
+ chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
+
+ if (!counting) {
+ spin_lock(&fs_info->balance_lock);
+ bctl->stat.considered++;
+ spin_unlock(&fs_info->balance_lock);
+ }
+
+ ret = should_balance_chunk(chunk_root, leaf, chunk,
+ found_key.offset);
btrfs_release_path(path);
+ if (!ret)
+ goto loop;
+
+ if (counting) {
+ spin_lock(&fs_info->balance_lock);
+ bctl->stat.expected++;
+ spin_unlock(&fs_info->balance_lock);
+ goto loop;
+ }
+
ret = btrfs_relocate_chunk(chunk_root,
chunk_root->root_key.objectid,
found_key.objectid,
found_key.offset);
if (ret && ret != -ENOSPC)
goto error;
+ if (ret == -ENOSPC) {
+ enospc_errors++;
+ } else {
+ spin_lock(&fs_info->balance_lock);
+ bctl->stat.completed++;
+ spin_unlock(&fs_info->balance_lock);
+ }
+loop:
key.offset = found_key.offset - 1;
}
- ret = 0;
+
+ if (counting) {
+ btrfs_release_path(path);
+ counting = false;
+ goto again;
+ }
error:
btrfs_free_path(path);
- mutex_unlock(&dev_root->fs_info->volume_mutex);
+ if (enospc_errors) {
+ printk(KERN_INFO "btrfs: %d enospc errors during balance\n",
+ enospc_errors);
+ if (!ret)
+ ret = -ENOSPC;
+ }
+
return ret;
}
+static inline int balance_need_close(struct btrfs_fs_info *fs_info)
+{
+ /* cancel requested || normal exit path */
+ return atomic_read(&fs_info->balance_cancel_req) ||
+ (atomic_read(&fs_info->balance_pause_req) == 0 &&
+ atomic_read(&fs_info->balance_cancel_req) == 0);
+}
+
+static void __cancel_balance(struct btrfs_fs_info *fs_info)
+{
+ int ret;
+
+ unset_balance_control(fs_info);
+ ret = del_balance_item(fs_info->tree_root);
+ BUG_ON(ret);
+}
+
+void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
+ struct btrfs_ioctl_balance_args *bargs);
+
+/*
+ * Should be called with both balance and volume mutexes held
+ */
+int btrfs_balance(struct btrfs_balance_control *bctl,
+ struct btrfs_ioctl_balance_args *bargs)
+{
+ struct btrfs_fs_info *fs_info = bctl->fs_info;
+ u64 allowed;
+ int ret;
+
+ if (btrfs_fs_closing(fs_info) ||
+ atomic_read(&fs_info->balance_pause_req) ||
+ atomic_read(&fs_info->balance_cancel_req)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * In case of mixed groups both data and meta should be picked,
+ * and identical options should be given for both of them.
+ */
+ allowed = btrfs_super_incompat_flags(fs_info->super_copy);
+ if ((allowed & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
+ (bctl->flags & (BTRFS_BALANCE_DATA | BTRFS_BALANCE_METADATA))) {
+ if (!(bctl->flags & BTRFS_BALANCE_DATA) ||
+ !(bctl->flags & BTRFS_BALANCE_METADATA) ||
+ memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) {
+ printk(KERN_ERR "btrfs: with mixed groups data and "
+ "metadata balance options must be the same\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ /*
+ * Profile changing sanity checks. Skip them if a simple
+ * balance is requested.
+ */
+ if (!((bctl->data.flags | bctl->sys.flags | bctl->meta.flags) &
+ BTRFS_BALANCE_ARGS_CONVERT))
+ goto do_balance;
+
+ allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
+ if (fs_info->fs_devices->num_devices == 1)
+ allowed |= BTRFS_BLOCK_GROUP_DUP;
+ else if (fs_info->fs_devices->num_devices < 4)
+ allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
+ else
+ allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID10);
+
+ if (!profile_is_valid(bctl->data.target, 1) ||
+ bctl->data.target & ~allowed) {
+ printk(KERN_ERR "btrfs: unable to start balance with target "
+ "data profile %llu\n",
+ (unsigned long long)bctl->data.target);
+ ret = -EINVAL;
+ goto out;
+ }
+ if (!profile_is_valid(bctl->meta.target, 1) ||
+ bctl->meta.target & ~allowed) {
+ printk(KERN_ERR "btrfs: unable to start balance with target "
+ "metadata profile %llu\n",
+ (unsigned long long)bctl->meta.target);
+ ret = -EINVAL;
+ goto out;
+ }
+ if (!profile_is_valid(bctl->sys.target, 1) ||
+ bctl->sys.target & ~allowed) {
+ printk(KERN_ERR "btrfs: unable to start balance with target "
+ "system profile %llu\n",
+ (unsigned long long)bctl->sys.target);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (bctl->data.target & BTRFS_BLOCK_GROUP_DUP) {
+ printk(KERN_ERR "btrfs: dup for data is not allowed\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* allow to reduce meta or sys integrity only if force set */
+ allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID10;
+ if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
+ (fs_info->avail_system_alloc_bits & allowed) &&
+ !(bctl->sys.target & allowed)) ||
+ ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
+ (fs_info->avail_metadata_alloc_bits & allowed) &&
+ !(bctl->meta.target & allowed))) {
+ if (bctl->flags & BTRFS_BALANCE_FORCE) {
+ printk(KERN_INFO "btrfs: force reducing metadata "
+ "integrity\n");
+ } else {
+ printk(KERN_ERR "btrfs: balance will reduce metadata "
+ "integrity, use force if you want this\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+do_balance:
+ ret = insert_balance_item(fs_info->tree_root, bctl);
+ if (ret && ret != -EEXIST)
+ goto out;
+
+ if (!(bctl->flags & BTRFS_BALANCE_RESUME)) {
+ BUG_ON(ret == -EEXIST);
+ set_balance_control(bctl);
+ } else {
+ BUG_ON(ret != -EEXIST);
+ spin_lock(&fs_info->balance_lock);
+ update_balance_args(bctl);
+ spin_unlock(&fs_info->balance_lock);
+ }
+
+ atomic_inc(&fs_info->balance_running);
+ mutex_unlock(&fs_info->balance_mutex);
+
+ ret = __btrfs_balance(fs_info);
+
+ mutex_lock(&fs_info->balance_mutex);
+ atomic_dec(&fs_info->balance_running);
+
+ if (bargs) {
+ memset(bargs, 0, sizeof(*bargs));
+ update_ioctl_balance_args(fs_info, 0, bargs);
+ }
+
+ if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
+ balance_need_close(fs_info)) {
+ __cancel_balance(fs_info);
+ }
+
+ wake_up(&fs_info->balance_wait_q);
+
+ return ret;
+out:
+ if (bctl->flags & BTRFS_BALANCE_RESUME)
+ __cancel_balance(fs_info);
+ else
+ kfree(bctl);
+ return ret;
+}
+
+static int balance_kthread(void *data)
+{
+ struct btrfs_balance_control *bctl =
+ (struct btrfs_balance_control *)data;
+ struct btrfs_fs_info *fs_info = bctl->fs_info;
+ int ret = 0;
+
+ mutex_lock(&fs_info->volume_mutex);
+ mutex_lock(&fs_info->balance_mutex);
+
+ set_balance_control(bctl);
+
+ if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
+ printk(KERN_INFO "btrfs: force skipping balance\n");
+ } else {
+ printk(KERN_INFO "btrfs: continuing balance\n");
+ ret = btrfs_balance(bctl, NULL);
+ }
+
+ mutex_unlock(&fs_info->balance_mutex);
+ mutex_unlock(&fs_info->volume_mutex);
+ return ret;
+}
+
+int btrfs_recover_balance(struct btrfs_root *tree_root)
+{
+ struct task_struct *tsk;
+ struct btrfs_balance_control *bctl;
+ struct btrfs_balance_item *item;
+ struct btrfs_disk_balance_args disk_bargs;
+ struct btrfs_path *path;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
+ if (!bctl) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ key.objectid = BTRFS_BALANCE_OBJECTID;
+ key.type = BTRFS_BALANCE_ITEM_KEY;
+ key.offset = 0;
+
+ ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out_bctl;
+ if (ret > 0) { /* ret = -ENOENT; */
+ ret = 0;
+ goto out_bctl;
+ }
+
+ leaf = path->nodes[0];
+ item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
+
+ bctl->fs_info = tree_root->fs_info;
+ bctl->flags = btrfs_balance_flags(leaf, item) | BTRFS_BALANCE_RESUME;
+
+ btrfs_balance_data(leaf, item, &disk_bargs);
+ btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
+ btrfs_balance_meta(leaf, item, &disk_bargs);
+ btrfs_disk_balance_args_to_cpu(&bctl->meta, &disk_bargs);
+ btrfs_balance_sys(leaf, item, &disk_bargs);
+ btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
+
+ tsk = kthread_run(balance_kthread, bctl, "btrfs-balance");
+ if (IS_ERR(tsk))
+ ret = PTR_ERR(tsk);
+ else
+ goto out;
+
+out_bctl:
+ kfree(bctl);
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
+{
+ int ret = 0;
+
+ mutex_lock(&fs_info->balance_mutex);
+ if (!fs_info->balance_ctl) {
+ mutex_unlock(&fs_info->balance_mutex);
+ return -ENOTCONN;
+ }
+
+ if (atomic_read(&fs_info->balance_running)) {
+ atomic_inc(&fs_info->balance_pause_req);
+ mutex_unlock(&fs_info->balance_mutex);
+
+ wait_event(fs_info->balance_wait_q,
+ atomic_read(&fs_info->balance_running) == 0);
+
+ mutex_lock(&fs_info->balance_mutex);
+ /* we are good with balance_ctl ripped off from under us */
+ BUG_ON(atomic_read(&fs_info->balance_running));
+ atomic_dec(&fs_info->balance_pause_req);
+ } else {
+ ret = -ENOTCONN;
+ }
+
+ mutex_unlock(&fs_info->balance_mutex);
+ return ret;
+}
+
+int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
+{
+ mutex_lock(&fs_info->balance_mutex);
+ if (!fs_info->balance_ctl) {
+ mutex_unlock(&fs_info->balance_mutex);
+ return -ENOTCONN;
+ }
+
+ atomic_inc(&fs_info->balance_cancel_req);
+ /*
+ * if we are running just wait and return, balance item is
+ * deleted in btrfs_balance in this case
+ */
+ if (atomic_read(&fs_info->balance_running)) {
+ mutex_unlock(&fs_info->balance_mutex);
+ wait_event(fs_info->balance_wait_q,
+ atomic_read(&fs_info->balance_running) == 0);
+ mutex_lock(&fs_info->balance_mutex);
+ } else {
+ /* __cancel_balance needs volume_mutex */
+ mutex_unlock(&fs_info->balance_mutex);
+ mutex_lock(&fs_info->volume_mutex);
+ mutex_lock(&fs_info->balance_mutex);
+
+ if (fs_info->balance_ctl)
+ __cancel_balance(fs_info);
+
+ mutex_unlock(&fs_info->volume_mutex);
+ }
+
+ BUG_ON(fs_info->balance_ctl || atomic_read(&fs_info->balance_running));
+ atomic_dec(&fs_info->balance_cancel_req);
+ mutex_unlock(&fs_info->balance_mutex);
+ return 0;
+}
+
/*
* shrinking a device means finding all of the device extents past
* the new size, and then following the back refs to the chunks.
@@ -2323,8 +3041,7 @@ done:
return ret;
}
-static int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+static int btrfs_add_system_chunk(struct btrfs_root *root,
struct btrfs_key *key,
struct btrfs_chunk *chunk, int item_size)
{
@@ -2441,10 +3158,14 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
max_stripe_size = 1024 * 1024 * 1024;
max_chunk_size = 10 * max_stripe_size;
} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
- max_stripe_size = 256 * 1024 * 1024;
+ /* for larger filesystems, use larger metadata chunks */
+ if (fs_devices->total_rw_bytes > 50ULL * 1024 * 1024 * 1024)
+ max_stripe_size = 1024 * 1024 * 1024;
+ else
+ max_stripe_size = 256 * 1024 * 1024;
max_chunk_size = max_stripe_size;
} else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
- max_stripe_size = 8 * 1024 * 1024;
+ max_stripe_size = 32 * 1024 * 1024;
max_chunk_size = 2 * max_stripe_size;
} else {
printk(KERN_ERR "btrfs: invalid chunk type 0x%llx requested\n",
@@ -2496,7 +3217,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
if (total_avail == 0)
continue;
- ret = find_free_dev_extent(trans, device,
+ ret = find_free_dev_extent(device,
max_stripe_size * dev_stripes,
&dev_offset, &max_avail);
if (ret && ret != -ENOSPC)
@@ -2687,7 +3408,7 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
BUG_ON(ret);
if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
- ret = btrfs_add_system_chunk(trans, chunk_root, &key, chunk,
+ ret = btrfs_add_system_chunk(chunk_root, &key, chunk,
item_size);
BUG_ON(ret);
}
@@ -2752,8 +3473,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
return ret;
alloc_profile = BTRFS_BLOCK_GROUP_METADATA |
- (fs_info->metadata_alloc_profile &
- fs_info->avail_metadata_alloc_bits);
+ fs_info->avail_metadata_alloc_bits;
alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
@@ -2763,8 +3483,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
sys_chunk_offset = chunk_offset + chunk_size;
alloc_profile = BTRFS_BLOCK_GROUP_SYSTEM |
- (fs_info->system_alloc_profile &
- fs_info->avail_system_alloc_bits);
+ fs_info->avail_system_alloc_bits;
alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map,
@@ -2901,26 +3620,13 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
u64 stripe_nr;
u64 stripe_nr_orig;
u64 stripe_nr_end;
- int stripes_allocated = 8;
- int stripes_required = 1;
int stripe_index;
int i;
+ int ret = 0;
int num_stripes;
int max_errors = 0;
struct btrfs_bio *bbio = NULL;
- if (bbio_ret && !(rw & (REQ_WRITE | REQ_DISCARD)))
- stripes_allocated = 1;
-again:
- if (bbio_ret) {
- bbio = kzalloc(btrfs_bio_size(stripes_allocated),
- GFP_NOFS);
- if (!bbio)
- return -ENOMEM;
-
- atomic_set(&bbio->error, 0);
- }
-
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, logical, *length);
read_unlock(&em_tree->lock);
@@ -2939,32 +3645,6 @@ again:
if (mirror_num > map->num_stripes)
mirror_num = 0;
- /* if our btrfs_bio struct is too small, back off and try again */
- if (rw & REQ_WRITE) {
- if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_DUP)) {
- stripes_required = map->num_stripes;
- max_errors = 1;
- } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
- stripes_required = map->sub_stripes;
- max_errors = 1;
- }
- }
- if (rw & REQ_DISCARD) {
- if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
- BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_DUP |
- BTRFS_BLOCK_GROUP_RAID10)) {
- stripes_required = map->num_stripes;
- }
- }
- if (bbio_ret && (rw & (REQ_WRITE | REQ_DISCARD)) &&
- stripes_allocated < stripes_required) {
- stripes_allocated = map->num_stripes;
- free_extent_map(em);
- kfree(bbio);
- goto again;
- }
stripe_nr = offset;
/*
* stripe_nr counts the total number of stripes we have to stride
@@ -2980,10 +3660,7 @@ again:
if (rw & REQ_DISCARD)
*length = min_t(u64, em->len - offset, *length);
- else if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
- BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10 |
- BTRFS_BLOCK_GROUP_DUP)) {
+ else if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
/* we limit the length of each bio to what fits in a stripe */
*length = min_t(u64, em->len - offset,
map->stripe_len - stripe_offset);
@@ -3059,81 +3736,55 @@ again:
}
BUG_ON(stripe_index >= map->num_stripes);
+ bbio = kzalloc(btrfs_bio_size(num_stripes), GFP_NOFS);
+ if (!bbio) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ atomic_set(&bbio->error, 0);
+
if (rw & REQ_DISCARD) {
+ int factor = 0;
+ int sub_stripes = 0;
+ u64 stripes_per_dev = 0;
+ u32 remaining_stripes = 0;
+
+ if (map->type &
+ (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) {
+ if (map->type & BTRFS_BLOCK_GROUP_RAID0)
+ sub_stripes = 1;
+ else
+ sub_stripes = map->sub_stripes;
+
+ factor = map->num_stripes / sub_stripes;
+ stripes_per_dev = div_u64_rem(stripe_nr_end -
+ stripe_nr_orig,
+ factor,
+ &remaining_stripes);
+ }
+
for (i = 0; i < num_stripes; i++) {
bbio->stripes[i].physical =
map->stripes[stripe_index].physical +
stripe_offset + stripe_nr * map->stripe_len;
bbio->stripes[i].dev = map->stripes[stripe_index].dev;
- if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
- u64 stripes;
- u32 last_stripe = 0;
- int j;
-
- div_u64_rem(stripe_nr_end - 1,
- map->num_stripes,
- &last_stripe);
-
- for (j = 0; j < map->num_stripes; j++) {
- u32 test;
-
- div_u64_rem(stripe_nr_end - 1 - j,
- map->num_stripes, &test);
- if (test == stripe_index)
- break;
- }
- stripes = stripe_nr_end - 1 - j;
- do_div(stripes, map->num_stripes);
- bbio->stripes[i].length = map->stripe_len *
- (stripes - stripe_nr + 1);
-
- if (i == 0) {
- bbio->stripes[i].length -=
- stripe_offset;
- stripe_offset = 0;
- }
- if (stripe_index == last_stripe)
- bbio->stripes[i].length -=
- stripe_end_offset;
- } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
- u64 stripes;
- int j;
- int factor = map->num_stripes /
- map->sub_stripes;
- u32 last_stripe = 0;
-
- div_u64_rem(stripe_nr_end - 1,
- factor, &last_stripe);
- last_stripe *= map->sub_stripes;
-
- for (j = 0; j < factor; j++) {
- u32 test;
-
- div_u64_rem(stripe_nr_end - 1 - j,
- factor, &test);
-
- if (test ==
- stripe_index / map->sub_stripes)
- break;
- }
- stripes = stripe_nr_end - 1 - j;
- do_div(stripes, factor);
- bbio->stripes[i].length = map->stripe_len *
- (stripes - stripe_nr + 1);
-
- if (i < map->sub_stripes) {
+ if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
+ BTRFS_BLOCK_GROUP_RAID10)) {
+ bbio->stripes[i].length = stripes_per_dev *
+ map->stripe_len;
+ if (i / sub_stripes < remaining_stripes)
+ bbio->stripes[i].length +=
+ map->stripe_len;
+ if (i < sub_stripes)
bbio->stripes[i].length -=
stripe_offset;
- if (i == map->sub_stripes - 1)
- stripe_offset = 0;
- }
- if (stripe_index >= last_stripe &&
- stripe_index <= (last_stripe +
- map->sub_stripes - 1)) {
+ if ((i / sub_stripes + 1) %
+ sub_stripes == remaining_stripes)
bbio->stripes[i].length -=
stripe_end_offset;
- }
+ if (i == sub_stripes - 1)
+ stripe_offset = 0;
} else
bbio->stripes[i].length = *length;
@@ -3155,15 +3806,22 @@ again:
stripe_index++;
}
}
- if (bbio_ret) {
- *bbio_ret = bbio;
- bbio->num_stripes = num_stripes;
- bbio->max_errors = max_errors;
- bbio->mirror_num = mirror_num;
+
+ if (rw & REQ_WRITE) {
+ if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID10 |
+ BTRFS_BLOCK_GROUP_DUP)) {
+ max_errors = 1;
+ }
}
+
+ *bbio_ret = bbio;
+ bbio->num_stripes = num_stripes;
+ bbio->max_errors = max_errors;
+ bbio->mirror_num = mirror_num;
out:
free_extent_map(em);
- return 0;
+ return ret;
}
int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
@@ -3304,7 +3962,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
/* don't bother with additional async steps for reads, right now */
if (!(rw & REQ_WRITE)) {
bio_get(bio);
- submit_bio(rw, bio);
+ btrfsic_submit_bio(rw, bio);
bio_put(bio);
return 0;
}
@@ -3399,7 +4057,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
if (async_submit)
schedule_bio(root, dev, rw, bio);
else
- submit_bio(rw, bio);
+ btrfsic_submit_bio(rw, bio);
} else {
bio->bi_bdev = root->fs_info->fs_devices->latest_bdev;
bio->bi_sector = logical >> 9;
@@ -3568,7 +4226,7 @@ static int open_seed_devices(struct btrfs_root *root, u8 *fsid)
struct btrfs_fs_devices *fs_devices;
int ret;
- mutex_lock(&uuid_mutex);
+ BUG_ON(!mutex_is_locked(&uuid_mutex));
fs_devices = root->fs_info->fs_devices->seed;
while (fs_devices) {
@@ -3606,7 +4264,6 @@ static int open_seed_devices(struct btrfs_root *root, u8 *fsid)
fs_devices->seed = root->fs_info->fs_devices->seed;
root->fs_info->fs_devices->seed = fs_devices;
out:
- mutex_unlock(&uuid_mutex);
return ret;
}
@@ -3749,6 +4406,9 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
if (!path)
return -ENOMEM;
+ mutex_lock(&uuid_mutex);
+ lock_chunks(root);
+
/* first we search for all of the device items, and then we
* read in all of the chunk items. This way we can create chunk
* mappings that reference all of the devices that are afound
@@ -3799,6 +4459,9 @@ again:
}
ret = 0;
error:
+ unlock_chunks(root);
+ mutex_unlock(&uuid_mutex);
+
btrfs_free_path(path);
return ret;
}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 78f2d4d4f37..19ac95048b8 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -186,6 +186,51 @@ struct map_lookup {
#define map_lookup_size(n) (sizeof(struct map_lookup) + \
(sizeof(struct btrfs_bio_stripe) * (n)))
+/*
+ * Restriper's general type filter
+ */
+#define BTRFS_BALANCE_DATA (1ULL << 0)
+#define BTRFS_BALANCE_SYSTEM (1ULL << 1)
+#define BTRFS_BALANCE_METADATA (1ULL << 2)
+
+#define BTRFS_BALANCE_TYPE_MASK (BTRFS_BALANCE_DATA | \
+ BTRFS_BALANCE_SYSTEM | \
+ BTRFS_BALANCE_METADATA)
+
+#define BTRFS_BALANCE_FORCE (1ULL << 3)
+#define BTRFS_BALANCE_RESUME (1ULL << 4)
+
+/*
+ * Balance filters
+ */
+#define BTRFS_BALANCE_ARGS_PROFILES (1ULL << 0)
+#define BTRFS_BALANCE_ARGS_USAGE (1ULL << 1)
+#define BTRFS_BALANCE_ARGS_DEVID (1ULL << 2)
+#define BTRFS_BALANCE_ARGS_DRANGE (1ULL << 3)
+#define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4)
+
+/*
+ * Profile changing flags. When SOFT is set we won't relocate chunk if
+ * it already has the target profile (even though it may be
+ * half-filled).
+ */
+#define BTRFS_BALANCE_ARGS_CONVERT (1ULL << 8)
+#define BTRFS_BALANCE_ARGS_SOFT (1ULL << 9)
+
+struct btrfs_balance_args;
+struct btrfs_balance_progress;
+struct btrfs_balance_control {
+ struct btrfs_fs_info *fs_info;
+
+ struct btrfs_balance_args data;
+ struct btrfs_balance_args meta;
+ struct btrfs_balance_args sys;
+
+ u64 flags;
+
+ struct btrfs_balance_progress stat;
+};
+
int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
u64 end, u64 *length);
@@ -228,9 +273,12 @@ struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
u8 *uuid, u8 *fsid);
int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
int btrfs_init_new_device(struct btrfs_root *root, char *path);
-int btrfs_balance(struct btrfs_root *dev_root);
+int btrfs_balance(struct btrfs_balance_control *bctl,
+ struct btrfs_ioctl_balance_args *bargs);
+int btrfs_recover_balance(struct btrfs_root *tree_root);
+int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
+int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
-int find_free_dev_extent(struct btrfs_trans_handle *trans,
- struct btrfs_device *device, u64 num_bytes,
+int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
u64 *start, u64 *max_avail);
#endif
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 3848b04e310..e7a5659087e 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -200,7 +200,7 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans,
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
out:
- btrfs_end_transaction_throttle(trans, root);
+ btrfs_end_transaction(trans, root);
return ret;
}
diff --git a/fs/namei.c b/fs/namei.c
index c283a1ec008..208c6aa4a98 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -140,21 +140,19 @@ static int do_getname(const char __user *filename, char *page)
static char *getname_flags(const char __user *filename, int flags, int *empty)
{
- char *tmp, *result;
-
- result = ERR_PTR(-ENOMEM);
- tmp = __getname();
- if (tmp) {
- int retval = do_getname(filename, tmp);
-
- result = tmp;
- if (retval < 0) {
- if (retval == -ENOENT && empty)
- *empty = 1;
- if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) {
- __putname(tmp);
- result = ERR_PTR(retval);
- }
+ char *result = __getname();
+ int retval;
+
+ if (!result)
+ return ERR_PTR(-ENOMEM);
+
+ retval = do_getname(filename, result);
+ if (retval < 0) {
+ if (retval == -ENOENT && empty)
+ *empty = 1;
+ if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) {
+ __putname(result);
+ return ERR_PTR(retval);
}
}
audit_getname(result);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 5485a5388ec..9cde9edf9c4 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -198,65 +198,7 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
return result;
}
-static struct mm_struct *__check_mem_permission(struct task_struct *task)
-{
- struct mm_struct *mm;
-
- mm = get_task_mm(task);
- if (!mm)
- return ERR_PTR(-EINVAL);
-
- /*
- * A task can always look at itself, in case it chooses
- * to use system calls instead of load instructions.
- */
- if (task == current)
- return mm;
-
- /*
- * If current is actively ptrace'ing, and would also be
- * permitted to freshly attach with ptrace now, permit it.
- */
- if (task_is_stopped_or_traced(task)) {
- int match;
- rcu_read_lock();
- match = (ptrace_parent(task) == current);
- rcu_read_unlock();
- if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH))
- return mm;
- }
-
- /*
- * No one else is allowed.
- */
- mmput(mm);
- return ERR_PTR(-EPERM);
-}
-
-/*
- * If current may access user memory in @task return a reference to the
- * corresponding mm, otherwise ERR_PTR.
- */
-static struct mm_struct *check_mem_permission(struct task_struct *task)
-{
- struct mm_struct *mm;
- int err;
-
- /*
- * Avoid racing if task exec's as we might get a new mm but validate
- * against old credentials.
- */
- err = mutex_lock_killable(&task->signal->cred_guard_mutex);
- if (err)
- return ERR_PTR(err);
-
- mm = __check_mem_permission(task);
- mutex_unlock(&task->signal->cred_guard_mutex);
-
- return mm;
-}
-
-struct mm_struct *mm_for_maps(struct task_struct *task)
+static struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
{
struct mm_struct *mm;
int err;
@@ -267,7 +209,7 @@ struct mm_struct *mm_for_maps(struct task_struct *task)
mm = get_task_mm(task);
if (mm && mm != current->mm &&
- !ptrace_may_access(task, PTRACE_MODE_READ)) {
+ !ptrace_may_access(task, mode)) {
mmput(mm);
mm = ERR_PTR(-EACCES);
}
@@ -276,6 +218,11 @@ struct mm_struct *mm_for_maps(struct task_struct *task)
return mm;
}
+struct mm_struct *mm_for_maps(struct task_struct *task)
+{
+ return mm_access(task, PTRACE_MODE_READ);
+}
+
static int proc_pid_cmdline(struct task_struct *task, char * buffer)
{
int res = 0;
@@ -752,38 +699,39 @@ static const struct file_operations proc_single_file_operations = {
static int mem_open(struct inode* inode, struct file* file)
{
- file->private_data = (void*)((long)current->self_exec_id);
+ struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+ struct mm_struct *mm;
+
+ if (!task)
+ return -ESRCH;
+
+ mm = mm_access(task, PTRACE_MODE_ATTACH);
+ put_task_struct(task);
+
+ if (IS_ERR(mm))
+ return PTR_ERR(mm);
+
/* OK to pass negative loff_t, we can catch out-of-range */
file->f_mode |= FMODE_UNSIGNED_OFFSET;
+ file->private_data = mm;
+
return 0;
}
static ssize_t mem_read(struct file * file, char __user * buf,
size_t count, loff_t *ppos)
{
- struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+ int ret;
char *page;
unsigned long src = *ppos;
- int ret = -ESRCH;
- struct mm_struct *mm;
+ struct mm_struct *mm = file->private_data;
- if (!task)
- goto out_no_task;
+ if (!mm)
+ return 0;
- ret = -ENOMEM;
page = (char *)__get_free_page(GFP_TEMPORARY);
if (!page)
- goto out;
-
- mm = check_mem_permission(task);
- ret = PTR_ERR(mm);
- if (IS_ERR(mm))
- goto out_free;
-
- ret = -EIO;
-
- if (file->private_data != (void*)((long)current->self_exec_id))
- goto out_put;
+ return -ENOMEM;
ret = 0;
@@ -810,13 +758,7 @@ static ssize_t mem_read(struct file * file, char __user * buf,
}
*ppos = src;
-out_put:
- mmput(mm);
-out_free:
free_page((unsigned long) page);
-out:
- put_task_struct(task);
-out_no_task:
return ret;
}
@@ -825,27 +767,15 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
{
int copied;
char *page;
- struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
unsigned long dst = *ppos;
- struct mm_struct *mm;
+ struct mm_struct *mm = file->private_data;
- copied = -ESRCH;
- if (!task)
- goto out_no_task;
+ if (!mm)
+ return 0;
- copied = -ENOMEM;
page = (char *)__get_free_page(GFP_TEMPORARY);
if (!page)
- goto out_task;
-
- mm = check_mem_permission(task);
- copied = PTR_ERR(mm);
- if (IS_ERR(mm))
- goto out_free;
-
- copied = -EIO;
- if (file->private_data != (void *)((long)current->self_exec_id))
- goto out_mm;
+ return -ENOMEM;
copied = 0;
while (count > 0) {
@@ -869,13 +799,7 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
}
*ppos = dst;
-out_mm:
- mmput(mm);
-out_free:
free_page((unsigned long) page);
-out_task:
- put_task_struct(task);
-out_no_task:
return copied;
}
@@ -895,11 +819,20 @@ loff_t mem_lseek(struct file *file, loff_t offset, int orig)
return file->f_pos;
}
+static int mem_release(struct inode *inode, struct file *file)
+{
+ struct mm_struct *mm = file->private_data;
+
+ mmput(mm);
+ return 0;
+}
+
static const struct file_operations proc_mem_operations = {
.llseek = mem_lseek,
.read = mem_read,
.write = mem_write,
.open = mem_open,
+ .release = mem_release,
};
static ssize_t environ_read(struct file *file, char __user *buf,
@@ -1199,9 +1132,6 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
ssize_t length;
uid_t loginuid;
- if (!capable(CAP_AUDIT_CONTROL))
- return -EPERM;
-
rcu_read_lock();
if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
rcu_read_unlock();
@@ -1230,7 +1160,7 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
goto out_free_page;
}
- length = audit_set_loginuid(current, loginuid);
+ length = audit_set_loginuid(loginuid);
if (likely(length == 0))
length = count;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 574d4ee9b62..74b9baf36ac 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -111,8 +111,7 @@ xfs_ioend_new_eof(
xfs_fsize_t bsize;
bsize = ioend->io_offset + ioend->io_size;
- isize = MAX(ip->i_size, ip->i_new_size);
- isize = MIN(isize, bsize);
+ isize = MIN(i_size_read(VFS_I(ip)), bsize);
return isize > ip->i_d.di_size ? isize : 0;
}
@@ -126,11 +125,7 @@ static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
}
/*
- * Update on-disk file size now that data has been written to disk. The
- * current in-memory file size is i_size. If a write is beyond eof i_new_size
- * will be the intended file size until i_size is updated. If this write does
- * not extend all the way to the valid file size then restrict this update to
- * the end of the write.
+ * Update on-disk file size now that data has been written to disk.
*
* This function does not block as blocking on the inode lock in IO completion
* can lead to IO completion order dependency deadlocks.. If it can't get the
@@ -1279,6 +1274,15 @@ xfs_end_io_direct_write(
struct xfs_ioend *ioend = iocb->private;
/*
+ * While the generic direct I/O code updates the inode size, it does
+ * so only after the end_io handler is called, which means our
+ * end_io handler thinks the on-disk size is outside the in-core
+ * size. To prevent this just update it a little bit earlier here.
+ */
+ if (offset + size > i_size_read(ioend->io_inode))
+ i_size_write(ioend->io_inode, offset + size);
+
+ /*
* blockdev_direct_IO can return an error even after the I/O
* completion handler was called. Thus we need to protect
* against double-freeing.
@@ -1340,12 +1344,11 @@ xfs_vm_write_failed(
if (to > inode->i_size) {
/*
- * punch out the delalloc blocks we have already allocated. We
- * don't call xfs_setattr() to do this as we may be in the
- * middle of a multi-iovec write and so the vfs inode->i_size
- * will not match the xfs ip->i_size and so it will zero too
- * much. Hence we jus truncate the page cache to zero what is
- * necessary and punch the delalloc blocks directly.
+ * Punch out the delalloc blocks we have already allocated.
+ *
+ * Don't bother with xfs_setattr given that nothing can have
+ * made it to disk yet as the page is still locked at this
+ * point.
*/
struct xfs_inode *ip = XFS_I(inode);
xfs_fileoff_t start_fsb;
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 1e5d97f86ea..08b9ac644c3 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -827,10 +827,6 @@ xfs_attr_inactive(xfs_inode_t *dp)
if (error)
goto out;
- /*
- * Commit the last in the sequence of transactions.
- */
- xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index c1b55e59655..d25eafd4d28 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -271,10 +271,6 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
dp = args->dp;
mp = dp->i_mount;
dp->i_d.di_forkoff = forkoff;
- dp->i_df.if_ext_max =
- XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
- dp->i_afp->if_ext_max =
- XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
ifp = dp->i_afp;
ASSERT(ifp->if_flags & XFS_IFINLINE);
@@ -326,7 +322,6 @@ xfs_attr_fork_reset(
ASSERT(ip->i_d.di_anextents == 0);
ASSERT(ip->i_afp == NULL);
- ip->i_df.if_ext_max = XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
@@ -389,10 +384,6 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
(args->op_flags & XFS_DA_OP_ADDNAME) ||
!(mp->m_flags & XFS_MOUNT_ATTR2) ||
dp->i_d.di_format == XFS_DINODE_FMT_BTREE);
- dp->i_afp->if_ext_max =
- XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
- dp->i_df.if_ext_max =
- XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
xfs_trans_log_inode(args->trans, dp,
XFS_ILOG_CORE | XFS_ILOG_ADATA);
}
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index d0ab7883705..188ef2fbd62 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -249,7 +249,27 @@ xfs_bmbt_lookup_ge(
}
/*
-* Update the record referred to by cur to the value given
+ * Check if the inode needs to be converted to btree format.
+ */
+static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
+{
+ return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
+ XFS_IFORK_NEXTENTS(ip, whichfork) >
+ XFS_IFORK_MAXEXT(ip, whichfork);
+}
+
+/*
+ * Check if the inode should be converted to extent format.
+ */
+static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
+{
+ return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
+ XFS_IFORK_NEXTENTS(ip, whichfork) <=
+ XFS_IFORK_MAXEXT(ip, whichfork);
+}
+
+/*
+ * Update the record referred to by cur to the value given
* by [off, bno, len, state].
* This either works (return 0) or gets an EFSCORRUPTED error.
*/
@@ -683,8 +703,8 @@ xfs_bmap_add_extent_delay_real(
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
- if (bma->ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
- bma->ip->i_d.di_nextents > bma->ip->i_df.if_ext_max) {
+
+ if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
bma->firstblock, bma->flist,
&bma->cur, 1, &tmp_rval, XFS_DATA_FORK);
@@ -767,8 +787,8 @@ xfs_bmap_add_extent_delay_real(
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
- if (bma->ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
- bma->ip->i_d.di_nextents > bma->ip->i_df.if_ext_max) {
+
+ if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
bma->firstblock, bma->flist, &bma->cur, 1,
&tmp_rval, XFS_DATA_FORK);
@@ -836,8 +856,8 @@ xfs_bmap_add_extent_delay_real(
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
- if (bma->ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
- bma->ip->i_d.di_nextents > bma->ip->i_df.if_ext_max) {
+
+ if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
bma->firstblock, bma->flist, &bma->cur,
1, &tmp_rval, XFS_DATA_FORK);
@@ -884,8 +904,7 @@ xfs_bmap_add_extent_delay_real(
}
/* convert to a btree if necessary */
- if (XFS_IFORK_FORMAT(bma->ip, XFS_DATA_FORK) == XFS_DINODE_FMT_EXTENTS &&
- XFS_IFORK_NEXTENTS(bma->ip, XFS_DATA_FORK) > ifp->if_ext_max) {
+ if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
int tmp_logflags; /* partial log flag return val */
ASSERT(bma->cur == NULL);
@@ -1421,8 +1440,7 @@ xfs_bmap_add_extent_unwritten_real(
}
/* convert to a btree if necessary */
- if (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) == XFS_DINODE_FMT_EXTENTS &&
- XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > ifp->if_ext_max) {
+ if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) {
int tmp_logflags; /* partial log flag return val */
ASSERT(cur == NULL);
@@ -1812,8 +1830,7 @@ xfs_bmap_add_extent_hole_real(
}
/* convert to a btree if necessary */
- if (XFS_IFORK_FORMAT(bma->ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
- XFS_IFORK_NEXTENTS(bma->ip, whichfork) > ifp->if_ext_max) {
+ if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
int tmp_logflags; /* partial log flag return val */
ASSERT(bma->cur == NULL);
@@ -3037,8 +3054,7 @@ xfs_bmap_extents_to_btree(
ifp = XFS_IFORK_PTR(ip, whichfork);
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);
- ASSERT(ifp->if_ext_max ==
- XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
+
/*
* Make space in the inode incore.
*/
@@ -3184,13 +3200,8 @@ xfs_bmap_forkoff_reset(
ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
uint dfl_forkoff = xfs_default_attroffset(ip) >> 3;
- if (dfl_forkoff > ip->i_d.di_forkoff) {
+ if (dfl_forkoff > ip->i_d.di_forkoff)
ip->i_d.di_forkoff = dfl_forkoff;
- ip->i_df.if_ext_max =
- XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t);
- ip->i_afp->if_ext_max =
- XFS_IFORK_ASIZE(ip) / sizeof(xfs_bmbt_rec_t);
- }
}
}
@@ -3430,8 +3441,6 @@ xfs_bmap_add_attrfork(
int error; /* error return value */
ASSERT(XFS_IFORK_Q(ip) == 0);
- ASSERT(ip->i_df.if_ext_max ==
- XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
mp = ip->i_mount;
ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
@@ -3486,12 +3495,9 @@ xfs_bmap_add_attrfork(
error = XFS_ERROR(EINVAL);
goto error1;
}
- ip->i_df.if_ext_max =
- XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
+
ASSERT(ip->i_afp == NULL);
ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
- ip->i_afp->if_ext_max =
- XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
ip->i_afp->if_flags = XFS_IFEXTENTS;
logflags = 0;
xfs_bmap_init(&flist, &firstblock);
@@ -3535,20 +3541,17 @@ xfs_bmap_add_attrfork(
} else
spin_unlock(&mp->m_sb_lock);
}
- if ((error = xfs_bmap_finish(&tp, &flist, &committed)))
+
+ error = xfs_bmap_finish(&tp, &flist, &committed);
+ if (error)
goto error2;
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
- ASSERT(ip->i_df.if_ext_max ==
- XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
- return error;
+ return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
error2:
xfs_bmap_cancel(&flist);
error1:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
error0:
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
- ASSERT(ip->i_df.if_ext_max ==
- XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
return error;
}
@@ -3994,11 +3997,8 @@ xfs_bmap_one_block(
xfs_bmbt_irec_t s; /* internal version of extent */
#ifndef DEBUG
- if (whichfork == XFS_DATA_FORK) {
- return S_ISREG(ip->i_d.di_mode) ?
- (ip->i_size == ip->i_mount->m_sb.sb_blocksize) :
- (ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize);
- }
+ if (whichfork == XFS_DATA_FORK)
+ return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize;
#endif /* !DEBUG */
if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
return 0;
@@ -4010,7 +4010,7 @@ xfs_bmap_one_block(
xfs_bmbt_get_all(ep, &s);
rval = s.br_startoff == 0 && s.br_blockcount == 1;
if (rval && whichfork == XFS_DATA_FORK)
- ASSERT(ip->i_size == ip->i_mount->m_sb.sb_blocksize);
+ ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
return rval;
}
@@ -4379,8 +4379,6 @@ xfs_bmapi_read(
XFS_STATS_INC(xs_blk_mapr);
ifp = XFS_IFORK_PTR(ip, whichfork);
- ASSERT(ifp->if_ext_max ==
- XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
if (!(ifp->if_flags & XFS_IFEXTENTS)) {
error = xfs_iread_extents(NULL, ip, whichfork);
@@ -4871,8 +4869,6 @@ xfs_bmapi_write(
return XFS_ERROR(EIO);
ifp = XFS_IFORK_PTR(ip, whichfork);
- ASSERT(ifp->if_ext_max ==
- XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
XFS_STATS_INC(xs_blk_mapw);
@@ -4981,8 +4977,7 @@ xfs_bmapi_write(
/*
* Transform from btree to extents, give it cur.
*/
- if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
- XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max) {
+ if (xfs_bmap_wants_extents(ip, whichfork)) {
int tmp_logflags = 0;
ASSERT(bma.cur);
@@ -4992,10 +4987,10 @@ xfs_bmapi_write(
if (error)
goto error0;
}
- ASSERT(ifp->if_ext_max ==
- XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
+
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
- XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max);
+ XFS_IFORK_NEXTENTS(ip, whichfork) >
+ XFS_IFORK_MAXEXT(ip, whichfork));
error = 0;
error0:
/*
@@ -5095,8 +5090,7 @@ xfs_bunmapi(
ASSERT(len > 0);
ASSERT(nexts >= 0);
- ASSERT(ifp->if_ext_max ==
- XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
+
if (!(ifp->if_flags & XFS_IFEXTENTS) &&
(error = xfs_iread_extents(tp, ip, whichfork)))
return error;
@@ -5322,7 +5316,8 @@ xfs_bunmapi(
*/
if (!wasdel && xfs_trans_get_block_res(tp) == 0 &&
XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
- XFS_IFORK_NEXTENTS(ip, whichfork) >= ifp->if_ext_max &&
+ XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */
+ XFS_IFORK_MAXEXT(ip, whichfork) &&
del.br_startoff > got.br_startoff &&
del.br_startoff + del.br_blockcount <
got.br_startoff + got.br_blockcount) {
@@ -5353,13 +5348,11 @@ nodelete:
}
}
*done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0;
- ASSERT(ifp->if_ext_max ==
- XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
+
/*
* Convert to a btree if necessary.
*/
- if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
- XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max) {
+ if (xfs_bmap_needs_btree(ip, whichfork)) {
ASSERT(cur == NULL);
error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist,
&cur, 0, &tmp_logflags, whichfork);
@@ -5370,8 +5363,7 @@ nodelete:
/*
* transform from btree to extents, give it cur
*/
- else if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
- XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max) {
+ else if (xfs_bmap_wants_extents(ip, whichfork)) {
ASSERT(cur != NULL);
error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags,
whichfork);
@@ -5382,8 +5374,6 @@ nodelete:
/*
* transform from extents to local?
*/
- ASSERT(ifp->if_ext_max ==
- XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
error = 0;
error0:
/*
@@ -5434,7 +5424,7 @@ xfs_getbmapx_fix_eof_hole(
if (startblock == HOLESTARTBLOCK) {
mp = ip->i_mount;
out->bmv_block = -1;
- fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, ip->i_size));
+ fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
fixlen -= out->bmv_offset;
if (prealloced && out->bmv_offset + out->bmv_length == end) {
/* Came to hole at EOF. Trim it. */
@@ -5522,7 +5512,7 @@ xfs_getbmap(
fixlen = XFS_MAXIOFFSET(mp);
} else {
prealloced = 0;
- fixlen = ip->i_size;
+ fixlen = XFS_ISIZE(ip);
}
}
@@ -5551,7 +5541,7 @@ xfs_getbmap(
xfs_ilock(ip, XFS_IOLOCK_SHARED);
if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
- if (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size) {
+ if (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size) {
error = xfs_flush_pages(ip, 0, -1, 0, FI_REMAPF);
if (error)
goto out_unlock_iolock;
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 654dc6f05ba..dd974a55c77 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -163,12 +163,14 @@ xfs_swap_extents_check_format(
/* Check temp in extent form to max in target */
if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
- XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > ip->i_df.if_ext_max)
+ XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) >
+ XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
return EINVAL;
/* Check target in extent form to max in temp */
if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
- XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max)
+ XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) >
+ XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
return EINVAL;
/*
@@ -180,18 +182,25 @@ xfs_swap_extents_check_format(
* (a common defrag case) which will occur when the temp inode is in
* extent format...
*/
- if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
- ((XFS_IFORK_BOFF(ip) &&
- tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) ||
- XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= ip->i_df.if_ext_max))
- return EINVAL;
+ if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
+ if (XFS_IFORK_BOFF(ip) &&
+ tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip))
+ return EINVAL;
+ if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <=
+ XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
+ return EINVAL;
+ }
/* Reciprocal target->temp btree format checks */
- if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
- ((XFS_IFORK_BOFF(tip) &&
- ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) ||
- XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= tip->i_df.if_ext_max))
- return EINVAL;
+ if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
+ if (XFS_IFORK_BOFF(tip) &&
+ ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip))
+ return EINVAL;
+
+ if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <=
+ XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
+ return EINVAL;
+ }
return 0;
}
@@ -349,16 +358,6 @@ xfs_swap_extents(
*tifp = *tempifp; /* struct copy */
/*
- * Fix the in-memory data fork values that are dependent on the fork
- * offset in the inode. We can't assume they remain the same as attr2
- * has dynamic fork offsets.
- */
- ifp->if_ext_max = XFS_IFORK_SIZE(ip, XFS_DATA_FORK) /
- (uint)sizeof(xfs_bmbt_rec_t);
- tifp->if_ext_max = XFS_IFORK_SIZE(tip, XFS_DATA_FORK) /
- (uint)sizeof(xfs_bmbt_rec_t);
-
- /*
* Fix the on-disk inode values
*/
tmp = (__uint64_t)ip->i_d.di_nblocks;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f675f3d9d7b..7e5bc872f2b 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -327,7 +327,7 @@ xfs_file_aio_read(
mp->m_rtdev_targp : mp->m_ddev_targp;
if ((iocb->ki_pos & target->bt_smask) ||
(size & target->bt_smask)) {
- if (iocb->ki_pos == ip->i_size)
+ if (iocb->ki_pos == i_size_read(inode))
return 0;
return -XFS_ERROR(EINVAL);
}
@@ -412,51 +412,6 @@ xfs_file_splice_read(
return ret;
}
-STATIC void
-xfs_aio_write_isize_update(
- struct inode *inode,
- loff_t *ppos,
- ssize_t bytes_written)
-{
- struct xfs_inode *ip = XFS_I(inode);
- xfs_fsize_t isize = i_size_read(inode);
-
- if (bytes_written > 0)
- XFS_STATS_ADD(xs_write_bytes, bytes_written);
-
- if (unlikely(bytes_written < 0 && bytes_written != -EFAULT &&
- *ppos > isize))
- *ppos = isize;
-
- if (*ppos > ip->i_size) {
- xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
- if (*ppos > ip->i_size)
- ip->i_size = *ppos;
- xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
- }
-}
-
-/*
- * If this was a direct or synchronous I/O that failed (such as ENOSPC) then
- * part of the I/O may have been written to disk before the error occurred. In
- * this case the on-disk file size may have been adjusted beyond the in-memory
- * file size and now needs to be truncated back.
- */
-STATIC void
-xfs_aio_write_newsize_update(
- struct xfs_inode *ip,
- xfs_fsize_t new_size)
-{
- if (new_size == ip->i_new_size) {
- xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
- if (new_size == ip->i_new_size)
- ip->i_new_size = 0;
- if (ip->i_d.di_size > ip->i_size)
- ip->i_d.di_size = ip->i_size;
- xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
- }
-}
-
/*
* xfs_file_splice_write() does not use xfs_rw_ilock() because
* generic_file_splice_write() takes the i_mutex itself. This, in theory,
@@ -475,7 +430,6 @@ xfs_file_splice_write(
{
struct inode *inode = outfilp->f_mapping->host;
struct xfs_inode *ip = XFS_I(inode);
- xfs_fsize_t new_size;
int ioflags = 0;
ssize_t ret;
@@ -489,19 +443,12 @@ xfs_file_splice_write(
xfs_ilock(ip, XFS_IOLOCK_EXCL);
- new_size = *ppos + count;
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- if (new_size > ip->i_size)
- ip->i_new_size = new_size;
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
+ if (ret > 0)
+ XFS_STATS_ADD(xs_write_bytes, ret);
- xfs_aio_write_isize_update(inode, ppos, ret);
- xfs_aio_write_newsize_update(ip, new_size);
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return ret;
}
@@ -689,28 +636,26 @@ out_lock:
/*
* Common pre-write limit and setup checks.
*
- * Returns with iolock held according to @iolock.
+ * Called with the iolocked held either shared and exclusive according to
+ * @iolock, and returns with it held. Might upgrade the iolock to exclusive
+ * if called for a direct write beyond i_size.
*/
STATIC ssize_t
xfs_file_aio_write_checks(
struct file *file,
loff_t *pos,
size_t *count,
- xfs_fsize_t *new_sizep,
int *iolock)
{
struct inode *inode = file->f_mapping->host;
struct xfs_inode *ip = XFS_I(inode);
- xfs_fsize_t new_size;
int error = 0;
xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
- *new_sizep = 0;
restart:
error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
if (error) {
- xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
- *iolock = 0;
+ xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
return error;
}
@@ -720,36 +665,21 @@ restart:
/*
* If the offset is beyond the size of the file, we need to zero any
* blocks that fall between the existing EOF and the start of this
- * write. There is no need to issue zeroing if another in-flght IO ends
- * at or before this one If zeronig is needed and we are currently
- * holding the iolock shared, we need to update it to exclusive which
- * involves dropping all locks and relocking to maintain correct locking
- * order. If we do this, restart the function to ensure all checks and
- * values are still valid.
+ * write. If zeroing is needed and we are currently holding the
+ * iolock shared, we need to update it to exclusive which involves
+ * dropping all locks and relocking to maintain correct locking order.
+ * If we do this, restart the function to ensure all checks and values
+ * are still valid.
*/
- if ((ip->i_new_size && *pos > ip->i_new_size) ||
- (!ip->i_new_size && *pos > ip->i_size)) {
+ if (*pos > i_size_read(inode)) {
if (*iolock == XFS_IOLOCK_SHARED) {
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
*iolock = XFS_IOLOCK_EXCL;
xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
goto restart;
}
- error = -xfs_zero_eof(ip, *pos, ip->i_size);
+ error = -xfs_zero_eof(ip, *pos, i_size_read(inode));
}
-
- /*
- * If this IO extends beyond EOF, we may need to update ip->i_new_size.
- * We have already zeroed space beyond EOF (if necessary). Only update
- * ip->i_new_size if this IO ends beyond any other in-flight writes.
- */
- new_size = *pos + *count;
- if (new_size > ip->i_size) {
- if (new_size > ip->i_new_size)
- ip->i_new_size = new_size;
- *new_sizep = new_size;
- }
-
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
if (error)
return error;
@@ -794,9 +724,7 @@ xfs_file_dio_aio_write(
const struct iovec *iovp,
unsigned long nr_segs,
loff_t pos,
- size_t ocount,
- xfs_fsize_t *new_size,
- int *iolock)
+ size_t ocount)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
@@ -806,10 +734,10 @@ xfs_file_dio_aio_write(
ssize_t ret = 0;
size_t count = ocount;
int unaligned_io = 0;
+ int iolock;
struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp;
- *iolock = 0;
if ((pos & target->bt_smask) || (count & target->bt_smask))
return -XFS_ERROR(EINVAL);
@@ -824,31 +752,31 @@ xfs_file_dio_aio_write(
* EOF zeroing cases and fill out the new inode size as appropriate.
*/
if (unaligned_io || mapping->nrpages)
- *iolock = XFS_IOLOCK_EXCL;
+ iolock = XFS_IOLOCK_EXCL;
else
- *iolock = XFS_IOLOCK_SHARED;
- xfs_rw_ilock(ip, *iolock);
+ iolock = XFS_IOLOCK_SHARED;
+ xfs_rw_ilock(ip, iolock);
/*
* Recheck if there are cached pages that need invalidate after we got
* the iolock to protect against other threads adding new pages while
* we were waiting for the iolock.
*/
- if (mapping->nrpages && *iolock == XFS_IOLOCK_SHARED) {
- xfs_rw_iunlock(ip, *iolock);
- *iolock = XFS_IOLOCK_EXCL;
- xfs_rw_ilock(ip, *iolock);
+ if (mapping->nrpages && iolock == XFS_IOLOCK_SHARED) {
+ xfs_rw_iunlock(ip, iolock);
+ iolock = XFS_IOLOCK_EXCL;
+ xfs_rw_ilock(ip, iolock);
}
- ret = xfs_file_aio_write_checks(file, &pos, &count, new_size, iolock);
+ ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);
if (ret)
- return ret;
+ goto out;
if (mapping->nrpages) {
ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
FI_REMAPF_LOCKED);
if (ret)
- return ret;
+ goto out;
}
/*
@@ -857,15 +785,18 @@ xfs_file_dio_aio_write(
*/
if (unaligned_io)
inode_dio_wait(inode);
- else if (*iolock == XFS_IOLOCK_EXCL) {
+ else if (iolock == XFS_IOLOCK_EXCL) {
xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
- *iolock = XFS_IOLOCK_SHARED;
+ iolock = XFS_IOLOCK_SHARED;
}
trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
ret = generic_file_direct_write(iocb, iovp,
&nr_segs, pos, &iocb->ki_pos, count, ocount);
+out:
+ xfs_rw_iunlock(ip, iolock);
+
/* No fallback to buffered IO on errors for XFS. */
ASSERT(ret < 0 || ret == count);
return ret;
@@ -877,9 +808,7 @@ xfs_file_buffered_aio_write(
const struct iovec *iovp,
unsigned long nr_segs,
loff_t pos,
- size_t ocount,
- xfs_fsize_t *new_size,
- int *iolock)
+ size_t ocount)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
@@ -887,14 +816,14 @@ xfs_file_buffered_aio_write(
struct xfs_inode *ip = XFS_I(inode);
ssize_t ret;
int enospc = 0;
+ int iolock = XFS_IOLOCK_EXCL;
size_t count = ocount;
- *iolock = XFS_IOLOCK_EXCL;
- xfs_rw_ilock(ip, *iolock);
+ xfs_rw_ilock(ip, iolock);
- ret = xfs_file_aio_write_checks(file, &pos, &count, new_size, iolock);
+ ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);
if (ret)
- return ret;
+ goto out;
/* We can write back this queue in page reclaim */
current->backing_dev_info = mapping->backing_dev_info;
@@ -908,13 +837,15 @@ write_retry:
* page locks and retry *once*
*/
if (ret == -ENOSPC && !enospc) {
- ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
- if (ret)
- return ret;
enospc = 1;
- goto write_retry;
+ ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
+ if (!ret)
+ goto write_retry;
}
+
current->backing_dev_info = NULL;
+out:
+ xfs_rw_iunlock(ip, iolock);
return ret;
}
@@ -930,9 +861,7 @@ xfs_file_aio_write(
struct inode *inode = mapping->host;
struct xfs_inode *ip = XFS_I(inode);
ssize_t ret;
- int iolock;
size_t ocount = 0;
- xfs_fsize_t new_size = 0;
XFS_STATS_INC(xs_write_calls);
@@ -951,33 +880,22 @@ xfs_file_aio_write(
return -EIO;
if (unlikely(file->f_flags & O_DIRECT))
- ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos,
- ocount, &new_size, &iolock);
+ ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount);
else
ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
- ocount, &new_size, &iolock);
-
- xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret);
+ ocount);
- if (ret <= 0)
- goto out_unlock;
+ if (ret > 0) {
+ ssize_t err;
- /* Handle various SYNC-type writes */
- if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
- loff_t end = pos + ret - 1;
- int error;
+ XFS_STATS_ADD(xs_write_bytes, ret);
- xfs_rw_iunlock(ip, iolock);
- error = xfs_file_fsync(file, pos, end,
- (file->f_flags & __O_SYNC) ? 0 : 1);
- xfs_rw_ilock(ip, iolock);
- if (error)
- ret = error;
+ /* Handle various SYNC-type writes */
+ err = generic_write_sync(file, pos, ret);
+ if (err < 0)
+ ret = err;
}
-out_unlock:
- xfs_aio_write_newsize_update(ip, new_size);
- xfs_rw_iunlock(ip, iolock);
return ret;
}
diff --git a/fs/xfs/xfs_fs_subr.c b/fs/xfs/xfs_fs_subr.c
index ed88ed16811..652b875a9d4 100644
--- a/fs/xfs/xfs_fs_subr.c
+++ b/fs/xfs/xfs_fs_subr.c
@@ -90,7 +90,7 @@ xfs_wait_on_pages(
if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {
return -filemap_fdatawait_range(mapping, first,
- last == -1 ? ip->i_size - 1 : last);
+ last == -1 ? XFS_ISIZE(ip) - 1 : last);
}
return 0;
}
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 3960a066d7f..8c3e46394d4 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -77,7 +77,7 @@ xfs_inode_alloc(
ASSERT(atomic_read(&ip->i_pincount) == 0);
ASSERT(!spin_is_locked(&ip->i_flags_lock));
- ASSERT(completion_done(&ip->i_flush));
+ ASSERT(!xfs_isiflocked(ip));
ASSERT(ip->i_ino == 0);
mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
@@ -94,8 +94,6 @@ xfs_inode_alloc(
ip->i_update_core = 0;
ip->i_delayed_blks = 0;
memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
- ip->i_size = 0;
- ip->i_new_size = 0;
return ip;
}
@@ -150,7 +148,7 @@ xfs_inode_free(
/* asserts to verify all state is correct here */
ASSERT(atomic_read(&ip->i_pincount) == 0);
ASSERT(!spin_is_locked(&ip->i_flags_lock));
- ASSERT(completion_done(&ip->i_flush));
+ ASSERT(!xfs_isiflocked(ip));
/*
* Because we use RCU freeing we need to ensure the inode always
@@ -450,8 +448,6 @@ again:
*ipp = ip;
- ASSERT(ip->i_df.if_ext_max ==
- XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t));
/*
* If we have a real type for an on-disk inode, we can set ops(&unlock)
* now. If it's a new inode being created, xfs_ialloc will handle it.
@@ -715,3 +711,19 @@ xfs_isilocked(
return 0;
}
#endif
+
+void
+__xfs_iflock(
+ struct xfs_inode *ip)
+{
+ wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT);
+ DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);
+
+ do {
+ prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+ if (xfs_isiflocked(ip))
+ io_schedule();
+ } while (!xfs_iflock_nowait(ip));
+
+ finish_wait(wq, &wait.wait);
+}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 9dda7cc3284..b21022499c2 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -299,11 +299,8 @@ xfs_iformat(
{
xfs_attr_shortform_t *atp;
int size;
- int error;
+ int error = 0;
xfs_fsize_t di_size;
- ip->i_df.if_ext_max =
- XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
- error = 0;
if (unlikely(be32_to_cpu(dip->di_nextents) +
be16_to_cpu(dip->di_anextents) >
@@ -350,7 +347,6 @@ xfs_iformat(
return XFS_ERROR(EFSCORRUPTED);
}
ip->i_d.di_size = 0;
- ip->i_size = 0;
ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
break;
@@ -409,10 +405,10 @@ xfs_iformat(
}
if (!XFS_DFORK_Q(dip))
return 0;
+
ASSERT(ip->i_afp == NULL);
ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
- ip->i_afp->if_ext_max =
- XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
+
switch (dip->di_aformat) {
case XFS_DINODE_FMT_LOCAL:
atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
@@ -604,10 +600,11 @@ xfs_iformat_btree(
* or the number of extents is greater than the number of
* blocks.
*/
- if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max
- || XFS_BMDR_SPACE_CALC(nrecs) >
- XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)
- || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
+ if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
+ XFS_IFORK_MAXEXT(ip, whichfork) ||
+ XFS_BMDR_SPACE_CALC(nrecs) >
+ XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) ||
+ XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).",
(unsigned long long) ip->i_ino);
XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
@@ -835,12 +832,6 @@ xfs_iread(
* with the uninitialized part of it.
*/
ip->i_d.di_mode = 0;
- /*
- * Initialize the per-fork minima and maxima for a new
- * inode here. xfs_iformat will do it for old inodes.
- */
- ip->i_df.if_ext_max =
- XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
}
/*
@@ -861,7 +852,6 @@ xfs_iread(
}
ip->i_delayed_blks = 0;
- ip->i_size = ip->i_d.di_size;
/*
* Mark the buffer containing the inode as something to keep
@@ -1051,7 +1041,6 @@ xfs_ialloc(
}
ip->i_d.di_size = 0;
- ip->i_size = 0;
ip->i_d.di_nextents = 0;
ASSERT(ip->i_d.di_nblocks == 0);
@@ -1166,52 +1155,6 @@ xfs_ialloc(
}
/*
- * Check to make sure that there are no blocks allocated to the
- * file beyond the size of the file. We don't check this for
- * files with fixed size extents or real time extents, but we
- * at least do it for regular files.
- */
-#ifdef DEBUG
-STATIC void
-xfs_isize_check(
- struct xfs_inode *ip,
- xfs_fsize_t isize)
-{
- struct xfs_mount *mp = ip->i_mount;
- xfs_fileoff_t map_first;
- int nimaps;
- xfs_bmbt_irec_t imaps[2];
- int error;
-
- if (!S_ISREG(ip->i_d.di_mode))
- return;
-
- if (XFS_IS_REALTIME_INODE(ip))
- return;
-
- if (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
- return;
-
- nimaps = 2;
- map_first = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
- /*
- * The filesystem could be shutting down, so bmapi may return
- * an error.
- */
- error = xfs_bmapi_read(ip, map_first,
- (XFS_B_TO_FSB(mp,
- (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) - map_first),
- imaps, &nimaps, XFS_BMAPI_ENTIRE);
- if (error)
- return;
- ASSERT(nimaps == 1);
- ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK);
-}
-#else /* DEBUG */
-#define xfs_isize_check(ip, isize)
-#endif /* DEBUG */
-
-/*
* Free up the underlying blocks past new_size. The new size must be smaller
* than the current size. This routine can be used both for the attribute and
* data fork, and does not modify the inode size, which is left to the caller.
@@ -1252,12 +1195,14 @@ xfs_itruncate_extents(
int done = 0;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
- ASSERT(new_size <= ip->i_size);
+ ASSERT(new_size <= XFS_ISIZE(ip));
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
ASSERT(ip->i_itemp != NULL);
ASSERT(ip->i_itemp->ili_lock_flags == 0);
ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
+ trace_xfs_itruncate_extents_start(ip, new_size);
+
/*
* Since it is possible for space to become allocated beyond
* the end of the file (in a crash where the space is allocated
@@ -1325,6 +1270,14 @@ xfs_itruncate_extents(
goto out;
}
+ /*
+ * Always re-log the inode so that our permanent transaction can keep
+ * on rolling it forward in the log.
+ */
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+ trace_xfs_itruncate_extents_end(ip, new_size);
+
out:
*tpp = tp;
return error;
@@ -1338,74 +1291,6 @@ out_bmap_cancel:
goto out;
}
-int
-xfs_itruncate_data(
- struct xfs_trans **tpp,
- struct xfs_inode *ip,
- xfs_fsize_t new_size)
-{
- int error;
-
- trace_xfs_itruncate_data_start(ip, new_size);
-
- /*
- * The first thing we do is set the size to new_size permanently on
- * disk. This way we don't have to worry about anyone ever being able
- * to look at the data being freed even in the face of a crash.
- * What we're getting around here is the case where we free a block, it
- * is allocated to another file, it is written to, and then we crash.
- * If the new data gets written to the file but the log buffers
- * containing the free and reallocation don't, then we'd end up with
- * garbage in the blocks being freed. As long as we make the new_size
- * permanent before actually freeing any blocks it doesn't matter if
- * they get written to.
- */
- if (ip->i_d.di_nextents > 0) {
- /*
- * If we are not changing the file size then do not update
- * the on-disk file size - we may be called from
- * xfs_inactive_free_eofblocks(). If we update the on-disk
- * file size and then the system crashes before the contents
- * of the file are flushed to disk then the files may be
- * full of holes (ie NULL files bug).
- */
- if (ip->i_size != new_size) {
- ip->i_d.di_size = new_size;
- ip->i_size = new_size;
- xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
- }
- }
-
- error = xfs_itruncate_extents(tpp, ip, XFS_DATA_FORK, new_size);
- if (error)
- return error;
-
- /*
- * If we are not changing the file size then do not update the on-disk
- * file size - we may be called from xfs_inactive_free_eofblocks().
- * If we update the on-disk file size and then the system crashes
- * before the contents of the file are flushed to disk then the files
- * may be full of holes (ie NULL files bug).
- */
- xfs_isize_check(ip, new_size);
- if (ip->i_size != new_size) {
- ip->i_d.di_size = new_size;
- ip->i_size = new_size;
- }
-
- ASSERT(new_size != 0 || ip->i_delayed_blks == 0);
- ASSERT(new_size != 0 || ip->i_d.di_nextents == 0);
-
- /*
- * Always re-log the inode so that our permanent transaction can keep
- * on rolling it forward in the log.
- */
- xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
-
- trace_xfs_itruncate_data_end(ip, new_size);
- return 0;
-}
-
/*
* This is called when the inode's link count goes to 0.
* We place the on-disk inode on a list in the AGI. It
@@ -1824,8 +1709,7 @@ xfs_ifree(
ASSERT(ip->i_d.di_nlink == 0);
ASSERT(ip->i_d.di_nextents == 0);
ASSERT(ip->i_d.di_anextents == 0);
- ASSERT((ip->i_d.di_size == 0 && ip->i_size == 0) ||
- (!S_ISREG(ip->i_d.di_mode)));
+ ASSERT(ip->i_d.di_size == 0 || !S_ISREG(ip->i_d.di_mode));
ASSERT(ip->i_d.di_nblocks == 0);
/*
@@ -1844,8 +1728,6 @@ xfs_ifree(
ip->i_d.di_flags = 0;
ip->i_d.di_dmevmask = 0;
ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */
- ip->i_df.if_ext_max =
- XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
/*
@@ -2151,7 +2033,7 @@ xfs_idestroy_fork(
* once someone is waiting for it to be unpinned.
*/
static void
-xfs_iunpin_nowait(
+xfs_iunpin(
struct xfs_inode *ip)
{
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
@@ -2163,14 +2045,29 @@ xfs_iunpin_nowait(
}
+static void
+__xfs_iunpin_wait(
+ struct xfs_inode *ip)
+{
+ wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IPINNED_BIT);
+ DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IPINNED_BIT);
+
+ xfs_iunpin(ip);
+
+ do {
+ prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+ if (xfs_ipincount(ip))
+ io_schedule();
+ } while (xfs_ipincount(ip));
+ finish_wait(wq, &wait.wait);
+}
+
void
xfs_iunpin_wait(
struct xfs_inode *ip)
{
- if (xfs_ipincount(ip)) {
- xfs_iunpin_nowait(ip);
- wait_event(ip->i_ipin_wait, (xfs_ipincount(ip) == 0));
- }
+ if (xfs_ipincount(ip))
+ __xfs_iunpin_wait(ip);
}
/*
@@ -2510,9 +2407,9 @@ xfs_iflush(
XFS_STATS_INC(xs_iflush_count);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
- ASSERT(!completion_done(&ip->i_flush));
+ ASSERT(xfs_isiflocked(ip));
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
- ip->i_d.di_nextents > ip->i_df.if_ext_max);
+ ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
iip = ip->i_itemp;
mp = ip->i_mount;
@@ -2529,7 +2426,7 @@ xfs_iflush(
* out for us if they occur after the log force completes.
*/
if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) {
- xfs_iunpin_nowait(ip);
+ xfs_iunpin(ip);
xfs_ifunlock(ip);
return EAGAIN;
}
@@ -2626,9 +2523,9 @@ xfs_iflush_int(
#endif
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
- ASSERT(!completion_done(&ip->i_flush));
+ ASSERT(xfs_isiflocked(ip));
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
- ip->i_d.di_nextents > ip->i_df.if_ext_max);
+ ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
iip = ip->i_itemp;
mp = ip->i_mount;
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index f0e6b151ba3..2f27b745408 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -66,7 +66,6 @@ typedef struct xfs_ifork {
struct xfs_btree_block *if_broot; /* file's incore btree root */
short if_broot_bytes; /* bytes allocated for root */
unsigned char if_flags; /* per-fork flags */
- unsigned char if_ext_max; /* max # of extent records */
union {
xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */
xfs_ext_irec_t *if_ext_irec; /* irec map file exts */
@@ -206,12 +205,12 @@ typedef struct xfs_icdinode {
((w) == XFS_DATA_FORK ? \
((ip)->i_d.di_nextents = (n)) : \
((ip)->i_d.di_anextents = (n)))
-
+#define XFS_IFORK_MAXEXT(ip, w) \
+ (XFS_IFORK_SIZE(ip, w) / sizeof(xfs_bmbt_rec_t))
#ifdef __KERNEL__
-struct bhv_desc;
struct xfs_buf;
struct xfs_bmap_free;
struct xfs_bmbt_irec;
@@ -220,12 +219,6 @@ struct xfs_mount;
struct xfs_trans;
struct xfs_dquot;
-typedef struct dm_attrs_s {
- __uint32_t da_dmevmask; /* DMIG event mask */
- __uint16_t da_dmstate; /* DMIG state info */
- __uint16_t da_pad; /* DMIG extra padding */
-} dm_attrs_t;
-
typedef struct xfs_inode {
/* Inode linking and identification information. */
struct xfs_mount *i_mount; /* fs mount struct ptr */
@@ -244,27 +237,19 @@ typedef struct xfs_inode {
struct xfs_inode_log_item *i_itemp; /* logging information */
mrlock_t i_lock; /* inode lock */
mrlock_t i_iolock; /* inode IO lock */
- struct completion i_flush; /* inode flush completion q */
atomic_t i_pincount; /* inode pin count */
- wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */
spinlock_t i_flags_lock; /* inode i_flags lock */
/* Miscellaneous state. */
- unsigned short i_flags; /* see defined flags below */
+ unsigned long i_flags; /* see defined flags below */
unsigned char i_update_core; /* timestamps/size is dirty */
unsigned int i_delayed_blks; /* count of delay alloc blks */
xfs_icdinode_t i_d; /* most of ondisk inode */
- xfs_fsize_t i_size; /* in-memory size */
- xfs_fsize_t i_new_size; /* size when write completes */
-
/* VFS inode */
struct inode i_vnode; /* embedded VFS inode */
} xfs_inode_t;
-#define XFS_ISIZE(ip) S_ISREG((ip)->i_d.di_mode) ? \
- (ip)->i_size : (ip)->i_d.di_size;
-
/* Convert from vfs inode to xfs inode */
static inline struct xfs_inode *XFS_I(struct inode *inode)
{
@@ -278,6 +263,18 @@ static inline struct inode *VFS_I(struct xfs_inode *ip)
}
/*
+ * For regular files we only update the on-disk filesize when actually
+ * writing data back to disk. Until then only the copy in the VFS inode
+ * is uptodate.
+ */
+static inline xfs_fsize_t XFS_ISIZE(struct xfs_inode *ip)
+{
+ if (S_ISREG(ip->i_d.di_mode))
+ return i_size_read(VFS_I(ip));
+ return ip->i_d.di_size;
+}
+
+/*
* i_flags helper functions
*/
static inline void
@@ -331,6 +328,19 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
return ret;
}
+static inline int
+xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned short flags)
+{
+ int ret;
+
+ spin_lock(&ip->i_flags_lock);
+ ret = ip->i_flags & flags;
+ if (!ret)
+ ip->i_flags |= flags;
+ spin_unlock(&ip->i_flags_lock);
+ return ret;
+}
+
/*
* Project quota id helpers (previously projid was 16bit only
* and using two 16bit values to hold new 32bit projid was chosen
@@ -351,35 +361,19 @@ xfs_set_projid(struct xfs_inode *ip,
}
/*
- * Manage the i_flush queue embedded in the inode. This completion
- * queue synchronizes processes attempting to flush the in-core
- * inode back to disk.
- */
-static inline void xfs_iflock(xfs_inode_t *ip)
-{
- wait_for_completion(&ip->i_flush);
-}
-
-static inline int xfs_iflock_nowait(xfs_inode_t *ip)
-{
- return try_wait_for_completion(&ip->i_flush);
-}
-
-static inline void xfs_ifunlock(xfs_inode_t *ip)
-{
- complete(&ip->i_flush);
-}
-
-/*
* In-core inode flags.
*/
-#define XFS_IRECLAIM 0x0001 /* started reclaiming this inode */
-#define XFS_ISTALE 0x0002 /* inode has been staled */
-#define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */
-#define XFS_INEW 0x0008 /* inode has just been allocated */
-#define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */
-#define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */
-#define XFS_IDIRTY_RELEASE 0x0040 /* dirty release already seen */
+#define XFS_IRECLAIM (1 << 0) /* started reclaiming this inode */
+#define XFS_ISTALE (1 << 1) /* inode has been staled */
+#define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */
+#define XFS_INEW (1 << 3) /* inode has just been allocated */
+#define XFS_IFILESTREAM (1 << 4) /* inode is in a filestream dir. */
+#define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */
+#define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */
+#define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */
+#define XFS_IFLOCK (1 << __XFS_IFLOCK_BIT)
+#define __XFS_IPINNED_BIT 8 /* wakeup key for zero pin count */
+#define XFS_IPINNED (1 << __XFS_IPINNED_BIT)
/*
* Per-lifetime flags need to be reset when re-using a reclaimable inode during
@@ -392,6 +386,34 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
XFS_IFILESTREAM);
/*
+ * Synchronize processes attempting to flush the in-core inode back to disk.
+ */
+
+extern void __xfs_iflock(struct xfs_inode *ip);
+
+static inline int xfs_iflock_nowait(struct xfs_inode *ip)
+{
+ return !xfs_iflags_test_and_set(ip, XFS_IFLOCK);
+}
+
+static inline void xfs_iflock(struct xfs_inode *ip)
+{
+ if (!xfs_iflock_nowait(ip))
+ __xfs_iflock(ip);
+}
+
+static inline void xfs_ifunlock(struct xfs_inode *ip)
+{
+ xfs_iflags_clear(ip, XFS_IFLOCK);
+ wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
+}
+
+static inline int xfs_isiflocked(struct xfs_inode *ip)
+{
+ return xfs_iflags_test(ip, XFS_IFLOCK);
+}
+
+/*
* Flags for inode locking.
* Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield)
* 1<<16 - 1<<32-1 -- lockdep annotation (integers)
@@ -491,8 +513,6 @@ int xfs_ifree(struct xfs_trans *, xfs_inode_t *,
struct xfs_bmap_free *);
int xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *,
int, xfs_fsize_t);
-int xfs_itruncate_data(struct xfs_trans **, struct xfs_inode *,
- xfs_fsize_t);
int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
void xfs_iext_realloc(xfs_inode_t *, int, int);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index cfd6c7f8cc3..91d71dcd485 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -79,8 +79,6 @@ xfs_inode_item_size(
break;
case XFS_DINODE_FMT_BTREE:
- ASSERT(ip->i_df.if_ext_max ==
- XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
iip->ili_format.ilf_fields &=
~(XFS_ILOG_DDATA | XFS_ILOG_DEXT |
XFS_ILOG_DEV | XFS_ILOG_UUID);
@@ -557,7 +555,7 @@ xfs_inode_item_unpin(
trace_xfs_inode_unpin(ip, _RET_IP_);
ASSERT(atomic_read(&ip->i_pincount) > 0);
if (atomic_dec_and_test(&ip->i_pincount))
- wake_up(&ip->i_ipin_wait);
+ wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT);
}
/*
@@ -719,7 +717,7 @@ xfs_inode_item_pushbuf(
* If a flush is not in progress anymore, chances are that the
* inode was taken off the AIL. So, just get out.
*/
- if (completion_done(&ip->i_flush) ||
+ if (!xfs_isiflocked(ip) ||
!(lip->li_flags & XFS_LI_IN_AIL)) {
xfs_iunlock(ip, XFS_ILOCK_SHARED);
return true;
@@ -752,7 +750,7 @@ xfs_inode_item_push(
struct xfs_inode *ip = iip->ili_inode;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
- ASSERT(!completion_done(&ip->i_flush));
+ ASSERT(xfs_isiflocked(ip));
/*
* Since we were able to lock the inode's flush lock and
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 9afa282aa93..246c7d57c6f 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -57,26 +57,26 @@ xfs_iomap_eof_align_last_fsb(
xfs_fileoff_t *last_fsb)
{
xfs_fileoff_t new_last_fsb = 0;
- xfs_extlen_t align;
+ xfs_extlen_t align = 0;
int eof, error;
- if (XFS_IS_REALTIME_INODE(ip))
- ;
- /*
- * If mounted with the "-o swalloc" option, roundup the allocation
- * request to a stripe width boundary if the file size is >=
- * stripe width and we are allocating past the allocation eof.
- */
- else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) &&
- (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_swidth)))
- new_last_fsb = roundup_64(*last_fsb, mp->m_swidth);
- /*
- * Roundup the allocation request to a stripe unit (m_dalign) boundary
- * if the file size is >= stripe unit size, and we are allocating past
- * the allocation eof.
- */
- else if (mp->m_dalign && (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_dalign)))
- new_last_fsb = roundup_64(*last_fsb, mp->m_dalign);
+ if (!XFS_IS_REALTIME_INODE(ip)) {
+ /*
+ * Round up the allocation request to a stripe unit
+ * (m_dalign) boundary if the file size is >= stripe unit
+ * size, and we are allocating past the allocation eof.
+ *
+ * If mounted with the "-o swalloc" option the alignment is
+ * increased from the strip unit size to the stripe width.
+ */
+ if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
+ align = mp->m_swidth;
+ else if (mp->m_dalign)
+ align = mp->m_dalign;
+
+ if (align && XFS_ISIZE(ip) >= XFS_FSB_TO_B(mp, align))
+ new_last_fsb = roundup_64(*last_fsb, align);
+ }
/*
* Always round up the allocation request to an extent boundary
@@ -154,7 +154,7 @@ xfs_iomap_write_direct(
offset_fsb = XFS_B_TO_FSBT(mp, offset);
last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
- if ((offset + count) > ip->i_size) {
+ if ((offset + count) > XFS_ISIZE(ip)) {
error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
if (error)
goto error_out;
@@ -211,7 +211,7 @@ xfs_iomap_write_direct(
xfs_trans_ijoin(tp, ip, 0);
bmapi_flag = 0;
- if (offset < ip->i_size || extsz)
+ if (offset < XFS_ISIZE(ip) || extsz)
bmapi_flag |= XFS_BMAPI_PREALLOC;
/*
@@ -286,7 +286,7 @@ xfs_iomap_eof_want_preallocate(
int found_delalloc = 0;
*prealloc = 0;
- if ((offset + count) <= ip->i_size)
+ if (offset + count <= XFS_ISIZE(ip))
return 0;
/*
@@ -340,7 +340,7 @@ xfs_iomap_prealloc_size(
* if we pass in alloc_blocks = 0. Hence the "+ 1" to
* ensure we always pass in a non-zero value.
*/
- alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size) + 1;
+ alloc_blocks = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)) + 1;
alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
rounddown_pow_of_two(alloc_blocks));
@@ -564,7 +564,7 @@ xfs_iomap_write_allocate(
* back....
*/
nimaps = 1;
- end_fsb = XFS_B_TO_FSB(mp, ip->i_size);
+ end_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
error = xfs_bmap_last_offset(NULL, ip, &last_block,
XFS_DATA_FORK);
if (error)
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index f9babd17922..ab302539e5b 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -750,6 +750,7 @@ xfs_setattr_size(
struct xfs_mount *mp = ip->i_mount;
struct inode *inode = VFS_I(ip);
int mask = iattr->ia_valid;
+ xfs_off_t oldsize, newsize;
struct xfs_trans *tp;
int error;
uint lock_flags;
@@ -777,11 +778,13 @@ xfs_setattr_size(
lock_flags |= XFS_IOLOCK_EXCL;
xfs_ilock(ip, lock_flags);
+ oldsize = inode->i_size;
+ newsize = iattr->ia_size;
+
/*
* Short circuit the truncate case for zero length files.
*/
- if (iattr->ia_size == 0 &&
- ip->i_size == 0 && ip->i_d.di_nextents == 0) {
+ if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) {
if (!(mask & (ATTR_CTIME|ATTR_MTIME)))
goto out_unlock;
@@ -807,14 +810,14 @@ xfs_setattr_size(
* the inode to the transaction, because the inode cannot be unlocked
* once it is a part of the transaction.
*/
- if (iattr->ia_size > ip->i_size) {
+ if (newsize > oldsize) {
/*
* Do the first part of growing a file: zero any data in the
* last block that is beyond the old EOF. We need to do this
* before the inode is joined to the transaction to modify
* i_size.
*/
- error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
+ error = xfs_zero_eof(ip, newsize, oldsize);
if (error)
goto out_unlock;
}
@@ -833,8 +836,8 @@ xfs_setattr_size(
* here and prevents waiting for other data not within the range we
* care about here.
*/
- if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) {
- error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size, 0,
+ if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) {
+ error = xfs_flush_pages(ip, ip->i_d.di_size, newsize, 0,
FI_NONE);
if (error)
goto out_unlock;
@@ -845,8 +848,7 @@ xfs_setattr_size(
*/
inode_dio_wait(inode);
- error = -block_truncate_page(inode->i_mapping, iattr->ia_size,
- xfs_get_blocks);
+ error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
if (error)
goto out_unlock;
@@ -857,7 +859,7 @@ xfs_setattr_size(
if (error)
goto out_trans_cancel;
- truncate_setsize(inode, iattr->ia_size);
+ truncate_setsize(inode, newsize);
commit_flags = XFS_TRANS_RELEASE_LOG_RES;
lock_flags |= XFS_ILOCK_EXCL;
@@ -876,19 +878,29 @@ xfs_setattr_size(
* these flags set. For all other operations the VFS set these flags
* explicitly if it wants a timestamp update.
*/
- if (iattr->ia_size != ip->i_size &&
- (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
+ if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
iattr->ia_ctime = iattr->ia_mtime =
current_fs_time(inode->i_sb);
mask |= ATTR_CTIME | ATTR_MTIME;
}
- if (iattr->ia_size > ip->i_size) {
- ip->i_d.di_size = iattr->ia_size;
- ip->i_size = iattr->ia_size;
- } else if (iattr->ia_size <= ip->i_size ||
- (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
- error = xfs_itruncate_data(&tp, ip, iattr->ia_size);
+ /*
+ * The first thing we do is set the size to new_size permanently on
+ * disk. This way we don't have to worry about anyone ever being able
+ * to look at the data being freed even in the face of a crash.
+ * What we're getting around here is the case where we free a block, it
+ * is allocated to another file, it is written to, and then we crash.
+ * If the new data gets written to the file but the log buffers
+ * containing the free and reallocation don't, then we'd end up with
+ * garbage in the blocks being freed. As long as we make the new size
+ * permanent before actually freeing any blocks it doesn't matter if
+ * they get written to.
+ */
+ ip->i_d.di_size = newsize;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+ if (newsize <= oldsize) {
+ error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, newsize);
if (error)
goto out_trans_abort;
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 5cc3dde1bc9..eafbcff81f3 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -31,6 +31,7 @@
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
+#include "xfs_inode_item.h"
#include "xfs_itable.h"
#include "xfs_bmap.h"
#include "xfs_rtalloc.h"
@@ -263,13 +264,18 @@ xfs_qm_scall_trunc_qfile(
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
- error = xfs_itruncate_data(&tp, ip, 0);
+ ip->i_d.di_size = 0;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+ error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
if (error) {
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
XFS_TRANS_ABORT);
goto out_unlock;
}
+ ASSERT(ip->i_d.di_nextents == 0);
+
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 281961c1d81..ee5b695c99a 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -828,14 +828,6 @@ xfs_fs_inode_init_once(
/* xfs inode */
atomic_set(&ip->i_pincount, 0);
spin_lock_init(&ip->i_flags_lock);
- init_waitqueue_head(&ip->i_ipin_wait);
- /*
- * Because we want to use a counting completion, complete
- * the flush completion once to allow a single access to
- * the flush completion without blocking.
- */
- init_completion(&ip->i_flush);
- complete(&ip->i_flush);
mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
"xfsino", ip->i_ino);
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 72c01a1c16e..40b75eecd2b 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -707,14 +707,13 @@ xfs_reclaim_inode_grab(
return 1;
/*
- * do some unlocked checks first to avoid unnecessary lock traffic.
- * The first is a flush lock check, the second is a already in reclaim
- * check. Only do these checks if we are not going to block on locks.
+ * If we are asked for non-blocking operation, do unlocked checks to
+ * see if the inode already is being flushed or in reclaim to avoid
+ * lock traffic.
*/
if ((flags & SYNC_TRYLOCK) &&
- (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) {
+ __xfs_iflags_test(ip, XFS_IFLOCK | XFS_IRECLAIM))
return 1;
- }
/*
* The radix tree lock here protects a thread in xfs_iget from racing
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index a9d5b1e06ef..6b6df5802e9 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -891,7 +891,6 @@ DECLARE_EVENT_CLASS(xfs_file_class,
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(xfs_fsize_t, size)
- __field(xfs_fsize_t, new_size)
__field(loff_t, offset)
__field(size_t, count)
__field(int, flags)
@@ -900,17 +899,15 @@ DECLARE_EVENT_CLASS(xfs_file_class,
__entry->dev = VFS_I(ip)->i_sb->s_dev;
__entry->ino = ip->i_ino;
__entry->size = ip->i_d.di_size;
- __entry->new_size = ip->i_new_size;
__entry->offset = offset;
__entry->count = count;
__entry->flags = flags;
),
- TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
+ TP_printk("dev %d:%d ino 0x%llx size 0x%llx "
"offset 0x%llx count 0x%zx ioflags %s",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->size,
- __entry->new_size,
__entry->offset,
__entry->count,
__print_flags(__entry->flags, "|", XFS_IO_FLAGS))
@@ -978,7 +975,6 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(loff_t, size)
- __field(loff_t, new_size)
__field(loff_t, offset)
__field(size_t, count)
__field(int, type)
@@ -990,7 +986,6 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
__entry->dev = VFS_I(ip)->i_sb->s_dev;
__entry->ino = ip->i_ino;
__entry->size = ip->i_d.di_size;
- __entry->new_size = ip->i_new_size;
__entry->offset = offset;
__entry->count = count;
__entry->type = type;
@@ -998,13 +993,11 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
__entry->startblock = irec ? irec->br_startblock : 0;
__entry->blockcount = irec ? irec->br_blockcount : 0;
),
- TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
- "offset 0x%llx count %zd type %s "
- "startoff 0x%llx startblock %lld blockcount 0x%llx",
+ TP_printk("dev %d:%d ino 0x%llx size 0x%llx offset 0x%llx count %zd "
+ "type %s startoff 0x%llx startblock %lld blockcount 0x%llx",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->size,
- __entry->new_size,
__entry->offset,
__entry->count,
__print_symbolic(__entry->type, XFS_IO_TYPES),
@@ -1031,26 +1024,23 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class,
__field(xfs_ino_t, ino)
__field(loff_t, isize)
__field(loff_t, disize)
- __field(loff_t, new_size)
__field(loff_t, offset)
__field(size_t, count)
),
TP_fast_assign(
__entry->dev = VFS_I(ip)->i_sb->s_dev;
__entry->ino = ip->i_ino;
- __entry->isize = ip->i_size;
+ __entry->isize = VFS_I(ip)->i_size;
__entry->disize = ip->i_d.di_size;
- __entry->new_size = ip->i_new_size;
__entry->offset = offset;
__entry->count = count;
),
- TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx "
+ TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx "
"offset 0x%llx count %zd",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->isize,
__entry->disize,
- __entry->new_size,
__entry->offset,
__entry->count)
);
@@ -1090,8 +1080,8 @@ DECLARE_EVENT_CLASS(xfs_itrunc_class,
DEFINE_EVENT(xfs_itrunc_class, name, \
TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
TP_ARGS(ip, new_size))
-DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start);
-DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end);
+DEFINE_ITRUNC_EVENT(xfs_itruncate_extents_start);
+DEFINE_ITRUNC_EVENT(xfs_itruncate_extents_end);
TRACE_EVENT(xfs_pagecache_inval,
TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish),
@@ -1568,7 +1558,6 @@ DECLARE_EVENT_CLASS(xfs_swap_extent_class,
__field(xfs_ino_t, ino)
__field(int, format)
__field(int, nex)
- __field(int, max_nex)
__field(int, broot_size)
__field(int, fork_off)
),
@@ -1578,18 +1567,16 @@ DECLARE_EVENT_CLASS(xfs_swap_extent_class,
__entry->ino = ip->i_ino;
__entry->format = ip->i_d.di_format;
__entry->nex = ip->i_d.di_nextents;
- __entry->max_nex = ip->i_df.if_ext_max;
__entry->broot_size = ip->i_df.if_broot_bytes;
__entry->fork_off = XFS_IFORK_BOFF(ip);
),
TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, "
- "Max in-fork extents %d, broot size %d, fork offset %d",
+ "broot size %d, fork offset %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__print_symbolic(__entry->which, XFS_SWAPEXT_INODES),
__print_symbolic(__entry->format, XFS_INODE_FORMAT_STR),
__entry->nex,
- __entry->max_nex,
__entry->broot_size,
__entry->fork_off)
)
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index f2fea868d4d..0cf52da9d24 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -175,7 +175,7 @@ xfs_free_eofblocks(
* Figure out if there are any blocks beyond the end
* of the file. If not, then there is nothing to do.
*/
- end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_size));
+ end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
if (last_fsb <= end_fsb)
return 0;
@@ -226,7 +226,14 @@ xfs_free_eofblocks(
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
- error = xfs_itruncate_data(&tp, ip, ip->i_size);
+ /*
+ * Do not update the on-disk file size. If we update the
+ * on-disk file size and then the system crashes before the
+ * contents of the file are flushed to disk then the files
+ * may be full of holes (ie NULL files bug).
+ */
+ error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK,
+ XFS_ISIZE(ip));
if (error) {
/*
* If we get an error at this point we simply don't
@@ -540,8 +547,8 @@ xfs_release(
return 0;
if ((S_ISREG(ip->i_d.di_mode) &&
- ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
- ip->i_delayed_blks > 0)) &&
+ (VFS_I(ip)->i_size > 0 ||
+ (VN_CACHED(VFS_I(ip)) > 0 || ip->i_delayed_blks > 0)) &&
(ip->i_df.if_flags & XFS_IFEXTENTS)) &&
(!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
@@ -618,7 +625,7 @@ xfs_inactive(
* only one with a reference to the inode.
*/
truncate = ((ip->i_d.di_nlink == 0) &&
- ((ip->i_d.di_size != 0) || (ip->i_size != 0) ||
+ ((ip->i_d.di_size != 0) || XFS_ISIZE(ip) != 0 ||
(ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) &&
S_ISREG(ip->i_d.di_mode));
@@ -632,12 +639,12 @@ xfs_inactive(
if (ip->i_d.di_nlink != 0) {
if ((S_ISREG(ip->i_d.di_mode) &&
- ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
- ip->i_delayed_blks > 0)) &&
- (ip->i_df.if_flags & XFS_IFEXTENTS) &&
- (!(ip->i_d.di_flags &
+ (VFS_I(ip)->i_size > 0 ||
+ (VN_CACHED(VFS_I(ip)) > 0 || ip->i_delayed_blks > 0)) &&
+ (ip->i_df.if_flags & XFS_IFEXTENTS) &&
+ (!(ip->i_d.di_flags &
(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
- (ip->i_delayed_blks != 0)))) {
+ ip->i_delayed_blks != 0))) {
error = xfs_free_eofblocks(mp, ip, 0);
if (error)
return VN_INACTIVE_CACHE;
@@ -670,13 +677,18 @@ xfs_inactive(
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
- error = xfs_itruncate_data(&tp, ip, 0);
+ ip->i_d.di_size = 0;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+ error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
if (error) {
xfs_trans_cancel(tp,
XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
return VN_INACTIVE_CACHE;
}
+
+ ASSERT(ip->i_d.di_nextents == 0);
} else if (S_ISLNK(ip->i_d.di_mode)) {
/*
@@ -1961,11 +1973,11 @@ xfs_zero_remaining_bytes(
* since nothing can read beyond eof. The space will
* be zeroed when the file is extended anyway.
*/
- if (startoff >= ip->i_size)
+ if (startoff >= XFS_ISIZE(ip))
return 0;
- if (endoff > ip->i_size)
- endoff = ip->i_size;
+ if (endoff > XFS_ISIZE(ip))
+ endoff = XFS_ISIZE(ip);
bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp,
@@ -2260,7 +2272,7 @@ xfs_change_file_space(
bf->l_start += offset;
break;
case 2: /*SEEK_END*/
- bf->l_start += ip->i_size;
+ bf->l_start += XFS_ISIZE(ip);
break;
default:
return XFS_ERROR(EINVAL);
@@ -2277,7 +2289,7 @@ xfs_change_file_space(
bf->l_whence = 0;
startoffset = bf->l_start;
- fsize = ip->i_size;
+ fsize = XFS_ISIZE(ip);
/*
* XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve
diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 9eabffbc4e5..033f6aa670d 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -134,7 +134,7 @@ struct pl08x_txd {
struct dma_async_tx_descriptor tx;
struct list_head node;
struct list_head dsg_list;
- enum dma_data_direction direction;
+ enum dma_transfer_direction direction;
dma_addr_t llis_bus;
struct pl08x_lli *llis_va;
/* Default cctl value for LLIs */
@@ -197,7 +197,7 @@ struct pl08x_dma_chan {
dma_addr_t dst_addr;
u32 src_cctl;
u32 dst_cctl;
- enum dma_data_direction runtime_direction;
+ enum dma_transfer_direction runtime_direction;
dma_cookie_t lc;
struct list_head pend_list;
struct pl08x_txd *at;
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 426ab9f4dd8..9ff7a2c48b5 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -26,6 +26,7 @@
#include <linux/types.h>
#include <linux/elf-em.h>
+#include <linux/ptrace.h>
/* The netlink messages for the audit system is divided into blocks:
* 1000 - 1099 are for commanding the audit system
@@ -181,6 +182,40 @@
* AUDIT_UNUSED_BITS is updated if need be. */
#define AUDIT_UNUSED_BITS 0x07FFFC00
+/* AUDIT_FIELD_COMPARE rule list */
+#define AUDIT_COMPARE_UID_TO_OBJ_UID 1
+#define AUDIT_COMPARE_GID_TO_OBJ_GID 2
+#define AUDIT_COMPARE_EUID_TO_OBJ_UID 3
+#define AUDIT_COMPARE_EGID_TO_OBJ_GID 4
+#define AUDIT_COMPARE_AUID_TO_OBJ_UID 5
+#define AUDIT_COMPARE_SUID_TO_OBJ_UID 6
+#define AUDIT_COMPARE_SGID_TO_OBJ_GID 7
+#define AUDIT_COMPARE_FSUID_TO_OBJ_UID 8
+#define AUDIT_COMPARE_FSGID_TO_OBJ_GID 9
+
+#define AUDIT_COMPARE_UID_TO_AUID 10
+#define AUDIT_COMPARE_UID_TO_EUID 11
+#define AUDIT_COMPARE_UID_TO_FSUID 12
+#define AUDIT_COMPARE_UID_TO_SUID 13
+
+#define AUDIT_COMPARE_AUID_TO_FSUID 14
+#define AUDIT_COMPARE_AUID_TO_SUID 15
+#define AUDIT_COMPARE_AUID_TO_EUID 16
+
+#define AUDIT_COMPARE_EUID_TO_SUID 17
+#define AUDIT_COMPARE_EUID_TO_FSUID 18
+
+#define AUDIT_COMPARE_SUID_TO_FSUID 19
+
+#define AUDIT_COMPARE_GID_TO_EGID 20
+#define AUDIT_COMPARE_GID_TO_FSGID 21
+#define AUDIT_COMPARE_GID_TO_SGID 22
+
+#define AUDIT_COMPARE_EGID_TO_FSGID 23
+#define AUDIT_COMPARE_EGID_TO_SGID 24
+#define AUDIT_COMPARE_SGID_TO_FSGID 25
+
+#define AUDIT_MAX_FIELD_COMPARE AUDIT_COMPARE_SGID_TO_FSGID
/* Rule fields */
/* These are useful when checking the
@@ -222,6 +257,9 @@
#define AUDIT_PERM 106
#define AUDIT_DIR 107
#define AUDIT_FILETYPE 108
+#define AUDIT_OBJ_UID 109
+#define AUDIT_OBJ_GID 110
+#define AUDIT_FIELD_COMPARE 111
#define AUDIT_ARG0 200
#define AUDIT_ARG1 (AUDIT_ARG0+1)
@@ -408,28 +446,24 @@ struct audit_field {
void *lsm_rule;
};
-#define AUDITSC_INVALID 0
-#define AUDITSC_SUCCESS 1
-#define AUDITSC_FAILURE 2
-#define AUDITSC_RESULT(x) ( ((long)(x))<0?AUDITSC_FAILURE:AUDITSC_SUCCESS )
extern int __init audit_register_class(int class, unsigned *list);
extern int audit_classify_syscall(int abi, unsigned syscall);
extern int audit_classify_arch(int arch);
#ifdef CONFIG_AUDITSYSCALL
/* These are defined in auditsc.c */
/* Public API */
-extern void audit_finish_fork(struct task_struct *child);
extern int audit_alloc(struct task_struct *task);
-extern void audit_free(struct task_struct *task);
-extern void audit_syscall_entry(int arch,
- int major, unsigned long a0, unsigned long a1,
- unsigned long a2, unsigned long a3);
-extern void audit_syscall_exit(int failed, long return_code);
+extern void __audit_free(struct task_struct *task);
+extern void __audit_syscall_entry(int arch,
+ int major, unsigned long a0, unsigned long a1,
+ unsigned long a2, unsigned long a3);
+extern void __audit_syscall_exit(int ret_success, long ret_value);
extern void __audit_getname(const char *name);
extern void audit_putname(const char *name);
extern void __audit_inode(const char *name, const struct dentry *dentry);
extern void __audit_inode_child(const struct dentry *dentry,
const struct inode *parent);
+extern void __audit_seccomp(unsigned long syscall);
extern void __audit_ptrace(struct task_struct *t);
static inline int audit_dummy_context(void)
@@ -437,6 +471,27 @@ static inline int audit_dummy_context(void)
void *p = current->audit_context;
return !p || *(int *)p;
}
+static inline void audit_free(struct task_struct *task)
+{
+ if (unlikely(task->audit_context))
+ __audit_free(task);
+}
+static inline void audit_syscall_entry(int arch, int major, unsigned long a0,
+ unsigned long a1, unsigned long a2,
+ unsigned long a3)
+{
+ if (unlikely(!audit_dummy_context()))
+ __audit_syscall_entry(arch, major, a0, a1, a2, a3);
+}
+static inline void audit_syscall_exit(void *pt_regs)
+{
+ if (unlikely(current->audit_context)) {
+ int success = is_syscall_success(pt_regs);
+ int return_code = regs_return_value(pt_regs);
+
+ __audit_syscall_exit(success, return_code);
+ }
+}
static inline void audit_getname(const char *name)
{
if (unlikely(!audit_dummy_context()))
@@ -453,6 +508,12 @@ static inline void audit_inode_child(const struct dentry *dentry,
}
void audit_core_dumps(long signr);
+static inline void audit_seccomp(unsigned long syscall)
+{
+ if (unlikely(!audit_dummy_context()))
+ __audit_seccomp(syscall);
+}
+
static inline void audit_ptrace(struct task_struct *t)
{
if (unlikely(!audit_dummy_context()))
@@ -463,17 +524,16 @@ static inline void audit_ptrace(struct task_struct *t)
extern unsigned int audit_serial(void);
extern int auditsc_get_stamp(struct audit_context *ctx,
struct timespec *t, unsigned int *serial);
-extern int audit_set_loginuid(struct task_struct *task, uid_t loginuid);
+extern int audit_set_loginuid(uid_t loginuid);
#define audit_get_loginuid(t) ((t)->loginuid)
#define audit_get_sessionid(t) ((t)->sessionid)
extern void audit_log_task_context(struct audit_buffer *ab);
extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode);
-extern int audit_bprm(struct linux_binprm *bprm);
-extern void audit_socketcall(int nargs, unsigned long *args);
-extern int audit_sockaddr(int len, void *addr);
+extern int __audit_bprm(struct linux_binprm *bprm);
+extern void __audit_socketcall(int nargs, unsigned long *args);
+extern int __audit_sockaddr(int len, void *addr);
extern void __audit_fd_pair(int fd1, int fd2);
-extern int audit_set_macxattr(const char *name);
extern void __audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr);
extern void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec *abs_timeout);
extern void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification);
@@ -499,6 +559,23 @@ static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid
if (unlikely(!audit_dummy_context()))
__audit_ipc_set_perm(qbytes, uid, gid, mode);
}
+static inline int audit_bprm(struct linux_binprm *bprm)
+{
+ if (unlikely(!audit_dummy_context()))
+ return __audit_bprm(bprm);
+ return 0;
+}
+static inline void audit_socketcall(int nargs, unsigned long *args)
+{
+ if (unlikely(!audit_dummy_context()))
+ __audit_socketcall(nargs, args);
+}
+static inline int audit_sockaddr(int len, void *addr)
+{
+ if (unlikely(!audit_dummy_context()))
+ return __audit_sockaddr(len, addr);
+ return 0;
+}
static inline void audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr)
{
if (unlikely(!audit_dummy_context()))
@@ -544,12 +621,11 @@ static inline void audit_mmap_fd(int fd, int flags)
extern int audit_n_rules;
extern int audit_signals;
-#else
-#define audit_finish_fork(t)
+#else /* CONFIG_AUDITSYSCALL */
#define audit_alloc(t) ({ 0; })
#define audit_free(t) do { ; } while (0)
#define audit_syscall_entry(ta,a,b,c,d,e) do { ; } while (0)
-#define audit_syscall_exit(f,r) do { ; } while (0)
+#define audit_syscall_exit(r) do { ; } while (0)
#define audit_dummy_context() 1
#define audit_getname(n) do { ; } while (0)
#define audit_putname(n) do { ; } while (0)
@@ -558,6 +634,7 @@ extern int audit_signals;
#define audit_inode(n,d) do { (void)(d); } while (0)
#define audit_inode_child(i,p) do { ; } while (0)
#define audit_core_dumps(i) do { ; } while (0)
+#define audit_seccomp(i) do { ; } while (0)
#define auditsc_get_stamp(c,t,s) (0)
#define audit_get_loginuid(t) (-1)
#define audit_get_sessionid(t) (-1)
@@ -568,7 +645,6 @@ extern int audit_signals;
#define audit_socketcall(n,a) ((void)0)
#define audit_fd_pair(n,a) ((void)0)
#define audit_sockaddr(len, addr) ({ 0; })
-#define audit_set_macxattr(n) do { ; } while (0)
#define audit_mq_open(o,m,a) ((void)0)
#define audit_mq_sendrecv(d,l,p,t) ((void)0)
#define audit_mq_notify(d,n) ((void)0)
@@ -579,7 +655,7 @@ extern int audit_signals;
#define audit_ptrace(t) ((void)0)
#define audit_n_rules 0
#define audit_signals 0
-#endif
+#endif /* CONFIG_AUDITSYSCALL */
#ifdef CONFIG_AUDIT
/* These are defined in audit.c */
diff --git a/include/linux/digsig.h b/include/linux/digsig.h
index efae755017d..b01558b1581 100644
--- a/include/linux/digsig.h
+++ b/include/linux/digsig.h
@@ -46,7 +46,7 @@ struct signature_hdr {
char mpi[0];
} __packed;
-#if defined(CONFIG_DIGSIG) || defined(CONFIG_DIGSIG_MODULE)
+#if defined(CONFIG_SIGNATURE) || defined(CONFIG_SIGNATURE_MODULE)
int digsig_verify(struct key *keyring, const char *sig, int siglen,
const char *digest, int digestlen);
@@ -59,6 +59,6 @@ static inline int digsig_verify(struct key *keyring, const char *sig,
return -EOPNOTSUPP;
}
-#endif /* CONFIG_DIGSIG */
+#endif /* CONFIG_SIGNATURE */
#endif /* _DIGSIG_H */
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 75f53f874b2..679b349d9b6 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -23,7 +23,6 @@
#include <linux/device.h>
#include <linux/uio.h>
-#include <linux/dma-direction.h>
#include <linux/scatterlist.h>
#include <linux/bitmap.h>
#include <asm/page.h>
@@ -72,11 +71,93 @@ enum dma_transaction_type {
DMA_ASYNC_TX,
DMA_SLAVE,
DMA_CYCLIC,
+ DMA_INTERLEAVE,
+/* last transaction type for creation of the capabilities mask */
+ DMA_TX_TYPE_END,
};
-/* last transaction type for creation of the capabilities mask */
-#define DMA_TX_TYPE_END (DMA_CYCLIC + 1)
+/**
+ * enum dma_transfer_direction - dma transfer mode and direction indicator
+ * @DMA_MEM_TO_MEM: Async/Memcpy mode
+ * @DMA_MEM_TO_DEV: Slave mode & From Memory to Device
+ * @DMA_DEV_TO_MEM: Slave mode & From Device to Memory
+ * @DMA_DEV_TO_DEV: Slave mode & From Device to Device
+ */
+enum dma_transfer_direction {
+ DMA_MEM_TO_MEM,
+ DMA_MEM_TO_DEV,
+ DMA_DEV_TO_MEM,
+ DMA_DEV_TO_DEV,
+ DMA_TRANS_NONE,
+};
+
+/**
+ * Interleaved Transfer Request
+ * ----------------------------
+ * A chunk is collection of contiguous bytes to be transfered.
+ * The gap(in bytes) between two chunks is called inter-chunk-gap(ICG).
+ * ICGs may or maynot change between chunks.
+ * A FRAME is the smallest series of contiguous {chunk,icg} pairs,
+ * that when repeated an integral number of times, specifies the transfer.
+ * A transfer template is specification of a Frame, the number of times
+ * it is to be repeated and other per-transfer attributes.
+ *
+ * Practically, a client driver would have ready a template for each
+ * type of transfer it is going to need during its lifetime and
+ * set only 'src_start' and 'dst_start' before submitting the requests.
+ *
+ *
+ * | Frame-1 | Frame-2 | ~ | Frame-'numf' |
+ * |====....==.===...=...|====....==.===...=...| ~ |====....==.===...=...|
+ *
+ * == Chunk size
+ * ... ICG
+ */
+
+/**
+ * struct data_chunk - Element of scatter-gather list that makes a frame.
+ * @size: Number of bytes to read from source.
+ * size_dst := fn(op, size_src), so doesn't mean much for destination.
+ * @icg: Number of bytes to jump after last src/dst address of this
+ * chunk and before first src/dst address for next chunk.
+ * Ignored for dst(assumed 0), if dst_inc is true and dst_sgl is false.
+ * Ignored for src(assumed 0), if src_inc is true and src_sgl is false.
+ */
+struct data_chunk {
+ size_t size;
+ size_t icg;
+};
+/**
+ * struct dma_interleaved_template - Template to convey DMAC the transfer pattern
+ * and attributes.
+ * @src_start: Bus address of source for the first chunk.
+ * @dst_start: Bus address of destination for the first chunk.
+ * @dir: Specifies the type of Source and Destination.
+ * @src_inc: If the source address increments after reading from it.
+ * @dst_inc: If the destination address increments after writing to it.
+ * @src_sgl: If the 'icg' of sgl[] applies to Source (scattered read).
+ * Otherwise, source is read contiguously (icg ignored).
+ * Ignored if src_inc is false.
+ * @dst_sgl: If the 'icg' of sgl[] applies to Destination (scattered write).
+ * Otherwise, destination is filled contiguously (icg ignored).
+ * Ignored if dst_inc is false.
+ * @numf: Number of frames in this template.
+ * @frame_size: Number of chunks in a frame i.e, size of sgl[].
+ * @sgl: Array of {chunk,icg} pairs that make up a frame.
+ */
+struct dma_interleaved_template {
+ dma_addr_t src_start;
+ dma_addr_t dst_start;
+ enum dma_transfer_direction dir;
+ bool src_inc;
+ bool dst_inc;
+ bool src_sgl;
+ bool dst_sgl;
+ size_t numf;
+ size_t frame_size;
+ struct data_chunk sgl[0];
+};
/**
* enum dma_ctrl_flags - DMA flags to augment operation preparation,
@@ -269,7 +350,7 @@ enum dma_slave_buswidth {
* struct, if applicable.
*/
struct dma_slave_config {
- enum dma_data_direction direction;
+ enum dma_transfer_direction direction;
dma_addr_t src_addr;
dma_addr_t dst_addr;
enum dma_slave_buswidth src_addr_width;
@@ -433,6 +514,7 @@ struct dma_tx_state {
* @device_prep_dma_cyclic: prepare a cyclic dma operation suitable for audio.
* The function takes a buffer of size buf_len. The callback function will
* be called after period_len bytes have been transferred.
+ * @device_prep_interleaved_dma: Transfer expression in a generic way.
* @device_control: manipulate all pending operations on a channel, returns
* zero or error code
* @device_tx_status: poll for transaction completion, the optional
@@ -492,11 +574,14 @@ struct dma_device {
struct dma_async_tx_descriptor *(*device_prep_slave_sg)(
struct dma_chan *chan, struct scatterlist *sgl,
- unsigned int sg_len, enum dma_data_direction direction,
+ unsigned int sg_len, enum dma_transfer_direction direction,
unsigned long flags);
struct dma_async_tx_descriptor *(*device_prep_dma_cyclic)(
struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
- size_t period_len, enum dma_data_direction direction);
+ size_t period_len, enum dma_transfer_direction direction);
+ struct dma_async_tx_descriptor *(*device_prep_interleaved_dma)(
+ struct dma_chan *chan, struct dma_interleaved_template *xt,
+ unsigned long flags);
int (*device_control)(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
unsigned long arg);
@@ -522,7 +607,7 @@ static inline int dmaengine_slave_config(struct dma_chan *chan,
static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single(
struct dma_chan *chan, void *buf, size_t len,
- enum dma_data_direction dir, unsigned long flags)
+ enum dma_transfer_direction dir, unsigned long flags)
{
struct scatterlist sg;
sg_init_one(&sg, buf, len);
diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h
index 4bfe0a2f7d5..f2c64f92c4a 100644
--- a/include/linux/dw_dmac.h
+++ b/include/linux/dw_dmac.h
@@ -127,7 +127,7 @@ struct dw_cyclic_desc {
struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
dma_addr_t buf_addr, size_t buf_len, size_t period_len,
- enum dma_data_direction direction);
+ enum dma_transfer_direction direction);
void dw_dma_cyclic_free(struct dma_chan *chan);
int dw_dma_cyclic_start(struct dma_chan *chan);
void dw_dma_cyclic_stop(struct dma_chan *chan);
diff --git a/include/linux/key.h b/include/linux/key.h
index 183a6af7715..bfc014c5735 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -293,6 +293,9 @@ static inline bool key_is_instantiated(const struct key *key)
(rcu_dereference_protected((KEY)->payload.rcudata, \
rwsem_is_locked(&((struct key *)(KEY))->sem)))
+#define rcu_assign_keypointer(KEY, PAYLOAD) \
+ (rcu_assign_pointer((KEY)->payload.rcudata, PAYLOAD))
+
#ifdef CONFIG_SYSCTL
extern ctl_table key_sysctls[];
#endif
diff --git a/include/linux/kref.h b/include/linux/kref.h
index abc0120b09b..9c07dcebded 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -17,6 +17,7 @@
#include <linux/bug.h>
#include <linux/atomic.h>
+#include <linux/kernel.h>
struct kref {
atomic_t refcount;
diff --git a/include/linux/mtd/gpmi-nand.h b/include/linux/mtd/gpmi-nand.h
new file mode 100644
index 00000000000..69b6dbf46b5
--- /dev/null
+++ b/include/linux/mtd/gpmi-nand.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2011 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef __MACH_MXS_GPMI_NAND_H__
+#define __MACH_MXS_GPMI_NAND_H__
+
+/* The size of the resources is fixed. */
+#define GPMI_NAND_RES_SIZE 6
+
+/* Resource names for the GPMI NAND driver. */
+#define GPMI_NAND_GPMI_REGS_ADDR_RES_NAME "GPMI NAND GPMI Registers"
+#define GPMI_NAND_GPMI_INTERRUPT_RES_NAME "GPMI NAND GPMI Interrupt"
+#define GPMI_NAND_BCH_REGS_ADDR_RES_NAME "GPMI NAND BCH Registers"
+#define GPMI_NAND_BCH_INTERRUPT_RES_NAME "GPMI NAND BCH Interrupt"
+#define GPMI_NAND_DMA_CHANNELS_RES_NAME "GPMI NAND DMA Channels"
+#define GPMI_NAND_DMA_INTERRUPT_RES_NAME "GPMI NAND DMA Interrupt"
+
+/**
+ * struct gpmi_nand_platform_data - GPMI NAND driver platform data.
+ *
+ * This structure communicates platform-specific information to the GPMI NAND
+ * driver that can't be expressed as resources.
+ *
+ * @platform_init: A pointer to a function the driver will call to
+ * initialize the platform (e.g., set up the pin mux).
+ * @min_prop_delay_in_ns: Minimum propagation delay of GPMI signals to and
+ * from the NAND Flash device, in nanoseconds.
+ * @max_prop_delay_in_ns: Maximum propagation delay of GPMI signals to and
+ * from the NAND Flash device, in nanoseconds.
+ * @max_chip_count: The maximum number of chips for which the driver
+ * should configure the hardware. This value most
+ * likely reflects the number of pins that are
+ * connected to a NAND Flash device. If this is
+ * greater than the SoC hardware can support, the
+ * driver will print a message and fail to initialize.
+ * @partitions: An optional pointer to an array of partition
+ * descriptions.
+ * @partition_count: The number of elements in the partitions array.
+ */
+struct gpmi_nand_platform_data {
+ /* SoC hardware information. */
+ int (*platform_init)(void);
+
+ /* NAND Flash information. */
+ unsigned int min_prop_delay_in_ns;
+ unsigned int max_prop_delay_in_ns;
+ unsigned int max_chip_count;
+
+ /* Medium information. */
+ struct mtd_partition *partitions;
+ unsigned partition_count;
+};
+#endif
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index a27e56ca41a..c2f1f6a5fcb 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -112,6 +112,7 @@
#include <linux/compiler.h> /* For unlikely. */
#include <linux/sched.h> /* For struct task_struct. */
+#include <linux/err.h> /* for IS_ERR_VALUE */
extern long arch_ptrace(struct task_struct *child, long request,
@@ -266,6 +267,15 @@ static inline void ptrace_release_task(struct task_struct *task)
#define force_successful_syscall_return() do { } while (0)
#endif
+#ifndef is_syscall_success
+/*
+ * On most systems we can tell if a syscall is a success based on if the retval
+ * is an error value. On some systems like ia64 and powerpc they have different
+ * indicators of success/failure and must define their own.
+ */
+#define is_syscall_success(regs) (!IS_ERR_VALUE((unsigned long)(regs_return_value(regs))))
+#endif
+
/*
* <asm/ptrace.h> should define the following things inside #ifdef __KERNEL__.
*
diff --git a/include/linux/sh_dma.h b/include/linux/sh_dma.h
index cb2dd118cc0..8cd7fe59cf1 100644
--- a/include/linux/sh_dma.h
+++ b/include/linux/sh_dma.h
@@ -30,7 +30,7 @@ struct sh_desc {
struct sh_dmae_regs hw;
struct list_head node;
struct dma_async_tx_descriptor async_tx;
- enum dma_data_direction direction;
+ enum dma_transfer_direction direction;
dma_cookie_t cookie;
size_t partial;
int chunks;
@@ -48,6 +48,7 @@ struct sh_dmae_channel {
unsigned int offset;
unsigned int dmars;
unsigned int dmars_bit;
+ unsigned int chclr_offset;
};
struct sh_dmae_pdata {
@@ -68,6 +69,7 @@ struct sh_dmae_pdata {
unsigned int dmaor_is_32bit:1;
unsigned int needs_tend_set:1;
unsigned int no_dmars:1;
+ unsigned int chclr_present:1;
};
/* DMA register */
diff --git a/include/linux/sirfsoc_dma.h b/include/linux/sirfsoc_dma.h
new file mode 100644
index 00000000000..29d959333d8
--- /dev/null
+++ b/include/linux/sirfsoc_dma.h
@@ -0,0 +1,6 @@
+#ifndef _SIRFSOC_DMA_H_
+#define _SIRFSOC_DMA_H_
+
+bool sirfsoc_dma_filter_id(struct dma_chan *chan, void *chan_id);
+
+#endif
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index ecdaeb98b29..5cf685086dd 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -312,7 +312,6 @@ struct tty_driver {
*/
struct tty_struct **ttys;
struct ktermios **termios;
- struct ktermios **termios_locked;
void *driver_state;
/*
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index b31702ac15b..84f3001a568 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -16,6 +16,8 @@ struct btrfs_delayed_ref_node;
struct btrfs_delayed_tree_ref;
struct btrfs_delayed_data_ref;
struct btrfs_delayed_ref_head;
+struct btrfs_block_group_cache;
+struct btrfs_free_cluster;
struct map_lookup;
struct extent_buffer;
@@ -44,6 +46,17 @@ struct extent_buffer;
obj, ((obj >= BTRFS_DATA_RELOC_TREE_OBJECTID) || \
(obj <= BTRFS_CSUM_TREE_OBJECTID )) ? __show_root_type(obj) : "-"
+#define BTRFS_GROUP_FLAGS \
+ { BTRFS_BLOCK_GROUP_DATA, "DATA"}, \
+ { BTRFS_BLOCK_GROUP_SYSTEM, "SYSTEM"}, \
+ { BTRFS_BLOCK_GROUP_METADATA, "METADATA"}, \
+ { BTRFS_BLOCK_GROUP_RAID0, "RAID0"}, \
+ { BTRFS_BLOCK_GROUP_RAID1, "RAID1"}, \
+ { BTRFS_BLOCK_GROUP_DUP, "DUP"}, \
+ { BTRFS_BLOCK_GROUP_RAID10, "RAID10"}
+
+#define BTRFS_UUID_SIZE 16
+
TRACE_EVENT(btrfs_transaction_commit,
TP_PROTO(struct btrfs_root *root),
@@ -621,6 +634,34 @@ TRACE_EVENT(btrfs_cow_block,
__entry->cow_level)
);
+TRACE_EVENT(btrfs_space_reservation,
+
+ TP_PROTO(struct btrfs_fs_info *fs_info, char *type, u64 val,
+ u64 bytes, int reserve),
+
+ TP_ARGS(fs_info, type, val, bytes, reserve),
+
+ TP_STRUCT__entry(
+ __array( u8, fsid, BTRFS_UUID_SIZE )
+ __string( type, type )
+ __field( u64, val )
+ __field( u64, bytes )
+ __field( int, reserve )
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->fsid, fs_info->fsid, BTRFS_UUID_SIZE);
+ __assign_str(type, type);
+ __entry->val = val;
+ __entry->bytes = bytes;
+ __entry->reserve = reserve;
+ ),
+
+ TP_printk("%pU: %s: %Lu %s %Lu", __entry->fsid, __get_str(type),
+ __entry->val, __entry->reserve ? "reserve" : "release",
+ __entry->bytes)
+);
+
DECLARE_EVENT_CLASS(btrfs__reserved_extent,
TP_PROTO(struct btrfs_root *root, u64 start, u64 len),
@@ -659,6 +700,168 @@ DEFINE_EVENT(btrfs__reserved_extent, btrfs_reserved_extent_free,
TP_ARGS(root, start, len)
);
+TRACE_EVENT(find_free_extent,
+
+ TP_PROTO(struct btrfs_root *root, u64 num_bytes, u64 empty_size,
+ u64 data),
+
+ TP_ARGS(root, num_bytes, empty_size, data),
+
+ TP_STRUCT__entry(
+ __field( u64, root_objectid )
+ __field( u64, num_bytes )
+ __field( u64, empty_size )
+ __field( u64, data )
+ ),
+
+ TP_fast_assign(
+ __entry->root_objectid = root->root_key.objectid;
+ __entry->num_bytes = num_bytes;
+ __entry->empty_size = empty_size;
+ __entry->data = data;
+ ),
+
+ TP_printk("root = %Lu(%s), len = %Lu, empty_size = %Lu, "
+ "flags = %Lu(%s)", show_root_type(__entry->root_objectid),
+ __entry->num_bytes, __entry->empty_size, __entry->data,
+ __print_flags((unsigned long)__entry->data, "|",
+ BTRFS_GROUP_FLAGS))
+);
+
+DECLARE_EVENT_CLASS(btrfs__reserve_extent,
+
+ TP_PROTO(struct btrfs_root *root,
+ struct btrfs_block_group_cache *block_group, u64 start,
+ u64 len),
+
+ TP_ARGS(root, block_group, start, len),
+
+ TP_STRUCT__entry(
+ __field( u64, root_objectid )
+ __field( u64, bg_objectid )
+ __field( u64, flags )
+ __field( u64, start )
+ __field( u64, len )
+ ),
+
+ TP_fast_assign(
+ __entry->root_objectid = root->root_key.objectid;
+ __entry->bg_objectid = block_group->key.objectid;
+ __entry->flags = block_group->flags;
+ __entry->start = start;
+ __entry->len = len;
+ ),
+
+ TP_printk("root = %Lu(%s), block_group = %Lu, flags = %Lu(%s), "
+ "start = %Lu, len = %Lu",
+ show_root_type(__entry->root_objectid), __entry->bg_objectid,
+ __entry->flags, __print_flags((unsigned long)__entry->flags,
+ "|", BTRFS_GROUP_FLAGS),
+ __entry->start, __entry->len)
+);
+
+DEFINE_EVENT(btrfs__reserve_extent, btrfs_reserve_extent,
+
+ TP_PROTO(struct btrfs_root *root,
+ struct btrfs_block_group_cache *block_group, u64 start,
+ u64 len),
+
+ TP_ARGS(root, block_group, start, len)
+);
+
+DEFINE_EVENT(btrfs__reserve_extent, btrfs_reserve_extent_cluster,
+
+ TP_PROTO(struct btrfs_root *root,
+ struct btrfs_block_group_cache *block_group, u64 start,
+ u64 len),
+
+ TP_ARGS(root, block_group, start, len)
+);
+
+TRACE_EVENT(btrfs_find_cluster,
+
+ TP_PROTO(struct btrfs_block_group_cache *block_group, u64 start,
+ u64 bytes, u64 empty_size, u64 min_bytes),
+
+ TP_ARGS(block_group, start, bytes, empty_size, min_bytes),
+
+ TP_STRUCT__entry(
+ __field( u64, bg_objectid )
+ __field( u64, flags )
+ __field( u64, start )
+ __field( u64, bytes )
+ __field( u64, empty_size )
+ __field( u64, min_bytes )
+ ),
+
+ TP_fast_assign(
+ __entry->bg_objectid = block_group->key.objectid;
+ __entry->flags = block_group->flags;
+ __entry->start = start;
+ __entry->bytes = bytes;
+ __entry->empty_size = empty_size;
+ __entry->min_bytes = min_bytes;
+ ),
+
+ TP_printk("block_group = %Lu, flags = %Lu(%s), start = %Lu, len = %Lu,"
+ " empty_size = %Lu, min_bytes = %Lu", __entry->bg_objectid,
+ __entry->flags,
+ __print_flags((unsigned long)__entry->flags, "|",
+ BTRFS_GROUP_FLAGS), __entry->start,
+ __entry->bytes, __entry->empty_size, __entry->min_bytes)
+);
+
+TRACE_EVENT(btrfs_failed_cluster_setup,
+
+ TP_PROTO(struct btrfs_block_group_cache *block_group),
+
+ TP_ARGS(block_group),
+
+ TP_STRUCT__entry(
+ __field( u64, bg_objectid )
+ ),
+
+ TP_fast_assign(
+ __entry->bg_objectid = block_group->key.objectid;
+ ),
+
+ TP_printk("block_group = %Lu", __entry->bg_objectid)
+);
+
+TRACE_EVENT(btrfs_setup_cluster,
+
+ TP_PROTO(struct btrfs_block_group_cache *block_group,
+ struct btrfs_free_cluster *cluster, u64 size, int bitmap),
+
+ TP_ARGS(block_group, cluster, size, bitmap),
+
+ TP_STRUCT__entry(
+ __field( u64, bg_objectid )
+ __field( u64, flags )
+ __field( u64, start )
+ __field( u64, max_size )
+ __field( u64, size )
+ __field( int, bitmap )
+ ),
+
+ TP_fast_assign(
+ __entry->bg_objectid = block_group->key.objectid;
+ __entry->flags = block_group->flags;
+ __entry->start = cluster->window_start;
+ __entry->max_size = cluster->max_size;
+ __entry->size = size;
+ __entry->bitmap = bitmap;
+ ),
+
+ TP_printk("block_group = %Lu, flags = %Lu(%s), window_start = %Lu, "
+ "size = %Lu, max_size = %Lu, bitmap = %d",
+ __entry->bg_objectid,
+ __entry->flags,
+ __print_flags((unsigned long)__entry->flags, "|",
+ BTRFS_GROUP_FLAGS), __entry->start,
+ __entry->size, __entry->max_size, __entry->bitmap)
+);
+
#endif /* _TRACE_BTRFS_H */
/* This part must be outside protection */
diff --git a/init/Kconfig b/init/Kconfig
index 6ac2236244c..3f42cd66f0f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -355,7 +355,7 @@ config AUDIT
config AUDITSYSCALL
bool "Enable system-call auditing support"
- depends on AUDIT && (X86 || PPC || S390 || IA64 || UML || SPARC64 || SUPERH)
+ depends on AUDIT && (X86 || PPC || S390 || IA64 || UML || SPARC64 || SUPERH || ARM)
default y if SECURITY_SELINUX
help
Enable low-overhead system-call auditing infrastructure that
@@ -372,6 +372,20 @@ config AUDIT_TREE
depends on AUDITSYSCALL
select FSNOTIFY
+config AUDIT_LOGINUID_IMMUTABLE
+ bool "Make audit loginuid immutable"
+ depends on AUDIT
+ help
+ The config option toggles if a task setting its loginuid requires
+ CAP_SYS_AUDITCONTROL or if that task should require no special permissions
+ but should instead only allow setting its loginuid if it was never
+ previously set. On systems which use systemd or a similar central
+ process to restart login services this should be set to true. On older
+ systems in which an admin would typically have to directly stop and
+ start processes this should be set to false. Setting this to true allows
+ one to drop potentially dangerous capabilites from the login tasks,
+ but may not be backwards compatible with older init systems.
+
source "kernel/irq/Kconfig"
menu "RCU Subsystem"
diff --git a/kernel/audit.c b/kernel/audit.c
index 57e3f510793..bb0eb5bb9a0 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -631,7 +631,7 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type,
}
*ab = audit_log_start(NULL, GFP_KERNEL, msg_type);
- audit_log_format(*ab, "user pid=%d uid=%u auid=%u ses=%u",
+ audit_log_format(*ab, "pid=%d uid=%u auid=%u ses=%u",
pid, uid, auid, ses);
if (sid) {
rc = security_secid_to_secctx(sid, &ctx, &len);
@@ -1423,7 +1423,7 @@ void audit_log_d_path(struct audit_buffer *ab, const char *prefix,
char *p, *pathname;
if (prefix)
- audit_log_format(ab, " %s", prefix);
+ audit_log_format(ab, "%s", prefix);
/* We will allow 11 spaces for ' (deleted)' to be appended */
pathname = kmalloc(PATH_MAX+11, ab->gfp_mask);
diff --git a/kernel/audit.h b/kernel/audit.h
index 91e7071c4d2..81676680337 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -36,12 +36,8 @@ enum audit_state {
AUDIT_DISABLED, /* Do not create per-task audit_context.
* No syscall-specific audit records can
* be generated. */
- AUDIT_SETUP_CONTEXT, /* Create the per-task audit_context,
- * but don't necessarily fill it in at
- * syscall entry time (i.e., filter
- * instead). */
AUDIT_BUILD_CONTEXT, /* Create the per-task audit_context,
- * and always fill it in at syscall
+ * and fill it in at syscall
* entry time. This makes a full
* syscall record available if some
* other part of the kernel decides it
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index f8277c80d67..a6c3f1abd20 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -235,13 +235,15 @@ static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule)
switch(listnr) {
default:
goto exit_err;
- case AUDIT_FILTER_USER:
- case AUDIT_FILTER_TYPE:
#ifdef CONFIG_AUDITSYSCALL
case AUDIT_FILTER_ENTRY:
+ if (rule->action == AUDIT_ALWAYS)
+ goto exit_err;
case AUDIT_FILTER_EXIT:
case AUDIT_FILTER_TASK:
#endif
+ case AUDIT_FILTER_USER:
+ case AUDIT_FILTER_TYPE:
;
}
if (unlikely(rule->action == AUDIT_POSSIBLE)) {
@@ -385,7 +387,7 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
goto exit_free;
break;
case AUDIT_FILETYPE:
- if ((f->val & ~S_IFMT) > S_IFMT)
+ if (f->val & ~S_IFMT)
goto exit_free;
break;
case AUDIT_INODE:
@@ -459,6 +461,8 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
case AUDIT_ARG1:
case AUDIT_ARG2:
case AUDIT_ARG3:
+ case AUDIT_OBJ_UID:
+ case AUDIT_OBJ_GID:
break;
case AUDIT_ARCH:
entry->rule.arch_f = f;
@@ -522,7 +526,6 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
goto exit_free;
break;
case AUDIT_FILTERKEY:
- err = -EINVAL;
if (entry->rule.filterkey || f->val > AUDIT_MAX_KEY_LEN)
goto exit_free;
str = audit_unpack_string(&bufp, &remain, f->val);
@@ -536,7 +539,11 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
goto exit_free;
break;
case AUDIT_FILETYPE:
- if ((f->val & ~S_IFMT) > S_IFMT)
+ if (f->val & ~S_IFMT)
+ goto exit_free;
+ break;
+ case AUDIT_FIELD_COMPARE:
+ if (f->val > AUDIT_MAX_FIELD_COMPARE)
goto exit_free;
break;
default:
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index e7fe2b0d29b..caaea6e944f 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -70,9 +70,15 @@
#include "audit.h"
+/* flags stating the success for a syscall */
+#define AUDITSC_INVALID 0
+#define AUDITSC_SUCCESS 1
+#define AUDITSC_FAILURE 2
+
/* AUDIT_NAMES is the number of slots we reserve in the audit_context
- * for saving names from getname(). */
-#define AUDIT_NAMES 20
+ * for saving names from getname(). If we get more names we will allocate
+ * a name dynamically and also add those to the list anchored by names_list. */
+#define AUDIT_NAMES 5
/* Indicates that audit should log the full pathname. */
#define AUDIT_NAME_FULL -1
@@ -101,9 +107,8 @@ struct audit_cap_data {
*
* Further, in fs/namei.c:path_lookup() we store the inode and device. */
struct audit_names {
+ struct list_head list; /* audit_context->names_list */
const char *name;
- int name_len; /* number of name's characters to log */
- unsigned name_put; /* call __putname() for this name */
unsigned long ino;
dev_t dev;
umode_t mode;
@@ -113,6 +118,14 @@ struct audit_names {
u32 osid;
struct audit_cap_data fcap;
unsigned int fcap_ver;
+ int name_len; /* number of name's characters to log */
+ bool name_put; /* call __putname() for this name */
+ /*
+ * This was an allocated audit_names and not from the array of
+ * names allocated in the task audit context. Thus this name
+ * should be freed on syscall exit
+ */
+ bool should_free;
};
struct audit_aux_data {
@@ -174,8 +187,17 @@ struct audit_context {
long return_code;/* syscall return code */
u64 prio;
int return_valid; /* return code is valid */
- int name_count;
- struct audit_names names[AUDIT_NAMES];
+ /*
+ * The names_list is the list of all audit_names collected during this
+ * syscall. The first AUDIT_NAMES entries in the names_list will
+ * actually be from the preallocated_names array for performance
+ * reasons. Except during allocation they should never be referenced
+ * through the preallocated_names array and should only be found/used
+ * by running the names_list.
+ */
+ struct audit_names preallocated_names[AUDIT_NAMES];
+ int name_count; /* total records in names_list */
+ struct list_head names_list; /* anchor for struct audit_names->list */
char * filterkey; /* key for rule that triggered record */
struct path pwd;
struct audit_context *previous; /* For nested syscalls */
@@ -305,21 +327,21 @@ static int audit_match_perm(struct audit_context *ctx, int mask)
}
}
-static int audit_match_filetype(struct audit_context *ctx, int which)
+static int audit_match_filetype(struct audit_context *ctx, int val)
{
- unsigned index = which & ~S_IFMT;
- umode_t mode = which & S_IFMT;
+ struct audit_names *n;
+ umode_t mode = (umode_t)val;
if (unlikely(!ctx))
return 0;
- if (index >= ctx->name_count)
- return 0;
- if (ctx->names[index].ino == -1)
- return 0;
- if ((ctx->names[index].mode ^ mode) & S_IFMT)
- return 0;
- return 1;
+ list_for_each_entry(n, &ctx->names_list, list) {
+ if ((n->ino != -1) &&
+ ((n->mode & S_IFMT) == mode))
+ return 1;
+ }
+
+ return 0;
}
/*
@@ -441,6 +463,134 @@ static int match_tree_refs(struct audit_context *ctx, struct audit_tree *tree)
return 0;
}
+static int audit_compare_id(uid_t uid1,
+ struct audit_names *name,
+ unsigned long name_offset,
+ struct audit_field *f,
+ struct audit_context *ctx)
+{
+ struct audit_names *n;
+ unsigned long addr;
+ uid_t uid2;
+ int rc;
+
+ BUILD_BUG_ON(sizeof(uid_t) != sizeof(gid_t));
+
+ if (name) {
+ addr = (unsigned long)name;
+ addr += name_offset;
+
+ uid2 = *(uid_t *)addr;
+ rc = audit_comparator(uid1, f->op, uid2);
+ if (rc)
+ return rc;
+ }
+
+ if (ctx) {
+ list_for_each_entry(n, &ctx->names_list, list) {
+ addr = (unsigned long)n;
+ addr += name_offset;
+
+ uid2 = *(uid_t *)addr;
+
+ rc = audit_comparator(uid1, f->op, uid2);
+ if (rc)
+ return rc;
+ }
+ }
+ return 0;
+}
+
+static int audit_field_compare(struct task_struct *tsk,
+ const struct cred *cred,
+ struct audit_field *f,
+ struct audit_context *ctx,
+ struct audit_names *name)
+{
+ switch (f->val) {
+ /* process to file object comparisons */
+ case AUDIT_COMPARE_UID_TO_OBJ_UID:
+ return audit_compare_id(cred->uid,
+ name, offsetof(struct audit_names, uid),
+ f, ctx);
+ case AUDIT_COMPARE_GID_TO_OBJ_GID:
+ return audit_compare_id(cred->gid,
+ name, offsetof(struct audit_names, gid),
+ f, ctx);
+ case AUDIT_COMPARE_EUID_TO_OBJ_UID:
+ return audit_compare_id(cred->euid,
+ name, offsetof(struct audit_names, uid),
+ f, ctx);
+ case AUDIT_COMPARE_EGID_TO_OBJ_GID:
+ return audit_compare_id(cred->egid,
+ name, offsetof(struct audit_names, gid),
+ f, ctx);
+ case AUDIT_COMPARE_AUID_TO_OBJ_UID:
+ return audit_compare_id(tsk->loginuid,
+ name, offsetof(struct audit_names, uid),
+ f, ctx);
+ case AUDIT_COMPARE_SUID_TO_OBJ_UID:
+ return audit_compare_id(cred->suid,
+ name, offsetof(struct audit_names, uid),
+ f, ctx);
+ case AUDIT_COMPARE_SGID_TO_OBJ_GID:
+ return audit_compare_id(cred->sgid,
+ name, offsetof(struct audit_names, gid),
+ f, ctx);
+ case AUDIT_COMPARE_FSUID_TO_OBJ_UID:
+ return audit_compare_id(cred->fsuid,
+ name, offsetof(struct audit_names, uid),
+ f, ctx);
+ case AUDIT_COMPARE_FSGID_TO_OBJ_GID:
+ return audit_compare_id(cred->fsgid,
+ name, offsetof(struct audit_names, gid),
+ f, ctx);
+ /* uid comparisons */
+ case AUDIT_COMPARE_UID_TO_AUID:
+ return audit_comparator(cred->uid, f->op, tsk->loginuid);
+ case AUDIT_COMPARE_UID_TO_EUID:
+ return audit_comparator(cred->uid, f->op, cred->euid);
+ case AUDIT_COMPARE_UID_TO_SUID:
+ return audit_comparator(cred->uid, f->op, cred->suid);
+ case AUDIT_COMPARE_UID_TO_FSUID:
+ return audit_comparator(cred->uid, f->op, cred->fsuid);
+ /* auid comparisons */
+ case AUDIT_COMPARE_AUID_TO_EUID:
+ return audit_comparator(tsk->loginuid, f->op, cred->euid);
+ case AUDIT_COMPARE_AUID_TO_SUID:
+ return audit_comparator(tsk->loginuid, f->op, cred->suid);
+ case AUDIT_COMPARE_AUID_TO_FSUID:
+ return audit_comparator(tsk->loginuid, f->op, cred->fsuid);
+ /* euid comparisons */
+ case AUDIT_COMPARE_EUID_TO_SUID:
+ return audit_comparator(cred->euid, f->op, cred->suid);
+ case AUDIT_COMPARE_EUID_TO_FSUID:
+ return audit_comparator(cred->euid, f->op, cred->fsuid);
+ /* suid comparisons */
+ case AUDIT_COMPARE_SUID_TO_FSUID:
+ return audit_comparator(cred->suid, f->op, cred->fsuid);
+ /* gid comparisons */
+ case AUDIT_COMPARE_GID_TO_EGID:
+ return audit_comparator(cred->gid, f->op, cred->egid);
+ case AUDIT_COMPARE_GID_TO_SGID:
+ return audit_comparator(cred->gid, f->op, cred->sgid);
+ case AUDIT_COMPARE_GID_TO_FSGID:
+ return audit_comparator(cred->gid, f->op, cred->fsgid);
+ /* egid comparisons */
+ case AUDIT_COMPARE_EGID_TO_SGID:
+ return audit_comparator(cred->egid, f->op, cred->sgid);
+ case AUDIT_COMPARE_EGID_TO_FSGID:
+ return audit_comparator(cred->egid, f->op, cred->fsgid);
+ /* sgid comparison */
+ case AUDIT_COMPARE_SGID_TO_FSGID:
+ return audit_comparator(cred->sgid, f->op, cred->fsgid);
+ default:
+ WARN(1, "Missing AUDIT_COMPARE define. Report as a bug\n");
+ return 0;
+ }
+ return 0;
+}
+
/* Determine if any context name data matches a rule's watch data */
/* Compare a task_struct with an audit_rule. Return 1 on match, 0
* otherwise.
@@ -457,13 +607,14 @@ static int audit_filter_rules(struct task_struct *tsk,
bool task_creation)
{
const struct cred *cred;
- int i, j, need_sid = 1;
+ int i, need_sid = 1;
u32 sid;
cred = rcu_dereference_check(tsk->cred, tsk == current || task_creation);
for (i = 0; i < rule->field_count; i++) {
struct audit_field *f = &rule->fields[i];
+ struct audit_names *n;
int result = 0;
switch (f->type) {
@@ -522,12 +673,14 @@ static int audit_filter_rules(struct task_struct *tsk,
}
break;
case AUDIT_DEVMAJOR:
- if (name)
- result = audit_comparator(MAJOR(name->dev),
- f->op, f->val);
- else if (ctx) {
- for (j = 0; j < ctx->name_count; j++) {
- if (audit_comparator(MAJOR(ctx->names[j].dev), f->op, f->val)) {
+ if (name) {
+ if (audit_comparator(MAJOR(name->dev), f->op, f->val) ||
+ audit_comparator(MAJOR(name->rdev), f->op, f->val))
+ ++result;
+ } else if (ctx) {
+ list_for_each_entry(n, &ctx->names_list, list) {
+ if (audit_comparator(MAJOR(n->dev), f->op, f->val) ||
+ audit_comparator(MAJOR(n->rdev), f->op, f->val)) {
++result;
break;
}
@@ -535,12 +688,14 @@ static int audit_filter_rules(struct task_struct *tsk,
}
break;
case AUDIT_DEVMINOR:
- if (name)
- result = audit_comparator(MINOR(name->dev),
- f->op, f->val);
- else if (ctx) {
- for (j = 0; j < ctx->name_count; j++) {
- if (audit_comparator(MINOR(ctx->names[j].dev), f->op, f->val)) {
+ if (name) {
+ if (audit_comparator(MINOR(name->dev), f->op, f->val) ||
+ audit_comparator(MINOR(name->rdev), f->op, f->val))
+ ++result;
+ } else if (ctx) {
+ list_for_each_entry(n, &ctx->names_list, list) {
+ if (audit_comparator(MINOR(n->dev), f->op, f->val) ||
+ audit_comparator(MINOR(n->rdev), f->op, f->val)) {
++result;
break;
}
@@ -551,8 +706,32 @@ static int audit_filter_rules(struct task_struct *tsk,
if (name)
result = (name->ino == f->val);
else if (ctx) {
- for (j = 0; j < ctx->name_count; j++) {
- if (audit_comparator(ctx->names[j].ino, f->op, f->val)) {
+ list_for_each_entry(n, &ctx->names_list, list) {
+ if (audit_comparator(n->ino, f->op, f->val)) {
+ ++result;
+ break;
+ }
+ }
+ }
+ break;
+ case AUDIT_OBJ_UID:
+ if (name) {
+ result = audit_comparator(name->uid, f->op, f->val);
+ } else if (ctx) {
+ list_for_each_entry(n, &ctx->names_list, list) {
+ if (audit_comparator(n->uid, f->op, f->val)) {
+ ++result;
+ break;
+ }
+ }
+ }
+ break;
+ case AUDIT_OBJ_GID:
+ if (name) {
+ result = audit_comparator(name->gid, f->op, f->val);
+ } else if (ctx) {
+ list_for_each_entry(n, &ctx->names_list, list) {
+ if (audit_comparator(n->gid, f->op, f->val)) {
++result;
break;
}
@@ -607,11 +786,10 @@ static int audit_filter_rules(struct task_struct *tsk,
name->osid, f->type, f->op,
f->lsm_rule, ctx);
} else if (ctx) {
- for (j = 0; j < ctx->name_count; j++) {
- if (security_audit_rule_match(
- ctx->names[j].osid,
- f->type, f->op,
- f->lsm_rule, ctx)) {
+ list_for_each_entry(n, &ctx->names_list, list) {
+ if (security_audit_rule_match(n->osid, f->type,
+ f->op, f->lsm_rule,
+ ctx)) {
++result;
break;
}
@@ -643,8 +821,10 @@ static int audit_filter_rules(struct task_struct *tsk,
case AUDIT_FILETYPE:
result = audit_match_filetype(ctx, f->val);
break;
+ case AUDIT_FIELD_COMPARE:
+ result = audit_field_compare(tsk, cred, f, ctx, name);
+ break;
}
-
if (!result)
return 0;
}
@@ -722,40 +902,53 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,
return AUDIT_BUILD_CONTEXT;
}
-/* At syscall exit time, this filter is called if any audit_names[] have been
+/*
+ * Given an audit_name check the inode hash table to see if they match.
+ * Called holding the rcu read lock to protect the use of audit_inode_hash
+ */
+static int audit_filter_inode_name(struct task_struct *tsk,
+ struct audit_names *n,
+ struct audit_context *ctx) {
+ int word, bit;
+ int h = audit_hash_ino((u32)n->ino);
+ struct list_head *list = &audit_inode_hash[h];
+ struct audit_entry *e;
+ enum audit_state state;
+
+ word = AUDIT_WORD(ctx->major);
+ bit = AUDIT_BIT(ctx->major);
+
+ if (list_empty(list))
+ return 0;
+
+ list_for_each_entry_rcu(e, list, list) {
+ if ((e->rule.mask[word] & bit) == bit &&
+ audit_filter_rules(tsk, &e->rule, ctx, n, &state, false)) {
+ ctx->current_state = state;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/* At syscall exit time, this filter is called if any audit_names have been
* collected during syscall processing. We only check rules in sublists at hash
- * buckets applicable to the inode numbers in audit_names[].
+ * buckets applicable to the inode numbers in audit_names.
* Regarding audit_state, same rules apply as for audit_filter_syscall().
*/
void audit_filter_inodes(struct task_struct *tsk, struct audit_context *ctx)
{
- int i;
- struct audit_entry *e;
- enum audit_state state;
+ struct audit_names *n;
if (audit_pid && tsk->tgid == audit_pid)
return;
rcu_read_lock();
- for (i = 0; i < ctx->name_count; i++) {
- int word = AUDIT_WORD(ctx->major);
- int bit = AUDIT_BIT(ctx->major);
- struct audit_names *n = &ctx->names[i];
- int h = audit_hash_ino((u32)n->ino);
- struct list_head *list = &audit_inode_hash[h];
-
- if (list_empty(list))
- continue;
- list_for_each_entry_rcu(e, list, list) {
- if ((e->rule.mask[word] & bit) == bit &&
- audit_filter_rules(tsk, &e->rule, ctx, n,
- &state, false)) {
- rcu_read_unlock();
- ctx->current_state = state;
- return;
- }
- }
+ list_for_each_entry(n, &ctx->names_list, list) {
+ if (audit_filter_inode_name(tsk, n, ctx))
+ break;
}
rcu_read_unlock();
}
@@ -766,7 +959,7 @@ static inline struct audit_context *audit_get_context(struct task_struct *tsk,
{
struct audit_context *context = tsk->audit_context;
- if (likely(!context))
+ if (!context)
return NULL;
context->return_valid = return_valid;
@@ -799,7 +992,7 @@ static inline struct audit_context *audit_get_context(struct task_struct *tsk,
static inline void audit_free_names(struct audit_context *context)
{
- int i;
+ struct audit_names *n, *next;
#if AUDIT_DEBUG == 2
if (context->put_count + context->ino_count != context->name_count) {
@@ -810,10 +1003,9 @@ static inline void audit_free_names(struct audit_context *context)
context->serial, context->major, context->in_syscall,
context->name_count, context->put_count,
context->ino_count);
- for (i = 0; i < context->name_count; i++) {
+ list_for_each_entry(n, &context->names_list, list) {
printk(KERN_ERR "names[%d] = %p = %s\n", i,
- context->names[i].name,
- context->names[i].name ?: "(null)");
+ n->name, n->name ?: "(null)");
}
dump_stack();
return;
@@ -824,9 +1016,12 @@ static inline void audit_free_names(struct audit_context *context)
context->ino_count = 0;
#endif
- for (i = 0; i < context->name_count; i++) {
- if (context->names[i].name && context->names[i].name_put)
- __putname(context->names[i].name);
+ list_for_each_entry_safe(n, next, &context->names_list, list) {
+ list_del(&n->list);
+ if (n->name && n->name_put)
+ __putname(n->name);
+ if (n->should_free)
+ kfree(n);
}
context->name_count = 0;
path_put(&context->pwd);
@@ -864,6 +1059,7 @@ static inline struct audit_context *audit_alloc_context(enum audit_state state)
return NULL;
audit_zero_context(context, state);
INIT_LIST_HEAD(&context->killed_trees);
+ INIT_LIST_HEAD(&context->names_list);
return context;
}
@@ -886,7 +1082,7 @@ int audit_alloc(struct task_struct *tsk)
return 0; /* Return if not auditing. */
state = audit_filter_task(tsk, &key);
- if (likely(state == AUDIT_DISABLED))
+ if (state == AUDIT_DISABLED)
return 0;
if (!(context = audit_alloc_context(state))) {
@@ -975,7 +1171,7 @@ static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk
while (vma) {
if ((vma->vm_flags & VM_EXECUTABLE) &&
vma->vm_file) {
- audit_log_d_path(ab, "exe=",
+ audit_log_d_path(ab, " exe=",
&vma->vm_file->f_path);
break;
}
@@ -1166,8 +1362,8 @@ static void audit_log_execve_info(struct audit_context *context,
struct audit_buffer **ab,
struct audit_aux_data_execve *axi)
{
- int i;
- size_t len, len_sent = 0;
+ int i, len;
+ size_t len_sent = 0;
const char __user *p;
char *buf;
@@ -1324,6 +1520,68 @@ static void show_special(struct audit_context *context, int *call_panic)
audit_log_end(ab);
}
+static void audit_log_name(struct audit_context *context, struct audit_names *n,
+ int record_num, int *call_panic)
+{
+ struct audit_buffer *ab;
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH);
+ if (!ab)
+ return; /* audit_panic has been called */
+
+ audit_log_format(ab, "item=%d", record_num);
+
+ if (n->name) {
+ switch (n->name_len) {
+ case AUDIT_NAME_FULL:
+ /* log the full path */
+ audit_log_format(ab, " name=");
+ audit_log_untrustedstring(ab, n->name);
+ break;
+ case 0:
+ /* name was specified as a relative path and the
+ * directory component is the cwd */
+ audit_log_d_path(ab, " name=", &context->pwd);
+ break;
+ default:
+ /* log the name's directory component */
+ audit_log_format(ab, " name=");
+ audit_log_n_untrustedstring(ab, n->name,
+ n->name_len);
+ }
+ } else
+ audit_log_format(ab, " name=(null)");
+
+ if (n->ino != (unsigned long)-1) {
+ audit_log_format(ab, " inode=%lu"
+ " dev=%02x:%02x mode=%#ho"
+ " ouid=%u ogid=%u rdev=%02x:%02x",
+ n->ino,
+ MAJOR(n->dev),
+ MINOR(n->dev),
+ n->mode,
+ n->uid,
+ n->gid,
+ MAJOR(n->rdev),
+ MINOR(n->rdev));
+ }
+ if (n->osid != 0) {
+ char *ctx = NULL;
+ u32 len;
+ if (security_secid_to_secctx(
+ n->osid, &ctx, &len)) {
+ audit_log_format(ab, " osid=%u", n->osid);
+ *call_panic = 2;
+ } else {
+ audit_log_format(ab, " obj=%s", ctx);
+ security_release_secctx(ctx, len);
+ }
+ }
+
+ audit_log_fcaps(ab, n);
+
+ audit_log_end(ab);
+}
+
static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
{
const struct cred *cred;
@@ -1331,6 +1589,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
struct audit_buffer *ab;
struct audit_aux_data *aux;
const char *tty;
+ struct audit_names *n;
/* tsk == current */
context->pid = tsk->pid;
@@ -1466,70 +1725,14 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
if (context->pwd.dentry && context->pwd.mnt) {
ab = audit_log_start(context, GFP_KERNEL, AUDIT_CWD);
if (ab) {
- audit_log_d_path(ab, "cwd=", &context->pwd);
+ audit_log_d_path(ab, " cwd=", &context->pwd);
audit_log_end(ab);
}
}
- for (i = 0; i < context->name_count; i++) {
- struct audit_names *n = &context->names[i];
- ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH);
- if (!ab)
- continue; /* audit_panic has been called */
-
- audit_log_format(ab, "item=%d", i);
-
- if (n->name) {
- switch(n->name_len) {
- case AUDIT_NAME_FULL:
- /* log the full path */
- audit_log_format(ab, " name=");
- audit_log_untrustedstring(ab, n->name);
- break;
- case 0:
- /* name was specified as a relative path and the
- * directory component is the cwd */
- audit_log_d_path(ab, "name=", &context->pwd);
- break;
- default:
- /* log the name's directory component */
- audit_log_format(ab, " name=");
- audit_log_n_untrustedstring(ab, n->name,
- n->name_len);
- }
- } else
- audit_log_format(ab, " name=(null)");
-
- if (n->ino != (unsigned long)-1) {
- audit_log_format(ab, " inode=%lu"
- " dev=%02x:%02x mode=%#ho"
- " ouid=%u ogid=%u rdev=%02x:%02x",
- n->ino,
- MAJOR(n->dev),
- MINOR(n->dev),
- n->mode,
- n->uid,
- n->gid,
- MAJOR(n->rdev),
- MINOR(n->rdev));
- }
- if (n->osid != 0) {
- char *ctx = NULL;
- u32 len;
- if (security_secid_to_secctx(
- n->osid, &ctx, &len)) {
- audit_log_format(ab, " osid=%u", n->osid);
- call_panic = 2;
- } else {
- audit_log_format(ab, " obj=%s", ctx);
- security_release_secctx(ctx, len);
- }
- }
-
- audit_log_fcaps(ab, n);
-
- audit_log_end(ab);
- }
+ i = 0;
+ list_for_each_entry(n, &context->names_list, list)
+ audit_log_name(context, n, i++, &call_panic);
/* Send end of event record to help user space know we are finished */
ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE);
@@ -1545,12 +1748,12 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
*
* Called from copy_process and do_exit
*/
-void audit_free(struct task_struct *tsk)
+void __audit_free(struct task_struct *tsk)
{
struct audit_context *context;
context = audit_get_context(tsk, 0, 0);
- if (likely(!context))
+ if (!context)
return;
/* Check for system calls that do not go through the exit
@@ -1583,7 +1786,7 @@ void audit_free(struct task_struct *tsk)
* will only be written if another part of the kernel requests that it
* be written).
*/
-void audit_syscall_entry(int arch, int major,
+void __audit_syscall_entry(int arch, int major,
unsigned long a1, unsigned long a2,
unsigned long a3, unsigned long a4)
{
@@ -1591,7 +1794,7 @@ void audit_syscall_entry(int arch, int major,
struct audit_context *context = tsk->audit_context;
enum audit_state state;
- if (unlikely(!context))
+ if (!context)
return;
/*
@@ -1648,7 +1851,7 @@ void audit_syscall_entry(int arch, int major,
context->prio = 0;
state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_ENTRY]);
}
- if (likely(state == AUDIT_DISABLED))
+ if (state == AUDIT_DISABLED)
return;
context->serial = 0;
@@ -1658,30 +1861,9 @@ void audit_syscall_entry(int arch, int major,
context->ppid = 0;
}
-void audit_finish_fork(struct task_struct *child)
-{
- struct audit_context *ctx = current->audit_context;
- struct audit_context *p = child->audit_context;
- if (!p || !ctx)
- return;
- if (!ctx->in_syscall || ctx->current_state != AUDIT_RECORD_CONTEXT)
- return;
- p->arch = ctx->arch;
- p->major = ctx->major;
- memcpy(p->argv, ctx->argv, sizeof(ctx->argv));
- p->ctime = ctx->ctime;
- p->dummy = ctx->dummy;
- p->in_syscall = ctx->in_syscall;
- p->filterkey = kstrdup(ctx->filterkey, GFP_KERNEL);
- p->ppid = current->pid;
- p->prio = ctx->prio;
- p->current_state = ctx->current_state;
-}
-
/**
* audit_syscall_exit - deallocate audit context after a system call
- * @valid: success/failure flag
- * @return_code: syscall return value
+ * @pt_regs: syscall registers
*
* Tear down after system call. If the audit context has been marked as
* auditable (either because of the AUDIT_RECORD_CONTEXT state from
@@ -1689,14 +1871,18 @@ void audit_finish_fork(struct task_struct *child)
* message), then write out the syscall information. In call cases,
* free the names stored from getname().
*/
-void audit_syscall_exit(int valid, long return_code)
+void __audit_syscall_exit(int success, long return_code)
{
struct task_struct *tsk = current;
struct audit_context *context;
- context = audit_get_context(tsk, valid, return_code);
+ if (success)
+ success = AUDITSC_SUCCESS;
+ else
+ success = AUDITSC_FAILURE;
- if (likely(!context))
+ context = audit_get_context(tsk, success, return_code);
+ if (!context)
return;
if (context->in_syscall && context->current_state == AUDIT_RECORD_CONTEXT)
@@ -1821,6 +2007,30 @@ retry:
#endif
}
+static struct audit_names *audit_alloc_name(struct audit_context *context)
+{
+ struct audit_names *aname;
+
+ if (context->name_count < AUDIT_NAMES) {
+ aname = &context->preallocated_names[context->name_count];
+ memset(aname, 0, sizeof(*aname));
+ } else {
+ aname = kzalloc(sizeof(*aname), GFP_NOFS);
+ if (!aname)
+ return NULL;
+ aname->should_free = true;
+ }
+
+ aname->ino = (unsigned long)-1;
+ list_add_tail(&aname->list, &context->names_list);
+
+ context->name_count++;
+#if AUDIT_DEBUG
+ context->ino_count++;
+#endif
+ return aname;
+}
+
/**
* audit_getname - add a name to the list
* @name: name to add
@@ -1831,9 +2041,7 @@ retry:
void __audit_getname(const char *name)
{
struct audit_context *context = current->audit_context;
-
- if (IS_ERR(name) || !name)
- return;
+ struct audit_names *n;
if (!context->in_syscall) {
#if AUDIT_DEBUG == 2
@@ -1843,13 +2051,15 @@ void __audit_getname(const char *name)
#endif
return;
}
- BUG_ON(context->name_count >= AUDIT_NAMES);
- context->names[context->name_count].name = name;
- context->names[context->name_count].name_len = AUDIT_NAME_FULL;
- context->names[context->name_count].name_put = 1;
- context->names[context->name_count].ino = (unsigned long)-1;
- context->names[context->name_count].osid = 0;
- ++context->name_count;
+
+ n = audit_alloc_name(context);
+ if (!n)
+ return;
+
+ n->name = name;
+ n->name_len = AUDIT_NAME_FULL;
+ n->name_put = true;
+
if (!context->pwd.dentry)
get_fs_pwd(current->fs, &context->pwd);
}
@@ -1871,12 +2081,13 @@ void audit_putname(const char *name)
printk(KERN_ERR "%s:%d(:%d): __putname(%p)\n",
__FILE__, __LINE__, context->serial, name);
if (context->name_count) {
+ struct audit_names *n;
int i;
- for (i = 0; i < context->name_count; i++)
+
+ list_for_each_entry(n, &context->names_list, list)
printk(KERN_ERR "name[%d] = %p = %s\n", i,
- context->names[i].name,
- context->names[i].name ?: "(null)");
- }
+ n->name, n->name ?: "(null)");
+ }
#endif
__putname(name);
}
@@ -1897,39 +2108,11 @@ void audit_putname(const char *name)
#endif
}
-static int audit_inc_name_count(struct audit_context *context,
- const struct inode *inode)
-{
- if (context->name_count >= AUDIT_NAMES) {
- if (inode)
- printk(KERN_DEBUG "audit: name_count maxed, losing inode data: "
- "dev=%02x:%02x, inode=%lu\n",
- MAJOR(inode->i_sb->s_dev),
- MINOR(inode->i_sb->s_dev),
- inode->i_ino);
-
- else
- printk(KERN_DEBUG "name_count maxed, losing inode data\n");
- return 1;
- }
- context->name_count++;
-#if AUDIT_DEBUG
- context->ino_count++;
-#endif
- return 0;
-}
-
-
static inline int audit_copy_fcaps(struct audit_names *name, const struct dentry *dentry)
{
struct cpu_vfs_cap_data caps;
int rc;
- memset(&name->fcap.permitted, 0, sizeof(kernel_cap_t));
- memset(&name->fcap.inheritable, 0, sizeof(kernel_cap_t));
- name->fcap.fE = 0;
- name->fcap_ver = 0;
-
if (!dentry)
return 0;
@@ -1969,30 +2152,25 @@ static void audit_copy_inode(struct audit_names *name, const struct dentry *dent
*/
void __audit_inode(const char *name, const struct dentry *dentry)
{
- int idx;
struct audit_context *context = current->audit_context;
const struct inode *inode = dentry->d_inode;
+ struct audit_names *n;
if (!context->in_syscall)
return;
- if (context->name_count
- && context->names[context->name_count-1].name
- && context->names[context->name_count-1].name == name)
- idx = context->name_count - 1;
- else if (context->name_count > 1
- && context->names[context->name_count-2].name
- && context->names[context->name_count-2].name == name)
- idx = context->name_count - 2;
- else {
- /* FIXME: how much do we care about inodes that have no
- * associated name? */
- if (audit_inc_name_count(context, inode))
- return;
- idx = context->name_count - 1;
- context->names[idx].name = NULL;
+
+ list_for_each_entry_reverse(n, &context->names_list, list) {
+ if (n->name && (n->name == name))
+ goto out;
}
+
+ /* unable to find the name from a previous getname() */
+ n = audit_alloc_name(context);
+ if (!n)
+ return;
+out:
handle_path(dentry);
- audit_copy_inode(&context->names[idx], dentry, inode);
+ audit_copy_inode(n, dentry, inode);
}
/**
@@ -2011,11 +2189,11 @@ void __audit_inode(const char *name, const struct dentry *dentry)
void __audit_inode_child(const struct dentry *dentry,
const struct inode *parent)
{
- int idx;
struct audit_context *context = current->audit_context;
const char *found_parent = NULL, *found_child = NULL;
const struct inode *inode = dentry->d_inode;
const char *dname = dentry->d_name.name;
+ struct audit_names *n;
int dirlen = 0;
if (!context->in_syscall)
@@ -2025,9 +2203,7 @@ void __audit_inode_child(const struct dentry *dentry,
handle_one(inode);
/* parent is more likely, look for it first */
- for (idx = 0; idx < context->name_count; idx++) {
- struct audit_names *n = &context->names[idx];
-
+ list_for_each_entry(n, &context->names_list, list) {
if (!n->name)
continue;
@@ -2040,9 +2216,7 @@ void __audit_inode_child(const struct dentry *dentry,
}
/* no matching parent, look for matching child */
- for (idx = 0; idx < context->name_count; idx++) {
- struct audit_names *n = &context->names[idx];
-
+ list_for_each_entry(n, &context->names_list, list) {
if (!n->name)
continue;
@@ -2060,34 +2234,29 @@ void __audit_inode_child(const struct dentry *dentry,
add_names:
if (!found_parent) {
- if (audit_inc_name_count(context, parent))
+ n = audit_alloc_name(context);
+ if (!n)
return;
- idx = context->name_count - 1;
- context->names[idx].name = NULL;
- audit_copy_inode(&context->names[idx], NULL, parent);
+ audit_copy_inode(n, NULL, parent);
}
if (!found_child) {
- if (audit_inc_name_count(context, inode))
+ n = audit_alloc_name(context);
+ if (!n)
return;
- idx = context->name_count - 1;
/* Re-use the name belonging to the slot for a matching parent
* directory. All names for this context are relinquished in
* audit_free_names() */
if (found_parent) {
- context->names[idx].name = found_parent;
- context->names[idx].name_len = AUDIT_NAME_FULL;
+ n->name = found_parent;
+ n->name_len = AUDIT_NAME_FULL;
/* don't call __putname() */
- context->names[idx].name_put = 0;
- } else {
- context->names[idx].name = NULL;
+ n->name_put = false;
}
if (inode)
- audit_copy_inode(&context->names[idx], NULL, inode);
- else
- context->names[idx].ino = (unsigned long)-1;
+ audit_copy_inode(n, NULL, inode);
}
}
EXPORT_SYMBOL_GPL(__audit_inode_child);
@@ -2121,19 +2290,28 @@ int auditsc_get_stamp(struct audit_context *ctx,
static atomic_t session_id = ATOMIC_INIT(0);
/**
- * audit_set_loginuid - set a task's audit_context loginuid
- * @task: task whose audit context is being modified
+ * audit_set_loginuid - set current task's audit_context loginuid
* @loginuid: loginuid value
*
* Returns 0.
*
* Called (set) from fs/proc/base.c::proc_loginuid_write().
*/
-int audit_set_loginuid(struct task_struct *task, uid_t loginuid)
+int audit_set_loginuid(uid_t loginuid)
{
- unsigned int sessionid = atomic_inc_return(&session_id);
+ struct task_struct *task = current;
struct audit_context *context = task->audit_context;
+ unsigned int sessionid;
+
+#ifdef CONFIG_AUDIT_LOGINUID_IMMUTABLE
+ if (task->loginuid != -1)
+ return -EPERM;
+#else /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */
+ if (!capable(CAP_AUDIT_CONTROL))
+ return -EPERM;
+#endif /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */
+ sessionid = atomic_inc_return(&session_id);
if (context && context->in_syscall) {
struct audit_buffer *ab;
@@ -2271,14 +2449,11 @@ void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mo
context->ipc.has_perm = 1;
}
-int audit_bprm(struct linux_binprm *bprm)
+int __audit_bprm(struct linux_binprm *bprm)
{
struct audit_aux_data_execve *ax;
struct audit_context *context = current->audit_context;
- if (likely(!audit_enabled || !context || context->dummy))
- return 0;
-
ax = kmalloc(sizeof(*ax), GFP_KERNEL);
if (!ax)
return -ENOMEM;
@@ -2299,13 +2474,10 @@ int audit_bprm(struct linux_binprm *bprm)
* @args: args array
*
*/
-void audit_socketcall(int nargs, unsigned long *args)
+void __audit_socketcall(int nargs, unsigned long *args)
{
struct audit_context *context = current->audit_context;
- if (likely(!context || context->dummy))
- return;
-
context->type = AUDIT_SOCKETCALL;
context->socketcall.nargs = nargs;
memcpy(context->socketcall.args, args, nargs * sizeof(unsigned long));
@@ -2331,13 +2503,10 @@ void __audit_fd_pair(int fd1, int fd2)
*
* Returns 0 for success or NULL context or < 0 on error.
*/
-int audit_sockaddr(int len, void *a)
+int __audit_sockaddr(int len, void *a)
{
struct audit_context *context = current->audit_context;
- if (likely(!context || context->dummy))
- return 0;
-
if (!context->sockaddr) {
void *p = kmalloc(sizeof(struct sockaddr_storage), GFP_KERNEL);
if (!p)
@@ -2499,6 +2668,25 @@ void __audit_mmap_fd(int fd, int flags)
context->type = AUDIT_MMAP;
}
+static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr)
+{
+ uid_t auid, uid;
+ gid_t gid;
+ unsigned int sessionid;
+
+ auid = audit_get_loginuid(current);
+ sessionid = audit_get_sessionid(current);
+ current_uid_gid(&uid, &gid);
+
+ audit_log_format(ab, "auid=%u uid=%u gid=%u ses=%u",
+ auid, uid, gid, sessionid);
+ audit_log_task_context(ab);
+ audit_log_format(ab, " pid=%d comm=", current->pid);
+ audit_log_untrustedstring(ab, current->comm);
+ audit_log_format(ab, " reason=");
+ audit_log_string(ab, reason);
+ audit_log_format(ab, " sig=%ld", signr);
+}
/**
* audit_core_dumps - record information about processes that end abnormally
* @signr: signal value
@@ -2509,10 +2697,6 @@ void __audit_mmap_fd(int fd, int flags)
void audit_core_dumps(long signr)
{
struct audit_buffer *ab;
- u32 sid;
- uid_t auid = audit_get_loginuid(current), uid;
- gid_t gid;
- unsigned int sessionid = audit_get_sessionid(current);
if (!audit_enabled)
return;
@@ -2521,24 +2705,17 @@ void audit_core_dumps(long signr)
return;
ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
- current_uid_gid(&uid, &gid);
- audit_log_format(ab, "auid=%u uid=%u gid=%u ses=%u",
- auid, uid, gid, sessionid);
- security_task_getsecid(current, &sid);
- if (sid) {
- char *ctx = NULL;
- u32 len;
+ audit_log_abend(ab, "memory violation", signr);
+ audit_log_end(ab);
+}
- if (security_secid_to_secctx(sid, &ctx, &len))
- audit_log_format(ab, " ssid=%u", sid);
- else {
- audit_log_format(ab, " subj=%s", ctx);
- security_release_secctx(ctx, len);
- }
- }
- audit_log_format(ab, " pid=%d comm=", current->pid);
- audit_log_untrustedstring(ab, current->comm);
- audit_log_format(ab, " sig=%ld", signr);
+void __audit_seccomp(unsigned long syscall)
+{
+ struct audit_buffer *ab;
+
+ ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
+ audit_log_abend(ab, "seccomp", SIGKILL);
+ audit_log_format(ab, " syscall=%ld", syscall);
audit_log_end(ab);
}
diff --git a/kernel/capability.c b/kernel/capability.c
index 0fcf1c14a29..3f1adb6c647 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -384,7 +384,7 @@ bool ns_capable(struct user_namespace *ns, int cap)
BUG();
}
- if (has_ns_capability(current, ns, cap)) {
+ if (security_capable(current_cred(), ns, cap) == 0) {
current->flags |= PF_SUPERPRIV;
return true;
}
diff --git a/kernel/exit.c b/kernel/exit.c
index c44738267be..294b1709170 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -964,8 +964,7 @@ void do_exit(long code)
acct_collect(code, group_dead);
if (group_dead)
tty_audit_exit();
- if (unlikely(tsk->audit_context))
- audit_free(tsk);
+ audit_free(tsk);
tsk->exit_code = code;
taskstats_exit(tsk, group_dead);
diff --git a/kernel/fork.c b/kernel/fork.c
index f3fa18887cc..051f090d40c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1527,8 +1527,6 @@ long do_fork(unsigned long clone_flags,
init_completion(&vfork);
}
- audit_finish_fork(p);
-
/*
* We set PF_STARTING at creation in case tracing wants to
* use this to distinguish a fully live task from one that
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 57d4b13b631..e8d76c5895e 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -6,6 +6,7 @@
* This defines a simple but solid secure-computing mode.
*/
+#include <linux/audit.h>
#include <linux/seccomp.h>
#include <linux/sched.h>
#include <linux/compat.h>
@@ -54,6 +55,7 @@ void __secure_computing(int this_syscall)
#ifdef SECCOMP_DEBUG
dump_stack();
#endif
+ audit_seccomp(this_syscall);
do_exit(SIGKILL);
}
diff --git a/lib/Kconfig b/lib/Kconfig
index 201e1b33d72..169eb7c598e 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -286,25 +286,24 @@ config CORDIC
calculations are in fixed point. Module will be called cordic.
config MPILIB
- tristate "Multiprecision maths library"
+ tristate
help
Multiprecision maths library from GnuPG.
It is used to implement RSA digital signature verification,
which is used by IMA/EVM digital signature extension.
config MPILIB_EXTRA
- bool "Multiprecision maths library - additional sources"
+ bool
depends on MPILIB
help
- Multiprecision maths library from GnuPG.
- It is used to implement RSA digital signature verification,
- which is used by IMA/EVM digital signature extension.
- This code in unnecessary for RSA digital signature verification,
- and can be compiled if needed.
+ Additional sources of multiprecision maths library from GnuPG.
+ This code is unnecessary for RSA digital signature verification,
+ but can be compiled if needed.
-config DIGSIG
- tristate "In-kernel signature checker"
- depends on KEYS
+config SIGNATURE
+ tristate
+ depends on KEYS && CRYPTO
+ select CRYPTO_SHA1
select MPILIB
help
Digital signature verification. Currently only RSA is supported.
diff --git a/lib/Makefile b/lib/Makefile
index dace162c7e1..d71aae1b01b 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -119,7 +119,7 @@ obj-$(CONFIG_CORDIC) += cordic.o
obj-$(CONFIG_DQL) += dynamic_queue_limits.o
obj-$(CONFIG_MPILIB) += mpi/
-obj-$(CONFIG_DIGSIG) += digsig.o
+obj-$(CONFIG_SIGNATURE) += digsig.o
hostprogs-y := gen_crc32table
clean-files := crc32table.h
diff --git a/security/integrity/Kconfig b/security/integrity/Kconfig
index d384ea92148..5bd1cc1b4a5 100644
--- a/security/integrity/Kconfig
+++ b/security/integrity/Kconfig
@@ -3,11 +3,11 @@ config INTEGRITY
def_bool y
depends on IMA || EVM
-config INTEGRITY_DIGSIG
+config INTEGRITY_SIGNATURE
boolean "Digital signature verification using multiple keyrings"
depends on INTEGRITY && KEYS
default n
- select DIGSIG
+ select SIGNATURE
help
This option enables digital signature verification support
using multiple keyrings. It defines separate keyrings for each
diff --git a/security/integrity/Makefile b/security/integrity/Makefile
index bece0563ee5..d43799cc14f 100644
--- a/security/integrity/Makefile
+++ b/security/integrity/Makefile
@@ -3,7 +3,7 @@
#
obj-$(CONFIG_INTEGRITY) += integrity.o
-obj-$(CONFIG_INTEGRITY_DIGSIG) += digsig.o
+obj-$(CONFIG_INTEGRITY_SIGNATURE) += digsig.o
integrity-y := iint.o
diff --git a/security/integrity/ima/ima_audit.c b/security/integrity/ima/ima_audit.c
index c5c5a72c30b..2ad942fb1e2 100644
--- a/security/integrity/ima/ima_audit.c
+++ b/security/integrity/ima/ima_audit.c
@@ -56,9 +56,11 @@ void integrity_audit_msg(int audit_msgno, struct inode *inode,
audit_log_format(ab, " name=");
audit_log_untrustedstring(ab, fname);
}
- if (inode)
- audit_log_format(ab, " dev=%s ino=%lu",
- inode->i_sb->s_id, inode->i_ino);
+ if (inode) {
+ audit_log_format(ab, " dev=");
+ audit_log_untrustedstring(ab, inode->i_sb->s_id);
+ audit_log_format(ab, " ino=%lu", inode->i_ino);
+ }
audit_log_format(ab, " res=%d", !result ? 0 : 1);
audit_log_end(ab);
}
diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h
index 4da6ba81d15..7a25ecec5aa 100644
--- a/security/integrity/integrity.h
+++ b/security/integrity/integrity.h
@@ -51,7 +51,7 @@ struct integrity_iint_cache *integrity_iint_find(struct inode *inode);
#define INTEGRITY_KEYRING_IMA 2
#define INTEGRITY_KEYRING_MAX 3
-#ifdef CONFIG_INTEGRITY_DIGSIG
+#ifdef CONFIG_INTEGRITY_SIGNATURE
int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
const char *digest, int digestlen);
@@ -65,7 +65,7 @@ static inline int integrity_digsig_verify(const unsigned int id,
return -EOPNOTSUPP;
}
-#endif /* CONFIG_INTEGRITY_DIGSIG */
+#endif /* CONFIG_INTEGRITY_SIGNATURE */
/* set during initialization */
extern int iint_initialized;
diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
index 41144f71d61..2d1bb8af769 100644
--- a/security/keys/encrypted-keys/encrypted.c
+++ b/security/keys/encrypted-keys/encrypted.c
@@ -314,7 +314,7 @@ static struct key *request_user_key(const char *master_desc, u8 **master_key,
goto error;
down_read(&ukey->sem);
- upayload = rcu_dereference(ukey->payload.data);
+ upayload = ukey->payload.data;
*master_key = upayload->data;
*master_keylen = upayload->datalen;
error:
@@ -810,7 +810,7 @@ static int encrypted_instantiate(struct key *key, const void *data,
goto out;
}
- rcu_assign_pointer(key->payload.data, epayload);
+ rcu_assign_keypointer(key, epayload);
out:
kfree(datablob);
return ret;
@@ -874,7 +874,7 @@ static int encrypted_update(struct key *key, const void *data, size_t datalen)
memcpy(new_epayload->payload_data, epayload->payload_data,
epayload->payload_datalen);
- rcu_assign_pointer(key->payload.data, new_epayload);
+ rcu_assign_keypointer(key, new_epayload);
call_rcu(&epayload->rcu, encrypted_rcu_free);
out:
kfree(buf);
diff --git a/security/keys/encrypted-keys/masterkey_trusted.c b/security/keys/encrypted-keys/masterkey_trusted.c
index df87272e3f5..013f7e5d3a2 100644
--- a/security/keys/encrypted-keys/masterkey_trusted.c
+++ b/security/keys/encrypted-keys/masterkey_trusted.c
@@ -18,6 +18,8 @@
#include <linux/module.h>
#include <linux/err.h>
#include <keys/trusted-type.h>
+#include <keys/encrypted-type.h>
+#include "encrypted.h"
/*
* request_trusted_key - request the trusted key
@@ -37,7 +39,7 @@ struct key *request_trusted_key(const char *trusted_desc,
goto error;
down_read(&tkey->sem);
- tpayload = rcu_dereference(tkey->payload.data);
+ tpayload = tkey->payload.data;
*master_key = tpayload->key;
*master_keylen = tpayload->key_len;
error:
diff --git a/security/keys/gc.c b/security/keys/gc.c
index bf4d8da5a79..a42b45531aa 100644
--- a/security/keys/gc.c
+++ b/security/keys/gc.c
@@ -145,7 +145,9 @@ static void key_gc_keyring(struct key *keyring, time_t limit)
if (!klist)
goto unlock_dont_gc;
- for (loop = klist->nkeys - 1; loop >= 0; loop--) {
+ loop = klist->nkeys;
+ smp_rmb();
+ for (loop--; loop >= 0; loop--) {
key = klist->keys[loop];
if (test_bit(KEY_FLAG_DEAD, &key->flags) ||
(key->expiry > 0 && key->expiry <= limit))
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 37a7f3b2885..d605f75292e 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -319,7 +319,7 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref,
struct key *keyring, *key;
key_ref_t key_ref;
long err;
- int sp, kix;
+ int sp, nkeys, kix;
keyring = key_ref_to_ptr(keyring_ref);
possessed = is_key_possessed(keyring_ref);
@@ -380,7 +380,9 @@ descend:
goto not_this_keyring;
/* iterate through the keys in this keyring first */
- for (kix = 0; kix < keylist->nkeys; kix++) {
+ nkeys = keylist->nkeys;
+ smp_rmb();
+ for (kix = 0; kix < nkeys; kix++) {
key = keylist->keys[kix];
kflags = key->flags;
@@ -421,7 +423,9 @@ descend:
/* search through the keyrings nested in this one */
kix = 0;
ascend:
- for (; kix < keylist->nkeys; kix++) {
+ nkeys = keylist->nkeys;
+ smp_rmb();
+ for (; kix < nkeys; kix++) {
key = keylist->keys[kix];
if (key->type != &key_type_keyring)
continue;
@@ -515,7 +519,7 @@ key_ref_t __keyring_search_one(key_ref_t keyring_ref,
struct keyring_list *klist;
unsigned long possessed;
struct key *keyring, *key;
- int loop;
+ int nkeys, loop;
keyring = key_ref_to_ptr(keyring_ref);
possessed = is_key_possessed(keyring_ref);
@@ -524,7 +528,9 @@ key_ref_t __keyring_search_one(key_ref_t keyring_ref,
klist = rcu_dereference(keyring->payload.subscriptions);
if (klist) {
- for (loop = 0; loop < klist->nkeys; loop++) {
+ nkeys = klist->nkeys;
+ smp_rmb();
+ for (loop = 0; loop < nkeys ; loop++) {
key = klist->keys[loop];
if (key->type == ktype &&
@@ -622,7 +628,7 @@ static int keyring_detect_cycle(struct key *A, struct key *B)
struct keyring_list *keylist;
struct key *subtree, *key;
- int sp, kix, ret;
+ int sp, nkeys, kix, ret;
rcu_read_lock();
@@ -645,7 +651,9 @@ descend:
ascend:
/* iterate through the remaining keys in this keyring */
- for (; kix < keylist->nkeys; kix++) {
+ nkeys = keylist->nkeys;
+ smp_rmb();
+ for (; kix < nkeys; kix++) {
key = keylist->keys[kix];
if (key == A)
diff --git a/security/keys/trusted.c b/security/keys/trusted.c
index 0ed5fdf238a..2d5d041f204 100644
--- a/security/keys/trusted.c
+++ b/security/keys/trusted.c
@@ -993,7 +993,7 @@ out:
kfree(datablob);
kfree(options);
if (!ret)
- rcu_assign_pointer(key->payload.data, payload);
+ rcu_assign_keypointer(key, payload);
else
kfree(payload);
return ret;
@@ -1067,7 +1067,7 @@ static int trusted_update(struct key *key, const void *data, size_t datalen)
goto out;
}
}
- rcu_assign_pointer(key->payload.data, new_p);
+ rcu_assign_keypointer(key, new_p);
call_rcu(&p->rcu, trusted_rcu_free);
out:
kfree(datablob);
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index 7bd6f138236..293b8c45b1d 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -232,13 +232,14 @@ static void dump_common_audit_data(struct audit_buffer *ab,
case LSM_AUDIT_DATA_PATH: {
struct inode *inode;
- audit_log_d_path(ab, "path=", &a->u.path);
+ audit_log_d_path(ab, " path=", &a->u.path);
inode = a->u.path.dentry->d_inode;
- if (inode)
- audit_log_format(ab, " dev=%s ino=%lu",
- inode->i_sb->s_id,
- inode->i_ino);
+ if (inode) {
+ audit_log_format(ab, " dev=");
+ audit_log_untrustedstring(ab, inode->i_sb->s_id);
+ audit_log_format(ab, " ino=%lu", inode->i_ino);
+ }
break;
}
case LSM_AUDIT_DATA_DENTRY: {
@@ -248,10 +249,11 @@ static void dump_common_audit_data(struct audit_buffer *ab,
audit_log_untrustedstring(ab, a->u.dentry->d_name.name);
inode = a->u.dentry->d_inode;
- if (inode)
- audit_log_format(ab, " dev=%s ino=%lu",
- inode->i_sb->s_id,
- inode->i_ino);
+ if (inode) {
+ audit_log_format(ab, " dev=");
+ audit_log_untrustedstring(ab, inode->i_sb->s_id);
+ audit_log_format(ab, " ino=%lu", inode->i_ino);
+ }
break;
}
case LSM_AUDIT_DATA_INODE: {
@@ -266,8 +268,9 @@ static void dump_common_audit_data(struct audit_buffer *ab,
dentry->d_name.name);
dput(dentry);
}
- audit_log_format(ab, " dev=%s ino=%lu", inode->i_sb->s_id,
- inode->i_ino);
+ audit_log_format(ab, " dev=");
+ audit_log_untrustedstring(ab, inode->i_sb->s_id);
+ audit_log_format(ab, " ino=%lu", inode->i_ino);
break;
}
case LSM_AUDIT_DATA_TASK:
@@ -315,7 +318,7 @@ static void dump_common_audit_data(struct audit_buffer *ab,
.dentry = u->dentry,
.mnt = u->mnt
};
- audit_log_d_path(ab, "path=", &path);
+ audit_log_d_path(ab, " path=", &path);
break;
}
if (!u->addr)
diff --git a/security/tomoyo/util.c b/security/tomoyo/util.c
index 4a9b4b2eb75..867558c9833 100644
--- a/security/tomoyo/util.c
+++ b/security/tomoyo/util.c
@@ -492,13 +492,13 @@ static bool tomoyo_correct_word2(const char *string, size_t len)
if (d < '0' || d > '7' || e < '0' || e > '7')
break;
c = tomoyo_make_byte(c, d, e);
- if (tomoyo_invalid(c))
- continue; /* pattern is not \000 */
+ if (c <= ' ' || c >= 127)
+ continue;
}
goto out;
} else if (in_repetition && c == '/') {
goto out;
- } else if (tomoyo_invalid(c)) {
+ } else if (c <= ' ' || c >= 127) {
goto out;
}
}
diff --git a/sound/atmel/abdac.c b/sound/atmel/abdac.c
index 6fd9391b3a6..4fa1dbd8ee8 100644
--- a/sound/atmel/abdac.c
+++ b/sound/atmel/abdac.c
@@ -133,7 +133,7 @@ static int atmel_abdac_prepare_dma(struct atmel_abdac *dac,
period_len = frames_to_bytes(runtime, runtime->period_size);
cdesc = dw_dma_cyclic_prep(chan, runtime->dma_addr, buffer_len,
- period_len, DMA_TO_DEVICE);
+ period_len, DMA_MEM_TO_DEV);
if (IS_ERR(cdesc)) {
dev_dbg(&dac->pdev->dev, "could not prepare cyclic DMA\n");
return PTR_ERR(cdesc);
diff --git a/sound/atmel/ac97c.c b/sound/atmel/ac97c.c
index 73516f69ac7..61dade69835 100644
--- a/sound/atmel/ac97c.c
+++ b/sound/atmel/ac97c.c
@@ -102,7 +102,7 @@ static void atmel_ac97c_dma_capture_period_done(void *arg)
static int atmel_ac97c_prepare_dma(struct atmel_ac97c *chip,
struct snd_pcm_substream *substream,
- enum dma_data_direction direction)
+ enum dma_transfer_direction direction)
{
struct dma_chan *chan;
struct dw_cyclic_desc *cdesc;
@@ -118,7 +118,7 @@ static int atmel_ac97c_prepare_dma(struct atmel_ac97c *chip,
return -EINVAL;
}
- if (direction == DMA_TO_DEVICE)
+ if (direction == DMA_MEM_TO_DEV)
chan = chip->dma.tx_chan;
else
chan = chip->dma.rx_chan;
@@ -133,7 +133,7 @@ static int atmel_ac97c_prepare_dma(struct atmel_ac97c *chip,
return PTR_ERR(cdesc);
}
- if (direction == DMA_TO_DEVICE) {
+ if (direction == DMA_MEM_TO_DEV) {
cdesc->period_callback = atmel_ac97c_dma_playback_period_done;
set_bit(DMA_TX_READY, &chip->flags);
} else {
@@ -393,7 +393,7 @@ static int atmel_ac97c_playback_prepare(struct snd_pcm_substream *substream)
if (cpu_is_at32ap7000()) {
if (!test_bit(DMA_TX_READY, &chip->flags))
retval = atmel_ac97c_prepare_dma(chip, substream,
- DMA_TO_DEVICE);
+ DMA_MEM_TO_DEV);
} else {
/* Initialize and start the PDC */
writel(runtime->dma_addr, chip->regs + ATMEL_PDC_TPR);
@@ -484,7 +484,7 @@ static int atmel_ac97c_capture_prepare(struct snd_pcm_substream *substream)
if (cpu_is_at32ap7000()) {
if (!test_bit(DMA_RX_READY, &chip->flags))
retval = atmel_ac97c_prepare_dma(chip, substream,
- DMA_FROM_DEVICE);
+ DMA_DEV_TO_MEM);
} else {
/* Initialize and start the PDC */
writel(runtime->dma_addr, chip->regs + ATMEL_PDC_RPR);
diff --git a/sound/core/Kconfig b/sound/core/Kconfig
index ad409381f8c..b413ed05e74 100644
--- a/sound/core/Kconfig
+++ b/sound/core/Kconfig
@@ -12,6 +12,9 @@ config SND_HWDEP
config SND_RAWMIDI
tristate
+config SND_COMPRESS_OFFLOAD
+ tristate
+
# To be effective this also requires INPUT - users should say:
# select SND_JACK if INPUT=y || INPUT=SND
# to avoid having to force INPUT on.
@@ -154,16 +157,6 @@ config SND_DYNAMIC_MINORS
If you are unsure about this, say N here.
-config SND_COMPRESS_OFFLOAD
- tristate "ALSA Compressed audio offload support"
- default n
- help
- If you want support for offloading compressed audio and have such
- a hardware, then you should say Y here and also to the DSP driver
- of your platform.
-
- If you are unsure about this, say N here.
-
config SND_SUPPORT_OLD_API
bool "Support old ALSA API"
default y
diff --git a/sound/pci/au88x0/au88x0.c b/sound/pci/au88x0/au88x0.c
index 762bb108c51..f13ad536b2d 100644
--- a/sound/pci/au88x0/au88x0.c
+++ b/sound/pci/au88x0/au88x0.c
@@ -268,8 +268,14 @@ snd_vortex_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
card->shortname, chip->io, chip->irq);
// (4) Alloc components.
+ err = snd_vortex_mixer(chip);
+ if (err < 0) {
+ snd_card_free(card);
+ return err;
+ }
// ADB pcm.
- if ((err = snd_vortex_new_pcm(chip, VORTEX_PCM_ADB, NR_ADB)) < 0) {
+ err = snd_vortex_new_pcm(chip, VORTEX_PCM_ADB, NR_PCM);
+ if (err < 0) {
snd_card_free(card);
return err;
}
@@ -299,11 +305,6 @@ snd_vortex_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
return err;
}
#endif
- // snd_ac97_mixer and Vortex mixer.
- if ((err = snd_vortex_mixer(chip)) < 0) {
- snd_card_free(card);
- return err;
- }
if ((err = snd_vortex_midi(chip)) < 0) {
snd_card_free(card);
return err;
diff --git a/sound/pci/au88x0/au88x0.h b/sound/pci/au88x0/au88x0.h
index 02f6e08f759..bb938153a96 100644
--- a/sound/pci/au88x0/au88x0.h
+++ b/sound/pci/au88x0/au88x0.h
@@ -105,6 +105,7 @@
#define MIX_SPDIF(x) (vortex->mixspdif[x])
#define NR_WTPB 0x20 /* WT channels per each bank. */
+#define NR_PCM 0x10
/* Structs */
typedef struct {
diff --git a/sound/pci/au88x0/au88x0_pcm.c b/sound/pci/au88x0/au88x0_pcm.c
index 0488633ea87..0ef2f971220 100644
--- a/sound/pci/au88x0/au88x0_pcm.c
+++ b/sound/pci/au88x0/au88x0_pcm.c
@@ -168,6 +168,7 @@ static int snd_vortex_pcm_open(struct snd_pcm_substream *substream)
runtime->hw = snd_vortex_playback_hw_adb;
#ifdef CHIP_AU8830
if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK &&
+ VORTEX_IS_QUAD(vortex) &&
VORTEX_PCM_TYPE(substream->pcm) == VORTEX_PCM_ADB) {
runtime->hw.channels_max = 4;
snd_pcm_hw_constraint_list(runtime, 0,
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 0852e204a4c..fb35474c120 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2498,6 +2498,7 @@ static struct snd_pci_quirk position_fix_list[] __devinitdata = {
SND_PCI_QUIRK(0x1043, 0x81b3, "ASUS", POS_FIX_LPIB),
SND_PCI_QUIRK(0x1043, 0x81e7, "ASUS M2V", POS_FIX_LPIB),
SND_PCI_QUIRK(0x104d, 0x9069, "Sony VPCS11V9E", POS_FIX_LPIB),
+ SND_PCI_QUIRK(0x10de, 0xcb89, "Macbook Pro 7,1", POS_FIX_LPIB),
SND_PCI_QUIRK(0x1297, 0x3166, "Shuttle", POS_FIX_LPIB),
SND_PCI_QUIRK(0x1458, 0xa022, "ga-ma770-ud3", POS_FIX_LPIB),
SND_PCI_QUIRK(0x1462, 0x1002, "MSI Wind U115", POS_FIX_LPIB),
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 87e684fa830..3556408d6ec 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -1596,7 +1596,7 @@ static const struct snd_pci_quirk stac92hd73xx_cfg_tbl[] = {
SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x02bd,
"Dell Studio 1557", STAC_DELL_M6_DMIC),
SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x02fe,
- "Dell Studio XPS 1645", STAC_DELL_M6_BOTH),
+ "Dell Studio XPS 1645", STAC_DELL_M6_DMIC),
SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x0413,
"Dell Studio 1558", STAC_DELL_M6_DMIC),
{} /* terminator */
diff --git a/sound/pci/oxygen/xonar_wm87x6.c b/sound/pci/oxygen/xonar_wm87x6.c
index 478303e6c2b..63cff90706b 100644
--- a/sound/pci/oxygen/xonar_wm87x6.c
+++ b/sound/pci/oxygen/xonar_wm87x6.c
@@ -177,6 +177,7 @@ static void wm8776_registers_init(struct oxygen *chip)
struct xonar_wm87x6 *data = chip->model_data;
wm8776_write(chip, WM8776_RESET, 0);
+ wm8776_write(chip, WM8776_PHASESWAP, WM8776_PH_MASK);
wm8776_write(chip, WM8776_DACCTRL1, WM8776_DZCEN |
WM8776_PL_LEFT_LEFT | WM8776_PL_RIGHT_RIGHT);
wm8776_write(chip, WM8776_DACMUTE, chip->dac_mute ? WM8776_DMUTE : 0);
diff --git a/sound/soc/ep93xx/ep93xx-pcm.c b/sound/soc/ep93xx/ep93xx-pcm.c
index 3fc96130d1a..de839044987 100644
--- a/sound/soc/ep93xx/ep93xx-pcm.c
+++ b/sound/soc/ep93xx/ep93xx-pcm.c
@@ -113,9 +113,9 @@ static int ep93xx_pcm_open(struct snd_pcm_substream *substream)
rtd->dma_data.name = dma_params->name;
if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
- rtd->dma_data.direction = DMA_TO_DEVICE;
+ rtd->dma_data.direction = DMA_MEM_TO_DEV;
else
- rtd->dma_data.direction = DMA_FROM_DEVICE;
+ rtd->dma_data.direction = DMA_DEV_TO_MEM;
rtd->dma_chan = dma_request_channel(mask, ep93xx_pcm_dma_filter,
&rtd->dma_data);
diff --git a/sound/soc/imx/imx-pcm-dma-mx2.c b/sound/soc/imx/imx-pcm-dma-mx2.c
index 1cf2fe889f6..aecdba9f65a 100644
--- a/sound/soc/imx/imx-pcm-dma-mx2.c
+++ b/sound/soc/imx/imx-pcm-dma-mx2.c
@@ -107,12 +107,12 @@ static int imx_ssi_dma_alloc(struct snd_pcm_substream *substream,
}
if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
- slave_config.direction = DMA_TO_DEVICE;
+ slave_config.direction = DMA_MEM_TO_DEV;
slave_config.dst_addr = dma_params->dma_addr;
slave_config.dst_addr_width = buswidth;
slave_config.dst_maxburst = dma_params->burstsize;
} else {
- slave_config.direction = DMA_FROM_DEVICE;
+ slave_config.direction = DMA_DEV_TO_MEM;
slave_config.src_addr = dma_params->dma_addr;
slave_config.src_addr_width = buswidth;
slave_config.src_maxburst = dma_params->burstsize;
@@ -159,7 +159,7 @@ static int snd_imx_pcm_hw_params(struct snd_pcm_substream *substream,
iprtd->period_bytes * iprtd->periods,
iprtd->period_bytes,
substream->stream == SNDRV_PCM_STREAM_PLAYBACK ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE);
+ DMA_MEM_TO_DEV : DMA_DEV_TO_MEM);
if (!iprtd->desc) {
dev_err(&chan->dev->device, "cannot prepare slave dma\n");
return -EINVAL;
diff --git a/sound/soc/mxs/mxs-pcm.c b/sound/soc/mxs/mxs-pcm.c
index 0e12f4e0a76..105f42a394d 100644
--- a/sound/soc/mxs/mxs-pcm.c
+++ b/sound/soc/mxs/mxs-pcm.c
@@ -136,7 +136,7 @@ static int snd_mxs_pcm_hw_params(struct snd_pcm_substream *substream,
iprtd->period_bytes * iprtd->periods,
iprtd->period_bytes,
substream->stream == SNDRV_PCM_STREAM_PLAYBACK ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE);
+ DMA_MEM_TO_DEV : DMA_DEV_TO_MEM);
if (!iprtd->desc) {
dev_err(&chan->dev->device, "cannot prepare slave dma\n");
return -EINVAL;
diff --git a/sound/soc/samsung/dma.c b/sound/soc/samsung/dma.c
index 427ae0d9817..e4ba17ce6b3 100644
--- a/sound/soc/samsung/dma.c
+++ b/sound/soc/samsung/dma.c
@@ -86,7 +86,7 @@ static void dma_enqueue(struct snd_pcm_substream *substream)
dma_info.cap = (samsung_dma_has_circular() ? DMA_CYCLIC : DMA_SLAVE);
dma_info.direction =
(substream->stream == SNDRV_PCM_STREAM_PLAYBACK
- ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+ ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM);
dma_info.fp = audio_buffdone;
dma_info.fp_param = substream;
dma_info.period = prtd->dma_period;
@@ -171,7 +171,7 @@ static int dma_hw_params(struct snd_pcm_substream *substream,
dma_info.client = prtd->params->client;
dma_info.direction =
(substream->stream == SNDRV_PCM_STREAM_PLAYBACK
- ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+ ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM);
dma_info.width = prtd->params->dma_size;
dma_info.fifo = prtd->params->dma_addr;
prtd->params->ch = prtd->params->ops->request(
diff --git a/sound/soc/sh/siu_pcm.c b/sound/soc/sh/siu_pcm.c
index f8f681690a7..0193e595d41 100644
--- a/sound/soc/sh/siu_pcm.c
+++ b/sound/soc/sh/siu_pcm.c
@@ -131,7 +131,7 @@ static int siu_pcm_wr_set(struct siu_port *port_info,
sg_dma_address(&sg) = buff;
desc = siu_stream->chan->device->device_prep_slave_sg(siu_stream->chan,
- &sg, 1, DMA_TO_DEVICE, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ &sg, 1, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
if (!desc) {
dev_err(dev, "Failed to allocate a dma descriptor\n");
return -ENOMEM;
@@ -181,7 +181,7 @@ static int siu_pcm_rd_set(struct siu_port *port_info,
sg_dma_address(&sg) = buff;
desc = siu_stream->chan->device->device_prep_slave_sg(siu_stream->chan,
- &sg, 1, DMA_FROM_DEVICE, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ &sg, 1, DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
if (!desc) {
dev_err(dev, "Failed to allocate dma descriptor\n");
return -ENOMEM;
diff --git a/sound/soc/txx9/txx9aclc.c b/sound/soc/txx9/txx9aclc.c
index 93931def0dc..21554611557 100644
--- a/sound/soc/txx9/txx9aclc.c
+++ b/sound/soc/txx9/txx9aclc.c
@@ -134,7 +134,7 @@ txx9aclc_dma_submit(struct txx9aclc_dmadata *dmadata, dma_addr_t buf_dma_addr)
sg_dma_address(&sg) = buf_dma_addr;
desc = chan->device->device_prep_slave_sg(chan, &sg, 1,
dmadata->substream->stream == SNDRV_PCM_STREAM_PLAYBACK ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE,
+ DMA_MEM_TO_DEV : DMA_DEV_TO_MEM,
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
if (!desc) {
dev_err(&chan->dev->device, "cannot prepare slave dma\n");