author	Stephen Rothwell <sfr@canb.auug.org.au>	2019-12-05 12:17:56 +1100
committer	Stephen Rothwell <sfr@canb.auug.org.au>	2019-12-05 12:17:56 +1100
commit	841f7f890d388c1c7b66d6cf87ec31c47132d24a (patch)
tree	fbf309c208453bb1d303d27a5abe808ea07329a0
parent	be7bfbb4bf5b79969519c92f2d606ab7928ab47f (diff)
parent	d7a7648465d2aca5dce07daf37dd6d8a6376a61a (diff)
Merge branch 'akpm-current/current'
-rw-r--r--  .gitattributes | 2
-rw-r--r--  Documentation/core-api/genalloc.rst | 2
-rw-r--r--  Documentation/dev-tools/kcov.rst | 129
-rw-r--r--  Documentation/memory-barriers.txt | 16
-rw-r--r--  Documentation/process/deprecated.rst | 10
-rw-r--r--  arch/Kconfig | 22
-rw-r--r--  arch/alpha/include/asm/mmzone.h | 1
-rw-r--r--  arch/alpha/include/asm/pgalloc.h | 4
-rw-r--r--  arch/alpha/include/asm/pgtable.h | 24
-rw-r--r--  arch/alpha/mm/init.c | 12
-rw-r--r--  arch/arm/include/asm/pgtable.h | 2
-rw-r--r--  arch/arm/mm/dma-mapping.c | 2
-rw-r--r--  arch/arm64/mm/mmu.c | 4
-rw-r--r--  arch/c6x/include/asm/pgtable.h | 2
-rw-r--r--  arch/ia64/mm/init.c | 4
-rw-r--r--  arch/m68k/include/asm/mcf_pgalloc.h | 7
-rw-r--r--  arch/m68k/include/asm/mcf_pgtable.h | 28
-rw-r--r--  arch/m68k/include/asm/mmu_context.h | 12
-rw-r--r--  arch/m68k/include/asm/motorola_pgalloc.h | 4
-rw-r--r--  arch/m68k/include/asm/motorola_pgtable.h | 32
-rw-r--r--  arch/m68k/include/asm/page.h | 9
-rw-r--r--  arch/m68k/include/asm/pgtable_mm.h | 11
-rw-r--r--  arch/m68k/include/asm/pgtable_no.h | 2
-rw-r--r--  arch/m68k/include/asm/sun3_pgalloc.h | 5
-rw-r--r--  arch/m68k/include/asm/sun3_pgtable.h | 18
-rw-r--r--  arch/m68k/kernel/sys_m68k.c | 10
-rw-r--r--  arch/m68k/mm/init.c | 6
-rw-r--r--  arch/m68k/mm/kmap.c | 39
-rw-r--r--  arch/m68k/mm/mcfmmu.c | 16
-rw-r--r--  arch/m68k/mm/motorola.c | 17
-rw-r--r--  arch/m68k/sun3x/dvma.c | 7
-rw-r--r--  arch/microblaze/include/asm/page.h | 3
-rw-r--r--  arch/microblaze/include/asm/pgalloc.h | 16
-rw-r--r--  arch/microblaze/include/asm/pgtable.h | 32
-rw-r--r--  arch/microblaze/kernel/signal.c | 10
-rw-r--r--  arch/microblaze/mm/init.c | 7
-rw-r--r--  arch/microblaze/mm/pgtable.c | 13
-rw-r--r--  arch/mips/include/uapi/asm/msgbuf.h | 1
-rw-r--r--  arch/mips/include/uapi/asm/sembuf.h | 2
-rw-r--r--  arch/nds32/include/asm/page.h | 3
-rw-r--r--  arch/nds32/include/asm/pgalloc.h | 3
-rw-r--r--  arch/nds32/include/asm/pgtable.h | 12
-rw-r--r--  arch/nds32/include/asm/tlb.h | 1
-rw-r--r--  arch/nds32/kernel/pm.c | 4
-rw-r--r--  arch/nds32/mm/fault.c | 16
-rw-r--r--  arch/nds32/mm/init.c | 11
-rw-r--r--  arch/nds32/mm/mm-nds32.c | 6
-rw-r--r--  arch/nds32/mm/proc.c | 26
-rw-r--r--  arch/parisc/include/asm/page.h | 30
-rw-r--r--  arch/parisc/include/asm/pgalloc.h | 41
-rw-r--r--  arch/parisc/include/asm/pgtable.h | 52
-rw-r--r--  arch/parisc/include/asm/tlb.h | 2
-rw-r--r--  arch/parisc/include/uapi/asm/msgbuf.h | 1
-rw-r--r--  arch/parisc/include/uapi/asm/sembuf.h | 1
-rw-r--r--  arch/parisc/kernel/cache.c | 13
-rw-r--r--  arch/parisc/kernel/pci-dma.c | 9
-rw-r--r--  arch/parisc/mm/fixmap.c | 10
-rw-r--r--  arch/parisc/mm/hugetlbpage.c | 18
-rw-r--r--  arch/powerpc/include/uapi/asm/msgbuf.h | 2
-rw-r--r--  arch/powerpc/include/uapi/asm/sembuf.h | 2
-rw-r--r--  arch/powerpc/mm/mem.c | 3
-rw-r--r--  arch/s390/include/uapi/asm/ipcbuf.h | 2
-rw-r--r--  arch/s390/mm/init.c | 4
-rw-r--r--  arch/sh/mm/init.c | 4
-rw-r--r--  arch/sparc/include/asm/pgalloc_32.h | 6
-rw-r--r--  arch/sparc/include/asm/pgtable_32.h | 28
-rw-r--r--  arch/sparc/include/uapi/asm/ipcbuf.h | 2
-rw-r--r--  arch/sparc/include/uapi/asm/msgbuf.h | 2
-rw-r--r--  arch/sparc/include/uapi/asm/sembuf.h | 2
-rw-r--r--  arch/sparc/mm/fault_32.c | 11
-rw-r--r--  arch/sparc/mm/highmem.c | 6
-rw-r--r--  arch/sparc/mm/io-unit.c | 6
-rw-r--r--  arch/sparc/mm/iommu.c | 6
-rw-r--r--  arch/sparc/mm/srmmu.c | 51
-rw-r--r--  arch/um/include/asm/pgtable-2level.h | 1
-rw-r--r--  arch/um/include/asm/pgtable-3level.h | 1
-rw-r--r--  arch/um/include/asm/pgtable.h | 3
-rw-r--r--  arch/um/kernel/mem.c | 8
-rw-r--r--  arch/um/kernel/skas/mmu.c | 12
-rw-r--r--  arch/um/kernel/skas/uaccess.c | 7
-rw-r--r--  arch/um/kernel/tlb.c | 85
-rw-r--r--  arch/um/kernel/trap.c | 4
-rw-r--r--  arch/x86/include/uapi/asm/msgbuf.h | 3
-rw-r--r--  arch/x86/include/uapi/asm/sembuf.h | 2
-rw-r--r--  arch/x86/mm/fault.c | 26
-rw-r--r--  arch/x86/mm/init_32.c | 4
-rw-r--r--  arch/x86/mm/init_64.c | 4
-rw-r--r--  arch/xtensa/include/uapi/asm/ipcbuf.h | 2
-rw-r--r--  arch/xtensa/include/uapi/asm/msgbuf.h | 2
-rw-r--r--  arch/xtensa/include/uapi/asm/sembuf.h | 1
-rw-r--r--  drivers/acpi/apei/ghes.c | 2
-rw-r--r--  drivers/auxdisplay/charlcd.c | 34
-rw-r--r--  drivers/base/node.c | 9
-rw-r--r--  drivers/gpio/gpio-104-dio-48e.c | 73
-rw-r--r--  drivers/gpio/gpio-104-idi-48.c | 36
-rw-r--r--  drivers/gpio/gpio-74x164.c | 19
-rw-r--r--  drivers/gpio/gpio-gpio-mm.c | 73
-rw-r--r--  drivers/gpio/gpio-max3191x.c | 19
-rw-r--r--  drivers/gpio/gpio-pca953x.c | 195
-rw-r--r--  drivers/gpio/gpio-pci-idio-16.c | 75
-rw-r--r--  drivers/gpio/gpio-pcie-idio-24.c | 109
-rw-r--r--  drivers/gpio/gpio-pisosr.c | 12
-rw-r--r--  drivers/gpio/gpio-uniphier.c | 13
-rw-r--r--  drivers/gpio/gpio-ws16c48.c | 73
-rw-r--r--  drivers/gpu/drm/drm_property.c | 2
-rw-r--r--  drivers/misc/sram-exec.c | 2
-rw-r--r--  drivers/rapidio/rio-access.c | 2
-rw-r--r--  drivers/rapidio/rio-driver.c | 1
-rw-r--r--  drivers/thermal/intel/intel_soc_dts_iosf.c | 31
-rw-r--r--  drivers/thermal/intel/intel_soc_dts_iosf.h | 2
-rw-r--r--  drivers/usb/core/hub.c | 5
-rw-r--r--  drivers/vhost/vhost.c | 6
-rw-r--r--  drivers/vhost/vhost.h | 1
-rw-r--r--  fs/aio.c | 9
-rw-r--r--  fs/binfmt_elf.c | 56
-rw-r--r--  fs/buffer.c | 56
-rw-r--r--  fs/eventpoll.c | 52
-rw-r--r--  fs/proc/Kconfig | 8
-rw-r--r--  fs/proc/generic.c | 37
-rw-r--r--  fs/proc/internal.h | 2
-rw-r--r--  fs/proc/page.c | 40
-rw-r--r--  fs/ramfs/inode.c | 12
-rw-r--r--  include/asm-generic/4level-fixup.h | 39
-rw-r--r--  include/asm-generic/bitops/find.h | 17
-rw-r--r--  include/linux/bitmap.h | 51
-rw-r--r--  include/linux/bitops.h | 12
-rw-r--r--  include/linux/build_bug.h | 4
-rw-r--r--  include/linux/genalloc.h | 2
-rw-r--r--  include/linux/kcov.h | 23
-rw-r--r--  include/linux/kernel.h | 19
-rw-r--r--  include/linux/memory_hotplug.h | 7
-rw-r--r--  include/linux/memremap.h | 6
-rw-r--r--  include/linux/mm.h | 10
-rw-r--r--  include/linux/notifier.h | 4
-rw-r--r--  include/linux/proc_fs.h | 4
-rw-r--r--  include/linux/rbtree_augmented.h | 6
-rw-r--r--  include/linux/sched.h | 17
-rw-r--r--  include/linux/string.h | 45
-rw-r--r--  include/linux/sysctl.h | 6
-rw-r--r--  include/linux/thread_info.h | 2
-rw-r--r--  include/linux/vmalloc.h | 5
-rw-r--r--  include/linux/vmstat.h | 50
-rw-r--r--  include/linux/wait.h | 4
-rw-r--r--  include/trace/events/vmscan.h | 71
-rw-r--r--  include/uapi/asm-generic/ipcbuf.h | 2
-rw-r--r--  include/uapi/asm-generic/msgbuf.h | 2
-rw-r--r--  include/uapi/asm-generic/sembuf.h | 1
-rw-r--r--  include/uapi/linux/kcov.h | 28
-rw-r--r--  include/uapi/linux/scc.h | 1
-rw-r--r--  init/Kconfig | 76
-rw-r--r--  ipc/mqueue.c | 105
-rw-r--r--  ipc/msg.c | 61
-rw-r--r--  ipc/sem.c | 66
-rw-r--r--  kernel/dma/remap.c | 2
-rw-r--r--  kernel/hung_task.c | 44
-rw-r--r--  kernel/kcov.c | 547
-rw-r--r--  kernel/notifier.c | 43
-rw-r--r--  kernel/profile.c | 6
-rw-r--r--  kernel/sys.c | 4
-rw-r--r--  lib/Kconfig.debug | 6
-rw-r--r--  lib/bitmap.c | 12
-rw-r--r--  lib/find_bit.c | 14
-rw-r--r--  lib/genalloc.c | 5
-rw-r--r--  lib/math/rational.c | 63
-rw-r--r--  lib/test_bitmap.c | 202
-rw-r--r--  lib/test_meminit.c | 20
-rw-r--r--  lib/ubsan.c | 64
-rw-r--r--  mm/kasan/common.c | 1
-rw-r--r--  mm/memcontrol.c | 52
-rw-r--r--  mm/memory.c | 10
-rw-r--r--  mm/memory_hotplug.c | 102
-rw-r--r--  mm/mempolicy.c | 10
-rw-r--r--  mm/memremap.c | 36
-rw-r--r--  mm/mmap.c | 43
-rw-r--r--  mm/nommu.c | 10
-rw-r--r--  mm/oom_kill.c | 67
-rw-r--r--  mm/page_alloc.c | 32
-rw-r--r--  mm/slab_common.c | 12
-rw-r--r--  mm/slub.c | 88
-rw-r--r--  mm/vmalloc.c | 11
-rw-r--r--  mm/vmscan.c | 18
-rw-r--r--  mm/vmstat.c | 60
-rw-r--r--  net/sunrpc/rpc_pipe.c | 2
-rwxr-xr-x  scripts/checkpatch.pl | 37
-rwxr-xr-x  scripts/get_maintainer.pl | 38
-rw-r--r--  tools/testing/selftests/Makefile | 1
-rw-r--r--  tools/testing/selftests/filesystems/epoll/.gitignore | 1
-rw-r--r--  tools/testing/selftests/filesystems/epoll/Makefile | 7
-rw-r--r--  tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c | 3074
-rw-r--r--  usr/include/Makefile | 4
190 files changed, 5990 insertions(+), 1674 deletions(-)
diff --git a/.gitattributes b/.gitattributes
index 89c411b5ce6b..4b32eaa9571e 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,2 +1,4 @@
*.c diff=cpp
*.h diff=cpp
+*.dtsi diff=dts
+*.dts diff=dts
diff --git a/Documentation/core-api/genalloc.rst b/Documentation/core-api/genalloc.rst
index 098a46f55798..a5af2cbf58a5 100644
--- a/Documentation/core-api/genalloc.rst
+++ b/Documentation/core-api/genalloc.rst
@@ -129,7 +129,7 @@ writing of special-purpose memory allocators in the future.
:functions: gen_pool_for_each_chunk
.. kernel-doc:: lib/genalloc.c
- :functions: addr_in_gen_pool
+ :functions: gen_pool_has_addr
.. kernel-doc:: lib/genalloc.c
:functions: gen_pool_avail
diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst
index 42b612677799..36890b026e77 100644
--- a/Documentation/dev-tools/kcov.rst
+++ b/Documentation/dev-tools/kcov.rst
@@ -34,6 +34,7 @@ Profiling data will only become accessible once debugfs has been mounted::
Coverage collection
-------------------
+
The following program demonstrates coverage collection from within a test
program using kcov:
@@ -128,6 +129,7 @@ only need to enable coverage (disable happens automatically on thread end).
Comparison operands collection
------------------------------
+
Comparison operands collection is similar to coverage collection:
.. code-block:: c
@@ -202,3 +204,130 @@ Comparison operands collection is similar to coverage collection:
Note that the kcov modes (coverage collection or comparison operands) are
mutually exclusive.
+
+Remote coverage collection
+--------------------------
+
+With KCOV_ENABLE coverage is collected only for syscalls that are issued
+from the current process. With KCOV_REMOTE_ENABLE it's possible to collect
+coverage for arbitrary parts of the kernel code, provided that those parts
+are annotated with kcov_remote_start()/kcov_remote_stop().
+
+This allows collecting coverage from two types of kernel background
+threads: global ones, which are spawned during kernel boot in a limited
+number of instances (e.g. one USB hub_event() worker thread is spawned per
+USB HCD); and local ones, which are spawned when a user interacts with
+some kernel interface (e.g. vhost workers).
+
+To enable collecting coverage from a global background thread, a unique
+global handle must be assigned and passed to the corresponding
+kcov_remote_start() call. Then a userspace process can pass a list of such
+handles to the KCOV_REMOTE_ENABLE ioctl in the handles array field of the
+kcov_remote_arg struct. This will attach the kcov device in use to the
+code sections referenced by those handles.
+
+Since there might be many local background threads spawned from different
+userspace processes, we can't use a single global handle per annotation.
+Instead, the userspace process passes a non-zero handle through the
+common_handle field of the kcov_remote_arg struct. This common handle gets
+saved to the kcov_handle field in the current task_struct and needs to be
+passed to the newly spawned threads via custom annotations. Those threads
+should in turn be annotated with kcov_remote_start()/kcov_remote_stop().
+
+Internally kcov stores handles as u64 integers. The top byte of a handle
+is used to denote the id of a subsystem that this handle belongs to, and
+the lower 4 bytes are used to denote the id of a thread instance within
+that subsystem. A reserved value 0 is used as a subsystem id for common
+handles as they don't belong to a particular subsystem. Bytes 4-6 are
+currently reserved and must be zero. In the future the number of bytes
+used for the subsystem or handle ids might be increased.
+
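+For example, with subsystem id 0x01 (USB) and instance id 1 (USB bus
+number 1), kcov_remote_handle() in the example below yields
+(0x01ull << 56) | 1, i.e. 0x0100000000000001.
+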
+When a particular userspace process collects coverage via a common
+handle, kcov will collect coverage for each code section that is annotated
+to use the common handle obtained as kcov_handle from the current
+task_struct. Non-common handles, by contrast, allow coverage to be
+collected selectively from different subsystems.
+
+.. code-block:: c
+
+ struct kcov_remote_arg {
+ unsigned trace_mode;
+ unsigned area_size;
+ unsigned num_handles;
+ uint64_t common_handle;
+ uint64_t handles[0];
+ };
+
+ #define KCOV_INIT_TRACE _IOR('c', 1, unsigned long)
+ #define KCOV_DISABLE _IO('c', 101)
+ #define KCOV_REMOTE_ENABLE _IOW('c', 102, struct kcov_remote_arg)
+
+ #define COVER_SIZE (64 << 10)
+
+ #define KCOV_TRACE_PC 0
+
+ #define KCOV_SUBSYSTEM_COMMON (0x00ull << 56)
+ #define KCOV_SUBSYSTEM_USB (0x01ull << 56)
+
+ #define KCOV_SUBSYSTEM_MASK (0xffull << 56)
+ #define KCOV_INSTANCE_MASK (0xffffffffull)
+
+ static inline __u64 kcov_remote_handle(__u64 subsys, __u64 inst)
+ {
+ if (subsys & ~KCOV_SUBSYSTEM_MASK || inst & ~KCOV_INSTANCE_MASK)
+ return 0;
+ return subsys | inst;
+ }
+
+ #define KCOV_COMMON_ID 0x42
+ #define KCOV_USB_BUS_NUM 1
+
+ int main(int argc, char **argv)
+ {
+ int fd;
+ unsigned long *cover, n, i;
+ struct kcov_remote_arg *arg;
+
+ fd = open("/sys/kernel/debug/kcov", O_RDWR);
+ if (fd == -1)
+ perror("open"), exit(1);
+ if (ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE))
+ perror("ioctl"), exit(1);
+ cover = (unsigned long*)mmap(NULL, COVER_SIZE * sizeof(unsigned long),
+ PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if ((void*)cover == MAP_FAILED)
+ perror("mmap"), exit(1);
+
+ /* Enable coverage collection via common handle and from USB bus #1. */
+ arg = calloc(1, sizeof(*arg) + sizeof(uint64_t));
+ if (!arg)
+ perror("calloc"), exit(1);
+ arg->trace_mode = KCOV_TRACE_PC;
+ arg->area_size = COVER_SIZE;
+ arg->num_handles = 1;
+ arg->common_handle = kcov_remote_handle(KCOV_SUBSYSTEM_COMMON,
+ KCOV_COMMON_ID);
+ arg->handles[0] = kcov_remote_handle(KCOV_SUBSYSTEM_USB,
+ KCOV_USB_BUS_NUM);
+ if (ioctl(fd, KCOV_REMOTE_ENABLE, arg))
+ perror("ioctl"), free(arg), exit(1);
+ free(arg);
+
+ /*
+ * Here the user needs to trigger execution of a kernel code section
+ * that is either annotated with the common handle, or to trigger some
+ * activity on USB bus #1.
+ */
+ sleep(2);
+
+ n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED);
+ for (i = 0; i < n; i++)
+ printf("0x%lx\n", cover[i + 1]);
+ if (ioctl(fd, KCOV_DISABLE, 0))
+ perror("ioctl"), exit(1);
+ if (munmap(cover, COVER_SIZE * sizeof(unsigned long)))
+ perror("munmap"), exit(1);
+ if (close(fd))
+ perror("close"), exit(1);
+ return 0;
+ }
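For reference, the kernel side of this interface is a pair of annotations
around the instrumented section. A minimal sketch, assuming only the
kcov_remote_start()/kcov_remote_stop() calls named in the text above; the
worker type and its kcov_handle field are hypothetical names standing in
for whatever custom mechanism passes the handle to the spawned thread:

.. code-block:: c

    /*
     * Sketch: a local background thread annotated for remote coverage.
     * The spawning task's common handle is assumed to have been copied
     * into ctx->kcov_handle when the worker was created.
     */
    static int worker_fn(void *data)
    {
            struct worker_ctx *ctx = data;          /* hypothetical type */

            kcov_remote_start(ctx->kcov_handle);    /* begin collection */
            process_pending_work(ctx);              /* covered section */
            kcov_remote_stop();                     /* end collection */
            return 0;
    }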
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index ec3b5865c1be..7146da061693 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -1868,12 +1868,16 @@ There are some more advanced barrier functions:
(*) smp_mb__before_atomic();
(*) smp_mb__after_atomic();
- These are for use with atomic (such as add, subtract, increment and
- decrement) functions that don't return a value, especially when used for
- reference counting. These functions do not imply memory barriers.
-
- These are also used for atomic bitop functions that do not return a
- value (such as set_bit and clear_bit).
+ These are for use with atomic RMW functions that do not imply memory
+ barriers, but where the code needs a memory barrier. Examples for atomic
+ RMW functions that do not imply a memory barrier are e.g. add,
+ subtract, (failed) conditional operations, _relaxed functions,
+ but not atomic_read or atomic_set. A common example where a memory
+ barrier may be required is when atomic ops are used for reference
+ counting.
+
+ These are also used for atomic RMW bitop functions that do not imply a
+ memory barrier (such as set_bit and clear_bit).
As an example, consider a piece of code that marks an object as being dead
and then decrements the object's reference count:
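The example that this trailing context introduces continues in
memory-barriers.txt with the classic refcounting pattern; a sketch for
orientation (the barrier supplies the ordering that the non-value-returning
atomic_dec() does not imply on its own):

.. code-block:: c

    obj->dead = 1;
    smp_mb__before_atomic();        /* order the store before the RMW */
    atomic_dec(&obj->ref_count);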
diff --git a/Documentation/process/deprecated.rst b/Documentation/process/deprecated.rst
index 179f2a5625a0..a0ffdc8daef3 100644
--- a/Documentation/process/deprecated.rst
+++ b/Documentation/process/deprecated.rst
@@ -84,7 +84,7 @@ buffer. This could result in linear overflows beyond the
end of the buffer, leading to all kinds of misbehaviors. While
`CONFIG_FORTIFY_SOURCE=y` and various compiler flags help reduce the
risk of using this function, there is no good reason to add new uses of
-this function. The safe replacement is :c:func:`strscpy`.
+this function. The safe replacement is stracpy() or strscpy().
strncpy() on NUL-terminated strings
-----------------------------------
@@ -93,9 +93,9 @@ will be NUL terminated. This can lead to various linear read overflows
and other misbehavior due to the missing termination. It also NUL-pads the
destination buffer if the source contents are shorter than the destination
buffer size, which may be a needless performance penalty for callers using
-only NUL-terminated strings. The safe replacement is :c:func:`strscpy`.
-(Users of :c:func:`strscpy` still needing NUL-padding will need an
-explicit :c:func:`memset` added.)
+only NUL-terminated strings. In this case, the safe replacement is
+stracpy() or strscpy(). If, however, the destination buffer still needs
+NUL-padding, the safe replacement is stracpy_pad().
If a caller is using non-NUL-terminated strings, :c:func:`strncpy()` can
still be used, but destinations should be marked with the `__nonstring
@@ -107,7 +107,7 @@ strlcpy()
:c:func:`strlcpy` reads the entire source buffer first, possibly exceeding
the given limit of bytes to copy. This is inefficient and can lead to
linear read overflows if a source string is not NUL-terminated. The
-safe replacement is :c:func:`strscpy`.
+safe replacement is stracpy() or strscpy().
Variable Length Arrays (VLAs)
-----------------------------
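A minimal usage sketch of the replacements recommended above, assuming the
strscpy() semantics from include/linux/string.h and the stracpy()/
stracpy_pad() helpers this series proposes (buf and src are hypothetical):

.. code-block:: c

    char buf[16];

    /* strscpy(): explicit destination size, always NUL-terminates,
     * returns -E2BIG if the source had to be truncated. */
    if (strscpy(buf, src, sizeof(buf)) < 0)
            pr_warn("input truncated\n");

    /* stracpy()/stracpy_pad() infer the size from the array argument;
     * the _pad variant additionally NUL-pads the remainder. */
    stracpy(buf, src);
    stracpy_pad(buf, src);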
diff --git a/arch/Kconfig b/arch/Kconfig
index 7b861fe3f900..48b5e103bdb0 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -72,11 +72,11 @@ config KPROBES
If in doubt, say "N".
config JUMP_LABEL
- bool "Optimize very unlikely/likely branches"
- depends on HAVE_ARCH_JUMP_LABEL
- depends on CC_HAS_ASM_GOTO
- help
- This option enables a transparent branch optimization that
+ bool "Optimize very unlikely/likely branches"
+ depends on HAVE_ARCH_JUMP_LABEL
+ depends on CC_HAS_ASM_GOTO
+ help
+ This option enables a transparent branch optimization that
makes certain almost-always-true or almost-always-false branch
conditions even cheaper to execute within the kernel.
@@ -84,7 +84,7 @@ config JUMP_LABEL
scheduler functionality, networking code and KVM have such
branches and include support for this optimization technique.
- If it is detected that the compiler has support for "asm goto",
+ If it is detected that the compiler has support for "asm goto",
the kernel will compile such branches with just a nop
instruction. When the condition flag is toggled to true, the
nop will be converted to a jump instruction to execute the
@@ -151,8 +151,8 @@ config HAVE_EFFICIENT_UNALIGNED_ACCESS
information on the topic of unaligned memory accesses.
config ARCH_USE_BUILTIN_BSWAP
- bool
- help
+ bool
+ help
Modern versions of GCC (since 4.4) have builtin functions
for handling byte-swapping. Using these, instead of the old
inline assembler that the architecture code provides in the
@@ -221,10 +221,10 @@ config HAVE_DMA_CONTIGUOUS
bool
config GENERIC_SMP_IDLE_THREAD
- bool
+ bool
config GENERIC_IDLE_POLL_SETUP
- bool
+ bool
config ARCH_HAS_FORTIFY_SOURCE
bool
@@ -257,7 +257,7 @@ config ARCH_HAS_UNCACHED_SEGMENT
# Select if arch init_task must go in the __init_task_data section
config ARCH_TASK_STRUCT_ON_STACK
- bool
+ bool
# Select if arch has its private alloc_task_struct() function
config ARCH_TASK_STRUCT_ALLOCATOR
diff --git a/arch/alpha/include/asm/mmzone.h b/arch/alpha/include/asm/mmzone.h
index 889b5d3ad825..7ee144f484f1 100644
--- a/arch/alpha/include/asm/mmzone.h
+++ b/arch/alpha/include/asm/mmzone.h
@@ -73,7 +73,6 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n)
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> 32))
-#define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> 32))
#define pte_pfn(pte) (pte_val(pte) >> 32)
#define mk_pte(page, pgprot) \
diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
index eb91f1e85629..a1a29f60934c 100644
--- a/arch/alpha/include/asm/pgalloc.h
+++ b/arch/alpha/include/asm/pgalloc.h
@@ -27,9 +27,9 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
}
static inline void
-pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
+pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
{
- pgd_set(pgd, pmd);
+ pud_set(pud, pmd);
}
extern pgd_t *pgd_alloc(struct mm_struct *mm);
diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index 065b57f408c3..299791ce14b6 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -2,7 +2,7 @@
#ifndef _ALPHA_PGTABLE_H
#define _ALPHA_PGTABLE_H
-#include <asm-generic/4level-fixup.h>
+#include <asm-generic/pgtable-nopud.h>
/*
* This file contains the functions and defines necessary to modify and use
@@ -226,8 +226,8 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
extern inline void pmd_set(pmd_t * pmdp, pte_t * ptep)
{ pmd_val(*pmdp) = _PAGE_TABLE | ((((unsigned long) ptep) - PAGE_OFFSET) << (32-PAGE_SHIFT)); }
-extern inline void pgd_set(pgd_t * pgdp, pmd_t * pmdp)
-{ pgd_val(*pgdp) = _PAGE_TABLE | ((((unsigned long) pmdp) - PAGE_OFFSET) << (32-PAGE_SHIFT)); }
+extern inline void pud_set(pud_t * pudp, pmd_t * pmdp)
+{ pud_val(*pudp) = _PAGE_TABLE | ((((unsigned long) pmdp) - PAGE_OFFSET) << (32-PAGE_SHIFT)); }
extern inline unsigned long
@@ -238,11 +238,11 @@ pmd_page_vaddr(pmd_t pmd)
#ifndef CONFIG_DISCONTIGMEM
#define pmd_page(pmd) (mem_map + ((pmd_val(pmd) & _PFN_MASK) >> 32))
-#define pgd_page(pgd) (mem_map + ((pgd_val(pgd) & _PFN_MASK) >> 32))
+#define pud_page(pud) (mem_map + ((pud_val(pud) & _PFN_MASK) >> 32))
#endif
-extern inline unsigned long pgd_page_vaddr(pgd_t pgd)
-{ return PAGE_OFFSET + ((pgd_val(pgd) & _PFN_MASK) >> (32-PAGE_SHIFT)); }
+extern inline unsigned long pud_page_vaddr(pud_t pgd)
+{ return PAGE_OFFSET + ((pud_val(pgd) & _PFN_MASK) >> (32-PAGE_SHIFT)); }
extern inline int pte_none(pte_t pte) { return !pte_val(pte); }
extern inline int pte_present(pte_t pte) { return pte_val(pte) & _PAGE_VALID; }
@@ -256,10 +256,10 @@ extern inline int pmd_bad(pmd_t pmd) { return (pmd_val(pmd) & ~_PFN_MASK) != _P
extern inline int pmd_present(pmd_t pmd) { return pmd_val(pmd) & _PAGE_VALID; }
extern inline void pmd_clear(pmd_t * pmdp) { pmd_val(*pmdp) = 0; }
-extern inline int pgd_none(pgd_t pgd) { return !pgd_val(pgd); }
-extern inline int pgd_bad(pgd_t pgd) { return (pgd_val(pgd) & ~_PFN_MASK) != _PAGE_TABLE; }
-extern inline int pgd_present(pgd_t pgd) { return pgd_val(pgd) & _PAGE_VALID; }
-extern inline void pgd_clear(pgd_t * pgdp) { pgd_val(*pgdp) = 0; }
+extern inline int pud_none(pud_t pud) { return !pud_val(pud); }
+extern inline int pud_bad(pud_t pud) { return (pud_val(pud) & ~_PFN_MASK) != _PAGE_TABLE; }
+extern inline int pud_present(pud_t pud) { return pud_val(pud) & _PAGE_VALID; }
+extern inline void pud_clear(pud_t * pudp) { pud_val(*pudp) = 0; }
/*
* The following only work if pte_present() is true.
@@ -301,9 +301,9 @@ extern inline pte_t pte_mkspecial(pte_t pte) { return pte; }
*/
/* Find an entry in the second-level page table.. */
-extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
+extern inline pmd_t * pmd_offset(pud_t * dir, unsigned long address)
{
- pmd_t *ret = (pmd_t *) pgd_page_vaddr(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1));
+ pmd_t *ret = (pmd_t *) pud_page_vaddr(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1));
smp_read_barrier_depends(); /* see above */
return ret;
}
diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
index e2cbec3789e8..12e218d3792a 100644
--- a/arch/alpha/mm/init.c
+++ b/arch/alpha/mm/init.c
@@ -146,6 +146,8 @@ callback_init(void * kernel_end)
{
struct crb_struct * crb;
pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *pmd;
void *two_pages;
@@ -184,8 +186,10 @@ callback_init(void * kernel_end)
memset(two_pages, 0, 2*PAGE_SIZE);
pgd = pgd_offset_k(VMALLOC_START);
- pgd_set(pgd, (pmd_t *)two_pages);
- pmd = pmd_offset(pgd, VMALLOC_START);
+ p4d = p4d_offset(pgd, VMALLOC_START);
+ pud = pud_offset(p4d, VMALLOC_START);
+ pud_set(pud, (pmd_t *)two_pages);
+ pmd = pmd_offset(pud, VMALLOC_START);
pmd_set(pmd, (pte_t *)(two_pages + PAGE_SIZE));
if (alpha_using_srm) {
@@ -214,9 +218,9 @@ callback_init(void * kernel_end)
/* Newer consoles (especially on larger
systems) may require more pages of
PTEs. Grab additional pages as needed. */
- if (pmd != pmd_offset(pgd, vaddr)) {
+ if (pmd != pmd_offset(pud, vaddr)) {
memset(kernel_end, 0, PAGE_SIZE);
- pmd = pmd_offset(pgd, vaddr);
+ pmd = pmd_offset(pud, vaddr);
pmd_set(pmd, (pte_t *)kernel_end);
kernel_end += PAGE_SIZE;
}
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 3ae120cd1715..eabcb48a7840 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -12,7 +12,7 @@
#ifndef CONFIG_MMU
-#include <asm-generic/4level-fixup.h>
+#include <asm-generic/pgtable-nopud.h>
#include <asm/pgtable-nommu.h>
#else
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 1df6eb42f22e..e822af0d9219 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -529,7 +529,7 @@ static void *__alloc_from_pool(size_t size, struct page **ret_page)
static bool __in_atomic_pool(void *start, size_t size)
{
- return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
+ return gen_pool_has_addr(atomic_pool, (unsigned long)start, size);
}
static int __free_from_pool(void *start, size_t size)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 5a3b15a14a7f..40797cbfba2d 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1070,7 +1070,6 @@ void arch_remove_memory(int nid, u64 start, u64 size,
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- struct zone *zone;
/*
* FIXME: Cleanup page tables (also in arch_add_memory() in case
@@ -1079,7 +1078,6 @@ void arch_remove_memory(int nid, u64 start, u64 size,
* unplug. ARCH_ENABLE_MEMORY_HOTREMOVE must not be
* unlocked yet.
*/
- zone = page_zone(pfn_to_page(start_pfn));
- __remove_pages(zone, start_pfn, nr_pages, altmap);
+ __remove_pages(start_pfn, nr_pages, altmap);
}
#endif
diff --git a/arch/c6x/include/asm/pgtable.h b/arch/c6x/include/asm/pgtable.h
index 0b6919c00413..197c473b796a 100644
--- a/arch/c6x/include/asm/pgtable.h
+++ b/arch/c6x/include/asm/pgtable.h
@@ -8,7 +8,7 @@
#ifndef _ASM_C6X_PGTABLE_H
#define _ASM_C6X_PGTABLE_H
-#include <asm-generic/4level-fixup.h>
+#include <asm-generic/pgtable-nopud.h>
#include <asm/setup.h>
#include <asm/page.h>
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 58fd67068bac..b01d68a2d5d9 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -689,9 +689,7 @@ void arch_remove_memory(int nid, u64 start, u64 size,
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- struct zone *zone;
- zone = page_zone(pfn_to_page(start_pfn));
- __remove_pages(zone, start_pfn, nr_pages, altmap);
+ __remove_pages(start_pfn, nr_pages, altmap);
}
#endif
diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
index b34d44d666a4..82ec54c2eaa4 100644
--- a/arch/m68k/include/asm/mcf_pgalloc.h
+++ b/arch/m68k/include/asm/mcf_pgalloc.h
@@ -28,9 +28,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
return (pmd_t *) pgd;
}
-#define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
-#define pmd_alloc_one(mm, address) ({ BUG(); ((pmd_t *)2); })
-
#define pmd_populate(mm, pmd, page) (pmd_val(*pmd) = \
(unsigned long)(page_address(page)))
@@ -45,8 +42,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
__free_page(page);
}
-#define __pmd_free_tlb(tlb, pmd, address) do { } while (0)
-
static inline struct page *pte_alloc_one(struct mm_struct *mm)
{
struct page *page = alloc_pages(GFP_DMA, 0);
@@ -100,6 +95,4 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
return new_pgd;
}
-#define pgd_populate(mm, pmd, pte) BUG()
-
#endif /* M68K_MCF_PGALLOC_H */
diff --git a/arch/m68k/include/asm/mcf_pgtable.h b/arch/m68k/include/asm/mcf_pgtable.h
index 5d5502cb2b2d..b9f45aeded25 100644
--- a/arch/m68k/include/asm/mcf_pgtable.h
+++ b/arch/m68k/include/asm/mcf_pgtable.h
@@ -198,17 +198,9 @@ static inline int pmd_bad2(pmd_t *pmd) { return 0; }
#define pmd_present(pmd) (!pmd_none2(&(pmd)))
static inline void pmd_clear(pmd_t *pmdp) { pmd_val(*pmdp) = 0; }
-static inline int pgd_none(pgd_t pgd) { return 0; }
-static inline int pgd_bad(pgd_t pgd) { return 0; }
-static inline int pgd_present(pgd_t pgd) { return 1; }
-static inline void pgd_clear(pgd_t *pgdp) {}
-
#define pte_ERROR(e) \
printk(KERN_ERR "%s:%d: bad pte %08lx.\n", \
__FILE__, __LINE__, pte_val(e))
-#define pmd_ERROR(e) \
- printk(KERN_ERR "%s:%d: bad pmd %08lx.\n", \
- __FILE__, __LINE__, pmd_val(e))
#define pgd_ERROR(e) \
printk(KERN_ERR "%s:%d: bad pgd %08lx.\n", \
__FILE__, __LINE__, pgd_val(e))
@@ -340,14 +332,6 @@ extern pgd_t kernel_pg_dir[PTRS_PER_PGD];
#define pgd_offset_k(address) pgd_offset(&init_mm, address)
/*
- * Find an entry in the second-level pagetable.
- */
-static inline pmd_t *pmd_offset(pgd_t *pgd, unsigned long address)
-{
- return (pmd_t *) pgd;
-}
-
-/*
* Find an entry in the third-level pagetable.
*/
#define __pte_offset(address) ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
@@ -360,12 +344,16 @@ static inline pmd_t *pmd_offset(pgd_t *pgd, unsigned long address)
static inline void nocache_page(void *vaddr)
{
pgd_t *dir;
+ p4d_t *p4dp;
+ pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
unsigned long addr = (unsigned long) vaddr;
dir = pgd_offset_k(addr);
- pmdp = pmd_offset(dir, addr);
+ p4dp = p4d_offset(dir, addr);
+ pudp = pud_offset(p4dp, addr);
+ pmdp = pmd_offset(pudp, addr);
ptep = pte_offset_kernel(pmdp, addr);
*ptep = pte_mknocache(*ptep);
}
@@ -376,12 +364,16 @@ static inline void nocache_page(void *vaddr)
static inline void cache_page(void *vaddr)
{
pgd_t *dir;
+ p4d_t *p4dp;
+ pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
unsigned long addr = (unsigned long) vaddr;
dir = pgd_offset_k(addr);
- pmdp = pmd_offset(dir, addr);
+ p4dp = p4d_offset(dir, addr);
+ pudp = pud_offset(p4dp, addr);
+ pmdp = pmd_offset(pudp, addr);
ptep = pte_offset_kernel(pmdp, addr);
*ptep = pte_mkcache(*ptep);
}
diff --git a/arch/m68k/include/asm/mmu_context.h b/arch/m68k/include/asm/mmu_context.h
index f5b1852b4663..cac9f289d1f6 100644
--- a/arch/m68k/include/asm/mmu_context.h
+++ b/arch/m68k/include/asm/mmu_context.h
@@ -100,6 +100,8 @@ static inline void load_ksp_mmu(struct task_struct *task)
struct mm_struct *mm;
int asid;
pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *pmd;
pte_t *pte;
unsigned long mmuar;
@@ -127,7 +129,15 @@ static inline void load_ksp_mmu(struct task_struct *task)
if (pgd_none(*pgd))
goto bug;
- pmd = pmd_offset(pgd, mmuar);
+ p4d = p4d_offset(pgd, mmuar);
+ if (p4d_none(*p4d))
+ goto bug;
+
+ pud = pud_offset(p4d, mmuar);
+ if (pud_none(*pud))
+ goto bug;
+
+ pmd = pmd_offset(pud, mmuar);
if (pmd_none(*pmd))
goto bug;
diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h
index acab315c851f..ff9cc401ffd1 100644
--- a/arch/m68k/include/asm/motorola_pgalloc.h
+++ b/arch/m68k/include/asm/motorola_pgalloc.h
@@ -106,9 +106,9 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page
}
#define pmd_pgtable(pmd) pmd_page(pmd)
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
{
- pgd_set(pgd, pmd);
+ pud_set(pud, pmd);
}
#endif /* _MOTOROLA_PGALLOC_H */
diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h
index 7f66a7bad7a5..62bedc61f110 100644
--- a/arch/m68k/include/asm/motorola_pgtable.h
+++ b/arch/m68k/include/asm/motorola_pgtable.h
@@ -117,14 +117,14 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
}
}
-static inline void pgd_set(pgd_t *pgdp, pmd_t *pmdp)
+static inline void pud_set(pud_t *pudp, pmd_t *pmdp)
{
- pgd_val(*pgdp) = _PAGE_TABLE | _PAGE_ACCESSED | __pa(pmdp);
+ pud_val(*pudp) = _PAGE_TABLE | _PAGE_ACCESSED | __pa(pmdp);
}
#define __pte_page(pte) ((unsigned long)__va(pte_val(pte) & PAGE_MASK))
#define __pmd_page(pmd) ((unsigned long)__va(pmd_val(pmd) & _TABLE_MASK))
-#define __pgd_page(pgd) ((unsigned long)__va(pgd_val(pgd) & _TABLE_MASK))
+#define pud_page_vaddr(pud) ((unsigned long)__va(pud_val(pud) & _TABLE_MASK))
#define pte_none(pte) (!pte_val(pte))
@@ -147,11 +147,11 @@ static inline void pgd_set(pgd_t *pgdp, pmd_t *pmdp)
#define pmd_page(pmd) virt_to_page(__va(pmd_val(pmd)))
-#define pgd_none(pgd) (!pgd_val(pgd))
-#define pgd_bad(pgd) ((pgd_val(pgd) & _DESCTYPE_MASK) != _PAGE_TABLE)
-#define pgd_present(pgd) (pgd_val(pgd) & _PAGE_TABLE)
-#define pgd_clear(pgdp) ({ pgd_val(*pgdp) = 0; })
-#define pgd_page(pgd) (mem_map + ((unsigned long)(__va(pgd_val(pgd)) - PAGE_OFFSET) >> PAGE_SHIFT))
+#define pud_none(pud) (!pud_val(pud))
+#define pud_bad(pud) ((pud_val(pud) & _DESCTYPE_MASK) != _PAGE_TABLE)
+#define pud_present(pud) (pud_val(pud) & _PAGE_TABLE)
+#define pud_clear(pudp) ({ pud_val(*pudp) = 0; })
+#define pud_page(pud) (mem_map + ((unsigned long)(__va(pud_val(pud)) - PAGE_OFFSET) >> PAGE_SHIFT))
#define pte_ERROR(e) \
printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
@@ -209,9 +209,9 @@ static inline pgd_t *pgd_offset_k(unsigned long address)
/* Find an entry in the second-level page table.. */
-static inline pmd_t *pmd_offset(pgd_t *dir, unsigned long address)
+static inline pmd_t *pmd_offset(pud_t *dir, unsigned long address)
{
- return (pmd_t *)__pgd_page(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PMD-1));
+ return (pmd_t *)pud_page_vaddr(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PMD-1));
}
/* Find an entry in the third-level page table.. */
@@ -239,11 +239,15 @@ static inline void nocache_page(void *vaddr)
if (CPU_IS_040_OR_060) {
pgd_t *dir;
+ p4d_t *p4dp;
+ pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
dir = pgd_offset_k(addr);
- pmdp = pmd_offset(dir, addr);
+ p4dp = p4d_offset(dir, addr);
+ pudp = pud_offset(p4dp, addr);
+ pmdp = pmd_offset(pudp, addr);
ptep = pte_offset_kernel(pmdp, addr);
*ptep = pte_mknocache(*ptep);
}
@@ -255,11 +259,15 @@ static inline void cache_page(void *vaddr)
if (CPU_IS_040_OR_060) {
pgd_t *dir;
+ p4d_t *p4dp;
+ pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
dir = pgd_offset_k(addr);
- pmdp = pmd_offset(dir, addr);
+ p4dp = p4d_offset(dir, addr);
+ pudp = pud_offset(p4dp, addr);
+ pmdp = pmd_offset(pudp, addr);
ptep = pte_offset_kernel(pmdp, addr);
*ptep = pte_mkcache(*ptep);
}
diff --git a/arch/m68k/include/asm/page.h b/arch/m68k/include/asm/page.h
index 700d8195880c..05e1e1e77a9a 100644
--- a/arch/m68k/include/asm/page.h
+++ b/arch/m68k/include/asm/page.h
@@ -21,19 +21,22 @@
/*
* These are used to make use of C type-checking..
*/
-typedef struct { unsigned long pte; } pte_t;
+#if !defined(CONFIG_MMU) || CONFIG_PGTABLE_LEVELS == 3
typedef struct { unsigned long pmd[16]; } pmd_t;
+#define pmd_val(x) ((&x)->pmd[0])
+#define __pmd(x) ((pmd_t) { { (x) }, })
+#endif
+
+typedef struct { unsigned long pte; } pte_t;
typedef struct { unsigned long pgd; } pgd_t;
typedef struct { unsigned long pgprot; } pgprot_t;
typedef struct page *pgtable_t;
#define pte_val(x) ((x).pte)
-#define pmd_val(x) ((&x)->pmd[0])
#define pgd_val(x) ((x).pgd)
#define pgprot_val(x) ((x).pgprot)
#define __pte(x) ((pte_t) { (x) } )
-#define __pmd(x) ((pmd_t) { { (x) }, })
#define __pgd(x) ((pgd_t) { (x) } )
#define __pgprot(x) ((pgprot_t) { (x) } )
diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h
index 646c174fff99..2bf5c3501e78 100644
--- a/arch/m68k/include/asm/pgtable_mm.h
+++ b/arch/m68k/include/asm/pgtable_mm.h
@@ -2,7 +2,12 @@
#ifndef _M68K_PGTABLE_H
#define _M68K_PGTABLE_H
-#include <asm-generic/4level-fixup.h>
+
+#if defined(CONFIG_SUN3) || defined(CONFIG_COLDFIRE)
+#include <asm-generic/pgtable-nopmd.h>
+#else
+#include <asm-generic/pgtable-nopud.h>
+#endif
#include <asm/setup.h>
@@ -30,9 +35,7 @@
/* PMD_SHIFT determines the size of the area a second-level page table can map */
-#ifdef CONFIG_SUN3
-#define PMD_SHIFT 17
-#else
+#if CONFIG_PGTABLE_LEVELS == 3
#define PMD_SHIFT 22
#endif
#define PMD_SIZE (1UL << PMD_SHIFT)
diff --git a/arch/m68k/include/asm/pgtable_no.h b/arch/m68k/include/asm/pgtable_no.h
index c18165b0d904..ccc4568299e5 100644
--- a/arch/m68k/include/asm/pgtable_no.h
+++ b/arch/m68k/include/asm/pgtable_no.h
@@ -2,7 +2,7 @@
#ifndef _M68KNOMMU_PGTABLE_H
#define _M68KNOMMU_PGTABLE_H
-#include <asm-generic/4level-fixup.h>
+#include <asm-generic/pgtable-nopud.h>
/*
* (C) Copyright 2000-2002, Greg Ungerer <gerg@snapgear.com>
diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
index 856121122b91..11b95dadf7c0 100644
--- a/arch/m68k/include/asm/sun3_pgalloc.h
+++ b/arch/m68k/include/asm/sun3_pgalloc.h
@@ -17,8 +17,6 @@
extern const char bad_pmd_string[];
-#define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); })
-
#define __pte_free_tlb(tlb,pte,addr) \
do { \
pgtable_pte_page_dtor(pte); \
@@ -41,7 +39,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page
* inside the pgd, so has no extra memory associated with it.
*/
#define pmd_free(mm, x) do { } while (0)
-#define __pmd_free_tlb(tlb, x, addr) do { } while (0)
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
@@ -58,6 +55,4 @@ static inline pgd_t * pgd_alloc(struct mm_struct *mm)
return new_pgd;
}
-#define pgd_populate(mm, pmd, pte) BUG()
-
#endif /* SUN3_PGALLOC_H */
diff --git a/arch/m68k/include/asm/sun3_pgtable.h b/arch/m68k/include/asm/sun3_pgtable.h
index c987d50866b4..bc4155264810 100644
--- a/arch/m68k/include/asm/sun3_pgtable.h
+++ b/arch/m68k/include/asm/sun3_pgtable.h
@@ -110,11 +110,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#define pmd_set(pmdp,ptep) do {} while (0)
-static inline void pgd_set(pgd_t *pgdp, pmd_t *pmdp)
-{
- pgd_val(*pgdp) = virt_to_phys(pmdp);
-}
-
#define __pte_page(pte) \
((unsigned long) __va ((pte_val (pte) & SUN3_PAGE_PGNUM_MASK) << PAGE_SHIFT))
#define __pmd_page(pmd) \
@@ -145,16 +140,9 @@ static inline int pmd_present2 (pmd_t *pmd) { return pmd_val (*pmd) & SUN3_PMD_V
#define pmd_present(pmd) (!pmd_none2(&(pmd)))
static inline void pmd_clear (pmd_t *pmdp) { pmd_val (*pmdp) = 0; }
-static inline int pgd_none (pgd_t pgd) { return 0; }
-static inline int pgd_bad (pgd_t pgd) { return 0; }
-static inline int pgd_present (pgd_t pgd) { return 1; }
-static inline void pgd_clear (pgd_t *pgdp) {}
-
#define pte_ERROR(e) \
pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
-#define pmd_ERROR(e) \
- pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
#define pgd_ERROR(e) \
pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
@@ -194,12 +182,6 @@ extern pgd_t kernel_pg_dir[PTRS_PER_PGD];
/* Find an entry in a kernel pagetable directory. */
#define pgd_offset_k(address) pgd_offset(&init_mm, address)
-/* Find an entry in the second-level pagetable. */
-static inline pmd_t *pmd_offset (pgd_t *pgd, unsigned long address)
-{
- return (pmd_t *) pgd;
-}
-
/* Find an entry in the third-level pagetable. */
#define pte_index(address) ((address >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
#define pte_offset_kernel(pmd, address) ((pte_t *) __pmd_page(*pmd) + pte_index(address))
diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c
index 6363ec83a290..18a4de7d5934 100644
--- a/arch/m68k/kernel/sys_m68k.c
+++ b/arch/m68k/kernel/sys_m68k.c
@@ -465,6 +465,8 @@ sys_atomic_cmpxchg_32(unsigned long newval, int oldval, int d3, int d4, int d5,
for (;;) {
struct mm_struct *mm = current->mm;
pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *pmd;
pte_t *pte;
spinlock_t *ptl;
@@ -474,7 +476,13 @@ sys_atomic_cmpxchg_32(unsigned long newval, int oldval, int d3, int d4, int d5,
pgd = pgd_offset(mm, (unsigned long)mem);
if (!pgd_present(*pgd))
goto bad_access;
- pmd = pmd_offset(pgd, (unsigned long)mem);
+ p4d = p4d_offset(pgd, (unsigned long)mem);
+ if (!p4d_present(*p4d))
+ goto bad_access;
+ pud = pud_offset(p4d, (unsigned long)mem);
+ if (!pud_present(*pud))
+ goto bad_access;
+ pmd = pmd_offset(pud, (unsigned long)mem);
if (!pmd_present(*pmd))
goto bad_access;
pte = pte_offset_map_lock(mm, pmd, (unsigned long)mem, &ptl);
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index 778cacb7d57b..27c453f4fffe 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -130,8 +130,10 @@ static inline void init_pointer_tables(void)
/* insert pointer tables allocated so far into the tablelist */
init_pointer_table((unsigned long)kernel_pg_dir);
for (i = 0; i < PTRS_PER_PGD; i++) {
- if (pgd_present(kernel_pg_dir[i]))
- init_pointer_table(__pgd_page(kernel_pg_dir[i]));
+ pud_t *pud = (pud_t *)(&kernel_pg_dir[i]);
+
+ if (pud_present(*pud))
+ init_pointer_table(pgd_page_vaddr(kernel_pg_dir[i]));
}
/* insert also pointer table that we used to unmap the zero page */
diff --git a/arch/m68k/mm/kmap.c b/arch/m68k/mm/kmap.c
index 23f9466aabb5..120030ad8dc4 100644
--- a/arch/m68k/mm/kmap.c
+++ b/arch/m68k/mm/kmap.c
@@ -63,18 +63,23 @@ static void __free_io_area(void *addr, unsigned long size)
{
unsigned long virtaddr = (unsigned long)addr;
pgd_t *pgd_dir;
+ p4d_t *p4d_dir;
+ pud_t *pud_dir;
pmd_t *pmd_dir;
pte_t *pte_dir;
while ((long)size > 0) {
pgd_dir = pgd_offset_k(virtaddr);
- if (pgd_bad(*pgd_dir)) {
- printk("iounmap: bad pgd(%08lx)\n", pgd_val(*pgd_dir));
- pgd_clear(pgd_dir);
+ p4d_dir = p4d_offset(pgd_dir, virtaddr);
+ pud_dir = pud_offset(p4d_dir, virtaddr);
+ if (pud_bad(*pud_dir)) {
+ printk("iounmap: bad pud(%08lx)\n", pud_val(*pud_dir));
+ pud_clear(pud_dir);
return;
}
- pmd_dir = pmd_offset(pgd_dir, virtaddr);
+ pmd_dir = pmd_offset(pud_dir, virtaddr);
+#if CONFIG_PGTABLE_LEVELS == 3
if (CPU_IS_020_OR_030) {
int pmd_off = (virtaddr/PTRTREESIZE) & 15;
int pmd_type = pmd_dir->pmd[pmd_off] & _DESCTYPE_MASK;
@@ -87,6 +92,7 @@ static void __free_io_area(void *addr, unsigned long size)
} else if (pmd_type == 0)
continue;
}
+#endif
if (pmd_bad(*pmd_dir)) {
printk("iounmap: bad pmd (%08lx)\n", pmd_val(*pmd_dir));
@@ -159,6 +165,8 @@ void __iomem *__ioremap(unsigned long physaddr, unsigned long size, int cachefla
unsigned long virtaddr, retaddr;
long offset;
pgd_t *pgd_dir;
+ p4d_t *p4d_dir;
+ pud_t *pud_dir;
pmd_t *pmd_dir;
pte_t *pte_dir;
@@ -245,18 +253,23 @@ void __iomem *__ioremap(unsigned long physaddr, unsigned long size, int cachefla
printk ("\npa=%#lx va=%#lx ", physaddr, virtaddr);
#endif
pgd_dir = pgd_offset_k(virtaddr);
- pmd_dir = pmd_alloc(&init_mm, pgd_dir, virtaddr);
+ p4d_dir = p4d_offset(pgd_dir, virtaddr);
+ pud_dir = pud_offset(p4d_dir, virtaddr);
+ pmd_dir = pmd_alloc(&init_mm, pud_dir, virtaddr);
if (!pmd_dir) {
printk("ioremap: no mem for pmd_dir\n");
return NULL;
}
+#if CONFIG_PGTABLE_LEVELS == 3
if (CPU_IS_020_OR_030) {
pmd_dir->pmd[(virtaddr/PTRTREESIZE) & 15] = physaddr;
physaddr += PTRTREESIZE;
virtaddr += PTRTREESIZE;
size -= PTRTREESIZE;
- } else {
+ } else
+#endif
+ {
pte_dir = pte_alloc_kernel(pmd_dir, virtaddr);
if (!pte_dir) {
printk("ioremap: no mem for pte_dir\n");
@@ -307,6 +320,8 @@ void kernel_set_cachemode(void *addr, unsigned long size, int cmode)
{
unsigned long virtaddr = (unsigned long)addr;
pgd_t *pgd_dir;
+ p4d_t *p4d_dir;
+ pud_t *pud_dir;
pmd_t *pmd_dir;
pte_t *pte_dir;
@@ -341,13 +356,16 @@ void kernel_set_cachemode(void *addr, unsigned long size, int cmode)
while ((long)size > 0) {
pgd_dir = pgd_offset_k(virtaddr);
- if (pgd_bad(*pgd_dir)) {
- printk("iocachemode: bad pgd(%08lx)\n", pgd_val(*pgd_dir));
- pgd_clear(pgd_dir);
+ p4d_dir = p4d_offset(pgd_dir, virtaddr);
+ pud_dir = pud_offset(p4d_dir, virtaddr);
+ if (pud_bad(*pud_dir)) {
+ printk("iocachemode: bad pud(%08lx)\n", pud_val(*pud_dir));
+ pud_clear(pud_dir);
return;
}
- pmd_dir = pmd_offset(pgd_dir, virtaddr);
+ pmd_dir = pmd_offset(pud_dir, virtaddr);
+#if CONFIG_PGTABLE_LEVELS == 3
if (CPU_IS_020_OR_030) {
int pmd_off = (virtaddr/PTRTREESIZE) & 15;
@@ -359,6 +377,7 @@ void kernel_set_cachemode(void *addr, unsigned long size, int cmode)
continue;
}
}
+#endif
if (pmd_bad(*pmd_dir)) {
printk("iocachemode: bad pmd (%08lx)\n", pmd_val(*pmd_dir));
diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c
index 6cb1e41d58d0..0ea375607767 100644
--- a/arch/m68k/mm/mcfmmu.c
+++ b/arch/m68k/mm/mcfmmu.c
@@ -92,6 +92,8 @@ int cf_tlb_miss(struct pt_regs *regs, int write, int dtlb, int extension_word)
unsigned long flags, mmuar, mmutr;
struct mm_struct *mm;
pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *pmd;
pte_t *pte;
int asid;
@@ -113,7 +115,19 @@ int cf_tlb_miss(struct pt_regs *regs, int write, int dtlb, int extension_word)
return -1;
}
- pmd = pmd_offset(pgd, mmuar);
+ p4d = p4d_offset(pgd, mmuar);
+ if (p4d_none(*p4d)) {
+ local_irq_restore(flags);
+ return -1;
+ }
+
+ pud = pud_offset(p4d, mmuar);
+ if (pud_none(*pud)) {
+ local_irq_restore(flags);
+ return -1;
+ }
+
+ pmd = pmd_offset(pud, mmuar);
if (pmd_none(*pmd)) {
local_irq_restore(flags);
return -1;
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 356601bf96d9..4857985b8080 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -82,9 +82,11 @@ static pmd_t * __init kernel_ptr_table(void)
*/
last = (unsigned long)kernel_pg_dir;
for (i = 0; i < PTRS_PER_PGD; i++) {
- if (!pgd_present(kernel_pg_dir[i]))
+ pud_t *pud = (pud_t *)(&kernel_pg_dir[i]);
+
+ if (!pud_present(*pud))
continue;
- pmd = __pgd_page(kernel_pg_dir[i]);
+ pmd = pgd_page_vaddr(kernel_pg_dir[i]);
if (pmd > last)
last = pmd;
}
@@ -118,6 +120,8 @@ static void __init map_node(int node)
#define ROOTTREESIZE (32*1024*1024)
unsigned long physaddr, virtaddr, size;
pgd_t *pgd_dir;
+ p4d_t *p4d_dir;
+ pud_t *pud_dir;
pmd_t *pmd_dir;
pte_t *pte_dir;
@@ -149,14 +153,16 @@ static void __init map_node(int node)
continue;
}
}
- if (!pgd_present(*pgd_dir)) {
+ p4d_dir = p4d_offset(pgd_dir, virtaddr);
+ pud_dir = pud_offset(p4d_dir, virtaddr);
+ if (!pud_present(*pud_dir)) {
pmd_dir = kernel_ptr_table();
#ifdef DEBUG
printk ("[new pointer %p]", pmd_dir);
#endif
- pgd_set(pgd_dir, pmd_dir);
+ pud_set(pud_dir, pmd_dir);
} else
- pmd_dir = pmd_offset(pgd_dir, virtaddr);
+ pmd_dir = pmd_offset(pud_dir, virtaddr);
if (CPU_IS_020_OR_030) {
if (virtaddr) {
@@ -304,4 +310,3 @@ void __init paging_init(void)
node_set_state(i, N_NORMAL_MEMORY);
}
}
-
diff --git a/arch/m68k/sun3x/dvma.c b/arch/m68k/sun3x/dvma.c
index 89e630e66555..c4b8aa1d80f4 100644
--- a/arch/m68k/sun3x/dvma.c
+++ b/arch/m68k/sun3x/dvma.c
@@ -80,6 +80,8 @@ inline int dvma_map_cpu(unsigned long kaddr,
unsigned long vaddr, int len)
{
pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
unsigned long end;
int ret = 0;
@@ -90,12 +92,14 @@ inline int dvma_map_cpu(unsigned long kaddr,
pr_debug("dvma: mapping kern %08lx to virt %08lx\n", kaddr, vaddr);
pgd = pgd_offset_k(vaddr);
+ p4d = p4d_offset(pgd, vaddr);
+ pud = pud_offset(p4d, vaddr);
do {
pmd_t *pmd;
unsigned long end2;
- if((pmd = pmd_alloc(&init_mm, pgd, vaddr)) == NULL) {
+ if((pmd = pmd_alloc(&init_mm, pud, vaddr)) == NULL) {
ret = -ENOMEM;
goto out;
}
@@ -196,4 +200,3 @@ void dvma_unmap_iommu(unsigned long baddr, int len)
}
}
-
diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h
index d506bb0893f9..f4b44b24b02e 100644
--- a/arch/microblaze/include/asm/page.h
+++ b/arch/microblaze/include/asm/page.h
@@ -90,7 +90,6 @@ typedef struct { unsigned long pte; } pte_t;
typedef struct { unsigned long pgprot; } pgprot_t;
/* FIXME this can depend on linux kernel version */
# ifdef CONFIG_MMU
-typedef struct { unsigned long pmd; } pmd_t;
typedef struct { unsigned long pgd; } pgd_t;
# else /* CONFIG_MMU */
typedef struct { unsigned long ste[64]; } pmd_t;
@@ -103,7 +102,6 @@ typedef struct { p4d_t pge[1]; } pgd_t;
# define pgprot_val(x) ((x).pgprot)
# ifdef CONFIG_MMU
-# define pmd_val(x) ((x).pmd)
# define pgd_val(x) ((x).pgd)
# else /* CONFIG_MMU */
# define pmd_val(x) ((x).ste[0])
@@ -112,7 +110,6 @@ typedef struct { p4d_t pge[1]; } pgd_t;
# endif /* CONFIG_MMU */
# define __pte(x) ((pte_t) { (x) })
-# define __pmd(x) ((pmd_t) { (x) })
# define __pgd(x) ((pgd_t) { (x) })
# define __pgprot(x) ((pgprot_t) { (x) })
diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
index 7ecb05baa601..fcf1e23f2e0a 100644
--- a/arch/microblaze/include/asm/pgalloc.h
+++ b/arch/microblaze/include/asm/pgalloc.h
@@ -41,13 +41,6 @@ static inline void free_pgd(pgd_t *pgd)
#define pmd_pgtable(pmd) pmd_page(pmd)
-/*
- * We don't have any real pmd's, and this code never triggers because
- * the pgd will always be present..
- */
-#define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
-#define pmd_alloc_one(mm, address) ({ BUG(); ((pmd_t *)2); })
-
extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
#define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, (pte))
@@ -58,15 +51,6 @@ extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
#define pmd_populate_kernel(mm, pmd, pte) \
(pmd_val(*(pmd)) = (unsigned long) (pte))
-/*
- * We don't have any real pmd's, and this code never triggers because
- * the pgd will always be present..
- */
-#define pmd_alloc_one(mm, address) ({ BUG(); ((pmd_t *)2); })
-#define pmd_free(mm, x) do { } while (0)
-#define __pmd_free_tlb(tlb, x, addr) pmd_free((tlb)->mm, x)
-#define pgd_populate(mm, pmd, pte) BUG()
-
#endif /* CONFIG_MMU */
#endif /* _ASM_MICROBLAZE_PGALLOC_H */
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index 954b69af451f..2def331f9e2c 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -59,9 +59,7 @@ extern int mem_init_done;
#else /* CONFIG_MMU */
-#include <asm-generic/4level-fixup.h>
-
-#define __PAGETABLE_PMD_FOLDED 1
+#include <asm-generic/pgtable-nopmd.h>
#ifdef __KERNEL__
#ifndef __ASSEMBLY__
@@ -138,13 +136,8 @@ static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
*
*/
-/* PMD_SHIFT determines the size of the area mapped by the PTE pages */
-#define PMD_SHIFT (PAGE_SHIFT + PTE_SHIFT)
-#define PMD_SIZE (1UL << PMD_SHIFT)
-#define PMD_MASK (~(PMD_SIZE-1))
-
/* PGDIR_SHIFT determines what a top-level page table entry can map */
-#define PGDIR_SHIFT PMD_SHIFT
+#define PGDIR_SHIFT (PAGE_SHIFT + PTE_SHIFT)
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
@@ -165,9 +158,6 @@ static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
#define pte_ERROR(e) \
printk(KERN_ERR "%s:%d: bad pte "PTE_FMT".\n", \
__FILE__, __LINE__, pte_val(e))
-#define pmd_ERROR(e) \
- printk(KERN_ERR "%s:%d: bad pmd %08lx.\n", \
- __FILE__, __LINE__, pmd_val(e))
#define pgd_ERROR(e) \
printk(KERN_ERR "%s:%d: bad pgd %08lx.\n", \
__FILE__, __LINE__, pgd_val(e))
@@ -314,18 +304,6 @@ extern unsigned long empty_zero_page[1024];
#ifndef __ASSEMBLY__
/*
- * The "pgd_xxx()" functions here are trivial for a folded two-level
- * setup: the pgd is never bad, and a pmd always exists (as it's folded
- * into the pgd entry)
- */
-static inline int pgd_none(pgd_t pgd) { return 0; }
-static inline int pgd_bad(pgd_t pgd) { return 0; }
-static inline int pgd_present(pgd_t pgd) { return 1; }
-#define pgd_clear(xp) do { } while (0)
-#define pgd_page(pgd) \
- ((unsigned long) __va(pgd_val(pgd) & PAGE_MASK))
-
-/*
* The following only work if pte_present() is true.
* Undefined behaviour if not..
*/
@@ -479,12 +457,6 @@ static inline void ptep_mkdirty(struct mm_struct *mm,
#define pgd_index(address) ((address) >> PGDIR_SHIFT)
#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
-/* Find an entry in the second-level page table.. */
-static inline pmd_t *pmd_offset(pgd_t *dir, unsigned long address)
-{
- return (pmd_t *) dir;
-}
-
/* Find an entry in the third-level page table.. */
#define pte_index(address) \
(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c
index cdd4feb279c5..c9125c328949 100644
--- a/arch/microblaze/kernel/signal.c
+++ b/arch/microblaze/kernel/signal.c
@@ -160,6 +160,9 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
int err = 0, sig = ksig->sig;
unsigned long address = 0;
#ifdef CONFIG_MMU
+ pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
#endif
@@ -195,9 +198,10 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
address = ((unsigned long)frame->tramp);
#ifdef CONFIG_MMU
- pmdp = pmd_offset(pud_offset(
- pgd_offset(current->mm, address),
- address), address);
+ pgdp = pgd_offset(current->mm, address);
+ p4dp = p4d_offset(pgdp, address);
+ pudp = pud_offset(p4dp, address);
+ pmdp = pmd_offset(pudp, address);
preempt_disable();
ptep = pte_offset_map(pmdp, address);
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index a015a951c8b7..050fc621c920 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -53,8 +53,11 @@ EXPORT_SYMBOL(kmap_prot);
static inline pte_t *virt_to_kpte(unsigned long vaddr)
{
- return pte_offset_kernel(pmd_offset(pgd_offset_k(vaddr),
- vaddr), vaddr);
+ pgd_t *pgd = pgd_offset_k(vaddr);
+ p4d_t *p4d = p4d_offset(pgd, vaddr);
+ pud_t *pud = pud_offset(p4d, vaddr);
+
+ return pte_offset_kernel(pmd_offset(pud, vaddr), vaddr);
}
static void __init highmem_init(void)
diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
index 010bb9cee2e4..68c26cacd930 100644
--- a/arch/microblaze/mm/pgtable.c
+++ b/arch/microblaze/mm/pgtable.c
@@ -134,11 +134,16 @@ EXPORT_SYMBOL(iounmap);
int map_page(unsigned long va, phys_addr_t pa, int flags)
{
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *pd;
pte_t *pg;
int err = -ENOMEM;
+
/* Use upper 10 bits of VA to index the first level map */
- pd = pmd_offset(pgd_offset_k(va), va);
+ p4d = p4d_offset(pgd_offset_k(va), va);
+ pud = pud_offset(p4d, va);
+ pd = pmd_offset(pud, va);
/* Use middle 10 bits of VA to index the second-level map */
pg = pte_alloc_kernel(pd, va); /* from powerpc - pgtable.c */
/* pg = pte_alloc_kernel(&init_mm, pd, va); */
@@ -188,13 +193,17 @@ void __init mapin_ram(void)
static int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep)
{
pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *pmd;
pte_t *pte;
int retval = 0;
pgd = pgd_offset(mm, addr & PAGE_MASK);
if (pgd) {
- pmd = pmd_offset(pgd, addr & PAGE_MASK);
+ p4d = p4d_offset(pgd, addr & PAGE_MASK);
+ pud = pud_offset(p4d, addr & PAGE_MASK);
+ pmd = pmd_offset(pud, addr & PAGE_MASK);
if (pmd_present(*pmd)) {
pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
if (pte) {
diff --git a/arch/mips/include/uapi/asm/msgbuf.h b/arch/mips/include/uapi/asm/msgbuf.h
index 9e0c2e230274..128af72f2dfe 100644
--- a/arch/mips/include/uapi/asm/msgbuf.h
+++ b/arch/mips/include/uapi/asm/msgbuf.h
@@ -2,6 +2,7 @@
#ifndef _ASM_MSGBUF_H
#define _ASM_MSGBUF_H
+#include <asm/ipcbuf.h>
/*
* The msqid64_ds structure for the MIPS architecture.
diff --git a/arch/mips/include/uapi/asm/sembuf.h b/arch/mips/include/uapi/asm/sembuf.h
index 43e1b4a2f68a..ba7fe0c89e7d 100644
--- a/arch/mips/include/uapi/asm/sembuf.h
+++ b/arch/mips/include/uapi/asm/sembuf.h
@@ -2,6 +2,8 @@
#ifndef _ASM_SEMBUF_H
#define _ASM_SEMBUF_H
+#include <asm/ipcbuf.h>
+
/*
* The semid64_ds structure for the MIPS architecture.
* Note extra padding because this structure is passed back and forth
diff --git a/arch/nds32/include/asm/page.h b/arch/nds32/include/asm/page.h
index 8feb1fa12f01..86b32014c5f9 100644
--- a/arch/nds32/include/asm/page.h
+++ b/arch/nds32/include/asm/page.h
@@ -41,17 +41,14 @@ void clear_page(void *page);
void copy_page(void *to, void *from);
typedef unsigned long pte_t;
-typedef unsigned long pmd_t;
typedef unsigned long pgd_t;
typedef unsigned long pgprot_t;
#define pte_val(x) (x)
-#define pmd_val(x) (x)
#define pgd_val(x) (x)
#define pgprot_val(x) (x)
#define __pte(x) (x)
-#define __pmd(x) (x)
#define __pgd(x) (x)
#define __pgprot(x) (x)
diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h
index 37125e6884d7..85c117347c86 100644
--- a/arch/nds32/include/asm/pgalloc.h
+++ b/arch/nds32/include/asm/pgalloc.h
@@ -15,9 +15,6 @@
/*
* Since we have only two-level page tables, these are trivial
*/
-#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); })
-#define pmd_free(mm, pmd) do { } while (0)
-#define pgd_populate(mm, pmd, pte) BUG()
#define pmd_pgtable(pmd) pmd_page(pmd)
extern pgd_t *pgd_alloc(struct mm_struct *mm);
diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h
index 6fbf251cfc26..0214e4150539 100644
--- a/arch/nds32/include/asm/pgtable.h
+++ b/arch/nds32/include/asm/pgtable.h
@@ -4,8 +4,7 @@
#ifndef _ASMNDS32_PGTABLE_H
#define _ASMNDS32_PGTABLE_H
-#define __PAGETABLE_PMD_FOLDED 1
-#include <asm-generic/4level-fixup.h>
+#include <asm-generic/pgtable-nopmd.h>
#include <linux/sizes.h>
#include <asm/memory.h>
@@ -18,26 +17,20 @@
#ifdef CONFIG_ANDES_PAGE_SIZE_4KB
#define PGDIR_SHIFT 22
#define PTRS_PER_PGD 1024
-#define PMD_SHIFT 22
-#define PTRS_PER_PMD 1
#define PTRS_PER_PTE 1024
#endif
#ifdef CONFIG_ANDES_PAGE_SIZE_8KB
#define PGDIR_SHIFT 24
#define PTRS_PER_PGD 256
-#define PMD_SHIFT 24
-#define PTRS_PER_PMD 1
#define PTRS_PER_PTE 2048
#endif
#ifndef __ASSEMBLY__
extern void __pte_error(const char *file, int line, unsigned long val);
-extern void __pmd_error(const char *file, int line, unsigned long val);
extern void __pgd_error(const char *file, int line, unsigned long val);
#define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte))
-#define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd))
#define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd))
#endif /* !__ASSEMBLY__ */
@@ -368,9 +361,6 @@ static inline pmd_t __mk_pmd(pte_t * ptep, unsigned long prot)
/* to find an entry in a kernel page-table-directory */
#define pgd_offset_k(addr) pgd_offset(&init_mm, addr)
-/* Find an entry in the second-level page table.. */
-#define pmd_offset(dir, addr) ((pmd_t *)(dir))
-
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
const unsigned long mask = 0xfff;
diff --git a/arch/nds32/include/asm/tlb.h b/arch/nds32/include/asm/tlb.h
index a8aff1c8b4f4..672603804a3b 100644
--- a/arch/nds32/include/asm/tlb.h
+++ b/arch/nds32/include/asm/tlb.h
@@ -7,6 +7,5 @@
#include <asm-generic/tlb.h>
#define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte)
-#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tln)->mm, pmd)
#endif
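
[Worth noting: the __pmd_free_tlb() stub deleted above referenced an undeclared `tln` rather than `tlb`, so any expansion of it would have failed to compile — evidence the macro was never used, which is what makes dropping it (and switching nds32 to pgtable-nopmd) safe.]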
diff --git a/arch/nds32/kernel/pm.c b/arch/nds32/kernel/pm.c
index ffa8040d8be7..e25700e125d8 100644
--- a/arch/nds32/kernel/pm.c
+++ b/arch/nds32/kernel/pm.c
@@ -14,6 +14,7 @@ unsigned int *phy_addr_sp_tmp;
static void nds32_suspend2ram(void)
{
pgd_t *pgdv;
+ p4d_t *p4dv;
pud_t *pudv;
pmd_t *pmdv;
pte_t *ptev;
@@ -21,7 +22,8 @@ static void nds32_suspend2ram(void)
pgdv = (pgd_t *)__va((__nds32__mfsr(NDS32_SR_L1_PPTB) &
L1_PPTB_mskBASE)) + pgd_index((unsigned int)cpu_resume);
- pudv = pud_offset(pgdv, (unsigned int)cpu_resume);
+ p4dv = p4d_offset(pgdv, (unsigned int)cpu_resume);
+ pudv = pud_offset(p4dv, (unsigned int)cpu_resume);
pmdv = pmd_offset(pudv, (unsigned int)cpu_resume);
ptev = pte_offset_map(pmdv, (unsigned int)cpu_resume);
diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c
index 064ae5d2159d..906dfb25353c 100644
--- a/arch/nds32/mm/fault.c
+++ b/arch/nds32/mm/fault.c
@@ -31,6 +31,8 @@ void show_pte(struct mm_struct *mm, unsigned long addr)
pr_alert("[%08lx] *pgd=%08lx", addr, pgd_val(*pgd));
do {
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *pmd;
if (pgd_none(*pgd))
@@ -41,7 +43,9 @@ void show_pte(struct mm_struct *mm, unsigned long addr)
break;
}
- pmd = pmd_offset(pgd, addr);
+ p4d = p4d_offset(pgd, addr);
+ pud = pud_offset(p4d, addr);
+ pmd = pmd_offset(pud, addr);
#if PTRS_PER_PMD != 1
pr_alert(", *pmd=%08lx", pmd_val(*pmd));
#endif
@@ -359,6 +363,7 @@ vmalloc_fault:
unsigned int index = pgd_index(addr);
pgd_t *pgd, *pgd_k;
+ p4d_t *p4d, *p4d_k;
pud_t *pud, *pud_k;
pmd_t *pmd, *pmd_k;
pte_t *pte_k;
@@ -369,8 +374,13 @@ vmalloc_fault:
if (!pgd_present(*pgd_k))
goto no_context;
- pud = pud_offset(pgd, addr);
- pud_k = pud_offset(pgd_k, addr);
+ p4d = p4d_offset(pgd, addr);
+ p4d_k = p4d_offset(pgd_k, addr);
+ if (!p4d_present(*p4d_k))
+ goto no_context;
+
+ pud = pud_offset(p4d, addr);
+ pud_k = pud_offset(p4d_k, addr);
if (!pud_present(*pud_k))
goto no_context;
diff --git a/arch/nds32/mm/init.c b/arch/nds32/mm/init.c
index 55703b03d172..0be3833f6814 100644
--- a/arch/nds32/mm/init.c
+++ b/arch/nds32/mm/init.c
@@ -54,6 +54,7 @@ static void __init map_ram(void)
{
unsigned long v, p, e;
pgd_t *pge;
+ p4d_t *p4e;
pud_t *pue;
pmd_t *pme;
pte_t *pte;
@@ -69,7 +70,8 @@ static void __init map_ram(void)
while (p < e) {
int j;
- pue = pud_offset(pge, v);
+ p4e = p4d_offset(pge, v);
+ pue = pud_offset(p4e, v);
pme = pmd_offset(pue, v);
if ((u32) pue != (u32) pge || (u32) pme != (u32) pge) {
@@ -100,6 +102,7 @@ static void __init fixedrange_init(void)
{
unsigned long vaddr;
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
#ifdef CONFIG_HIGHMEM
@@ -111,7 +114,8 @@ static void __init fixedrange_init(void)
*/
vaddr = __fix_to_virt(__end_of_fixed_addresses - 1);
pgd = swapper_pg_dir + pgd_index(vaddr);
- pud = pud_offset(pgd, vaddr);
+ p4d = p4d_offset(pgd, vaddr);
+ pud = pud_offset(p4d, vaddr);
pmd = pmd_offset(pud, vaddr);
fixmap_pmd_p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
if (!fixmap_pmd_p)
@@ -126,7 +130,8 @@ static void __init fixedrange_init(void)
vaddr = PKMAP_BASE;
pgd = swapper_pg_dir + pgd_index(vaddr);
- pud = pud_offset(pgd, vaddr);
+ p4d = p4d_offset(pgd, vaddr);
+ pud = pud_offset(p4d, vaddr);
pmd = pmd_offset(pud, vaddr);
pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
if (!pte)
diff --git a/arch/nds32/mm/mm-nds32.c b/arch/nds32/mm/mm-nds32.c
index 3b43798d754f..8503bee882d1 100644
--- a/arch/nds32/mm/mm-nds32.c
+++ b/arch/nds32/mm/mm-nds32.c
@@ -74,6 +74,8 @@ void setup_mm_for_reboot(char mode)
{
unsigned long pmdval;
pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *pmd;
int i;
@@ -84,7 +86,9 @@ void setup_mm_for_reboot(char mode)
for (i = 0; i < USER_PTRS_PER_PGD; i++) {
pmdval = (i << PGDIR_SHIFT);
- pmd = pmd_offset(pgd + i, i << PGDIR_SHIFT);
+ p4d = p4d_offset(pgd, i << PGDIR_SHIFT);
+ pud = pud_offset(p4d, i << PGDIR_SHIFT);
+ pmd = pmd_offset(pud + i, i << PGDIR_SHIFT);
set_pmd(pmd, __pmd(pmdval));
}
}
diff --git a/arch/nds32/mm/proc.c b/arch/nds32/mm/proc.c
index ba80992d13a2..837ae7728830 100644
--- a/arch/nds32/mm/proc.c
+++ b/arch/nds32/mm/proc.c
@@ -16,10 +16,14 @@ extern struct cache_info L1_cache_info[2];
int va_kernel_present(unsigned long addr)
{
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *pmd;
pte_t *ptep, pte;
- pmd = pmd_offset(pgd_offset_k(addr), addr);
+ p4d = p4d_offset(pgd_offset_k(addr), addr);
+ pud = pud_offset(p4d, addr);
+ pmd = pmd_offset(pud, addr);
if (!pmd_none(*pmd)) {
ptep = pte_offset_map(pmd, addr);
pte = *ptep;
@@ -32,20 +36,24 @@ int va_kernel_present(unsigned long addr)
pte_t va_present(struct mm_struct * mm, unsigned long addr)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *ptep, pte;
pgd = pgd_offset(mm, addr);
if (!pgd_none(*pgd)) {
- pud = pud_offset(pgd, addr);
- if (!pud_none(*pud)) {
- pmd = pmd_offset(pud, addr);
- if (!pmd_none(*pmd)) {
- ptep = pte_offset_map(pmd, addr);
- pte = *ptep;
- if (pte_present(pte))
- return pte;
+ p4d = p4d_offset(pgd, addr);
+ if (!p4d_none(*p4d)) {
+ pud = pud_offset(p4d, addr);
+ if (!pud_none(*pud)) {
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_none(*pmd)) {
+ ptep = pte_offset_map(pmd, addr);
+ pte = *ptep;
+ if (pte_present(pte))
+ return pte;
+ }
}
}
}
diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h
index 93caf17ac5e2..796ae29e9b9a 100644
--- a/arch/parisc/include/asm/page.h
+++ b/arch/parisc/include/asm/page.h
@@ -42,48 +42,54 @@ typedef struct { unsigned long pte; } pte_t; /* either 32 or 64bit */
/* NOTE: even on 64 bits, these entries are __u32 because we allocate
* the pmd and pgd in ZONE_DMA (i.e. under 4GB) */
-typedef struct { __u32 pmd; } pmd_t;
typedef struct { __u32 pgd; } pgd_t;
typedef struct { unsigned long pgprot; } pgprot_t;
-#define pte_val(x) ((x).pte)
-/* These do not work lvalues, so make sure we don't use them as such. */
+#if CONFIG_PGTABLE_LEVELS == 3
+typedef struct { __u32 pmd; } pmd_t;
+#define __pmd(x) ((pmd_t) { (x) } )
+/* pXd_val() do not work as lvalues, so make sure we don't use them as such. */
#define pmd_val(x) ((x).pmd + 0)
+#endif
+
+#define pte_val(x) ((x).pte)
#define pgd_val(x) ((x).pgd + 0)
#define pgprot_val(x) ((x).pgprot)
#define __pte(x) ((pte_t) { (x) } )
-#define __pmd(x) ((pmd_t) { (x) } )
#define __pgd(x) ((pgd_t) { (x) } )
#define __pgprot(x) ((pgprot_t) { (x) } )
-#define __pmd_val_set(x,n) (x).pmd = (n)
-#define __pgd_val_set(x,n) (x).pgd = (n)
-
#else
/*
* .. while these make it easier on the compiler
*/
typedef unsigned long pte_t;
+
+#if CONFIG_PGTABLE_LEVELS == 3
typedef __u32 pmd_t;
+#define pmd_val(x) (x)
+#define __pmd(x) (x)
+#endif
+
typedef __u32 pgd_t;
typedef unsigned long pgprot_t;
#define pte_val(x) (x)
-#define pmd_val(x) (x)
#define pgd_val(x) (x)
#define pgprot_val(x) (x)
#define __pte(x) (x)
-#define __pmd(x) (x)
#define __pgd(x) (x)
#define __pgprot(x) (x)
-#define __pmd_val_set(x,n) (x) = (n)
-#define __pgd_val_set(x,n) (x) = (n)
-
#endif /* STRICT_MM_TYPECHECKS */
+#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
+#if CONFIG_PGTABLE_LEVELS == 3
+#define set_pud(pudptr, pudval) (*(pudptr) = (pudval))
+#endif
+
typedef struct page *pgtable_t;
typedef struct __physmem_range {
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index d98647c29b74..9ac74da256b8 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -34,13 +34,13 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
/* Populate first pmd with allocated memory. We mark it
* with PxD_FLAG_ATTACHED as a signal to the system that this
* pmd entry may not be cleared. */
- __pgd_val_set(*actual_pgd, (PxD_FLAG_PRESENT |
- PxD_FLAG_VALID |
- PxD_FLAG_ATTACHED)
- + (__u32)(__pa((unsigned long)pgd) >> PxD_VALUE_SHIFT));
+ set_pgd(actual_pgd, __pgd((PxD_FLAG_PRESENT |
+ PxD_FLAG_VALID |
+ PxD_FLAG_ATTACHED)
+ + (__u32)(__pa((unsigned long)pgd) >> PxD_VALUE_SHIFT)));
/* The first pmd entry also is marked with PxD_FLAG_ATTACHED as
* a signal that this pmd may not be freed */
- __pgd_val_set(*pgd, PxD_FLAG_ATTACHED);
+ set_pgd(pgd, __pgd(PxD_FLAG_ATTACHED));
#endif
}
spin_lock_init(pgd_spinlock(actual_pgd));
@@ -59,10 +59,10 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
/* Three Level Page Table Support for pmd's */
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
{
- __pgd_val_set(*pgd, (PxD_FLAG_PRESENT | PxD_FLAG_VALID) +
- (__u32)(__pa((unsigned long)pmd) >> PxD_VALUE_SHIFT));
+ set_pud(pud, __pud((PxD_FLAG_PRESENT | PxD_FLAG_VALID) +
+ (__u32)(__pa((unsigned long)pmd) >> PxD_VALUE_SHIFT)));
}
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
@@ -88,19 +88,6 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
free_pages((unsigned long)pmd, PMD_ORDER);
}
-#else
-
-/* Two Level Page Table Support for pmd's */
-
-/*
- * allocating and freeing a pmd is trivial: the 1-entry pmd is
- * inside the pgd, so has no extra memory associated with it.
- */
-
-#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); })
-#define pmd_free(mm, x) do { } while (0)
-#define pgd_populate(mm, pmd, pte) BUG()
-
#endif
static inline void
@@ -110,14 +97,14 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
/* preserve the gateway marker if this is the beginning of
* the permanent pmd */
if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
- __pmd_val_set(*pmd, (PxD_FLAG_PRESENT |
- PxD_FLAG_VALID |
- PxD_FLAG_ATTACHED)
- + (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT));
+ set_pmd(pmd, __pmd((PxD_FLAG_PRESENT |
+ PxD_FLAG_VALID |
+ PxD_FLAG_ATTACHED)
+ + (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT)));
else
#endif
- __pmd_val_set(*pmd, (PxD_FLAG_PRESENT | PxD_FLAG_VALID)
- + (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT));
+ set_pmd(pmd, __pmd((PxD_FLAG_PRESENT | PxD_FLAG_VALID)
+ + (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT)));
}
#define pmd_populate(mm, pmd, pte_page) \
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 4ac374b3a99f..f0a365950536 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -3,7 +3,12 @@
#define _PARISC_PGTABLE_H
#include <asm/page.h>
-#include <asm-generic/4level-fixup.h>
+
+#if CONFIG_PGTABLE_LEVELS == 3
+#include <asm-generic/pgtable-nopud.h>
+#elif CONFIG_PGTABLE_LEVELS == 2
+#include <asm-generic/pgtable-nopmd.h>
+#endif
#include <asm/fixmap.h>
@@ -101,8 +106,10 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
#define pte_ERROR(e) \
printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
+#if CONFIG_PGTABLE_LEVELS == 3
#define pmd_ERROR(e) \
printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, (unsigned long)pmd_val(e))
+#endif
#define pgd_ERROR(e) \
printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, (unsigned long)pgd_val(e))
@@ -132,19 +139,18 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
#define PTRS_PER_PTE (1UL << BITS_PER_PTE)
/* Definitions for 2nd level */
+#if CONFIG_PGTABLE_LEVELS == 3
#define PMD_SHIFT (PLD_SHIFT + BITS_PER_PTE)
#define PMD_SIZE (1UL << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
-#if CONFIG_PGTABLE_LEVELS == 3
#define BITS_PER_PMD (PAGE_SHIFT + PMD_ORDER - BITS_PER_PMD_ENTRY)
+#define PTRS_PER_PMD (1UL << BITS_PER_PMD)
#else
-#define __PAGETABLE_PMD_FOLDED 1
#define BITS_PER_PMD 0
#endif
-#define PTRS_PER_PMD (1UL << BITS_PER_PMD)
/* Definitions for 1st level */
-#define PGDIR_SHIFT (PMD_SHIFT + BITS_PER_PMD)
+#define PGDIR_SHIFT (PLD_SHIFT + BITS_PER_PTE + BITS_PER_PMD)
#if (PGDIR_SHIFT + PAGE_SHIFT + PGD_ORDER - BITS_PER_PGD_ENTRY) > BITS_PER_LONG
#define BITS_PER_PGD (BITS_PER_LONG - PGDIR_SHIFT)
#else
@@ -317,6 +323,8 @@ extern unsigned long *empty_zero_page;
#define pmd_flag(x) (pmd_val(x) & PxD_FLAG_MASK)
#define pmd_address(x) ((unsigned long)(pmd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT)
+#define pud_flag(x) (pud_val(x) & PxD_FLAG_MASK)
+#define pud_address(x) ((unsigned long)(pud_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT)
#define pgd_flag(x) (pgd_val(x) & PxD_FLAG_MASK)
#define pgd_address(x) ((unsigned long)(pgd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT)
@@ -334,42 +342,32 @@ static inline void pmd_clear(pmd_t *pmd) {
if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
/* This is the entry pointing to the permanent pmd
* attached to the pgd; cannot clear it */
- __pmd_val_set(*pmd, PxD_FLAG_ATTACHED);
+ set_pmd(pmd, __pmd(PxD_FLAG_ATTACHED));
else
#endif
- __pmd_val_set(*pmd, 0);
+ set_pmd(pmd, __pmd(0));
}
#if CONFIG_PGTABLE_LEVELS == 3
-#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_address(pgd)))
-#define pgd_page(pgd) virt_to_page((void *)pgd_page_vaddr(pgd))
+#define pud_page_vaddr(pud) ((unsigned long) __va(pud_address(pud)))
+#define pud_page(pud) virt_to_page((void *)pud_page_vaddr(pud))
/* For 64 bit we have three level tables */
-#define pgd_none(x) (!pgd_val(x))
-#define pgd_bad(x) (!(pgd_flag(x) & PxD_FLAG_VALID))
-#define pgd_present(x) (pgd_flag(x) & PxD_FLAG_PRESENT)
-static inline void pgd_clear(pgd_t *pgd) {
+#define pud_none(x) (!pud_val(x))
+#define pud_bad(x) (!(pud_flag(x) & PxD_FLAG_VALID))
+#define pud_present(x) (pud_flag(x) & PxD_FLAG_PRESENT)
+static inline void pud_clear(pud_t *pud) {
#if CONFIG_PGTABLE_LEVELS == 3
- if(pgd_flag(*pgd) & PxD_FLAG_ATTACHED)
- /* This is the permanent pmd attached to the pgd; cannot
+ if(pud_flag(*pud) & PxD_FLAG_ATTACHED)
+ /* This is the permanent pmd attached to the pud; cannot
* free it */
return;
#endif
- __pgd_val_set(*pgd, 0);
+ set_pud(pud, __pud(0));
}
-#else
-/*
- * The "pgd_xxx()" functions here are trivial for a folded two-level
- * setup: the pgd is never bad, and a pmd always exists (as it's folded
- * into the pgd entry)
- */
-static inline int pgd_none(pgd_t pgd) { return 0; }
-static inline int pgd_bad(pgd_t pgd) { return 0; }
-static inline int pgd_present(pgd_t pgd) { return 1; }
-static inline void pgd_clear(pgd_t * pgdp) { }
#endif
/*
@@ -452,7 +450,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#if CONFIG_PGTABLE_LEVELS == 3
#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
#define pmd_offset(dir,address) \
-((pmd_t *) pgd_page_vaddr(*(dir)) + pmd_index(address))
+((pmd_t *) pud_page_vaddr(*(dir)) + pmd_index(address))
#else
#define pmd_offset(dir,addr) ((pmd_t *) dir)
#endif
diff --git a/arch/parisc/include/asm/tlb.h b/arch/parisc/include/asm/tlb.h
index 8c0446b04c9e..44235f367674 100644
--- a/arch/parisc/include/asm/tlb.h
+++ b/arch/parisc/include/asm/tlb.h
@@ -4,7 +4,9 @@
#include <asm-generic/tlb.h>
+#if CONFIG_PGTABLE_LEVELS == 3
#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd)
+#endif
#define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte)
#endif
diff --git a/arch/parisc/include/uapi/asm/msgbuf.h b/arch/parisc/include/uapi/asm/msgbuf.h
index 3b877335da38..3b4de5b668c3 100644
--- a/arch/parisc/include/uapi/asm/msgbuf.h
+++ b/arch/parisc/include/uapi/asm/msgbuf.h
@@ -3,6 +3,7 @@
#define _PARISC_MSGBUF_H
#include <asm/bitsperlong.h>
+#include <asm/ipcbuf.h>
/*
* The msqid64_ds structure for parisc architecture, copied from sparc.
diff --git a/arch/parisc/include/uapi/asm/sembuf.h b/arch/parisc/include/uapi/asm/sembuf.h
index 8241cf126018..e2ca4301c7fb 100644
--- a/arch/parisc/include/uapi/asm/sembuf.h
+++ b/arch/parisc/include/uapi/asm/sembuf.h
@@ -3,6 +3,7 @@
#define _PARISC_SEMBUF_H
#include <asm/bitsperlong.h>
+#include <asm/ipcbuf.h>
/*
* The semid64_ds structure for parisc architecture.
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 2407b0b789d3..1eedfecc5137 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -534,11 +534,14 @@ static inline pte_t *get_ptep(pgd_t *pgd, unsigned long addr)
pte_t *ptep = NULL;
if (!pgd_none(*pgd)) {
- pud_t *pud = pud_offset(pgd, addr);
- if (!pud_none(*pud)) {
- pmd_t *pmd = pmd_offset(pud, addr);
- if (!pmd_none(*pmd))
- ptep = pte_offset_map(pmd, addr);
+ p4d_t *p4d = p4d_offset(pgd, addr);
+ if (!p4d_none(*p4d)) {
+ pud_t *pud = pud_offset(p4d, addr);
+ if (!pud_none(*pud)) {
+ pmd_t *pmd = pmd_offset(pud, addr);
+ if (!pmd_none(*pmd))
+ ptep = pte_offset_map(pmd, addr);
+ }
}
}
return ptep;
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index a60d47fd4d55..0f1b460ee715 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -133,9 +133,14 @@ static inline int map_uncached_pages(unsigned long vaddr, unsigned long size,
dir = pgd_offset_k(vaddr);
do {
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *pmd;
-
- pmd = pmd_alloc(NULL, dir, vaddr);
+
+ p4d = p4d_offset(dir, vaddr);
+ pud = pud_offset(p4d, vaddr);
+ pmd = pmd_alloc(NULL, pud, vaddr);
+
if (!pmd)
return -ENOMEM;
if (map_pmd_uncached(pmd, vaddr, end - vaddr, &paddr))
diff --git a/arch/parisc/mm/fixmap.c b/arch/parisc/mm/fixmap.c
index 474cd241c150..e2d8b0a857ee 100644
--- a/arch/parisc/mm/fixmap.c
+++ b/arch/parisc/mm/fixmap.c
@@ -14,11 +14,13 @@ void notrace set_fixmap(enum fixed_addresses idx, phys_addr_t phys)
{
unsigned long vaddr = __fix_to_virt(idx);
pgd_t *pgd = pgd_offset_k(vaddr);
- pmd_t *pmd = pmd_offset(pgd, vaddr);
+ p4d_t *p4d = p4d_offset(pgd, vaddr);
+ pud_t *pud = pud_offset(p4d, vaddr);
+ pmd_t *pmd = pmd_offset(pud, vaddr);
pte_t *pte;
if (pmd_none(*pmd))
- pmd = pmd_alloc(NULL, pgd, vaddr);
+ pmd = pmd_alloc(NULL, pud, vaddr);
pte = pte_offset_kernel(pmd, vaddr);
if (pte_none(*pte))
@@ -32,7 +34,9 @@ void notrace clear_fixmap(enum fixed_addresses idx)
{
unsigned long vaddr = __fix_to_virt(idx);
pgd_t *pgd = pgd_offset_k(vaddr);
- pmd_t *pmd = pmd_offset(pgd, vaddr);
+ p4d_t *p4d = p4d_offset(pgd, vaddr);
+ pud_t *pud = pud_offset(p4d, vaddr);
+ pmd_t *pmd = pmd_offset(pud, vaddr);
pte_t *pte = pte_offset_kernel(pmd, vaddr);
if (WARN_ON(pte_none(*pte)))
diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c
index d578809e55cf..0e1e212f1c96 100644
--- a/arch/parisc/mm/hugetlbpage.c
+++ b/arch/parisc/mm/hugetlbpage.c
@@ -49,6 +49,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte = NULL;
@@ -61,7 +62,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
addr &= HPAGE_MASK;
pgd = pgd_offset(mm, addr);
- pud = pud_alloc(mm, pgd, addr);
+ p4d = p4d_offset(pgd, addr);
+ pud = pud_alloc(mm, p4d, addr);
if (pud) {
pmd = pmd_alloc(mm, pud, addr);
if (pmd)
@@ -74,6 +76,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte = NULL;
@@ -82,11 +85,14 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
pgd = pgd_offset(mm, addr);
if (!pgd_none(*pgd)) {
- pud = pud_offset(pgd, addr);
- if (!pud_none(*pud)) {
- pmd = pmd_offset(pud, addr);
- if (!pmd_none(*pmd))
- pte = pte_offset_map(pmd, addr);
+ p4d = p4d_offset(pgd, addr);
+ if (!p4d_none(*p4d)) {
+ pud = pud_offset(p4d, addr);
+ if (!pud_none(*pud)) {
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_none(*pmd))
+ pte = pte_offset_map(pmd, addr);
+ }
}
}
return pte;
diff --git a/arch/powerpc/include/uapi/asm/msgbuf.h b/arch/powerpc/include/uapi/asm/msgbuf.h
index 969bd83e4d3d..7919b2ba41b5 100644
--- a/arch/powerpc/include/uapi/asm/msgbuf.h
+++ b/arch/powerpc/include/uapi/asm/msgbuf.h
@@ -2,6 +2,8 @@
#ifndef _ASM_POWERPC_MSGBUF_H
#define _ASM_POWERPC_MSGBUF_H
+#include <asm/ipcbuf.h>
+
/*
* The msqid64_ds structure for the PowerPC architecture.
* Note extra padding because this structure is passed back and forth
diff --git a/arch/powerpc/include/uapi/asm/sembuf.h b/arch/powerpc/include/uapi/asm/sembuf.h
index 008ae77c6746..85e96ccb5f0f 100644
--- a/arch/powerpc/include/uapi/asm/sembuf.h
+++ b/arch/powerpc/include/uapi/asm/sembuf.h
@@ -2,6 +2,8 @@
#ifndef _ASM_POWERPC_SEMBUF_H
#define _ASM_POWERPC_SEMBUF_H
+#include <asm/ipcbuf.h>
+
/*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 9488b63dfc87..e678577258ee 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -151,10 +151,9 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
int ret;
- __remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
+ __remove_pages(start_pfn, nr_pages, altmap);
/* Remove htab bolted mappings for this section of memory */
start = (unsigned long)__va(start);
diff --git a/arch/s390/include/uapi/asm/ipcbuf.h b/arch/s390/include/uapi/asm/ipcbuf.h
index 5b1c4f47c656..1030cd186899 100644
--- a/arch/s390/include/uapi/asm/ipcbuf.h
+++ b/arch/s390/include/uapi/asm/ipcbuf.h
@@ -2,6 +2,8 @@
#ifndef __S390_IPCBUF_H__
#define __S390_IPCBUF_H__
+#include <linux/posix_types.h>
+
/*
* The user_ipc_perm structure for S/390 architecture.
* Note extra padding because this structure is passed back and forth
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index f0ce22220565..ac44bd76db4b 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -292,10 +292,8 @@ void arch_remove_memory(int nid, u64 start, u64 size,
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- struct zone *zone;
- zone = page_zone(pfn_to_page(start_pfn));
- __remove_pages(zone, start_pfn, nr_pages, altmap);
+ __remove_pages(start_pfn, nr_pages, altmap);
vmem_remove_mapping(start, size);
}
#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index dfdbaa50946e..d1b1ff2be17a 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -434,9 +434,7 @@ void arch_remove_memory(int nid, u64 start, u64 size,
{
unsigned long start_pfn = PFN_DOWN(start);
unsigned long nr_pages = size >> PAGE_SHIFT;
- struct zone *zone;
- zone = page_zone(pfn_to_page(start_pfn));
- __remove_pages(zone, start_pfn, nr_pages, altmap);
+ __remove_pages(start_pfn, nr_pages, altmap);
}
#endif /* CONFIG_MEMORY_HOTPLUG */
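
[The powerpc, s390, sh and x86 arch_remove_memory() hunks in this merge all track the same mm API change: __remove_pages() no longer takes a struct zone, so the pfn_to_page()/page_zone() boilerplate disappears from every caller and the hotplug core derives the zone from the pfn range itself. Sketch of the before/after call shape, as used in these hunks:]

	/* Before: each caller looked the zone up itself. */
	zone = page_zone(pfn_to_page(start_pfn));
	__remove_pages(zone, start_pfn, nr_pages, altmap);

	/* After: only the pfn range and altmap are passed. */
	__remove_pages(start_pfn, nr_pages, altmap);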
diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h
index 10538a4d1a1e..eae0c92ec422 100644
--- a/arch/sparc/include/asm/pgalloc_32.h
+++ b/arch/sparc/include/asm/pgalloc_32.h
@@ -26,14 +26,14 @@ static inline void free_pgd_fast(pgd_t *pgd)
#define pgd_free(mm, pgd) free_pgd_fast(pgd)
#define pgd_alloc(mm) get_pgd_fast()
-static inline void pgd_set(pgd_t * pgdp, pmd_t * pmdp)
+static inline void pud_set(pud_t * pudp, pmd_t * pmdp)
{
unsigned long pa = __nocache_pa(pmdp);
- set_pte((pte_t *)pgdp, __pte((SRMMU_ET_PTD | (pa >> 4))));
+ set_pte((pte_t *)pudp, __pte((SRMMU_ET_PTD | (pa >> 4))));
}
-#define pgd_populate(MM, PGD, PMD) pgd_set(PGD, PMD)
+#define pud_populate(MM, PGD, PMD) pud_set(PGD, PMD)
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm,
unsigned long address)
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index 31da44826645..6d6f44c0cad9 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -12,7 +12,7 @@
#include <linux/const.h>
#ifndef __ASSEMBLY__
-#include <asm-generic/4level-fixup.h>
+#include <asm-generic/pgtable-nopud.h>
#include <linux/spinlock.h>
#include <linux/mm_types.h>
@@ -132,12 +132,12 @@ static inline struct page *pmd_page(pmd_t pmd)
return pfn_to_page((pmd_val(pmd) & SRMMU_PTD_PMASK) >> (PAGE_SHIFT-4));
}
-static inline unsigned long pgd_page_vaddr(pgd_t pgd)
+static inline unsigned long pud_page_vaddr(pud_t pud)
{
- if (srmmu_device_memory(pgd_val(pgd))) {
+ if (srmmu_device_memory(pud_val(pud))) {
return ~0;
} else {
- unsigned long v = pgd_val(pgd) & SRMMU_PTD_PMASK;
+ unsigned long v = pud_val(pud) & SRMMU_PTD_PMASK;
return (unsigned long)__nocache_va(v << 4);
}
}
@@ -184,24 +184,24 @@ static inline void pmd_clear(pmd_t *pmdp)
set_pte((pte_t *)&pmdp->pmdv[i], __pte(0));
}
-static inline int pgd_none(pgd_t pgd)
+static inline int pud_none(pud_t pud)
{
- return !(pgd_val(pgd) & 0xFFFFFFF);
+ return !(pud_val(pud) & 0xFFFFFFF);
}
-static inline int pgd_bad(pgd_t pgd)
+static inline int pud_bad(pud_t pud)
{
- return (pgd_val(pgd) & SRMMU_ET_MASK) != SRMMU_ET_PTD;
+ return (pud_val(pud) & SRMMU_ET_MASK) != SRMMU_ET_PTD;
}
-static inline int pgd_present(pgd_t pgd)
+static inline int pud_present(pud_t pud)
{
- return ((pgd_val(pgd) & SRMMU_ET_MASK) == SRMMU_ET_PTD);
+ return ((pud_val(pud) & SRMMU_ET_MASK) == SRMMU_ET_PTD);
}
-static inline void pgd_clear(pgd_t *pgdp)
+static inline void pud_clear(pud_t *pudp)
{
- set_pte((pte_t *)pgdp, __pte(0));
+ set_pte((pte_t *)pudp, __pte(0));
}
/*
@@ -319,9 +319,9 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#define pgd_offset_k(address) pgd_offset(&init_mm, address)
/* Find an entry in the second-level page table.. */
-static inline pmd_t *pmd_offset(pgd_t * dir, unsigned long address)
+static inline pmd_t *pmd_offset(pud_t * dir, unsigned long address)
{
- return (pmd_t *) pgd_page_vaddr(*dir) +
+ return (pmd_t *) pud_page_vaddr(*dir) +
((address >> PMD_SHIFT) & (PTRS_PER_PMD - 1));
}
diff --git a/arch/sparc/include/uapi/asm/ipcbuf.h b/arch/sparc/include/uapi/asm/ipcbuf.h
index 9d0d125500e2..5b933a598a33 100644
--- a/arch/sparc/include/uapi/asm/ipcbuf.h
+++ b/arch/sparc/include/uapi/asm/ipcbuf.h
@@ -2,6 +2,8 @@
#ifndef __SPARC_IPCBUF_H
#define __SPARC_IPCBUF_H
+#include <linux/posix_types.h>
+
/*
* The ipc64_perm structure for sparc/sparc64 architecture.
* Note extra padding because this structure is passed back and forth
diff --git a/arch/sparc/include/uapi/asm/msgbuf.h b/arch/sparc/include/uapi/asm/msgbuf.h
index eeeb91933280..0954552da188 100644
--- a/arch/sparc/include/uapi/asm/msgbuf.h
+++ b/arch/sparc/include/uapi/asm/msgbuf.h
@@ -2,6 +2,8 @@
#ifndef _SPARC_MSGBUF_H
#define _SPARC_MSGBUF_H
+#include <asm/ipcbuf.h>
+
/*
* The msqid64_ds structure for sparc64 architecture.
* Note extra padding because this structure is passed back and forth
diff --git a/arch/sparc/include/uapi/asm/sembuf.h b/arch/sparc/include/uapi/asm/sembuf.h
index cbcbaa4e7128..10621d066d79 100644
--- a/arch/sparc/include/uapi/asm/sembuf.h
+++ b/arch/sparc/include/uapi/asm/sembuf.h
@@ -2,6 +2,8 @@
#ifndef _SPARC_SEMBUF_H
#define _SPARC_SEMBUF_H
+#include <asm/ipcbuf.h>
+
/*
* The semid64_ds structure for sparc architecture.
* Note extra padding because this structure is passed back and forth
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index 8d69de111470..89976c9b936c 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -351,6 +351,8 @@ vmalloc_fault:
*/
int offset = pgd_index(address);
pgd_t *pgd, *pgd_k;
+ p4d_t *p4d, *p4d_k;
+ pud_t *pud, *pud_k;
pmd_t *pmd, *pmd_k;
pgd = tsk->active_mm->pgd + offset;
@@ -363,8 +365,13 @@ vmalloc_fault:
return;
}
- pmd = pmd_offset(pgd, address);
- pmd_k = pmd_offset(pgd_k, address);
+ p4d = p4d_offset(pgd, address);
+ pud = pud_offset(p4d, address);
+ pmd = pmd_offset(pud, address);
+
+ p4d_k = p4d_offset(pgd_k, address);
+ pud_k = pud_offset(p4d_k, address);
+ pmd_k = pmd_offset(pud_k, address);
if (pmd_present(*pmd) || !pmd_present(*pmd_k))
goto bad_area_nosemaphore;
diff --git a/arch/sparc/mm/highmem.c b/arch/sparc/mm/highmem.c
index 86bc2a58d26c..d4a80adea7e5 100644
--- a/arch/sparc/mm/highmem.c
+++ b/arch/sparc/mm/highmem.c
@@ -39,10 +39,14 @@ static pte_t *kmap_pte;
void __init kmap_init(void)
{
unsigned long address;
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *dir;
address = __fix_to_virt(FIX_KMAP_BEGIN);
- dir = pmd_offset(pgd_offset_k(address), address);
+ p4d = p4d_offset(pgd_offset_k(address), address);
+ pud = pud_offset(p4d, address);
+ dir = pmd_offset(pud, address);
/* cache the first kmap pte */
kmap_pte = pte_offset_kernel(dir, address);
diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c
index f770ee7229d8..33a0facd9eb5 100644
--- a/arch/sparc/mm/io-unit.c
+++ b/arch/sparc/mm/io-unit.c
@@ -239,12 +239,16 @@ static void *iounit_alloc(struct device *dev, size_t len,
page = va;
{
pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
long i;
pgdp = pgd_offset(&init_mm, addr);
- pmdp = pmd_offset(pgdp, addr);
+ p4dp = p4d_offset(pgdp, addr);
+ pudp = pud_offset(p4dp, addr);
+ pmdp = pmd_offset(pudp, addr);
ptep = pte_offset_map(pmdp, addr);
set_pte(ptep, mk_pte(virt_to_page(page), dvma_prot));
diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c
index 71ac353032b6..4d3c6991f0ae 100644
--- a/arch/sparc/mm/iommu.c
+++ b/arch/sparc/mm/iommu.c
@@ -343,6 +343,8 @@ static void *sbus_iommu_alloc(struct device *dev, size_t len,
page = va;
{
pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
@@ -354,7 +356,9 @@ static void *sbus_iommu_alloc(struct device *dev, size_t len,
__flush_page_to_ram(page);
pgdp = pgd_offset(&init_mm, addr);
- pmdp = pmd_offset(pgdp, addr);
+ p4dp = p4d_offset(pgdp, addr);
+ pudp = pud_offset(p4dp, addr);
+ pmdp = pmd_offset(pudp, addr);
ptep = pte_offset_map(pmdp, addr);
set_pte(ptep, mk_pte(virt_to_page(page), dvma_prot));
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index cc3ad64479ac..f56c3c9a9793 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -296,6 +296,8 @@ static void __init srmmu_nocache_init(void)
void *srmmu_nocache_bitmap;
unsigned int bitmap_bits;
pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *pmd;
pte_t *pte;
unsigned long paddr, vaddr;
@@ -329,6 +331,8 @@ static void __init srmmu_nocache_init(void)
while (vaddr < srmmu_nocache_end) {
pgd = pgd_offset_k(vaddr);
+ p4d = p4d_offset(__nocache_fix(pgd), vaddr);
+ pud = pud_offset(__nocache_fix(p4d), vaddr);
-		pmd = pmd_offset(__nocache_fix(pgd), vaddr);
+		pmd = pmd_offset(__nocache_fix(pud), vaddr);
pte = pte_offset_kernel(__nocache_fix(pmd), vaddr);
@@ -516,13 +520,17 @@ static inline void srmmu_mapioaddr(unsigned long physaddr,
unsigned long virt_addr, int bus_type)
{
pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
unsigned long tmp;
physaddr &= PAGE_MASK;
pgdp = pgd_offset_k(virt_addr);
- pmdp = pmd_offset(pgdp, virt_addr);
+ p4dp = p4d_offset(pgdp, virt_addr);
+ pudp = pud_offset(p4dp, virt_addr);
+ pmdp = pmd_offset(pudp, virt_addr);
ptep = pte_offset_kernel(pmdp, virt_addr);
tmp = (physaddr >> 4) | SRMMU_ET_PTE;
@@ -551,11 +559,16 @@ void srmmu_mapiorange(unsigned int bus, unsigned long xpa,
static inline void srmmu_unmapioaddr(unsigned long virt_addr)
{
pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
+
pgdp = pgd_offset_k(virt_addr);
- pmdp = pmd_offset(pgdp, virt_addr);
+ p4dp = p4d_offset(pgdp, virt_addr);
+ pudp = pud_offset(p4dp, virt_addr);
+ pmdp = pmd_offset(pudp, virt_addr);
ptep = pte_offset_kernel(pmdp, virt_addr);
/* No need to flush uncacheable page. */
@@ -693,20 +706,24 @@ static void __init srmmu_early_allocate_ptable_skeleton(unsigned long start,
unsigned long end)
{
pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
while (start < end) {
pgdp = pgd_offset_k(start);
- if (pgd_none(*(pgd_t *)__nocache_fix(pgdp))) {
+ p4dp = p4d_offset(pgdp, start);
+ pudp = pud_offset(p4dp, start);
+ if (pud_none(*(pud_t *)__nocache_fix(pudp))) {
pmdp = __srmmu_get_nocache(
SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE);
if (pmdp == NULL)
early_pgtable_allocfail("pmd");
memset(__nocache_fix(pmdp), 0, SRMMU_PMD_TABLE_SIZE);
- pgd_set(__nocache_fix(pgdp), pmdp);
+ pud_set(__nocache_fix(pudp), pmdp);
}
- pmdp = pmd_offset(__nocache_fix(pgdp), start);
+ pmdp = pmd_offset(__nocache_fix(pudp), start);
if (srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) {
ptep = __srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
if (ptep == NULL)
@@ -724,19 +741,23 @@ static void __init srmmu_allocate_ptable_skeleton(unsigned long start,
unsigned long end)
{
pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
while (start < end) {
pgdp = pgd_offset_k(start);
- if (pgd_none(*pgdp)) {
+ p4dp = p4d_offset(pgdp, start);
+ pudp = pud_offset(p4dp, start);
+ if (pud_none(*pudp)) {
pmdp = __srmmu_get_nocache(SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE);
if (pmdp == NULL)
early_pgtable_allocfail("pmd");
memset(pmdp, 0, SRMMU_PMD_TABLE_SIZE);
- pgd_set(pgdp, pmdp);
+ pud_set((pud_t *)pgdp, pmdp);
}
- pmdp = pmd_offset(pgdp, start);
+ pmdp = pmd_offset(pudp, start);
if (srmmu_pmd_none(*pmdp)) {
ptep = __srmmu_get_nocache(PTE_SIZE,
PTE_SIZE);
@@ -779,6 +800,8 @@ static void __init srmmu_inherit_prom_mappings(unsigned long start,
unsigned long probed;
unsigned long addr;
pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
int what; /* 0 = normal-pte, 1 = pmd-level pte, 2 = pgd-level pte */
@@ -810,18 +833,20 @@ static void __init srmmu_inherit_prom_mappings(unsigned long start,
}
pgdp = pgd_offset_k(start);
+ p4dp = p4d_offset(pgdp, start);
+ pudp = pud_offset(p4dp, start);
if (what == 2) {
*(pgd_t *)__nocache_fix(pgdp) = __pgd(probed);
start += SRMMU_PGDIR_SIZE;
continue;
}
- if (pgd_none(*(pgd_t *)__nocache_fix(pgdp))) {
+ if (pud_none(*(pud_t *)__nocache_fix(pudp))) {
pmdp = __srmmu_get_nocache(SRMMU_PMD_TABLE_SIZE,
SRMMU_PMD_TABLE_SIZE);
if (pmdp == NULL)
early_pgtable_allocfail("pmd");
memset(__nocache_fix(pmdp), 0, SRMMU_PMD_TABLE_SIZE);
- pgd_set(__nocache_fix(pgdp), pmdp);
+ pud_set(__nocache_fix(pudp), pmdp);
}
-		pmdp = pmd_offset(__nocache_fix(pgdp), start);
+		pmdp = pmd_offset(__nocache_fix(pudp), start);
if (srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) {
@@ -906,6 +931,8 @@ void __init srmmu_paging_init(void)
phandle cpunode;
char node_str[128];
pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *pmd;
pte_t *pte;
unsigned long pages_avail;
@@ -967,7 +994,9 @@ void __init srmmu_paging_init(void)
srmmu_allocate_ptable_skeleton(PKMAP_BASE, PKMAP_END);
pgd = pgd_offset_k(PKMAP_BASE);
- pmd = pmd_offset(pgd, PKMAP_BASE);
+ p4d = p4d_offset(pgd, PKMAP_BASE);
+ pud = pud_offset(p4d, PKMAP_BASE);
+ pmd = pmd_offset(pud, PKMAP_BASE);
pte = pte_offset_kernel(pmd, PKMAP_BASE);
pkmap_page_table = pte;
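
[Several spots in the sparc32 conversion above pass a pgd pointer where a pud is expected, e.g. pud_set((pud_t *)pgdp, pmdp) in srmmu_allocate_ptable_skeleton(). That is safe because with <asm-generic/pgtable-nopud.h> the intermediate levels are folded: the offset helpers just recast their argument, so pgdp, p4dp and pudp all hold the same address. Simplified from asm-generic's pgtable-nop4d.h/pgtable-nopud.h, for illustration:]

	/* On folded configurations the "offset" helpers are identity
	 * functions on the pointer; the address argument is unused. */
	static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
	{
		return (p4d_t *)pgd;
	}

	static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
	{
		return (pud_t *)p4d;
	}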
diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h
index 32b3d26a7109..32106d31e4ab 100644
--- a/arch/um/include/asm/pgtable-2level.h
+++ b/arch/um/include/asm/pgtable-2level.h
@@ -8,7 +8,6 @@
#ifndef __UM_PGTABLE_2LEVEL_H
#define __UM_PGTABLE_2LEVEL_H
-#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h>
/* PGDIR_SHIFT determines what a third-level page table entry can map */
diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h
index 9812269fefc9..8a3b689e0f86 100644
--- a/arch/um/include/asm/pgtable-3level.h
+++ b/arch/um/include/asm/pgtable-3level.h
@@ -7,7 +7,6 @@
#ifndef __UM_PGTABLE_3LEVEL_H
#define __UM_PGTABLE_3LEVEL_H
-#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopud.h>
/* PGDIR_SHIFT determines what a third-level page table entry can map */
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 36a44d58f373..2daa58df2190 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -106,6 +106,9 @@ extern unsigned long end_iomem;
#define pud_newpage(x) (pud_val(x) & _PAGE_NEWPAGE)
#define pud_mkuptodate(x) (pud_val(x) &= ~_PAGE_NEWPAGE)
+#define p4d_newpage(x) (p4d_val(x) & _PAGE_NEWPAGE)
+#define p4d_mkuptodate(x) (p4d_val(x) &= ~_PAGE_NEWPAGE)
+
#define pmd_page(pmd) phys_to_page(pmd_val(pmd) & PAGE_MASK)
#define pte_page(x) pfn_to_page(pte_pfn(x))
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 417ff647fb37..30885d0b94ac 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -96,6 +96,7 @@ static void __init fixrange_init(unsigned long start, unsigned long end,
pgd_t *pgd_base)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
int i, j;
@@ -107,7 +108,8 @@ static void __init fixrange_init(unsigned long start, unsigned long end,
pgd = pgd_base + i;
for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) {
- pud = pud_offset(pgd, vaddr);
+ p4d = p4d_offset(pgd, vaddr);
+ pud = pud_offset(p4d, vaddr);
if (pud_none(*pud))
one_md_table_init(pud);
pmd = pmd_offset(pud, vaddr);
@@ -124,6 +126,7 @@ static void __init fixaddr_user_init( void)
#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA
long size = FIXADDR_USER_END - FIXADDR_USER_START;
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -144,7 +147,8 @@ static void __init fixaddr_user_init( void)
for ( ; size > 0; size -= PAGE_SIZE, vaddr += PAGE_SIZE,
p += PAGE_SIZE) {
pgd = swapper_pg_dir + pgd_index(vaddr);
- pud = pud_offset(pgd, vaddr);
+ p4d = p4d_offset(pgd, vaddr);
+ pud = pud_offset(p4d, vaddr);
pmd = pmd_offset(pud, vaddr);
pte = pte_offset_kernel(pmd, vaddr);
pte_set_val(*pte, p, PAGE_READONLY);
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index b5e3d91fc9c2..3f0d9a573fd6 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -19,15 +19,21 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
unsigned long kernel)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pgd = pgd_offset(mm, proc);
- pud = pud_alloc(mm, pgd, proc);
- if (!pud)
+
+ p4d = p4d_alloc(mm, pgd, proc);
+ if (!p4d)
goto out;
+ pud = pud_alloc(mm, p4d, proc);
+ if (!pud)
+ goto out_pud;
+
pmd = pmd_alloc(mm, pud, proc);
if (!pmd)
goto out_pmd;
@@ -44,6 +50,8 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
pmd_free(mm, pmd);
out_pmd:
pud_free(mm, pud);
+ out_pud:
+ p4d_free(mm, p4d);
out:
return -ENOMEM;
}
diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c
index 3236052f20e6..d617f8dc9c19 100644
--- a/arch/um/kernel/skas/uaccess.c
+++ b/arch/um/kernel/skas/uaccess.c
@@ -17,6 +17,7 @@
pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
@@ -27,7 +28,11 @@ pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
if (!pgd_present(*pgd))
return NULL;
- pud = pud_offset(pgd, addr);
+ p4d = p4d_offset(pgd, addr);
+ if (!p4d_present(*p4d))
+ return NULL;
+
+ pud = pud_offset(p4d, addr);
if (!pud_present(*pud))
return NULL;
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index b7eaf655635c..80a358c6d652 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -277,7 +277,7 @@ static inline int update_pmd_range(pud_t *pud, unsigned long addr,
return ret;
}
-static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
+static inline int update_pud_range(p4d_t *p4d, unsigned long addr,
unsigned long end,
struct host_vm_change *hvc)
{
@@ -285,7 +285,7 @@ static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
unsigned long next;
int ret = 0;
- pud = pud_offset(pgd, addr);
+ pud = pud_offset(p4d, addr);
do {
next = pud_addr_end(addr, end);
if (!pud_present(*pud)) {
@@ -299,6 +299,28 @@ static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
return ret;
}
+static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
+ unsigned long end,
+ struct host_vm_change *hvc)
+{
+ p4d_t *p4d;
+ unsigned long next;
+ int ret = 0;
+
+ p4d = p4d_offset(pgd, addr);
+ do {
+ next = p4d_addr_end(addr, end);
+ if (!p4d_present(*p4d)) {
+ if (hvc->force || p4d_newpage(*p4d)) {
+ ret = add_munmap(addr, next - addr, hvc);
+ p4d_mkuptodate(*p4d);
+ }
+ } else
+ ret = update_pud_range(p4d, addr, next, hvc);
+ } while (p4d++, addr = next, ((addr < end) && !ret));
+ return ret;
+}
+
void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
unsigned long end_addr, int force)
{
@@ -316,8 +338,8 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
ret = add_munmap(addr, next - addr, &hvc);
pgd_mkuptodate(*pgd);
}
- }
- else ret = update_pud_range(pgd, addr, next, &hvc);
+ } else
+ ret = update_p4d_range(pgd, addr, next, &hvc);
} while (pgd++, addr = next, ((addr < end_addr) && !ret));
if (!ret)
@@ -338,6 +360,7 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
{
struct mm_struct *mm;
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -364,7 +387,23 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
continue;
}
- pud = pud_offset(pgd, addr);
+ p4d = p4d_offset(pgd, addr);
+ if (!p4d_present(*p4d)) {
+ last = ADD_ROUND(addr, P4D_SIZE);
+ if (last > end)
+ last = end;
+ if (p4d_newpage(*p4d)) {
+ updated = 1;
+ err = add_munmap(addr, last - addr, &hvc);
+ if (err < 0)
+ panic("munmap failed, errno = %d\n",
+ -err);
+ }
+ addr = last;
+ continue;
+ }
+
+ pud = pud_offset(p4d, addr);
if (!pud_present(*pud)) {
last = ADD_ROUND(addr, PUD_SIZE);
if (last > end)
@@ -424,6 +463,7 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -437,7 +477,11 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
if (!pgd_present(*pgd))
goto kill;
- pud = pud_offset(pgd, address);
+ p4d = p4d_offset(pgd, address);
+ if (!p4d_present(*p4d))
+ goto kill;
+
+ pud = pud_offset(p4d, address);
if (!pud_present(*pud))
goto kill;
@@ -490,35 +534,6 @@ kill:
force_sig(SIGKILL);
}
-pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address)
-{
- return pgd_offset(mm, address);
-}
-
-pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address)
-{
- return pud_offset(pgd, address);
-}
-
-pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address)
-{
- return pmd_offset(pud, address);
-}
-
-pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address)
-{
- return pte_offset_kernel(pmd, address);
-}
-
-pte_t *addr_pte(struct task_struct *task, unsigned long addr)
-{
- pgd_t *pgd = pgd_offset(task->mm, addr);
- pud_t *pud = pud_offset(pgd, addr);
- pmd_t *pmd = pmd_offset(pud, addr);
-
- return pte_offset_map(pmd, addr);
-}
-
void flush_tlb_all(void)
{
/*
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index e62296c66c95..818553064f04 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -28,6 +28,7 @@ int handle_page_fault(unsigned long address, unsigned long ip,
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -104,7 +105,8 @@ good_area:
}
pgd = pgd_offset(mm, address);
- pud = pud_offset(pgd, address);
+ p4d = p4d_offset(pgd, address);
+ pud = pud_offset(p4d, address);
pmd = pmd_offset(pud, address);
pte = pte_offset_kernel(pmd, address);
} while (!pte_present(*pte));
diff --git a/arch/x86/include/uapi/asm/msgbuf.h b/arch/x86/include/uapi/asm/msgbuf.h
index 7c5bb43ed8af..b3d0664fadc9 100644
--- a/arch/x86/include/uapi/asm/msgbuf.h
+++ b/arch/x86/include/uapi/asm/msgbuf.h
@@ -5,6 +5,9 @@
#if !defined(__x86_64__) || !defined(__ILP32__)
#include <asm-generic/msgbuf.h>
#else
+
+#include <asm/ipcbuf.h>
+
/*
* The msqid64_ds structure for x86 architecture with x32 ABI.
*
diff --git a/arch/x86/include/uapi/asm/sembuf.h b/arch/x86/include/uapi/asm/sembuf.h
index 93030e97269a..71205b02a191 100644
--- a/arch/x86/include/uapi/asm/sembuf.h
+++ b/arch/x86/include/uapi/asm/sembuf.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_SEMBUF_H
#define _ASM_X86_SEMBUF_H
+#include <asm/ipcbuf.h>
+
/*
* The semid64_ds structure for x86 architecture.
* Note extra padding because this structure is passed back and forth
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 304d31d8cbbc..c494c8c05824 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -189,7 +189,7 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
return pmd_k;
}
-void vmalloc_sync_all(void)
+static void vmalloc_sync(void)
{
unsigned long address;
@@ -216,6 +216,16 @@ void vmalloc_sync_all(void)
}
}
+void vmalloc_sync_mappings(void)
+{
+ vmalloc_sync();
+}
+
+void vmalloc_sync_unmappings(void)
+{
+ vmalloc_sync();
+}
+
/*
* 32-bit:
*
@@ -318,11 +328,23 @@ out:
#else /* CONFIG_X86_64: */
-void vmalloc_sync_all(void)
+void vmalloc_sync_mappings(void)
{
+ /*
+ * 64-bit mappings might allocate new p4d/pud pages
+ * that need to be propagated to all tasks' PGDs.
+ */
sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
}
+void vmalloc_sync_unmappings(void)
+{
+ /*
+ * Unmappings never allocate or free p4d/pud pages.
+ * No work is required here.
+ */
+}
+
/*
* 64-bit:
*
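
[The rename above splits vmalloc_sync_all() by intent: vmalloc_sync_mappings() for callers that need newly allocated p4d/pud pages propagated to every PGD, vmalloc_sync_unmappings() for callers flushing after a vunmap(); on x86-64 only the mapping side has any work to do. The ghes.c hunk below shows the mapping-side caller. Hedged sketch of a call site, mirroring that hunk:]

	/* Pool pages will be read from NMI context, so the new kernel
	 * mappings must be visible in all page tables first: */
	vmalloc_sync_mappings();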
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index e9f239e25bb0..4d8672f9a208 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -865,10 +865,8 @@ void arch_remove_memory(int nid, u64 start, u64 size,
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- struct zone *zone;
- zone = page_zone(pfn_to_page(start_pfn));
- __remove_pages(zone, start_pfn, nr_pages, altmap);
+ __remove_pages(start_pfn, nr_pages, altmap);
}
#endif
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index b326f1440219..abbdecb75fad 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1212,10 +1212,8 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
- struct zone *zone = page_zone(page);
- __remove_pages(zone, start_pfn, nr_pages, altmap);
+ __remove_pages(start_pfn, nr_pages, altmap);
kernel_physical_mapping_remove(start, start + size);
}
#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/xtensa/include/uapi/asm/ipcbuf.h b/arch/xtensa/include/uapi/asm/ipcbuf.h
index a57afa0b606f..3bd0642f6660 100644
--- a/arch/xtensa/include/uapi/asm/ipcbuf.h
+++ b/arch/xtensa/include/uapi/asm/ipcbuf.h
@@ -12,6 +12,8 @@
#ifndef _XTENSA_IPCBUF_H
#define _XTENSA_IPCBUF_H
+#include <linux/posix_types.h>
+
/*
* Pad space is left for:
* - 32-bit mode_t and seq
diff --git a/arch/xtensa/include/uapi/asm/msgbuf.h b/arch/xtensa/include/uapi/asm/msgbuf.h
index d6915e9f071c..1ed2c85b693a 100644
--- a/arch/xtensa/include/uapi/asm/msgbuf.h
+++ b/arch/xtensa/include/uapi/asm/msgbuf.h
@@ -17,6 +17,8 @@
#ifndef _XTENSA_MSGBUF_H
#define _XTENSA_MSGBUF_H
+#include <asm/ipcbuf.h>
+
struct msqid64_ds {
struct ipc64_perm msg_perm;
#ifdef __XTENSA_EB__
diff --git a/arch/xtensa/include/uapi/asm/sembuf.h b/arch/xtensa/include/uapi/asm/sembuf.h
index 09f348d643f1..3b9cdd406dfe 100644
--- a/arch/xtensa/include/uapi/asm/sembuf.h
+++ b/arch/xtensa/include/uapi/asm/sembuf.h
@@ -22,6 +22,7 @@
#define _XTENSA_SEMBUF_H
#include <asm/byteorder.h>
+#include <asm/ipcbuf.h>
struct semid64_ds {
struct ipc64_perm sem_perm; /* permissions .. see ipc.h */
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 8906c80175e6..2122b83821ba 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -171,7 +171,7 @@ int ghes_estatus_pool_init(int num_ghes)
* New allocation must be visible in all pgd before it can be found by
* an NMI allocating from the pool.
*/
- vmalloc_sync_all();
+ vmalloc_sync_mappings();
rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1);
if (rc)
diff --git a/drivers/auxdisplay/charlcd.c b/drivers/auxdisplay/charlcd.c
index bef6b85778b6..874c259a8829 100644
--- a/drivers/auxdisplay/charlcd.c
+++ b/drivers/auxdisplay/charlcd.c
@@ -288,31 +288,6 @@ static int charlcd_init_display(struct charlcd *lcd)
}
/*
- * Parses an unsigned integer from a string, until a non-digit character
- * is found. The empty string is not accepted. No overflow checks are done.
- *
- * Returns whether the parsing was successful. Only in that case
- * the output parameters are written to.
- *
- * TODO: If the kernel adds an inplace version of kstrtoul(), this function
- * could be easily replaced by that.
- */
-static bool parse_n(const char *s, unsigned long *res, const char **next_s)
-{
- if (!isdigit(*s))
- return false;
-
- *res = 0;
- while (isdigit(*s)) {
- *res = *res * 10 + (*s - '0');
- ++s;
- }
-
- *next_s = s;
- return true;
-}
-
-/*
* Parses a movement command of the form "(.*);", where the group can be
* any number of subcommands of the form "(x|y)[0-9]+".
*
@@ -336,6 +311,7 @@ static bool parse_xy(const char *s, unsigned long *x, unsigned long *y)
{
unsigned long new_x = *x;
unsigned long new_y = *y;
+ char *p;
for (;;) {
if (!*s)
@@ -345,11 +321,15 @@ static bool parse_xy(const char *s, unsigned long *x, unsigned long *y)
break;
if (*s == 'x') {
- if (!parse_n(s + 1, &new_x, &s))
+ new_x = simple_strtoul(s + 1, &p, 10);
+ if (p == s + 1)
return false;
+ s = p;
} else if (*s == 'y') {
- if (!parse_n(s + 1, &new_y, &s))
+ new_y = simple_strtoul(s + 1, &p, 10);
+ if (p == s + 1)
return false;
+ s = p;
} else {
return false;
}
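
[The charlcd change swaps the driver-local parse_n() for simple_strtoul(); the `p == s + 1` comparison reproduces parse_n()'s rejection of an empty digit string, since simple_strtoul() leaves its end pointer at the start of the input when no digits are consumed. Minimal sketch of that idiom, illustrative only:]

	char *end;
	unsigned long v = simple_strtoul(str, &end, 10);

	if (end == str)
		return false;	/* no digits were parsed */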
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 296546ffed6c..98a31bafc8a2 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -496,20 +496,17 @@ static ssize_t node_read_vmstat(struct device *dev,
int n = 0;
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
- n += sprintf(buf+n, "%s %lu\n", vmstat_text[i],
+ n += sprintf(buf+n, "%s %lu\n", zone_stat_name(i),
sum_zone_node_page_state(nid, i));
#ifdef CONFIG_NUMA
for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
- n += sprintf(buf+n, "%s %lu\n",
- vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
+ n += sprintf(buf+n, "%s %lu\n", numa_stat_name(i),
sum_zone_numa_state(nid, i));
#endif
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
- n += sprintf(buf+n, "%s %lu\n",
- vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
- NR_VM_NUMA_STAT_ITEMS],
+ n += sprintf(buf+n, "%s %lu\n", node_stat_name(i),
node_page_state(pgdat, i));
return n;
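
[The node.c change replaces manual index arithmetic into vmstat_text[] with per-category name helpers, so the base offsets (NR_VM_ZONE_STAT_ITEMS and friends) live in one place. A hedged sketch of what such a helper looks like — simplified, assuming vmstat_text[] is in scope as in the real header:]

	/* Simplified: the helper applies the category's base offset
	 * into the shared vmstat_text[] array. */
	static inline const char *numa_stat_name(enum numa_stat_item item)
	{
		return vmstat_text[NR_VM_ZONE_STAT_ITEMS + item];
	}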
diff --git a/drivers/gpio/gpio-104-dio-48e.c b/drivers/gpio/gpio-104-dio-48e.c
index 400c09b905f8..1f7d9bbec0fc 100644
--- a/drivers/gpio/gpio-104-dio-48e.c
+++ b/drivers/gpio/gpio-104-dio-48e.c
@@ -178,46 +178,25 @@ static int dio48e_gpio_get(struct gpio_chip *chip, unsigned offset)
return !!(port_state & mask);
}
+static const size_t ports[] = { 0, 1, 2, 4, 5, 6 };
+
static int dio48e_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask,
unsigned long *bits)
{
struct dio48e_gpio *const dio48egpio = gpiochip_get_data(chip);
- size_t i;
- static const size_t ports[] = { 0, 1, 2, 4, 5, 6 };
- const unsigned int gpio_reg_size = 8;
- unsigned int bits_offset;
- size_t word_index;
- unsigned int word_offset;
- unsigned long word_mask;
- const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
+ unsigned long offset;
+ unsigned long gpio_mask;
+ unsigned int port_addr;
unsigned long port_state;
/* clear bits array to a clean slate */
bitmap_zero(bits, chip->ngpio);
- /* get bits are evaluated a gpio port register at a time */
- for (i = 0; i < ARRAY_SIZE(ports); i++) {
- /* gpio offset in bits array */
- bits_offset = i * gpio_reg_size;
-
- /* word index for bits array */
- word_index = BIT_WORD(bits_offset);
-
- /* gpio offset within current word of bits array */
- word_offset = bits_offset % BITS_PER_LONG;
-
- /* mask of get bits for current gpio within current word */
- word_mask = mask[word_index] & (port_mask << word_offset);
- if (!word_mask) {
- /* no get bits in this port so skip to next one */
- continue;
- }
-
- /* read bits from current gpio port */
- port_state = inb(dio48egpio->base + ports[i]);
+ for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+ port_addr = dio48egpio->base + ports[offset / 8];
+ port_state = inb(port_addr) & gpio_mask;
- /* store acquired bits at respective bits array offset */
- bits[word_index] |= (port_state << word_offset) & word_mask;
+ bitmap_set_value8(bits, port_state, offset);
}
return 0;
@@ -247,37 +226,27 @@ static void dio48e_gpio_set_multiple(struct gpio_chip *chip,
unsigned long *mask, unsigned long *bits)
{
struct dio48e_gpio *const dio48egpio = gpiochip_get_data(chip);
- unsigned int i;
- const unsigned int gpio_reg_size = 8;
- unsigned int port;
- unsigned int out_port;
- unsigned int bitmask;
+ unsigned long offset;
+ unsigned long gpio_mask;
+ size_t index;
+ unsigned int port_addr;
+ unsigned long bitmask;
unsigned long flags;
- /* set bits are evaluated a gpio register size at a time */
- for (i = 0; i < chip->ngpio; i += gpio_reg_size) {
- /* no more set bits in this mask word; skip to the next word */
- if (!mask[BIT_WORD(i)]) {
- i = (BIT_WORD(i) + 1) * BITS_PER_LONG - gpio_reg_size;
- continue;
- }
+ for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+ index = offset / 8;
+ port_addr = dio48egpio->base + ports[index];
- port = i / gpio_reg_size;
- out_port = (port > 2) ? port + 1 : port;
- bitmask = mask[BIT_WORD(i)] & bits[BIT_WORD(i)];
+ bitmask = bitmap_get_value8(bits, offset) & gpio_mask;
raw_spin_lock_irqsave(&dio48egpio->lock, flags);
/* update output state data and set device gpio register */
- dio48egpio->out_state[port] &= ~mask[BIT_WORD(i)];
- dio48egpio->out_state[port] |= bitmask;
- outb(dio48egpio->out_state[port], dio48egpio->base + out_port);
+ dio48egpio->out_state[index] &= ~gpio_mask;
+ dio48egpio->out_state[index] |= bitmask;
+ outb(dio48egpio->out_state[index], port_addr);
raw_spin_unlock_irqrestore(&dio48egpio->lock, flags);
-
- /* prepare for next gpio register set */
- mask[BIT_WORD(i)] >>= gpio_reg_size;
- bits[BIT_WORD(i)] >>= gpio_reg_size;
}
}
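
[The gpio driver conversions here and below all lean on the new 8-bit clump helpers: for_each_set_clump8() walks a bitmap eight bits at a time, yielding the bit offset of each clump and the mask of set bits within it, while bitmap_get_value8()/bitmap_set_value8() move a byte in or out of the bitmap at that offset. Sketch of the loop shape, mirroring the hunks above (base, ports and ngpio stand in for the driver's own fields):]

	unsigned long offset;		/* bit offset of the current clump */
	unsigned long gpio_mask;	/* set bits within the clump */

	for_each_set_clump8(offset, gpio_mask, mask, ngpio) {
		unsigned long state = inb(base + ports[offset / 8]) & gpio_mask;

		bitmap_set_value8(bits, state, offset);
	}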
diff --git a/drivers/gpio/gpio-104-idi-48.c b/drivers/gpio/gpio-104-idi-48.c
index c50329ab493a..d350ac0de06b 100644
--- a/drivers/gpio/gpio-104-idi-48.c
+++ b/drivers/gpio/gpio-104-idi-48.c
@@ -85,42 +85,20 @@ static int idi_48_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask,
unsigned long *bits)
{
struct idi_48_gpio *const idi48gpio = gpiochip_get_data(chip);
- size_t i;
+ unsigned long offset;
+ unsigned long gpio_mask;
static const size_t ports[] = { 0, 1, 2, 4, 5, 6 };
- const unsigned int gpio_reg_size = 8;
- unsigned int bits_offset;
- size_t word_index;
- unsigned int word_offset;
- unsigned long word_mask;
- const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
+ unsigned int port_addr;
unsigned long port_state;
/* clear bits array to a clean slate */
bitmap_zero(bits, chip->ngpio);
- /* get bits are evaluated a gpio port register at a time */
- for (i = 0; i < ARRAY_SIZE(ports); i++) {
- /* gpio offset in bits array */
- bits_offset = i * gpio_reg_size;
+ for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+ port_addr = idi48gpio->base + ports[offset / 8];
+ port_state = inb(port_addr) & gpio_mask;
- /* word index for bits array */
- word_index = BIT_WORD(bits_offset);
-
- /* gpio offset within current word of bits array */
- word_offset = bits_offset % BITS_PER_LONG;
-
- /* mask of get bits for current gpio within current word */
- word_mask = mask[word_index] & (port_mask << word_offset);
- if (!word_mask) {
- /* no get bits in this port so skip to next one */
- continue;
- }
-
- /* read bits from current gpio port */
- port_state = inb(idi48gpio->base + ports[i]);
-
- /* store acquired bits at respective bits array offset */
- bits[word_index] |= (port_state << word_offset) & word_mask;
+ bitmap_set_value8(bits, port_state, offset);
}
return 0;
diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c
index e81307f9754e..05637d585152 100644
--- a/drivers/gpio/gpio-74x164.c
+++ b/drivers/gpio/gpio-74x164.c
@@ -6,6 +6,7 @@
* Copyright (C) 2010 Miguel Gaio <miguel.gaio@efixo.com>
*/
+#include <linux/bitops.h>
#include <linux/gpio/consumer.h>
#include <linux/gpio/driver.h>
#include <linux/module.h>
@@ -72,20 +73,18 @@ static void gen_74x164_set_multiple(struct gpio_chip *gc, unsigned long *mask,
unsigned long *bits)
{
struct gen_74x164_chip *chip = gpiochip_get_data(gc);
- unsigned int i, idx, shift;
- u8 bank, bankmask;
+ unsigned long offset;
+ unsigned long bankmask;
+ size_t bank;
+ unsigned long bitmask;
mutex_lock(&chip->lock);
- for (i = 0, bank = chip->registers - 1; i < chip->registers;
- i++, bank--) {
- idx = i / sizeof(*mask);
- shift = i % sizeof(*mask) * BITS_PER_BYTE;
- bankmask = mask[idx] >> shift;
- if (!bankmask)
- continue;
+ for_each_set_clump8(offset, bankmask, mask, chip->registers * 8) {
+ bank = chip->registers - 1 - offset / 8;
+ bitmask = bitmap_get_value8(bits, offset) & bankmask;
chip->buffer[bank] &= ~bankmask;
- chip->buffer[bank] |= bankmask & (bits[idx] >> shift);
+ chip->buffer[bank] |= bitmask;
}
__gen_74x164_write_config(chip);
mutex_unlock(&chip->lock);
diff --git a/drivers/gpio/gpio-gpio-mm.c b/drivers/gpio/gpio-gpio-mm.c
index c22d6f94129c..b89b8c5ff1f5 100644
--- a/drivers/gpio/gpio-gpio-mm.c
+++ b/drivers/gpio/gpio-gpio-mm.c
@@ -167,46 +167,25 @@ static int gpiomm_gpio_get(struct gpio_chip *chip, unsigned int offset)
return !!(port_state & mask);
}
+static const size_t ports[] = { 0, 1, 2, 4, 5, 6 };
+
static int gpiomm_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask,
unsigned long *bits)
{
struct gpiomm_gpio *const gpiommgpio = gpiochip_get_data(chip);
- size_t i;
- static const size_t ports[] = { 0, 1, 2, 4, 5, 6 };
- const unsigned int gpio_reg_size = 8;
- unsigned int bits_offset;
- size_t word_index;
- unsigned int word_offset;
- unsigned long word_mask;
- const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
+ unsigned long offset;
+ unsigned long gpio_mask;
+ unsigned int port_addr;
unsigned long port_state;
/* clear bits array to a clean slate */
bitmap_zero(bits, chip->ngpio);
- /* get bits are evaluated a gpio port register at a time */
- for (i = 0; i < ARRAY_SIZE(ports); i++) {
- /* gpio offset in bits array */
- bits_offset = i * gpio_reg_size;
-
- /* word index for bits array */
- word_index = BIT_WORD(bits_offset);
-
- /* gpio offset within current word of bits array */
- word_offset = bits_offset % BITS_PER_LONG;
-
- /* mask of get bits for current gpio within current word */
- word_mask = mask[word_index] & (port_mask << word_offset);
- if (!word_mask) {
- /* no get bits in this port so skip to next one */
- continue;
- }
-
- /* read bits from current gpio port */
- port_state = inb(gpiommgpio->base + ports[i]);
+ for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+ port_addr = gpiommgpio->base + ports[offset / 8];
+ port_state = inb(port_addr) & gpio_mask;
- /* store acquired bits at respective bits array offset */
- bits[word_index] |= (port_state << word_offset) & word_mask;
+ bitmap_set_value8(bits, port_state, offset);
}
return 0;
@@ -237,37 +216,27 @@ static void gpiomm_gpio_set_multiple(struct gpio_chip *chip,
unsigned long *mask, unsigned long *bits)
{
struct gpiomm_gpio *const gpiommgpio = gpiochip_get_data(chip);
- unsigned int i;
- const unsigned int gpio_reg_size = 8;
- unsigned int port;
- unsigned int out_port;
- unsigned int bitmask;
+ unsigned long offset;
+ unsigned long gpio_mask;
+ size_t index;
+ unsigned int port_addr;
+ unsigned long bitmask;
unsigned long flags;
- /* set bits are evaluated a gpio register size at a time */
- for (i = 0; i < chip->ngpio; i += gpio_reg_size) {
- /* no more set bits in this mask word; skip to the next word */
- if (!mask[BIT_WORD(i)]) {
- i = (BIT_WORD(i) + 1) * BITS_PER_LONG - gpio_reg_size;
- continue;
- }
+ for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+ index = offset / 8;
+ port_addr = gpiommgpio->base + ports[index];
- port = i / gpio_reg_size;
- out_port = (port > 2) ? port + 1 : port;
- bitmask = mask[BIT_WORD(i)] & bits[BIT_WORD(i)];
+ bitmask = bitmap_get_value8(bits, offset) & gpio_mask;
spin_lock_irqsave(&gpiommgpio->lock, flags);
/* update output state data and set device gpio register */
- gpiommgpio->out_state[port] &= ~mask[BIT_WORD(i)];
- gpiommgpio->out_state[port] |= bitmask;
- outb(gpiommgpio->out_state[port], gpiommgpio->base + out_port);
+ gpiommgpio->out_state[index] &= ~gpio_mask;
+ gpiommgpio->out_state[index] |= bitmask;
+ outb(gpiommgpio->out_state[index], port_addr);
spin_unlock_irqrestore(&gpiommgpio->lock, flags);
-
- /* prepare for next gpio register set */
- mask[BIT_WORD(i)] >>= gpio_reg_size;
- bits[BIT_WORD(i)] >>= gpio_reg_size;
}
}
diff --git a/drivers/gpio/gpio-max3191x.c b/drivers/gpio/gpio-max3191x.c
index 0696d5a21431..310d1a248cae 100644
--- a/drivers/gpio/gpio-max3191x.c
+++ b/drivers/gpio/gpio-max3191x.c
@@ -31,6 +31,7 @@
*/
#include <linux/bitmap.h>
+#include <linux/bitops.h>
#include <linux/crc8.h>
#include <linux/gpio/consumer.h>
#include <linux/gpio/driver.h>
@@ -232,16 +233,20 @@ static int max3191x_get_multiple(struct gpio_chip *gpio, unsigned long *mask,
unsigned long *bits)
{
struct max3191x_chip *max3191x = gpiochip_get_data(gpio);
- int ret, bit = 0, wordlen = max3191x_wordlen(max3191x);
+ const unsigned int wordlen = max3191x_wordlen(max3191x);
+ int ret;
+ unsigned long bit;
+ unsigned long gpio_mask;
+ unsigned long in;
mutex_lock(&max3191x->lock);
ret = max3191x_readout_locked(max3191x);
if (ret)
goto out_unlock;
- while ((bit = find_next_bit(mask, gpio->ngpio, bit)) != gpio->ngpio) {
+ bitmap_zero(bits, gpio->ngpio);
+ for_each_set_clump8(bit, gpio_mask, mask, gpio->ngpio) {
unsigned int chipnum = bit / MAX3191X_NGPIO;
- unsigned long in, shift, index;
if (max3191x_chip_is_faulting(max3191x, chipnum)) {
ret = -EIO;
@@ -249,12 +254,8 @@ static int max3191x_get_multiple(struct gpio_chip *gpio, unsigned long *mask,
}
in = ((u8 *)max3191x->xfer.rx_buf)[chipnum * wordlen];
- shift = round_down(bit % BITS_PER_LONG, MAX3191X_NGPIO);
- index = bit / BITS_PER_LONG;
- bits[index] &= ~(mask[index] & (0xff << shift));
- bits[index] |= mask[index] & (in << shift); /* copy bits */
-
- bit = (chipnum + 1) * MAX3191X_NGPIO; /* go to next chip */
+ in &= gpio_mask;
+ bitmap_set_value8(bits, in, bit);
}
out_unlock:
diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index 82122c3c688a..6652bee01966 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -9,7 +9,7 @@
*/
#include <linux/acpi.h>
-#include <linux/bits.h>
+#include <linux/bitmap.h>
#include <linux/gpio/driver.h>
#include <linux/gpio/consumer.h>
#include <linux/i2c.h>
@@ -115,6 +115,7 @@ MODULE_DEVICE_TABLE(acpi, pca953x_acpi_ids);
#define MAX_BANK 5
#define BANK_SZ 8
+#define MAX_LINE (MAX_BANK * BANK_SZ)
#define NBANK(chip) DIV_ROUND_UP(chip->gpio_chip.ngpio, BANK_SZ)
@@ -146,10 +147,10 @@ struct pca953x_chip {
#ifdef CONFIG_GPIO_PCA953X_IRQ
struct mutex irq_lock;
- u8 irq_mask[MAX_BANK];
- u8 irq_stat[MAX_BANK];
- u8 irq_trig_raise[MAX_BANK];
- u8 irq_trig_fall[MAX_BANK];
+ DECLARE_BITMAP(irq_mask, MAX_LINE);
+ DECLARE_BITMAP(irq_stat, MAX_LINE);
+ DECLARE_BITMAP(irq_trig_raise, MAX_LINE);
+ DECLARE_BITMAP(irq_trig_fall, MAX_LINE);
struct irq_chip irq_chip;
#endif
atomic_t wakeup_path;
@@ -333,12 +334,16 @@ static u8 pca953x_recalc_addr(struct pca953x_chip *chip, int reg, int off,
return regaddr;
}
-static int pca953x_write_regs(struct pca953x_chip *chip, int reg, u8 *val)
+static int pca953x_write_regs(struct pca953x_chip *chip, int reg, unsigned long *val)
{
u8 regaddr = pca953x_recalc_addr(chip, reg, 0, true, true);
- int ret;
+ u8 value[MAX_BANK];
+ int i, ret;
+
+ for (i = 0; i < NBANK(chip); i++)
+ value[i] = bitmap_get_value8(val, i * BANK_SZ);
- ret = regmap_bulk_write(chip->regmap, regaddr, val, NBANK(chip));
+ ret = regmap_bulk_write(chip->regmap, regaddr, value, NBANK(chip));
if (ret < 0) {
dev_err(&chip->client->dev, "failed writing register\n");
return ret;
@@ -347,17 +352,21 @@ static int pca953x_write_regs(struct pca953x_chip *chip, int reg, u8 *val)
return 0;
}
-static int pca953x_read_regs(struct pca953x_chip *chip, int reg, u8 *val)
+static int pca953x_read_regs(struct pca953x_chip *chip, int reg, unsigned long *val)
{
u8 regaddr = pca953x_recalc_addr(chip, reg, 0, false, true);
- int ret;
+ u8 value[MAX_BANK];
+ int i, ret;
- ret = regmap_bulk_read(chip->regmap, regaddr, val, NBANK(chip));
+ ret = regmap_bulk_read(chip->regmap, regaddr, value, NBANK(chip));
if (ret < 0) {
dev_err(&chip->client->dev, "failed reading register\n");
return ret;
}
+ for (i = 0; i < NBANK(chip); i++)
+ bitmap_set_value8(val, value[i], i * BANK_SZ);
+
return 0;
}
@@ -412,7 +421,9 @@ static int pca953x_gpio_get_value(struct gpio_chip *gc, unsigned off)
ret = regmap_read(chip->regmap, inreg, &reg_val);
mutex_unlock(&chip->i2c_lock);
if (ret < 0) {
- /* NOTE: diagnostic already emitted; that's all we should
+ /*
+ * NOTE:
+ * diagnostic already emitted; that's all we should
* do unless gpio_*_value_cansleep() calls become different
* from their nonsleeping siblings (and report faults).
*/
@@ -459,9 +470,7 @@ static void pca953x_gpio_set_multiple(struct gpio_chip *gc,
unsigned long *mask, unsigned long *bits)
{
struct pca953x_chip *chip = gpiochip_get_data(gc);
- unsigned int bank_mask, bank_val;
- int bank;
- u8 reg_val[MAX_BANK];
+ DECLARE_BITMAP(reg_val, MAX_LINE);
int ret;
mutex_lock(&chip->i2c_lock);
@@ -469,16 +478,7 @@ static void pca953x_gpio_set_multiple(struct gpio_chip *gc,
if (ret)
goto exit;
- for (bank = 0; bank < NBANK(chip); bank++) {
- bank_mask = mask[bank / sizeof(*mask)] >>
- ((bank % sizeof(*mask)) * 8);
- if (bank_mask) {
- bank_val = bits[bank / sizeof(*bits)] >>
- ((bank % sizeof(*bits)) * 8);
- bank_val &= bank_mask;
- reg_val[bank] = (reg_val[bank] & ~bank_mask) | bank_val;
- }
- }
+ bitmap_replace(reg_val, reg_val, bits, mask, gc->ngpio);
pca953x_write_regs(chip, chip->regs->output, reg_val);
exit:
@@ -605,10 +605,9 @@ static void pca953x_irq_bus_sync_unlock(struct irq_data *d)
{
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
struct pca953x_chip *chip = gpiochip_get_data(gc);
- u8 new_irqs;
- int level, i;
- u8 invert_irq_mask[MAX_BANK];
- u8 reg_direction[MAX_BANK];
+ DECLARE_BITMAP(irq_mask, MAX_LINE);
+ DECLARE_BITMAP(reg_direction, MAX_LINE);
+ int level;
pca953x_read_regs(chip, chip->regs->direction, reg_direction);
@@ -616,25 +615,18 @@ static void pca953x_irq_bus_sync_unlock(struct irq_data *d)
/* Enable latch on interrupt-enabled inputs */
pca953x_write_regs(chip, PCAL953X_IN_LATCH, chip->irq_mask);
- for (i = 0; i < NBANK(chip); i++)
- invert_irq_mask[i] = ~chip->irq_mask[i];
+ bitmap_complement(irq_mask, chip->irq_mask, gc->ngpio);
/* Unmask enabled interrupts */
- pca953x_write_regs(chip, PCAL953X_INT_MASK, invert_irq_mask);
+ pca953x_write_regs(chip, PCAL953X_INT_MASK, irq_mask);
}
+ bitmap_or(irq_mask, chip->irq_trig_fall, chip->irq_trig_raise, gc->ngpio);
+ bitmap_and(irq_mask, irq_mask, reg_direction, gc->ngpio);
+
	/* Look for any newly set up interrupt */
- for (i = 0; i < NBANK(chip); i++) {
- new_irqs = chip->irq_trig_fall[i] | chip->irq_trig_raise[i];
- new_irqs &= reg_direction[i];
-
- while (new_irqs) {
- level = __ffs(new_irqs);
- pca953x_gpio_direction_input(&chip->gpio_chip,
- level + (BANK_SZ * i));
- new_irqs &= ~(1 << level);
- }
- }
+ for_each_set_bit(level, irq_mask, gc->ngpio)
+ pca953x_gpio_direction_input(&chip->gpio_chip, level);
mutex_unlock(&chip->irq_lock);
}
@@ -675,15 +667,15 @@ static void pca953x_irq_shutdown(struct irq_data *d)
chip->irq_trig_fall[d->hwirq / BANK_SZ] &= ~mask;
}
-static bool pca953x_irq_pending(struct pca953x_chip *chip, u8 *pending)
+static bool pca953x_irq_pending(struct pca953x_chip *chip, unsigned long *pending)
{
- u8 cur_stat[MAX_BANK];
- u8 old_stat[MAX_BANK];
- bool pending_seen = false;
- bool trigger_seen = false;
- u8 trigger[MAX_BANK];
- u8 reg_direction[MAX_BANK];
- int ret, i;
+ struct gpio_chip *gc = &chip->gpio_chip;
+ DECLARE_BITMAP(reg_direction, MAX_LINE);
+ DECLARE_BITMAP(old_stat, MAX_LINE);
+ DECLARE_BITMAP(cur_stat, MAX_LINE);
+ DECLARE_BITMAP(new_stat, MAX_LINE);
+ DECLARE_BITMAP(trigger, MAX_LINE);
+ int ret;
if (chip->driver_data & PCA_PCAL) {
/* Read the current interrupt status from the device */
@@ -692,20 +684,16 @@ static bool pca953x_irq_pending(struct pca953x_chip *chip, u8 *pending)
return false;
/* Check latched inputs and clear interrupt status */
- ret = pca953x_read_regs(chip, PCA953X_INPUT, cur_stat);
+ ret = pca953x_read_regs(chip, chip->regs->input, cur_stat);
if (ret)
return false;
- for (i = 0; i < NBANK(chip); i++) {
- /* Apply filter for rising/falling edge selection */
- pending[i] = (~cur_stat[i] & chip->irq_trig_fall[i]) |
- (cur_stat[i] & chip->irq_trig_raise[i]);
- pending[i] &= trigger[i];
- if (pending[i])
- pending_seen = true;
- }
+ /* Apply filter for rising/falling edge selection */
+ bitmap_replace(new_stat, chip->irq_trig_fall, chip->irq_trig_raise, cur_stat, gc->ngpio);
+
+ bitmap_and(pending, new_stat, trigger, gc->ngpio);
- return pending_seen;
+ return !bitmap_empty(pending, gc->ngpio);
}
ret = pca953x_read_regs(chip, chip->regs->input, cur_stat);
@@ -714,64 +702,49 @@ static bool pca953x_irq_pending(struct pca953x_chip *chip, u8 *pending)
/* Remove output pins from the equation */
pca953x_read_regs(chip, chip->regs->direction, reg_direction);
- for (i = 0; i < NBANK(chip); i++)
- cur_stat[i] &= reg_direction[i];
- memcpy(old_stat, chip->irq_stat, NBANK(chip));
+ bitmap_copy(old_stat, chip->irq_stat, gc->ngpio);
- for (i = 0; i < NBANK(chip); i++) {
- trigger[i] = (cur_stat[i] ^ old_stat[i]) & chip->irq_mask[i];
- if (trigger[i])
- trigger_seen = true;
- }
+ bitmap_and(new_stat, cur_stat, reg_direction, gc->ngpio);
+ bitmap_xor(cur_stat, new_stat, old_stat, gc->ngpio);
+ bitmap_and(trigger, cur_stat, chip->irq_mask, gc->ngpio);
- if (!trigger_seen)
+ if (bitmap_empty(trigger, gc->ngpio))
return false;
- memcpy(chip->irq_stat, cur_stat, NBANK(chip));
+ bitmap_copy(chip->irq_stat, new_stat, gc->ngpio);
- for (i = 0; i < NBANK(chip); i++) {
- pending[i] = (old_stat[i] & chip->irq_trig_fall[i]) |
- (cur_stat[i] & chip->irq_trig_raise[i]);
- pending[i] &= trigger[i];
- if (pending[i])
- pending_seen = true;
- }
+ bitmap_and(cur_stat, chip->irq_trig_fall, old_stat, gc->ngpio);
+ bitmap_and(old_stat, chip->irq_trig_raise, new_stat, gc->ngpio);
+ bitmap_or(new_stat, old_stat, cur_stat, gc->ngpio);
+ bitmap_and(pending, new_stat, trigger, gc->ngpio);
- return pending_seen;
+ return !bitmap_empty(pending, gc->ngpio);
}
static irqreturn_t pca953x_irq_handler(int irq, void *devid)
{
struct pca953x_chip *chip = devid;
- u8 pending[MAX_BANK];
- u8 level;
- unsigned nhandled = 0;
- int i;
+ struct gpio_chip *gc = &chip->gpio_chip;
+ DECLARE_BITMAP(pending, MAX_LINE);
+ int level;
if (!pca953x_irq_pending(chip, pending))
return IRQ_NONE;
- for (i = 0; i < NBANK(chip); i++) {
- while (pending[i]) {
- level = __ffs(pending[i]);
- handle_nested_irq(irq_find_mapping(chip->gpio_chip.irq.domain,
- level + (BANK_SZ * i)));
- pending[i] &= ~(1 << level);
- nhandled++;
- }
- }
+ for_each_set_bit(level, pending, gc->ngpio)
+ handle_nested_irq(irq_find_mapping(gc->irq.domain, level));
- return (nhandled > 0) ? IRQ_HANDLED : IRQ_NONE;
+ return IRQ_HANDLED;
}
-static int pca953x_irq_setup(struct pca953x_chip *chip,
- int irq_base)
+static int pca953x_irq_setup(struct pca953x_chip *chip, int irq_base)
{
struct i2c_client *client = chip->client;
struct irq_chip *irq_chip = &chip->irq_chip;
- u8 reg_direction[MAX_BANK];
- int ret, i;
+ DECLARE_BITMAP(reg_direction, MAX_LINE);
+ DECLARE_BITMAP(irq_stat, MAX_LINE);
+ int ret;
if (!client->irq)
return 0;
@@ -782,7 +755,7 @@ static int pca953x_irq_setup(struct pca953x_chip *chip,
if (!(chip->driver_data & PCA_INT))
return 0;
- ret = pca953x_read_regs(chip, chip->regs->input, chip->irq_stat);
+ ret = pca953x_read_regs(chip, chip->regs->input, irq_stat);
if (ret)
return ret;
@@ -792,8 +765,7 @@ static int pca953x_irq_setup(struct pca953x_chip *chip,
* this purpose.
*/
pca953x_read_regs(chip, chip->regs->direction, reg_direction);
- for (i = 0; i < NBANK(chip); i++)
- chip->irq_stat[i] &= reg_direction[i];
+ bitmap_and(chip->irq_stat, irq_stat, reg_direction, chip->gpio_chip.ngpio);
mutex_init(&chip->irq_lock);
ret = devm_request_threaded_irq(&client->dev, client->irq,
@@ -816,9 +788,9 @@ static int pca953x_irq_setup(struct pca953x_chip *chip,
irq_chip->irq_set_type = pca953x_irq_set_type;
irq_chip->irq_shutdown = pca953x_irq_shutdown;
- ret = gpiochip_irqchip_add_nested(&chip->gpio_chip, irq_chip,
- irq_base, handle_simple_irq,
- IRQ_TYPE_NONE);
+ ret = gpiochip_irqchip_add_nested(&chip->gpio_chip, irq_chip,
+ irq_base, handle_simple_irq,
+ IRQ_TYPE_NONE);
if (ret) {
dev_err(&client->dev,
"could not connect irqchip to gpiochip\n");
@@ -845,8 +817,8 @@ static int pca953x_irq_setup(struct pca953x_chip *chip,
static int device_pca95xx_init(struct pca953x_chip *chip, u32 invert)
{
+ DECLARE_BITMAP(val, MAX_LINE);
int ret;
- u8 val[MAX_BANK];
ret = regcache_sync_region(chip->regmap, chip->regs->output,
chip->regs->output + NBANK(chip));
@@ -860,9 +832,9 @@ static int device_pca95xx_init(struct pca953x_chip *chip, u32 invert)
/* set platform specific polarity inversion */
if (invert)
- memset(val, 0xFF, NBANK(chip));
+ bitmap_fill(val, MAX_LINE);
else
- memset(val, 0, NBANK(chip));
+ bitmap_zero(val, MAX_LINE);
ret = pca953x_write_regs(chip, chip->regs->invert, val);
out:
@@ -871,8 +843,8 @@ out:
static int device_pca957x_init(struct pca953x_chip *chip, u32 invert)
{
+ DECLARE_BITMAP(val, MAX_LINE);
int ret;
- u8 val[MAX_BANK];
ret = device_pca95xx_init(chip, invert);
if (ret)
@@ -892,7 +864,7 @@ out:
static const struct of_device_id pca953x_dt_ids[];
static int pca953x_probe(struct i2c_client *client,
- const struct i2c_device_id *i2c_id)
+ const struct i2c_device_id *i2c_id)
{
struct pca953x_platform_data *pdata;
struct pca953x_chip *chip;
@@ -901,8 +873,7 @@ static int pca953x_probe(struct i2c_client *client,
u32 invert = 0;
struct regulator *reg;
- chip = devm_kzalloc(&client->dev,
- sizeof(struct pca953x_chip), GFP_KERNEL);
+ chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL);
if (chip == NULL)
return -ENOMEM;
@@ -1016,7 +987,7 @@ static int pca953x_probe(struct i2c_client *client,
if (pdata && pdata->setup) {
ret = pdata->setup(client, chip->gpio_chip.base,
- chip->gpio_chip.ngpio, pdata->context);
+ chip->gpio_chip.ngpio, pdata->context);
if (ret < 0)
dev_warn(&client->dev, "setup failed, %d\n", ret);
}
@@ -1036,7 +1007,7 @@ static int pca953x_remove(struct i2c_client *client)
if (pdata && pdata->teardown) {
ret = pdata->teardown(client, chip->gpio_chip.base,
- chip->gpio_chip.ngpio, pdata->context);
+ chip->gpio_chip.ngpio, pdata->context);
if (ret < 0)
dev_err(&client->dev, "teardown failed, %d\n", ret);
} else {
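
The pca953x_gpio_set_multiple() hunk above collapses the per-bank masking loop into a single bitmap_replace() call. Going by the lib/bitmap.c addition in this series, bitmap_replace(dst, old, new, mask, nbits) computes dst = (old & ~mask) | (new & mask); a sketch of the equivalent open-coded loop under that assumption:

/* Equivalent of bitmap_replace(dst, old, new, mask, nbits): */
static void bitmap_replace_sketch(unsigned long *dst,
				  const unsigned long *old,
				  const unsigned long *new,
				  const unsigned long *mask,
				  unsigned int nbits)
{
	unsigned int k;

	for (k = 0; k < BITS_TO_LONGS(nbits); k++)
		dst[k] = (old[k] & ~mask[k]) | (new[k] & mask[k]);
}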
diff --git a/drivers/gpio/gpio-pci-idio-16.c b/drivers/gpio/gpio-pci-idio-16.c
index df51dd08bdfe..638d6656ce73 100644
--- a/drivers/gpio/gpio-pci-idio-16.c
+++ b/drivers/gpio/gpio-pci-idio-16.c
@@ -100,45 +100,23 @@ static int idio_16_gpio_get_multiple(struct gpio_chip *chip,
unsigned long *mask, unsigned long *bits)
{
struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
- size_t i;
- const unsigned int gpio_reg_size = 8;
- unsigned int bits_offset;
- size_t word_index;
- unsigned int word_offset;
- unsigned long word_mask;
- const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
- unsigned long port_state;
+ unsigned long offset;
+ unsigned long gpio_mask;
void __iomem *ports[] = {
&idio16gpio->reg->out0_7, &idio16gpio->reg->out8_15,
&idio16gpio->reg->in0_7, &idio16gpio->reg->in8_15,
};
+ void __iomem *port_addr;
+ unsigned long port_state;
/* clear bits array to a clean slate */
bitmap_zero(bits, chip->ngpio);
- /* get bits are evaluated a gpio port register at a time */
- for (i = 0; i < ARRAY_SIZE(ports); i++) {
- /* gpio offset in bits array */
- bits_offset = i * gpio_reg_size;
-
- /* word index for bits array */
- word_index = BIT_WORD(bits_offset);
-
- /* gpio offset within current word of bits array */
- word_offset = bits_offset % BITS_PER_LONG;
-
- /* mask of get bits for current gpio within current word */
- word_mask = mask[word_index] & (port_mask << word_offset);
- if (!word_mask) {
- /* no get bits in this port so skip to next one */
- continue;
- }
+ for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+ port_addr = ports[offset / 8];
+ port_state = ioread8(port_addr) & gpio_mask;
- /* read bits from current gpio port */
- port_state = ioread8(ports[i]);
-
- /* store acquired bits at respective bits array offset */
- bits[word_index] |= (port_state << word_offset) & word_mask;
+ bitmap_set_value8(bits, port_state, offset);
}
return 0;
@@ -178,30 +156,31 @@ static void idio_16_gpio_set_multiple(struct gpio_chip *chip,
unsigned long *mask, unsigned long *bits)
{
struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
+ unsigned long offset;
+ unsigned long gpio_mask;
+ void __iomem *ports[] = {
+ &idio16gpio->reg->out0_7, &idio16gpio->reg->out8_15,
+ };
+ size_t index;
+ void __iomem *port_addr;
+ unsigned long bitmask;
unsigned long flags;
- unsigned int out_state;
+ unsigned long out_state;
- raw_spin_lock_irqsave(&idio16gpio->lock, flags);
+ for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+ index = offset / 8;
+ port_addr = ports[index];
- /* process output lines 0-7 */
- if (*mask & 0xFF) {
- out_state = ioread8(&idio16gpio->reg->out0_7) & ~*mask;
- out_state |= *mask & *bits;
- iowrite8(out_state, &idio16gpio->reg->out0_7);
- }
+ bitmask = bitmap_get_value8(bits, offset) & gpio_mask;
+
+ raw_spin_lock_irqsave(&idio16gpio->lock, flags);
- /* shift to next output line word */
- *mask >>= 8;
+ out_state = ioread8(port_addr) & ~gpio_mask;
+ out_state |= bitmask;
+ iowrite8(out_state, port_addr);
- /* process output lines 8-15 */
- if (*mask & 0xFF) {
- *bits >>= 8;
- out_state = ioread8(&idio16gpio->reg->out8_15) & ~*mask;
- out_state |= *mask & *bits;
- iowrite8(out_state, &idio16gpio->reg->out8_15);
+ raw_spin_unlock_irqrestore(&idio16gpio->lock, flags);
}
-
- raw_spin_unlock_irqrestore(&idio16gpio->lock, flags);
}
static void idio_16_irq_ack(struct irq_data *data)
diff --git a/drivers/gpio/gpio-pcie-idio-24.c b/drivers/gpio/gpio-pcie-idio-24.c
index 44c1e4fc489f..1d475794a50f 100644
--- a/drivers/gpio/gpio-pcie-idio-24.c
+++ b/drivers/gpio/gpio-pcie-idio-24.c
@@ -201,52 +201,34 @@ static int idio_24_gpio_get_multiple(struct gpio_chip *chip,
unsigned long *mask, unsigned long *bits)
{
struct idio_24_gpio *const idio24gpio = gpiochip_get_data(chip);
- size_t i;
- const unsigned int gpio_reg_size = 8;
- unsigned int bits_offset;
- size_t word_index;
- unsigned int word_offset;
- unsigned long word_mask;
- const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
- unsigned long port_state;
+ unsigned long offset;
+ unsigned long gpio_mask;
void __iomem *ports[] = {
&idio24gpio->reg->out0_7, &idio24gpio->reg->out8_15,
&idio24gpio->reg->out16_23, &idio24gpio->reg->in0_7,
&idio24gpio->reg->in8_15, &idio24gpio->reg->in16_23,
};
+ size_t index;
+ unsigned long port_state;
const unsigned long out_mode_mask = BIT(1);
/* clear bits array to a clean slate */
bitmap_zero(bits, chip->ngpio);
- /* get bits are evaluated a gpio port register at a time */
- for (i = 0; i < ARRAY_SIZE(ports) + 1; i++) {
- /* gpio offset in bits array */
- bits_offset = i * gpio_reg_size;
-
- /* word index for bits array */
- word_index = BIT_WORD(bits_offset);
-
- /* gpio offset within current word of bits array */
- word_offset = bits_offset % BITS_PER_LONG;
-
- /* mask of get bits for current gpio within current word */
- word_mask = mask[word_index] & (port_mask << word_offset);
- if (!word_mask) {
- /* no get bits in this port so skip to next one */
- continue;
- }
+ for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+ index = offset / 8;
/* read bits from current gpio port (port 6 is TTL GPIO) */
- if (i < 6)
- port_state = ioread8(ports[i]);
+ if (index < 6)
+ port_state = ioread8(ports[index]);
else if (ioread8(&idio24gpio->reg->ctl) & out_mode_mask)
port_state = ioread8(&idio24gpio->reg->ttl_out0_7);
else
port_state = ioread8(&idio24gpio->reg->ttl_in0_7);
- /* store acquired bits at respective bits array offset */
- bits[word_index] |= (port_state << word_offset) & word_mask;
+ port_state &= gpio_mask;
+
+ bitmap_set_value8(bits, port_state, offset);
}
return 0;
@@ -297,59 +279,48 @@ static void idio_24_gpio_set_multiple(struct gpio_chip *chip,
unsigned long *mask, unsigned long *bits)
{
struct idio_24_gpio *const idio24gpio = gpiochip_get_data(chip);
- size_t i;
- unsigned long bits_offset;
+ unsigned long offset;
unsigned long gpio_mask;
- const unsigned int gpio_reg_size = 8;
- const unsigned long port_mask = GENMASK(gpio_reg_size, 0);
- unsigned long flags;
- unsigned int out_state;
void __iomem *ports[] = {
&idio24gpio->reg->out0_7, &idio24gpio->reg->out8_15,
&idio24gpio->reg->out16_23
};
+ size_t index;
+ unsigned long bitmask;
+ unsigned long flags;
+ unsigned long out_state;
const unsigned long out_mode_mask = BIT(1);
- const unsigned int ttl_offset = 48;
- const size_t ttl_i = BIT_WORD(ttl_offset);
- const unsigned int word_offset = ttl_offset % BITS_PER_LONG;
- const unsigned long ttl_mask = (mask[ttl_i] >> word_offset) & port_mask;
- const unsigned long ttl_bits = (bits[ttl_i] >> word_offset) & ttl_mask;
-
- /* set bits are processed a gpio port register at a time */
- for (i = 0; i < ARRAY_SIZE(ports); i++) {
- /* gpio offset in bits array */
- bits_offset = i * gpio_reg_size;
-
- /* check if any set bits for current port */
- gpio_mask = (*mask >> bits_offset) & port_mask;
- if (!gpio_mask) {
- /* no set bits for this port so move on to next port */
- continue;
- }
- raw_spin_lock_irqsave(&idio24gpio->lock, flags);
+ for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+ index = offset / 8;
- /* process output lines */
- out_state = ioread8(ports[i]) & ~gpio_mask;
- out_state |= (*bits >> bits_offset) & gpio_mask;
- iowrite8(out_state, ports[i]);
+ bitmask = bitmap_get_value8(bits, offset) & gpio_mask;
- raw_spin_unlock_irqrestore(&idio24gpio->lock, flags);
- }
+ raw_spin_lock_irqsave(&idio24gpio->lock, flags);
- /* check if setting TTL lines and if they are in output mode */
- if (!ttl_mask || !(ioread8(&idio24gpio->reg->ctl) & out_mode_mask))
- return;
+ /* read bits from current gpio port (port 6 is TTL GPIO) */
+ if (index < 6) {
+ out_state = ioread8(ports[index]);
+ } else if (ioread8(&idio24gpio->reg->ctl) & out_mode_mask) {
+ out_state = ioread8(&idio24gpio->reg->ttl_out0_7);
+ } else {
+ /* skip TTL GPIO if set for input */
+ raw_spin_unlock_irqrestore(&idio24gpio->lock, flags);
+ continue;
+ }
- /* handle TTL output */
- raw_spin_lock_irqsave(&idio24gpio->lock, flags);
+ /* set requested bit states */
+ out_state &= ~gpio_mask;
+ out_state |= bitmask;
- /* process output lines */
- out_state = ioread8(&idio24gpio->reg->ttl_out0_7) & ~ttl_mask;
- out_state |= ttl_bits;
- iowrite8(out_state, &idio24gpio->reg->ttl_out0_7);
+ /* write bits for current gpio port (port 6 is TTL GPIO) */
+ if (index < 6)
+ iowrite8(out_state, ports[index]);
+ else
+ iowrite8(out_state, &idio24gpio->reg->ttl_out0_7);
- raw_spin_unlock_irqrestore(&idio24gpio->lock, flags);
+ raw_spin_unlock_irqrestore(&idio24gpio->lock, flags);
+ }
}
static void idio_24_irq_ack(struct irq_data *data)
diff --git a/drivers/gpio/gpio-pisosr.c b/drivers/gpio/gpio-pisosr.c
index 1331b2a94679..6698feabaced 100644
--- a/drivers/gpio/gpio-pisosr.c
+++ b/drivers/gpio/gpio-pisosr.c
@@ -96,16 +96,16 @@ static int pisosr_gpio_get_multiple(struct gpio_chip *chip,
unsigned long *mask, unsigned long *bits)
{
struct pisosr_gpio *gpio = gpiochip_get_data(chip);
- unsigned int nbytes = DIV_ROUND_UP(chip->ngpio, 8);
- unsigned int i, j;
+ unsigned long offset;
+ unsigned long gpio_mask;
+ unsigned long buffer_state;
pisosr_gpio_refresh(gpio);
bitmap_zero(bits, chip->ngpio);
- for (i = 0; i < nbytes; i++) {
- j = i / sizeof(unsigned long);
- bits[j] |= ((unsigned long) gpio->buffer[i])
- << (8 * (i % sizeof(unsigned long)));
+ for_each_set_clump8(offset, gpio_mask, mask, chip->ngpio) {
+ buffer_state = gpio->buffer[offset / 8] & gpio_mask;
+ bitmap_set_value8(bits, buffer_state, offset);
}
return 0;
diff --git a/drivers/gpio/gpio-uniphier.c b/drivers/gpio/gpio-uniphier.c
index bd203e8fa58e..7ec97499b7f7 100644
--- a/drivers/gpio/gpio-uniphier.c
+++ b/drivers/gpio/gpio-uniphier.c
@@ -15,9 +15,6 @@
#include <linux/spinlock.h>
#include <dt-bindings/gpio/uniphier-gpio.h>
-#define UNIPHIER_GPIO_BANK_MASK \
- GENMASK((UNIPHIER_GPIO_LINES_PER_BANK) - 1, 0)
-
#define UNIPHIER_GPIO_IRQ_MAX_NUM 24
#define UNIPHIER_GPIO_PORT_DATA 0x0 /* data */
@@ -150,15 +147,11 @@ static void uniphier_gpio_set(struct gpio_chip *chip,
static void uniphier_gpio_set_multiple(struct gpio_chip *chip,
unsigned long *mask, unsigned long *bits)
{
- unsigned int bank, shift, bank_mask, bank_bits;
- int i;
+ unsigned long i, bank, bank_mask, bank_bits;
- for (i = 0; i < chip->ngpio; i += UNIPHIER_GPIO_LINES_PER_BANK) {
+ for_each_set_clump8(i, bank_mask, mask, chip->ngpio) {
bank = i / UNIPHIER_GPIO_LINES_PER_BANK;
- shift = i % BITS_PER_LONG;
- bank_mask = (mask[BIT_WORD(i)] >> shift) &
- UNIPHIER_GPIO_BANK_MASK;
- bank_bits = bits[BIT_WORD(i)] >> shift;
+ bank_bits = bitmap_get_value8(bits, i);
uniphier_gpio_bank_write(chip, bank, UNIPHIER_GPIO_PORT_DATA,
bank_mask, bank_bits);
diff --git a/drivers/gpio/gpio-ws16c48.c b/drivers/gpio/gpio-ws16c48.c
index fe456bea81f6..cb510df2b014 100644
--- a/drivers/gpio/gpio-ws16c48.c
+++ b/drivers/gpio/gpio-ws16c48.c
@@ -129,42 +129,19 @@ static int ws16c48_gpio_get_multiple(struct gpio_chip *chip,
unsigned long *mask, unsigned long *bits)
{
struct ws16c48_gpio *const ws16c48gpio = gpiochip_get_data(chip);
- const unsigned int gpio_reg_size = 8;
- size_t i;
- const size_t num_ports = chip->ngpio / gpio_reg_size;
- unsigned int bits_offset;
- size_t word_index;
- unsigned int word_offset;
- unsigned long word_mask;
- const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
+ unsigned long offset;
+ unsigned long gpio_mask;
+ unsigned int port_addr;
unsigned long port_state;
/* clear bits array to a clean slate */
bitmap_zero(bits, chip->ngpio);
- /* get bits are evaluated a gpio port register at a time */
- for (i = 0; i < num_ports; i++) {
- /* gpio offset in bits array */
- bits_offset = i * gpio_reg_size;
+ for_each_set_clump8(offset, gpio_mask, mask, chip->ngpio) {
+ port_addr = ws16c48gpio->base + offset / 8;
+ port_state = inb(port_addr) & gpio_mask;
- /* word index for bits array */
- word_index = BIT_WORD(bits_offset);
-
- /* gpio offset within current word of bits array */
- word_offset = bits_offset % BITS_PER_LONG;
-
- /* mask of get bits for current gpio within current word */
- word_mask = mask[word_index] & (port_mask << word_offset);
- if (!word_mask) {
- /* no get bits in this port so skip to next one */
- continue;
- }
-
- /* read bits from current gpio port */
- port_state = inb(ws16c48gpio->base + i);
-
- /* store acquired bits at respective bits array offset */
- bits[word_index] |= (port_state << word_offset) & word_mask;
+ bitmap_set_value8(bits, port_state, offset);
}
return 0;
@@ -198,39 +175,29 @@ static void ws16c48_gpio_set_multiple(struct gpio_chip *chip,
unsigned long *mask, unsigned long *bits)
{
struct ws16c48_gpio *const ws16c48gpio = gpiochip_get_data(chip);
- unsigned int i;
- const unsigned int gpio_reg_size = 8;
- unsigned int port;
- unsigned int iomask;
- unsigned int bitmask;
+ unsigned long offset;
+ unsigned long gpio_mask;
+ size_t index;
+ unsigned int port_addr;
+ unsigned long bitmask;
unsigned long flags;
- /* set bits are evaluated a gpio register size at a time */
- for (i = 0; i < chip->ngpio; i += gpio_reg_size) {
- /* no more set bits in this mask word; skip to the next word */
- if (!mask[BIT_WORD(i)]) {
- i = (BIT_WORD(i) + 1) * BITS_PER_LONG - gpio_reg_size;
- continue;
- }
-
- port = i / gpio_reg_size;
+ for_each_set_clump8(offset, gpio_mask, mask, chip->ngpio) {
+ index = offset / 8;
+ port_addr = ws16c48gpio->base + index;
/* mask out GPIO configured for input */
- iomask = mask[BIT_WORD(i)] & ~ws16c48gpio->io_state[port];
- bitmask = iomask & bits[BIT_WORD(i)];
+ gpio_mask &= ~ws16c48gpio->io_state[index];
+ bitmask = bitmap_get_value8(bits, offset) & gpio_mask;
raw_spin_lock_irqsave(&ws16c48gpio->lock, flags);
/* update output state data and set device gpio register */
- ws16c48gpio->out_state[port] &= ~iomask;
- ws16c48gpio->out_state[port] |= bitmask;
- outb(ws16c48gpio->out_state[port], ws16c48gpio->base + port);
+ ws16c48gpio->out_state[index] &= ~gpio_mask;
+ ws16c48gpio->out_state[index] |= bitmask;
+ outb(ws16c48gpio->out_state[index], port_addr);
raw_spin_unlock_irqrestore(&ws16c48gpio->lock, flags);
-
- /* prepare for next gpio register set */
- mask[BIT_WORD(i)] >>= gpio_reg_size;
- bits[BIT_WORD(i)] >>= gpio_reg_size;
}
}
diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c
index 892ce636ef72..6ee04803c362 100644
--- a/drivers/gpu/drm/drm_property.c
+++ b/drivers/gpu/drm/drm_property.c
@@ -561,7 +561,7 @@ drm_property_create_blob(struct drm_device *dev, size_t length,
struct drm_property_blob *blob;
int ret;
- if (!length || length > ULONG_MAX - sizeof(struct drm_property_blob))
+ if (!length || length > INT_MAX - sizeof(struct drm_property_blob))
return ERR_PTR(-EINVAL);
blob = kvzalloc(sizeof(struct drm_property_blob)+length, GFP_KERNEL);
diff --git a/drivers/misc/sram-exec.c b/drivers/misc/sram-exec.c
index 426ad912b441..d054e2842a5f 100644
--- a/drivers/misc/sram-exec.c
+++ b/drivers/misc/sram-exec.c
@@ -96,7 +96,7 @@ void *sram_exec_copy(struct gen_pool *pool, void *dst, void *src,
if (!part)
return NULL;
- if (!addr_in_gen_pool(pool, (unsigned long)dst, size))
+ if (!gen_pool_has_addr(pool, (unsigned long)dst, size))
return NULL;
base = (unsigned long)part->base;
diff --git a/drivers/rapidio/rio-access.c b/drivers/rapidio/rio-access.c
index 33c8d1ecc988..f9e10647f94e 100644
--- a/drivers/rapidio/rio-access.c
+++ b/drivers/rapidio/rio-access.c
@@ -9,6 +9,8 @@
#include <linux/rio.h>
#include <linux/module.h>
+#include <linux/rio_drv.h>
+
/*
* Wrappers for all RIO configuration access functions. They just check
* alignment and call the low-level functions pointed to by rio_mport->ops.
diff --git a/drivers/rapidio/rio-driver.c b/drivers/rapidio/rio-driver.c
index 2d99a3712b72..72874153972e 100644
--- a/drivers/rapidio/rio-driver.c
+++ b/drivers/rapidio/rio-driver.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/rio.h>
#include <linux/rio_ids.h>
+#include <linux/rio_drv.h>
#include "rio.h"
diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.c b/drivers/thermal/intel/intel_soc_dts_iosf.c
index 5716b62e0f73..f75271b669c6 100644
--- a/drivers/thermal/intel/intel_soc_dts_iosf.c
+++ b/drivers/thermal/intel/intel_soc_dts_iosf.c
@@ -6,6 +6,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
@@ -103,6 +104,7 @@ static int update_trip_temp(struct intel_soc_dts_sensor_entry *dts,
int status;
u32 temp_out;
u32 out;
+ unsigned long update_ptps;
u32 store_ptps;
u32 store_ptmc;
u32 store_te_out;
@@ -120,8 +122,10 @@ static int update_trip_temp(struct intel_soc_dts_sensor_entry *dts,
if (status)
return status;
- out = (store_ptps & ~(0xFF << (thres_index * 8)));
- out |= (temp_out & 0xFF) << (thres_index * 8);
+ update_ptps = store_ptps;
+ bitmap_set_value8(&update_ptps, temp_out & 0xFF, thres_index * 8);
+ out = update_ptps;
+
status = iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_REG_WRITE,
SOC_DTS_OFFSET_PTPS, out);
if (status)
@@ -223,6 +227,7 @@ static int sys_get_curr_temp(struct thermal_zone_device *tzd,
u32 out;
struct intel_soc_dts_sensor_entry *dts;
struct intel_soc_dts_sensors *sensors;
+ unsigned long raw;
dts = tzd->devdata;
sensors = dts->sensors;
@@ -231,8 +236,8 @@ static int sys_get_curr_temp(struct thermal_zone_device *tzd,
if (status)
return status;
- out = (out & dts->temp_mask) >> dts->temp_shift;
- out -= SOC_DTS_TJMAX_ENCODING;
+ raw = out;
+ out = bitmap_get_value8(&raw, dts->id * 8) - SOC_DTS_TJMAX_ENCODING;
*temp = sensors->tj_max - out * 1000;
return 0;
@@ -280,11 +285,14 @@ static int add_dts_thermal_zone(int id, struct intel_soc_dts_sensor_entry *dts,
int read_only_trip_cnt)
{
char name[10];
+ unsigned long trip;
int trip_count = 0;
int trip_mask = 0;
+ int writable_trip_cnt = 0;
+ unsigned long ptps;
u32 store_ptps;
+ unsigned long i;
int ret;
- int i;
	/* Store status to restore on exit */
ret = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ,
@@ -293,11 +301,10 @@ static int add_dts_thermal_zone(int id, struct intel_soc_dts_sensor_entry *dts,
goto err_ret;
dts->id = id;
- dts->temp_mask = 0x00FF << (id * 8);
- dts->temp_shift = id * 8;
if (notification_support) {
trip_count = min(SOC_MAX_DTS_TRIPS, trip_cnt);
- trip_mask = BIT(trip_count - read_only_trip_cnt) - 1;
+ writable_trip_cnt = trip_count - read_only_trip_cnt;
+ trip_mask = GENMASK(writable_trip_cnt - 1, 0);
}
/* Check if the writable trip we provide is not used by BIOS */
@@ -306,11 +313,9 @@ static int add_dts_thermal_zone(int id, struct intel_soc_dts_sensor_entry *dts,
if (ret)
trip_mask = 0;
else {
- for (i = 0; i < trip_count; ++i) {
- if (trip_mask & BIT(i))
- if (store_ptps & (0xff << (i * 8)))
- trip_mask &= ~BIT(i);
- }
+ ptps = store_ptps;
+ for_each_set_clump8(i, trip, &ptps, writable_trip_cnt * 8)
+ trip_mask &= ~BIT(i / 8);
}
dts->trip_mask = trip_mask;
dts->trip_count = trip_count;
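
The trick in update_trip_temp() and sys_get_curr_temp() above is to widen a single register value into one unsigned long and treat it as a one-word bitmap, so the per-sensor 8-bit fields can use the same bitmap helpers as the GPIO drivers. A round-trip sketch under that assumption; the field choices are illustrative:

#include <linux/bitmap.h>

/* Copy sensor 1's 8-bit trip point into sensor 2's slot. */
static u32 ptps_sketch(u32 store_ptps)
{
	unsigned long ptps = store_ptps;	/* widen to a one-word bitmap */
	unsigned long thresh;

	thresh = bitmap_get_value8(&ptps, 1 * 8);	/* bits 8..15 */
	bitmap_set_value8(&ptps, thresh, 2 * 8);	/* bits 16..23 */

	return ptps;	/* narrow back before iosf_mbi_write() */
}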
diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.h b/drivers/thermal/intel/intel_soc_dts_iosf.h
index adfb09af33fc..c54945748200 100644
--- a/drivers/thermal/intel/intel_soc_dts_iosf.h
+++ b/drivers/thermal/intel/intel_soc_dts_iosf.h
@@ -24,8 +24,6 @@ struct intel_soc_dts_sensors;
struct intel_soc_dts_sensor_entry {
int id;
- u32 temp_mask;
- u32 temp_shift;
u32 store_status;
u32 trip_mask;
u32 trip_count;
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 00971f200d0c..eaf28eed51b0 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -18,6 +18,7 @@
#include <linux/sched/mm.h>
#include <linux/list.h>
#include <linux/slab.h>
+#include <linux/kcov.h>
#include <linux/ioctl.h>
#include <linux/usb.h>
#include <linux/usbdevice_fs.h>
@@ -5488,6 +5489,8 @@ static void hub_event(struct work_struct *work)
hub_dev = hub->intfdev;
intf = to_usb_interface(hub_dev);
+ kcov_remote_start_usb((u64)hdev->bus->busnum);
+
dev_dbg(hub_dev, "state %d ports %d chg %04x evt %04x\n",
hdev->state, hdev->maxchild,
/* NOTE: expects max 15 ports... */
@@ -5594,6 +5597,8 @@ out_hdev_lock:
/* Balance the stuff in kick_hub_wq() and allow autosuspend */
usb_autopm_put_interface(intf);
kref_put(&hub->kref, hub_release);
+
+ kcov_remote_stop();
}
static const struct usb_device_id hub_id_table[] = {
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 09f594bb069a..27270d2feac7 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -30,6 +30,7 @@
#include <linux/sched/signal.h>
#include <linux/interval_tree_generic.h>
#include <linux/nospec.h>
+#include <linux/kcov.h>
#include "vhost.h"
@@ -359,7 +360,9 @@ static int vhost_worker(void *data)
llist_for_each_entry_safe(work, work_next, node, node) {
clear_bit(VHOST_WORK_QUEUED, &work->flags);
__set_current_state(TASK_RUNNING);
+ kcov_remote_start_common(dev->kcov_handle);
work->fn(work);
+ kcov_remote_stop();
if (need_resched())
schedule();
}
@@ -556,6 +559,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
/* No owner, become one */
dev->mm = get_task_mm(current);
+ dev->kcov_handle = kcov_common_handle();
worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid);
if (IS_ERR(worker)) {
err = PTR_ERR(worker);
@@ -581,6 +585,7 @@ err_worker:
if (dev->mm)
mmput(dev->mm);
dev->mm = NULL;
+ dev->kcov_handle = 0;
err_mm:
return err;
}
@@ -692,6 +697,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
if (dev->worker) {
kthread_stop(dev->worker);
dev->worker = NULL;
+ dev->kcov_handle = 0;
}
if (dev->mm)
mmput(dev->mm);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 8b88e0c903da..1c752438bd75 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -187,6 +187,7 @@ struct vhost_dev {
int iov_limit;
int weight;
int byte_weight;
+ u64 kcov_handle;
};
bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len);
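
The hub.c and vhost.c hunks are users of the kcov remote-coverage annotations from this series: code running in a background worker brackets each unit of work with kcov_remote_start_*()/kcov_remote_stop(), using a handle recorded from the task that owns the work (vhost captures kcov_common_handle() in vhost_dev_set_owner(), above). Condensed from the vhost hunk, with the worker structure abbreviated:

#include <linux/kcov.h>
#include <linux/kthread.h>
#include <linux/llist.h>

/* Owner side (see vhost_dev_set_owner() above):
 *	dev->kcov_handle = kcov_common_handle();
 */
static int worker_sketch(void *data)
{
	struct vhost_dev *dev = data;
	struct vhost_work *work, *work_next;
	struct llist_node *node;

	while (!kthread_should_stop()) {
		node = llist_del_all(&dev->work_list);

		llist_for_each_entry_safe(work, work_next, node, node) {
			kcov_remote_start_common(dev->kcov_handle);
			work->fn(work);	/* coverage attributed to the owner */
			kcov_remote_stop();
		}
	}
	return 0;
}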
diff --git a/fs/aio.c b/fs/aio.c
index a9fbad2ce5e6..f2440598df7b 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1287,12 +1287,9 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
* the ringbuffer empty. So in practice we should be ok, but it's
* something to be aware of when touching this code.
*/
- if (until == 0)
- aio_read_events(ctx, min_nr, nr, event, &ret);
- else
- wait_event_interruptible_hrtimeout(ctx->wait,
- aio_read_events(ctx, min_nr, nr, event, &ret),
- until);
+ wait_event_interruptible_hrtimeout(ctx->wait,
+ aio_read_events(ctx, min_nr, nr, event, &ret),
+ until);
return ret;
}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 5372eabd276a..ecd8d2698515 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -404,6 +404,17 @@ static unsigned long total_mapping_size(const struct elf_phdr *cmds, int nr)
ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
+static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
+{
+ ssize_t rv;
+
+ rv = kernel_read(file, buf, len, &pos);
+ if (unlikely(rv != len)) {
+ return (rv < 0) ? rv : -EIO;
+ }
+ return 0;
+}
+
/**
* load_elf_phdrs() - load ELF program headers
* @elf_ex: ELF header of the binary whose program headers should be loaded
@@ -418,7 +429,6 @@ static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
{
struct elf_phdr *elf_phdata = NULL;
int retval, err = -1;
- loff_t pos = elf_ex->e_phoff;
unsigned int size;
/*
@@ -439,9 +449,9 @@ static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
goto out;
/* Read in the program headers */
- retval = kernel_read(elf_file, elf_phdata, size, &pos);
- if (retval != size) {
- err = (retval < 0) ? retval : -EIO;
+ retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);
+ if (retval < 0) {
+ err = retval;
goto out;
}
@@ -544,7 +554,7 @@ static inline int make_prot(u32 p_flags)
an ELF header */
static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
- struct file *interpreter, unsigned long *interp_map_addr,
+ struct file *interpreter,
unsigned long no_base, struct elf_phdr *interp_elf_phdata)
{
struct elf_phdr *eppnt;
@@ -590,8 +600,6 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
map_addr = elf_map(interpreter, load_addr + vaddr,
eppnt, elf_prot, elf_type, total_size);
total_size = 0;
- if (!*interp_map_addr)
- *interp_map_addr = map_addr;
error = map_addr;
if (BAD_ADDR(map_addr))
goto out;
@@ -722,7 +730,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
elf_ppnt = elf_phdata;
for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
char *elf_interpreter;
- loff_t pos;
if (elf_ppnt->p_type != PT_INTERP)
continue;
@@ -740,14 +747,10 @@ static int load_elf_binary(struct linux_binprm *bprm)
if (!elf_interpreter)
goto out_free_ph;
- pos = elf_ppnt->p_offset;
- retval = kernel_read(bprm->file, elf_interpreter,
- elf_ppnt->p_filesz, &pos);
- if (retval != elf_ppnt->p_filesz) {
- if (retval >= 0)
- retval = -EIO;
+ retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
+ elf_ppnt->p_offset);
+ if (retval < 0)
goto out_free_interp;
- }
/* make sure path is NULL terminated */
retval = -ENOEXEC;
if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
@@ -766,14 +769,10 @@ static int load_elf_binary(struct linux_binprm *bprm)
would_dump(bprm, interpreter);
/* Get the exec headers */
- pos = 0;
- retval = kernel_read(interpreter, &loc->interp_elf_ex,
- sizeof(loc->interp_elf_ex), &pos);
- if (retval != sizeof(loc->interp_elf_ex)) {
- if (retval >= 0)
- retval = -EIO;
+ retval = elf_read(interpreter, &loc->interp_elf_ex,
+ sizeof(loc->interp_elf_ex), 0);
+ if (retval < 0)
goto out_free_dentry;
- }
break;
@@ -1054,11 +1053,8 @@ out_free_interp:
}
if (interpreter) {
- unsigned long interp_map_addr = 0;
-
elf_entry = load_elf_interp(&loc->interp_elf_ex,
interpreter,
- &interp_map_addr,
load_bias, interp_elf_phdata);
if (!IS_ERR((void *)elf_entry)) {
/*
@@ -1179,11 +1175,10 @@ static int load_elf_library(struct file *file)
unsigned long elf_bss, bss, len;
int retval, error, i, j;
struct elfhdr elf_ex;
- loff_t pos = 0;
error = -ENOEXEC;
- retval = kernel_read(file, &elf_ex, sizeof(elf_ex), &pos);
- if (retval != sizeof(elf_ex))
+ retval = elf_read(file, &elf_ex, sizeof(elf_ex), 0);
+ if (retval < 0)
goto out;
if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
@@ -1208,9 +1203,8 @@ static int load_elf_library(struct file *file)
eppnt = elf_phdata;
error = -ENOEXEC;
- pos = elf_ex.e_phoff;
- retval = kernel_read(file, eppnt, j, &pos);
- if (retval != j)
+ retval = elf_read(file, eppnt, j, elf_ex.e_phoff);
+ if (retval < 0)
goto out_free_ph;
for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
diff --git a/fs/buffer.c b/fs/buffer.c
index d8c7242426bb..6ca16588dbe8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -996,10 +996,20 @@ grow_dev_page(struct block_device *bdev, sector_t block,
end_block = init_page_buffers(page, bdev,
(sector_t)index << sizebits,
size);
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ current->getblk_executed |= 0x01;
+#endif
goto done;
}
- if (!try_to_free_buffers(page))
+ if (!try_to_free_buffers(page)) {
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ current->getblk_executed |= 0x02;
+#endif
goto failed;
+ }
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ current->getblk_executed |= 0x04;
+#endif
}
/*
@@ -1019,6 +1029,9 @@ grow_dev_page(struct block_device *bdev, sector_t block,
spin_unlock(&inode->i_mapping->private_lock);
done:
ret = (block < end_block) ? 1 : -ENXIO;
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ current->getblk_executed |= 0x08;
+#endif
failed:
unlock_page(page);
put_page(page);
@@ -1074,6 +1087,12 @@ __getblk_slow(struct block_device *bdev, sector_t block,
return NULL;
}
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ current->getblk_stamp = jiffies;
+ current->getblk_executed = 0;
+ current->getblk_bh_count = 0;
+ current->getblk_bh_state = 0;
+#endif
for (;;) {
struct buffer_head *bh;
int ret;
@@ -1085,6 +1104,24 @@ __getblk_slow(struct block_device *bdev, sector_t block,
ret = grow_buffers(bdev, block, size, gfp);
if (ret < 0)
return NULL;
+
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ if (!time_after(jiffies, current->getblk_stamp + 3 * HZ))
+ continue;
+ printk(KERN_ERR "%s(%u): getblk(): executed=%x bh_count=%d bh_state=%lx bdev_super_blocksize=%ld size=%u bdev_super_blocksize_bits=%d bdev_inode_blkbits=%d\n",
+ current->comm, current->pid, current->getblk_executed,
+ current->getblk_bh_count, current->getblk_bh_state,
+ IS_ERR_OR_NULL(bdev->bd_super) ? -1L :
+ bdev->bd_super->s_blocksize, size,
+ IS_ERR_OR_NULL(bdev->bd_super) ? -1 :
+ bdev->bd_super->s_blocksize_bits,
+ IS_ERR_OR_NULL(bdev->bd_inode) ? -1 :
+ bdev->bd_inode->i_blkbits);
+ current->getblk_executed = 0;
+ current->getblk_bh_count = 0;
+ current->getblk_bh_state = 0;
+ current->getblk_stamp = jiffies;
+#endif
}
}
@@ -3263,6 +3300,11 @@ EXPORT_SYMBOL(sync_dirty_buffer);
*/
static inline int buffer_busy(struct buffer_head *bh)
{
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ current->getblk_executed |= 0x80;
+ current->getblk_bh_count = atomic_read(&bh->b_count);
+ current->getblk_bh_state = bh->b_state;
+#endif
return atomic_read(&bh->b_count) |
(bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
}
@@ -3301,11 +3343,18 @@ int try_to_free_buffers(struct page *page)
int ret = 0;
BUG_ON(!PageLocked(page));
- if (PageWriteback(page))
+ if (PageWriteback(page)) {
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ current->getblk_executed |= 0x10;
+#endif
return 0;
+ }
if (mapping == NULL) { /* can this still happen? */
ret = drop_buffers(page, &buffers_to_free);
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ current->getblk_executed |= 0x20;
+#endif
goto out;
}
@@ -3329,6 +3378,9 @@ int try_to_free_buffers(struct page *page)
if (ret)
cancel_dirty_page(page);
spin_unlock(&mapping->private_lock);
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ current->getblk_executed |= 0x40;
+#endif
out:
if (buffers_to_free) {
struct buffer_head *bh = buffers_to_free;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index c4159bcc05d9..67a395039268 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -551,28 +551,23 @@ out_unlock:
*/
#ifdef CONFIG_DEBUG_LOCK_ALLOC
-static struct nested_calls poll_safewake_ncalls;
-
-static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests)
-{
- unsigned long flags;
- wait_queue_head_t *wqueue = (wait_queue_head_t *)cookie;
-
- spin_lock_irqsave_nested(&wqueue->lock, flags, call_nests + 1);
- wake_up_locked_poll(wqueue, EPOLLIN);
- spin_unlock_irqrestore(&wqueue->lock, flags);
-
- return 0;
-}
+static DEFINE_PER_CPU(int, wakeup_nest);
static void ep_poll_safewake(wait_queue_head_t *wq)
{
- int this_cpu = get_cpu();
-
- ep_call_nested(&poll_safewake_ncalls,
- ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
+ unsigned long flags;
+ int subclass;
- put_cpu();
+ local_irq_save(flags);
+ preempt_disable();
+ subclass = __this_cpu_read(wakeup_nest);
+ spin_lock_nested(&wq->lock, subclass + 1);
+ __this_cpu_inc(wakeup_nest);
+ wake_up_locked_poll(wq, POLLIN);
+ __this_cpu_dec(wakeup_nest);
+ spin_unlock(&wq->lock);
+ local_irq_restore(flags);
+ preempt_enable();
}
#else
@@ -671,7 +666,6 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep,
void *priv, int depth, bool ep_locked)
{
__poll_t res;
- int pwake = 0;
struct epitem *epi, *nepi;
LIST_HEAD(txlist);
@@ -738,26 +732,11 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep,
*/
list_splice(&txlist, &ep->rdllist);
__pm_relax(ep->ws);
-
- if (!list_empty(&ep->rdllist)) {
- /*
- * Wake up (if active) both the eventpoll wait list and
- * the ->poll() wait list (delayed after we release the lock).
- */
- if (waitqueue_active(&ep->wq))
- wake_up(&ep->wq);
- if (waitqueue_active(&ep->poll_wait))
- pwake++;
- }
write_unlock_irq(&ep->lock);
if (!ep_locked)
mutex_unlock(&ep->mtx);
- /* We have to call this outside the lock */
- if (pwake)
- ep_poll_safewake(&ep->poll_wait);
-
return res;
}
@@ -2370,11 +2349,6 @@ static int __init eventpoll_init(void)
*/
ep_nested_calls_init(&poll_loop_ncalls);
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- /* Initialize the structure used to perform safe poll wait head wake ups */
- ep_nested_calls_init(&poll_safewake_ncalls);
-#endif
-
/*
* We can have many thousands of epitems, so prevent this from
* using an extra cache line on 64-bit (and smaller) CPUs
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index cb5629bd5fff..733881a6387b 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -42,8 +42,8 @@ config PROC_VMCORE
bool "/proc/vmcore support"
depends on PROC_FS && CRASH_DUMP
default y
- help
- Exports the dump image of crashed kernel in ELF format.
+ help
+ Exports the dump image of crashed kernel in ELF format.
config PROC_VMCORE_DEVICE_DUMP
bool "Device Hardware/Firmware Log Collection"
@@ -72,7 +72,7 @@ config PROC_SYSCTL
a recompile of the kernel or reboot of the system. The primary
interface is through /proc/sys. If you say Y here a tree of
modifiable sysctl entries will be generated beneath the
- /proc/sys directory. They are explained in the files
+ /proc/sys directory. They are explained in the files
in <file:Documentation/admin-guide/sysctl/>. Note that enabling this
option will enlarge the kernel by at least 8 KB.
@@ -88,7 +88,7 @@ config PROC_PAGE_MONITOR
Various /proc files exist to monitor process memory utilization:
/proc/pid/smaps, /proc/pid/clear_refs, /proc/pid/pagemap,
/proc/kpagecount, and /proc/kpageflags. Disabling these
- interfaces will reduce the size of the kernel by approximately 4kb.
+ interfaces will reduce the size of the kernel by approximately 4kb.
config PROC_CHILDREN
bool "Include /proc/<pid>/task/<tid>/children file"
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 64e9ee1b129e..074e9585c699 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -138,8 +138,12 @@ static int proc_getattr(const struct path *path, struct kstat *stat,
{
struct inode *inode = d_inode(path->dentry);
struct proc_dir_entry *de = PDE(inode);
- if (de && de->nlink)
- set_nlink(inode, de->nlink);
+ if (de) {
+ nlink_t nlink = READ_ONCE(de->nlink);
+ if (nlink > 0) {
+ set_nlink(inode, nlink);
+ }
+ }
generic_fillattr(inode, stat);
return 0;
@@ -159,7 +163,6 @@ static int __xlate_proc_name(const char *name, struct proc_dir_entry **ret,
{
const char *cp = name, *next;
struct proc_dir_entry *de;
- unsigned int len;
de = *ret;
if (!de)
@@ -170,13 +173,12 @@ static int __xlate_proc_name(const char *name, struct proc_dir_entry **ret,
if (!next)
break;
- len = next - cp;
- de = pde_subdir_find(de, cp, len);
+ de = pde_subdir_find(de, cp, next - cp);
if (!de) {
WARN(1, "name '%s'\n", name);
return -ENOENT;
}
- cp += len + 1;
+ cp = next + 1;
}
*residual = cp;
*ret = de;
@@ -362,6 +364,7 @@ struct proc_dir_entry *proc_register(struct proc_dir_entry *dir,
write_unlock(&proc_subdir_lock);
goto out_free_inum;
}
+ dir->nlink++;
write_unlock(&proc_subdir_lock);
return dp;
@@ -472,10 +475,7 @@ struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode,
ent->data = data;
ent->proc_fops = &proc_dir_operations;
ent->proc_iops = &proc_dir_inode_operations;
- parent->nlink++;
ent = proc_register(parent, ent);
- if (!ent)
- parent->nlink--;
}
return ent;
}
@@ -505,10 +505,7 @@ struct proc_dir_entry *proc_create_mount_point(const char *name)
ent->data = NULL;
ent->proc_fops = NULL;
ent->proc_iops = NULL;
- parent->nlink++;
ent = proc_register(parent, ent);
- if (!ent)
- parent->nlink--;
}
return ent;
}
@@ -666,8 +663,12 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
len = strlen(fn);
de = pde_subdir_find(parent, fn, len);
- if (de)
+ if (de) {
rb_erase(&de->subdir_node, &parent->subdir);
+ if (S_ISDIR(de->mode)) {
+ parent->nlink--;
+ }
+ }
write_unlock(&proc_subdir_lock);
if (!de) {
WARN(1, "name '%s'\n", name);
@@ -676,9 +677,6 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
proc_entry_rundown(de);
- if (S_ISDIR(de->mode))
- parent->nlink--;
- de->nlink = 0;
WARN(pde_subdir_first(de),
"%s: removing non-empty directory '%s/%s', leaking at least '%s'\n",
__func__, de->parent->name, de->name, pde_subdir_first(de)->name);
@@ -714,13 +712,12 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
de = next;
continue;
}
- write_unlock(&proc_subdir_lock);
-
- proc_entry_rundown(de);
next = de->parent;
if (S_ISDIR(de->mode))
next->nlink--;
- de->nlink = 0;
+ write_unlock(&proc_subdir_lock);
+
+ proc_entry_rundown(de);
if (de == root)
break;
pde_put(de);
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index cd0c8d5ce9a1..0f3b557c9b77 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -197,8 +197,8 @@ extern ssize_t proc_simple_write(struct file *, const char __user *, size_t, lof
* inode.c
*/
struct pde_opener {
- struct file *file;
struct list_head lh;
+ struct file *file;
bool closing;
struct completion *c;
} __randomize_layout;
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 7c952ee732e6..e40dbfe1168e 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -97,7 +97,10 @@ u64 stable_page_flags(struct page *page)
* it differentiates a memory hole from a page with no flags
*/
if (!page)
- return 1 << KPF_NOPAGE;
+ return BIT_ULL(KPF_NOPAGE);
+
+ if (pfn_zone_device_reserved(page_to_pfn(page)))
+ return BIT_ULL(KPF_RESERVED);
k = page->flags;
u = 0;
@@ -109,22 +112,22 @@ u64 stable_page_flags(struct page *page)
* simple test in page_mapped() is not enough.
*/
if (!PageSlab(page) && page_mapped(page))
- u |= 1 << KPF_MMAP;
+ u |= BIT_ULL(KPF_MMAP);
if (PageAnon(page))
- u |= 1 << KPF_ANON;
+ u |= BIT_ULL(KPF_ANON);
if (PageKsm(page))
- u |= 1 << KPF_KSM;
+ u |= BIT_ULL(KPF_KSM);
/*
* compound pages: export both head/tail info
* they together define a compound page's start/end pos and order
*/
if (PageHead(page))
- u |= 1 << KPF_COMPOUND_HEAD;
+ u |= BIT_ULL(KPF_COMPOUND_HEAD);
if (PageTail(page))
- u |= 1 << KPF_COMPOUND_TAIL;
+ u |= BIT_ULL(KPF_COMPOUND_TAIL);
if (PageHuge(page))
- u |= 1 << KPF_HUGE;
+ u |= BIT_ULL(KPF_HUGE);
/*
* PageTransCompound can be true for non-huge compound pages (slab
* pages or pages allocated by drivers with __GFP_COMP) because it
@@ -135,14 +138,13 @@ u64 stable_page_flags(struct page *page)
struct page *head = compound_head(page);
if (PageLRU(head) || PageAnon(head))
- u |= 1 << KPF_THP;
+ u |= BIT_ULL(KPF_THP);
else if (is_huge_zero_page(head)) {
- u |= 1 << KPF_ZERO_PAGE;
- u |= 1 << KPF_THP;
+ u |= BIT_ULL(KPF_ZERO_PAGE);
+ u |= BIT_ULL(KPF_THP);
}
} else if (is_zero_pfn(page_to_pfn(page)))
- u |= 1 << KPF_ZERO_PAGE;
-
+ u |= BIT_ULL(KPF_ZERO_PAGE);
/*
* Caveats on high order pages: page->_refcount will only be set
@@ -150,23 +152,23 @@ u64 stable_page_flags(struct page *page)
* SLOB won't set PG_slab at all on compound pages.
*/
if (PageBuddy(page))
- u |= 1 << KPF_BUDDY;
+ u |= BIT_ULL(KPF_BUDDY);
else if (page_count(page) == 0 && is_free_buddy_page(page))
- u |= 1 << KPF_BUDDY;
+ u |= BIT_ULL(KPF_BUDDY);
if (PageOffline(page))
- u |= 1 << KPF_OFFLINE;
+ u |= BIT_ULL(KPF_OFFLINE);
if (PageTable(page))
- u |= 1 << KPF_PGTABLE;
+ u |= BIT_ULL(KPF_PGTABLE);
if (page_is_idle(page))
- u |= 1 << KPF_IDLE;
+ u |= BIT_ULL(KPF_IDLE);
u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked);
u |= kpf_copy_bit(k, KPF_SLAB, PG_slab);
if (PageTail(page) && PageSlab(compound_head(page)))
- u |= 1 << KPF_SLAB;
+ u |= BIT_ULL(KPF_SLAB);
u |= kpf_copy_bit(k, KPF_ERROR, PG_error);
u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty);
@@ -179,7 +181,7 @@ u64 stable_page_flags(struct page *page)
u |= kpf_copy_bit(k, KPF_RECLAIM, PG_reclaim);
if (PageSwapCache(page))
- u |= 1 << KPF_SWAPCACHE;
+ u |= BIT_ULL(KPF_SWAPCACHE);
u |= kpf_copy_bit(k, KPF_SWAPBACKED, PG_swapbacked);
u |= kpf_copy_bit(k, KPF_UNEVICTABLE, PG_unevictable);
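
The BIT_ULL() conversion above is not cosmetic: stable_page_flags() returns a u64, but "1 << KPF_FOO" is evaluated in 32-bit int arithmetic, so any flag at bit index 32 or above overflows -- and KPF_RESERVED, used by the new hunk, sits at index 32. A minimal standalone sketch of the failure mode (illustrative, not part of the patch):

    #include <stdio.h>
    #include <stdint.h>

    #define BIT_ULL(nr) (1ULL << (nr))

    int main(void)
    {
            /* "1 << 33" would shift a 32-bit int: undefined behaviour.  */
            /* BIT_ULL(33) widens to 64 bits first, so it is well defined. */
            uint64_t flag = BIT_ULL(33);

            printf("%#llx\n", (unsigned long long)flag); /* 0x200000000 */
            return 0;
    }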
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index d82636e8eb65..35624ca2a2f9 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -147,6 +147,17 @@ static int ramfs_symlink(struct inode * dir, struct dentry *dentry, const char *
return error;
}
+static int ramfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ struct inode *inode;
+
+ inode = ramfs_get_inode(dir->i_sb, dir, mode, 0);
+ if (!inode)
+ return -ENOSPC;
+ d_tmpfile(dentry, inode);
+ return 0;
+}
+
static const struct inode_operations ramfs_dir_inode_operations = {
.create = ramfs_create,
.lookup = simple_lookup,
@@ -157,6 +168,7 @@ static const struct inode_operations ramfs_dir_inode_operations = {
.rmdir = simple_rmdir,
.mknod = ramfs_mknod,
.rename = simple_rename,
+ .tmpfile = ramfs_tmpfile,
};
/*
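
The new ->tmpfile hook is what lets O_TMPFILE opens succeed on ramfs; without it they fail with EOPNOTSUPP. A hedged userspace sketch (the mount point is an assumption):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            /* Assumes a ramfs instance mounted at /mnt/ramfs. */
            int fd = open("/mnt/ramfs", O_TMPFILE | O_RDWR, 0600);

            if (fd < 0) {
                    perror("O_TMPFILE"); /* EOPNOTSUPP without ->tmpfile */
                    return 1;
            }
            if (write(fd, "scratch", 7) != 7)
                    perror("write");
            close(fd);              /* unnamed inode is released here */
            return 0;
    }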
diff --git a/include/asm-generic/4level-fixup.h b/include/asm-generic/4level-fixup.h
deleted file mode 100644
index c86cf7cb4bba..000000000000
--- a/include/asm-generic/4level-fixup.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _4LEVEL_FIXUP_H
-#define _4LEVEL_FIXUP_H
-
-#define __ARCH_HAS_4LEVEL_HACK
-#define __PAGETABLE_PUD_FOLDED 1
-
-#define PUD_SHIFT PGDIR_SHIFT
-#define PUD_SIZE PGDIR_SIZE
-#define PUD_MASK PGDIR_MASK
-#define PTRS_PER_PUD 1
-
-#define pud_t pgd_t
-
-#define pmd_alloc(mm, pud, address) \
- ((unlikely(pgd_none(*(pud))) && __pmd_alloc(mm, pud, address))? \
- NULL: pmd_offset(pud, address))
-
-#define pud_offset(pgd, start) (pgd)
-#define pud_none(pud) 0
-#define pud_bad(pud) 0
-#define pud_present(pud) 1
-#define pud_ERROR(pud) do { } while (0)
-#define pud_clear(pud) pgd_clear(pud)
-#define pud_val(pud) pgd_val(pud)
-#define pud_populate(mm, pud, pmd) pgd_populate(mm, pud, pmd)
-#define pud_page(pud) pgd_page(pud)
-#define pud_page_vaddr(pud) pgd_page_vaddr(pud)
-
-#undef pud_free_tlb
-#define pud_free_tlb(tlb, x, addr) do { } while (0)
-#define pud_free(mm, x) do { } while (0)
-
-#undef pud_addr_end
-#define pud_addr_end(addr, end) (end)
-
-#include <asm-generic/5level-fixup.h>
-
-#endif
diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h
index 8a1ee10014de..9fdf21302fdf 100644
--- a/include/asm-generic/bitops/find.h
+++ b/include/asm-generic/bitops/find.h
@@ -80,4 +80,21 @@ extern unsigned long find_first_zero_bit(const unsigned long *addr,
#endif /* CONFIG_GENERIC_FIND_FIRST_BIT */
+/**
+ * find_next_clump8 - find next 8-bit clump with set bits in a memory region
+ * @clump: location to store copy of found clump
+ * @addr: address to base the search on
+ * @size: bitmap size in number of bits
+ * @offset: bit offset at which to start searching
+ *
+ * Returns the bit offset for the next set clump; the found clump value is
+ * copied to the location pointed to by @clump. If no bits are set, returns @size.
+ */
+extern unsigned long find_next_clump8(unsigned long *clump,
+ const unsigned long *addr,
+ unsigned long size, unsigned long offset);
+
+#define find_first_clump8(clump, bits, size) \
+ find_next_clump8((clump), (bits), (size), 0)
+
#endif /*_ASM_GENERIC_BITOPS_FIND_H_ */
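
The clump helpers scan a bitmap in aligned 8-bit groups. A kernel-style sketch of the semantics the kernel-doc above implies: with bits 1, 2 and 10 set, the first clump is 0x06 at offset 0 and the next is 0x04 at offset 8:

    DECLARE_BITMAP(map, 16);
    unsigned long clump, offset;

    bitmap_zero(map, 16);
    __set_bit(1, map);                     /* byte 0 -> clump 0x06 */
    __set_bit(2, map);
    __set_bit(10, map);                    /* byte 1 -> clump 0x04 */

    offset = find_first_clump8(&clump, map, 16);            /* 0, clump == 0x06 */
    offset = find_next_clump8(&clump, map, 16, offset + 8); /* 8, clump == 0x04 */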
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 29fc933df3bf..ff335b22f23c 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -53,6 +53,7 @@
* bitmap_find_next_zero_area_off(buf, len, pos, n, mask) as above
* bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n
* bitmap_shift_left(dst, src, n, nbits) *dst = *src << n
+ * bitmap_replace(dst, old, new, mask, nbits) *dst = (*old & ~(*mask)) | (*new & *mask)
* bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src)
* bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit)
* bitmap_onto(dst, orig, relmap, nbits) *dst = orig relative to relmap
@@ -66,6 +67,8 @@
* bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region
* bitmap_from_arr32(dst, buf, nbits) Copy nbits from u32[] buf to dst
* bitmap_to_arr32(buf, src, nbits) Copy nbits from buf to u32[] dst
+ * bitmap_get_value8(map, start) Get 8bit value from map at start
+ * bitmap_set_value8(map, value, start) Set 8bit value to map at start
*
* Note, bitmap_zero() and bitmap_fill() operate over the region of
* unsigned longs, that is, bits behind bitmap till the unsigned long
@@ -138,6 +141,9 @@ extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int nbits);
extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int nbits);
+extern void __bitmap_replace(unsigned long *dst,
+ const unsigned long *old, const unsigned long *new,
+ const unsigned long *mask, unsigned int nbits);
extern int __bitmap_intersects(const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int nbits);
extern int __bitmap_subset(const unsigned long *bitmap1,
@@ -432,6 +438,18 @@ static inline void bitmap_shift_left(unsigned long *dst, const unsigned long *sr
__bitmap_shift_left(dst, src, shift, nbits);
}
+static inline void bitmap_replace(unsigned long *dst,
+ const unsigned long *old,
+ const unsigned long *new,
+ const unsigned long *mask,
+ unsigned int nbits)
+{
+ if (small_const_nbits(nbits))
+ *dst = (*old & ~(*mask)) | (*new & *mask);
+ else
+ __bitmap_replace(dst, old, new, mask, nbits);
+}
+
static inline int bitmap_parse(const char *buf, unsigned int buflen,
unsigned long *maskp, int nmaskbits)
{
@@ -489,6 +507,39 @@ static inline void bitmap_from_u64(unsigned long *dst, u64 mask)
dst[1] = mask >> 32;
}
+/**
+ * bitmap_get_value8 - get an 8-bit value within a memory region
+ * @map: address to the bitmap memory region
+ * @start: bit offset of the 8-bit value; must be a multiple of 8
+ *
+ * Returns the 8-bit value located at the @start bit offset within the @map
+ * memory region.
+ */
+static inline unsigned long bitmap_get_value8(const unsigned long *map,
+ unsigned long start)
+{
+ const size_t index = BIT_WORD(start);
+ const unsigned long offset = start % BITS_PER_LONG;
+
+ return (map[index] >> offset) & 0xFF;
+}
+
+/**
+ * bitmap_set_value8 - set an 8-bit value within a memory region
+ * @map: address to the bitmap memory region
+ * @value: the 8-bit value; values wider than 8 bits may clobber the bitmap
+ * @start: bit offset of the 8-bit value; must be a multiple of 8
+ */
+static inline void bitmap_set_value8(unsigned long *map, unsigned long value,
+ unsigned long start)
+{
+ const size_t index = BIT_WORD(start);
+ const unsigned long offset = start % BITS_PER_LONG;
+
+ map[index] &= ~(0xFFUL << offset);
+ map[index] |= value << offset;
+}
+
#endif /* __ASSEMBLY__ */
#endif /* __LINUX_BITMAP_H */
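
Together with bitmap_replace() above, the 8-bit accessors give callers a byte-sized window into a bitmap. A short kernel-style sketch (values are illustrative; note bitmap_replace() recomputes *dst entirely from old/new/mask):

    unsigned long map[1], old[1] = { 0xf0f0UL };
    unsigned long new[1] = { 0x0f0fUL }, mask[1] = { 0x00ffUL };

    bitmap_zero(map, BITS_PER_LONG);
    bitmap_set_value8(map, 0xab, 8);          /* bits 8..15 now hold 0xab */
    WARN_ON(bitmap_get_value8(map, 8) != 0xab);

    /* *map = (*old & ~*mask) | (*new & *mask) == 0xf00f */
    bitmap_replace(map, old, new, mask, 16);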
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index c94a9ff9f082..e479067c202c 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -47,6 +47,18 @@ extern unsigned long __sw_hweight64(__u64 w);
(bit) < (size); \
(bit) = find_next_zero_bit((addr), (size), (bit) + 1))
+/**
+ * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits
+ * @start: bit offset to start search and to store the current iteration offset
+ * @clump: location to store copy of current 8-bit clump
+ * @bits: bitmap address to base the search on
+ * @size: bitmap size in number of bits
+ */
+#define for_each_set_clump8(start, clump, bits, size) \
+ for ((start) = find_first_clump8(&(clump), (bits), (size)); \
+ (start) < (size); \
+ (start) = find_next_clump8(&(clump), (bits), (size), (start) + 8))
+
static inline int get_bitmask_order(unsigned int count)
{
int order;
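
The iterator is aimed at drivers that shuttle bitmap contents to and from 8-bit ports, skipping groups that are all zero. A hedged sketch, where write_port() stands in for whatever per-byte I/O a driver would do:

    unsigned long offset, clump;

    /* Visit only the 8-bit groups that contain at least one set bit. */
    for_each_set_clump8(offset, clump, map, nbits) {
            /* offset is always a multiple of 8 */
            write_port(offset / 8, clump);  /* write_port() is hypothetical */
    }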
diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h
index 0fe5426f2bdc..e3a0be2c90ad 100644
--- a/include/linux/build_bug.h
+++ b/include/linux/build_bug.h
@@ -9,11 +9,11 @@
#else /* __CHECKER__ */
/*
* Force a compilation error if condition is true, but also produce a
- * result (of value 0 and type size_t), so the expression can be used
+ * result (of value 0 and type int), so the expression can be used
* e.g. in a structure initializer (or where-ever else comma expressions
* aren't permitted).
*/
-#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:(-!!(e)); }))
+#define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); })))
#endif /* __CHECKER__ */
/* Force a compilation error if a constant expression is not a power of 2 */
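
BUILD_BUG_ON_ZERO() is useful precisely because it folds to a constant zero inside an expression; casting the sizeof result to int presumably keeps that zero from dragging size_t into otherwise-signed arithmetic around it. The classic use is the kernel's own __must_be_array()/ARRAY_SIZE() pattern:

    /* Compile-time check that yields 0, usable inside an expression:
     * errors out if 'a' is a pointer rather than a true array. */
    #define MUST_BE_ARRAY(a) \
            BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))

    #define ARRAY_LEN(a) (sizeof(a) / sizeof((a)[0]) + MUST_BE_ARRAY(a))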
diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h
index 4bd583bd6934..5b14a0f38124 100644
--- a/include/linux/genalloc.h
+++ b/include/linux/genalloc.h
@@ -206,7 +206,7 @@ extern struct gen_pool *devm_gen_pool_create(struct device *dev,
int min_alloc_order, int nid, const char *name);
extern struct gen_pool *gen_pool_get(struct device *dev, const char *name);
-bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start,
+extern bool gen_pool_has_addr(struct gen_pool *pool, unsigned long start,
size_t size);
#ifdef CONFIG_OF
diff --git a/include/linux/kcov.h b/include/linux/kcov.h
index b76a1807028d..a10e84707d82 100644
--- a/include/linux/kcov.h
+++ b/include/linux/kcov.h
@@ -37,12 +37,35 @@ do { \
(t)->kcov_mode &= ~KCOV_IN_CTXSW; \
} while (0)
+/* See Documentation/dev-tools/kcov.rst for usage details. */
+void kcov_remote_start(u64 handle);
+void kcov_remote_stop(void);
+u64 kcov_common_handle(void);
+
+static inline void kcov_remote_start_common(u64 id)
+{
+ kcov_remote_start(kcov_remote_handle(KCOV_SUBSYSTEM_COMMON, id));
+}
+
+static inline void kcov_remote_start_usb(u64 id)
+{
+ kcov_remote_start(kcov_remote_handle(KCOV_SUBSYSTEM_USB, id));
+}
+
#else
static inline void kcov_task_init(struct task_struct *t) {}
static inline void kcov_task_exit(struct task_struct *t) {}
static inline void kcov_prepare_switch(struct task_struct *t) {}
static inline void kcov_finish_switch(struct task_struct *t) {}
+static inline void kcov_remote_start(u64 handle) {}
+static inline void kcov_remote_stop(void) {}
+static inline u64 kcov_common_handle(void)
+{
+ return 0;
+}
+static inline void kcov_remote_start_common(u64 id) {}
+static inline void kcov_remote_start_usb(u64 id) {}
#endif /* CONFIG_KCOV */
#endif /* _LINUX_KCOV_H */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 09f759228e3f..3adcb39fa6f5 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -348,8 +348,7 @@ int __must_check kstrtoll(const char *s, unsigned int base, long long *res);
* @res: Where to write the result of the conversion on success.
*
* Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
- * Used as a replacement for the obsolete simple_strtoull. Return code must
- * be checked.
+ * Used as a replacement for simple_strtoull(). Return code must be checked.
*/
static inline int __must_check kstrtoul(const char *s, unsigned int base, unsigned long *res)
{
@@ -377,8 +376,7 @@ static inline int __must_check kstrtoul(const char *s, unsigned int base, unsign
* @res: Where to write the result of the conversion on success.
*
* Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
- * Used as a replacement for the obsolete simple_strtoull. Return code must
- * be checked.
+ * Used as a replacement for simple_strtoull(). Return code must be checked.
*/
static inline int __must_check kstrtol(const char *s, unsigned int base, long *res)
{
@@ -454,7 +452,18 @@ static inline int __must_check kstrtos32_from_user(const char __user *s, size_t
return kstrtoint_from_user(s, count, base, res);
}
-/* Obsolete, do not use. Use kstrto<foo> instead */
+/*
+ * Use kstrto<foo> instead.
+ *
+ * NOTE: simple_strto<foo> does not check for range overflow and,
+ * depending on the input, may give interesting results.
+ *
+ * Use these functions if and only if you cannot use kstrto<foo>, because
+ * the conversion ends on the first non-digit character, which may be far
+ * beyond the supported range. They can be useful for parsing strings like
+ * 10x50 or 12:21 without altering the original string or a temporary buffer.
+ * Keep the above caveat in mind.
+ */
extern unsigned long simple_strtoul(const char *,char **,unsigned int);
extern long simple_strtol(const char *,char **,unsigned int);
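
The rewritten comment earns an example: kstrtoul() rejects "10x50" outright, while simple_strtoul() stops at the 'x' and lets the caller continue. A hedged kernel-style sketch (parse_geometry() is hypothetical):

    static int parse_geometry(const char *s, unsigned long *rows,
                              unsigned long *cols)
    {
            char *end;

            *rows = simple_strtoul(s, &end, 10); /* stops at 'x'; no range check! */
            if (*end++ != 'x')
                    return -EINVAL;
            *cols = simple_strtoul(end, &end, 10);
            return *end ? -EINVAL : 0;
    }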
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 3a08ecdfca11..ba0dca6aac6e 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -122,8 +122,8 @@ static inline bool movable_node_is_enabled(void)
extern void arch_remove_memory(int nid, u64 start, u64 size,
struct vmem_altmap *altmap);
-extern void __remove_pages(struct zone *zone, unsigned long start_pfn,
- unsigned long nr_pages, struct vmem_altmap *altmap);
+extern void __remove_pages(unsigned long start_pfn, unsigned long nr_pages,
+ struct vmem_altmap *altmap);
/* reasonably generic interface to expand the physical pages */
extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
@@ -342,6 +342,9 @@ extern int add_memory(int nid, u64 start, u64 size);
extern int add_memory_resource(int nid, struct resource *resource);
extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
unsigned long nr_pages, struct vmem_altmap *altmap);
+extern void remove_pfn_range_from_zone(struct zone *zone,
+ unsigned long start_pfn,
+ unsigned long nr_pages);
extern bool is_memblock_offlined(struct memory_block *mem);
extern int sparse_add_section(int nid, unsigned long pfn,
unsigned long nr_pages, struct vmem_altmap *altmap);
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 6fefb09af7c3..c676e33205d3 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -123,6 +123,7 @@ static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap)
}
#ifdef CONFIG_ZONE_DEVICE
+bool pfn_zone_device_reserved(unsigned long pfn);
void *memremap_pages(struct dev_pagemap *pgmap, int nid);
void memunmap_pages(struct dev_pagemap *pgmap);
void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
@@ -133,6 +134,11 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
#else
+static inline bool pfn_zone_device_reserved(unsigned long pfn)
+{
+ return false;
+}
+
static inline void *devm_memremap_pages(struct device *dev,
struct dev_pagemap *pgmap)
{
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8b0ef04b6d15..c97ea3b694e6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1838,12 +1838,12 @@ static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
int __pte_alloc(struct mm_struct *mm, pmd_t *pmd);
int __pte_alloc_kernel(pmd_t *pmd);
+#if defined(CONFIG_MMU)
+
/*
- * The following ifdef needed to get the 4level-fixup.h header to work.
- * Remove it when 4level-fixup.h has been removed.
+ * The following ifdef needed to get the 5level-fixup.h header to work.
+ * Remove it when 5level-fixup.h has been removed.
*/
-#if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK)
-
#ifndef __ARCH_HAS_5LEVEL_HACK
static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd,
unsigned long address)
@@ -1865,7 +1865,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
NULL: pmd_offset(pud, address);
}
-#endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */
+#endif /* CONFIG_MMU */
#if USE_SPLIT_PTE_PTLOCKS
#if ALLOC_SPLIT_PTLOCKS
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 0096a05395e3..018947611483 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -150,10 +150,6 @@ extern int raw_notifier_chain_register(struct raw_notifier_head *nh,
extern int srcu_notifier_chain_register(struct srcu_notifier_head *nh,
struct notifier_block *nb);
-extern int blocking_notifier_chain_cond_register(
- struct blocking_notifier_head *nh,
- struct notifier_block *nb);
-
extern int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh,
struct notifier_block *nb);
extern int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh,
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index a705aa2d03f9..0640be56dcbd 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -58,8 +58,8 @@ extern int remove_proc_subtree(const char *, struct proc_dir_entry *);
struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode,
struct proc_dir_entry *parent, const struct seq_operations *ops,
unsigned int state_size, void *data);
-#define proc_create_net(name, mode, parent, state_size, ops) \
- proc_create_net_data(name, mode, parent, state_size, ops, NULL)
+#define proc_create_net(name, mode, parent, ops, state_size) \
+ proc_create_net_data(name, mode, parent, ops, state_size, NULL)
struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode,
struct proc_dir_entry *parent,
int (*show)(struct seq_file *, void *), void *data);
diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index fdd421b8d9ae..724b0d036b57 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -283,14 +283,12 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
__rb_change_child(node, successor, tmp, root);
if (child2) {
- successor->__rb_parent_color = pc;
rb_set_parent_color(child2, parent, RB_BLACK);
rebalance = NULL;
} else {
- unsigned long pc2 = successor->__rb_parent_color;
- successor->__rb_parent_color = pc;
- rebalance = __rb_is_black(pc2) ? parent : NULL;
+ rebalance = rb_is_black(successor) ? parent : NULL;
}
+ successor->__rb_parent_color = pc;
tmp = successor;
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 69b103825ed6..0d84f8fee209 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -906,6 +906,7 @@ struct task_struct {
#ifdef CONFIG_DETECT_HUNG_TASK
unsigned long last_switch_count;
unsigned long last_switch_time;
+ unsigned long killed_time;
#endif
/* Filesystem information: */
struct fs_struct *fs;
@@ -1214,6 +1215,8 @@ struct task_struct {
#endif /* CONFIG_TRACING */
#ifdef CONFIG_KCOV
+ /* See kernel/kcov.c for more details. */
+
/* Coverage collection mode enabled for this task (0 if disabled): */
unsigned int kcov_mode;
@@ -1225,6 +1228,12 @@ struct task_struct {
/* KCOV descriptor wired with this task or NULL: */
struct kcov *kcov;
+
+ /* KCOV common handle for remote coverage collection: */
+ u64 kcov_handle;
+
+ /* KCOV sequence number: */
+ int kcov_sequence;
#endif
#ifdef CONFIG_MEMCG
@@ -1257,6 +1266,7 @@ struct task_struct {
#ifdef CONFIG_MMU
struct task_struct *oom_reaper_list;
#endif
+ struct list_head oom_victim_list;
#ifdef CONFIG_VMAP_STACK
struct vm_struct *stack_vm_area;
#endif
@@ -1277,6 +1287,13 @@ struct task_struct {
unsigned long prev_lowest_stack;
#endif
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ unsigned long getblk_stamp;
+ unsigned int getblk_executed;
+ unsigned int getblk_bh_count;
+ unsigned long getblk_bh_state;
+#endif
+
/*
* New fields for task_struct should be added above here, so that
* they are included in the randomized portion of task_struct.
diff --git a/include/linux/string.h b/include/linux/string.h
index 02894e417565..6b0e950701d0 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -35,6 +35,51 @@ ssize_t strscpy(char *, const char *, size_t);
/* Wraps calls to strscpy()/memset(), no arch specific code required */
ssize_t strscpy_pad(char *dest, const char *src, size_t count);
+/**
+ * stracpy - Copy a C-string into an array of char/u8/s8 or equivalent
+ * @dest: Where to copy the string, must be an array of char and not a pointer
+ * @src: String to copy, may be a pointer or const char array
+ *
+ * Helper for strscpy().
+ * Copies a maximum of sizeof(@dest) bytes of @src with %NUL termination.
+ *
+ * Returns:
+ * * The number of characters copied (not including the trailing %NUL)
+ * * -E2BIG if @dest is a zero size array or @src was truncated.
+ */
+#define stracpy(dest, src) \
+({ \
+ size_t count = ARRAY_SIZE(dest); \
+ BUILD_BUG_ON(!(__same_type(dest, char[]) || \
+ __same_type(dest, unsigned char[]) || \
+ __same_type(dest, signed char[]))); \
+ \
+ strscpy(dest, src, count); \
+})
+
+/**
+ * stracpy_pad - Copy a C-string into an array of char/u8/s8 with %NUL padding
+ * @dest: Where to copy the string, must be an array of char and not a pointer
+ * @src: String to copy, may be a pointer or const char array
+ *
+ * Helper for strscpy_pad().
+ * Copies a maximum of sizeof(@dest) bytes of @src with %NUL termination
+ * and zero-pads the remaining size of @dest.
+ *
+ * Returns:
+ * * The number of characters copied (not including the trailing %NUL)
+ * * -E2BIG if @dest is a zero size array or @src was truncated.
+ */
+#define stracpy_pad(dest, src) \
+({ \
+ size_t count = ARRAY_SIZE(dest); \
+ BUILD_BUG_ON(!(__same_type(dest, char[]) || \
+ __same_type(dest, unsigned char[]) || \
+ __same_type(dest, signed char[]))); \
+ \
+ strscpy_pad(dest, src, count); \
+})
+
#ifndef __HAVE_ARCH_STRCAT
extern char * strcat(char *, const char *);
#endif
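
stracpy() infers the destination size from the array type, so a size mismatch can no longer creep in, and passing a plain pointer fails the BUILD_BUG_ON(). A hedged sketch (the struct and function names are invented for illustration):

    struct sensor {
            char label[16];
    };

    static void sensor_set_label(struct sensor *s, const char *src)
    {
            /* count == ARRAY_SIZE(s->label); a char * here would not build */
            if (stracpy(s->label, src) == -E2BIG)
                    pr_warn("label '%s' truncated\n", src);
    }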
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 6df477329b76..02fa84493f23 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -120,8 +120,7 @@ static inline void *proc_sys_poll_event(struct ctl_table_poll *poll)
struct ctl_table_poll name = __CTL_TABLE_POLL_INITIALIZER(name)
/* A sysctl table is an array of struct ctl_table: */
-struct ctl_table
-{
+struct ctl_table {
const char *procname; /* Text ID for /proc/sys, or zero */
void *data;
int maxlen;
@@ -140,8 +139,7 @@ struct ctl_node {
/* struct ctl_table_header is used to maintain dynamic lists of
struct ctl_table trees. */
-struct ctl_table_header
-{
+struct ctl_table_header {
union {
struct {
struct ctl_table *ctl_table;
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 659a4400517b..e93e249a4e9b 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -147,6 +147,8 @@ check_copy_size(const void *addr, size_t bytes, bool is_source)
__bad_copy_to();
return false;
}
+ if (WARN_ON_ONCE(bytes > INT_MAX))
+ return false;
check_object_size(addr, bytes, is_source);
return true;
}
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index a4b241102771..0e7cd3aa489b 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -139,8 +139,9 @@ extern int remap_vmalloc_range_partial(struct vm_area_struct *vma,
extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
unsigned long pgoff);
-void vmalloc_sync_all(void);
-
+void vmalloc_sync_mappings(void);
+void vmalloc_sync_unmappings(void);
+
/*
* Lowlevel-APIs (not for driver use!)
*/
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index bdeda4b079fe..292485f3d24d 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -31,6 +31,12 @@ struct reclaim_stat {
unsigned nr_unmap_fail;
};
+enum writeback_stat_item {
+ NR_DIRTY_THRESHOLD,
+ NR_DIRTY_BG_THRESHOLD,
+ NR_VM_WRITEBACK_STAT_ITEMS,
+};
+
#ifdef CONFIG_VM_EVENT_COUNTERS
/*
* Light weight per cpu counter implementation.
@@ -381,4 +387,48 @@ static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages,
extern const char * const vmstat_text[];
+static inline const char *zone_stat_name(enum zone_stat_item item)
+{
+ return vmstat_text[item];
+}
+
+#ifdef CONFIG_NUMA
+static inline const char *numa_stat_name(enum numa_stat_item item)
+{
+ return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
+ item];
+}
+#endif /* CONFIG_NUMA */
+
+static inline const char *node_stat_name(enum node_stat_item item)
+{
+ return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
+ NR_VM_NUMA_STAT_ITEMS +
+ item];
+}
+
+static inline const char *lru_list_name(enum lru_list lru)
+{
+ return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
+}
+
+static inline const char *writeback_stat_name(enum writeback_stat_item item)
+{
+ return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
+ NR_VM_NUMA_STAT_ITEMS +
+ NR_VM_NODE_STAT_ITEMS +
+ item];
+}
+
+#if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
+static inline const char *vm_event_name(enum vm_event_item item)
+{
+ return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
+ NR_VM_NUMA_STAT_ITEMS +
+ NR_VM_NODE_STAT_ITEMS +
+ NR_VM_WRITEBACK_STAT_ITEMS +
+ item];
+}
+#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
+
#endif /* _LINUX_VMSTAT_H */
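
All of these helpers index one flat vmstat_text[] array, so each one simply skips the blocks that precede its own. Roughly:

    /*
     * vmstat_text[] layout (one contiguous array of names):
     *   zone stats | NUMA stats | node stats | writeback items | VM events
     */
    pr_info("%s\n", zone_stat_name(NR_FREE_PAGES));  /* e.g. "nr_free_pages" */
    pr_info("%s\n", lru_list_name(LRU_ACTIVE_ANON)); /* "active_anon", "nr_" skipped */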
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 3283c8d02137..774e16b2249d 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -543,7 +543,7 @@ do { \
({ \
int __ret = 0; \
might_sleep(); \
- if (!(condition)) \
+ if (!(condition) && (timeout)) \
__ret = __wait_event_hrtimeout(wq_head, condition, timeout, \
TASK_UNINTERRUPTIBLE); \
__ret; \
@@ -569,7 +569,7 @@ do { \
({ \
long __ret = 0; \
might_sleep(); \
- if (!(condition)) \
+ if (!(condition) && (timeout)) \
__ret = __wait_event_hrtimeout(wq, condition, timeout, \
TASK_INTERRUPTIBLE); \
__ret; \
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index a5ab2973e8dc..c37e2280e6dd 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -127,18 +127,43 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_direct_reclaim_b
);
#ifdef CONFIG_MEMCG
-DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_reclaim_begin,
+DECLARE_EVENT_CLASS(mm_vmscan_memcg_reclaim_begin_template,
- TP_PROTO(int order, gfp_t gfp_flags),
+ TP_PROTO(unsigned int cgroup_ino, int order, gfp_t gfp_flags),
- TP_ARGS(order, gfp_flags)
+ TP_ARGS(cgroup_ino, order, gfp_flags),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, cgroup_ino)
+ __field(int, order)
+ __field(gfp_t, gfp_flags)
+ ),
+
+ TP_fast_assign(
+ __entry->cgroup_ino = cgroup_ino;
+ __entry->order = order;
+ __entry->gfp_flags = gfp_flags;
+ ),
+
+ TP_printk("cgroup_ino=%u order=%d gfp_flags=%s",
+ __entry->cgroup_ino, __entry->order,
+ show_gfp_flags(__entry->gfp_flags))
);
-DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_softlimit_reclaim_begin,
+DEFINE_EVENT(mm_vmscan_memcg_reclaim_begin_template,
+ mm_vmscan_memcg_reclaim_begin,
- TP_PROTO(int order, gfp_t gfp_flags),
+ TP_PROTO(unsigned int cgroup_ino, int order, gfp_t gfp_flags),
- TP_ARGS(order, gfp_flags)
+ TP_ARGS(cgroup_ino, order, gfp_flags)
+);
+
+DEFINE_EVENT(mm_vmscan_memcg_reclaim_begin_template,
+ mm_vmscan_memcg_softlimit_reclaim_begin,
+
+ TP_PROTO(unsigned int cgroup_ino, int order, gfp_t gfp_flags),
+
+ TP_ARGS(cgroup_ino, order, gfp_flags)
);
#endif /* CONFIG_MEMCG */
@@ -167,18 +192,40 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_direct_reclaim_end
);
#ifdef CONFIG_MEMCG
-DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_reclaim_end,
+DECLARE_EVENT_CLASS(mm_vmscan_memcg_reclaim_end_template,
- TP_PROTO(unsigned long nr_reclaimed),
+ TP_PROTO(unsigned int cgroup_ino, unsigned long nr_reclaimed),
- TP_ARGS(nr_reclaimed)
+ TP_ARGS(cgroup_ino, nr_reclaimed),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, cgroup_ino)
+ __field(unsigned long, nr_reclaimed)
+ ),
+
+ TP_fast_assign(
+ __entry->cgroup_ino = cgroup_ino;
+ __entry->nr_reclaimed = nr_reclaimed;
+ ),
+
+ TP_printk("cgroup_ino=%u nr_reclaimed=%lu",
+ __entry->cgroup_ino, __entry->nr_reclaimed)
);
-DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_softlimit_reclaim_end,
+DEFINE_EVENT(mm_vmscan_memcg_reclaim_end_template,
+ mm_vmscan_memcg_reclaim_end,
- TP_PROTO(unsigned long nr_reclaimed),
+ TP_PROTO(unsigned int cgroup_ino, unsigned long nr_reclaimed),
- TP_ARGS(nr_reclaimed)
+ TP_ARGS(cgroup_ino, nr_reclaimed)
+);
+
+DEFINE_EVENT(mm_vmscan_memcg_reclaim_end_template,
+ mm_vmscan_memcg_softlimit_reclaim_end,
+
+ TP_PROTO(unsigned int cgroup_ino, unsigned long nr_reclaimed),
+
+ TP_ARGS(cgroup_ino, nr_reclaimed)
);
#endif /* CONFIG_MEMCG */
diff --git a/include/uapi/asm-generic/ipcbuf.h b/include/uapi/asm-generic/ipcbuf.h
index 7d80dbd336fb..41a01b494fc7 100644
--- a/include/uapi/asm-generic/ipcbuf.h
+++ b/include/uapi/asm-generic/ipcbuf.h
@@ -2,6 +2,8 @@
#ifndef __ASM_GENERIC_IPCBUF_H
#define __ASM_GENERIC_IPCBUF_H
+#include <linux/posix_types.h>
+
/*
* The generic ipc64_perm structure:
* Note extra padding because this structure is passed back and forth
diff --git a/include/uapi/asm-generic/msgbuf.h b/include/uapi/asm-generic/msgbuf.h
index af95aa89012e..6504d7b741ce 100644
--- a/include/uapi/asm-generic/msgbuf.h
+++ b/include/uapi/asm-generic/msgbuf.h
@@ -3,6 +3,8 @@
#define __ASM_GENERIC_MSGBUF_H
#include <asm/bitsperlong.h>
+#include <asm/ipcbuf.h>
+
/*
* generic msqid64_ds structure.
*
diff --git a/include/uapi/asm-generic/sembuf.h b/include/uapi/asm-generic/sembuf.h
index 137606018c6a..0e709bd3d730 100644
--- a/include/uapi/asm-generic/sembuf.h
+++ b/include/uapi/asm-generic/sembuf.h
@@ -3,6 +3,7 @@
#define __ASM_GENERIC_SEMBUF_H
#include <asm/bitsperlong.h>
+#include <asm/ipcbuf.h>
/*
* The semid64_ds structure for x86 architecture.
diff --git a/include/uapi/linux/kcov.h b/include/uapi/linux/kcov.h
index 9529867717a8..409d3ad1e6e2 100644
--- a/include/uapi/linux/kcov.h
+++ b/include/uapi/linux/kcov.h
@@ -4,9 +4,24 @@
#include <linux/types.h>
+/*
+ * Argument for KCOV_REMOTE_ENABLE ioctl, see Documentation/dev-tools/kcov.rst
+ * and the comment before kcov_remote_start() for usage details.
+ */
+struct kcov_remote_arg {
+ unsigned int trace_mode; /* KCOV_TRACE_PC or KCOV_TRACE_CMP */
+ unsigned int area_size; /* Length of coverage buffer in words */
+ unsigned int num_handles; /* Size of handles array */
+ __u64 common_handle;
+ __u64 handles[0];
+};
+
+#define KCOV_REMOTE_MAX_HANDLES 0x100
+
#define KCOV_INIT_TRACE _IOR('c', 1, unsigned long)
#define KCOV_ENABLE _IO('c', 100)
#define KCOV_DISABLE _IO('c', 101)
+#define KCOV_REMOTE_ENABLE _IOW('c', 102, struct kcov_remote_arg)
enum {
/*
@@ -32,4 +47,17 @@ enum {
#define KCOV_CMP_SIZE(n) ((n) << 1)
#define KCOV_CMP_MASK KCOV_CMP_SIZE(3)
+#define KCOV_SUBSYSTEM_COMMON (0x00ull << 56)
+#define KCOV_SUBSYSTEM_USB (0x01ull << 56)
+
+#define KCOV_SUBSYSTEM_MASK (0xffull << 56)
+#define KCOV_INSTANCE_MASK (0xffffffffull)
+
+static inline __u64 kcov_remote_handle(__u64 subsys, __u64 inst)
+{
+ if (subsys & ~KCOV_SUBSYSTEM_MASK || inst & ~KCOV_INSTANCE_MASK)
+ return 0;
+ return subsys | inst;
+}
+
#endif /* _LINUX_KCOV_IOCTLS_H */
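
A condensed userspace sketch of the new ioctl, modelled on the flow in Documentation/dev-tools/kcov.rst; COVER_SIZE and the USB bus number are illustrative, and error handling plus the coverage-buffer mmap() are omitted:

    #include <fcntl.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <linux/kcov.h>

    #define COVER_SIZE (64 << 10)  /* buffer size in words, illustrative */

    static int enable_remote_usb_coverage(void)
    {
            struct kcov_remote_arg *arg;
            int fd = open("/sys/kernel/debug/kcov", O_RDWR);

            if (fd < 0)
                    return -1;
            ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE);
            /* mmap() the coverage buffer here, as for plain KCOV_ENABLE */

            arg = calloc(1, sizeof(*arg) + sizeof(__u64));
            arg->trace_mode  = KCOV_TRACE_PC;
            arg->area_size   = COVER_SIZE;
            arg->num_handles = 1;
            arg->handles[0]  = kcov_remote_handle(KCOV_SUBSYSTEM_USB,
                                                  1 /* bus number */);
            return ioctl(fd, KCOV_REMOTE_ENABLE, arg);
    }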
diff --git a/include/uapi/linux/scc.h b/include/uapi/linux/scc.h
index c5bc7f747755..947edb17ce9d 100644
--- a/include/uapi/linux/scc.h
+++ b/include/uapi/linux/scc.h
@@ -4,6 +4,7 @@
#ifndef _UAPI_SCC_H
#define _UAPI_SCC_H
+#include <linux/sockios.h>
/* selection of hardware types */
diff --git a/init/Kconfig b/init/Kconfig
index d7163fcf233b..a34064a031a5 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -146,13 +146,13 @@ config LOCALVERSION_AUTO
which is done within the script "scripts/setlocalversion".)
config BUILD_SALT
- string "Build ID Salt"
- default ""
- help
- The build ID is used to link binaries and their debug info. Setting
- this option will use the value in the calculation of the build id.
- This is mostly useful for distributions which want to ensure the
- build is unique between builds. It's safe to leave the default.
+ string "Build ID Salt"
+ default ""
+ help
+ The build ID is used to link binaries and their debug info. Setting
+ this option will use the value in the calculation of the build id.
+ This is mostly useful for distributions which want to ensure the
+ build is unique between builds. It's safe to leave the default.
config HAVE_KERNEL_GZIP
bool
@@ -818,7 +818,7 @@ menuconfig CGROUPS
if CGROUPS
config PAGE_COUNTER
- bool
+ bool
config MEMCG
bool "Memory controller"
@@ -1311,9 +1311,9 @@ menuconfig EXPERT
select DEBUG_KERNEL
help
This option allows certain base kernel options and settings
- to be disabled or tweaked. This is for specialized
- environments which can tolerate a "non-standard" kernel.
- Only use this if you really know what you are doing.
+ to be disabled or tweaked. This is for specialized
+ environments which can tolerate a "non-standard" kernel.
+ Only use this if you really know what you are doing.
config UID16
bool "Enable 16-bit UID system calls" if EXPERT
@@ -1406,11 +1406,11 @@ config BUG
bool "BUG() support" if EXPERT
default y
help
- Disabling this option eliminates support for BUG and WARN, reducing
- the size of your kernel image and potentially quietly ignoring
- numerous fatal conditions. You should only consider disabling this
- option for embedded systems with no facilities for reporting errors.
- Just say Y.
+ Disabling this option eliminates support for BUG and WARN, reducing
+ the size of your kernel image and potentially quietly ignoring
+ numerous fatal conditions. You should only consider disabling this
+ option for embedded systems with no facilities for reporting errors.
+ Just say Y.
config ELF_CORE
depends on COREDUMP
@@ -1426,8 +1426,8 @@ config PCSPKR_PLATFORM
select I8253_LOCK
default y
help
- This option allows to disable the internal PC-Speaker
- support, saving some memory.
+ This option allows to disable the internal PC-Speaker
+ support, saving some memory.
config BASE_FULL
default y
@@ -1545,29 +1545,29 @@ config MEMBARRIER
If unsure, say Y.
config KALLSYMS
- bool "Load all symbols for debugging/ksymoops" if EXPERT
- default y
- help
- Say Y here to let the kernel print out symbolic crash information and
- symbolic stack backtraces. This increases the size of the kernel
- somewhat, as all symbols have to be loaded into the kernel image.
+ bool "Load all symbols for debugging/ksymoops" if EXPERT
+ default y
+ help
+ Say Y here to let the kernel print out symbolic crash information and
+ symbolic stack backtraces. This increases the size of the kernel
+ somewhat, as all symbols have to be loaded into the kernel image.
config KALLSYMS_ALL
bool "Include all symbols in kallsyms"
depends on DEBUG_KERNEL && KALLSYMS
help
- Normally kallsyms only contains the symbols of functions for nicer
- OOPS messages and backtraces (i.e., symbols from the text and inittext
- sections). This is sufficient for most cases. And only in very rare
- cases (e.g., when a debugger is used) all symbols are required (e.g.,
- names of variables from the data sections, etc).
+ Normally kallsyms only contains the symbols of functions for nicer
+ OOPS messages and backtraces (i.e., symbols from the text and inittext
+ sections). This is sufficient for most cases. And only in very rare
+ cases (e.g., when a debugger is used) all symbols are required (e.g.,
+ names of variables from the data sections, etc).
- This option makes sure that all symbols are loaded into the kernel
- image (i.e., symbols from all sections) in cost of increased kernel
- size (depending on the kernel configuration, it may be 300KiB or
- something like this).
+ This option makes sure that all symbols are loaded into the kernel
+ image (i.e., symbols from all sections) in cost of increased kernel
+ size (depending on the kernel configuration, it may be 300KiB or
+ something like this).
- Say N unless you really need all symbols.
+ Say N unless you really need all symbols.
config KALLSYMS_ABSOLUTE_PERCPU
bool
@@ -1710,12 +1710,12 @@ config DEBUG_PERF_USE_VMALLOC
depends on PERF_EVENTS && DEBUG_KERNEL && !PPC
select PERF_USE_VMALLOC
help
- Use vmalloc memory to back perf mmap() buffers.
+ Use vmalloc memory to back perf mmap() buffers.
- Mostly useful for debugging the vmalloc code on platforms
- that don't require it.
+ Mostly useful for debugging the vmalloc code on platforms
+ that don't require it.
- Say N if unsure.
+ Say N if unsure.
endmenu
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 3d920ff15c80..49a05ba3000d 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -63,6 +63,66 @@ struct posix_msg_tree_node {
int priority;
};
+/*
+ * Locking:
+ *
+ * Accesses to a message queue are synchronized by acquiring info->lock.
+ *
+ * There are two notable exceptions:
+ * - The actual wakeup of a sleeping task is performed using the wake_q
+ * framework. info->lock is already released when wake_up_q is called.
+ * - The exit codepaths after sleeping check ext_wait_queue->state without
+ * any locks. If it is STATE_READY, then the syscall is completed without
+ * acquiring info->lock.
+ *
+ * MQ_BARRIER:
+ * To achieve proper release/acquire memory barrier pairing, the state is set to
+ * STATE_READY with smp_store_release(), and it is read with READ_ONCE followed
+ * by smp_acquire__after_ctrl_dep(). In addition, wake_q_add_safe() is used.
+ *
+ * This prevents the following races:
+ *
+ * 1) With the simple wake_q_add(), the task could be gone already before
+ * the increase of the reference happens
+ * Thread A                                Thread B
+ * WRITE_ONCE(wait.state, STATE_NONE);
+ * schedule_hrtimeout()
+ *                                         wake_q_add(A)
+ *                                         if (cmpxchg()) // success
+ *                                            ->state = STATE_READY (reordered)
+ * <timeout returns>
+ * if (wait.state == STATE_READY) return;
+ * sysret to user space
+ * sys_exit()
+ *                                         get_task_struct() // UaF
+ *
+ * Solution: Use wake_q_add_safe() and perform the get_task_struct() before
+ * the smp_store_release() that does ->state = STATE_READY.
+ *
+ * 2) Without proper _release/_acquire barriers, the woken up task
+ * could read stale data
+ *
+ * Thread A                                Thread B
+ * do_mq_timedreceive
+ * WRITE_ONCE(wait.state, STATE_NONE);
+ * schedule_hrtimeout()
+ *                                         state = STATE_READY;
+ * <timeout returns>
+ * if (wait.state == STATE_READY) return;
+ * msg_ptr = wait.msg; // Access to stale data!
+ *                                         receiver->msg = message; (reordered)
+ *
+ * Solution: use _release and _acquire barriers.
+ *
+ * 3) There is intentionally no barrier when setting current->state
+ * to TASK_INTERRUPTIBLE: spin_unlock(&info->lock) provides the
+ * release memory barrier, and the wakeup is triggered when holding
+ * info->lock, i.e. spin_lock(&info->lock) provides a pairing
+ * acquire memory barrier.
+ */
+
struct ext_wait_queue { /* queue of sleeping tasks */
struct task_struct *task;
struct list_head list;
@@ -646,18 +706,23 @@ static int wq_sleep(struct mqueue_inode_info *info, int sr,
wq_add(info, sr, ewp);
for (;;) {
+ /* memory barrier not required, we hold info->lock */
__set_current_state(TASK_INTERRUPTIBLE);
spin_unlock(&info->lock);
time = schedule_hrtimeout_range_clock(timeout, 0,
HRTIMER_MODE_ABS, CLOCK_REALTIME);
- if (ewp->state == STATE_READY) {
+ if (READ_ONCE(ewp->state) == STATE_READY) {
+ /* see MQ_BARRIER for purpose/pairing */
+ smp_acquire__after_ctrl_dep();
retval = 0;
goto out;
}
spin_lock(&info->lock);
- if (ewp->state == STATE_READY) {
+
+ /* we hold info->lock, so no memory barrier required */
+ if (READ_ONCE(ewp->state) == STATE_READY) {
retval = 0;
goto out_unlock;
}
@@ -918,6 +983,18 @@ out_name:
* The same algorithm is used for senders.
*/
+static inline void __pipelined_op(struct wake_q_head *wake_q,
+ struct mqueue_inode_info *info,
+ struct ext_wait_queue *this)
+{
+ list_del(&this->list);
+ get_task_struct(this->task);
+
+ /* see MQ_BARRIER for purpose/pairing */
+ smp_store_release(&this->state, STATE_READY);
+ wake_q_add_safe(wake_q, this->task);
+}
+
/* pipelined_send() - send a message directly to the task waiting in
* sys_mq_timedreceive() (without inserting message into a queue).
*/
@@ -927,17 +1004,7 @@ static inline void pipelined_send(struct wake_q_head *wake_q,
struct ext_wait_queue *receiver)
{
receiver->msg = message;
- list_del(&receiver->list);
- wake_q_add(wake_q, receiver->task);
- /*
- * Rely on the implicit cmpxchg barrier from wake_q_add such
- * that we can ensure that updating receiver->state is the last
- * write operation: As once set, the receiver can continue,
- * and if we don't have the reference count from the wake_q,
- * yet, at that point we can later have a use-after-free
- * condition and bogus wakeup.
- */
- receiver->state = STATE_READY;
+ __pipelined_op(wake_q, info, receiver);
}
/* pipelined_receive() - if there is task waiting in sys_mq_timedsend()
@@ -955,9 +1022,7 @@ static inline void pipelined_receive(struct wake_q_head *wake_q,
if (msg_insert(sender->msg, info))
return;
- list_del(&sender->list);
- wake_q_add(wake_q, sender->task);
- sender->state = STATE_READY;
+ __pipelined_op(wake_q, info, sender);
}
static int do_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr,
@@ -1044,7 +1109,9 @@ static int do_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr,
} else {
wait.task = current;
wait.msg = (void *) msg_ptr;
- wait.state = STATE_NONE;
+
+ /* memory barrier not required, we hold info->lock */
+ WRITE_ONCE(wait.state, STATE_NONE);
ret = wq_sleep(info, SEND, timeout, &wait);
/*
* wq_sleep must be called with info->lock held, and
@@ -1147,7 +1214,9 @@ static int do_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr,
ret = -EAGAIN;
} else {
wait.task = current;
- wait.state = STATE_NONE;
+
+ /* memory barrier not required, we hold info->lock */
+ WRITE_ONCE(wait.state, STATE_NONE);
ret = wq_sleep(info, RECV, timeout, &wait);
msg_ptr = wait.msg;
}
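
Condensed, the pairing that the MQ_BARRIER comment describes looks like this; it is a restatement of the patch's own code in __pipelined_op() and wq_sleep(), not new logic:

    /* Waker, info->lock held -- __pipelined_op() above: */
    list_del(&this->list);
    get_task_struct(this->task);                  /* pin before wakeup */
    smp_store_release(&this->state, STATE_READY); /* RELEASE: msg visible first */
    wake_q_add_safe(wake_q, this->task);          /* consumes the reference */

    /* Sleeper, lockless exit path -- wq_sleep() above: */
    if (READ_ONCE(ewp->state) == STATE_READY) {
            smp_acquire__after_ctrl_dep();        /* ACQUIRE: pairs with above */
            /* reading ewp->msg (and returning) is now safe */
    }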
diff --git a/ipc/msg.c b/ipc/msg.c
index 8dec945fa030..22ed09ded601 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -61,6 +61,16 @@ struct msg_queue {
struct list_head q_senders;
} __randomize_layout;
+/*
+ * MSG_BARRIER Locking:
+ *
+ * Similar to the optimization used in ipc/mqueue.c, one syscall return path
+ * does not acquire any locks when it sees that a message exists in
+ * msg_receiver.r_msg. Therefore r_msg is set using smp_store_release()
+ * and accessed using READ_ONCE()+smp_acquire__after_ctrl_dep(). In addition,
+ * wake_q_add_safe() is used. See ipc/mqueue.c for more details.
+ */
+
/* one msg_receiver structure for each sleeping receiver */
struct msg_receiver {
struct list_head r_list;
@@ -184,6 +194,10 @@ static inline void ss_add(struct msg_queue *msq,
{
mss->tsk = current;
mss->msgsz = msgsz;
+ /*
+ * No memory barrier required: we did ipc_lock_object(),
+ * and the waker obtains that lock before calling wake_q_add().
+ */
__set_current_state(TASK_INTERRUPTIBLE);
list_add_tail(&mss->list, &msq->q_senders);
}
@@ -237,8 +251,11 @@ static void expunge_all(struct msg_queue *msq, int res,
struct msg_receiver *msr, *t;
list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
- wake_q_add(wake_q, msr->r_tsk);
- WRITE_ONCE(msr->r_msg, ERR_PTR(res));
+ get_task_struct(msr->r_tsk);
+
+ /* see MSG_BARRIER for purpose/pairing */
+ smp_store_release(&msr->r_msg, ERR_PTR(res));
+ wake_q_add_safe(wake_q, msr->r_tsk);
}
}
@@ -377,7 +394,7 @@ copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version)
* NOTE: no locks must be held, the rwsem is taken inside this function.
*/
static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
- struct msqid64_ds *msqid64)
+ struct ipc64_perm *perm, int msg_qbytes)
{
struct kern_ipc_perm *ipcp;
struct msg_queue *msq;
@@ -387,7 +404,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
rcu_read_lock();
ipcp = ipcctl_obtain_check(ns, &msg_ids(ns), msqid, cmd,
- &msqid64->msg_perm, msqid64->msg_qbytes);
+ perm, msg_qbytes);
if (IS_ERR(ipcp)) {
err = PTR_ERR(ipcp);
goto out_unlock1;
@@ -409,18 +426,18 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
{
DEFINE_WAKE_Q(wake_q);
- if (msqid64->msg_qbytes > ns->msg_ctlmnb &&
+ if (msg_qbytes > ns->msg_ctlmnb &&
!capable(CAP_SYS_RESOURCE)) {
err = -EPERM;
goto out_unlock1;
}
ipc_lock_object(&msq->q_perm);
- err = ipc_update_perm(&msqid64->msg_perm, ipcp);
+ err = ipc_update_perm(perm, ipcp);
if (err)
goto out_unlock0;
- msq->q_qbytes = msqid64->msg_qbytes;
+ msq->q_qbytes = msg_qbytes;
msq->q_ctime = ktime_get_real_seconds();
/*
@@ -601,9 +618,9 @@ static long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf, int ver
case IPC_SET:
if (copy_msqid_from_user(&msqid64, buf, version))
return -EFAULT;
- /* fallthru */
+ return msgctl_down(ns, msqid, cmd, &msqid64.msg_perm, msqid64.msg_qbytes);
case IPC_RMID:
- return msgctl_down(ns, msqid, cmd, &msqid64);
+ return msgctl_down(ns, msqid, cmd, NULL, 0);
default:
return -EINVAL;
}
@@ -735,9 +752,9 @@ static long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr, int versio
case IPC_SET:
if (copy_compat_msqid_from_user(&msqid64, uptr, version))
return -EFAULT;
- /* fallthru */
+ return msgctl_down(ns, msqid, cmd, &msqid64.msg_perm, msqid64.msg_qbytes);
case IPC_RMID:
- return msgctl_down(ns, msqid, cmd, &msqid64);
+ return msgctl_down(ns, msqid, cmd, NULL, 0);
default:
return -EINVAL;
}
@@ -798,13 +815,17 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg,
list_del(&msr->r_list);
if (msr->r_maxsize < msg->m_ts) {
wake_q_add(wake_q, msr->r_tsk);
- WRITE_ONCE(msr->r_msg, ERR_PTR(-E2BIG));
+
+ /* See expunge_all regarding memory barrier */
+ smp_store_release(&msr->r_msg, ERR_PTR(-E2BIG));
} else {
ipc_update_pid(&msq->q_lrpid, task_pid(msr->r_tsk));
msq->q_rtime = ktime_get_real_seconds();
wake_q_add(wake_q, msr->r_tsk);
- WRITE_ONCE(msr->r_msg, msg);
+
+ /* See expunge_all regarding memory barrier */
+ smp_store_release(&msr->r_msg, msg);
return 1;
}
}
@@ -1154,7 +1175,11 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
msr_d.r_maxsize = INT_MAX;
else
msr_d.r_maxsize = bufsz;
- msr_d.r_msg = ERR_PTR(-EAGAIN);
+
+ /* memory barrier not required due to ipc_lock_object() */
+ WRITE_ONCE(msr_d.r_msg, ERR_PTR(-EAGAIN));
+
+ /* memory barrier not required, we own ipc_lock_object() */
__set_current_state(TASK_INTERRUPTIBLE);
ipc_unlock_object(&msq->q_perm);
@@ -1183,8 +1208,12 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
* signal) it will either see the message and continue ...
*/
msg = READ_ONCE(msr_d.r_msg);
- if (msg != ERR_PTR(-EAGAIN))
+ if (msg != ERR_PTR(-EAGAIN)) {
+ /* see MSG_BARRIER for purpose/pairing */
+ smp_acquire__after_ctrl_dep();
+
goto out_unlock1;
+ }
/*
* ... or see -EAGAIN, acquire the lock to check the message
@@ -1192,7 +1221,7 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
*/
ipc_lock_object(&msq->q_perm);
- msg = msr_d.r_msg;
+ msg = READ_ONCE(msr_d.r_msg);
if (msg != ERR_PTR(-EAGAIN))
goto out_unlock0;
diff --git a/ipc/sem.c b/ipc/sem.c
index ec97a7072413..4f4303f32077 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -205,15 +205,38 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
*
* Memory ordering:
* Most ordering is enforced by using spin_lock() and spin_unlock().
- * The special case is use_global_lock:
+ *
+ * Exceptions:
+ * 1) use_global_lock: (SEM_BARRIER_1)
* Setting it from non-zero to 0 is a RELEASE, this is ensured by
- * using smp_store_release().
+ * using smp_store_release(): Immediately after setting it to 0,
+ * a simple op can start.
* Testing if it is non-zero is an ACQUIRE, this is ensured by using
* smp_load_acquire().
* Setting it from 0 to non-zero must be ordered with regards to
* this smp_load_acquire(), this is guaranteed because the smp_load_acquire()
* is inside a spin_lock() and after a write from 0 to non-zero a
* spin_lock()+spin_unlock() is done.
+ *
+ * 2) queue.status: (SEM_BARRIER_2)
+ * Initialization is done while holding sem_lock(), so no further barrier is
+ * required.
+ * Setting it to a result code is a RELEASE, this is ensured by both a
+ * smp_store_release() (for case a) and while holding sem_lock()
+ * (for case b).
+ * The ACQUIRE when reading the result code without holding sem_lock() is
+ * achieved by using READ_ONCE() + smp_acquire__after_ctrl_dep()
+ * (case a above).
+ * Reading the result code while holding sem_lock() needs no further barriers;
+ * the locks inside sem_lock() enforce ordering (case b above).
+ *
+ * 3) current->state:
+ * current->state is set to TASK_INTERRUPTIBLE while holding sem_lock().
+ * The wakeup is handled using the wake_q infrastructure. wake_q wakeups may
+ * happen immediately after calling wake_q_add. As wake_q_add_safe() is called
+ * when holding sem_lock(), no further barriers are required.
+ *
+ * See also ipc/mqueue.c for more details on the covered races.
*/
#define sc_semmsl sem_ctls[0]
@@ -344,12 +367,8 @@ static void complexmode_tryleave(struct sem_array *sma)
return;
}
if (sma->use_global_lock == 1) {
- /*
- * Immediately after setting use_global_lock to 0,
- * a simple op can start. Thus: all memory writes
- * performed by the current operation must be visible
- * before we set use_global_lock to 0.
- */
+
+ /* See SEM_BARRIER_1 for purpose/pairing */
smp_store_release(&sma->use_global_lock, 0);
} else {
sma->use_global_lock--;
@@ -400,7 +419,7 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
*/
spin_lock(&sem->lock);
- /* pairs with smp_store_release() */
+ /* see SEM_BARRIER_1 for purpose/pairing */
if (!smp_load_acquire(&sma->use_global_lock)) {
/* fast path successful! */
return sops->sem_num;
@@ -766,15 +785,12 @@ would_block:
static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error,
struct wake_q_head *wake_q)
{
- wake_q_add(wake_q, q->sleeper);
- /*
- * Rely on the above implicit barrier, such that we can
- * ensure that we hold reference to the task before setting
- * q->status. Otherwise we could race with do_exit if the
- * task is awoken by an external event before calling
- * wake_up_process().
- */
- WRITE_ONCE(q->status, error);
+ get_task_struct(q->sleeper);
+
+ /* see SEM_BARRIER_2 for purpose/pairing */
+ smp_store_release(&q->status, error);
+
+ wake_q_add_safe(wake_q, q->sleeper);
}
static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
@@ -2148,9 +2164,11 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops,
}
do {
+ /* memory ordering ensured by the lock in sem_lock() */
WRITE_ONCE(queue.status, -EINTR);
queue.sleeper = current;
+ /* memory ordering is ensured by the lock in sem_lock() */
__set_current_state(TASK_INTERRUPTIBLE);
sem_unlock(sma, locknum);
rcu_read_unlock();
@@ -2173,13 +2191,8 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops,
*/
error = READ_ONCE(queue.status);
if (error != -EINTR) {
- /*
- * User space could assume that semop() is a memory
- * barrier: Without the mb(), the cpu could
- * speculatively read in userspace stale data that was
- * overwritten by the previous owner of the semaphore.
- */
- smp_mb();
+ /* see SEM_BARRIER_2 for purpose/pairing */
+ smp_acquire__after_ctrl_dep();
goto out_free;
}
@@ -2189,6 +2202,9 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops,
if (!ipc_valid_object(&sma->sem_perm))
goto out_unlock_free;
+ /*
+ * No barrier required: we are protected by sem_lock().
+ */
error = READ_ONCE(queue.status);
/*
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index d47bd40fc0f5..d14cbc83986a 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -178,7 +178,7 @@ bool dma_in_atomic_pool(void *start, size_t size)
if (unlikely(!atomic_pool))
return false;
- return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
+ return gen_pool_has_addr(atomic_pool, (unsigned long)start, size);
}
void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 14a625c16cb3..69f54848af79 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -142,6 +142,47 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
touch_nmi_watchdog();
}
+static void check_killed_task(struct task_struct *t, unsigned long timeout)
+{
+ unsigned long stamp = t->killed_time;
+
+ /*
+ * Ensure the task is not frozen.
+ * Also, skip vfork and any other user process that freezer should skip.
+ */
+ if (unlikely(t->flags & (PF_FROZEN | PF_FREEZER_SKIP)))
+ return;
+ /*
+ * Skip threads which are already inside do_exit(), for exit_mm() etc.
+ * might take many seconds.
+ */
+ if (t->flags & PF_EXITING)
+ return;
+ if (!stamp) {
+ stamp = jiffies;
+ if (!stamp)
+ stamp++;
+ t->killed_time = stamp;
+ return;
+ }
+ if (time_is_after_jiffies(stamp + timeout * HZ))
+ return;
+ trace_sched_process_hang(t);
+ if (sysctl_hung_task_panic) {
+ console_verbose();
+ hung_task_call_panic = true;
+ }
+ /*
+ * This thread failed to terminate for more than
+ * sysctl_hung_task_timeout_secs seconds, complain:
+ */
+ pr_err("INFO: task %s:%d can't die for more than %ld seconds.\n",
+ t->comm, t->pid, (jiffies - stamp) / HZ);
+ sched_show_task(t);
+ hung_task_show_lock = true;
+ touch_nmi_watchdog();
+}
+
/*
* To avoid extending the RCU grace period for an unbounded amount of time,
* periodically exit the critical section and enter a new one.
@@ -193,6 +234,9 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
goto unlock;
last_break = jiffies;
}
+ /* Check threads which are about to terminate. */
+ if (unlikely(fatal_signal_pending(t)))
+ check_killed_task(t, timeout);
/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
if (t->state == TASK_UNINTERRUPTIBLE)
check_hung_task(t, timeout);
diff --git a/kernel/kcov.c b/kernel/kcov.c
index 2ee38727844a..f50354202dbe 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -9,6 +9,7 @@
#include <linux/types.h>
#include <linux/file.h>
#include <linux/fs.h>
+#include <linux/hashtable.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/preempt.h>
@@ -21,8 +22,11 @@
#include <linux/uaccess.h>
#include <linux/kcov.h>
#include <linux/refcount.h>
+#include <linux/log2.h>
#include <asm/setup.h>
+#define kcov_debug(fmt, ...) pr_debug("%s: " fmt, __func__, ##__VA_ARGS__)
+
/* Number of 64-bit words written per one comparison: */
#define KCOV_WORDS_PER_CMP 4
@@ -44,19 +48,100 @@ struct kcov {
* Reference counter. We keep one for:
* - opened file descriptor
* - task with enabled coverage (we can't unwire it from another task)
+ * - each code section for remote coverage collection
*/
refcount_t refcount;
/* The lock protects mode, size, area and t. */
spinlock_t lock;
enum kcov_mode mode;
- /* Size of arena (in long's for KCOV_MODE_TRACE). */
- unsigned size;
+ /* Size of arena (in long's). */
+ unsigned int size;
/* Coverage buffer shared with user space. */
void *area;
/* Task for which we collect coverage, or NULL. */
struct task_struct *t;
+ /* Collecting coverage from remote (background) threads. */
+ bool remote;
+ /* Size of remote area (in long's). */
+ unsigned int remote_size;
+ /*
+ * Sequence is incremented each time kcov is reenabled, used by
+ * kcov_remote_stop(), see the comment there.
+ */
+ int sequence;
};
+struct kcov_remote_area {
+ struct list_head list;
+ unsigned int size;
+};
+
+struct kcov_remote {
+ u64 handle;
+ struct kcov *kcov;
+ struct hlist_node hnode;
+};
+
+static DEFINE_SPINLOCK(kcov_remote_lock);
+static DEFINE_HASHTABLE(kcov_remote_map, 4);
+static struct list_head kcov_remote_areas = LIST_HEAD_INIT(kcov_remote_areas);
+
+/* Must be called with kcov_remote_lock locked. */
+static struct kcov_remote *kcov_remote_find(u64 handle)
+{
+ struct kcov_remote *remote;
+
+ hash_for_each_possible(kcov_remote_map, remote, hnode, handle) {
+ if (remote->handle == handle)
+ return remote;
+ }
+ return NULL;
+}
+
+static struct kcov_remote *kcov_remote_add(struct kcov *kcov, u64 handle)
+{
+ struct kcov_remote *remote;
+
+ if (kcov_remote_find(handle))
+ return ERR_PTR(-EEXIST);
+ remote = kmalloc(sizeof(*remote), GFP_ATOMIC);
+ if (!remote)
+ return ERR_PTR(-ENOMEM);
+ remote->handle = handle;
+ remote->kcov = kcov;
+ hash_add(kcov_remote_map, &remote->hnode, handle);
+ return remote;
+}
+
+/* Must be called with kcov_remote_lock locked. */
+static struct kcov_remote_area *kcov_remote_area_get(unsigned int size)
+{
+ struct kcov_remote_area *area;
+ struct list_head *pos;
+
+ kcov_debug("size = %u\n", size);
+ list_for_each(pos, &kcov_remote_areas) {
+ area = list_entry(pos, struct kcov_remote_area, list);
+ if (area->size == size) {
+ list_del(&area->list);
+ kcov_debug("rv = %px\n", area);
+ return area;
+ }
+ }
+ kcov_debug("rv = NULL\n");
+ return NULL;
+}
+
+/* Must be called with kcov_remote_lock locked. */
+static void kcov_remote_area_put(struct kcov_remote_area *area,
+ unsigned int size)
+{
+ kcov_debug("area = %px, size = %u\n", area, size);
+ INIT_LIST_HEAD(&area->list);
+ area->size = size;
+ list_add(&area->list, &kcov_remote_areas);
+}
+
static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t)
{
unsigned int mode;
@@ -73,7 +158,7 @@ static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_stru
* in_interrupt() returns false (e.g. preempt_schedule_irq()).
* READ_ONCE()/barrier() effectively provides load-acquire wrt
* interrupts, there are paired barrier()/WRITE_ONCE() in
- * kcov_ioctl_locked().
+ * kcov_start().
*/
barrier();
return mode == needed_mode;
@@ -227,6 +312,78 @@ void notrace __sanitizer_cov_trace_switch(u64 val, u64 *cases)
EXPORT_SYMBOL(__sanitizer_cov_trace_switch);
#endif /* ifdef CONFIG_KCOV_ENABLE_COMPARISONS */
+static void kcov_start(struct task_struct *t, unsigned int size,
+ void *area, enum kcov_mode mode, int sequence)
+{
+ kcov_debug("t = %px, size = %u, area = %px\n", t, size, area);
+ /* Cache in task struct for performance. */
+ t->kcov_size = size;
+ t->kcov_area = area;
+ /* See comment in check_kcov_mode(). */
+ barrier();
+ WRITE_ONCE(t->kcov_mode, mode);
+ t->kcov_sequence = sequence;
+}
+
+static void kcov_stop(struct task_struct *t)
+{
+ WRITE_ONCE(t->kcov_mode, KCOV_MODE_DISABLED);
+ barrier();
+ t->kcov_size = 0;
+ t->kcov_area = NULL;
+}
+
+static void kcov_task_reset(struct task_struct *t)
+{
+ kcov_stop(t);
+ t->kcov = NULL;
+ t->kcov_sequence = 0;
+ t->kcov_handle = 0;
+}
+
+void kcov_task_init(struct task_struct *t)
+{
+ kcov_task_reset(t);
+ t->kcov_handle = current->kcov_handle;
+}
+
+static void kcov_reset(struct kcov *kcov)
+{
+ kcov->t = NULL;
+ kcov->mode = KCOV_MODE_INIT;
+ kcov->remote = false;
+ kcov->remote_size = 0;
+ kcov->sequence++;
+}
+
+static void kcov_remote_reset(struct kcov *kcov)
+{
+ int bkt;
+ struct kcov_remote *remote;
+ struct hlist_node *tmp;
+
+ spin_lock(&kcov_remote_lock);
+ hash_for_each_safe(kcov_remote_map, bkt, tmp, remote, hnode) {
+ if (remote->kcov != kcov)
+ continue;
+ kcov_debug("removing handle %llx\n", remote->handle);
+ hash_del(&remote->hnode);
+ kfree(remote);
+ }
+ /* Do reset before unlock to prevent races with kcov_remote_start(). */
+ kcov_reset(kcov);
+ spin_unlock(&kcov_remote_lock);
+}
+
+static void kcov_disable(struct task_struct *t, struct kcov *kcov)
+{
+ kcov_task_reset(t);
+ if (kcov->remote)
+ kcov_remote_reset(kcov);
+ else
+ kcov_reset(kcov);
+}
+
static void kcov_get(struct kcov *kcov)
{
refcount_inc(&kcov->refcount);
@@ -235,20 +392,12 @@ static void kcov_get(struct kcov *kcov)
static void kcov_put(struct kcov *kcov)
{
if (refcount_dec_and_test(&kcov->refcount)) {
+ kcov_remote_reset(kcov);
vfree(kcov->area);
kfree(kcov);
}
}
-void kcov_task_init(struct task_struct *t)
-{
- WRITE_ONCE(t->kcov_mode, KCOV_MODE_DISABLED);
- barrier();
- t->kcov_size = 0;
- t->kcov_area = NULL;
- t->kcov = NULL;
-}
-
void kcov_task_exit(struct task_struct *t)
{
struct kcov *kcov;
@@ -256,15 +405,36 @@ void kcov_task_exit(struct task_struct *t)
kcov = t->kcov;
if (kcov == NULL)
return;
+
spin_lock(&kcov->lock);
+ kcov_debug("t = %px, kcov->t = %px\n", t, kcov->t);
+ /*
+ * For KCOV_ENABLE devices we want to make sure that t->kcov->t == t,
+ * which comes down to:
+ * WARN_ON(!kcov->remote && kcov->t != t);
+ *
+ * For KCOV_REMOTE_ENABLE devices, the exiting task is either:
+ * 1. A remote task between kcov_remote_start() and kcov_remote_stop().
+ * In this case we should print a warning right away, since a task
+ * shouldn't be exiting when it's in a kcov coverage collection
+ * section. Here t points to the task that is collecting remote
+ * coverage, and t->kcov->t points to the thread that created the
+ * kcov device. Which means that to detect this case we need to
+ * check that t != t->kcov->t, and this gives us the following:
+ * WARN_ON(kcov->remote && kcov->t != t);
+ *
+ * 2. The task that created kcov exiting without calling KCOV_DISABLE,
+ * and then again we can make sure that t->kcov->t == t:
+ * WARN_ON(kcov->remote && kcov->t != t);
+ *
+ * By combining all three checks into one we get:
+ */
if (WARN_ON(kcov->t != t)) {
spin_unlock(&kcov->lock);
return;
}
/* Just to not leave dangling references behind. */
- kcov_task_init(t);
- kcov->t = NULL;
- kcov->mode = KCOV_MODE_INIT;
+ kcov_disable(t, kcov);
spin_unlock(&kcov->lock);
kcov_put(kcov);
}
@@ -313,6 +483,7 @@ static int kcov_open(struct inode *inode, struct file *filep)
if (!kcov)
return -ENOMEM;
kcov->mode = KCOV_MODE_DISABLED;
+ kcov->sequence = 1;
refcount_set(&kcov->refcount, 1);
spin_lock_init(&kcov->lock);
filep->private_data = kcov;
@@ -325,6 +496,20 @@ static int kcov_close(struct inode *inode, struct file *filep)
return 0;
}
+static int kcov_get_mode(unsigned long arg)
+{
+ if (arg == KCOV_TRACE_PC)
+ return KCOV_MODE_TRACE_PC;
+ else if (arg == KCOV_TRACE_CMP)
+#ifdef CONFIG_KCOV_ENABLE_COMPARISONS
+ return KCOV_MODE_TRACE_CMP;
+#else
+ return -ENOTSUPP;
+#endif
+ else
+ return -EINVAL;
+}
+
/*
* Fault in a lazily-faulted vmalloc area before it can be used by
* __sanitizer_cov_trace_pc(), to avoid recursion issues if any code on the
@@ -340,14 +525,35 @@ static void kcov_fault_in_area(struct kcov *kcov)
READ_ONCE(area[offset]);
}
+static inline bool kcov_check_handle(u64 handle, bool common_valid,
+ bool uncommon_valid, bool zero_valid)
+{
+ if (handle & ~(KCOV_SUBSYSTEM_MASK | KCOV_INSTANCE_MASK))
+ return false;
+ switch (handle & KCOV_SUBSYSTEM_MASK) {
+ case KCOV_SUBSYSTEM_COMMON:
+ return (handle & KCOV_INSTANCE_MASK) ?
+ common_valid : zero_valid;
+ case KCOV_SUBSYSTEM_USB:
+ return uncommon_valid;
+ default:
+ return false;
+ }
+ return false;
+}
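+
+/*
+ * Illustrative sketch of the handle layout checked above. Per the 1-byte
+ * subsystem id / 4-byte instance id split described later in this patch
+ * (the exact mask values live in the uapi header), something like
+ *
+ * handle = kcov_remote_handle(KCOV_SUBSYSTEM_USB, bus_num);
+ *
+ * puts the USB subsystem id in the top byte and an instance id (a bus
+ * number here, purely as an example) in the low 4 bytes, and
+ * kcov_check_handle(handle, false, true, false) accepts it as an
+ * "uncommon" (subsystem-specific) handle.
+ */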
+
static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd,
unsigned long arg)
{
struct task_struct *t;
unsigned long size, unused;
+ int mode, i;
+ struct kcov_remote_arg *remote_arg;
+ struct kcov_remote *remote;
switch (cmd) {
case KCOV_INIT_TRACE:
+ kcov_debug("KCOV_INIT_TRACE\n");
/*
* Enable kcov in trace mode and setup buffer size.
* Must happen before anything else.
@@ -366,6 +572,7 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd,
kcov->mode = KCOV_MODE_INIT;
return 0;
case KCOV_ENABLE:
+ kcov_debug("KCOV_ENABLE\n");
/*
* Enable coverage for the current task.
* At this point the user must have enabled trace mode,
@@ -378,29 +585,20 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd,
t = current;
if (kcov->t != NULL || t->kcov != NULL)
return -EBUSY;
- if (arg == KCOV_TRACE_PC)
- kcov->mode = KCOV_MODE_TRACE_PC;
- else if (arg == KCOV_TRACE_CMP)
-#ifdef CONFIG_KCOV_ENABLE_COMPARISONS
- kcov->mode = KCOV_MODE_TRACE_CMP;
-#else
- return -ENOTSUPP;
-#endif
- else
- return -EINVAL;
+ mode = kcov_get_mode(arg);
+ if (mode < 0)
+ return mode;
kcov_fault_in_area(kcov);
- /* Cache in task struct for performance. */
- t->kcov_size = kcov->size;
- t->kcov_area = kcov->area;
- /* See comment in check_kcov_mode(). */
- barrier();
- WRITE_ONCE(t->kcov_mode, kcov->mode);
+ kcov->mode = mode;
+ kcov_start(t, kcov->size, kcov->area, kcov->mode,
+ kcov->sequence);
t->kcov = kcov;
kcov->t = t;
- /* This is put either in kcov_task_exit() or in KCOV_DISABLE. */
+ /* Put either in kcov_task_exit() or in KCOV_DISABLE. */
kcov_get(kcov);
return 0;
case KCOV_DISABLE:
+ kcov_debug("KCOV_DISABLE\n");
/* Disable coverage for the current task. */
unused = arg;
if (unused != 0 || current->kcov != kcov)
@@ -408,11 +606,65 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd,
t = current;
if (WARN_ON(kcov->t != t))
return -EINVAL;
- kcov_task_init(t);
- kcov->t = NULL;
- kcov->mode = KCOV_MODE_INIT;
+ kcov_disable(t, kcov);
kcov_put(kcov);
return 0;
+ case KCOV_REMOTE_ENABLE:
+ kcov_debug("KCOV_REMOTE_ENABLE\n");
+ if (kcov->mode != KCOV_MODE_INIT || !kcov->area)
+ return -EINVAL;
+ t = current;
+ if (kcov->t != NULL || t->kcov != NULL)
+ return -EBUSY;
+ remote_arg = (struct kcov_remote_arg *)arg;
+ mode = kcov_get_mode(remote_arg->trace_mode);
+ if (mode < 0)
+ return mode;
+ if (remote_arg->area_size > LONG_MAX / sizeof(unsigned long))
+ return -EINVAL;
+ kcov->mode = mode;
+ t->kcov = kcov;
+ kcov->t = t;
+ kcov->remote = true;
+ kcov->remote_size = remote_arg->area_size;
+ spin_lock(&kcov_remote_lock);
+ for (i = 0; i < remote_arg->num_handles; i++) {
+ kcov_debug("handle %llx\n", remote_arg->handles[i]);
+ if (!kcov_check_handle(remote_arg->handles[i],
+ false, true, false)) {
+ spin_unlock(&kcov_remote_lock);
+ kcov_disable(t, kcov);
+ return -EINVAL;
+ }
+ remote = kcov_remote_add(kcov, remote_arg->handles[i]);
+ if (IS_ERR(remote)) {
+ spin_unlock(&kcov_remote_lock);
+ kcov_disable(t, kcov);
+ return PTR_ERR(remote);
+ }
+ }
+ if (remote_arg->common_handle) {
+ kcov_debug("common handle %llx\n",
+ remote_arg->common_handle);
+ if (!kcov_check_handle(remote_arg->common_handle,
+ true, false, false)) {
+ spin_unlock(&kcov_remote_lock);
+ kcov_disable(t, kcov);
+ return -EINVAL;
+ }
+ remote = kcov_remote_add(kcov,
+ remote_arg->common_handle);
+ if (IS_ERR(remote)) {
+ spin_unlock(&kcov_remote_lock);
+ kcov_disable(t, kcov);
+ return PTR_ERR(remote);
+ }
+ t->kcov_handle = remote_arg->common_handle;
+ }
+ spin_unlock(&kcov_remote_lock);
+ /* Put either in kcov_task_exit() or in KCOV_DISABLE. */
+ kcov_get(kcov);
+ return 0;
default:
return -ENOTTY;
}
@@ -422,11 +674,35 @@ static long kcov_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
struct kcov *kcov;
int res;
+ struct kcov_remote_arg *remote_arg = NULL;
+ unsigned int remote_num_handles;
+ unsigned long remote_arg_size;
+
+ if (cmd == KCOV_REMOTE_ENABLE) {
+ if (get_user(remote_num_handles, (unsigned __user *)(arg +
+ offsetof(struct kcov_remote_arg, num_handles))))
+ return -EFAULT;
+ if (remote_num_handles > KCOV_REMOTE_MAX_HANDLES)
+ return -EINVAL;
+ remote_arg_size = struct_size(remote_arg, handles,
+ remote_num_handles);
+ remote_arg = memdup_user((void __user *)arg, remote_arg_size);
+ if (IS_ERR(remote_arg))
+ return PTR_ERR(remote_arg);
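+ /*
+ * Re-check num_handles: userspace may have changed it between
+ * the get_user() above and the memdup_user() copy.
+ */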
+ if (remote_arg->num_handles != remote_num_handles) {
+ kfree(remote_arg);
+ return -EINVAL;
+ }
+ arg = (unsigned long)remote_arg;
+ }
kcov = filep->private_data;
spin_lock(&kcov->lock);
res = kcov_ioctl_locked(kcov, cmd, arg);
spin_unlock(&kcov->lock);
+
+ kfree(remote_arg);
+
return res;
}
@@ -438,6 +714,207 @@ static const struct file_operations kcov_fops = {
.release = kcov_close,
};
+/*
+ * kcov_remote_start() and kcov_remote_stop() can be used to annotate a section
+ * of code in a kernel background thread to allow kcov to be used to collect
+ * coverage from that part of code.
+ *
+ * The handle argument of kcov_remote_start() identifies a code section that is
+ * used for coverage collection. A userspace process passes this handle to
+ * the KCOV_REMOTE_ENABLE ioctl to make the kcov device start collecting
+ * coverage for the code section identified by this handle.
+ *
+ * The usage of these annotations in the kernel code is different depending on
+ * the type of the kernel thread whose code is being annotated.
+ *
+ * For global kernel threads that are spawned in a limited number of instances
+ * (e.g. one USB hub_event() worker thread is spawned per USB HCD), each
+ * instance must be assigned a unique 4-byte instance id. The instance id is
+ * then combined with a 1-byte subsystem id to get a handle via
+ * kcov_remote_handle(subsystem_id, instance_id).
+ *
+ * For local kernel threads that are spawned from a system call handler when a
+ * user interacts with some kernel interface (e.g. vhost workers), a handle is
+ * passed from a userspace process as the common_handle field of the
+ * kcov_remote_arg struct (note that the user must generate a handle by using
+ * kcov_remote_handle() with KCOV_SUBSYSTEM_COMMON as the subsystem id and an
+ * arbitrary 4-byte non-zero number as the instance id). This common handle
+ * then gets saved into the task_struct of the process that issued the
+ * KCOV_REMOTE_ENABLE ioctl. When this process issues system calls that spawn
+ * kernel threads, the common handle must be retrieved via kcov_common_handle()
+ * and passed to the spawned threads via custom annotations. Those kernel
+ * threads must in turn be annotated with kcov_remote_start(common_handle) and
+ * kcov_remote_stop(). All of the threads that are spawned by the same process
+ * obtain the same handle, hence the name "common".
+ *
+ * See Documentation/dev-tools/kcov.rst for more details.
+ *
+ * Internally, this function looks up the kcov device associated with the
+ * provided handle, allocates an area for coverage collection, and saves the
+ * pointers to kcov and area into the current task_struct to allow coverage to
+ * be collected via __sanitizer_cov_trace_pc().
+ * In turn, kcov_remote_stop() clears those pointers from task_struct to stop
+ * collecting coverage and copies all collected coverage into the kcov area.
+ */
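+/*
+ * A minimal userspace sketch of the flow above (illustrative: the area
+ * size constant, instance id and error handling are assumptions; the field
+ * names match the kcov_remote_arg layout used by KCOV_REMOTE_ENABLE):
+ *
+ * struct kcov_remote_arg *arg;
+ *
+ * arg = calloc(1, sizeof(*arg) + sizeof(uint64_t));
+ * arg->trace_mode = KCOV_TRACE_PC;
+ * arg->area_size = AREA_SIZE;
+ * arg->num_handles = 1;
+ * arg->handles[0] = kcov_remote_handle(KCOV_SUBSYSTEM_USB, instance_id);
+ * if (ioctl(kcov_fd, KCOV_REMOTE_ENABLE, arg))
+ *  exit(1);
+ */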
+void kcov_remote_start(u64 handle)
+{
+ struct kcov_remote *remote;
+ void *area;
+ struct task_struct *t;
+ unsigned int size;
+ enum kcov_mode mode;
+ int sequence;
+
+ if (WARN_ON(!kcov_check_handle(handle, true, true, true)))
+ return;
+ if (WARN_ON(!in_task()))
+ return;
+ t = current;
+ /*
+ * Check that kcov_remote_start() is not called twice
+ * nor called by user tasks (with kcov enabled).
+ */
+ if (WARN_ON(t->kcov))
+ return;
+
+ kcov_debug("handle = %llx\n", handle);
+
+ spin_lock(&kcov_remote_lock);
+ remote = kcov_remote_find(handle);
+ if (!remote) {
+ kcov_debug("no remote found");
+ spin_unlock(&kcov_remote_lock);
+ return;
+ }
+ /* Put in kcov_remote_stop(). */
+ kcov_get(remote->kcov);
+ t->kcov = remote->kcov;
+ /*
+ * Read kcov fields before unlock to prevent races with
+ * KCOV_DISABLE / kcov_remote_reset().
+ */
+ size = remote->kcov->remote_size;
+ mode = remote->kcov->mode;
+ sequence = remote->kcov->sequence;
+ area = kcov_remote_area_get(size);
+ spin_unlock(&kcov_remote_lock);
+
+ if (!area) {
+ area = vmalloc(size * sizeof(unsigned long));
+ if (!area) {
+ t->kcov = NULL;
+ kcov_put(remote->kcov);
+ return;
+ }
+ }
+ /* Reset coverage size. */
+ *(u64 *)area = 0;
+
+ kcov_debug("area = %px, size = %u", area, size);
+
+ kcov_start(t, size, area, mode, sequence);
+}
+EXPORT_SYMBOL(kcov_remote_start);
+
+static void kcov_move_area(enum kcov_mode mode, void *dst_area,
+ unsigned int dst_area_size, void *src_area)
+{
+ u64 word_size = sizeof(unsigned long);
+ u64 count_size, entry_size_log;
+ u64 dst_len, src_len;
+ void *dst_entries, *src_entries;
+ u64 dst_occupied, dst_free, bytes_to_move, entries_moved;
+
+ kcov_debug("%px %u <= %px %lu\n",
+ dst_area, dst_area_size, src_area, *(unsigned long *)src_area);
+
+ switch (mode) {
+ case KCOV_MODE_TRACE_PC:
+ dst_len = READ_ONCE(*(unsigned long *)dst_area);
+ src_len = *(unsigned long *)src_area;
+ count_size = sizeof(unsigned long);
+ entry_size_log = __ilog2_u64(sizeof(unsigned long));
+ break;
+ case KCOV_MODE_TRACE_CMP:
+ dst_len = READ_ONCE(*(u64 *)dst_area);
+ src_len = *(u64 *)src_area;
+ count_size = sizeof(u64);
+ BUILD_BUG_ON(!is_power_of_2(KCOV_WORDS_PER_CMP));
+ entry_size_log = __ilog2_u64(sizeof(u64) * KCOV_WORDS_PER_CMP);
+ break;
+ default:
+ WARN_ON(1);
+ return;
+ }
+
+ /* As ARM can't divide u64 integers, use the log of the entry size. */
+ if (dst_len > ((dst_area_size * word_size - count_size) >>
+ entry_size_log))
+ return;
+ dst_occupied = count_size + (dst_len << entry_size_log);
+ dst_free = dst_area_size * word_size - dst_occupied;
+ bytes_to_move = min(dst_free, src_len << entry_size_log);
+ dst_entries = dst_area + dst_occupied;
+ src_entries = src_area + count_size;
+ memcpy(dst_entries, src_entries, bytes_to_move);
+ entries_moved = bytes_to_move >> entry_size_log;
+
+ switch (mode) {
+ case KCOV_MODE_TRACE_PC:
+ WRITE_ONCE(*(unsigned long *)dst_area, dst_len + entries_moved);
+ break;
+ case KCOV_MODE_TRACE_CMP:
+ WRITE_ONCE(*(u64 *)dst_area, dst_len + entries_moved);
+ break;
+ default:
+ break;
+ }
+}
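+/*
+ * For example, with KCOV_MODE_TRACE_PC on a 64-bit kernel, count_size = 8
+ * and entry_size_log = 3, so a dst area of dst_area_size = 1024 words can
+ * hold at most (1024 * 8 - 8) >> 3 = 1023 PC entries after the leading
+ * counter; bytes_to_move is then capped to the space left behind dst_len.
+ */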
+
+/* See the comment before kcov_remote_start() for usage details. */
+void kcov_remote_stop(void)
+{
+ struct task_struct *t = current;
+ struct kcov *kcov = t->kcov;
+ void *area = t->kcov_area;
+ unsigned int size = t->kcov_size;
+ int sequence = t->kcov_sequence;
+
+ if (!kcov) {
+ kcov_debug("no kcov found\n");
+ return;
+ }
+
+ kcov_stop(t);
+ t->kcov = NULL;
+
+ spin_lock(&kcov->lock);
+ /*
+ * KCOV_DISABLE could have been called between kcov_remote_start()
+ * and kcov_remote_stop(), hence the check.
+ */
+ kcov_debug("move if: %d == %d && %d\n",
+ sequence, kcov->sequence, (int)kcov->remote);
+ if (sequence == kcov->sequence && kcov->remote)
+ kcov_move_area(kcov->mode, kcov->area, kcov->size, area);
+ spin_unlock(&kcov->lock);
+
+ spin_lock(&kcov_remote_lock);
+ kcov_remote_area_put(area, size);
+ spin_unlock(&kcov_remote_lock);
+
+ kcov_put(kcov);
+}
+EXPORT_SYMBOL(kcov_remote_stop);
+
+/* See the comment before kcov_remote_start() for usage details. */
+u64 kcov_common_handle(void)
+{
+ return current->kcov_handle;
+}
+EXPORT_SYMBOL(kcov_common_handle);
+
static int __init kcov_init(void)
{
/*
diff --git a/kernel/notifier.c b/kernel/notifier.c
index d9f5081d578d..5989bbb93039 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -23,22 +23,10 @@ static int notifier_chain_register(struct notifier_block **nl,
struct notifier_block *n)
{
while ((*nl) != NULL) {
- WARN_ONCE(((*nl) == n), "double register detected");
- if (n->priority > (*nl)->priority)
- break;
- nl = &((*nl)->next);
- }
- n->next = *nl;
- rcu_assign_pointer(*nl, n);
- return 0;
-}
-
-static int notifier_chain_cond_register(struct notifier_block **nl,
- struct notifier_block *n)
-{
- while ((*nl) != NULL) {
- if ((*nl) == n)
+ if (unlikely((*nl) == n)) {
+ WARN(1, "double register detected");
return 0;
+ }
if (n->priority > (*nl)->priority)
break;
nl = &((*nl)->next);
@@ -233,29 +221,6 @@ int blocking_notifier_chain_register(struct blocking_notifier_head *nh,
EXPORT_SYMBOL_GPL(blocking_notifier_chain_register);
/**
- * blocking_notifier_chain_cond_register - Cond add notifier to a blocking notifier chain
- * @nh: Pointer to head of the blocking notifier chain
- * @n: New entry in notifier chain
- *
- * Adds a notifier to a blocking notifier chain, only if not already
- * present in the chain.
- * Must be called in process context.
- *
- * Currently always returns zero.
- */
-int blocking_notifier_chain_cond_register(struct blocking_notifier_head *nh,
- struct notifier_block *n)
-{
- int ret;
-
- down_write(&nh->rwsem);
- ret = notifier_chain_cond_register(&nh->head, n);
- up_write(&nh->rwsem);
- return ret;
-}
-EXPORT_SYMBOL_GPL(blocking_notifier_chain_cond_register);
-
-/**
* blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain
* @nh: Pointer to head of the blocking notifier chain
* @n: Entry to remove from notifier chain
@@ -554,7 +519,7 @@ NOKPROBE_SYMBOL(notify_die);
int register_die_notifier(struct notifier_block *nb)
{
- vmalloc_sync_all();
+ vmalloc_sync_mappings();
return atomic_notifier_chain_register(&die_chain, nb);
}
EXPORT_SYMBOL_GPL(register_die_notifier);
diff --git a/kernel/profile.c b/kernel/profile.c
index af7c94bf5fa1..4b144b02ca5d 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -336,7 +336,7 @@ static int profile_dead_cpu(unsigned int cpu)
struct page *page;
int i;
- if (prof_cpu_mask != NULL)
+ if (cpumask_available(prof_cpu_mask))
cpumask_clear_cpu(cpu, prof_cpu_mask);
for (i = 0; i < 2; i++) {
@@ -373,7 +373,7 @@ static int profile_prepare_cpu(unsigned int cpu)
static int profile_online_cpu(unsigned int cpu)
{
- if (prof_cpu_mask != NULL)
+ if (cpumask_available(prof_cpu_mask))
cpumask_set_cpu(cpu, prof_cpu_mask);
return 0;
@@ -403,7 +403,7 @@ void profile_tick(int type)
{
struct pt_regs *regs = get_irq_regs();
- if (!user_mode(regs) && prof_cpu_mask != NULL &&
+ if (!user_mode(regs) && cpumask_available(prof_cpu_mask) &&
cpumask_test_cpu(smp_processor_id(), prof_cpu_mask))
profile_hit(type, (void *)profile_pc(regs));
}
diff --git a/kernel/sys.c b/kernel/sys.c
index d3aef31e24dc..a9331f101883 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1279,11 +1279,13 @@ SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
{
- struct oldold_utsname tmp = {};
+ struct oldold_utsname tmp;
if (!name)
return -EFAULT;
+ memset(&tmp, 0, sizeof(tmp));
+
down_read(&uts_sem);
memcpy(&tmp.sysname, &utsname()->sysname, __OLD_UTS_LEN);
memcpy(&tmp.nodename, &utsname()->nodename, __OLD_UTS_LEN);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8a117a3462a5..72361341bbee 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2156,6 +2156,12 @@ config IO_STRICT_DEVMEM
If in doubt, say Y.
+config DEBUG_AID_FOR_SYZBOT
+ bool "Additional debug code for syzbot"
+ default n
+ help
+ This option is intended for testing by syzbot.
+
source "arch/$(SRCARCH)/Kconfig.debug"
config HYPERV_TESTING
diff --git a/lib/bitmap.c b/lib/bitmap.c
index f9e834841e94..4250519d7d1c 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -222,6 +222,18 @@ int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
}
EXPORT_SYMBOL(__bitmap_andnot);
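+
+/*
+ * In __bitmap_replace() below, dst takes the masked bits from @new and the
+ * remaining bits from @old. A quick worked example: with old = 0b0101,
+ * new = 0b0011 and mask = 0b0110,
+ * dst = (0b0101 & ~0b0110) | (0b0011 & 0b0110) = 0b0001 | 0b0010 = 0b0011.
+ */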
+void __bitmap_replace(unsigned long *dst,
+ const unsigned long *old, const unsigned long *new,
+ const unsigned long *mask, unsigned int nbits)
+{
+ unsigned int k;
+ unsigned int nr = BITS_TO_LONGS(nbits);
+
+ for (k = 0; k < nr; k++)
+ dst[k] = (old[k] & ~mask[k]) | (new[k] & mask[k]);
+}
+EXPORT_SYMBOL(__bitmap_replace);
+
int __bitmap_intersects(const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int bits)
{
diff --git a/lib/find_bit.c b/lib/find_bit.c
index 5c51eb45178a..e35a76b291e6 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -214,3 +214,17 @@ EXPORT_SYMBOL(find_next_bit_le);
#endif
#endif /* __BIG_ENDIAN */
+
+unsigned long find_next_clump8(unsigned long *clump, const unsigned long *addr,
+ unsigned long size, unsigned long offset)
+{
+ offset = find_next_bit(addr, size, offset);
+ if (offset == size)
+ return size;
+
+ offset = round_down(offset, 8);
+ *clump = bitmap_get_value8(addr, offset);
+
+ return offset;
+}
+EXPORT_SYMBOL(find_next_clump8);
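+
+/*
+ * Example (illustrative): for a 64-bit bitmap with only bits 8 and 9 set,
+ * find_next_clump8(&clump, addr, 64, 0) finds the first set bit at 8,
+ * rounds it down to the byte boundary, returns offset 8 and stores the
+ * 8-bit clump value 0x03 in *clump.
+ */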
diff --git a/lib/genalloc.c b/lib/genalloc.c
index 24d20ca7e91b..7f1244b5294a 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -540,7 +540,7 @@ void gen_pool_for_each_chunk(struct gen_pool *pool,
EXPORT_SYMBOL(gen_pool_for_each_chunk);
/**
- * addr_in_gen_pool - checks if an address falls within the range of a pool
+ * gen_pool_has_addr - checks if an address falls within the range of a pool
* @pool: the generic memory pool
* @start: start address
* @size: size of the region
@@ -548,7 +548,7 @@ EXPORT_SYMBOL(gen_pool_for_each_chunk);
* Check if the range of addresses falls within the specified pool. Returns
* true if the entire range is contained in the pool and false otherwise.
*/
-bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start,
+bool gen_pool_has_addr(struct gen_pool *pool, unsigned long start,
size_t size)
{
bool found = false;
@@ -567,6 +567,7 @@ bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start,
rcu_read_unlock();
return found;
}
+EXPORT_SYMBOL(gen_pool_has_addr);
/**
* gen_pool_avail - get available free space of the pool
diff --git a/lib/math/rational.c b/lib/math/rational.c
index ba7443677c90..31fb27db2deb 100644
--- a/lib/math/rational.c
+++ b/lib/math/rational.c
@@ -3,6 +3,7 @@
* rational fractions
*
* Copyright (C) 2009 emlix GmbH, Oskar Schirmer <oskar@scara.com>
+ * Copyright (C) 2019 Trent Piepho <tpiepho@gmail.com>
*
* helper functions when coping with rational numbers
*/
@@ -10,6 +11,7 @@
#include <linux/rational.h>
#include <linux/compiler.h>
#include <linux/export.h>
+#include <linux/kernel.h>
/*
* calculate best rational approximation for a given fraction
@@ -33,30 +35,65 @@ void rational_best_approximation(
unsigned long max_numerator, unsigned long max_denominator,
unsigned long *best_numerator, unsigned long *best_denominator)
{
- unsigned long n, d, n0, d0, n1, d1;
+ /* n/d is the starting rational, which is continually
+ * decreased each iteration using the Euclidean algorithm.
+ *
+ * dp is the value of d from the prior iteration.
+ *
+ * n2/d2, n1/d1, and n0/d0 are our successively more accurate
+ * approximations of the rational. They are, respectively,
+ * the current, previous, and two prior iterations of it.
+ *
+ * a is the current term of the continued fraction.
+ */
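+ /*
+ * A worked example (illustrative numbers): for
+ * rational_best_approximation(31415926, 10000000, 255, 255) the
+ * convergents are 3, 22/7, 333/106, ...; 333/106 overflows the
+ * numerator bound, t = min((255 - 3)/22, (255 - 1)/7) = 11, and
+ * since 2*11 > a = 15 the semi-convergent 245/78 is chosen, which
+ * is closer to the target than 22/7.
+ */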
+ unsigned long n, d, n0, d0, n1, d1, n2, d2;
n = given_numerator;
d = given_denominator;
n0 = d1 = 0;
n1 = d0 = 1;
+
for (;;) {
- unsigned long t, a;
- if ((n1 > max_numerator) || (d1 > max_denominator)) {
- n1 = n0;
- d1 = d0;
- break;
- }
+ unsigned long dp, a;
+
if (d == 0)
break;
- t = d;
+ /* Find next term in continued fraction, 'a', via
+ * Euclidean algorithm.
+ */
+ dp = d;
a = n / d;
d = n % d;
- n = t;
- t = n0 + a * n1;
+ n = dp;
+
+ /* Calculate the current rational approximation (aka
+ * convergent), n2/d2, using the term just found and
+ * the two prior approximations.
+ */
+ n2 = n0 + a * n1;
+ d2 = d0 + a * d1;
+
+ /* If the current convergent exceeds the maxes, then
+ * return either the previous convergent or the
+ * largest semi-convergent, the final term of which is
+ * found below as 't'.
+ */
+ if ((n2 > max_numerator) || (d2 > max_denominator)) {
+ unsigned long t = min((max_numerator - n0) / n1,
+ (max_denominator - d0) / d1);
+
+ /* This tests if the semi-convergent is closer
+ * than the previous convergent.
+ */
+ if (2u * t > a || (2u * t == a && d0 * dp > d1 * d)) {
+ n1 = n0 + t * n1;
+ d1 = d0 + t * d1;
+ }
+ break;
+ }
n0 = n1;
- n1 = t;
- t = d0 + a * d1;
+ n1 = n2;
d0 = d1;
- d1 = t;
+ d1 = d2;
}
*best_numerator = n1;
*best_denominator = d1;
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index 51a98f7ee79e..e14a15ac250b 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Test cases for printf facility.
+ * Test cases for bitmap API.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -21,6 +21,39 @@ static unsigned failed_tests __initdata;
static char pbl_buffer[PAGE_SIZE] __initdata;
+static const unsigned long exp1[] __initconst = {
+ BITMAP_FROM_U64(1),
+ BITMAP_FROM_U64(2),
+ BITMAP_FROM_U64(0x0000ffff),
+ BITMAP_FROM_U64(0xffff0000),
+ BITMAP_FROM_U64(0x55555555),
+ BITMAP_FROM_U64(0xaaaaaaaa),
+ BITMAP_FROM_U64(0x11111111),
+ BITMAP_FROM_U64(0x22222222),
+ BITMAP_FROM_U64(0xffffffff),
+ BITMAP_FROM_U64(0xfffffffe),
+ BITMAP_FROM_U64(0x3333333311111111ULL),
+ BITMAP_FROM_U64(0xffffffff77777777ULL),
+ BITMAP_FROM_U64(0),
+};
+
+static const unsigned long exp2[] __initconst = {
+ BITMAP_FROM_U64(0x3333333311111111ULL),
+ BITMAP_FROM_U64(0xffffffff77777777ULL),
+};
+
+/* Fibonacci sequence */
+static const unsigned long exp2_to_exp3_mask[] __initconst = {
+ BITMAP_FROM_U64(0x008000020020212eULL),
+};
+/* exp3_0_1 = (exp2[0] & ~exp2_to_exp3_mask) | (exp2[1] & exp2_to_exp3_mask) */
+static const unsigned long exp3_0_1[] __initconst = {
+ BITMAP_FROM_U64(0x33b3333311313137ULL),
+};
+/* exp3_1_0 = (exp2[1] & ~exp2_to_exp3_mask) | (exp2[0] & exp2_to_exp3_mask) */
+static const unsigned long exp3_1_0[] __initconst = {
+ BITMAP_FROM_U64(0xff7fffff77575751ULL),
+};
static bool __init
__check_eq_uint(const char *srcfile, unsigned int line,
@@ -92,6 +125,36 @@ __check_eq_u32_array(const char *srcfile, unsigned int line,
return true;
}
+static bool __init __check_eq_clump8(const char *srcfile, unsigned int line,
+ const unsigned int offset,
+ const unsigned int size,
+ const unsigned char *const clump_exp,
+ const unsigned long *const clump)
+{
+ unsigned long exp;
+
+ if (offset >= size) {
+ pr_warn("[%s:%u] bit offset for clump out-of-bounds: expected less than %u, got %u\n",
+ srcfile, line, size, offset);
+ return false;
+ }
+
+ exp = clump_exp[offset / 8];
+ if (!exp) {
+ pr_warn("[%s:%u] bit offset for zero clump: expected nonzero clump, got bit offset %u with clump value 0",
+ srcfile, line, offset);
+ return false;
+ }
+
+ if (*clump != exp) {
+ pr_warn("[%s:%u] expected clump value of 0x%lX, got clump value of 0x%lX",
+ srcfile, line, exp, *clump);
+ return false;
+ }
+
+ return true;
+}
+
#define __expect_eq(suffix, ...) \
({ \
int result = 0; \
@@ -108,6 +171,7 @@ __check_eq_u32_array(const char *srcfile, unsigned int line,
#define expect_eq_bitmap(...) __expect_eq(bitmap, ##__VA_ARGS__)
#define expect_eq_pbl(...) __expect_eq(pbl, ##__VA_ARGS__)
#define expect_eq_u32_array(...) __expect_eq(u32_array, ##__VA_ARGS__)
+#define expect_eq_clump8(...) __expect_eq(clump8, ##__VA_ARGS__)
static void __init test_zero_clear(void)
{
@@ -206,6 +270,30 @@ static void __init test_copy(void)
expect_eq_pbl("0-108,128-1023", bmap2, 1024);
}
+#define EXP2_IN_BITS (sizeof(exp2) * 8)
+
+static void __init test_replace(void)
+{
+ unsigned int nbits = 64;
+ DECLARE_BITMAP(bmap, 1024);
+
+ bitmap_zero(bmap, 1024);
+ bitmap_replace(bmap, &exp2[0], &exp2[1], exp2_to_exp3_mask, nbits);
+ expect_eq_bitmap(bmap, exp3_0_1, nbits);
+
+ bitmap_zero(bmap, 1024);
+ bitmap_replace(bmap, &exp2[1], &exp2[0], exp2_to_exp3_mask, nbits);
+ expect_eq_bitmap(bmap, exp3_1_0, nbits);
+
+ bitmap_fill(bmap, 1024);
+ bitmap_replace(bmap, &exp2[0], &exp2[1], exp2_to_exp3_mask, nbits);
+ expect_eq_bitmap(bmap, exp3_0_1, nbits);
+
+ bitmap_fill(bmap, 1024);
+ bitmap_replace(bmap, &exp2[1], &exp2[0], exp2_to_exp3_mask, nbits);
+ expect_eq_bitmap(bmap, exp3_1_0, nbits);
+}
+
#define PARSE_TIME 0x1
struct test_bitmap_parselist {
@@ -216,53 +304,32 @@ struct test_bitmap_parselist{
const int flags;
};
-static const unsigned long exp[] __initconst = {
- BITMAP_FROM_U64(1),
- BITMAP_FROM_U64(2),
- BITMAP_FROM_U64(0x0000ffff),
- BITMAP_FROM_U64(0xffff0000),
- BITMAP_FROM_U64(0x55555555),
- BITMAP_FROM_U64(0xaaaaaaaa),
- BITMAP_FROM_U64(0x11111111),
- BITMAP_FROM_U64(0x22222222),
- BITMAP_FROM_U64(0xffffffff),
- BITMAP_FROM_U64(0xfffffffe),
- BITMAP_FROM_U64(0x3333333311111111ULL),
- BITMAP_FROM_U64(0xffffffff77777777ULL),
- BITMAP_FROM_U64(0),
-};
-
-static const unsigned long exp2[] __initconst = {
- BITMAP_FROM_U64(0x3333333311111111ULL),
- BITMAP_FROM_U64(0xffffffff77777777ULL)
-};
-
static const struct test_bitmap_parselist parselist_tests[] __initconst = {
#define step (sizeof(u64) / sizeof(unsigned long))
- {0, "0", &exp[0], 8, 0},
- {0, "1", &exp[1 * step], 8, 0},
- {0, "0-15", &exp[2 * step], 32, 0},
- {0, "16-31", &exp[3 * step], 32, 0},
- {0, "0-31:1/2", &exp[4 * step], 32, 0},
- {0, "1-31:1/2", &exp[5 * step], 32, 0},
- {0, "0-31:1/4", &exp[6 * step], 32, 0},
- {0, "1-31:1/4", &exp[7 * step], 32, 0},
- {0, "0-31:4/4", &exp[8 * step], 32, 0},
- {0, "1-31:4/4", &exp[9 * step], 32, 0},
- {0, "0-31:1/4,32-63:2/4", &exp[10 * step], 64, 0},
- {0, "0-31:3/4,32-63:4/4", &exp[11 * step], 64, 0},
- {0, " ,, 0-31:3/4 ,, 32-63:4/4 ,, ", &exp[11 * step], 64, 0},
+ {0, "0", &exp1[0], 8, 0},
+ {0, "1", &exp1[1 * step], 8, 0},
+ {0, "0-15", &exp1[2 * step], 32, 0},
+ {0, "16-31", &exp1[3 * step], 32, 0},
+ {0, "0-31:1/2", &exp1[4 * step], 32, 0},
+ {0, "1-31:1/2", &exp1[5 * step], 32, 0},
+ {0, "0-31:1/4", &exp1[6 * step], 32, 0},
+ {0, "1-31:1/4", &exp1[7 * step], 32, 0},
+ {0, "0-31:4/4", &exp1[8 * step], 32, 0},
+ {0, "1-31:4/4", &exp1[9 * step], 32, 0},
+ {0, "0-31:1/4,32-63:2/4", &exp1[10 * step], 64, 0},
+ {0, "0-31:3/4,32-63:4/4", &exp1[11 * step], 64, 0},
+ {0, " ,, 0-31:3/4 ,, 32-63:4/4 ,, ", &exp1[11 * step], 64, 0},
{0, "0-31:1/4,32-63:2/4,64-95:3/4,96-127:4/4", exp2, 128, 0},
{0, "0-2047:128/256", NULL, 2048, PARSE_TIME},
- {0, "", &exp[12 * step], 8, 0},
- {0, "\n", &exp[12 * step], 8, 0},
- {0, ",, ,, , , ,", &exp[12 * step], 8, 0},
- {0, " , ,, , , ", &exp[12 * step], 8, 0},
- {0, " , ,, , , \n", &exp[12 * step], 8, 0},
+ {0, "", &exp1[12 * step], 8, 0},
+ {0, "\n", &exp1[12 * step], 8, 0},
+ {0, ",, ,, , , ,", &exp1[12 * step], 8, 0},
+ {0, " , ,, , , ", &exp1[12 * step], 8, 0},
+ {0, " , ,, , , \n", &exp1[12 * step], 8, 0},
{-EINVAL, "-1", NULL, 8, 0},
{-EINVAL, "-0", NULL, 8, 0},
@@ -280,6 +347,8 @@ static const struct test_bitmap_parselist parselist_tests[] __initconst = {
{-EINVAL, "a-31:10/1", NULL, 8, 0},
{-EINVAL, "0-31:a/1", NULL, 8, 0},
{-EINVAL, "0-\n", NULL, 8, 0},
+
+#undef step
};
static void __init __test_bitmap_parselist(int is_user)
@@ -299,7 +368,7 @@ static void __init __test_bitmap_parselist(int is_user)
set_fs(KERNEL_DS);
time = ktime_get();
- err = bitmap_parselist_user(ptest.in, len,
+ err = bitmap_parselist_user((__force const char __user *)ptest.in, len,
bmap, ptest.nbits);
time = ktime_get() - time;
set_fs(orig_fs);
@@ -326,6 +395,8 @@ static void __init __test_bitmap_parselist(int is_user)
if (ptest.flags & PARSE_TIME)
pr_err("parselist%s: %d: input is '%s' OK, Time: %llu\n",
mode, i, ptest.in, time);
+
+#undef ptest
}
}
@@ -339,20 +410,20 @@ static void __init test_bitmap_parselist_user(void)
__test_bitmap_parselist(1);
}
-#define EXP_BYTES (sizeof(exp) * 8)
+#define EXP1_IN_BITS (sizeof(exp1) * 8)
static void __init test_bitmap_arr32(void)
{
unsigned int nbits, next_bit;
- u32 arr[sizeof(exp) / 4];
- DECLARE_BITMAP(bmap2, EXP_BYTES);
+ u32 arr[EXP1_IN_BITS / 32];
+ DECLARE_BITMAP(bmap2, EXP1_IN_BITS);
memset(arr, 0xa5, sizeof(arr));
- for (nbits = 0; nbits < EXP_BYTES; ++nbits) {
- bitmap_to_arr32(arr, exp, nbits);
+ for (nbits = 0; nbits < EXP1_IN_BITS; ++nbits) {
+ bitmap_to_arr32(arr, exp1, nbits);
bitmap_from_arr32(bmap2, arr, nbits);
- expect_eq_bitmap(bmap2, exp, nbits);
+ expect_eq_bitmap(bmap2, exp1, nbits);
next_bit = find_next_bit(bmap2,
round_up(nbits, BITS_PER_LONG), nbits);
@@ -361,7 +432,7 @@ static void __init test_bitmap_arr32(void)
" tail is not safely cleared: %d\n",
nbits, next_bit);
- if (nbits < EXP_BYTES - 32)
+ if (nbits < EXP1_IN_BITS - 32)
expect_eq_uint(arr[DIV_ROUND_UP(nbits, 32)],
0xa5a5a5a5);
}
@@ -404,15 +475,50 @@ static void noinline __init test_mem_optimisations(void)
}
}
+static const unsigned char clump_exp[] __initconst = {
+ 0x01, /* 1 bit set */
+ 0x02, /* non-edge 1 bit set */
+ 0x00, /* zero bits set */
+ 0x38, /* 3 bits set across 4-bit boundary */
+ 0x38, /* Repeated clump */
+ 0x0F, /* 4 bits set */
+ 0xFF, /* all bits set */
+ 0x05, /* non-adjacent 2 bits set */
+};
+
+static void __init test_for_each_set_clump8(void)
+{
+#define CLUMP_EXP_NUMBITS 64
+ DECLARE_BITMAP(bits, CLUMP_EXP_NUMBITS);
+ unsigned int start;
+ unsigned long clump;
+
+ /* set bitmap to test case */
+ bitmap_zero(bits, CLUMP_EXP_NUMBITS);
+ bitmap_set(bits, 0, 1); /* 0x01 */
+ bitmap_set(bits, 9, 1); /* 0x02 */
+ bitmap_set(bits, 27, 3); /* 0x38 */
+ bitmap_set(bits, 35, 3); /* 0x38 */
+ bitmap_set(bits, 40, 4); /* 0x0F */
+ bitmap_set(bits, 48, 8); /* 0xFF */
+ bitmap_set(bits, 56, 1); /* 0x05 - part 1 */
+ bitmap_set(bits, 58, 1); /* 0x05 - part 2 */
+
+ for_each_set_clump8(start, clump, bits, CLUMP_EXP_NUMBITS)
+ expect_eq_clump8(start, CLUMP_EXP_NUMBITS, clump_exp, &clump);
+}
+
static void __init selftest(void)
{
test_zero_clear();
test_fill_set();
test_copy();
+ test_replace();
test_bitmap_arr32();
test_bitmap_parselist();
test_bitmap_parselist_user();
test_mem_optimisations();
+ test_for_each_set_clump8();
}
KSTM_MODULE_LOADERS(test_bitmap);
diff --git a/lib/test_meminit.c b/lib/test_meminit.c
index 9742e5cb853a..e4f706a404b3 100644
--- a/lib/test_meminit.c
+++ b/lib/test_meminit.c
@@ -183,6 +183,9 @@ static bool __init check_buf(void *buf, int size, bool want_ctor,
return fail;
}
+#define BULK_SIZE 100
+static void *bulk_array[BULK_SIZE];
+
/*
* Test kmem_cache with given parameters:
* want_ctor - use a constructor;
@@ -203,9 +206,24 @@ static int __init do_kmem_cache_size(size_t size, bool want_ctor,
want_rcu ? SLAB_TYPESAFE_BY_RCU : 0,
want_ctor ? test_ctor : NULL);
for (iter = 0; iter < 10; iter++) {
+ /* Do a test of bulk allocations */
+ if (!want_rcu && !want_ctor) {
+ int ret;
+
+ ret = kmem_cache_alloc_bulk(c, alloc_mask, BULK_SIZE, bulk_array);
+ if (!ret) {
+ fail = true;
+ } else {
+ int i;
+ for (i = 0; i < ret; i++)
+ fail |= check_buf(bulk_array[i], size, want_ctor, want_rcu, want_zero);
+ kmem_cache_free_bulk(c, ret, bulk_array);
+ }
+ }
+
buf = kmem_cache_alloc(c, alloc_mask);
/* Check that buf is zeroed, if it must be. */
- fail = check_buf(buf, size, want_ctor, want_rcu, want_zero);
+ fail |= check_buf(buf, size, want_ctor, want_rcu, want_zero);
fill_with_garbage_skip(buf, size, want_ctor ? CTOR_BYTES : 0);
if (!want_rcu) {
diff --git a/lib/ubsan.c b/lib/ubsan.c
index fc552d524ef7..7b9b58aee72c 100644
--- a/lib/ubsan.c
+++ b/lib/ubsan.c
@@ -140,25 +140,21 @@ static void val_to_string(char *str, size_t size, struct type_descriptor *type,
}
}
-static DEFINE_SPINLOCK(report_lock);
-
-static void ubsan_prologue(struct source_location *location,
- unsigned long *flags)
+static void ubsan_prologue(struct source_location *location)
{
current->in_ubsan++;
- spin_lock_irqsave(&report_lock, *flags);
pr_err("========================================"
"========================================\n");
print_source_location("UBSAN: Undefined behaviour in", location);
}
-static void ubsan_epilogue(unsigned long *flags)
+static void ubsan_epilogue(void)
{
dump_stack();
pr_err("========================================"
"========================================\n");
- spin_unlock_irqrestore(&report_lock, *flags);
+
current->in_ubsan--;
}
@@ -167,14 +163,13 @@ static void handle_overflow(struct overflow_data *data, void *lhs,
{
struct type_descriptor *type = data->type;
- unsigned long flags;
char lhs_val_str[VALUE_LENGTH];
char rhs_val_str[VALUE_LENGTH];
if (suppress_report(&data->location))
return;
- ubsan_prologue(&data->location, &flags);
+ ubsan_prologue(&data->location);
val_to_string(lhs_val_str, sizeof(lhs_val_str), type, lhs);
val_to_string(rhs_val_str, sizeof(rhs_val_str), type, rhs);
@@ -186,7 +181,7 @@ static void handle_overflow(struct overflow_data *data, void *lhs,
rhs_val_str,
type->type_name);
- ubsan_epilogue(&flags);
+ ubsan_epilogue();
}
void __ubsan_handle_add_overflow(struct overflow_data *data,
@@ -214,20 +209,19 @@ EXPORT_SYMBOL(__ubsan_handle_mul_overflow);
void __ubsan_handle_negate_overflow(struct overflow_data *data,
void *old_val)
{
- unsigned long flags;
char old_val_str[VALUE_LENGTH];
if (suppress_report(&data->location))
return;
- ubsan_prologue(&data->location, &flags);
+ ubsan_prologue(&data->location);
val_to_string(old_val_str, sizeof(old_val_str), data->type, old_val);
pr_err("negation of %s cannot be represented in type %s:\n",
old_val_str, data->type->type_name);
- ubsan_epilogue(&flags);
+ ubsan_epilogue();
}
EXPORT_SYMBOL(__ubsan_handle_negate_overflow);
@@ -235,13 +229,12 @@ EXPORT_SYMBOL(__ubsan_handle_negate_overflow);
void __ubsan_handle_divrem_overflow(struct overflow_data *data,
void *lhs, void *rhs)
{
- unsigned long flags;
char rhs_val_str[VALUE_LENGTH];
if (suppress_report(&data->location))
return;
- ubsan_prologue(&data->location, &flags);
+ ubsan_prologue(&data->location);
val_to_string(rhs_val_str, sizeof(rhs_val_str), data->type, rhs);
@@ -251,58 +244,52 @@ void __ubsan_handle_divrem_overflow(struct overflow_data *data,
else
pr_err("division by zero\n");
- ubsan_epilogue(&flags);
+ ubsan_epilogue();
}
EXPORT_SYMBOL(__ubsan_handle_divrem_overflow);
static void handle_null_ptr_deref(struct type_mismatch_data_common *data)
{
- unsigned long flags;
-
if (suppress_report(data->location))
return;
- ubsan_prologue(data->location, &flags);
+ ubsan_prologue(data->location);
pr_err("%s null pointer of type %s\n",
type_check_kinds[data->type_check_kind],
data->type->type_name);
- ubsan_epilogue(&flags);
+ ubsan_epilogue();
}
static void handle_misaligned_access(struct type_mismatch_data_common *data,
unsigned long ptr)
{
- unsigned long flags;
-
if (suppress_report(data->location))
return;
- ubsan_prologue(data->location, &flags);
+ ubsan_prologue(data->location);
pr_err("%s misaligned address %p for type %s\n",
type_check_kinds[data->type_check_kind],
(void *)ptr, data->type->type_name);
pr_err("which requires %ld byte alignment\n", data->alignment);
- ubsan_epilogue(&flags);
+ ubsan_epilogue();
}
static void handle_object_size_mismatch(struct type_mismatch_data_common *data,
unsigned long ptr)
{
- unsigned long flags;
-
if (suppress_report(data->location))
return;
- ubsan_prologue(data->location, &flags);
+ ubsan_prologue(data->location);
pr_err("%s address %p with insufficient space\n",
type_check_kinds[data->type_check_kind],
(void *) ptr);
pr_err("for an object of type %s\n", data->type->type_name);
- ubsan_epilogue(&flags);
+ ubsan_epilogue();
}
static void ubsan_type_mismatch_common(struct type_mismatch_data_common *data,
@@ -351,25 +338,23 @@ EXPORT_SYMBOL(__ubsan_handle_type_mismatch_v1);
void __ubsan_handle_out_of_bounds(struct out_of_bounds_data *data, void *index)
{
- unsigned long flags;
char index_str[VALUE_LENGTH];
if (suppress_report(&data->location))
return;
- ubsan_prologue(&data->location, &flags);
+ ubsan_prologue(&data->location);
val_to_string(index_str, sizeof(index_str), data->index_type, index);
pr_err("index %s is out of range for type %s\n", index_str,
data->array_type->type_name);
- ubsan_epilogue(&flags);
+ ubsan_epilogue();
}
EXPORT_SYMBOL(__ubsan_handle_out_of_bounds);
void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data,
void *lhs, void *rhs)
{
- unsigned long flags;
struct type_descriptor *rhs_type = data->rhs_type;
struct type_descriptor *lhs_type = data->lhs_type;
char rhs_str[VALUE_LENGTH];
@@ -379,7 +364,7 @@ void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data,
if (suppress_report(&data->location))
goto out;
- ubsan_prologue(&data->location, &flags);
+ ubsan_prologue(&data->location);
val_to_string(rhs_str, sizeof(rhs_str), rhs_type, rhs);
val_to_string(lhs_str, sizeof(lhs_str), lhs_type, lhs);
@@ -402,7 +387,7 @@ void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data,
lhs_str, rhs_str,
lhs_type->type_name);
- ubsan_epilogue(&flags);
+ ubsan_epilogue();
out:
user_access_restore(ua_flags);
}
@@ -411,11 +396,9 @@ EXPORT_SYMBOL(__ubsan_handle_shift_out_of_bounds);
void __ubsan_handle_builtin_unreachable(struct unreachable_data *data)
{
- unsigned long flags;
-
- ubsan_prologue(&data->location, &flags);
+ ubsan_prologue(&data->location);
pr_err("calling __builtin_unreachable()\n");
- ubsan_epilogue(&flags);
+ ubsan_epilogue();
panic("can't return from __builtin_unreachable()");
}
EXPORT_SYMBOL(__ubsan_handle_builtin_unreachable);
@@ -423,19 +406,18 @@ EXPORT_SYMBOL(__ubsan_handle_builtin_unreachable);
void __ubsan_handle_load_invalid_value(struct invalid_value_data *data,
void *val)
{
- unsigned long flags;
char val_str[VALUE_LENGTH];
if (suppress_report(&data->location))
return;
- ubsan_prologue(&data->location, &flags);
+ ubsan_prologue(&data->location);
val_to_string(val_str, sizeof(val_str), data->type, val);
pr_err("load of value %s is not a valid value for type %s\n",
val_str, data->type->type_name);
- ubsan_epilogue(&flags);
+ ubsan_epilogue();
}
EXPORT_SYMBOL(__ubsan_handle_load_invalid_value);
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index df3371d5c572..2fa710bb6358 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -36,6 +36,7 @@
#include <linux/bug.h>
#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include "kasan.h"
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bc01423277c5..c5b5f74cfd4d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -98,14 +98,6 @@ static bool do_memsw_account(void)
return !cgroup_subsys_on_dfl(memory_cgrp_subsys) && do_swap_account;
}
-static const char *const mem_cgroup_lru_names[] = {
- "inactive_anon",
- "active_anon",
- "inactive_file",
- "active_file",
- "unevictable",
-};
-
#define THRESHOLDS_EVENTS_TARGET 128
#define SOFTLIMIT_EVENTS_TARGET 1024
@@ -1421,7 +1413,7 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
PAGE_SIZE);
for (i = 0; i < NR_LRU_LISTS; i++)
- seq_buf_printf(&s, "%s %llu\n", mem_cgroup_lru_names[i],
+ seq_buf_printf(&s, "%s %llu\n", lru_list_name(i),
(u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
PAGE_SIZE);
@@ -1434,8 +1426,10 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
/* Accumulated memory events */
- seq_buf_printf(&s, "pgfault %lu\n", memcg_events(memcg, PGFAULT));
- seq_buf_printf(&s, "pgmajfault %lu\n", memcg_events(memcg, PGMAJFAULT));
+ seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGFAULT),
+ memcg_events(memcg, PGFAULT));
+ seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGMAJFAULT),
+ memcg_events(memcg, PGMAJFAULT));
seq_buf_printf(&s, "workingset_refault %lu\n",
memcg_page_state(memcg, WORKINGSET_REFAULT));
@@ -1444,22 +1438,27 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
seq_buf_printf(&s, "workingset_nodereclaim %lu\n",
memcg_page_state(memcg, WORKINGSET_NODERECLAIM));
- seq_buf_printf(&s, "pgrefill %lu\n", memcg_events(memcg, PGREFILL));
+ seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGREFILL),
+ memcg_events(memcg, PGREFILL));
seq_buf_printf(&s, "pgscan %lu\n",
memcg_events(memcg, PGSCAN_KSWAPD) +
memcg_events(memcg, PGSCAN_DIRECT));
seq_buf_printf(&s, "pgsteal %lu\n",
memcg_events(memcg, PGSTEAL_KSWAPD) +
memcg_events(memcg, PGSTEAL_DIRECT));
- seq_buf_printf(&s, "pgactivate %lu\n", memcg_events(memcg, PGACTIVATE));
- seq_buf_printf(&s, "pgdeactivate %lu\n", memcg_events(memcg, PGDEACTIVATE));
- seq_buf_printf(&s, "pglazyfree %lu\n", memcg_events(memcg, PGLAZYFREE));
- seq_buf_printf(&s, "pglazyfreed %lu\n", memcg_events(memcg, PGLAZYFREED));
+ seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGACTIVATE),
+ memcg_events(memcg, PGACTIVATE));
+ seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGDEACTIVATE),
+ memcg_events(memcg, PGDEACTIVATE));
+ seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGLAZYFREE),
+ memcg_events(memcg, PGLAZYFREE));
+ seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGLAZYFREED),
+ memcg_events(memcg, PGLAZYFREED));
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- seq_buf_printf(&s, "thp_fault_alloc %lu\n",
+ seq_buf_printf(&s, "%s %lu\n", vm_event_name(THP_FAULT_ALLOC),
memcg_events(memcg, THP_FAULT_ALLOC));
- seq_buf_printf(&s, "thp_collapse_alloc %lu\n",
+ seq_buf_printf(&s, "%s %lu\n", vm_event_name(THP_COLLAPSE_ALLOC),
memcg_events(memcg, THP_COLLAPSE_ALLOC));
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -3742,13 +3741,6 @@ static const unsigned int memcg1_events[] = {
PGMAJFAULT,
};
-static const char *const memcg1_event_names[] = {
- "pgpgin",
- "pgpgout",
- "pgfault",
- "pgmajfault",
-};
-
static int memcg_stat_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
@@ -3757,7 +3749,6 @@ static int memcg_stat_show(struct seq_file *m, void *v)
unsigned int i;
BUILD_BUG_ON(ARRAY_SIZE(memcg1_stat_names) != ARRAY_SIZE(memcg1_stats));
- BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
@@ -3768,11 +3759,11 @@ static int memcg_stat_show(struct seq_file *m, void *v)
}
for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
- seq_printf(m, "%s %lu\n", memcg1_event_names[i],
+ seq_printf(m, "%s %lu\n", vm_event_name(memcg1_events[i]),
memcg_events_local(memcg, memcg1_events[i]));
for (i = 0; i < NR_LRU_LISTS; i++)
- seq_printf(m, "%s %lu\n", mem_cgroup_lru_names[i],
+ seq_printf(m, "%s %lu\n", lru_list_name(i),
memcg_page_state_local(memcg, NR_LRU_BASE + i) *
PAGE_SIZE);
@@ -3797,11 +3788,12 @@ static int memcg_stat_show(struct seq_file *m, void *v)
}
for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
- seq_printf(m, "total_%s %llu\n", memcg1_event_names[i],
+ seq_printf(m, "total_%s %llu\n",
+ vm_event_name(memcg1_events[i]),
(u64)memcg_events(memcg, memcg1_events[i]));
for (i = 0; i < NR_LRU_LISTS; i++)
- seq_printf(m, "total_%s %llu\n", mem_cgroup_lru_names[i],
+ seq_printf(m, "total_%s %llu\n", lru_list_name(i),
(u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
PAGE_SIZE);
diff --git a/mm/memory.c b/mm/memory.c
index 513c3ecc76ee..606da187d1de 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -672,7 +672,7 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
if (pmd_devmap(pmd))
return NULL;
- if (is_zero_pfn(pfn))
+ if (is_huge_zero_pmd(pmd))
return NULL;
if (unlikely(pfn > highest_memmap_pfn))
return NULL;
@@ -4197,19 +4197,11 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
smp_wmb(); /* See comment in __pte_alloc */
ptl = pud_lock(mm, pud);
-#ifndef __ARCH_HAS_4LEVEL_HACK
if (!pud_present(*pud)) {
mm_inc_nr_pmds(mm);
pud_populate(mm, pud, new);
} else /* Another has populated it */
pmd_free(mm, new);
-#else
- if (!pgd_present(*pud)) {
- mm_inc_nr_pmds(mm);
- pgd_populate(mm, pud, new);
- } else /* Another has populated it */
- pmd_free(mm, new);
-#endif /* __ARCH_HAS_4LEVEL_HACK */
spin_unlock(ptl);
return 0;
}
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 55ac23ef11c1..fc617ad6f035 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -355,7 +355,7 @@ static unsigned long find_smallest_section_pfn(int nid, struct zone *zone,
if (unlikely(pfn_to_nid(start_pfn) != nid))
continue;
- if (zone && zone != page_zone(pfn_to_page(start_pfn)))
+ if (zone != page_zone(pfn_to_page(start_pfn)))
continue;
return start_pfn;
@@ -380,7 +380,7 @@ static unsigned long find_biggest_section_pfn(int nid, struct zone *zone,
if (unlikely(pfn_to_nid(pfn) != nid))
continue;
- if (zone && zone != page_zone(pfn_to_page(pfn)))
+ if (zone != page_zone(pfn_to_page(pfn)))
continue;
return pfn;
@@ -392,14 +392,11 @@ static unsigned long find_biggest_section_pfn(int nid, struct zone *zone,
static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
unsigned long end_pfn)
{
- unsigned long zone_start_pfn = zone->zone_start_pfn;
- unsigned long z = zone_end_pfn(zone); /* zone_end_pfn namespace clash */
- unsigned long zone_end_pfn = z;
unsigned long pfn;
int nid = zone_to_nid(zone);
zone_span_writelock(zone);
- if (zone_start_pfn == start_pfn) {
+ if (zone->zone_start_pfn == start_pfn) {
/*
* If the section is the smallest section in the zone, we need to
* shrink zone->zone_start_pfn and zone->spanned_pages.
* In this case, we find the second smallest valid mem_section
* for shrinking the zone.
*/
pfn = find_smallest_section_pfn(nid, zone, end_pfn,
- zone_end_pfn);
+ zone_end_pfn(zone));
if (pfn) {
+ zone->spanned_pages = zone_end_pfn(zone) - pfn;
zone->zone_start_pfn = pfn;
- zone->spanned_pages = zone_end_pfn - pfn;
+ } else {
+ zone->zone_start_pfn = 0;
+ zone->spanned_pages = 0;
}
- } else if (zone_end_pfn == end_pfn) {
+ } else if (zone_end_pfn(zone) == end_pfn) {
/*
* If the section is the biggest section in the zone, we need to
* shrink zone->spanned_pages.
* In this case, we find the second biggest valid mem_section for
* shrinking the zone.
*/
- pfn = find_biggest_section_pfn(nid, zone, zone_start_pfn,
+ pfn = find_biggest_section_pfn(nid, zone, zone->zone_start_pfn,
start_pfn);
if (pfn)
- zone->spanned_pages = pfn - zone_start_pfn + 1;
- }
-
- /*
- * The section is not biggest or smallest mem_section in the zone, it
- * only creates a hole in the zone. So in this case, we need not
- * change the zone. But perhaps, the zone has only hole data. Thus
- * it check the zone has only hole or not.
- */
- pfn = zone_start_pfn;
- for (; pfn < zone_end_pfn; pfn += PAGES_PER_SUBSECTION) {
- if (unlikely(!pfn_to_online_page(pfn)))
- continue;
-
- if (page_zone(pfn_to_page(pfn)) != zone)
- continue;
-
- /* Skip range to be removed */
- if (pfn >= start_pfn && pfn < end_pfn)
- continue;
-
- /* If we find valid section, we have nothing to do */
- zone_span_writeunlock(zone);
- return;
+ zone->spanned_pages = pfn - zone->zone_start_pfn + 1;
+ else {
+ zone->zone_start_pfn = 0;
+ zone->spanned_pages = 0;
+ }
}
-
- /* The zone has no valid section */
- zone->zone_start_pfn = 0;
- zone->spanned_pages = 0;
zone_span_writeunlock(zone);
}
@@ -483,12 +460,16 @@ static void update_pgdat_span(struct pglist_data *pgdat)
pgdat->node_spanned_pages = node_end_pfn - node_start_pfn;
}
-static void __remove_zone(struct zone *zone, unsigned long start_pfn,
- unsigned long nr_pages)
+void __ref remove_pfn_range_from_zone(struct zone *zone,
+ unsigned long start_pfn,
+ unsigned long nr_pages)
{
struct pglist_data *pgdat = zone->zone_pgdat;
unsigned long flags;
+ /* Poison struct pages because they are now uninitialized again. */
+ page_init_poison(pfn_to_page(start_pfn), sizeof(struct page) * nr_pages);
+
#ifdef CONFIG_ZONE_DEVICE
/*
* Zone shrinking code cannot properly deal with ZONE_DEVICE. So
@@ -499,28 +480,30 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn,
return;
#endif
+ clear_zone_contiguous(zone);
+
pgdat_resize_lock(zone->zone_pgdat, &flags);
shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
update_pgdat_span(pgdat);
pgdat_resize_unlock(zone->zone_pgdat, &flags);
+
+ set_zone_contiguous(zone);
}
-static void __remove_section(struct zone *zone, unsigned long pfn,
- unsigned long nr_pages, unsigned long map_offset,
- struct vmem_altmap *altmap)
+static void __remove_section(unsigned long pfn, unsigned long nr_pages,
+ unsigned long map_offset,
+ struct vmem_altmap *altmap)
{
struct mem_section *ms = __nr_to_section(pfn_to_section_nr(pfn));
if (WARN_ON_ONCE(!valid_section(ms)))
return;
- __remove_zone(zone, pfn, nr_pages);
sparse_remove_section(ms, pfn, nr_pages, map_offset, altmap);
}
/**
- * __remove_pages() - remove sections of pages from a zone
- * @zone: zone from which pages need to be removed
+ * __remove_pages() - remove sections of pages
* @pfn: starting pageframe (must be aligned to start of a section)
* @nr_pages: number of pages to remove (must be multiple of section size)
* @altmap: alternative device page map or %NULL if default memmap is used
@@ -530,34 +513,25 @@ static void __remove_section(struct zone *zone, unsigned long pfn,
* sure that pages are marked reserved and zones are adjusted properly by
* calling offline_pages().
*/
-void __remove_pages(struct zone *zone, unsigned long pfn,
- unsigned long nr_pages, struct vmem_altmap *altmap)
+void __remove_pages(unsigned long pfn, unsigned long nr_pages,
+ struct vmem_altmap *altmap)
{
+ const unsigned long end_pfn = pfn + nr_pages;
+ unsigned long cur_nr_pages;
unsigned long map_offset = 0;
- unsigned long nr, start_sec, end_sec;
map_offset = vmem_altmap_offset(altmap);
- clear_zone_contiguous(zone);
-
if (check_pfn_span(pfn, nr_pages, "remove"))
return;
- start_sec = pfn_to_section_nr(pfn);
- end_sec = pfn_to_section_nr(pfn + nr_pages - 1);
- for (nr = start_sec; nr <= end_sec; nr++) {
- unsigned long pfns;
-
+ for (; pfn < end_pfn; pfn += cur_nr_pages) {
cond_resched();
- pfns = min(nr_pages, PAGES_PER_SECTION
- - (pfn & ~PAGE_SECTION_MASK));
- __remove_section(zone, pfn, pfns, map_offset, altmap);
- pfn += pfns;
- nr_pages -= pfns;
+ /* Select all remaining pages up to the next section boundary */
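+ /*
+ * e.g. with PAGES_PER_SECTION = 0x8000 and pfn = 0x8010,
+ * -(pfn | PAGE_SECTION_MASK) = 0x7ff0, the number of pages left
+ * before the 0x10000 boundary (section size is arch-dependent).
+ */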
+ cur_nr_pages = min(end_pfn - pfn, -(pfn | PAGE_SECTION_MASK));
+ __remove_section(pfn, cur_nr_pages, map_offset, altmap);
map_offset = 0;
}
-
- set_zone_contiguous(zone);
}
int set_online_page_callback(online_page_callback_t callback)
@@ -869,6 +843,7 @@ failed_addition:
(unsigned long long) pfn << PAGE_SHIFT,
(((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
memory_notify(MEM_CANCEL_ONLINE, &arg);
+ remove_pfn_range_from_zone(zone, pfn, nr_pages);
mem_hotplug_done();
return ret;
}
@@ -1628,6 +1603,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
writeback_set_ratelimit();
memory_notify(MEM_OFFLINE, &arg);
+ remove_pfn_range_from_zone(zone, start_pfn, nr_pages);
mem_hotplug_done();
return 0;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 067cf7d3daf5..b2920ae87a61 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2148,18 +2148,22 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
nmask = policy_nodemask(gfp, pol);
if (!nmask || node_isset(hpage_node, *nmask)) {
mpol_cond_put(pol);
+ /*
+ * First, try to allocate THP only on the local node, but
+ * don't reclaim unnecessarily, just compact.
+ */
page = __alloc_pages_node(hpage_node,
- gfp | __GFP_THISNODE, order);
+ gfp | __GFP_THISNODE | __GFP_NORETRY, order);
/*
* If hugepage allocations are configured to always
* synchronous compact or the vma has been madvised
* to prefer hugepage backing, retry allowing remote
- * memory as well.
+ * memory with both reclaim and compact as well.
*/
if (!page && (gfp & __GFP_DIRECT_RECLAIM))
page = __alloc_pages_node(hpage_node,
- gfp | __GFP_NORETRY, order);
+ gfp, order);
goto out;
}
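
The comments above carry the intent of the change; as a compilable sketch of the two-pass policy (names and flag values are illustrative stand-ins, not the kernel's GFP flags):

    #define DEMO_THISNODE       0x1u  /* stay on the preferred node */
    #define DEMO_NORETRY        0x2u  /* compact, but no heavy reclaim */
    #define DEMO_DIRECT_RECLAIM 0x4u  /* caller tolerates blocking reclaim */

    void *demo_alloc_node(int node, unsigned int flags, int order);

    void *demo_alloc_thp(int node, unsigned int flags, int order)
    {
    	/* pass 1: local node only, lightweight (compaction only) */
    	void *page = demo_alloc_node(node,
    			flags | DEMO_THISNODE | DEMO_NORETRY, order);

    	/* pass 2: only if the caller allows direct reclaim, retry at
    	 * full strength, permitting remote nodes and reclaim as well */
    	if (!page && (flags & DEMO_DIRECT_RECLAIM))
    		page = demo_alloc_node(node, flags, order);

    	return page;
    }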
diff --git a/mm/memremap.c b/mm/memremap.c
index 03ccbdfeb697..022e78e68ea0 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -55,8 +55,16 @@ static void pgmap_array_delete(struct resource *res)
static unsigned long pfn_first(struct dev_pagemap *pgmap)
{
- return PHYS_PFN(pgmap->res.start) +
- vmem_altmap_offset(pgmap_altmap(pgmap));
+ const struct resource *res = &pgmap->res;
+ struct vmem_altmap *altmap = pgmap_altmap(pgmap);
+ unsigned long pfn;
+
+ if (altmap)
+ pfn = altmap->base_pfn + vmem_altmap_offset(altmap);
+ else
+ pfn = PHYS_PFN(res->start);
+
+ return pfn;
}
static unsigned long pfn_end(struct dev_pagemap *pgmap)
@@ -73,6 +81,26 @@ static unsigned long pfn_next(unsigned long pfn)
return pfn + 1;
}
+/*
+ * This returns true if the page is reserved by a ZONE_DEVICE driver.
+ */
+bool pfn_zone_device_reserved(unsigned long pfn)
+{
+ struct dev_pagemap *pgmap;
+ struct vmem_altmap *altmap;
+ bool ret = false;
+
+ pgmap = get_dev_pagemap(pfn, NULL);
+ if (!pgmap)
+ return ret;
+ altmap = pgmap_altmap(pgmap);
+ if (altmap && pfn < (altmap->base_pfn + altmap->reserve))
+ ret = true;
+ put_dev_pagemap(pgmap);
+
+ return ret;
+}
+
#define for_each_device_pfn(pfn, map) \
for (pfn = pfn_first(map); pfn < pfn_end(map); pfn = pfn_next(pfn))
@@ -119,8 +147,10 @@ void memunmap_pages(struct dev_pagemap *pgmap)
nid = page_to_nid(first_page);
mem_hotplug_begin();
+ remove_pfn_range_from_zone(page_zone(first_page), PHYS_PFN(res->start),
+ PHYS_PFN(resource_size(res)));
if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
- __remove_pages(page_zone(first_page), PHYS_PFN(res->start),
+ __remove_pages(PHYS_PFN(res->start),
PHYS_PFN(resource_size(res)), NULL);
} else {
arch_remove_memory(nid, res->start, resource_size(res),
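
pfn_first() now handles the altmap case explicitly: when device memory hosts its own struct-page array, the first pfn handed out lies past the driver-reserved pages and the memmap-backing pages (vmem_altmap_offset() is reserve + free). A sketch of that layout arithmetic; the field names mirror the kernel's struct vmem_altmap, the values are made up:

    #include <stdio.h>

    struct demo_altmap {
    	unsigned long base_pfn;  /* first pfn of the device range */
    	unsigned long reserve;   /* pages the driver keeps aside */
    	unsigned long free;      /* pages backing the memmap */
    };

    static unsigned long demo_pfn_first(const struct demo_altmap *a,
    				    unsigned long res_start_pfn)
    {
    	if (a)
    		return a->base_pfn + a->reserve + a->free;
    	return res_start_pfn;	/* no altmap: start of the resource */
    }

    int main(void)
    {
    	struct demo_altmap a = { 0x100000, 64, 512 };

    	printf("first usable pfn: %#lx\n", demo_pfn_first(&a, 0x100000));
    	return 0;
    }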
diff --git a/mm/mmap.c b/mm/mmap.c
index 9c648524e4dc..4d4db76a07da 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1862,6 +1862,22 @@ unacct_error:
return error;
}
+static inline unsigned long gap_start_offset(struct vm_unmapped_area_info *info,
+ unsigned long addr)
+{
+ /*
+  * Get the gap_start offset needed to adjust the gap address to the
+  * desired alignment.
+  */
+ return (info->align_offset - addr) & info->align_mask;
+}
+
+static inline unsigned long gap_end_offset(struct vm_unmapped_area_info *info,
+ unsigned long addr)
+{
+ /* get gap_end offset to adjust gap address to the desired alignment */
+ return (addr - info->align_offset) & info->align_mask;
+}
+
unsigned long unmapped_area(struct vm_unmapped_area_info *info)
{
/*
@@ -1876,10 +1892,7 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
struct vm_area_struct *vma;
unsigned long length, low_limit, high_limit, gap_start, gap_end;
- /* Adjust search length to account for worst case alignment overhead */
- length = info->length + info->align_mask;
- if (length < info->length)
- return -ENOMEM;
+ length = info->length;
/* Adjust search limits by the desired length */
if (info->high_limit < length)
@@ -1911,6 +1924,7 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
}
gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
+ gap_start += gap_start_offset(info, gap_start);
check_current:
/* Check if current node has a suitable gap */
if (gap_start > high_limit)
@@ -1939,6 +1953,7 @@ check_current:
struct vm_area_struct, vm_rb);
if (prev == vma->vm_rb.rb_left) {
gap_start = vm_end_gap(vma->vm_prev);
+ gap_start += gap_start_offset(info, gap_start);
gap_end = vm_start_gap(vma);
goto check_current;
}
@@ -1948,17 +1963,17 @@ check_current:
check_highest:
/* Check highest gap, which does not precede any rbtree node */
gap_start = mm->highest_vm_end;
+ gap_start += gap_start_offset(info, gap_start);
gap_end = ULONG_MAX; /* Only for VM_BUG_ON below */
if (gap_start > high_limit)
return -ENOMEM;
found:
/* We found a suitable gap. Clip it with the original low_limit. */
- if (gap_start < info->low_limit)
+ if (gap_start < info->low_limit) {
gap_start = info->low_limit;
-
- /* Adjust gap address to the desired alignment */
- gap_start += (info->align_offset - gap_start) & info->align_mask;
+ gap_start += gap_start_offset(info, gap_start);
+ }
VM_BUG_ON(gap_start + info->length > info->high_limit);
VM_BUG_ON(gap_start + info->length > gap_end);
@@ -1971,16 +1986,14 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
struct vm_area_struct *vma;
unsigned long length, low_limit, high_limit, gap_start, gap_end;
- /* Adjust search length to account for worst case alignment overhead */
- length = info->length + info->align_mask;
- if (length < info->length)
- return -ENOMEM;
+ length = info->length;
/*
* Adjust search limits by the desired length.
* See implementation comment at top of unmapped_area().
*/
gap_end = info->high_limit;
+ gap_end -= gap_end_offset(info, gap_end);
if (gap_end < length)
return -ENOMEM;
high_limit = gap_end - length;
@@ -2017,6 +2030,7 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
check_current:
/* Check if current node has a suitable gap */
gap_end = vm_start_gap(vma);
+ gap_end -= gap_end_offset(info, gap_end);
if (gap_end < low_limit)
return -ENOMEM;
if (gap_start <= high_limit &&
@@ -2051,13 +2065,14 @@ check_current:
found:
/* We found a suitable gap. Clip it with the original high_limit. */
- if (gap_end > info->high_limit)
+ if (gap_end > info->high_limit) {
gap_end = info->high_limit;
+ gap_end -= gap_end_offset(info, gap_end);
+ }
found_highest:
/* Compute highest gap address at the desired alignment */
gap_end -= info->length;
- gap_end -= (gap_end - info->align_offset) & info->align_mask;
VM_BUG_ON(gap_end < info->low_limit);
VM_BUG_ON(gap_end < gap_start);
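
Previously both searches inflated the requested length by align_mask and fixed the address up afterwards, which could overflow (the removed "length < info->length" check) and spuriously return -ENOMEM for large masks; the helpers above instead nudge each candidate gap to its aligned position during the search. The arithmetic, demonstrated standalone (illustrative values; align_mask is 2^n - 1 and the goal is the nearest address congruent to align_offset modulo 2^n):

    #include <assert.h>

    int main(void)
    {
    	unsigned long align_mask = 0xfffUL;	/* 4 KiB alignment */
    	unsigned long align_offset = 0x200UL;	/* desired addr % 4096 */
    	unsigned long addr = 0x7f0000001234UL;

    	/* round up (gap_start) and down (gap_end) to the alignment */
    	unsigned long up   = addr + ((align_offset - addr) & align_mask);
    	unsigned long down = addr - ((addr - align_offset) & align_mask);

    	assert((up & align_mask) == align_offset && up >= addr);
    	assert((down & align_mask) == align_offset && down <= addr);
    	return 0;
    }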
diff --git a/mm/nommu.c b/mm/nommu.c
index bd2b4e5ef144..318df4e236c9 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -370,10 +370,14 @@ void vm_unmap_aliases(void)
EXPORT_SYMBOL_GPL(vm_unmap_aliases);
/*
- * Implement a stub for vmalloc_sync_all() if the architecture chose not to
- * have one.
+ * Implement a stub for vmalloc_sync_[un]mapping() if the architecture
+ * chose not to have one.
*/
-void __weak vmalloc_sync_all(void)
+void __weak vmalloc_sync_mappings(void)
+{
+}
+
+void __weak vmalloc_sync_unmappings(void)
{
}
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 71e3acea7817..314ce1a3cf25 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -377,36 +377,13 @@ static void select_bad_process(struct oom_control *oc)
}
}
-static int dump_task(struct task_struct *p, void *arg)
-{
- struct oom_control *oc = arg;
- struct task_struct *task;
-
- if (oom_unkillable_task(p))
- return 0;
-
- /* p may not have freeable memory in nodemask */
- if (!is_memcg_oom(oc) && !oom_cpuset_eligible(p, oc))
- return 0;
- task = find_lock_task_mm(p);
- if (!task) {
- /*
- * This is a kthread or all of p's threads have already
- * detached their mm's. There's no need to report
- * them; they can't be oom killed anyway.
- */
- return 0;
+static int add_candidate_task(struct task_struct *p, void *arg)
+{
+ if (!oom_unkillable_task(p)) {
+ get_task_struct(p);
+ list_add_tail(&p->oom_victim_list, (struct list_head *) arg);
}
-
- pr_info("[%7d] %5d %5d %8lu %8lu %8ld %8lu %5hd %s\n",
- task->pid, from_kuid(&init_user_ns, task_uid(task)),
- task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
- mm_pgtables_bytes(task->mm),
- get_mm_counter(task->mm, MM_SWAPENTS),
- task->signal->oom_score_adj, task->comm);
- task_unlock(task);
-
return 0;
}
@@ -422,19 +399,41 @@ static int dump_task(struct task_struct *p, void *arg)
*/
static void dump_tasks(struct oom_control *oc)
{
- pr_info("Tasks state (memory values in pages):\n");
- pr_info("[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name\n");
+ LIST_HEAD(list);
+ struct task_struct *p;
+ struct task_struct *t;
if (is_memcg_oom(oc))
- mem_cgroup_scan_tasks(oc->memcg, dump_task, oc);
+ mem_cgroup_scan_tasks(oc->memcg, add_candidate_task, &list);
else {
- struct task_struct *p;
-
rcu_read_lock();
for_each_process(p)
- dump_task(p, oc);
+ add_candidate_task(p, &list);
rcu_read_unlock();
}
+ pr_info("Tasks state (memory values in pages):\n");
+ pr_info("[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name\n");
+ list_for_each_entry(p, &list, oom_victim_list) {
+ cond_resched();
+ /* p may not have freeable memory in nodemask */
+ if (!is_memcg_oom(oc) && !oom_cpuset_eligible(p, oc))
+ continue;
+ /* All of p's threads might have already detached their mm's. */
+ t = find_lock_task_mm(p);
+ if (!t)
+ continue;
+ pr_info("[%7d] %5d %5d %8lu %8lu %8ld %8lu %5hd %s\n",
+ t->pid, from_kuid(&init_user_ns, task_uid(t)),
+ t->tgid, t->mm->total_vm, get_mm_rss(t->mm),
+ mm_pgtables_bytes(t->mm),
+ get_mm_counter(t->mm, MM_SWAPENTS),
+ t->signal->oom_score_adj, t->comm);
+ task_unlock(t);
+ }
+ list_for_each_entry_safe(p, t, &list, oom_victim_list) {
+ list_del(&p->oom_victim_list);
+ put_task_struct(p);
+ }
}
static void dump_oom_summary(struct oom_control *oc, struct task_struct *victim)
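
The refactor above splits the task listing into a reference-taking pass under RCU and a printing pass that may reschedule between entries. The shape of that pattern, with illustrative stand-in types and helpers (not kernel APIs):

    #include <stddef.h>

    struct cand {
    	struct cand *next;
    };

    void fast_lock(void);			/* stand-in for rcu_read_lock() */
    void fast_unlock(void);
    void take_ref(struct cand *c);		/* stand-in for get_task_struct() */
    void drop_ref(struct cand *c);
    void slow_report(struct cand *c);	/* may sleep or reschedule */
    struct cand *first_cand(void);
    struct cand *next_cand(struct cand *c);

    void report_all(void)
    {
    	struct cand *c, *tmp, *head = NULL;

    	fast_lock();			/* pass 1: snapshot with refs held */
    	for (c = first_cand(); c; c = next_cand(c)) {
    		take_ref(c);
    		c->next = head;
    		head = c;
    	}
    	fast_unlock();

    	for (c = head; c; c = c->next)	/* pass 2: slow printing */
    		slow_report(c);

    	for (c = head; c; c = tmp) {	/* pass 3: drop the references */
    		tmp = c->next;
    		drop_ref(c);
    	}
    }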
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4785a8a2040e..62dcd6b76c80 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4476,8 +4476,11 @@ retry_cpuset:
if (page)
goto got_pg;
- if (order >= pageblock_order && (gfp_mask & __GFP_IO) &&
- !(gfp_mask & __GFP_RETRY_MAYFAIL)) {
+ /*
+ * Checks for costly allocations with __GFP_NORETRY, which
+ * includes some THP page fault allocations
+ */
+ if (costly_order && (gfp_mask & __GFP_NORETRY)) {
/*
* If allocating entire pageblock(s) and compaction
* failed because all zones are below low watermarks
@@ -4498,23 +4501,6 @@ retry_cpuset:
if (compact_result == COMPACT_SKIPPED ||
compact_result == COMPACT_DEFERRED)
goto nopage;
- }
-
- /*
- * Checks for costly allocations with __GFP_NORETRY, which
- * includes THP page fault allocations
- */
- if (costly_order && (gfp_mask & __GFP_NORETRY)) {
- /*
- * If compaction is deferred for high-order allocations,
- * it is because sync compaction recently failed. If
- * this is the case and the caller requested a THP
- * allocation, we do not want to heavily disrupt the
- * system, so we fail the allocation instead of entering
- * direct reclaim.
- */
- if (compact_result == COMPACT_DEFERRED)
- goto nopage;
/*
* Looks like reclaim/compaction is worth trying, but
@@ -5949,10 +5935,10 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
#ifdef CONFIG_ZONE_DEVICE
void __ref memmap_init_zone_device(struct zone *zone,
unsigned long start_pfn,
- unsigned long size,
+ unsigned long nr_pages,
struct dev_pagemap *pgmap)
{
- unsigned long pfn, end_pfn = start_pfn + size;
+ unsigned long pfn, end_pfn = start_pfn + nr_pages;
struct pglist_data *pgdat = zone->zone_pgdat;
struct vmem_altmap *altmap = pgmap_altmap(pgmap);
unsigned long zone_idx = zone_idx(zone);
@@ -5969,7 +5955,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
*/
if (altmap) {
start_pfn = altmap->base_pfn + vmem_altmap_offset(altmap);
- size = end_pfn - start_pfn;
+ nr_pages = end_pfn - start_pfn;
}
for (pfn = start_pfn; pfn < end_pfn; pfn++) {
@@ -6016,7 +6002,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
}
pr_info("%s initialised %lu pages in %ums\n", __func__,
- size, jiffies_to_msecs(jiffies - start));
+ nr_pages, jiffies_to_msecs(jiffies - start));
}
#endif
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 8afa188f6e20..f0ab6d4ceb4c 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -904,6 +904,18 @@ static void flush_memcg_workqueue(struct kmem_cache *s)
* previous workitems on workqueue are processed.
*/
flush_workqueue(memcg_kmem_cache_wq);
+
+ /*
+ * If we're racing with the deactivation of child kmem_caches, it might
+ * take another rcu grace period to complete their destruction.
+ * At this point the corresponding percpu_ref_kill() call should have
+ * been made, but it might take another rcu grace period for the ref to
+ * finish switching to atomic mode.
+ * Note that we check without grabbing slab_mutex; this is safe because
+ * at this point the children list can no longer grow.
+ */
+ if (!list_empty(&s->memcg_params.children))
+ rcu_barrier();
}
#else
static inline int shutdown_memcg_caches(struct kmem_cache *s)
diff --git a/mm/slub.c b/mm/slub.c
index d11389710b12..b7c6b2e7f2db 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -439,19 +439,38 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
}
#ifdef CONFIG_SLUB_DEBUG
+static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
+static DEFINE_SPINLOCK(object_map_lock);
+
/*
 * Determine a map of objects in use on a page.
*
* Node listlock must be held to guarantee that the page does
* not vanish from under us.
*/
-static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
+static unsigned long *get_map(struct kmem_cache *s, struct page *page)
{
void *p;
void *addr = page_address(page);
+ VM_BUG_ON(!irqs_disabled());
+
+ spin_lock(&object_map_lock);
+
+ bitmap_zero(object_map, page->objects);
+
for (p = page->freelist; p; p = get_freepointer(s, p))
- set_bit(slab_index(p, s, addr), map);
+ set_bit(slab_index(p, s, addr), object_map);
+
+ return object_map;
+}
+
+static void put_map(unsigned long *map)
+{
+ VM_BUG_ON(map != object_map);
+ lockdep_assert_held(&object_map_lock);
+
+ spin_unlock(&object_map_lock);
}
static inline unsigned int size_from_object(struct kmem_cache *s)
@@ -3675,13 +3694,12 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
#ifdef CONFIG_SLUB_DEBUG
void *addr = page_address(page);
void *p;
- unsigned long *map = bitmap_zalloc(page->objects, GFP_ATOMIC);
- if (!map)
- return;
+ unsigned long *map;
+
slab_err(s, page, text, s->name);
slab_lock(page);
- get_map(s, page, map);
+ map = get_map(s, page);
for_each_object(p, s, addr, page->objects) {
if (!test_bit(slab_index(p, s, addr), map)) {
@@ -3689,8 +3707,9 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
print_tracking(s, p);
}
}
+ put_map(map);
+
slab_unlock(page);
- bitmap_free(map);
#endif
}
@@ -4384,19 +4403,19 @@ static int count_total(struct page *page)
#endif
#ifdef CONFIG_SLUB_DEBUG
-static void validate_slab(struct kmem_cache *s, struct page *page,
- unsigned long *map)
+static void validate_slab(struct kmem_cache *s, struct page *page)
{
void *p;
void *addr = page_address(page);
+ unsigned long *map;
+
+ slab_lock(page);
if (!check_slab(s, page) || !on_freelist(s, page, NULL))
- return;
+ goto unlock;
/* Now we know that a valid freelist exists */
- bitmap_zero(map, page->objects);
-
- get_map(s, page, map);
+ map = get_map(s, page);
for_each_object(p, s, addr, page->objects) {
u8 val = test_bit(slab_index(p, s, addr), map) ?
SLUB_RED_INACTIVE : SLUB_RED_ACTIVE;
@@ -4404,18 +4423,13 @@ static void validate_slab(struct kmem_cache *s, struct page *page,
if (!check_object(s, page, p, val))
break;
}
-}
-
-static void validate_slab_slab(struct kmem_cache *s, struct page *page,
- unsigned long *map)
-{
- slab_lock(page);
- validate_slab(s, page, map);
+ put_map(map);
+unlock:
slab_unlock(page);
}
static int validate_slab_node(struct kmem_cache *s,
- struct kmem_cache_node *n, unsigned long *map)
+ struct kmem_cache_node *n)
{
unsigned long count = 0;
struct page *page;
@@ -4424,7 +4438,7 @@ static int validate_slab_node(struct kmem_cache *s,
spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->partial, slab_list) {
- validate_slab_slab(s, page, map);
+ validate_slab(s, page);
count++;
}
if (count != n->nr_partial)
@@ -4435,7 +4449,7 @@ static int validate_slab_node(struct kmem_cache *s,
goto out;
list_for_each_entry(page, &n->full, slab_list) {
- validate_slab_slab(s, page, map);
+ validate_slab(s, page);
count++;
}
if (count != atomic_long_read(&n->nr_slabs))
@@ -4452,15 +4466,11 @@ static long validate_slab_cache(struct kmem_cache *s)
int node;
unsigned long count = 0;
struct kmem_cache_node *n;
- unsigned long *map = bitmap_alloc(oo_objects(s->max), GFP_KERNEL);
-
- if (!map)
- return -ENOMEM;
flush_all(s);
for_each_kmem_cache_node(s, node, n)
- count += validate_slab_node(s, n, map);
- bitmap_free(map);
+ count += validate_slab_node(s, n);
+
return count;
}
/*
@@ -4590,18 +4600,17 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
}
static void process_slab(struct loc_track *t, struct kmem_cache *s,
- struct page *page, enum track_item alloc,
- unsigned long *map)
+ struct page *page, enum track_item alloc)
{
void *addr = page_address(page);
void *p;
+ unsigned long *map;
- bitmap_zero(map, page->objects);
- get_map(s, page, map);
-
+ map = get_map(s, page);
for_each_object(p, s, addr, page->objects)
if (!test_bit(slab_index(p, s, addr), map))
add_location(t, s, get_track(s, p, alloc));
+ put_map(map);
}
static int list_locations(struct kmem_cache *s, char *buf,
@@ -4612,11 +4621,9 @@ static int list_locations(struct kmem_cache *s, char *buf,
struct loc_track t = { 0, 0, NULL };
int node;
struct kmem_cache_node *n;
- unsigned long *map = bitmap_alloc(oo_objects(s->max), GFP_KERNEL);
- if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
- GFP_KERNEL)) {
- bitmap_free(map);
+ if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
+ GFP_KERNEL)) {
return sprintf(buf, "Out of memory\n");
}
/* Push back cpu slabs */
@@ -4631,9 +4638,9 @@ static int list_locations(struct kmem_cache *s, char *buf,
spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->partial, slab_list)
- process_slab(&t, s, page, alloc, map);
+ process_slab(&t, s, page, alloc);
list_for_each_entry(page, &n->full, slab_list)
- process_slab(&t, s, page, alloc, map);
+ process_slab(&t, s, page, alloc);
spin_unlock_irqrestore(&n->list_lock, flags);
}
@@ -4682,7 +4689,6 @@ static int list_locations(struct kmem_cache *s, char *buf,
}
free_loc_track(&t);
- bitmap_free(map);
if (!t.count)
len += sprintf(buf, "No data\n");
return len;
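
get_map() and put_map() above replace the per-call bitmap allocations with one statically sized scratch bitmap handed out under object_map_lock, so a caller owns the bitmap (and must not sleep) between the two calls. The contract, sketched in userspace with a mutex standing in for the spinlock and an illustrative size:

    #include <pthread.h>
    #include <string.h>

    #define DEMO_MAX_OBJS 512
    static unsigned char demo_map[DEMO_MAX_OBJS / 8];
    static pthread_mutex_t demo_map_lock = PTHREAD_MUTEX_INITIALIZER;

    static unsigned char *demo_get_map(void)
    {
    	pthread_mutex_lock(&demo_map_lock);	/* held until put */
    	memset(demo_map, 0, sizeof(demo_map));
    	return demo_map;
    }

    static void demo_put_map(unsigned char *map)
    {
    	(void)map;	/* callers must return the buffer they were given */
    	pthread_mutex_unlock(&demo_map_lock);
    }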
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 4d3b3d60d893..929172f935ea 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1279,7 +1279,7 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
* First make sure the mappings are removed from all page-tables
* before they are freed.
*/
- vmalloc_sync_all();
+ vmalloc_sync_unmappings();
/*
* TODO: to calculate a flush range without looping.
@@ -3126,16 +3126,19 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
EXPORT_SYMBOL(remap_vmalloc_range);
/*
- * Implement a stub for vmalloc_sync_all() if the architecture chose not to
- * have one.
+ * Implement stubs for vmalloc_sync_[un]mappings() if the architecture chose
+ * not to have one.
*
* The purpose of this function is to make sure the vmalloc area
* mappings are identical in all page-tables in the system.
*/
-void __weak vmalloc_sync_all(void)
+void __weak vmalloc_sync_mappings(void)
{
}
+void __weak vmalloc_sync_unmappings(void)
+{
+}
static int f(pte_t *pte, unsigned long addr, void *data)
{
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 74e8edce83ca..c8e88f4d9932 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3316,8 +3316,10 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
- trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order,
- sc.gfp_mask);
+ trace_mm_vmscan_memcg_softlimit_reclaim_begin(
+ cgroup_ino(memcg->css.cgroup),
+ sc.order,
+ sc.gfp_mask);
/*
* NOTE: Although we can get the priority field, using it
@@ -3328,7 +3330,9 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
*/
shrink_lruvec(lruvec, &sc);
- trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
+ trace_mm_vmscan_memcg_softlimit_reclaim_end(
+ cgroup_ino(memcg->css.cgroup),
+ sc.nr_reclaimed);
*nr_scanned = sc.nr_scanned;
@@ -3363,7 +3367,9 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
set_task_reclaim_state(current, &sc.reclaim_state);
- trace_mm_vmscan_memcg_reclaim_begin(0, sc.gfp_mask);
+ trace_mm_vmscan_memcg_reclaim_begin(
+ cgroup_ino(memcg->css.cgroup),
+ 0, sc.gfp_mask);
psi_memstall_enter(&pflags);
noreclaim_flag = memalloc_noreclaim_save();
@@ -3373,7 +3379,9 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
memalloc_noreclaim_restore(noreclaim_flag);
psi_memstall_leave(&pflags);
- trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
+ trace_mm_vmscan_memcg_reclaim_end(
+ cgroup_ino(memcg->css.cgroup),
+ nr_reclaimed);
set_task_reclaim_state(current, NULL);
return nr_reclaimed;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index a8222041bd44..78d53378db99 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1084,7 +1084,8 @@ int fragmentation_index(struct zone *zone, unsigned int order)
}
#endif
-#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA)
+#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || \
+ defined(CONFIG_NUMA) || defined(CONFIG_MEMCG)
#ifdef CONFIG_ZONE_DMA
#define TEXT_FOR_DMA(xx) xx "_dma",
#else
@@ -1134,7 +1135,7 @@ const char * const vmstat_text[] = {
"numa_other",
#endif
- /* Node-based counters */
+ /* enum node_stat_item counters */
"nr_inactive_anon",
"nr_active_anon",
"nr_inactive_file",
@@ -1172,7 +1173,7 @@ const char * const vmstat_text[] = {
"nr_dirty_threshold",
"nr_dirty_background_threshold",
-#ifdef CONFIG_VM_EVENT_COUNTERS
+#if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
/* enum vm_event_item counters */
"pgpgin",
"pgpgout",
@@ -1291,9 +1292,9 @@ const char * const vmstat_text[] = {
"swap_ra",
"swap_ra_hit",
#endif
-#endif /* CONFIG_VM_EVENTS_COUNTERS */
+#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
};
-#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */
+#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */
#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
defined(CONFIG_PROC_FS)
@@ -1564,10 +1565,8 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
if (is_zone_first_populated(pgdat, zone)) {
seq_printf(m, "\n per-node stats");
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
- seq_printf(m, "\n %-12s %lu",
- vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
- NR_VM_NUMA_STAT_ITEMS],
- node_page_state(pgdat, i));
+ seq_printf(m, "\n %-12s %lu", node_stat_name(i),
+ node_page_state(pgdat, i));
}
}
seq_printf(m,
@@ -1600,14 +1599,13 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
}
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
- seq_printf(m, "\n %-12s %lu", vmstat_text[i],
- zone_page_state(zone, i));
+ seq_printf(m, "\n %-12s %lu", zone_stat_name(i),
+ zone_page_state(zone, i));
#ifdef CONFIG_NUMA
for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
- seq_printf(m, "\n %-12s %lu",
- vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
- zone_numa_state_snapshot(zone, i));
+ seq_printf(m, "\n %-12s %lu", numa_stat_name(i),
+ zone_numa_state_snapshot(zone, i));
#endif
seq_printf(m, "\n pagesets");
@@ -1658,31 +1656,23 @@ static const struct seq_operations zoneinfo_op = {
.show = zoneinfo_show,
};
-enum writeback_stat_item {
- NR_DIRTY_THRESHOLD,
- NR_DIRTY_BG_THRESHOLD,
- NR_VM_WRITEBACK_STAT_ITEMS,
-};
+#define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \
+ NR_VM_NUMA_STAT_ITEMS + \
+ NR_VM_NODE_STAT_ITEMS + \
+ NR_VM_WRITEBACK_STAT_ITEMS + \
+ (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \
+ NR_VM_EVENT_ITEMS : 0))
static void *vmstat_start(struct seq_file *m, loff_t *pos)
{
unsigned long *v;
- int i, stat_items_size;
+ int i;
- if (*pos >= ARRAY_SIZE(vmstat_text))
+ if (*pos >= NR_VMSTAT_ITEMS)
return NULL;
- stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) +
- NR_VM_NUMA_STAT_ITEMS * sizeof(unsigned long) +
- NR_VM_NODE_STAT_ITEMS * sizeof(unsigned long) +
- NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long);
-
-#ifdef CONFIG_VM_EVENT_COUNTERS
- stat_items_size += sizeof(struct vm_event_state);
-#endif
- BUILD_BUG_ON(stat_items_size !=
- ARRAY_SIZE(vmstat_text) * sizeof(unsigned long));
- v = kmalloc(stat_items_size, GFP_KERNEL);
+ BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS);
+ v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL);
m->private = v;
if (!v)
return ERR_PTR(-ENOMEM);
@@ -1715,7 +1705,7 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
{
(*pos)++;
- if (*pos >= ARRAY_SIZE(vmstat_text))
+ if (*pos >= NR_VMSTAT_ITEMS)
return NULL;
return (unsigned long *)m->private + *pos;
}
@@ -1781,7 +1771,7 @@ int vmstat_refresh(struct ctl_table *table, int write,
val = atomic_long_read(&vm_zone_stat[i]);
if (val < 0) {
pr_warn("%s: %s %ld\n",
- __func__, vmstat_text[i], val);
+ __func__, zone_stat_name(i), val);
err = -EINVAL;
}
}
@@ -1790,7 +1780,7 @@ int vmstat_refresh(struct ctl_table *table, int write,
val = atomic_long_read(&vm_numa_stat[i]);
if (val < 0) {
pr_warn("%s: %s %ld\n",
- __func__, vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], val);
+ __func__, numa_stat_name(i), val);
err = -EINVAL;
}
}
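
/proc/vmstat sizing is now derived from the stat-item counts rather than ARRAY_SIZE(vmstat_text), since the name table can legitimately be larger than what vmstat prints (the event counter names may now be compiled in for CONFIG_MEMCG even with CONFIG_VM_EVENT_COUNTERS off); the BUILD_BUG_ON above enforces that the table covers at least the printed items. That invariant, demonstrated with illustrative counts:

    #include <assert.h>

    #define DEMO_ZONE_ITEMS      5
    #define DEMO_NODE_ITEMS      7
    #define DEMO_WRITEBACK_ITEMS 2
    #define DEMO_PRINTED_ITEMS   (DEMO_ZONE_ITEMS + DEMO_NODE_ITEMS + \
    			      DEMO_WRITEBACK_ITEMS)

    static const char *const demo_text[] = {
    	"z0", "z1", "z2", "z3", "z4",			/* zone */
    	"n0", "n1", "n2", "n3", "n4", "n5", "n6",	/* node */
    	"wb0", "wb1",					/* writeback */
    	"extra",		/* named, but not printed here */
    };

    static_assert(sizeof(demo_text) / sizeof(demo_text[0]) >=
    	      DEMO_PRINTED_ITEMS,
    	      "name table must cover every printed item");

    int main(void) { return 0; }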
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index b71a39ded930..39e14d5edaf1 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -51,7 +51,7 @@ static BLOCKING_NOTIFIER_HEAD(rpc_pipefs_notifier_list);
int rpc_pipefs_notifier_register(struct notifier_block *nb)
{
- return blocking_notifier_chain_cond_register(&rpc_pipefs_notifier_list, nb);
+ return blocking_notifier_chain_register(&rpc_pipefs_notifier_list, nb);
}
EXPORT_SYMBOL_GPL(rpc_pipefs_notifier_register);
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 87831c535acb..f3b84347c2f3 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -607,6 +607,20 @@ foreach my $entry (keys %deprecated_apis) {
}
$deprecated_apis_search = "(?:${deprecated_apis_search})";
+our %deprecated_string_apis = (
+ "strcpy" => "stracpy or strscpy",
+ "strlcpy" => "stracpy or strscpy",
+ "strncpy" => "stracpy or strscpy - for non-NUL-terminated uses, strncpy dest should be __nonstring",
+);
+
+#Create a search pattern for all these string apis to speed up a loop below
+our $deprecated_string_apis_search = "";
+foreach my $entry (keys %deprecated_string_apis) {
+ $deprecated_string_apis_search .= '|' if ($deprecated_string_apis_search ne "");
+ $deprecated_string_apis_search .= $entry;
+}
+$deprecated_string_apis_search = "(?:${deprecated_string_apis_search})";
+
our $mode_perms_world_writable = qr{
S_IWUGO |
S_IWOTH |
@@ -874,14 +888,18 @@ sub seed_camelcase_file {
}
}
+our %maintained_status = ();
+
sub is_maintained_obsolete {
my ($filename) = @_;
return 0 if (!$tree || !(-e "$root/scripts/get_maintainer.pl"));
- my $status = `perl $root/scripts/get_maintainer.pl --status --nom --nol --nogit --nogit-fallback -f $filename 2>&1`;
+ if (!exists($maintained_status{$filename})) {
+ $maintained_status{$filename} = `perl $root/scripts/get_maintainer.pl --status --nom --nol --nogit --nogit-fallback -f $filename 2>&1`;
+ }
- return $status =~ /obsolete/i;
+ return $maintained_status{$filename} =~ /obsolete/i;
}
sub is_SPDX_License_valid {
@@ -5029,8 +5047,9 @@ sub process {
$var =~ /[A-Z][a-z]|[a-z][A-Z]/ &&
#Ignore Page<foo> variants
$var !~ /^(?:Clear|Set|TestClear|TestSet|)Page[A-Z]/ &&
-#Ignore SI style variants like nS, mV and dB (ie: max_uV, regulator_min_uA_show)
- $var !~ /^(?:[a-z_]*?)_?[a-z][A-Z](?:_[a-z_]+)?$/ &&
+#Ignore SI style variants like nS, mV and dB
+#(ie: max_uV, regulator_min_uA_show, RANGE_mA_VALUE)
+ $var !~ /^(?:[a-z0-9_]*|[A-Z0-9_]*)?_?[a-z][A-Z](?:_[a-z0-9_]+|_[A-Z0-9_]+)?$/ &&
#Ignore some three character SI units explicitly, like MiB and KHz
$var !~ /^(?:[a-z_]*?)_?(?:[KMGT]iB|[KMGT]?Hz)(?:_[a-z_]+)?$/) {
while ($var =~ m{($Ident)}g) {
@@ -6475,6 +6494,16 @@ sub process {
"Deprecated use of '$deprecated_api', prefer '$new_api' instead\n" . $herecurr);
}
+# check for deprecated string apis
+ if ($line =~ /\b($deprecated_string_apis_search)\b\s*\(/) {
+ my $deprecated_string_api = $1;
+ my $new_api = $deprecated_string_apis{$deprecated_string_api};
+ my $msg_level = \&WARN;
+ $msg_level = \&CHK if ($file);
+ &{$msg_level}("DEPRECATED_API",
+ "Deprecated use of '$deprecated_string_api', prefer '$new_api' instead\n" . $herecurr);
+ }
+
# check for various structs that are normally const (ops, kgdb, device_tree)
# and avoid what seem like struct definitions 'struct foo {'
if ($line !~ /\bconst\b/ &&
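
For reference, the kind of code the new rule flags and the form its message steers toward; the function and buffer names here are made up, while strscpy() is the existing in-kernel bounded copy (it always NUL-terminates and returns -E2BIG when the source does not fit):

    static void demo_set_name(char *dst, size_t dst_size, const char *src)
    {
    	/* strcpy(dst, src) here would now draw:
    	 *   "Deprecated use of 'strcpy', prefer 'stracpy or strscpy'"
    	 */
    	if (strscpy(dst, src, dst_size) < 0)
    		pr_warn("name truncated\n");
    }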
diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 5ef59214c555..34085d146fa2 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -26,6 +26,7 @@ my $email = 1;
my $email_usename = 1;
my $email_maintainer = 1;
my $email_reviewer = 1;
+my $email_fixes = 1;
my $email_list = 1;
my $email_moderated_list = 1;
my $email_subscriber_list = 0;
@@ -249,6 +250,7 @@ if (!GetOptions(
'r!' => \$email_reviewer,
'n!' => \$email_usename,
'l!' => \$email_list,
+ 'fixes!' => \$email_fixes,
'moderated!' => \$email_moderated_list,
's!' => \$email_subscriber_list,
'multiline!' => \$output_multiline,
@@ -503,6 +505,7 @@ sub read_mailmap {
## use the filenames on the command line or find the filenames in the patchfiles
my @files = ();
+my @fixes = (); # commits referenced by Fixes: lines in a patch description
my @range = ();
my @keyword_tvi = ();
my @file_emails = ();
@@ -568,6 +571,8 @@ foreach my $file (@ARGV) {
my $filename2 = $2;
push(@files, $filename1);
push(@files, $filename2);
+ } elsif (m/^Fixes:\s+([0-9a-fA-F]{6,40})/) {
+ push(@fixes, $1) if ($email_fixes);
} elsif (m/^\+\+\+\s+(\S+)/ or m/^---\s+(\S+)/) {
my $filename = $1;
$filename =~ s@^[^/]*/@@;
@@ -598,6 +603,7 @@ foreach my $file (@ARGV) {
}
@file_emails = uniq(@file_emails);
+@fixes = uniq(@fixes);
my %email_hash_name;
my %email_hash_address;
@@ -612,7 +618,6 @@ my %deduplicate_name_hash = ();
my %deduplicate_address_hash = ();
my @maintainers = get_maintainers();
-
if (@maintainers) {
@maintainers = merge_email(@maintainers);
output(@maintainers);
@@ -927,6 +932,10 @@ sub get_maintainers {
}
}
+ foreach my $fix (@fixes) {
+ vcs_add_commit_signers($fix, "blamed_fixes");
+ }
+
foreach my $email (@email_to, @list_to) {
$email->[0] = deduplicate_email($email->[0]);
}
@@ -1031,6 +1040,7 @@ MAINTAINER field selection options:
--roles => show roles (status:subsystem, git-signer, list, etc...)
--rolestats => show roles and statistics (commits/total_commits, %)
--file-emails => add email addresses found in -f file (default: 0 (off))
+ --fixes => for patches, add signatures of commits with 'Fixes: <commit>' (default: 1 (on))
--scm => print SCM tree(s) if any
--status => print status if any
--subsystem => print subsystem name if any
@@ -1730,6 +1740,32 @@ sub vcs_is_hg {
return $vcs_used == 2;
}
+sub vcs_add_commit_signers {
+ return if (!vcs_exists());
+
+ my ($commit, $desc) = @_;
+ my $commit_count = 0;
+ my $commit_authors_ref;
+ my $commit_signers_ref;
+ my $stats_ref;
+ my @commit_authors = ();
+ my @commit_signers = ();
+ my $cmd;
+
+ $cmd = $VCS_cmds{"find_commit_signers_cmd"};
+ $cmd =~ s/(\$\w+)/$1/eeg; #substitute variables in $cmd
+
+ ($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, "");
+ @commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
+ @commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
+
+ foreach my $signer (@commit_signers) {
+ $signer = deduplicate_email($signer);
+ }
+
+ vcs_assign($desc, 1, @commit_signers);
+}
+
sub interactive_get_maintainers {
my ($list_ref) = @_;
my @list = @$list_ref;
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index d67f968eac21..b001c602414b 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -13,6 +13,7 @@ TARGETS += efivarfs
TARGETS += exec
TARGETS += filesystems
TARGETS += filesystems/binderfs
+TARGETS += filesystems/epoll
TARGETS += firmware
TARGETS += ftrace
TARGETS += futex
diff --git a/tools/testing/selftests/filesystems/epoll/.gitignore b/tools/testing/selftests/filesystems/epoll/.gitignore
new file mode 100644
index 000000000000..9ae8db44ec14
--- /dev/null
+++ b/tools/testing/selftests/filesystems/epoll/.gitignore
@@ -0,0 +1 @@
+epoll_wakeup_test
diff --git a/tools/testing/selftests/filesystems/epoll/Makefile b/tools/testing/selftests/filesystems/epoll/Makefile
new file mode 100644
index 000000000000..e62f3d4f68da
--- /dev/null
+++ b/tools/testing/selftests/filesystems/epoll/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += -I../../../../../usr/include/
+LDFLAGS += -lpthread
+TEST_GEN_PROGS := epoll_wakeup_test
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
new file mode 100644
index 000000000000..37a04dab56f0
--- /dev/null
+++ b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
@@ -0,0 +1,3074 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <poll.h>
+#include <unistd.h>
+#include <signal.h>
+#include <pthread.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+#include "../../kselftest_harness.h"
+
+struct epoll_mtcontext
+{
+ int efd[3];
+ int sfd[4];
+ int count;
+
+ pthread_t main;
+ pthread_t waiter;
+};
+
+static void signal_handler(int signum)
+{
+}
+
+static void kill_timeout(struct epoll_mtcontext *ctx)
+{
+ usleep(1000000);
+ pthread_kill(ctx->main, SIGUSR1);
+ pthread_kill(ctx->waiter, SIGUSR1);
+}
+
+static void *waiter_entry1a(void *data)
+{
+ struct epoll_event e;
+ struct epoll_mtcontext *ctx = data;
+
+ if (epoll_wait(ctx->efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx->count, 1);
+
+ return NULL;
+}
+
+static void *waiter_entry1ap(void *data)
+{
+ struct pollfd pfd;
+ struct epoll_event e;
+ struct epoll_mtcontext *ctx = data;
+
+ pfd.fd = ctx->efd[0];
+ pfd.events = POLLIN;
+ if (poll(&pfd, 1, -1) > 0) {
+ if (epoll_wait(ctx->efd[0], &e, 1, 0) > 0)
+ __sync_fetch_and_add(&ctx->count, 1);
+ }
+
+ return NULL;
+}
+
+static void *waiter_entry1o(void *data)
+{
+ struct epoll_event e;
+ struct epoll_mtcontext *ctx = data;
+
+ if (epoll_wait(ctx->efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_or(&ctx->count, 1);
+
+ return NULL;
+}
+
+static void *waiter_entry1op(void *data)
+{
+ struct pollfd pfd;
+ struct epoll_event e;
+ struct epoll_mtcontext *ctx = data;
+
+ pfd.fd = ctx->efd[0];
+ pfd.events = POLLIN;
+ if (poll(&pfd, 1, -1) > 0) {
+ if (epoll_wait(ctx->efd[0], &e, 1, 0) > 0)
+ __sync_fetch_and_or(&ctx->count, 1);
+ }
+
+ return NULL;
+}
+
+static void *waiter_entry2a(void *data)
+{
+ struct epoll_event events[2];
+ struct epoll_mtcontext *ctx = data;
+
+ if (epoll_wait(ctx->efd[0], events, 2, -1) > 0)
+ __sync_fetch_and_add(&ctx->count, 1);
+
+ return NULL;
+}
+
+static void *waiter_entry2ap(void *data)
+{
+ struct pollfd pfd;
+ struct epoll_event events[2];
+ struct epoll_mtcontext *ctx = data;
+
+ pfd.fd = ctx->efd[0];
+ pfd.events = POLLIN;
+ if (poll(&pfd, 1, -1) > 0) {
+ if (epoll_wait(ctx->efd[0], events, 2, 0) > 0)
+ __sync_fetch_and_add(&ctx->count, 1);
+ }
+
+ return NULL;
+}
+
+static void *emitter_entry1(void *data)
+{
+ struct epoll_mtcontext *ctx = data;
+
+ usleep(100000);
+ write(ctx->sfd[1], "w", 1);
+
+ kill_timeout(ctx);
+
+ return NULL;
+}
+
+static void *emitter_entry2(void *data)
+{
+ struct epoll_mtcontext *ctx = data;
+
+ usleep(100000);
+ write(ctx->sfd[1], "w", 1);
+ write(ctx->sfd[3], "w", 1);
+
+ kill_timeout(ctx);
+
+ return NULL;
+}
+
+/*
+ * t0
+ * | (ew)
+ * e0
+ * | (lt)
+ * s0
+ */
+TEST(epoll1)
+{
+ int efd;
+ int sfd[2];
+ struct epoll_event e;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+ efd = epoll_create(1);
+ ASSERT_GE(efd, 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+ EXPECT_EQ(epoll_wait(efd, &e, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd, &e, 1, 0), 1);
+
+ close(efd);
+ close(sfd[0]);
+ close(sfd[1]);
+}
+
+/*
+ * t0
+ * | (ew)
+ * e0
+ * | (et)
+ * s0
+ */
+TEST(epoll2)
+{
+ int efd;
+ int sfd[2];
+ struct epoll_event e;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+ efd = epoll_create(1);
+ ASSERT_GE(efd, 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+ EXPECT_EQ(epoll_wait(efd, &e, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd, &e, 1, 0), 0);
+
+ close(efd);
+ close(sfd[0]);
+ close(sfd[1]);
+}
+
+/*
+ * t0
+ * | (ew)
+ * e0
+ * (lt) / \ (lt)
+ * s0 s2
+ */
+TEST(epoll3)
+{
+ int efd;
+ int sfd[4];
+ struct epoll_event events[2];
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0);
+
+ efd = epoll_create(1);
+ ASSERT_GE(efd, 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], events), 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[2], events), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+ ASSERT_EQ(write(sfd[3], "w", 1), 1);
+
+ EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2);
+ EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2);
+
+ close(efd);
+ close(sfd[0]);
+ close(sfd[1]);
+ close(sfd[2]);
+ close(sfd[3]);
+}
+
+/*
+ * t0
+ * | (ew)
+ * e0
+ * (et) / \ (et)
+ * s0 s2
+ */
+TEST(epoll4)
+{
+ int efd;
+ int sfd[4];
+ struct epoll_event events[2];
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0);
+
+ efd = epoll_create(1);
+ ASSERT_GE(efd, 0);
+
+ events[0].events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], events), 0);
+
+ events[0].events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[2], events), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+ ASSERT_EQ(write(sfd[3], "w", 1), 1);
+
+ EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2);
+ EXPECT_EQ(epoll_wait(efd, events, 2, 0), 0);
+
+ close(efd);
+ close(sfd[0]);
+ close(sfd[1]);
+ close(sfd[2]);
+ close(sfd[3]);
+}
+
+/*
+ * t0
+ * | (p)
+ * e0
+ * | (lt)
+ * s0
+ */
+TEST(epoll5)
+{
+ int efd;
+ int sfd[2];
+ struct pollfd pfd;
+ struct epoll_event e;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+
+ efd = epoll_create(1);
+ ASSERT_GE(efd, 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+ pfd.fd = efd;
+ pfd.events = POLLIN;
+ ASSERT_EQ(poll(&pfd, 1, 0), 1);
+ ASSERT_EQ(epoll_wait(efd, &e, 1, 0), 1);
+
+ pfd.fd = efd;
+ pfd.events = POLLIN;
+ ASSERT_EQ(poll(&pfd, 1, 0), 1);
+ ASSERT_EQ(epoll_wait(efd, &e, 1, 0), 1);
+
+ close(efd);
+ close(sfd[0]);
+ close(sfd[1]);
+}
+
+/*
+ * t0
+ * | (p)
+ * e0
+ * | (et)
+ * s0
+ */
+TEST(epoll6)
+{
+ int efd;
+ int sfd[2];
+ struct pollfd pfd;
+ struct epoll_event e;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+
+ efd = epoll_create(1);
+ ASSERT_GE(efd, 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+ pfd.fd = efd;
+ pfd.events = POLLIN;
+ ASSERT_EQ(poll(&pfd, 1, 0), 1);
+ ASSERT_EQ(epoll_wait(efd, &e, 1, 0), 1);
+
+ pfd.fd = efd;
+ pfd.events = POLLIN;
+ ASSERT_EQ(poll(&pfd, 1, 0), 0);
+ ASSERT_EQ(epoll_wait(efd, &e, 1, 0), 0);
+
+ close(efd);
+ close(sfd[0]);
+ close(sfd[1]);
+}
+
+/*
+ * t0
+ * | (p)
+ * e0
+ * (lt) / \ (lt)
+ * s0 s2
+ */
+
+TEST(epoll7)
+{
+ int efd;
+ int sfd[4];
+ struct pollfd pfd;
+ struct epoll_event events[2];
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0);
+
+ efd = epoll_create(1);
+ ASSERT_GE(efd, 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], events), 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[2], events), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+ ASSERT_EQ(write(sfd[3], "w", 1), 1);
+
+ pfd.fd = efd;
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2);
+
+ pfd.fd = efd;
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2);
+
+ close(efd);
+ close(sfd[0]);
+ close(sfd[1]);
+ close(sfd[2]);
+ close(sfd[3]);
+}
+
+/*
+ * t0
+ * | (p)
+ * e0
+ * (et) / \ (et)
+ * s0 s2
+ */
+TEST(epoll8)
+{
+ int efd;
+ int sfd[4];
+ struct pollfd pfd;
+ struct epoll_event events[2];
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0);
+
+ efd = epoll_create(1);
+ ASSERT_GE(efd, 0);
+
+ events[0].events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], events), 0);
+
+ events[0].events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[2], events), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+ ASSERT_EQ(write(sfd[3], "w", 1), 1);
+
+ pfd.fd = efd;
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2);
+
+ pfd.fd = efd;
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 0);
+ EXPECT_EQ(epoll_wait(efd, events, 2, 0), 0);
+
+ close(efd);
+ close(sfd[0]);
+ close(sfd[1]);
+ close(sfd[2]);
+ close(sfd[3]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (ew)
+ * e0
+ * | (lt)
+ * s0
+ */
+TEST(epoll9)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (ew)
+ * e0
+ * | (et)
+ * s0
+ */
+TEST(epoll10)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 1);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (ew)
+ * e0
+ * (lt) / \ (lt)
+ * s0 s2
+ */
+TEST(epoll11)
+{
+ pthread_t emitter;
+ struct epoll_event events[2];
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], events), 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[2], events), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry2a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], events, 2, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+ close(ctx.sfd[2]);
+ close(ctx.sfd[3]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (ew)
+ * e0
+ * (et) / \ (et)
+ * s0 s2
+ */
+TEST(epoll12)
+{
+ pthread_t emitter;
+ struct epoll_event events[2];
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ events[0].events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], events), 0);
+
+ events[0].events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[2], events), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], events, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+ close(ctx.sfd[2]);
+ close(ctx.sfd[3]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (p)
+ * e0
+ * | (lt)
+ * s0
+ */
+TEST(epoll13)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (p)
+ * e0
+ * | (et)
+ * s0
+ */
+TEST(epoll14)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 1);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (p)
+ * e0
+ * (lt) / \ (lt)
+ * s0 s2
+ */
+TEST(epoll15)
+{
+ pthread_t emitter;
+ struct epoll_event events[2];
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], events), 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[2], events), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry2ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], events, 2, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+ close(ctx.sfd[2]);
+ close(ctx.sfd[3]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (p)
+ * e0
+ * (et) / \ (et)
+ * s0 s2
+ */
+TEST(epoll16)
+{
+ pthread_t emitter;
+ struct epoll_event events[2];
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ events[0].events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], events), 0);
+
+ events[0].events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[2], events), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], events, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+ close(ctx.sfd[2]);
+ close(ctx.sfd[3]);
+}
+
+/*
+ * t0
+ * | (ew)
+ * e0
+ * | (lt)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll17)
+{
+ int efd[2];
+ int sfd[2];
+ struct epoll_event e;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+ efd[0] = epoll_create(1);
+ ASSERT_GE(efd[0], 0);
+
+ efd[1] = epoll_create(1);
+ ASSERT_GE(efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+
+ close(efd[0]);
+ close(efd[1]);
+ close(sfd[0]);
+ close(sfd[1]);
+}
+
+/*
+ * t0
+ * | (ew)
+ * e0
+ * | (lt)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll18)
+{
+ int efd[2];
+ int sfd[2];
+ struct epoll_event e;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+ efd[0] = epoll_create(1);
+ ASSERT_GE(efd[0], 0);
+
+ efd[1] = epoll_create(1);
+ ASSERT_GE(efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+
+ close(efd[0]);
+ close(efd[1]);
+ close(sfd[0]);
+ close(sfd[1]);
+}
+
+/*
+ * t0
+ * | (ew)
+ * e0
+ * | (et)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll19)
+{
+ int efd[2];
+ int sfd[2];
+ struct epoll_event e;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+ efd[0] = epoll_create(1);
+ ASSERT_GE(efd[0], 0);
+
+ efd[1] = epoll_create(1);
+ ASSERT_GE(efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 0);
+
+ close(efd[0]);
+ close(efd[1]);
+ close(sfd[0]);
+ close(sfd[1]);
+}
+
+/*
+ * t0
+ * | (ew)
+ * e0
+ * | (et)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll20)
+{
+ int efd[2];
+ int sfd[2];
+ struct epoll_event e;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+ efd[0] = epoll_create(1);
+ ASSERT_GE(efd[0], 0);
+
+ efd[1] = epoll_create(1);
+ ASSERT_GE(efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 0);
+
+ close(efd[0]);
+ close(efd[1]);
+ close(sfd[0]);
+ close(sfd[1]);
+}
+
+/*
+ * t0
+ * | (p)
+ * e0
+ * | (lt)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll21)
+{
+ int efd[2];
+ int sfd[2];
+ struct pollfd pfd;
+ struct epoll_event e;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+ efd[0] = epoll_create(1);
+ ASSERT_GE(efd[0], 0);
+
+ efd[1] = epoll_create(1);
+ ASSERT_GE(efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+ pfd.fd = efd[0];
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+
+ pfd.fd = efd[0];
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+
+ close(efd[0]);
+ close(efd[1]);
+ close(sfd[0]);
+ close(sfd[1]);
+}
+
+/*
+ * t0
+ * | (p)
+ * e0
+ * | (lt)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll22)
+{
+ int efd[2];
+ int sfd[2];
+ struct pollfd pfd;
+ struct epoll_event e;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+ efd[0] = epoll_create(1);
+ ASSERT_GE(efd[0], 0);
+
+ efd[1] = epoll_create(1);
+ ASSERT_GE(efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+ pfd.fd = efd[0];
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+
+ pfd.fd = efd[0];
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+
+ close(efd[0]);
+ close(efd[1]);
+ close(sfd[0]);
+ close(sfd[1]);
+}
+
+/*
+ * t0
+ * | (p)
+ * e0
+ * | (et)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll23)
+{
+ int efd[2];
+ int sfd[2];
+ struct pollfd pfd;
+ struct epoll_event e;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+ efd[0] = epoll_create(1);
+ ASSERT_GE(efd[0], 0);
+
+ efd[1] = epoll_create(1);
+ ASSERT_GE(efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+ pfd.fd = efd[0];
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+
+ pfd.fd = efd[0];
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 0);
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 0);
+
+ close(efd[0]);
+ close(efd[1]);
+ close(sfd[0]);
+ close(sfd[1]);
+}
+
+/*
+ * t0
+ * | (p)
+ * e0
+ * | (et)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll24)
+{
+ int efd[2];
+ int sfd[2];
+ struct pollfd pfd;
+ struct epoll_event e;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+ efd[0] = epoll_create(1);
+ ASSERT_GE(efd[0], 0);
+
+ efd[1] = epoll_create(1);
+ ASSERT_GE(efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+ pfd.fd = efd[0];
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+
+ pfd.fd = efd[0];
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 0);
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 0);
+
+ close(efd[0]);
+ close(efd[1]);
+ close(sfd[0]);
+ close(sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (ew)
+ * e0
+ * | (lt)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll25)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (ew)
+ * e0
+ * | (lt)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll26)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (ew)
+ * e0
+ * | (et)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll27)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
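+	/*
+	 * The et link e0->e1 delivers a single wakeup, so only one of
+	 * the two waiters is expected to see the event.
+	 */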
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 1);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (ew)
+ * e0
+ * | (et)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll28)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 1);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (p)
+ * e0
+ * | (lt)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll29)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (p)
+ * e0
+ * | (lt)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll30)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (p)
+ * e0
+ * | (et)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll31)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 1);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (p)
+ * e0
+ * | (et)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll32)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 1);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) | | (ew)
+ * | e0
+ * \ / (lt)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll33)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) | | (ew)
+ * | e0
+ * \ / (lt)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll34)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1o, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+ __sync_fetch_and_or(&ctx.count, 2);
+
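+	/*
+	 * The waiter ors in 1 and t0 ors in 2: count is 3 if both saw
+	 * the event, 2 if only t0 did.
+	 */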
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3));
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) | | (ew)
+ * | e0
+ * \ / (et)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll35)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) | | (ew)
+ * | e0
+ * \ / (et)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll36)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1o, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+ __sync_fetch_and_or(&ctx.count, 2);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3));
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (p) | | (ew)
+ * | e0
+ * \ / (lt)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll37)
+{
+ pthread_t emitter;
+ struct pollfd pfd;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
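+	/*
+	 * t0 polls e1 for readiness first, then reaps the event with a
+	 * non-blocking epoll_wait().
+	 */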
+ pfd.fd = ctx.efd[1];
+ pfd.events = POLLIN;
+ if (poll(&pfd, 1, -1) > 0) {
+ if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+ }
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (p) | | (ew)
+ * | e0
+ * \ / (lt)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll38)
+{
+ pthread_t emitter;
+ struct pollfd pfd;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1o, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ pfd.fd = ctx.efd[1];
+ pfd.events = POLLIN;
+ if (poll(&pfd, 1, -1) > 0) {
+ if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0)
+ __sync_fetch_and_or(&ctx.count, 2);
+ }
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3));
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (p) | | (ew)
+ * | e0
+ * \ / (et)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll39)
+{
+ pthread_t emitter;
+ struct pollfd pfd;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ pfd.fd = ctx.efd[1];
+ pfd.events = POLLIN;
+ if (poll(&pfd, 1, -1) > 0) {
+ if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+ }
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (p) | | (ew)
+ * | e0
+ * \ / (et)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll40)
+{
+ pthread_t emitter;
+ struct pollfd pfd;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1o, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ pfd.fd = ctx.efd[1];
+ pfd.events = POLLIN;
+ if (poll(&pfd, 1, -1) > 0) {
+ if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0)
+ __sync_fetch_and_or(&ctx.count, 2);
+ }
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3));
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) | | (p)
+ * | e0
+ * \ / (lt)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll41)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) | | (p)
+ * | e0
+ * \ / (lt)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll42)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1op, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+ __sync_fetch_and_or(&ctx.count, 2);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3));
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) | | (p)
+ * | e0
+ * \ / (et)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll43)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) | | (p)
+ * | e0
+ * \ / (et)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll44)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1op, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+ __sync_fetch_and_or(&ctx.count, 2);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3));
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (p) | | (p)
+ * | e0
+ * \ / (lt)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll45)
+{
+ pthread_t emitter;
+ struct pollfd pfd;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ pfd.fd = ctx.efd[1];
+ pfd.events = POLLIN;
+ if (poll(&pfd, 1, -1) > 0) {
+ if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+ }
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (p) | | (p)
+ * | e0
+ * \ / (lt)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll46)
+{
+	pthread_t emitter;
+	struct pollfd pfd;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1op, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	pfd.fd = ctx.efd[1];
+	pfd.events = POLLIN;
+	if (poll(&pfd, 1, -1) > 0) {
+		if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0)
+			__sync_fetch_and_or(&ctx.count, 2);
+	}
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3));
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (p) | | (p)
+ * | e0
+ * \ / (et)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll47)
+{
+ pthread_t emitter;
+ struct pollfd pfd;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ pfd.fd = ctx.efd[1];
+ pfd.events = POLLIN;
+ if (poll(&pfd, 1, -1) > 0) {
+ if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+ }
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (p) | | (p)
+ * | e0
+ * \ / (et)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll48)
+{
+	pthread_t emitter;
+	struct pollfd pfd;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1op, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	pfd.fd = ctx.efd[1];
+	pfd.events = POLLIN;
+	if (poll(&pfd, 1, -1) > 0) {
+		if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0)
+			__sync_fetch_and_or(&ctx.count, 2);
+	}
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3));
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0
+ * | (ew)
+ * e0
+ * (lt) / \ (lt)
+ * e1 e2
+ * (lt) | | (lt)
+ * s0 s2
+ */
+TEST(epoll49)
+{
+ int efd[3];
+ int sfd[4];
+ struct epoll_event events[2];
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0);
+
+ efd[0] = epoll_create(1);
+ ASSERT_GE(efd[0], 0);
+
+ efd[1] = epoll_create(1);
+ ASSERT_GE(efd[1], 0);
+
+ efd[2] = epoll_create(1);
+ ASSERT_GE(efd[2], 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], events), 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[2], EPOLL_CTL_ADD, sfd[2], events), 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], events), 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[2], events), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+ ASSERT_EQ(write(sfd[3], "w", 1), 1);
+
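+	/*
+	 * All links are lt, so both sibling events show up on the first
+	 * sweep and again on the second.
+	 */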
+ EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2);
+ EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2);
+
+ close(efd[0]);
+ close(efd[1]);
+ close(efd[2]);
+ close(sfd[0]);
+ close(sfd[1]);
+ close(sfd[2]);
+ close(sfd[3]);
+}
+
+/*
+ * t0
+ * | (ew)
+ * e0
+ * (et) / \ (et)
+ * e1 e2
+ * (lt) | | (lt)
+ * s0 s2
+ */
+TEST(epoll50)
+{
+ int efd[3];
+ int sfd[4];
+ struct epoll_event events[2];
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0);
+
+ efd[0] = epoll_create(1);
+ ASSERT_GE(efd[0], 0);
+
+ efd[1] = epoll_create(1);
+ ASSERT_GE(efd[1], 0);
+
+ efd[2] = epoll_create(1);
+ ASSERT_GE(efd[2], 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], events), 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[2], EPOLL_CTL_ADD, sfd[2], events), 0);
+
+ events[0].events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], events), 0);
+
+ events[0].events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[2], events), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+ ASSERT_EQ(write(sfd[3], "w", 1), 1);
+
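+	/*
+	 * The et links from e0 fire once each: the first sweep returns
+	 * both events, the second returns none.
+	 */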
+ EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2);
+ EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 0);
+
+ close(efd[0]);
+ close(efd[1]);
+ close(efd[2]);
+ close(sfd[0]);
+ close(sfd[1]);
+ close(sfd[2]);
+ close(sfd[3]);
+}
+
+/*
+ * t0
+ * | (p)
+ * e0
+ * (lt) / \ (lt)
+ * e1 e2
+ * (lt) | | (lt)
+ * s0 s2
+ */
+TEST(epoll51)
+{
+ int efd[3];
+ int sfd[4];
+ struct pollfd pfd;
+ struct epoll_event events[2];
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0);
+
+ efd[0] = epoll_create(1);
+ ASSERT_GE(efd[0], 0);
+
+ efd[1] = epoll_create(1);
+ ASSERT_GE(efd[1], 0);
+
+ efd[2] = epoll_create(1);
+ ASSERT_GE(efd[2], 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], events), 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[2], EPOLL_CTL_ADD, sfd[2], events), 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], events), 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[2], events), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+ ASSERT_EQ(write(sfd[3], "w", 1), 1);
+
+ pfd.fd = efd[0];
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2);
+
+ pfd.fd = efd[0];
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2);
+
+ close(efd[0]);
+ close(efd[1]);
+ close(efd[2]);
+ close(sfd[0]);
+ close(sfd[1]);
+ close(sfd[2]);
+ close(sfd[3]);
+}
+
+/*
+ * t0
+ * | (p)
+ * e0
+ * (et) / \ (et)
+ * e1 e2
+ * (lt) | | (lt)
+ * s0 s2
+ */
+TEST(epoll52)
+{
+ int efd[3];
+ int sfd[4];
+ struct pollfd pfd;
+ struct epoll_event events[2];
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0);
+
+ efd[0] = epoll_create(1);
+ ASSERT_GE(efd[0], 0);
+
+ efd[1] = epoll_create(1);
+ ASSERT_GE(efd[1], 0);
+
+ efd[2] = epoll_create(1);
+ ASSERT_GE(efd[2], 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], events), 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[2], EPOLL_CTL_ADD, sfd[2], events), 0);
+
+ events[0].events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], events), 0);
+
+ events[0].events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[2], events), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+ ASSERT_EQ(write(sfd[3], "w", 1), 1);
+
+ pfd.fd = efd[0];
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2);
+
+ pfd.fd = efd[0];
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 0);
+ EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 0);
+
+ close(efd[0]);
+ close(efd[1]);
+ close(efd[2]);
+ close(sfd[0]);
+ close(sfd[1]);
+ close(sfd[2]);
+ close(sfd[3]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (ew)
+ * e0
+ * (lt) / \ (lt)
+ * e1 e2
+ * (lt) | | (lt)
+ * s0 s2
+ */
+TEST(epoll53)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ ctx.efd[2] = epoll_create(1);
+ ASSERT_GE(ctx.efd[2], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.efd[2]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+ close(ctx.sfd[2]);
+ close(ctx.sfd[3]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (ew)
+ * e0
+ * (et) / \ (et)
+ * e1 e2
+ * (lt) | | (lt)
+ * s0 s2
+ */
+TEST(epoll54)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ ctx.efd[2] = epoll_create(1);
+ ASSERT_GE(ctx.efd[2], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.efd[2]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+ close(ctx.sfd[2]);
+ close(ctx.sfd[3]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (p)
+ * e0
+ * (lt) / \ (lt)
+ * e1 e2
+ * (lt) | | (lt)
+ * s0 s2
+ */
+TEST(epoll55)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ ctx.efd[2] = epoll_create(1);
+ ASSERT_GE(ctx.efd[2], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.efd[2]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+ close(ctx.sfd[2]);
+ close(ctx.sfd[3]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (p)
+ * e0
+ * (et) / \ (et)
+ * e1 e2
+ * (lt) | | (lt)
+ * s0 s2
+ */
+TEST(epoll56)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ ctx.efd[2] = epoll_create(1);
+ ASSERT_GE(ctx.efd[2], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.efd[2]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+ close(ctx.sfd[2]);
+ close(ctx.sfd[3]);
+}
+
+/*
+ * t0 t1
+ * (p) \ / (p)
+ * e0
+ * (lt) / \ (lt)
+ * e1 e2
+ * (lt) | | (lt)
+ * s0 s2
+ */
+TEST(epoll57)
+{
+ pthread_t emitter;
+ struct pollfd pfd;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ ctx.efd[2] = epoll_create(1);
+ ASSERT_GE(ctx.efd[2], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+ pfd.fd = ctx.efd[0];
+ pfd.events = POLLIN;
+ if (poll(&pfd, 1, -1) > 0) {
+ if (epoll_wait(ctx.efd[0], &e, 1, 0) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+ }
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.efd[2]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+ close(ctx.sfd[2]);
+ close(ctx.sfd[3]);
+}
+
+/*
+ * t0 t1
+ * (p) \ / (p)
+ * e0
+ * (et) / \ (et)
+ * e1 e2
+ * (lt) | | (lt)
+ * s0 s2
+ */
+TEST(epoll58)
+{
+ pthread_t emitter;
+ struct pollfd pfd;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ ctx.efd[2] = epoll_create(1);
+ ASSERT_GE(ctx.efd[2], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+ pfd.fd = ctx.efd[0];
+ pfd.events = POLLIN;
+ if (poll(&pfd, 1, -1) > 0) {
+ if (epoll_wait(ctx.efd[0], &e, 1, 0) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+ }
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.efd[2]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+ close(ctx.sfd[2]);
+ close(ctx.sfd[3]);
+}
+
+TEST_HARNESS_MAIN
diff --git a/usr/include/Makefile b/usr/include/Makefile
index e5895a79c45f..84598469e6ff 100644
--- a/usr/include/Makefile
+++ b/usr/include/Makefile
@@ -16,9 +16,6 @@ override c_flags = $(UAPI_CFLAGS) -Wp,-MD,$(depfile) -I$(objtree)/usr/include
# Please consider to fix the header first.
#
# Sorted alphabetically.
-header-test- += asm/ipcbuf.h
-header-test- += asm/msgbuf.h
-header-test- += asm/sembuf.h
header-test- += asm/shmbuf.h
header-test- += asm/signal.h
header-test- += asm/ucontext.h
@@ -40,7 +37,6 @@ header-test- += linux/omapfb.h
header-test- += linux/patchkey.h
header-test- += linux/phonet.h
header-test- += linux/reiserfs_xattr.h
-header-test- += linux/scc.h
header-test- += linux/sctp.h
header-test- += linux/signal.h
header-test- += linux/sysctl.h