aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2020-10-29 12:40:54 +1100
committerStephen Rothwell <sfr@canb.auug.org.au>2020-10-29 12:40:54 +1100
commit8f4b93cb48614f0419640036eadf9708d0aa4e94 (patch)
tree55f647130d1d7b2d848f78a5a4bb4daaa649e09c
parentddd65e4699045d85171a94f12aabbbe945d624d2 (diff)
parent6f72faf4a32303c8bdc6491186b79391e9cf0c7e (diff)
Merge remote-tracking branch 'tip/auto-latest' into master
-rw-r--r--Documentation/admin-guide/hw-vuln/index.rst1
-rw-r--r--Documentation/admin-guide/hw-vuln/l1d_flush.rst70
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt17
-rw-r--r--Documentation/userspace-api/spec_ctrl.rst8
-rw-r--r--Documentation/x86/index.rst2
-rw-r--r--Documentation/x86/resctrl.rst (renamed from Documentation/x86/resctrl_ui.rst)93
-rw-r--r--MAINTAINERS6
-rw-r--r--arch/um/include/asm/cacheflush.h9
-rw-r--r--arch/x86/Kconfig.debug3
-rw-r--r--arch/x86/entry/vdso/vma.c4
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_64.c2
-rw-r--r--arch/x86/events/core.c2
-rw-r--r--arch/x86/events/intel/ds.c2
-rw-r--r--arch/x86/events/intel/lbr.c2
-rw-r--r--arch/x86/include/asm/atomic.h2
-rw-r--r--arch/x86/include/asm/atomic64_64.h2
-rw-r--r--arch/x86/include/asm/cacheflush.h8
-rw-r--r--arch/x86/include/asm/cmpxchg.h2
-rw-r--r--arch/x86/include/asm/compat.h15
-rw-r--r--arch/x86/include/asm/copy_mc_test.h75
-rw-r--r--arch/x86/include/asm/elf.h13
-rw-r--r--arch/x86/include/asm/mmu.h9
-rw-r--r--arch/x86/include/asm/mmu_context.h2
-rw-r--r--arch/x86/include/asm/processor.h2
-rw-r--r--arch/x86/include/asm/thread_info.h13
-rw-r--r--arch/x86/include/asm/tlbflush.h2
-rw-r--r--arch/x86/kernel/cpu/bugs.c54
-rw-r--r--arch/x86/kernel/cpu/mce/core.c2
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c1
-rw-r--r--arch/x86/kernel/cpu/resctrl/core.c4
-rw-r--r--arch/x86/kernel/cpu/resctrl/internal.h1
-rw-r--r--arch/x86/kernel/cpu/resctrl/monitor.c82
-rw-r--r--arch/x86/kernel/perf_regs.c2
-rw-r--r--arch/x86/kernel/process_64.c28
-rw-r--r--arch/x86/kernel/smpboot.c11
-rw-r--r--arch/x86/kernel/traps.c43
-rw-r--r--arch/x86/lib/copy_mc.c4
-rw-r--r--arch/x86/lib/copy_mc_64.S10
-rw-r--r--arch/x86/mm/tlb.c100
-rw-r--r--arch/x86/oprofile/backtrace.c2
-rw-r--r--fs/binfmt_elf.c4
-rw-r--r--fs/compat_binfmt_elf.c20
-rw-r--r--include/asm-generic/atomic-instrumented.h216
-rw-r--r--include/linux/atomic-arch-fallback.h90
-rw-r--r--include/linux/atomic-fallback.h90
-rw-r--r--include/linux/elf.h10
-rw-r--r--include/linux/freelist.h129
-rw-r--r--include/linux/kprobes.h25
-rw-r--r--include/linux/llist.h23
-rw-r--r--include/linux/sched.h4
-rw-r--r--include/linux/time64.h4
-rw-r--r--include/uapi/linux/prctl.h1
-rw-r--r--kernel/fork.c4
-rw-r--r--kernel/kprobes.c286
-rw-r--r--kernel/locking/lockdep.c4
-rw-r--r--kernel/rcu/tree.c2
-rw-r--r--kernel/stop_machine.c2
-rw-r--r--kernel/time/hrtimer.c5
-rw-r--r--kernel/time/itimer.c4
-rw-r--r--kernel/time/jiffies.c3
-rw-r--r--kernel/time/sched_clock.c4
-rw-r--r--kernel/time/timekeeping.h2
-rw-r--r--kernel/time/timer.c5
-rw-r--r--kernel/trace/trace_kprobe.c3
-rwxr-xr-xscripts/atomic/gen-atomic-fallback.sh63
-rwxr-xr-xscripts/atomic/gen-atomic-instrumented.sh29
-rw-r--r--tools/testing/nvdimm/test/nfit.c103
67 files changed, 1206 insertions, 639 deletions
diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst
index ca4dbdd9016d..21710f8609fe 100644
--- a/Documentation/admin-guide/hw-vuln/index.rst
+++ b/Documentation/admin-guide/hw-vuln/index.rst
@@ -15,3 +15,4 @@ are configurable at compile, boot or run time.
tsx_async_abort
multihit.rst
special-register-buffer-data-sampling.rst
+ l1d_flush.rst
diff --git a/Documentation/admin-guide/hw-vuln/l1d_flush.rst b/Documentation/admin-guide/hw-vuln/l1d_flush.rst
new file mode 100644
index 000000000000..adc4ecc72361
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/l1d_flush.rst
@@ -0,0 +1,70 @@
+L1D Flushing
+============
+
+With an increasing number of vulnerabilities being reported around data
+leaks from the Level 1 Data cache (L1D) the kernel provides an opt-in
+mechanism to flush the L1D cache on context switch.
+
+This mechanism can be used to address e.g. CVE-2020-0550. For applications
+the mechanism keeps them safe from vulnerabilities, related to leaks
+(snooping of) from the L1D cache.
+
+
+Related CVEs
+------------
+The following CVEs can be addressed by this
+mechanism
+
+ ============= ======================== ==================
+ CVE-2020-0550 Improper Data Forwarding OS related aspects
+ ============= ======================== ==================
+
+Usage Guidelines
+----------------
+
+Please see document: :ref:`Documentation/userspace-api/spec_ctrl.rst` for
+details.
+
+**NOTE**: The feature is disabled by default, applications need to
+specifically opt into the feature to enable it.
+
+Mitigation
+----------
+
+When PR_SET_L1D_FLUSH is enabled for a task a flush of the L1D cache is
+performed when the task is scheduled out and the incoming task belongs to a
+different process and therefore to a different address space.
+
+If the underlying CPU supports L1D flushing in hardware, the hardware
+mechanism is used, software fallback for the mitigation, is not supported.
+
+Mitigation control on the kernel command line
+---------------------------------------------
+
+The kernel command line allows to control the L1D flush mitigations at boot
+time with the option "l1d_flush_out=". The valid arguments for this option are:
+
+ ============ =============================================================
+ off Disables the prctl interface, applications trying to use
+ the prctl() will fail with an error
+ ============ =============================================================
+
+By default the API is enabled and applications opt-in by by using the prctl
+API.
+
+Limitations
+-----------
+
+The mechanism does not mitigate L1D data leaks between tasks belonging to
+different processes which are concurrently executing on sibling threads of
+a physical CPU core when SMT is enabled on the system.
+
+This can be addressed by controlled placement of processes on physical CPU
+cores or by disabling SMT. See the relevant chapter in the L1TF mitigation
+document: :ref:`Documentation/admin-guide/hw-vuln/l1tf.rst <smt_control>`.
+
+**NOTE** : Checks have been added to ensure that the prctl API associated
+with the opt-in will work only when the task affinity of the task opting
+in, is limited to cores running in non-SMT mode. The same checks are made
+when L1D is flushed. Changing the affinity after opting in, would result
+in flushes not working on cores that are in non-SMT mode.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 526d65d8573a..bd1a5b87a5e2 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2320,6 +2320,23 @@
feature (tagged TLBs) on capable Intel chips.
Default is 1 (enabled)
+ l1d_flush_out= [X86,INTEL]
+ Control mitigation for L1D based snooping vulnerability.
+
+ Certain CPUs are vulnerable to an exploit against CPU
+ internal buffers which can forward information to a
+ disclosure gadget under certain conditions.
+
+ In vulnerable processors, the speculatively
+ forwarded data can be used in a cache side channel
+ attack, to access data to which the attacker does
+ not have direct access.
+
+ This parameter controls the mitigation. The
+ options are:
+
+ off - Unconditionally disable the mitigation
+
l1tf= [X86] Control mitigation of the L1TF vulnerability on
affected CPUs
diff --git a/Documentation/userspace-api/spec_ctrl.rst b/Documentation/userspace-api/spec_ctrl.rst
index 7ddd8f667459..f39744ef8810 100644
--- a/Documentation/userspace-api/spec_ctrl.rst
+++ b/Documentation/userspace-api/spec_ctrl.rst
@@ -106,3 +106,11 @@ Speculation misfeature controls
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0);
+
+- PR_SPEC_L1D_FLUSH_OUT: Flush L1D Cache on context switch out of the task
+ (works only when tasks run on non SMT cores)
+
+ Invocations:
+ * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_L1D_FLUSH_OUT, 0, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_L1D_FLUSH_OUT, PR_SPEC_ENABLE, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_L1D_FLUSH_OUT, PR_SPEC_DISABLE, 0, 0);
diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst
index b224d12c880b..9c6ebf355f81 100644
--- a/Documentation/x86/index.rst
+++ b/Documentation/x86/index.rst
@@ -27,7 +27,7 @@ x86-specific Documentation
pti
mds
microcode
- resctrl_ui
+ resctrl
tsx_async_abort
usb-legacy-support
i386/index
diff --git a/Documentation/x86/resctrl_ui.rst b/Documentation/x86/resctrl.rst
index e59b7b93a9b4..71a531061e4e 100644
--- a/Documentation/x86/resctrl_ui.rst
+++ b/Documentation/x86/resctrl.rst
@@ -1209,3 +1209,96 @@ View the llc occupancy snapshot::
# cat /sys/fs/resctrl/p1/mon_data/mon_L3_00/llc_occupancy
11234000
+
+Intel RDT Errata
+================
+
+Intel MBM Counters May Report System Memory Bandwidth Incorrectly
+-----------------------------------------------------------------
+
+Errata SKX99 for Skylake server and BDF102 for Broadwell server.
+
+Problem: Intel Memory Bandwidth Monitoring (MBM) counters track metrics
+according to the assigned Resource Monitor ID (RMID) for that logical
+core. The IA32_QM_CTR register (MSR 0xC8E), used to report these
+metrics, may report incorrect system bandwidth for certain RMID values.
+
+Implication: Due to the errata, system memory bandwidth may not match
+what is reported.
+
+Workaround: MBM total and local readings are corrected according to the
+following correction factor table:
+
++---------------+---------------+---------------+-----------------+
+|core count |rmid count |rmid threshold |correction factor|
++---------------+---------------+---------------+-----------------+
+|1 |8 |0 |1.000000 |
++---------------+---------------+---------------+-----------------+
+|2 |16 |0 |1.000000 |
++---------------+---------------+---------------+-----------------+
+|3 |24 |15 |0.969650 |
++---------------+---------------+---------------+-----------------+
+|4 |32 |0 |1.000000 |
++---------------+---------------+---------------+-----------------+
+|6 |48 |31 |0.969650 |
++---------------+---------------+---------------+-----------------+
+|7 |56 |47 |1.142857 |
++---------------+---------------+---------------+-----------------+
+|8 |64 |0 |1.000000 |
++---------------+---------------+---------------+-----------------+
+|9 |72 |63 |1.185115 |
++---------------+---------------+---------------+-----------------+
+|10 |80 |63 |1.066553 |
++---------------+---------------+---------------+-----------------+
+|11 |88 |79 |1.454545 |
++---------------+---------------+---------------+-----------------+
+|12 |96 |0 |1.000000 |
++---------------+---------------+---------------+-----------------+
+|13 |104 |95 |1.230769 |
++---------------+---------------+---------------+-----------------+
+|14 |112 |95 |1.142857 |
++---------------+---------------+---------------+-----------------+
+|15 |120 |95 |1.066667 |
++---------------+---------------+---------------+-----------------+
+|16 |128 |0 |1.000000 |
++---------------+---------------+---------------+-----------------+
+|17 |136 |127 |1.254863 |
++---------------+---------------+---------------+-----------------+
+|18 |144 |127 |1.185255 |
++---------------+---------------+---------------+-----------------+
+|19 |152 |0 |1.000000 |
++---------------+---------------+---------------+-----------------+
+|20 |160 |127 |1.066667 |
++---------------+---------------+---------------+-----------------+
+|21 |168 |0 |1.000000 |
++---------------+---------------+---------------+-----------------+
+|22 |176 |159 |1.454334 |
++---------------+---------------+---------------+-----------------+
+|23 |184 |0 |1.000000 |
++---------------+---------------+---------------+-----------------+
+|24 |192 |127 |0.969744 |
++---------------+---------------+---------------+-----------------+
+|25 |200 |191 |1.280246 |
++---------------+---------------+---------------+-----------------+
+|26 |208 |191 |1.230921 |
++---------------+---------------+---------------+-----------------+
+|27 |216 |0 |1.000000 |
++---------------+---------------+---------------+-----------------+
+|28 |224 |191 |1.143118 |
++---------------+---------------+---------------+-----------------+
+
+If rmid > rmid threshold, MBM total and local values should be multiplied
+by the correction factor.
+
+See:
+
+1. Erratum SKX99 in Intel Xeon Processor Scalable Family Specification Update:
+http://web.archive.org/web/20200716124958/https://www.intel.com/content/www/us/en/processors/xeon/scalable/xeon-scalable-spec-update.html
+
+2. Erratum BDF102 in Intel Xeon E5-2600 v4 Processor Product Family Specification Update:
+http://web.archive.org/web/20191125200531/https://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/xeon-e5-v4-spec-update.pdf
+
+3. The errata in Intel Resource Director Technology (Intel RDT) on 2nd Generation Intel Xeon Scalable Processors Reference Manual:
+https://software.intel.com/content/www/us/en/develop/articles/intel-resource-director-technology-rdt-reference-manual.html
+
+for further information.
diff --git a/MAINTAINERS b/MAINTAINERS
index cf2a2e4edd24..fa316ee9a6ba 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15821,13 +15821,14 @@ F: include/linux/sfp.h
K: phylink\.h|struct\s+phylink|\.phylink|>phylink_|phylink_(autoneg|clear|connect|create|destroy|disconnect|ethtool|helper|mac|mii|of|set|start|stop|test|validate)
SGI GRU DRIVER
-M: Dimitri Sivanich <sivanich@sgi.com>
+M: Dimitri Sivanich <dimitri.sivanich@hpe.com>
S: Maintained
F: drivers/misc/sgi-gru/
SGI XP/XPC/XPNET DRIVER
-M: Cliff Whickman <cpw@sgi.com>
M: Robin Holt <robinmholt@gmail.com>
+M: Steve Wahl <steve.wahl@hpe.com>
+R: Mike Travis <mike.travis@hpe.com>
S: Maintained
F: drivers/misc/sgi-xp/
@@ -19073,6 +19074,7 @@ F: arch/x86/platform
X86 PLATFORM UV HPE SUPERDOME FLEX
M: Steve Wahl <steve.wahl@hpe.com>
+R: Mike Travis <mike.travis@hpe.com>
R: Dimitri Sivanich <dimitri.sivanich@hpe.com>
R: Russ Anderson <russ.anderson@hpe.com>
S: Supported
diff --git a/arch/um/include/asm/cacheflush.h b/arch/um/include/asm/cacheflush.h
new file mode 100644
index 000000000000..f693cb9c4b7e
--- /dev/null
+++ b/arch/um/include/asm/cacheflush.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_UM_CACHEFLUSH_H
+#define _ASM_UM_CACHEFLUSH_H
+
+#undef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
+#include <asm-generic/cacheflush.h>
+
+static inline int l1d_flush_hw(void) { return -EOPNOTSUPP; }
+#endif /* _ASM_UM_CACHEFLUSH_H */
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 27b5e2bc6a01..80b57e7f4947 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -62,9 +62,6 @@ config EARLY_PRINTK_USB_XDBC
You should normally say N here, unless you want to debug early
crashes or need a very simple printk logging facility.
-config COPY_MC_TEST
- def_bool n
-
config EFI_PGT_DUMP
bool "Dump the EFI pagetable"
depends on EFI
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 9185cb1d13b9..50e5d3a2e70a 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -413,10 +413,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
#ifdef CONFIG_COMPAT
int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
- int uses_interp)
+ int uses_interp, bool x32)
{
#ifdef CONFIG_X86_X32_ABI
- if (test_thread_flag(TIF_X32)) {
+ if (x32) {
if (!vdso64_enabled)
return 0;
return map_vdso_randomized(&vdso_image_x32);
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 44c33103a955..1b40b9297083 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -316,7 +316,7 @@ static struct vm_area_struct gate_vma __ro_after_init = {
struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
#ifdef CONFIG_COMPAT
- if (!mm || mm->context.ia32_compat)
+ if (!mm || !(mm->context.flags & MM_CONTEXT_HAS_VSYSCALL))
return NULL;
#endif
if (vsyscall_mode == NONE)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index a88c94d65693..77b963e5e70a 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2602,7 +2602,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent
struct stack_frame_ia32 frame;
const struct stack_frame_ia32 __user *fp;
- if (!test_thread_flag(TIF_IA32))
+ if (user_64bit_mode(regs))
return 0;
cs_base = get_segment_base(regs->cs);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 404315df1e16..99a59f38a4e2 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1259,7 +1259,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
old_to = to;
#ifdef CONFIG_X86_64
- is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
+ is_64bit = kernel_ip(to) || any_64bit_mode(regs);
#endif
insn_init(&insn, kaddr, size, is_64bit);
insn_get_length(&insn);
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 8961653c5dd2..1aadb253d296 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -1221,7 +1221,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
* on 64-bit systems running 32-bit apps
*/
#ifdef CONFIG_X86_64
- is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
+ is64 = kernel_ip((unsigned long)addr) || any_64bit_mode(current_pt_regs());
#endif
insn_init(&insn, addr, bytes_read, is64);
insn_get_opcode(&insn);
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index b6cac6e9bb70..f732741ad7c7 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -199,7 +199,7 @@ static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int new)
{
- return try_cmpxchg(&v->counter, old, new);
+ return arch_try_cmpxchg(&v->counter, old, new);
}
#define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 809bd010a751..7886d0578fc9 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -187,7 +187,7 @@ static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
{
- return try_cmpxchg(&v->counter, old, new);
+ return arch_try_cmpxchg(&v->counter, old, new);
}
#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index b192d917a6d0..554eaf697f3f 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -10,4 +10,12 @@
void clflush_cache_range(void *addr, unsigned int size);
+static inline int l1d_flush_hw(void)
+{
+ if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
+ wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
+ return 0;
+ }
+ return -EOPNOTSUPP;
+}
#endif /* _ASM_X86_CACHEFLUSH_H */
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index a8bfac131256..4d4ec5cbdc51 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -221,7 +221,7 @@ extern void __add_wrong_size(void)
#define __try_cmpxchg(ptr, pold, new, size) \
__raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
-#define try_cmpxchg(ptr, pold, new) \
+#define arch_try_cmpxchg(ptr, pold, new) \
__try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
/*
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 0e327a01f50f..f145e3326c6d 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -177,14 +177,13 @@ typedef struct user_regs_struct compat_elf_gregset_t;
static inline void __user *arch_compat_alloc_user_space(long len)
{
- compat_uptr_t sp;
-
- if (test_thread_flag(TIF_IA32)) {
- sp = task_pt_regs(current)->sp;
- } else {
- /* -128 for the x32 ABI redzone */
- sp = task_pt_regs(current)->sp - 128;
- }
+ compat_uptr_t sp = task_pt_regs(current)->sp;
+
+ /*
+ * -128 for the x32 ABI redzone. For IA32, it is not strictly
+ * necessary, but not harmful.
+ */
+ sp -= 128;
return (void __user *)round_down(sp - len, 16);
}
diff --git a/arch/x86/include/asm/copy_mc_test.h b/arch/x86/include/asm/copy_mc_test.h
deleted file mode 100644
index e4991ba96726..000000000000
--- a/arch/x86/include/asm/copy_mc_test.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _COPY_MC_TEST_H_
-#define _COPY_MC_TEST_H_
-
-#ifndef __ASSEMBLY__
-#ifdef CONFIG_COPY_MC_TEST
-extern unsigned long copy_mc_test_src;
-extern unsigned long copy_mc_test_dst;
-
-static inline void copy_mc_inject_src(void *addr)
-{
- if (addr)
- copy_mc_test_src = (unsigned long) addr;
- else
- copy_mc_test_src = ~0UL;
-}
-
-static inline void copy_mc_inject_dst(void *addr)
-{
- if (addr)
- copy_mc_test_dst = (unsigned long) addr;
- else
- copy_mc_test_dst = ~0UL;
-}
-#else /* CONFIG_COPY_MC_TEST */
-static inline void copy_mc_inject_src(void *addr)
-{
-}
-
-static inline void copy_mc_inject_dst(void *addr)
-{
-}
-#endif /* CONFIG_COPY_MC_TEST */
-
-#else /* __ASSEMBLY__ */
-#include <asm/export.h>
-
-#ifdef CONFIG_COPY_MC_TEST
-.macro COPY_MC_TEST_CTL
- .pushsection .data
- .align 8
- .globl copy_mc_test_src
- copy_mc_test_src:
- .quad 0
- EXPORT_SYMBOL_GPL(copy_mc_test_src)
- .globl copy_mc_test_dst
- copy_mc_test_dst:
- .quad 0
- EXPORT_SYMBOL_GPL(copy_mc_test_dst)
- .popsection
-.endm
-
-.macro COPY_MC_TEST_SRC reg count target
- leaq \count(\reg), %r9
- cmp copy_mc_test_src, %r9
- ja \target
-.endm
-
-.macro COPY_MC_TEST_DST reg count target
- leaq \count(\reg), %r9
- cmp copy_mc_test_dst, %r9
- ja \target
-.endm
-#else
-.macro COPY_MC_TEST_CTL
-.endm
-
-.macro COPY_MC_TEST_SRC reg count target
-.endm
-
-.macro COPY_MC_TEST_DST reg count target
-.endm
-#endif /* CONFIG_COPY_MC_TEST */
-#endif /* __ASSEMBLY__ */
-#endif /* _COPY_MC_TEST_H_ */
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index b9a5d488f1a5..44a9b9940535 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -186,8 +186,9 @@ static inline void elf_common_init(struct thread_struct *t,
#define COMPAT_ELF_PLAT_INIT(regs, load_addr) \
elf_common_init(&current->thread, regs, __USER_DS)
-void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp);
-#define compat_start_thread compat_start_thread
+void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp, bool x32);
+#define COMPAT_START_THREAD(ex, regs, new_ip, new_sp) \
+ compat_start_thread(regs, new_ip, new_sp, ex->e_machine == EM_X86_64)
void set_personality_ia32(bool);
#define COMPAT_SET_PERSONALITY(ex) \
@@ -361,7 +362,7 @@ do { \
#define AT_SYSINFO 32
#define COMPAT_ARCH_DLINFO \
-if (test_thread_flag(TIF_X32)) \
+if (exec->e_machine == EM_X86_64) \
ARCH_DLINFO_X32; \
else \
ARCH_DLINFO_IA32
@@ -382,8 +383,10 @@ struct linux_binprm;
extern int arch_setup_additional_pages(struct linux_binprm *bprm,
int uses_interp);
extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
- int uses_interp);
-#define compat_arch_setup_additional_pages compat_arch_setup_additional_pages
+ int uses_interp, bool x32);
+#define COMPAT_ARCH_SETUP_ADDITIONAL_PAGES(bprm, ex, interpreter) \
+ compat_arch_setup_additional_pages(bprm, interpreter, \
+ (ex->e_machine == EM_X86_64))
/* Do not change the values. See get_align_mask() */
enum align_flags {
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 9257667d13c5..5d7494631ea9 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -6,6 +6,12 @@
#include <linux/rwsem.h>
#include <linux/mutex.h>
#include <linux/atomic.h>
+#include <linux/bits.h>
+
+/* Uprobes on this MM assume 32-bit code */
+#define MM_CONTEXT_UPROBE_IA32 BIT(0)
+/* vsyscall page is accessible on this MM */
+#define MM_CONTEXT_HAS_VSYSCALL BIT(1)
/*
* x86 has arch-specific MMU state beyond what lives in mm_struct.
@@ -33,8 +39,7 @@ typedef struct {
#endif
#ifdef CONFIG_X86_64
- /* True if mm supports a task running in 32 bit compatibility mode. */
- unsigned short ia32_compat;
+ unsigned short flags;
#endif
struct mutex lock;
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 36afcbea6a9f..27516046117a 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -181,7 +181,7 @@ static inline void arch_exit_mmap(struct mm_struct *mm)
static inline bool is_64bit_mm(struct mm_struct *mm)
{
return !IS_ENABLED(CONFIG_IA32_EMULATION) ||
- !(mm->context.ia32_compat == TIF_IA32);
+ !(mm->context.flags & MM_CONTEXT_UPROBE_IA32);
}
#else
static inline bool is_64bit_mm(struct mm_struct *mm)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 82a08b585818..60dbcdcb833f 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -136,6 +136,8 @@ struct cpuinfo_x86 {
u16 logical_die_id;
/* Index into per_cpu list: */
u16 cpu_index;
+ /* Is SMT active on this core? */
+ bool smt_active;
u32 microcode;
/* Address space bits used by the cache internally */
u8 x86_cache_bits;
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 44733a4bfc42..60aa7ec460f7 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -84,24 +84,23 @@ struct thread_info {
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */
-#define TIF_SPEC_FORCE_UPDATE 10 /* Force speculation MSR update in context switch */
+#define TIF_SPEC_L1D_FLUSH 10 /* Flush L1D on mm switches (processes) */
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_UPROBE 12 /* breakpointed or singlestepping */
#define TIF_PATCH_PENDING 13 /* pending live patching update */
#define TIF_NEED_FPU_LOAD 14 /* load FPU on return to userspace */
#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
-#define TIF_IA32 17 /* IA32 compatibility process */
#define TIF_SLD 18 /* Restore split lock detection on context switch */
#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */
#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
+#define TIF_SPEC_FORCE_UPDATE 23 /* Force speculation MSR update in context switch */
#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
#define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */
#define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */
#define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */
#define TIF_ADDR32 29 /* 32-bit address space on 64 bits */
-#define TIF_X32 30 /* 32-bit native x86-64 binary */
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
@@ -113,23 +112,22 @@ struct thread_info {
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_SPEC_IB (1 << TIF_SPEC_IB)
-#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
+#define _TIF_SPEC_L1D_FLUSH (1 << TIF_SPEC_L1D_FLUSH)
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING)
#define _TIF_NEED_FPU_LOAD (1 << TIF_NEED_FPU_LOAD)
#define _TIF_NOCPUID (1 << TIF_NOCPUID)
#define _TIF_NOTSC (1 << TIF_NOTSC)
-#define _TIF_IA32 (1 << TIF_IA32)
#define _TIF_SLD (1 << TIF_SLD)
#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
+#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP)
#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES)
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_ADDR32 (1 << TIF_ADDR32)
-#define _TIF_X32 (1 << TIF_X32)
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW_BASE \
@@ -228,6 +226,9 @@ static inline int arch_within_stack_frames(const void * const stack,
current_thread_info()->status & TS_COMPAT)
#endif
+extern int enable_l1d_flush_for_task(struct task_struct *tsk);
+extern int disable_l1d_flush_for_task(struct task_struct *tsk);
+
extern void arch_task_cache_init(void);
extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
extern void arch_release_task_struct(struct task_struct *tsk);
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 8c87a2e0b660..a927d40664df 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -83,7 +83,7 @@ struct tlb_state {
/* Last user mm for optimizing IBPB */
union {
struct mm_struct *last_user_mm;
- unsigned long last_user_mm_ibpb;
+ unsigned long last_user_mm_spec;
};
u16 loaded_mm_asid;
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d3f0db463f96..3923e484e6e4 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -296,6 +296,13 @@ enum taa_mitigations {
TAA_MITIGATION_TSX_DISABLED,
};
+enum l1d_flush_out_mitigations {
+ L1D_FLUSH_OUT_OFF,
+ L1D_FLUSH_OUT_ON,
+};
+
+static enum l1d_flush_out_mitigations l1d_flush_out_mitigation __ro_after_init = L1D_FLUSH_OUT_ON;
+
/* Default mitigation for TAA-affected CPUs */
static enum taa_mitigations taa_mitigation __ro_after_init = TAA_MITIGATION_VERW;
static bool taa_nosmt __ro_after_init;
@@ -379,6 +386,18 @@ out:
pr_info("%s\n", taa_strings[taa_mitigation]);
}
+static int __init l1d_flush_out_parse_cmdline(char *str)
+{
+ if (!boot_cpu_has_bug(X86_BUG_L1TF))
+ return 0;
+
+ if (!strcmp(str, "off"))
+ l1d_flush_out_mitigation = L1D_FLUSH_OUT_OFF;
+
+ return 0;
+}
+early_param("l1d_flush_out", l1d_flush_out_parse_cmdline);
+
static int __init tsx_async_abort_parse_cmdline(char *str)
{
if (!boot_cpu_has_bug(X86_BUG_TAA))
@@ -1215,6 +1234,23 @@ static void task_update_spec_tif(struct task_struct *tsk)
speculation_ctrl_update_current();
}
+static int l1d_flush_out_prctl_set(struct task_struct *task, unsigned long ctrl)
+{
+
+ if (l1d_flush_out_mitigation == L1D_FLUSH_OUT_OFF)
+ return -EPERM;
+
+ switch (ctrl) {
+ case PR_SPEC_ENABLE:
+ return enable_l1d_flush_for_task(task);
+ case PR_SPEC_DISABLE:
+ return disable_l1d_flush_for_task(task);
+ default:
+ return -ERANGE;
+ }
+ return 0;
+}
+
static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
{
if (ssb_mode != SPEC_STORE_BYPASS_PRCTL &&
@@ -1306,6 +1342,8 @@ int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
return ssb_prctl_set(task, ctrl);
case PR_SPEC_INDIRECT_BRANCH:
return ib_prctl_set(task, ctrl);
+ case PR_SPEC_L1D_FLUSH_OUT:
+ return l1d_flush_out_prctl_set(task, ctrl);
default:
return -ENODEV;
}
@@ -1322,6 +1360,20 @@ void arch_seccomp_spec_mitigate(struct task_struct *task)
}
#endif
+static int l1d_flush_out_prctl_get(struct task_struct *task)
+{
+ int ret;
+
+ if (l1d_flush_out_mitigation == L1D_FLUSH_OUT_OFF)
+ return PR_SPEC_FORCE_DISABLE;
+
+ ret = test_ti_thread_flag(&task->thread_info, TIF_SPEC_L1D_FLUSH);
+ if (ret)
+ return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
+ else
+ return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
+}
+
static int ssb_prctl_get(struct task_struct *task)
{
switch (ssb_mode) {
@@ -1375,6 +1427,8 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
return ssb_prctl_get(task);
case PR_SPEC_INDIRECT_BRANCH:
return ib_prctl_get(task);
+ case PR_SPEC_L1D_FLUSH_OUT:
+ return l1d_flush_out_prctl_get(task);
default:
return -ENODEV;
}
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 4102b866e7c0..51bf910b1e9d 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1811,11 +1811,9 @@ static int __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
case X86_VENDOR_INTEL:
intel_p5_mcheck_init(c);
return 1;
- break;
case X86_VENDOR_CENTAUR:
winchip_mcheck_init(c);
return 1;
- break;
default:
return 0;
}
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 3f6b137ef4e6..3d4a48336084 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -215,7 +215,6 @@ static unsigned int __verify_patch_size(u8 family, u32 sh_psize, size_t buf_size
default:
WARN(1, "%s: WTF family: 0x%x\n", __func__, family);
return 0;
- break;
}
if (sh_psize > min_t(u32, buf_size, max_size))
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index e5f4ee8f4c3b..1a681fde4c51 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -893,6 +893,10 @@ static __init void __check_quirks_intel(void)
set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat");
else
set_rdt_options("!l3cat");
+ fallthrough;
+ case INTEL_FAM6_BROADWELL_X:
+ intel_rdt_mbm_apply_quirk();
+ break;
}
}
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 80fa997fae60..6cb068fcf501 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -616,6 +616,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
void mbm_setup_overflow_handler(struct rdt_domain *dom,
unsigned long delay_ms);
void mbm_handle_overflow(struct work_struct *work);
+void __init intel_rdt_mbm_apply_quirk(void);
bool is_mba_sc(struct rdt_resource *r);
void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm);
u32 delay_bw_map(unsigned long bw, struct rdt_resource *r);
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 54dffe574e67..622073ffa715 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -64,6 +64,69 @@ unsigned int rdt_mon_features;
*/
unsigned int resctrl_cqm_threshold;
+#define CF(cf) ((unsigned long)(1048576 * (cf) + 0.5))
+
+/*
+ * The correction factor table is documented in Documentation/x86/resctrl.rst.
+ * If rmid > rmid threshold, MBM total and local values should be multiplied
+ * by the correction factor.
+ *
+ * The original table is modified for better code:
+ *
+ * 1. The threshold 0 is changed to rmid count - 1 so don't do correction
+ * for the case.
+ * 2. MBM total and local correction table indexed by core counter which is
+ * equal to (x86_cache_max_rmid + 1) / 8 - 1 and is from 0 up to 27.
+ * 3. The correction factor is normalized to 2^20 (1048576) so it's faster
+ * to calculate corrected value by shifting:
+ * corrected_value = (original_value * correction_factor) >> 20
+ */
+static const struct mbm_correction_factor_table {
+ u32 rmidthreshold;
+ u64 cf;
+} mbm_cf_table[] __initdata = {
+ {7, CF(1.000000)},
+ {15, CF(1.000000)},
+ {15, CF(0.969650)},
+ {31, CF(1.000000)},
+ {31, CF(1.066667)},
+ {31, CF(0.969650)},
+ {47, CF(1.142857)},
+ {63, CF(1.000000)},
+ {63, CF(1.185115)},
+ {63, CF(1.066553)},
+ {79, CF(1.454545)},
+ {95, CF(1.000000)},
+ {95, CF(1.230769)},
+ {95, CF(1.142857)},
+ {95, CF(1.066667)},
+ {127, CF(1.000000)},
+ {127, CF(1.254863)},
+ {127, CF(1.185255)},
+ {151, CF(1.000000)},
+ {127, CF(1.066667)},
+ {167, CF(1.000000)},
+ {159, CF(1.454334)},
+ {183, CF(1.000000)},
+ {127, CF(0.969744)},
+ {191, CF(1.280246)},
+ {191, CF(1.230921)},
+ {215, CF(1.000000)},
+ {191, CF(1.143118)},
+};
+
+static u32 mbm_cf_rmidthreshold __read_mostly = UINT_MAX;
+static u64 mbm_cf __read_mostly;
+
+static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val)
+{
+ /* Correct MBM value. */
+ if (rmid > mbm_cf_rmidthreshold)
+ val = (val * mbm_cf) >> 20;
+
+ return val;
+}
+
static inline struct rmid_entry *__rmid_entry(u32 rmid)
{
struct rmid_entry *entry;
@@ -260,7 +323,8 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
m->chunks += chunks;
m->prev_msr = tval;
- rr->val += m->chunks;
+ rr->val += get_corrected_mbm_count(rmid, m->chunks);
+
return 0;
}
@@ -280,7 +344,7 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
chunks = mbm_overflow_count(m->prev_bw_msr, tval, rr->r->mbm_width);
m->chunks += chunks;
- cur_bw = (chunks * r->mon_scale) >> 20;
+ cur_bw = (get_corrected_mbm_count(rmid, chunks) * r->mon_scale) >> 20;
if (m->delta_comp)
m->delta_bw = abs(cur_bw - m->prev_bw);
@@ -644,3 +708,17 @@ int rdt_get_mon_l3_config(struct rdt_resource *r)
return 0;
}
+
+void __init intel_rdt_mbm_apply_quirk(void)
+{
+ int cf_index;
+
+ cf_index = (boot_cpu_data.x86_cache_max_rmid + 1) / 8 - 1;
+ if (cf_index >= ARRAY_SIZE(mbm_cf_table)) {
+ pr_info("No MBM correction factor available\n");
+ return;
+ }
+
+ mbm_cf_rmidthreshold = mbm_cf_table[cf_index].rmidthreshold;
+ mbm_cf = mbm_cf_table[cf_index].cf;
+}
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index bb7e1132290b..9332c49a64a8 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -123,7 +123,7 @@ int perf_reg_validate(u64 mask)
u64 perf_reg_abi(struct task_struct *task)
{
- if (test_tsk_thread_flag(task, TIF_IA32))
+ if (!user_64bit_mode(task_pt_regs(task)))
return PERF_SAMPLE_REGS_ABI_32;
else
return PERF_SAMPLE_REGS_ABI_64;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index df342bedea88..ad582f9ac5a6 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -511,11 +511,10 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
EXPORT_SYMBOL_GPL(start_thread);
#ifdef CONFIG_COMPAT
-void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp)
+void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp, bool x32)
{
start_thread_common(regs, new_ip, new_sp,
- test_thread_flag(TIF_X32)
- ? __USER_CS : __USER32_CS,
+ x32 ? __USER_CS : __USER32_CS,
__USER_DS, __USER_DS);
}
#endif
@@ -641,16 +640,12 @@ void set_personality_64bit(void)
/* inherit personality from parent */
/* Make sure to be in 64bit mode */
- clear_thread_flag(TIF_IA32);
clear_thread_flag(TIF_ADDR32);
- clear_thread_flag(TIF_X32);
/* Pretend that this comes from a 64bit execve */
task_pt_regs(current)->orig_ax = __NR_execve;
current_thread_info()->status &= ~TS_COMPAT;
-
- /* Ensure the corresponding mm is not marked. */
if (current->mm)
- current->mm->context.ia32_compat = 0;
+ current->mm->context.flags = MM_CONTEXT_HAS_VSYSCALL;
/* TBD: overwrites user setup. Should have two bits.
But 64bit processes have always behaved this way,
@@ -662,10 +657,9 @@ void set_personality_64bit(void)
static void __set_personality_x32(void)
{
#ifdef CONFIG_X86_X32
- clear_thread_flag(TIF_IA32);
- set_thread_flag(TIF_X32);
if (current->mm)
- current->mm->context.ia32_compat = TIF_X32;
+ current->mm->context.flags = 0;
+
current->personality &= ~READ_IMPLIES_EXEC;
/*
* in_32bit_syscall() uses the presence of the x32 syscall bit
@@ -683,10 +677,14 @@ static void __set_personality_x32(void)
static void __set_personality_ia32(void)
{
#ifdef CONFIG_IA32_EMULATION
- set_thread_flag(TIF_IA32);
- clear_thread_flag(TIF_X32);
- if (current->mm)
- current->mm->context.ia32_compat = TIF_IA32;
+ if (current->mm) {
+ /*
+ * uprobes applied to this MM need to know this and
+ * cannot use user_64bit_mode() at that time.
+ */
+ current->mm->context.flags = MM_CONTEXT_UPROBE_IA32;
+ }
+
current->personality |= force_personality32;
/* Prepare the first "return" to user space */
task_pt_regs(current)->orig_ax = __NR_ia32_execve;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index de776b2e6046..9a94934fae5f 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -635,6 +635,9 @@ void set_cpu_sibling_map(int cpu)
threads = cpumask_weight(topology_sibling_cpumask(cpu));
if (threads > __max_smt_threads)
__max_smt_threads = threads;
+
+ for_each_cpu(i, topology_sibling_cpumask(cpu))
+ cpu_data(i).smt_active = threads > 1;
}
/* maps the cpu to the sched domain representing multi-core */
@@ -1548,10 +1551,16 @@ static void remove_siblinginfo(int cpu)
for_each_cpu(sibling, topology_die_cpumask(cpu))
cpumask_clear_cpu(cpu, topology_die_cpumask(sibling));
- for_each_cpu(sibling, topology_sibling_cpumask(cpu))
+
+ for_each_cpu(sibling, topology_sibling_cpumask(cpu)) {
cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
+ if (cpumask_weight(topology_sibling_cpumask(sibling)) == 1)
+ cpu_data(sibling).smt_active = false;
+ }
+
for_each_cpu(sibling, cpu_llc_shared_mask(cpu))
cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling));
+
cpumask_clear(cpu_llc_shared_mask(cpu));
cpumask_clear(topology_sibling_cpumask(cpu));
cpumask_clear(topology_core_cpumask(cpu));
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 3c70fb34028b..e19df6cde35d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -793,19 +793,6 @@ static __always_inline unsigned long debug_read_clear_dr6(void)
set_debugreg(DR6_RESERVED, 6);
dr6 ^= DR6_RESERVED; /* Flip to positive polarity */
- /*
- * Clear the virtual DR6 value, ptrace routines will set bits here for
- * things we want signals for.
- */
- current->thread.virtual_dr6 = 0;
-
- /*
- * The SDM says "The processor clears the BTF flag when it
- * generates a debug exception." Clear TIF_BLOCKSTEP to keep
- * TIF_BLOCKSTEP in sync with the hardware BTF flag.
- */
- clear_thread_flag(TIF_BLOCKSTEP);
-
return dr6;
}
@@ -873,6 +860,20 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
*/
WARN_ON_ONCE(user_mode(regs));
+ if (test_thread_flag(TIF_BLOCKSTEP)) {
+ /*
+ * The SDM says "The processor clears the BTF flag when it
+ * generates a debug exception." but PTRACE_BLOCKSTEP requested
+ * it for userspace, but we just took a kernel #DB, so re-set
+ * BTF.
+ */
+ unsigned long debugctl;
+
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ debugctl |= DEBUGCTLMSR_BTF;
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ }
+
/*
* Catch SYSENTER with TF set and clear DR_STEP. If this hit a
* watchpoint at the same time then that will still be handled.
@@ -936,6 +937,22 @@ static __always_inline void exc_debug_user(struct pt_regs *regs,
instrumentation_begin();
/*
+ * Start the virtual/ptrace DR6 value with just the DR_STEP mask
+ * of the real DR6. ptrace_triggered() will set the DR_TRAPn bits.
+ *
+ * Userspace expects DR_STEP to be visible in ptrace_get_debugreg(6)
+ * even if it is not the result of PTRACE_SINGLESTEP.
+ */
+ current->thread.virtual_dr6 = (dr6 & DR_STEP);
+
+ /*
+ * The SDM says "The processor clears the BTF flag when it
+ * generates a debug exception." Clear TIF_BLOCKSTEP to keep
+ * TIF_BLOCKSTEP in sync with the hardware BTF flag.
+ */
+ clear_thread_flag(TIF_BLOCKSTEP);
+
+ /*
* If dr6 has no reason to give us about the origin of this trap,
* then it's very likely the result of an icebp/int01 trap.
* User wants a sigtrap for that.
diff --git a/arch/x86/lib/copy_mc.c b/arch/x86/lib/copy_mc.c
index c13e8c9ee926..80efd45a7761 100644
--- a/arch/x86/lib/copy_mc.c
+++ b/arch/x86/lib/copy_mc.c
@@ -10,10 +10,6 @@
#include <asm/mce.h>
#ifdef CONFIG_X86_MCE
-/*
- * See COPY_MC_TEST for self-test of the copy_mc_fragile()
- * implementation.
- */
static DEFINE_STATIC_KEY_FALSE(copy_mc_fragile_key);
void enable_copy_mc_fragile(void)
diff --git a/arch/x86/lib/copy_mc_64.S b/arch/x86/lib/copy_mc_64.S
index 892d8915f609..e5f77e293034 100644
--- a/arch/x86/lib/copy_mc_64.S
+++ b/arch/x86/lib/copy_mc_64.S
@@ -2,14 +2,11 @@
/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */
#include <linux/linkage.h>
-#include <asm/copy_mc_test.h>
-#include <asm/export.h>
#include <asm/asm.h>
#ifndef CONFIG_UML
#ifdef CONFIG_X86_MCE
-COPY_MC_TEST_CTL
/*
* copy_mc_fragile - copy memory with indication if an exception / fault happened
@@ -38,8 +35,6 @@ SYM_FUNC_START(copy_mc_fragile)
subl %ecx, %edx
.L_read_leading_bytes:
movb (%rsi), %al
- COPY_MC_TEST_SRC %rsi 1 .E_leading_bytes
- COPY_MC_TEST_DST %rdi 1 .E_leading_bytes
.L_write_leading_bytes:
movb %al, (%rdi)
incq %rsi
@@ -55,8 +50,6 @@ SYM_FUNC_START(copy_mc_fragile)
.L_read_words:
movq (%rsi), %r8
- COPY_MC_TEST_SRC %rsi 8 .E_read_words
- COPY_MC_TEST_DST %rdi 8 .E_write_words
.L_write_words:
movq %r8, (%rdi)
addq $8, %rsi
@@ -73,8 +66,6 @@ SYM_FUNC_START(copy_mc_fragile)
movl %edx, %ecx
.L_read_trailing_bytes:
movb (%rsi), %al
- COPY_MC_TEST_SRC %rsi 1 .E_trailing_bytes
- COPY_MC_TEST_DST %rdi 1 .E_trailing_bytes
.L_write_trailing_bytes:
movb %al, (%rdi)
incq %rsi
@@ -88,7 +79,6 @@ SYM_FUNC_START(copy_mc_fragile)
.L_done:
ret
SYM_FUNC_END(copy_mc_fragile)
-EXPORT_SYMBOL_GPL(copy_mc_fragile)
.section .fixup, "ax"
/*
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 11666ba19b62..88e9ad5142e4 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -8,11 +8,13 @@
#include <linux/export.h>
#include <linux/cpu.h>
#include <linux/debugfs.h>
+#include <linux/sched/smt.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/nospec-branch.h>
#include <asm/cache.h>
+#include <asm/cacheflush.h>
#include <asm/apic.h>
#include "mm_internal.h"
@@ -42,10 +44,15 @@
*/
/*
- * Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is
- * stored in cpu_tlb_state.last_user_mm_ibpb.
+ * Bits to mangle the TIF_SPEC_* state into the mm pointer which is
+ * stored in cpu_tlb_state.last_user_mm_spec.
*/
#define LAST_USER_MM_IBPB 0x1UL
+#define LAST_USER_MM_L1D_FLUSH 0x2UL
+#define LAST_USER_MM_SPEC_MASK (LAST_USER_MM_IBPB | LAST_USER_MM_L1D_FLUSH)
+
+/* Bits to set when tlbstate and flush is (re)initialized */
+#define LAST_USER_MM_INIT (LAST_USER_MM_IBPB | LAST_USER_MM_L1D_FLUSH)
/*
* The x86 feature is called PCID (Process Context IDentifier). It is similar
@@ -306,6 +313,41 @@ void leave_mm(int cpu)
}
EXPORT_SYMBOL_GPL(leave_mm);
+int enable_l1d_flush_for_task(struct task_struct *tsk)
+{
+ int cpu, ret = 0, i;
+
+ /*
+ * Do not enable L1D_FLUSH_OUT if
+ * b. The CPU is not affected by the L1TF bug
+ * c. The CPU does not have L1D FLUSH feature support
+ * c. The task's affinity is on cores with SMT on.
+ */
+
+ if (!boot_cpu_has_bug(X86_BUG_L1TF) ||
+ !static_cpu_has(X86_FEATURE_FLUSH_L1D))
+ return -EINVAL;
+
+ cpu = get_cpu();
+
+ for_each_cpu(i, &tsk->cpus_mask) {
+ if (cpu_data(i).smt_active == true) {
+ put_cpu();
+ return -EINVAL;
+ }
+ }
+
+ set_ti_thread_flag(&tsk->thread_info, TIF_SPEC_L1D_FLUSH);
+ put_cpu();
+ return ret;
+}
+
+int disable_l1d_flush_for_task(struct task_struct *tsk)
+{
+ clear_ti_thread_flag(&tsk->thread_info, TIF_SPEC_L1D_FLUSH);
+ return 0;
+}
+
void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
@@ -316,20 +358,30 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
local_irq_restore(flags);
}
-static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
+static inline unsigned long mm_mangle_tif_spec_bits(struct task_struct *next)
{
unsigned long next_tif = task_thread_info(next)->flags;
- unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB;
+ unsigned long spec_bits = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_SPEC_MASK;
- return (unsigned long)next->mm | ibpb;
+ BUILD_BUG_ON(TIF_SPEC_L1D_FLUSH != TIF_SPEC_IB + 1);
+ return (unsigned long)next->mm | spec_bits;
}
-static void cond_ibpb(struct task_struct *next)
+static void cond_mitigation(struct task_struct *next)
{
+ unsigned long prev_mm, next_mm;
+
if (!next || !next->mm)
return;
+ next_mm = mm_mangle_tif_spec_bits(next);
+ prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_spec);
+
/*
+ * Avoid user/user BTB poisoning by flushing the branch predictor
+ * when switching between processes. This stops one process from
+ * doing Spectre-v2 attacks on another.
+ *
* Both, the conditional and the always IBPB mode use the mm
* pointer to avoid the IBPB when switching between tasks of the
* same process. Using the mm pointer instead of mm->context.ctx_id
@@ -339,8 +391,6 @@ static void cond_ibpb(struct task_struct *next)
* exposed data is not really interesting.
*/
if (static_branch_likely(&switch_mm_cond_ibpb)) {
- unsigned long prev_mm, next_mm;
-
/*
* This is a bit more complex than the always mode because
* it has to handle two cases:
@@ -370,20 +420,14 @@ static void cond_ibpb(struct task_struct *next)
* Optimize this with reasonably small overhead for the
* above cases. Mangle the TIF_SPEC_IB bit into the mm
* pointer of the incoming task which is stored in
- * cpu_tlbstate.last_user_mm_ibpb for comparison.
- */
- next_mm = mm_mangle_tif_spec_ib(next);
- prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb);
-
- /*
+ * cpu_tlbstate.last_user_mm_spec for comparison.
+ *
* Issue IBPB only if the mm's are different and one or
* both have the IBPB bit set.
*/
if (next_mm != prev_mm &&
(next_mm | prev_mm) & LAST_USER_MM_IBPB)
indirect_branch_prediction_barrier();
-
- this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm);
}
if (static_branch_unlikely(&switch_mm_always_ibpb)) {
@@ -392,11 +436,20 @@ static void cond_ibpb(struct task_struct *next)
* different context than the user space task which ran
* last on this CPU.
*/
- if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) {
+ if ((prev_mm & ~LAST_USER_MM_SPEC_MASK) !=
+ (unsigned long)next->mm)
indirect_branch_prediction_barrier();
- this_cpu_write(cpu_tlbstate.last_user_mm, next->mm);
- }
}
+
+ /*
+ * Flush only if SMT is disabled as per the contract, which is checked
+ * when the feature is enabled.
+ */
+ if (sched_smt_active() && !this_cpu_read(cpu_info.smt_active) &&
+ (prev_mm & LAST_USER_MM_L1D_FLUSH))
+ l1d_flush_hw();
+
+ this_cpu_write(cpu_tlbstate.last_user_mm_spec, next_mm);
}
#ifdef CONFIG_PERF_EVENTS
@@ -518,11 +571,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
need_flush = true;
} else {
/*
- * Avoid user/user BTB poisoning by flushing the branch
- * predictor when switching between processes. This stops
- * one process from doing Spectre-v2 attacks on another.
+ * Apply process to process speculation vulnerability
+ * mitigations if applicable.
*/
- cond_ibpb(tsk);
+ cond_mitigation(tsk);
/*
* Stop remote flushes for the previous mm.
@@ -630,7 +682,7 @@ void initialize_tlbstate_and_flush(void)
write_cr3(build_cr3(mm->pgd, 0));
/* Reinitialize tlbstate. */
- this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB);
+ this_cpu_write(cpu_tlbstate.last_user_mm_spec, LAST_USER_MM_INIT);
this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
this_cpu_write(cpu_tlbstate.next_asid, 1);
this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index a2488b6e27d6..1d8391fcca68 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -49,7 +49,7 @@ x86_backtrace_32(struct pt_regs * const regs, unsigned int depth)
struct stack_frame_ia32 *head;
/* User process is IA32 */
- if (!current || !test_thread_flag(TIF_IA32))
+ if (!current || user_64bit_mode(regs))
return 0;
head = (struct stack_frame_ia32 *) regs->bp;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index fa50e8936f5f..ac0b5fc30ea6 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1246,7 +1246,7 @@ out_free_interp:
set_binfmt(&elf_format);
#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
- retval = arch_setup_additional_pages(bprm, !!interpreter);
+ retval = ARCH_SETUP_ADDITIONAL_PAGES(bprm, elf_ex, !!interpreter);
if (retval < 0)
goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
@@ -1307,7 +1307,7 @@ out_free_interp:
#endif
finalize_exec(bprm);
- start_thread(regs, elf_entry, bprm->p);
+ START_THREAD(elf_ex, regs, elf_entry, bprm->p);
retval = 0;
out:
return retval;
diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c
index 2d24c765cbd7..2c557229696a 100644
--- a/fs/compat_binfmt_elf.c
+++ b/fs/compat_binfmt_elf.c
@@ -106,15 +106,25 @@
#endif
#ifdef compat_start_thread
-#undef start_thread
-#define start_thread compat_start_thread
+#define COMPAT_START_THREAD(ex, regs, new_ip, new_sp) \
+ compat_start_thread(regs, new_ip, new_sp)
#endif
-#ifdef compat_arch_setup_additional_pages
+#ifdef COMPAT_START_THREAD
+#undef START_THREAD
+#define START_THREAD COMPAT_START_THREAD
+#endif
+
+#ifdef compat_arch_setup_additional_pages
+#define COMPAT_ARCH_SETUP_ADDITIONAL_PAGES(bprm, ex, interpreter) \
+ compat_arch_setup_additional_pages(bprm, interpreter)
+#endif
+
+#ifdef COMPAT_ARCH_SETUP_ADDITIONAL_PAGES
#undef ARCH_HAS_SETUP_ADDITIONAL_PAGES
#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
-#undef arch_setup_additional_pages
-#define arch_setup_additional_pages compat_arch_setup_additional_pages
+#undef ARCH_SETUP_ADDITIONAL_PAGES
+#define ARCH_SETUP_ADDITIONAL_PAGES COMPAT_ARCH_SETUP_ADDITIONAL_PAGES
#endif
#ifdef compat_elf_read_implies_exec
diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index cd223b68b69d..888b6cfeed91 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -1642,148 +1642,192 @@ atomic64_dec_if_positive(atomic64_t *v)
#endif
#if !defined(arch_xchg_relaxed) || defined(arch_xchg)
-#define xchg(ptr, ...) \
-({ \
- typeof(ptr) __ai_ptr = (ptr); \
- instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
- arch_xchg(__ai_ptr, __VA_ARGS__); \
+#define xchg(ptr, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ arch_xchg(__ai_ptr, __VA_ARGS__); \
})
#endif
#if defined(arch_xchg_acquire)
-#define xchg_acquire(ptr, ...) \
-({ \
- typeof(ptr) __ai_ptr = (ptr); \
- instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
- arch_xchg_acquire(__ai_ptr, __VA_ARGS__); \
+#define xchg_acquire(ptr, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ arch_xchg_acquire(__ai_ptr, __VA_ARGS__); \
})
#endif
#if defined(arch_xchg_release)
-#define xchg_release(ptr, ...) \
-({ \
- typeof(ptr) __ai_ptr = (ptr); \
- instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
- arch_xchg_release(__ai_ptr, __VA_ARGS__); \
+#define xchg_release(ptr, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ arch_xchg_release(__ai_ptr, __VA_ARGS__); \
})
#endif
#if defined(arch_xchg_relaxed)
-#define xchg_relaxed(ptr, ...) \
-({ \
- typeof(ptr) __ai_ptr = (ptr); \
- instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
- arch_xchg_relaxed(__ai_ptr, __VA_ARGS__); \
+#define xchg_relaxed(ptr, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ arch_xchg_relaxed(__ai_ptr, __VA_ARGS__); \
})
#endif
#if !defined(arch_cmpxchg_relaxed) || defined(arch_cmpxchg)
-#define cmpxchg(ptr, ...) \
-({ \
- typeof(ptr) __ai_ptr = (ptr); \
- instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
- arch_cmpxchg(__ai_ptr, __VA_ARGS__); \
+#define cmpxchg(ptr, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ arch_cmpxchg(__ai_ptr, __VA_ARGS__); \
})
#endif
#if defined(arch_cmpxchg_acquire)
-#define cmpxchg_acquire(ptr, ...) \
-({ \
- typeof(ptr) __ai_ptr = (ptr); \
- instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
- arch_cmpxchg_acquire(__ai_ptr, __VA_ARGS__); \
+#define cmpxchg_acquire(ptr, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ arch_cmpxchg_acquire(__ai_ptr, __VA_ARGS__); \
})
#endif
#if defined(arch_cmpxchg_release)
-#define cmpxchg_release(ptr, ...) \
-({ \
- typeof(ptr) __ai_ptr = (ptr); \
- instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
- arch_cmpxchg_release(__ai_ptr, __VA_ARGS__); \
+#define cmpxchg_release(ptr, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ arch_cmpxchg_release(__ai_ptr, __VA_ARGS__); \
})
#endif
#if defined(arch_cmpxchg_relaxed)
-#define cmpxchg_relaxed(ptr, ...) \
-({ \
- typeof(ptr) __ai_ptr = (ptr); \
- instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
- arch_cmpxchg_relaxed(__ai_ptr, __VA_ARGS__); \
+#define cmpxchg_relaxed(ptr, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ arch_cmpxchg_relaxed(__ai_ptr, __VA_ARGS__); \
})
#endif
#if !defined(arch_cmpxchg64_relaxed) || defined(arch_cmpxchg64)
-#define cmpxchg64(ptr, ...) \
-({ \
- typeof(ptr) __ai_ptr = (ptr); \
- instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
- arch_cmpxchg64(__ai_ptr, __VA_ARGS__); \
+#define cmpxchg64(ptr, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ arch_cmpxchg64(__ai_ptr, __VA_ARGS__); \
})
#endif
#if defined(arch_cmpxchg64_acquire)
-#define cmpxchg64_acquire(ptr, ...) \
-({ \
- typeof(ptr) __ai_ptr = (ptr); \
- instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
- arch_cmpxchg64_acquire(__ai_ptr, __VA_ARGS__); \
+#define cmpxchg64_acquire(ptr, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ arch_cmpxchg64_acquire(__ai_ptr, __VA_ARGS__); \
})
#endif
#if defined(arch_cmpxchg64_release)
-#define cmpxchg64_release(ptr, ...) \
-({ \
- typeof(ptr) __ai_ptr = (ptr); \
- instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
- arch_cmpxchg64_release(__ai_ptr, __VA_ARGS__); \
+#define cmpxchg64_release(ptr, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ arch_cmpxchg64_release(__ai_ptr, __VA_ARGS__); \
})
#endif
#if defined(arch_cmpxchg64_relaxed)
-#define cmpxchg64_relaxed(ptr, ...) \
-({ \
- typeof(ptr) __ai_ptr = (ptr); \
- instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
- arch_cmpxchg64_relaxed(__ai_ptr, __VA_ARGS__); \
+#define cmpxchg64_relaxed(ptr, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ arch_cmpxchg64_relaxed(__ai_ptr, __VA_ARGS__); \
})
#endif
-#define cmpxchg_local(ptr, ...) \
-({ \
- typeof(ptr) __ai_ptr = (ptr); \
- instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
- arch_cmpxchg_local(__ai_ptr, __VA_ARGS__); \
+#if !defined(arch_try_cmpxchg_relaxed) || defined(arch_try_cmpxchg)
+#define try_cmpxchg(ptr, oldp, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ typeof(oldp) __ai_oldp = (oldp); \
+ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \
+ arch_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \
+})
+#endif
+
+#if defined(arch_try_cmpxchg_acquire)
+#define try_cmpxchg_acquire(ptr, oldp, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ typeof(oldp) __ai_oldp = (oldp); \
+ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \
+ arch_try_cmpxchg_acquire(__ai_ptr, __ai_oldp, __VA_ARGS__); \
+})
+#endif
+
+#if defined(arch_try_cmpxchg_release)
+#define try_cmpxchg_release(ptr, oldp, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ typeof(oldp) __ai_oldp = (oldp); \
+ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \
+ arch_try_cmpxchg_release(__ai_ptr, __ai_oldp, __VA_ARGS__); \
+})
+#endif
+
+#if defined(arch_try_cmpxchg_relaxed)
+#define try_cmpxchg_relaxed(ptr, oldp, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ typeof(oldp) __ai_oldp = (oldp); \
+ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \
+ arch_try_cmpxchg_relaxed(__ai_ptr, __ai_oldp, __VA_ARGS__); \
+})
+#endif
+
+#define cmpxchg_local(ptr, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ arch_cmpxchg_local(__ai_ptr, __VA_ARGS__); \
})
-#define cmpxchg64_local(ptr, ...) \
-({ \
- typeof(ptr) __ai_ptr = (ptr); \
- instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
- arch_cmpxchg64_local(__ai_ptr, __VA_ARGS__); \
+#define cmpxchg64_local(ptr, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ arch_cmpxchg64_local(__ai_ptr, __VA_ARGS__); \
})
-#define sync_cmpxchg(ptr, ...) \
-({ \
- typeof(ptr) __ai_ptr = (ptr); \
- instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
- arch_sync_cmpxchg(__ai_ptr, __VA_ARGS__); \
+#define sync_cmpxchg(ptr, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ arch_sync_cmpxchg(__ai_ptr, __VA_ARGS__); \
})
-#define cmpxchg_double(ptr, ...) \
-({ \
- typeof(ptr) __ai_ptr = (ptr); \
- instrument_atomic_write(__ai_ptr, 2 * sizeof(*__ai_ptr)); \
- arch_cmpxchg_double(__ai_ptr, __VA_ARGS__); \
+#define cmpxchg_double(ptr, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ instrument_atomic_write(__ai_ptr, 2 * sizeof(*__ai_ptr)); \
+ arch_cmpxchg_double(__ai_ptr, __VA_ARGS__); \
})
-#define cmpxchg_double_local(ptr, ...) \
-({ \
- typeof(ptr) __ai_ptr = (ptr); \
- instrument_atomic_write(__ai_ptr, 2 * sizeof(*__ai_ptr)); \
- arch_cmpxchg_double_local(__ai_ptr, __VA_ARGS__); \
+#define cmpxchg_double_local(ptr, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ instrument_atomic_write(__ai_ptr, 2 * sizeof(*__ai_ptr)); \
+ arch_cmpxchg_double_local(__ai_ptr, __VA_ARGS__); \
})
#endif /* _ASM_GENERIC_ATOMIC_INSTRUMENTED_H */
-// 9d5e6a315fb1335d02f0ccd3655a91c3dafcc63e
+// 4bec382e44520f4d8267e42620054db26a659ea3
diff --git a/include/linux/atomic-arch-fallback.h b/include/linux/atomic-arch-fallback.h
index bcb6aa27cfa6..a3dba31df01e 100644
--- a/include/linux/atomic-arch-fallback.h
+++ b/include/linux/atomic-arch-fallback.h
@@ -9,9 +9,9 @@
#include <linux/compiler.h>
#ifndef arch_xchg_relaxed
-#define arch_xchg_relaxed arch_xchg
-#define arch_xchg_acquire arch_xchg
-#define arch_xchg_release arch_xchg
+#define arch_xchg_acquire arch_xchg
+#define arch_xchg_release arch_xchg
+#define arch_xchg_relaxed arch_xchg
#else /* arch_xchg_relaxed */
#ifndef arch_xchg_acquire
@@ -32,9 +32,9 @@
#endif /* arch_xchg_relaxed */
#ifndef arch_cmpxchg_relaxed
-#define arch_cmpxchg_relaxed arch_cmpxchg
-#define arch_cmpxchg_acquire arch_cmpxchg
-#define arch_cmpxchg_release arch_cmpxchg
+#define arch_cmpxchg_acquire arch_cmpxchg
+#define arch_cmpxchg_release arch_cmpxchg
+#define arch_cmpxchg_relaxed arch_cmpxchg
#else /* arch_cmpxchg_relaxed */
#ifndef arch_cmpxchg_acquire
@@ -55,9 +55,9 @@
#endif /* arch_cmpxchg_relaxed */
#ifndef arch_cmpxchg64_relaxed
-#define arch_cmpxchg64_relaxed arch_cmpxchg64
-#define arch_cmpxchg64_acquire arch_cmpxchg64
-#define arch_cmpxchg64_release arch_cmpxchg64
+#define arch_cmpxchg64_acquire arch_cmpxchg64
+#define arch_cmpxchg64_release arch_cmpxchg64
+#define arch_cmpxchg64_relaxed arch_cmpxchg64
#else /* arch_cmpxchg64_relaxed */
#ifndef arch_cmpxchg64_acquire
@@ -77,6 +77,76 @@
#endif /* arch_cmpxchg64_relaxed */
+#ifndef arch_try_cmpxchg_relaxed
+#ifdef arch_try_cmpxchg
+#define arch_try_cmpxchg_acquire arch_try_cmpxchg
+#define arch_try_cmpxchg_release arch_try_cmpxchg
+#define arch_try_cmpxchg_relaxed arch_try_cmpxchg
+#endif /* arch_try_cmpxchg */
+
+#ifndef arch_try_cmpxchg
+#define arch_try_cmpxchg(_ptr, _oldp, _new) \
+({ \
+ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+ ___r = arch_cmpxchg((_ptr), ___o, (_new)); \
+ if (unlikely(___r != ___o)) \
+ *___op = ___r; \
+ likely(___r == ___o); \
+})
+#endif /* arch_try_cmpxchg */
+
+#ifndef arch_try_cmpxchg_acquire
+#define arch_try_cmpxchg_acquire(_ptr, _oldp, _new) \
+({ \
+ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+ ___r = arch_cmpxchg_acquire((_ptr), ___o, (_new)); \
+ if (unlikely(___r != ___o)) \
+ *___op = ___r; \
+ likely(___r == ___o); \
+})
+#endif /* arch_try_cmpxchg_acquire */
+
+#ifndef arch_try_cmpxchg_release
+#define arch_try_cmpxchg_release(_ptr, _oldp, _new) \
+({ \
+ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+ ___r = arch_cmpxchg_release((_ptr), ___o, (_new)); \
+ if (unlikely(___r != ___o)) \
+ *___op = ___r; \
+ likely(___r == ___o); \
+})
+#endif /* arch_try_cmpxchg_release */
+
+#ifndef arch_try_cmpxchg_relaxed
+#define arch_try_cmpxchg_relaxed(_ptr, _oldp, _new) \
+({ \
+ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+ ___r = arch_cmpxchg_relaxed((_ptr), ___o, (_new)); \
+ if (unlikely(___r != ___o)) \
+ *___op = ___r; \
+ likely(___r == ___o); \
+})
+#endif /* arch_try_cmpxchg_relaxed */
+
+#else /* arch_try_cmpxchg_relaxed */
+
+#ifndef arch_try_cmpxchg_acquire
+#define arch_try_cmpxchg_acquire(...) \
+ __atomic_op_acquire(arch_try_cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef arch_try_cmpxchg_release
+#define arch_try_cmpxchg_release(...) \
+ __atomic_op_release(arch_try_cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef arch_try_cmpxchg
+#define arch_try_cmpxchg(...) \
+ __atomic_op_fence(arch_try_cmpxchg, __VA_ARGS__)
+#endif
+
+#endif /* arch_try_cmpxchg_relaxed */
+
#ifndef arch_atomic_read_acquire
static __always_inline int
arch_atomic_read_acquire(const atomic_t *v)
@@ -2288,4 +2358,4 @@ arch_atomic64_dec_if_positive(atomic64_t *v)
#endif
#endif /* _LINUX_ATOMIC_FALLBACK_H */
-// 90cd26cfd69d2250303d654955a0cc12620fb91b
+// cca554917d7ea73d5e3e7397dd70c484cad9b2c4
diff --git a/include/linux/atomic-fallback.h b/include/linux/atomic-fallback.h
index fd525c71d676..2a3f55d98be9 100644
--- a/include/linux/atomic-fallback.h
+++ b/include/linux/atomic-fallback.h
@@ -9,9 +9,9 @@
#include <linux/compiler.h>
#ifndef xchg_relaxed
-#define xchg_relaxed xchg
-#define xchg_acquire xchg
-#define xchg_release xchg
+#define xchg_acquire xchg
+#define xchg_release xchg
+#define xchg_relaxed xchg
#else /* xchg_relaxed */
#ifndef xchg_acquire
@@ -32,9 +32,9 @@
#endif /* xchg_relaxed */
#ifndef cmpxchg_relaxed
-#define cmpxchg_relaxed cmpxchg
-#define cmpxchg_acquire cmpxchg
-#define cmpxchg_release cmpxchg
+#define cmpxchg_acquire cmpxchg
+#define cmpxchg_release cmpxchg
+#define cmpxchg_relaxed cmpxchg
#else /* cmpxchg_relaxed */
#ifndef cmpxchg_acquire
@@ -55,9 +55,9 @@
#endif /* cmpxchg_relaxed */
#ifndef cmpxchg64_relaxed
-#define cmpxchg64_relaxed cmpxchg64
-#define cmpxchg64_acquire cmpxchg64
-#define cmpxchg64_release cmpxchg64
+#define cmpxchg64_acquire cmpxchg64
+#define cmpxchg64_release cmpxchg64
+#define cmpxchg64_relaxed cmpxchg64
#else /* cmpxchg64_relaxed */
#ifndef cmpxchg64_acquire
@@ -77,6 +77,76 @@
#endif /* cmpxchg64_relaxed */
+#ifndef try_cmpxchg_relaxed
+#ifdef try_cmpxchg
+#define try_cmpxchg_acquire try_cmpxchg
+#define try_cmpxchg_release try_cmpxchg
+#define try_cmpxchg_relaxed try_cmpxchg
+#endif /* try_cmpxchg */
+
+#ifndef try_cmpxchg
+#define try_cmpxchg(_ptr, _oldp, _new) \
+({ \
+ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+ ___r = cmpxchg((_ptr), ___o, (_new)); \
+ if (unlikely(___r != ___o)) \
+ *___op = ___r; \
+ likely(___r == ___o); \
+})
+#endif /* try_cmpxchg */
+
+#ifndef try_cmpxchg_acquire
+#define try_cmpxchg_acquire(_ptr, _oldp, _new) \
+({ \
+ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+ ___r = cmpxchg_acquire((_ptr), ___o, (_new)); \
+ if (unlikely(___r != ___o)) \
+ *___op = ___r; \
+ likely(___r == ___o); \
+})
+#endif /* try_cmpxchg_acquire */
+
+#ifndef try_cmpxchg_release
+#define try_cmpxchg_release(_ptr, _oldp, _new) \
+({ \
+ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+ ___r = cmpxchg_release((_ptr), ___o, (_new)); \
+ if (unlikely(___r != ___o)) \
+ *___op = ___r; \
+ likely(___r == ___o); \
+})
+#endif /* try_cmpxchg_release */
+
+#ifndef try_cmpxchg_relaxed
+#define try_cmpxchg_relaxed(_ptr, _oldp, _new) \
+({ \
+ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+ ___r = cmpxchg_relaxed((_ptr), ___o, (_new)); \
+ if (unlikely(___r != ___o)) \
+ *___op = ___r; \
+ likely(___r == ___o); \
+})
+#endif /* try_cmpxchg_relaxed */
+
+#else /* try_cmpxchg_relaxed */
+
+#ifndef try_cmpxchg_acquire
+#define try_cmpxchg_acquire(...) \
+ __atomic_op_acquire(try_cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef try_cmpxchg_release
+#define try_cmpxchg_release(...) \
+ __atomic_op_release(try_cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef try_cmpxchg
+#define try_cmpxchg(...) \
+ __atomic_op_fence(try_cmpxchg, __VA_ARGS__)
+#endif
+
+#endif /* try_cmpxchg_relaxed */
+
#define arch_atomic_read atomic_read
#define arch_atomic_read_acquire atomic_read_acquire
@@ -2522,4 +2592,4 @@ atomic64_dec_if_positive(atomic64_t *v)
#endif
#endif /* _LINUX_ATOMIC_FALLBACK_H */
-// 9d95b56f98d82a2a26c7b79ccdd0c47572d50a6f
+// d78e6c293c661c15188f0ec05bce45188c8d5892
diff --git a/include/linux/elf.h b/include/linux/elf.h
index 5d5b0321da0b..c9a46c4e183b 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -22,6 +22,16 @@
SET_PERSONALITY(ex)
#endif
+#ifndef START_THREAD
+#define START_THREAD(elf_ex, regs, elf_entry, start_stack) \
+ start_thread(regs, elf_entry, start_stack)
+#endif
+
+#if defined(ARCH_HAS_SETUP_ADDITIONAL_PAGES) && !defined(ARCH_SETUP_ADDITIONAL_PAGES)
+#define ARCH_SETUP_ADDITIONAL_PAGES(bprm, ex, interpreter) \
+ arch_setup_additional_pages(bprm, interpreter)
+#endif
+
#define ELF32_GNU_PROPERTY_ALIGN 4
#define ELF64_GNU_PROPERTY_ALIGN 8
diff --git a/include/linux/freelist.h b/include/linux/freelist.h
new file mode 100644
index 000000000000..fc1842b96469
--- /dev/null
+++ b/include/linux/freelist.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */
+#ifndef FREELIST_H
+#define FREELIST_H
+
+#include <linux/atomic.h>
+
+/*
+ * Copyright: cameron@moodycamel.com
+ *
+ * A simple CAS-based lock-free free list. Not the fastest thing in the world
+ * under heavy contention, but simple and correct (assuming nodes are never
+ * freed until after the free list is destroyed), and fairly speedy under low
+ * contention.
+ *
+ * Adapted from: https://moodycamel.com/blog/2014/solving-the-aba-problem-for-lock-free-free-lists
+ */
+
+struct freelist_node {
+ atomic_t refs;
+ struct freelist_node *next;
+};
+
+struct freelist_head {
+ struct freelist_node *head;
+};
+
+#define REFS_ON_FREELIST 0x80000000
+#define REFS_MASK 0x7FFFFFFF
+
+static inline void __freelist_add(struct freelist_node *node, struct freelist_head *list)
+{
+ /*
+ * Since the refcount is zero, and nobody can increase it once it's
+ * zero (except us, and we run only one copy of this method per node at
+ * a time, i.e. the single thread case), then we know we can safely
+ * change the next pointer of the node; however, once the refcount is
+ * back above zero, then other threads could increase it (happens under
+ * heavy contention, when the refcount goes to zero in between a load
+ * and a refcount increment of a node in try_get, then back up to
+ * something non-zero, then the refcount increment is done by the other
+ * thread) -- so if the CAS to add the node to the actual list fails,
+ * decrese the refcount and leave the add operation to the next thread
+ * who puts the refcount back to zero (which could be us, hence the
+ * loop).
+ */
+ struct freelist_node *head = READ_ONCE(list->head);
+
+ for (;;) {
+ WRITE_ONCE(node->next, head);
+ atomic_set_release(&node->refs, 1);
+
+ if (!try_cmpxchg_release(&list->head, &head, node)) {
+ /*
+ * Hmm, the add failed, but we can only try again when
+ * the refcount goes back to zero.
+ */
+ if (atomic_fetch_add_release(REFS_ON_FREELIST - 1, &node->refs) == 1)
+ continue;
+ }
+ return;
+ }
+}
+
+static inline void freelist_add(struct freelist_node *node, struct freelist_head *list)
+{
+ /*
+ * We know that the should-be-on-freelist bit is 0 at this point, so
+ * it's safe to set it using a fetch_add.
+ */
+ if (!atomic_fetch_add_release(REFS_ON_FREELIST, &node->refs)) {
+ /*
+ * Oh look! We were the last ones referencing this node, and we
+ * know we want to add it to the free list, so let's do it!
+ */
+ __freelist_add(node, list);
+ }
+}
+
+static inline struct freelist_node *freelist_try_get(struct freelist_head *list)
+{
+ struct freelist_node *prev, *next, *head = smp_load_acquire(&list->head);
+ unsigned int refs;
+
+ while (head) {
+ prev = head;
+ refs = atomic_read(&head->refs);
+ if ((refs & REFS_MASK) == 0 ||
+ !atomic_try_cmpxchg_acquire(&head->refs, &refs, refs+1)) {
+ head = smp_load_acquire(&list->head);
+ continue;
+ }
+
+ /*
+ * Good, reference count has been incremented (it wasn't at
+ * zero), which means we can read the next and not worry about
+ * it changing between now and the time we do the CAS.
+ */
+ next = READ_ONCE(head->next);
+ if (try_cmpxchg_acquire(&list->head, &head, next)) {
+ /*
+ * Yay, got the node. This means it was on the list,
+ * which means should-be-on-freelist must be false no
+ * matter the refcount (because nobody else knows it's
+ * been taken off yet, it can't have been put back on).
+ */
+ WARN_ON_ONCE(atomic_read(&head->refs) & REFS_ON_FREELIST);
+
+ /*
+ * Decrease refcount twice, once for our ref, and once
+ * for the list's ref.
+ */
+ atomic_fetch_add(-2, &head->refs);
+
+ return head;
+ }
+
+ /*
+ * OK, the head must have changed on us, but we still need to decrement
+ * the refcount we increased.
+ */
+ refs = atomic_fetch_add(-1, &prev->refs);
+ if (refs == REFS_ON_FREELIST + 1)
+ __freelist_add(prev, list);
+ }
+
+ return NULL;
+}
+
+#endif /* FREELIST_H */
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 629abaf25681..a79404433812 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -27,6 +27,8 @@
#include <linux/rcupdate.h>
#include <linux/mutex.h>
#include <linux/ftrace.h>
+#include <linux/refcount.h>
+#include <linux/freelist.h>
#include <asm/kprobes.h>
#ifdef CONFIG_KPROBES
@@ -144,6 +146,11 @@ static inline int kprobe_ftrace(struct kprobe *p)
* ignored, due to maxactive being too low.
*
*/
+struct kretprobe_holder {
+ struct kretprobe *rp;
+ refcount_t ref;
+};
+
struct kretprobe {
struct kprobe kp;
kretprobe_handler_t handler;
@@ -151,18 +158,18 @@ struct kretprobe {
int maxactive;
int nmissed;
size_t data_size;
- struct hlist_head free_instances;
- raw_spinlock_t lock;
+ struct freelist_head freelist;
+ struct kretprobe_holder *rph;
};
struct kretprobe_instance {
union {
- struct hlist_node hlist;
+ struct freelist_node freelist;
struct rcu_head rcu;
};
- struct kretprobe *rp;
+ struct llist_node llist;
+ struct kretprobe_holder *rph;
kprobe_opcode_t *ret_addr;
- struct task_struct *task;
void *fp;
char data[];
};
@@ -221,6 +228,14 @@ unsigned long kretprobe_trampoline_handler(struct pt_regs *regs,
return ret;
}
+static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance *ri)
+{
+ RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(),
+ "Kretprobe is accessed from instance under preemptive context");
+
+ return READ_ONCE(ri->rph->rp);
+}
+
#else /* CONFIG_KRETPROBES */
static inline void arch_prepare_kretprobe(struct kretprobe *rp,
struct pt_regs *regs)
diff --git a/include/linux/llist.h b/include/linux/llist.h
index 2e9c7215882b..24f207b0190b 100644
--- a/include/linux/llist.h
+++ b/include/linux/llist.h
@@ -197,6 +197,16 @@ static inline struct llist_node *llist_next(struct llist_node *node)
extern bool llist_add_batch(struct llist_node *new_first,
struct llist_node *new_last,
struct llist_head *head);
+
+static inline bool __llist_add_batch(struct llist_node *new_first,
+ struct llist_node *new_last,
+ struct llist_head *head)
+{
+ new_last->next = head->first;
+ head->first = new_first;
+ return new_last->next == NULL;
+}
+
/**
* llist_add - add a new entry
* @new: new entry to be added
@@ -209,6 +219,11 @@ static inline bool llist_add(struct llist_node *new, struct llist_head *head)
return llist_add_batch(new, new, head);
}
+static inline bool __llist_add(struct llist_node *new, struct llist_head *head)
+{
+ return __llist_add_batch(new, new, head);
+}
+
/**
* llist_del_all - delete all entries from lock-less list
* @head: the head of lock-less list to delete all entries
@@ -222,6 +237,14 @@ static inline struct llist_node *llist_del_all(struct llist_head *head)
return xchg(&head->first, NULL);
}
+static inline struct llist_node *__llist_del_all(struct llist_head *head)
+{
+ struct llist_node *first = head->first;
+
+ head->first = NULL;
+ return first;
+}
+
extern struct llist_node *llist_del_first(struct llist_head *head);
struct llist_node *llist_reverse_order(struct llist_node *head);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 063cd120b459..393db0690101 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1326,6 +1326,10 @@ struct task_struct {
struct callback_head mce_kill_me;
#endif
+#ifdef CONFIG_KRETPROBES
+ struct llist_head kretprobe_instances;
+#endif
+
/*
* New fields for task_struct should be added above here, so that
* they are included in the randomized portion of task_struct.
diff --git a/include/linux/time64.h b/include/linux/time64.h
index c9dcb3e5781f..5117cb5b5656 100644
--- a/include/linux/time64.h
+++ b/include/linux/time64.h
@@ -124,6 +124,10 @@ static inline bool timespec64_valid_settod(const struct timespec64 *ts)
*/
static inline s64 timespec64_to_ns(const struct timespec64 *ts)
{
+ /* Prevent multiplication overflow */
+ if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX)
+ return KTIME_MAX;
+
return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
}
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 7f0827705c9a..c334e6a02e5f 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -213,6 +213,7 @@ struct prctl_mm_map {
/* Speculation control variants */
# define PR_SPEC_STORE_BYPASS 0
# define PR_SPEC_INDIRECT_BRANCH 1
+# define PR_SPEC_L1D_FLUSH_OUT 2
/* Return and control values for PR_SET/GET_SPECULATION_CTRL */
# define PR_SPEC_NOT_AFFECTED 0
# define PR_SPEC_PRCTL (1UL << 0)
diff --git a/kernel/fork.c b/kernel/fork.c
index 32083db7a2a2..b9c289d0f4ef 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2187,6 +2187,10 @@ static __latent_entropy struct task_struct *copy_process(
INIT_LIST_HEAD(&p->thread_group);
p->task_works = NULL;
+#ifdef CONFIG_KRETPROBES
+ p->kretprobe_instances.first = NULL;
+#endif
+
/*
* Ensure that the cgroup subsystem policies allow the new process to be
* forked. It should be noted that the new process's css_set can be changed
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 8a12a25fa40d..7aa69713b608 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -54,7 +54,6 @@ static int kprobes_initialized;
* - RCU hlist traversal under disabling preempt (breakpoint handlers)
*/
static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
-static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
/* NOTE: change this value only with kprobe_mutex held */
static bool kprobes_all_disarmed;
@@ -62,9 +61,6 @@ static bool kprobes_all_disarmed;
/* This protects kprobe_table and optimizing_list */
static DEFINE_MUTEX(kprobe_mutex);
static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
-static struct {
- raw_spinlock_t lock ____cacheline_aligned_in_smp;
-} kretprobe_table_locks[KPROBE_TABLE_SIZE];
kprobe_opcode_t * __weak kprobe_lookup_name(const char *name,
unsigned int __unused)
@@ -72,11 +68,6 @@ kprobe_opcode_t * __weak kprobe_lookup_name(const char *name,
return ((kprobe_opcode_t *)(kallsyms_lookup_name(name)));
}
-static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
-{
- return &(kretprobe_table_locks[hash].lock);
-}
-
/* Blacklist -- list of struct kprobe_blacklist_entry */
static LIST_HEAD(kprobe_blacklist);
@@ -1224,64 +1215,26 @@ void kprobes_inc_nmissed_count(struct kprobe *p)
}
NOKPROBE_SYMBOL(kprobes_inc_nmissed_count);
-static void recycle_rp_inst(struct kretprobe_instance *ri)
-{
- struct kretprobe *rp = ri->rp;
-
- /* remove rp inst off the rprobe_inst_table */
- hlist_del(&ri->hlist);
- INIT_HLIST_NODE(&ri->hlist);
- if (likely(rp)) {
- raw_spin_lock(&rp->lock);
- hlist_add_head(&ri->hlist, &rp->free_instances);
- raw_spin_unlock(&rp->lock);
- } else
- kfree_rcu(ri, rcu);
-}
-NOKPROBE_SYMBOL(recycle_rp_inst);
-
-static void kretprobe_hash_lock(struct task_struct *tsk,
- struct hlist_head **head, unsigned long *flags)
-__acquires(hlist_lock)
+static void free_rp_inst_rcu(struct rcu_head *head)
{
- unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
- raw_spinlock_t *hlist_lock;
+ struct kretprobe_instance *ri = container_of(head, struct kretprobe_instance, rcu);
- *head = &kretprobe_inst_table[hash];
- hlist_lock = kretprobe_table_lock_ptr(hash);
- raw_spin_lock_irqsave(hlist_lock, *flags);
-}
-NOKPROBE_SYMBOL(kretprobe_hash_lock);
-
-static void kretprobe_table_lock(unsigned long hash,
- unsigned long *flags)
-__acquires(hlist_lock)
-{
- raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
- raw_spin_lock_irqsave(hlist_lock, *flags);
+ if (refcount_dec_and_test(&ri->rph->ref))
+ kfree(ri->rph);
+ kfree(ri);
}
-NOKPROBE_SYMBOL(kretprobe_table_lock);
+NOKPROBE_SYMBOL(free_rp_inst_rcu);
-static void kretprobe_hash_unlock(struct task_struct *tsk,
- unsigned long *flags)
-__releases(hlist_lock)
+static void recycle_rp_inst(struct kretprobe_instance *ri)
{
- unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
- raw_spinlock_t *hlist_lock;
-
- hlist_lock = kretprobe_table_lock_ptr(hash);
- raw_spin_unlock_irqrestore(hlist_lock, *flags);
-}
-NOKPROBE_SYMBOL(kretprobe_hash_unlock);
+ struct kretprobe *rp = get_kretprobe(ri);
-static void kretprobe_table_unlock(unsigned long hash,
- unsigned long *flags)
-__releases(hlist_lock)
-{
- raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
- raw_spin_unlock_irqrestore(hlist_lock, *flags);
+ if (likely(rp)) {
+ freelist_add(&ri->freelist, &rp->freelist);
+ } else
+ call_rcu(&ri->rcu, free_rp_inst_rcu);
}
-NOKPROBE_SYMBOL(kretprobe_table_unlock);
+NOKPROBE_SYMBOL(recycle_rp_inst);
static struct kprobe kprobe_busy = {
.addr = (void *) get_kprobe,
@@ -1312,24 +1265,21 @@ void kprobe_busy_end(void)
void kprobe_flush_task(struct task_struct *tk)
{
struct kretprobe_instance *ri;
- struct hlist_head *head;
- struct hlist_node *tmp;
- unsigned long hash, flags = 0;
+ struct llist_node *node;
+ /* Early boot, not yet initialized. */
if (unlikely(!kprobes_initialized))
- /* Early boot. kretprobe_table_locks not yet initialized. */
return;
kprobe_busy_begin();
- hash = hash_ptr(tk, KPROBE_HASH_BITS);
- head = &kretprobe_inst_table[hash];
- kretprobe_table_lock(hash, &flags);
- hlist_for_each_entry_safe(ri, tmp, head, hlist) {
- if (ri->task == tk)
- recycle_rp_inst(ri);
+ node = __llist_del_all(&tk->kretprobe_instances);
+ while (node) {
+ ri = container_of(node, struct kretprobe_instance, llist);
+ node = node->next;
+
+ recycle_rp_inst(ri);
}
- kretprobe_table_unlock(hash, &flags);
kprobe_busy_end();
}
@@ -1338,37 +1288,23 @@ NOKPROBE_SYMBOL(kprobe_flush_task);
static inline void free_rp_inst(struct kretprobe *rp)
{
struct kretprobe_instance *ri;
- struct hlist_node *next;
+ struct freelist_node *node;
+ int count = 0;
+
+ node = rp->freelist.head;
+ while (node) {
+ ri = container_of(node, struct kretprobe_instance, freelist);
+ node = node->next;
- hlist_for_each_entry_safe(ri, next, &rp->free_instances, hlist) {
- hlist_del(&ri->hlist);
kfree(ri);
+ count++;
}
-}
-static void cleanup_rp_inst(struct kretprobe *rp)
-{
- unsigned long flags, hash;
- struct kretprobe_instance *ri;
- struct hlist_node *next;
- struct hlist_head *head;
-
- /* To avoid recursive kretprobe by NMI, set kprobe busy here */
- kprobe_busy_begin();
- for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) {
- kretprobe_table_lock(hash, &flags);
- head = &kretprobe_inst_table[hash];
- hlist_for_each_entry_safe(ri, next, head, hlist) {
- if (ri->rp == rp)
- ri->rp = NULL;
- }
- kretprobe_table_unlock(hash, &flags);
+ if (refcount_sub_and_test(count, &rp->rph->ref)) {
+ kfree(rp->rph);
+ rp->rph = NULL;
}
- kprobe_busy_end();
-
- free_rp_inst(rp);
}
-NOKPROBE_SYMBOL(cleanup_rp_inst);
/* Add the new probe to ap->list */
static int add_new_kprobe(struct kprobe *ap, struct kprobe *p)
@@ -1930,88 +1866,56 @@ unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
void *trampoline_address,
void *frame_pointer)
{
- struct kretprobe_instance *ri = NULL, *last = NULL;
- struct hlist_head *head;
- struct hlist_node *tmp;
- unsigned long flags;
kprobe_opcode_t *correct_ret_addr = NULL;
- bool skipped = false;
+ struct kretprobe_instance *ri = NULL;
+ struct llist_node *first, *node;
+ struct kretprobe *rp;
- kretprobe_hash_lock(current, &head, &flags);
+ /* Find all nodes for this frame. */
+ first = node = current->kretprobe_instances.first;
+ while (node) {
+ ri = container_of(node, struct kretprobe_instance, llist);
- /*
- * It is possible to have multiple instances associated with a given
- * task either because multiple functions in the call path have
- * return probes installed on them, and/or more than one
- * return probe was registered for a target function.
- *
- * We can handle this because:
- * - instances are always pushed into the head of the list
- * - when multiple return probes are registered for the same
- * function, the (chronologically) first instance's ret_addr
- * will be the real return address, and all the rest will
- * point to kretprobe_trampoline.
- */
- hlist_for_each_entry(ri, head, hlist) {
- if (ri->task != current)
- /* another task is sharing our hash bucket */
- continue;
- /*
- * Return probes must be pushed on this hash list correct
- * order (same as return order) so that it can be popped
- * correctly. However, if we find it is pushed it incorrect
- * order, this means we find a function which should not be
- * probed, because the wrong order entry is pushed on the
- * path of processing other kretprobe itself.
- */
- if (ri->fp != frame_pointer) {
- if (!skipped)
- pr_warn("kretprobe is stacked incorrectly. Trying to fixup.\n");
- skipped = true;
- continue;
- }
+ BUG_ON(ri->fp != frame_pointer);
- correct_ret_addr = ri->ret_addr;
- if (skipped)
- pr_warn("%ps must be blacklisted because of incorrect kretprobe order\n",
- ri->rp->kp.addr);
-
- if (correct_ret_addr != trampoline_address)
+ if (ri->ret_addr != trampoline_address) {
+ correct_ret_addr = ri->ret_addr;
/*
* This is the real return address. Any other
* instances associated with this task are for
* other calls deeper on the call stack
*/
- break;
+ goto found;
+ }
+
+ node = node->next;
}
+ pr_err("Oops! Kretprobe fails to find correct return address.\n");
+ BUG_ON(1);
- BUG_ON(!correct_ret_addr || (correct_ret_addr == trampoline_address));
- last = ri;
+found:
+ /* Unlink all nodes for this frame. */
+ current->kretprobe_instances.first = node->next;
+ node->next = NULL;
- hlist_for_each_entry_safe(ri, tmp, head, hlist) {
- if (ri->task != current)
- /* another task is sharing our hash bucket */
- continue;
- if (ri->fp != frame_pointer)
- continue;
+ /* Run them.. */
+ while (first) {
+ ri = container_of(first, struct kretprobe_instance, llist);
+ first = first->next;
- if (ri->rp && ri->rp->handler) {
+ rp = get_kretprobe(ri);
+ if (rp && rp->handler) {
struct kprobe *prev = kprobe_running();
- __this_cpu_write(current_kprobe, &ri->rp->kp);
+ __this_cpu_write(current_kprobe, &rp->kp);
ri->ret_addr = correct_ret_addr;
- ri->rp->handler(ri, regs);
+ rp->handler(ri, regs);
__this_cpu_write(current_kprobe, prev);
}
recycle_rp_inst(ri);
-
- if (ri == last)
- break;
}
- kretprobe_hash_unlock(current, &flags);
-
return (unsigned long)correct_ret_addr;
}
NOKPROBE_SYMBOL(__kretprobe_trampoline_handler)
@@ -2023,39 +1927,26 @@ NOKPROBE_SYMBOL(__kretprobe_trampoline_handler)
static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
{
struct kretprobe *rp = container_of(p, struct kretprobe, kp);
- unsigned long hash, flags = 0;
struct kretprobe_instance *ri;
+ struct freelist_node *fn;
- /* TODO: consider to only swap the RA after the last pre_handler fired */
- hash = hash_ptr(current, KPROBE_HASH_BITS);
- raw_spin_lock_irqsave(&rp->lock, flags);
- if (!hlist_empty(&rp->free_instances)) {
- ri = hlist_entry(rp->free_instances.first,
- struct kretprobe_instance, hlist);
- hlist_del(&ri->hlist);
- raw_spin_unlock_irqrestore(&rp->lock, flags);
-
- ri->rp = rp;
- ri->task = current;
-
- if (rp->entry_handler && rp->entry_handler(ri, regs)) {
- raw_spin_lock_irqsave(&rp->lock, flags);
- hlist_add_head(&ri->hlist, &rp->free_instances);
- raw_spin_unlock_irqrestore(&rp->lock, flags);
- return 0;
- }
+ fn = freelist_try_get(&rp->freelist);
+ if (!fn) {
+ rp->nmissed++;
+ return 0;
+ }
- arch_prepare_kretprobe(ri, regs);
+ ri = container_of(fn, struct kretprobe_instance, freelist);
- /* XXX(hch): why is there no hlist_move_head? */
- INIT_HLIST_NODE(&ri->hlist);
- kretprobe_table_lock(hash, &flags);
- hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]);
- kretprobe_table_unlock(hash, &flags);
- } else {
- rp->nmissed++;
- raw_spin_unlock_irqrestore(&rp->lock, flags);
+ if (rp->entry_handler && rp->entry_handler(ri, regs)) {
+ freelist_add(&ri->freelist, &rp->freelist);
+ return 0;
}
+
+ arch_prepare_kretprobe(ri, regs);
+
+ __llist_add(&ri->llist, &current->kretprobe_instances);
+
return 0;
}
NOKPROBE_SYMBOL(pre_handler_kretprobe);
@@ -2112,18 +2003,24 @@ int register_kretprobe(struct kretprobe *rp)
rp->maxactive = num_possible_cpus();
#endif
}
- raw_spin_lock_init(&rp->lock);
- INIT_HLIST_HEAD(&rp->free_instances);
+ rp->freelist.head = NULL;
+ rp->rph = kzalloc(sizeof(struct kretprobe_holder), GFP_KERNEL);
+ if (!rp->rph)
+ return -ENOMEM;
+
+ rp->rph->rp = rp;
for (i = 0; i < rp->maxactive; i++) {
- inst = kmalloc(sizeof(struct kretprobe_instance) +
+ inst = kzalloc(sizeof(struct kretprobe_instance) +
rp->data_size, GFP_KERNEL);
if (inst == NULL) {
+ refcount_set(&rp->rph->ref, i);
free_rp_inst(rp);
return -ENOMEM;
}
- INIT_HLIST_NODE(&inst->hlist);
- hlist_add_head(&inst->hlist, &rp->free_instances);
+ inst->rph = rp->rph;
+ freelist_add(&inst->freelist, &rp->freelist);
}
+ refcount_set(&rp->rph->ref, i);
rp->nmissed = 0;
/* Establish function entry probe point */
@@ -2165,16 +2062,18 @@ void unregister_kretprobes(struct kretprobe **rps, int num)
if (num <= 0)
return;
mutex_lock(&kprobe_mutex);
- for (i = 0; i < num; i++)
+ for (i = 0; i < num; i++) {
if (__unregister_kprobe_top(&rps[i]->kp) < 0)
rps[i]->kp.addr = NULL;
+ rps[i]->rph->rp = NULL;
+ }
mutex_unlock(&kprobe_mutex);
synchronize_rcu();
for (i = 0; i < num; i++) {
if (rps[i]->kp.addr) {
__unregister_kprobe_bottom(&rps[i]->kp);
- cleanup_rp_inst(rps[i]);
+ free_rp_inst(rps[i]);
}
}
}
@@ -2566,11 +2465,8 @@ static int __init init_kprobes(void)
/* FIXME allocate the probe table, currently defined statically */
/* initialize all list heads */
- for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
+ for (i = 0; i < KPROBE_TABLE_SIZE; i++)
INIT_HLIST_HEAD(&kprobe_table[i]);
- INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
- raw_spin_lock_init(&(kretprobe_table_locks[i].lock));
- }
err = populate_kprobe_blacklist(__start_kprobe_blacklist,
__stop_kprobe_blacklist);
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 520a0d4e1ec2..285be671fbd1 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4057,7 +4057,7 @@ void lockdep_hardirqs_on_prepare(unsigned long ip)
if (unlikely(in_nmi()))
return;
- if (unlikely(__this_cpu_read(lockdep_recursion)))
+ if (unlikely(this_cpu_read(lockdep_recursion)))
return;
if (unlikely(lockdep_hardirqs_enabled())) {
@@ -4126,7 +4126,7 @@ void noinstr lockdep_hardirqs_on(unsigned long ip)
goto skip_checks;
}
- if (unlikely(__this_cpu_read(lockdep_recursion)))
+ if (unlikely(this_cpu_read(lockdep_recursion)))
return;
if (lockdep_hardirqs_enabled()) {
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 06895ef85d69..2a52f42f64b6 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -409,7 +409,7 @@ bool rcu_eqs_special_set(int cpu)
*
* The caller must have disabled interrupts and must not be idle.
*/
-void rcu_momentary_dyntick_idle(void)
+notrace void rcu_momentary_dyntick_idle(void)
{
int special;
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 865bb0228ab6..890b79cf0e7c 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -178,7 +178,7 @@ static void ack_state(struct multi_stop_data *msdata)
set_state(msdata, msdata->state + 1);
}
-void __weak stop_machine_yield(const struct cpumask *cpumask)
+notrace void __weak stop_machine_yield(const struct cpumask *cpumask)
{
cpu_relax();
}
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 3624b9b5835d..387b4bef7dd1 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -425,11 +425,6 @@ static inline void debug_hrtimer_deactivate(struct hrtimer *timer)
debug_object_deactivate(timer, &hrtimer_debug_descr);
}
-static inline void debug_hrtimer_free(struct hrtimer *timer)
-{
- debug_object_free(timer, &hrtimer_debug_descr);
-}
-
static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
enum hrtimer_mode mode);
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index ca4e6d57d68b..00629e658ca1 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -172,10 +172,6 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
u64 oval, nval, ointerval, ninterval;
struct cpu_itimer *it = &tsk->signal->it[clock_id];
- /*
- * Use the to_ktime conversion because that clamps the maximum
- * value to KTIME_MAX and avoid multiplication overflows.
- */
nval = timespec64_to_ns(&value->it_value);
ninterval = timespec64_to_ns(&value->it_interval);
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index eddcf4970444..a5cffe2a1770 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -59,7 +59,8 @@ static struct clocksource clocksource_jiffies = {
};
__cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock);
-__cacheline_aligned_in_smp seqcount_t jiffies_seq;
+__cacheline_aligned_in_smp seqcount_raw_spinlock_t jiffies_seq =
+ SEQCNT_RAW_SPINLOCK_ZERO(jiffies_seq, &jiffies_lock);
#if (BITS_PER_LONG < 64)
u64 get_jiffies_64(void)
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 0642013dace4..b1b9b12899f5 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -68,13 +68,13 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
return (cyc * mult) >> shift;
}
-struct clock_read_data *sched_clock_read_begin(unsigned int *seq)
+notrace struct clock_read_data *sched_clock_read_begin(unsigned int *seq)
{
*seq = raw_read_seqcount_latch(&cd.seq);
return cd.read_data + (*seq & 1);
}
-int sched_clock_read_retry(unsigned int seq)
+notrace int sched_clock_read_retry(unsigned int seq)
{
return read_seqcount_latch_retry(&cd.seq, seq);
}
diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h
index 099737f6f10c..6c2cbd9ef999 100644
--- a/kernel/time/timekeeping.h
+++ b/kernel/time/timekeeping.h
@@ -26,7 +26,7 @@ extern void do_timer(unsigned long ticks);
extern void update_wall_time(void);
extern raw_spinlock_t jiffies_lock;
-extern seqcount_t jiffies_seq;
+extern seqcount_raw_spinlock_t jiffies_seq;
#define CS_NAME_LEN 32
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index de37e33a868d..c3ad64fb9d8b 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -732,11 +732,6 @@ static inline void debug_timer_deactivate(struct timer_list *timer)
debug_object_deactivate(timer, &timer_debug_descr);
}
-static inline void debug_timer_free(struct timer_list *timer)
-{
- debug_object_free(timer, &timer_debug_descr);
-}
-
static inline void debug_timer_assert_init(struct timer_list *timer)
{
debug_object_assert_init(timer, &timer_debug_descr);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index b911e9f6d9f5..97c7a7782db7 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1731,7 +1731,8 @@ NOKPROBE_SYMBOL(kprobe_dispatcher);
static int
kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
{
- struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);
+ struct kretprobe *rp = get_kretprobe(ri);
+ struct trace_kprobe *tk = container_of(rp, struct trace_kprobe, rp);
raw_cpu_inc(*tk->nhit);
diff --git a/scripts/atomic/gen-atomic-fallback.sh b/scripts/atomic/gen-atomic-fallback.sh
index 693dfa1de430..317a6cec76e1 100755
--- a/scripts/atomic/gen-atomic-fallback.sh
+++ b/scripts/atomic/gen-atomic-fallback.sh
@@ -144,15 +144,11 @@ gen_proto_order_variants()
printf "#endif /* ${basename}_relaxed */\n\n"
}
-gen_xchg_fallbacks()
+gen_order_fallbacks()
{
local xchg="$1"; shift
+
cat <<EOF
-#ifndef ${xchg}_relaxed
-#define ${xchg}_relaxed ${xchg}
-#define ${xchg}_acquire ${xchg}
-#define ${xchg}_release ${xchg}
-#else /* ${xchg}_relaxed */
#ifndef ${xchg}_acquire
#define ${xchg}_acquire(...) \\
@@ -169,11 +165,62 @@ cat <<EOF
__atomic_op_fence(${xchg}, __VA_ARGS__)
#endif
-#endif /* ${xchg}_relaxed */
+EOF
+}
+
+gen_xchg_fallbacks()
+{
+ local xchg="$1"; shift
+ printf "#ifndef ${xchg}_relaxed\n"
+
+ gen_basic_fallbacks ${xchg}
+
+ printf "#else /* ${xchg}_relaxed */\n"
+
+ gen_order_fallbacks ${xchg}
+
+ printf "#endif /* ${xchg}_relaxed */\n\n"
+}
+
+gen_try_cmpxchg_fallback()
+{
+ local order="$1"; shift;
+
+cat <<EOF
+#ifndef ${ARCH}try_cmpxchg${order}
+#define ${ARCH}try_cmpxchg${order}(_ptr, _oldp, _new) \\
+({ \\
+ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \\
+ ___r = ${ARCH}cmpxchg${order}((_ptr), ___o, (_new)); \\
+ if (unlikely(___r != ___o)) \\
+ *___op = ___r; \\
+ likely(___r == ___o); \\
+})
+#endif /* ${ARCH}try_cmpxchg${order} */
EOF
}
+gen_try_cmpxchg_fallbacks()
+{
+ printf "#ifndef ${ARCH}try_cmpxchg_relaxed\n"
+ printf "#ifdef ${ARCH}try_cmpxchg\n"
+
+ gen_basic_fallbacks "${ARCH}try_cmpxchg"
+
+ printf "#endif /* ${ARCH}try_cmpxchg */\n\n"
+
+ for order in "" "_acquire" "_release" "_relaxed"; do
+ gen_try_cmpxchg_fallback "${order}"
+ done
+
+ printf "#else /* ${ARCH}try_cmpxchg_relaxed */\n"
+
+ gen_order_fallbacks "${ARCH}try_cmpxchg"
+
+ printf "#endif /* ${ARCH}try_cmpxchg_relaxed */\n\n"
+}
+
cat << EOF
// SPDX-License-Identifier: GPL-2.0
@@ -191,6 +238,8 @@ for xchg in "${ARCH}xchg" "${ARCH}cmpxchg" "${ARCH}cmpxchg64"; do
gen_xchg_fallbacks "${xchg}"
done
+gen_try_cmpxchg_fallbacks
+
grep '^[a-z]' "$1" | while read name meta args; do
gen_proto "${meta}" "${name}" "${ARCH}" "atomic" "int" ${args}
done
diff --git a/scripts/atomic/gen-atomic-instrumented.sh b/scripts/atomic/gen-atomic-instrumented.sh
index 2b7fec7e6abc..5766ffcec7c5 100755
--- a/scripts/atomic/gen-atomic-instrumented.sh
+++ b/scripts/atomic/gen-atomic-instrumented.sh
@@ -112,14 +112,31 @@ gen_xchg()
local xchg="$1"; shift
local mult="$1"; shift
+ if [ "${xchg%${xchg#try_cmpxchg}}" = "try_cmpxchg" ] ; then
+
+cat <<EOF
+#define ${xchg}(ptr, oldp, ...) \\
+({ \\
+ typeof(ptr) __ai_ptr = (ptr); \\
+ typeof(oldp) __ai_oldp = (oldp); \\
+ instrument_atomic_write(__ai_ptr, ${mult}sizeof(*__ai_ptr)); \\
+ instrument_atomic_write(__ai_oldp, ${mult}sizeof(*__ai_oldp)); \\
+ arch_${xchg}(__ai_ptr, __ai_oldp, __VA_ARGS__); \\
+})
+EOF
+
+ else
+
cat <<EOF
-#define ${xchg}(ptr, ...) \\
-({ \\
- typeof(ptr) __ai_ptr = (ptr); \\
- instrument_atomic_write(__ai_ptr, ${mult}sizeof(*__ai_ptr)); \\
- arch_${xchg}(__ai_ptr, __VA_ARGS__); \\
+#define ${xchg}(ptr, ...) \\
+({ \\
+ typeof(ptr) __ai_ptr = (ptr); \\
+ instrument_atomic_write(__ai_ptr, ${mult}sizeof(*__ai_ptr)); \\
+ arch_${xchg}(__ai_ptr, __VA_ARGS__); \\
})
EOF
+
+ fi
}
gen_optional_xchg()
@@ -169,7 +186,7 @@ grep '^[a-z]' "$1" | while read name meta args; do
gen_proto "${meta}" "${name}" "atomic64" "s64" ${args}
done
-for xchg in "xchg" "cmpxchg" "cmpxchg64"; do
+for xchg in "xchg" "cmpxchg" "cmpxchg64" "try_cmpxchg"; do
for order in "" "_acquire" "_release" "_relaxed"; do
gen_optional_xchg "${xchg}" "${order}"
done
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 2ac0fff6dad8..9b185bf82da8 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -23,7 +23,6 @@
#include "nfit_test.h"
#include "../watermark.h"
-#include <asm/copy_mc_test.h>
#include <asm/mce.h>
/*
@@ -3284,107 +3283,6 @@ static struct platform_driver nfit_test_driver = {
.id_table = nfit_test_id,
};
-static char copy_mc_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
-
-enum INJECT {
- INJECT_NONE,
- INJECT_SRC,
- INJECT_DST,
-};
-
-static void copy_mc_test_init(char *dst, char *src, size_t size)
-{
- size_t i;
-
- memset(dst, 0xff, size);
- for (i = 0; i < size; i++)
- src[i] = (char) i;
-}
-
-static bool copy_mc_test_validate(unsigned char *dst, unsigned char *src,
- size_t size, unsigned long rem)
-{
- size_t i;
-
- for (i = 0; i < size - rem; i++)
- if (dst[i] != (unsigned char) i) {
- pr_info_once("%s:%d: offset: %zd got: %#x expect: %#x\n",
- __func__, __LINE__, i, dst[i],
- (unsigned char) i);
- return false;
- }
- for (i = size - rem; i < size; i++)
- if (dst[i] != 0xffU) {
- pr_info_once("%s:%d: offset: %zd got: %#x expect: 0xff\n",
- __func__, __LINE__, i, dst[i]);
- return false;
- }
- return true;
-}
-
-void copy_mc_test(void)
-{
- char *inject_desc[] = { "none", "source", "destination" };
- enum INJECT inj;
-
- if (IS_ENABLED(CONFIG_COPY_MC_TEST)) {
- pr_info("%s: run...\n", __func__);
- } else {
- pr_info("%s: disabled, skip.\n", __func__);
- return;
- }
-
- for (inj = INJECT_NONE; inj <= INJECT_DST; inj++) {
- int i;
-
- pr_info("%s: inject: %s\n", __func__, inject_desc[inj]);
- for (i = 0; i < 512; i++) {
- unsigned long expect, rem;
- void *src, *dst;
- bool valid;
-
- switch (inj) {
- case INJECT_NONE:
- copy_mc_inject_src(NULL);
- copy_mc_inject_dst(NULL);
- dst = &copy_mc_buf[2048];
- src = &copy_mc_buf[1024 - i];
- expect = 0;
- break;
- case INJECT_SRC:
- copy_mc_inject_src(&copy_mc_buf[1024]);
- copy_mc_inject_dst(NULL);
- dst = &copy_mc_buf[2048];
- src = &copy_mc_buf[1024 - i];
- expect = 512 - i;
- break;
- case INJECT_DST:
- copy_mc_inject_src(NULL);
- copy_mc_inject_dst(&copy_mc_buf[2048]);
- dst = &copy_mc_buf[2048 - i];
- src = &copy_mc_buf[1024];
- expect = 512 - i;
- break;
- }
-
- copy_mc_test_init(dst, src, 512);
- rem = copy_mc_fragile(dst, src, 512);
- valid = copy_mc_test_validate(dst, src, 512, expect);
- if (rem == expect && valid)
- continue;
- pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n",
- __func__,
- ((unsigned long) dst) & ~PAGE_MASK,
- ((unsigned long ) src) & ~PAGE_MASK,
- 512, i, rem, valid ? "valid" : "bad",
- expect);
- }
- }
-
- copy_mc_inject_src(NULL);
- copy_mc_inject_dst(NULL);
-}
-
static __init int nfit_test_init(void)
{
int rc, i;
@@ -3393,7 +3291,6 @@ static __init int nfit_test_init(void)
libnvdimm_test();
acpi_nfit_test();
device_dax_test();
- copy_mc_test();
dax_pmem_test();
dax_pmem_core_test();
#ifdef CONFIG_DEV_DAX_PMEM_COMPAT