summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMilosz Wasilewski <milosz.wasilewski@hackbox.linaro.org>2015-11-29 23:02:08 +0000
committerMilosz Wasilewski <milosz.wasilewski@hackbox.linaro.org>2015-11-29 23:02:08 +0000
commitf8a700c636fa74706733014701fbeb1ac660a85d (patch)
tree51d4951dffcf9725c8f7ec74fa0c35562b22567f
parent751dc08e504621a230a1b556abdd2c660953708d (diff)
Update to commit: 8005c49d9aea74d382f474ce11afbbc7d7130bec8005c49d9aea74d382f474ce11afbbc7d7130bec
from repo: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git branch: master
-rw-r--r--Makefile55
-rw-r--r--breakpoints/Makefile19
-rw-r--r--capabilities/.gitignore2
-rw-r--r--capabilities/Makefile18
-rw-r--r--capabilities/test_execve.c427
-rw-r--r--capabilities/validate_cap.c73
-rw-r--r--cpu-hotplug/Makefile7
-rwxr-xr-x[-rw-r--r--]cpu-hotplug/cpu-on-off-test.sh (renamed from cpu-hotplug/on-off-test.sh)0
-rw-r--r--efivarfs/.gitignore2
-rw-r--r--efivarfs/Makefile7
-rwxr-xr-x[-rw-r--r--]efivarfs/efivarfs.sh0
-rw-r--r--exec/Makefile7
-rw-r--r--exec/execveat.c10
-rw-r--r--firmware/Makefile20
-rwxr-xr-x[-rw-r--r--]firmware/fw_filesystem.sh25
-rwxr-xr-x[-rw-r--r--]firmware/fw_userhelper.sh12
-rw-r--r--ftrace/Makefile6
-rw-r--r--ftrace/test.d/00basic/basic4.tc2
-rw-r--r--ftrace/test.d/event/event-enable.tc15
-rw-r--r--ftrace/test.d/event/subsystem-enable.tc15
-rw-r--r--ftrace/test.d/event/toplevel-enable.tc15
-rw-r--r--ftrace/test.d/ftrace/fgraph-filter-stack.tc6
-rw-r--r--ftrace/test.d/ftrace/fgraph-filter.tc2
-rw-r--r--ftrace/test.d/ftrace/func_profiler.tc2
-rw-r--r--ftrace/test.d/kprobe/add_and_remove.tc2
-rw-r--r--ftrace/test.d/kprobe/busy_check.tc2
-rw-r--r--ftrace/test.d/kprobe/kprobe_args.tc2
-rw-r--r--ftrace/test.d/kprobe/kprobe_ftrace.tc14
-rw-r--r--ftrace/test.d/kprobe/kretprobe_args.tc2
-rw-r--r--futex/Makefile29
-rw-r--r--futex/README62
-rw-r--r--futex/functional/.gitignore7
-rw-r--r--futex/functional/Makefile25
-rw-r--r--futex/functional/futex_requeue_pi.c409
-rw-r--r--futex/functional/futex_requeue_pi_mismatched_ops.c135
-rw-r--r--futex/functional/futex_requeue_pi_signal_restart.c223
-rw-r--r--futex/functional/futex_wait_private_mapped_file.c125
-rw-r--r--futex/functional/futex_wait_timeout.c86
-rw-r--r--futex/functional/futex_wait_uninitialized_heap.c124
-rw-r--r--futex/functional/futex_wait_wouldblock.c79
-rwxr-xr-xfutex/functional/run.sh79
-rw-r--r--futex/include/atomic.h83
-rw-r--r--futex/include/futextest.h266
-rw-r--r--futex/include/logging.h153
-rwxr-xr-xfutex/run.sh33
-rwxr-xr-xgen_kselftest_tar.sh55
-rw-r--r--ipc/Makefile11
-rw-r--r--kcmp/Makefile6
-rw-r--r--kselftest.h17
-rwxr-xr-xkselftest_install.sh37
-rw-r--r--lib.mk38
-rw-r--r--lib/Makefile8
-rw-r--r--lib/printf.sh10
-rw-r--r--membarrier/.gitignore1
-rw-r--r--membarrier/Makefile10
-rw-r--r--membarrier/membarrier_test.c118
-rw-r--r--memfd/Makefile18
-rw-r--r--memfd/memfd_test.c3
-rwxr-xr-x[-rw-r--r--]memfd/run_fuse_test.sh0
-rw-r--r--memory-hotplug/Makefile9
-rwxr-xr-x[-rw-r--r--]memory-hotplug/mem-on-off-test.sh (renamed from memory-hotplug/on-off-test.sh)0
-rw-r--r--mount/.gitignore1
-rw-r--r--mount/Makefile20
-rw-r--r--mqueue/Makefile22
-rw-r--r--mqueue/mq_open_tests.c1
-rw-r--r--mqueue/mq_perf_tests.c1
-rw-r--r--net/Makefile10
-rw-r--r--net/psock_fanout.c71
-rw-r--r--net/psock_lib.h29
-rwxr-xr-x[-rw-r--r--]net/run_afpackettests0
-rwxr-xr-x[-rw-r--r--]net/run_netsocktests0
-rw-r--r--powerpc/Makefile42
-rw-r--r--powerpc/benchmarks/.gitignore1
-rw-r--r--powerpc/benchmarks/Makefile12
-rw-r--r--powerpc/benchmarks/gettimeofday.c31
-rw-r--r--powerpc/copyloops/Makefile16
-rw-r--r--powerpc/copyloops/asm/ppc_asm.h33
-rw-r--r--powerpc/dscr/.gitignore7
-rw-r--r--powerpc/dscr/Makefile14
-rw-r--r--powerpc/dscr/dscr.h127
-rw-r--r--powerpc/dscr/dscr_default_test.c127
-rw-r--r--powerpc/dscr/dscr_explicit_test.c71
-rw-r--r--powerpc/dscr/dscr_inherit_exec_test.c117
-rw-r--r--powerpc/dscr/dscr_inherit_test.c95
-rw-r--r--powerpc/dscr/dscr_sysfs_test.c97
-rw-r--r--powerpc/dscr/dscr_sysfs_thread_test.c80
-rw-r--r--powerpc/dscr/dscr_user_test.c61
-rw-r--r--powerpc/harness.c47
-rw-r--r--powerpc/mm/Makefile16
-rw-r--r--powerpc/mm/hugetlb_vs_thp_test.c8
-rw-r--r--powerpc/pmu/Makefile48
-rw-r--r--powerpc/pmu/ebb/Makefile13
-rw-r--r--powerpc/pmu/ebb/back_to_back_ebbs_test.c2
-rw-r--r--powerpc/pmu/ebb/close_clears_pmcc_test.c2
-rw-r--r--powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c2
-rw-r--r--powerpc/pmu/ebb/cpu_event_vs_ebb_test.c2
-rw-r--r--powerpc/pmu/ebb/cycles_test.c2
-rw-r--r--powerpc/pmu/ebb/cycles_with_freeze_test.c2
-rw-r--r--powerpc/pmu/ebb/cycles_with_mmcr2_test.c2
-rw-r--r--powerpc/pmu/ebb/ebb.c11
-rw-r--r--powerpc/pmu/ebb/ebb.h1
-rw-r--r--powerpc/pmu/ebb/ebb_on_child_test.c2
-rw-r--r--powerpc/pmu/ebb/ebb_on_willing_child_test.c2
-rw-r--r--powerpc/pmu/ebb/ebb_vs_cpu_event_test.c2
-rw-r--r--powerpc/pmu/ebb/event_attributes_test.c2
-rw-r--r--powerpc/pmu/ebb/fork_cleanup_test.c2
-rw-r--r--powerpc/pmu/ebb/instruction_count_test.c2
-rw-r--r--powerpc/pmu/ebb/lost_exception_test.c2
-rw-r--r--powerpc/pmu/ebb/multi_counter_test.c2
-rw-r--r--powerpc/pmu/ebb/multi_ebb_procs_test.c2
-rw-r--r--powerpc/pmu/ebb/no_handler_test.c2
-rw-r--r--powerpc/pmu/ebb/pmae_handling_test.c2
-rw-r--r--powerpc/pmu/ebb/pmc56_overflow_test.c2
-rw-r--r--powerpc/pmu/ebb/reg_access_test.c2
-rw-r--r--powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c2
-rw-r--r--powerpc/pmu/ebb/task_event_vs_ebb_test.c2
-rw-r--r--powerpc/pmu/lib.c47
-rw-r--r--powerpc/pmu/lib.h1
-rw-r--r--powerpc/primitives/Makefile15
-rw-r--r--powerpc/primitives/load_unaligned_zeropad.c9
-rw-r--r--powerpc/stringloops/Makefile15
-rw-r--r--powerpc/switch_endian/.gitignore2
-rw-r--r--powerpc/switch_endian/Makefile18
-rw-r--r--powerpc/switch_endian/check.S100
-rw-r--r--powerpc/switch_endian/common.h6
-rw-r--r--powerpc/switch_endian/switch_endian_test.S81
-rw-r--r--powerpc/syscalls/.gitignore1
-rw-r--r--powerpc/syscalls/Makefile12
-rw-r--r--powerpc/syscalls/ipc.h47
-rw-r--r--powerpc/syscalls/ipc_unmuxed.c61
-rw-r--r--powerpc/tm/.gitignore1
-rw-r--r--powerpc/tm/Makefile18
-rw-r--r--powerpc/tm/tm-syscall-asm.S27
-rw-r--r--powerpc/tm/tm-syscall.c132
-rw-r--r--powerpc/utils.h3
-rw-r--r--powerpc/vphn/.gitignore1
-rw-r--r--powerpc/vphn/Makefile12
-rw-r--r--powerpc/vphn/test-vphn.c410
l---------powerpc/vphn/vphn.c1
l---------powerpc/vphn/vphn.h1
-rw-r--r--pstore/Makefile15
-rwxr-xr-xpstore/common_tests83
-rwxr-xr-xpstore/pstore_crash_test30
-rwxr-xr-xpstore/pstore_post_reboot_tests77
-rwxr-xr-xpstore/pstore_tests30
-rw-r--r--ptrace/Makefile5
-rwxr-xr-xrcutorture/bin/configinit.sh2
-rwxr-xr-xrcutorture/bin/kvm-recheck.sh4
-rwxr-xr-xrcutorture/bin/kvm.sh33
-rw-r--r--rcutorture/configs/lock/CFLIST4
-rw-r--r--rcutorture/configs/lock/LOCK056
-rw-r--r--rcutorture/configs/lock/LOCK05.boot1
-rw-r--r--rcutorture/configs/lock/LOCK066
-rw-r--r--rcutorture/configs/lock/LOCK06.boot1
-rw-r--r--rcutorture/configs/rcu/CFcommon3
-rw-r--r--rcutorture/configs/rcu/SRCU-N1
-rw-r--r--rcutorture/configs/rcu/SRCU-P1
-rw-r--r--rcutorture/configs/rcu/SRCU-P.boot2
-rw-r--r--rcutorture/configs/rcu/TASKS015
-rw-r--r--rcutorture/configs/rcu/TASKS021
-rw-r--r--rcutorture/configs/rcu/TASKS032
-rw-r--r--rcutorture/configs/rcu/TINY022
-rw-r--r--rcutorture/configs/rcu/TINY02.boot1
-rw-r--r--rcutorture/configs/rcu/TREE012
-rw-r--r--rcutorture/configs/rcu/TREE023
-rw-r--r--rcutorture/configs/rcu/TREE02-T2
-rw-r--r--rcutorture/configs/rcu/TREE039
-rw-r--r--rcutorture/configs/rcu/TREE03.boot1
-rw-r--r--rcutorture/configs/rcu/TREE047
-rw-r--r--rcutorture/configs/rcu/TREE055
-rw-r--r--rcutorture/configs/rcu/TREE065
-rw-r--r--rcutorture/configs/rcu/TREE06.boot1
-rw-r--r--rcutorture/configs/rcu/TREE073
-rw-r--r--rcutorture/configs/rcu/TREE087
-rw-r--r--rcutorture/configs/rcu/TREE08-T2
-rw-r--r--rcutorture/configs/rcu/TREE08-T.boot1
-rw-r--r--rcutorture/configs/rcu/TREE08.boot1
-rw-r--r--rcutorture/configs/rcu/TREE092
-rw-r--r--rcutorture/doc/TREE_RCU-kconfig.txt35
-rw-r--r--seccomp/.gitignore1
-rw-r--r--seccomp/Makefile10
-rw-r--r--seccomp/seccomp_bpf.c2208
-rw-r--r--seccomp/test_harness.h534
-rw-r--r--size/Makefile7
-rw-r--r--static_keys/Makefile8
-rwxr-xr-xstatic_keys/test_static_keys.sh16
-rw-r--r--sysctl/Makefile12
-rwxr-xr-x[-rw-r--r--]sysctl/run_numerictests0
-rwxr-xr-x[-rw-r--r--]sysctl/run_stringtests0
-rw-r--r--timers/.gitignore18
-rw-r--r--timers/Makefile39
-rw-r--r--timers/adjtick.c221
-rw-r--r--timers/alarmtimer-suspend.c189
-rw-r--r--timers/change_skew.c107
-rw-r--r--timers/clocksource-switch.c179
-rw-r--r--timers/inconsistency-check.c204
-rw-r--r--timers/leap-a-day.c388
-rw-r--r--timers/leapcrash.c120
-rw-r--r--timers/mqueue-lat.c124
-rw-r--r--timers/nanosleep.c175
-rw-r--r--timers/nsleep-lat.c190
-rw-r--r--timers/posix_timers.c9
-rw-r--r--timers/raw_skew.c154
-rw-r--r--timers/rtctest.c271
-rw-r--r--timers/set-2038.c144
-rw-r--r--timers/set-tai.c79
-rw-r--r--timers/set-timer-lat.c216
-rw-r--r--timers/skew_consistency.c89
-rw-r--r--timers/threadtest.c204
-rw-r--r--timers/valid-adjtimex.c202
-rw-r--r--user/Makefile5
-rw-r--r--vm/Makefile23
-rw-r--r--vm/compaction_test.c225
-rw-r--r--vm/hugetlbfstest.c84
-rw-r--r--vm/map_hugetlb.c6
-rw-r--r--vm/mlock2-tests.c737
-rw-r--r--vm/on-fault-limit.c47
-rwxr-xr-x[-rw-r--r--]vm/run_vmtests57
-rw-r--r--vm/userfaultfd.c645
-rw-r--r--x86/.gitignore2
-rw-r--r--x86/Makefile65
-rwxr-xr-xx86/check_cc.sh16
-rw-r--r--x86/entry_from_vm86.c238
-rw-r--r--x86/ldt_gdt.c576
-rw-r--r--x86/ptrace_syscall.c294
-rw-r--r--x86/raw_syscall_helper_32.S46
-rw-r--r--x86/sigreturn.c684
-rw-r--r--x86/single_step_syscall.c181
-rw-r--r--x86/syscall_arg_fault.c130
-rw-r--r--x86/syscall_nt.c54
-rw-r--r--x86/sysret_ss_attrs.c112
-rw-r--r--x86/test_FCMOV.c93
-rw-r--r--x86/test_FCOMI.c331
-rw-r--r--x86/test_FISTTP.c137
-rw-r--r--x86/test_syscall_vdso.c401
-rw-r--r--x86/thunks.S67
-rw-r--r--x86/thunks_32.S55
-rw-r--r--x86/trivial_32bit_program.c18
-rw-r--r--x86/trivial_64bit_program.c18
-rw-r--r--x86/unwind_vdso.c211
-rw-r--r--zram/Makefile9
-rw-r--r--zram/README40
-rwxr-xr-xzram/zram.sh27
-rwxr-xr-xzram/zram01.sh99
-rwxr-xr-xzram/zram02.sh54
-rwxr-xr-xzram/zram_lib.sh233
246 files changed, 17365 insertions, 497 deletions
diff --git a/Makefile b/Makefile
index 4e51122..c8edff6 100644
--- a/Makefile
+++ b/Makefile
@@ -4,24 +4,44 @@ TARGETS += efivarfs
TARGETS += exec
TARGETS += firmware
TARGETS += ftrace
+TARGETS += futex
TARGETS += kcmp
+TARGETS += lib
+TARGETS += membarrier
TARGETS += memfd
TARGETS += memory-hotplug
TARGETS += mount
TARGETS += mqueue
TARGETS += net
TARGETS += powerpc
+TARGETS += pstore
TARGETS += ptrace
+TARGETS += seccomp
TARGETS += size
+TARGETS += static_keys
TARGETS += sysctl
+ifneq (1, $(quicktest))
TARGETS += timers
+endif
TARGETS += user
TARGETS += vm
+TARGETS += x86
+TARGETS += zram
#Please keep the TARGETS list alphabetically sorted
+# Run "make quicktest=1 run_tests" or
+# "make quicktest=1 kselftest from top level Makefile
TARGETS_HOTPLUG = cpu-hotplug
TARGETS_HOTPLUG += memory-hotplug
+# Clear LDFLAGS and MAKEFLAGS if called from main
+# Makefile to avoid test build failures when test
+# Makefile doesn't have explicit build rules.
+ifeq (1,$(MAKELEVEL))
+override LDFLAGS =
+override MAKEFLAGS =
+endif
+
all:
for TARGET in $(TARGETS); do \
make -C $$TARGET; \
@@ -47,7 +67,42 @@ clean_hotplug:
make -C $$TARGET clean; \
done;
+run_pstore_crash:
+ make -C pstore run_crash
+
+INSTALL_PATH ?= install
+INSTALL_PATH := $(abspath $(INSTALL_PATH))
+ALL_SCRIPT := $(INSTALL_PATH)/run_kselftest.sh
+
+install:
+ifdef INSTALL_PATH
+ @# Ask all targets to install their files
+ mkdir -p $(INSTALL_PATH)
+ for TARGET in $(TARGETS); do \
+ make -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \
+ done;
+
+ @# Ask all targets to emit their test scripts
+ echo "#!/bin/bash" > $(ALL_SCRIPT)
+ echo "cd \$$(dirname \$$0)" >> $(ALL_SCRIPT)
+ echo "ROOT=\$$PWD" >> $(ALL_SCRIPT)
+
+ for TARGET in $(TARGETS); do \
+ echo "echo ; echo Running tests in $$TARGET" >> $(ALL_SCRIPT); \
+ echo "echo ========================================" >> $(ALL_SCRIPT); \
+ echo "cd $$TARGET" >> $(ALL_SCRIPT); \
+ make -s --no-print-directory -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
+ echo "cd \$$ROOT" >> $(ALL_SCRIPT); \
+ done;
+
+ chmod u+x $(ALL_SCRIPT)
+else
+ $(error Error: set INSTALL_PATH to use install)
+endif
+
clean:
for TARGET in $(TARGETS); do \
make -C $$TARGET clean; \
done;
+
+.PHONY: install
diff --git a/breakpoints/Makefile b/breakpoints/Makefile
index e18b42b..c0d9570 100644
--- a/breakpoints/Makefile
+++ b/breakpoints/Makefile
@@ -1,23 +1,14 @@
# Taken from perf makefile
uname_M := $(shell uname -m 2>/dev/null || echo not)
-ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/)
-ifeq ($(ARCH),i386)
- ARCH := x86
-endif
-ifeq ($(ARCH),x86_64)
- ARCH := x86
-endif
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
-
-all:
ifeq ($(ARCH),x86)
- gcc breakpoint_test.c -o breakpoint_test
-else
- echo "Not an x86 target, can't build breakpoints selftests"
+TEST_PROGS := breakpoint_test
endif
-run_tests:
- @./breakpoint_test || echo "breakpoints selftests: [FAIL]"
+all: $(TEST_PROGS)
+
+include ../lib.mk
clean:
rm -fr breakpoint_test
diff --git a/capabilities/.gitignore b/capabilities/.gitignore
new file mode 100644
index 0000000..b732dd0
--- /dev/null
+++ b/capabilities/.gitignore
@@ -0,0 +1,2 @@
+test_execve
+validate_cap
diff --git a/capabilities/Makefile b/capabilities/Makefile
new file mode 100644
index 0000000..8c8f0c1
--- /dev/null
+++ b/capabilities/Makefile
@@ -0,0 +1,18 @@
+all:
+
+include ../lib.mk
+
+.PHONY: all clean
+
+TARGETS := validate_cap test_execve
+TEST_PROGS := test_execve
+
+CFLAGS := -O2 -g -std=gnu99 -Wall -lcap-ng
+
+all: $(TARGETS)
+
+clean:
+ $(RM) $(TARGETS)
+
+$(TARGETS): %: %.c
+ $(CC) -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
diff --git a/capabilities/test_execve.c b/capabilities/test_execve.c
new file mode 100644
index 0000000..10a21a9
--- /dev/null
+++ b/capabilities/test_execve.c
@@ -0,0 +1,427 @@
+#define _GNU_SOURCE
+
+#include <cap-ng.h>
+#include <err.h>
+#include <linux/capability.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <sched.h>
+#include <sys/mount.h>
+#include <limits.h>
+#include <libgen.h>
+#include <malloc.h>
+#include <sys/wait.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+
+#ifndef PR_CAP_AMBIENT
+#define PR_CAP_AMBIENT 47
+# define PR_CAP_AMBIENT_IS_SET 1
+# define PR_CAP_AMBIENT_RAISE 2
+# define PR_CAP_AMBIENT_LOWER 3
+# define PR_CAP_AMBIENT_CLEAR_ALL 4
+#endif
+
+static int nerrs;
+
+static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap)
+{
+ char buf[4096];
+ int fd;
+ ssize_t written;
+ int buf_len;
+
+ buf_len = vsnprintf(buf, sizeof(buf), fmt, ap);
+ if (buf_len < 0) {
+ err(1, "vsnprintf failed");
+ }
+ if (buf_len >= sizeof(buf)) {
+ errx(1, "vsnprintf output truncated");
+ }
+
+ fd = open(filename, O_WRONLY);
+ if (fd < 0) {
+ if ((errno == ENOENT) && enoent_ok)
+ return;
+ err(1, "open of %s failed", filename);
+ }
+ written = write(fd, buf, buf_len);
+ if (written != buf_len) {
+ if (written >= 0) {
+ errx(1, "short write to %s", filename);
+ } else {
+ err(1, "write to %s failed", filename);
+ }
+ }
+ if (close(fd) != 0) {
+ err(1, "close of %s failed", filename);
+ }
+}
+
+static void maybe_write_file(char *filename, char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vmaybe_write_file(true, filename, fmt, ap);
+ va_end(ap);
+}
+
+static void write_file(char *filename, char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vmaybe_write_file(false, filename, fmt, ap);
+ va_end(ap);
+}
+
+static bool create_and_enter_ns(uid_t inner_uid)
+{
+ uid_t outer_uid;
+ gid_t outer_gid;
+ int i;
+ bool have_outer_privilege;
+
+ outer_uid = getuid();
+ outer_gid = getgid();
+
+ /*
+ * TODO: If we're already root, we could skip creating the userns.
+ */
+
+ if (unshare(CLONE_NEWNS) == 0) {
+ printf("[NOTE]\tUsing global UIDs for tests\n");
+ if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) != 0)
+ err(1, "PR_SET_KEEPCAPS");
+ if (setresuid(inner_uid, inner_uid, -1) != 0)
+ err(1, "setresuid");
+
+ // Re-enable effective caps
+ capng_get_caps_process();
+ for (i = 0; i < CAP_LAST_CAP; i++)
+ if (capng_have_capability(CAPNG_PERMITTED, i))
+ capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, i);
+ if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+ err(1, "capng_apply");
+
+ have_outer_privilege = true;
+ } else if (unshare(CLONE_NEWUSER | CLONE_NEWNS) == 0) {
+ printf("[NOTE]\tUsing a user namespace for tests\n");
+ maybe_write_file("/proc/self/setgroups", "deny");
+ write_file("/proc/self/uid_map", "%d %d 1", inner_uid, outer_uid);
+ write_file("/proc/self/gid_map", "0 %d 1", outer_gid);
+
+ have_outer_privilege = false;
+ } else {
+ errx(1, "must be root or be able to create a userns");
+ }
+
+ if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL) != 0)
+ err(1, "remount everything private");
+
+ return have_outer_privilege;
+}
+
+static void chdir_to_tmpfs(void)
+{
+ char cwd[PATH_MAX];
+ if (getcwd(cwd, sizeof(cwd)) != cwd)
+ err(1, "getcwd");
+
+ if (mount("private_tmp", ".", "tmpfs", 0, "mode=0777") != 0)
+ err(1, "mount private tmpfs");
+
+ if (chdir(cwd) != 0)
+ err(1, "chdir to private tmpfs");
+
+ if (umount2(".", MNT_DETACH) != 0)
+ err(1, "detach private tmpfs");
+}
+
+static void copy_fromat_to(int fromfd, const char *fromname, const char *toname)
+{
+ int from = openat(fromfd, fromname, O_RDONLY);
+ if (from == -1)
+ err(1, "open copy source");
+
+ int to = open(toname, O_CREAT | O_WRONLY | O_EXCL, 0700);
+
+ while (true) {
+ char buf[4096];
+ ssize_t sz = read(from, buf, sizeof(buf));
+ if (sz == 0)
+ break;
+ if (sz < 0)
+ err(1, "read");
+
+ if (write(to, buf, sz) != sz)
+ err(1, "write"); /* no short writes on tmpfs */
+ }
+
+ close(from);
+ close(to);
+}
+
+static bool fork_wait(void)
+{
+ pid_t child = fork();
+ if (child == 0) {
+ nerrs = 0;
+ return true;
+ } else if (child > 0) {
+ int status;
+ if (waitpid(child, &status, 0) != child ||
+ !WIFEXITED(status)) {
+ printf("[FAIL]\tChild died\n");
+ nerrs++;
+ } else if (WEXITSTATUS(status) != 0) {
+ printf("[FAIL]\tChild failed\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tChild succeeded\n");
+ }
+
+ return false;
+ } else {
+ err(1, "fork");
+ }
+}
+
+static void exec_other_validate_cap(const char *name,
+ bool eff, bool perm, bool inh, bool ambient)
+{
+ execl(name, name, (eff ? "1" : "0"),
+ (perm ? "1" : "0"), (inh ? "1" : "0"), (ambient ? "1" : "0"),
+ NULL);
+ err(1, "execl");
+}
+
+static void exec_validate_cap(bool eff, bool perm, bool inh, bool ambient)
+{
+ exec_other_validate_cap("./validate_cap", eff, perm, inh, ambient);
+}
+
+static int do_tests(int uid, const char *our_path)
+{
+ bool have_outer_privilege = create_and_enter_ns(uid);
+
+ int ourpath_fd = open(our_path, O_RDONLY | O_DIRECTORY);
+ if (ourpath_fd == -1)
+ err(1, "open '%s'", our_path);
+
+ chdir_to_tmpfs();
+
+ copy_fromat_to(ourpath_fd, "validate_cap", "validate_cap");
+
+ if (have_outer_privilege) {
+ uid_t gid = getegid();
+
+ copy_fromat_to(ourpath_fd, "validate_cap",
+ "validate_cap_suidroot");
+ if (chown("validate_cap_suidroot", 0, -1) != 0)
+ err(1, "chown");
+ if (chmod("validate_cap_suidroot", S_ISUID | 0700) != 0)
+ err(1, "chmod");
+
+ copy_fromat_to(ourpath_fd, "validate_cap",
+ "validate_cap_suidnonroot");
+ if (chown("validate_cap_suidnonroot", uid + 1, -1) != 0)
+ err(1, "chown");
+ if (chmod("validate_cap_suidnonroot", S_ISUID | 0700) != 0)
+ err(1, "chmod");
+
+ copy_fromat_to(ourpath_fd, "validate_cap",
+ "validate_cap_sgidroot");
+ if (chown("validate_cap_sgidroot", -1, 0) != 0)
+ err(1, "chown");
+ if (chmod("validate_cap_sgidroot", S_ISGID | 0710) != 0)
+ err(1, "chmod");
+
+ copy_fromat_to(ourpath_fd, "validate_cap",
+ "validate_cap_sgidnonroot");
+ if (chown("validate_cap_sgidnonroot", -1, gid + 1) != 0)
+ err(1, "chown");
+ if (chmod("validate_cap_sgidnonroot", S_ISGID | 0710) != 0)
+ err(1, "chmod");
+}
+
+ capng_get_caps_process();
+
+ /* Make sure that i starts out clear */
+ capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
+ if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+ err(1, "capng_apply");
+
+ if (uid == 0) {
+ printf("[RUN]\tRoot => ep\n");
+ if (fork_wait())
+ exec_validate_cap(true, true, false, false);
+ } else {
+ printf("[RUN]\tNon-root => no caps\n");
+ if (fork_wait())
+ exec_validate_cap(false, false, false, false);
+ }
+
+ printf("[OK]\tCheck cap_ambient manipulation rules\n");
+
+ /* We should not be able to add ambient caps yet. */
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != -1 || errno != EPERM) {
+ if (errno == EINVAL)
+ printf("[FAIL]\tPR_CAP_AMBIENT_RAISE isn't supported\n");
+ else
+ printf("[FAIL]\tPR_CAP_AMBIENT_RAISE should have failed eith EPERM on a non-inheritable cap\n");
+ return 1;
+ }
+ printf("[OK]\tPR_CAP_AMBIENT_RAISE failed on non-inheritable cap\n");
+
+ capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_RAW);
+ capng_update(CAPNG_DROP, CAPNG_PERMITTED, CAP_NET_RAW);
+ capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, CAP_NET_RAW);
+ if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+ err(1, "capng_apply");
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_RAW, 0, 0, 0) != -1 || errno != EPERM) {
+ printf("[FAIL]\tPR_CAP_AMBIENT_RAISE should have failed on a non-permitted cap\n");
+ return 1;
+ }
+ printf("[OK]\tPR_CAP_AMBIENT_RAISE failed on non-permitted cap\n");
+
+ capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
+ if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+ err(1, "capng_apply");
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) {
+ printf("[FAIL]\tPR_CAP_AMBIENT_RAISE should have succeeded\n");
+ return 1;
+ }
+ printf("[OK]\tPR_CAP_AMBIENT_RAISE worked\n");
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 1) {
+ printf("[FAIL]\tPR_CAP_AMBIENT_IS_SET is broken\n");
+ return 1;
+ }
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0, 0) != 0)
+ err(1, "PR_CAP_AMBIENT_CLEAR_ALL");
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) {
+ printf("[FAIL]\tPR_CAP_AMBIENT_CLEAR_ALL didn't work\n");
+ return 1;
+ }
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0)
+ err(1, "PR_CAP_AMBIENT_RAISE");
+
+ capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
+ if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+ err(1, "capng_apply");
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) {
+ printf("[FAIL]\tDropping I should have dropped A\n");
+ return 1;
+ }
+
+ printf("[OK]\tBasic manipulation appears to work\n");
+
+ capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
+ if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+ err(1, "capng_apply");
+ if (uid == 0) {
+ printf("[RUN]\tRoot +i => eip\n");
+ if (fork_wait())
+ exec_validate_cap(true, true, true, false);
+ } else {
+ printf("[RUN]\tNon-root +i => i\n");
+ if (fork_wait())
+ exec_validate_cap(false, false, true, false);
+ }
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0)
+ err(1, "PR_CAP_AMBIENT_RAISE");
+
+ printf("[RUN]\tUID %d +ia => eipa\n", uid);
+ if (fork_wait())
+ exec_validate_cap(true, true, true, true);
+
+ /* The remaining tests need real privilege */
+
+ if (!have_outer_privilege) {
+ printf("[SKIP]\tSUID/SGID tests (needs privilege)\n");
+ goto done;
+ }
+
+ if (uid == 0) {
+ printf("[RUN]\tRoot +ia, suidroot => eipa\n");
+ if (fork_wait())
+ exec_other_validate_cap("./validate_cap_suidroot",
+ true, true, true, true);
+
+ printf("[RUN]\tRoot +ia, suidnonroot => ip\n");
+ if (fork_wait())
+ exec_other_validate_cap("./validate_cap_suidnonroot",
+ false, true, true, false);
+
+ printf("[RUN]\tRoot +ia, sgidroot => eipa\n");
+ if (fork_wait())
+ exec_other_validate_cap("./validate_cap_sgidroot",
+ true, true, true, true);
+
+ if (fork_wait()) {
+ printf("[RUN]\tRoot, gid != 0, +ia, sgidroot => eip\n");
+ if (setresgid(1, 1, 1) != 0)
+ err(1, "setresgid");
+ exec_other_validate_cap("./validate_cap_sgidroot",
+ true, true, true, false);
+ }
+
+ printf("[RUN]\tRoot +ia, sgidnonroot => eip\n");
+ if (fork_wait())
+ exec_other_validate_cap("./validate_cap_sgidnonroot",
+ true, true, true, false);
+ } else {
+ printf("[RUN]\tNon-root +ia, sgidnonroot => i\n");
+ exec_other_validate_cap("./validate_cap_sgidnonroot",
+ false, false, true, false);
+
+ if (fork_wait()) {
+ printf("[RUN]\tNon-root +ia, sgidroot => i\n");
+ if (setresgid(1, 1, 1) != 0)
+ err(1, "setresgid");
+ exec_other_validate_cap("./validate_cap_sgidroot",
+ false, false, true, false);
+ }
+ }
+
+done:
+ return nerrs ? 1 : 0;
+}
+
+int main(int argc, char **argv)
+{
+ char *tmp1, *tmp2, *our_path;
+
+ /* Find our path */
+ tmp1 = strdup(argv[0]);
+ if (!tmp1)
+ err(1, "strdup");
+ tmp2 = dirname(tmp1);
+ our_path = strdup(tmp2);
+ if (!our_path)
+ err(1, "strdup");
+ free(tmp1);
+
+ if (fork_wait()) {
+ printf("[RUN]\t+++ Tests with uid == 0 +++\n");
+ return do_tests(0, our_path);
+ }
+
+ if (fork_wait()) {
+ printf("[RUN]\t+++ Tests with uid != 0 +++\n");
+ return do_tests(1, our_path);
+ }
+
+ return nerrs ? 1 : 0;
+}
diff --git a/capabilities/validate_cap.c b/capabilities/validate_cap.c
new file mode 100644
index 0000000..dd3c45f
--- /dev/null
+++ b/capabilities/validate_cap.c
@@ -0,0 +1,73 @@
+#include <cap-ng.h>
+#include <err.h>
+#include <linux/capability.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/prctl.h>
+#include <sys/auxv.h>
+
+#ifndef PR_CAP_AMBIENT
+#define PR_CAP_AMBIENT 47
+# define PR_CAP_AMBIENT_IS_SET 1
+# define PR_CAP_AMBIENT_RAISE 2
+# define PR_CAP_AMBIENT_LOWER 3
+# define PR_CAP_AMBIENT_CLEAR_ALL 4
+#endif
+
+#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 19)
+# define HAVE_GETAUXVAL
+#endif
+
+static bool bool_arg(char **argv, int i)
+{
+ if (!strcmp(argv[i], "0"))
+ return false;
+ else if (!strcmp(argv[i], "1"))
+ return true;
+ else
+ errx(1, "wrong argv[%d]", i);
+}
+
+int main(int argc, char **argv)
+{
+ const char *atsec = "";
+
+ /*
+ * Be careful just in case a setgid or setcapped copy of this
+ * helper gets out.
+ */
+
+ if (argc != 5)
+ errx(1, "wrong argc");
+
+#ifdef HAVE_GETAUXVAL
+ if (getauxval(AT_SECURE))
+ atsec = " (AT_SECURE is set)";
+ else
+ atsec = " (AT_SECURE is not set)";
+#endif
+
+ capng_get_caps_process();
+
+ if (capng_have_capability(CAPNG_EFFECTIVE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 1)) {
+ printf("[FAIL]\tWrong effective state%s\n", atsec);
+ return 1;
+ }
+ if (capng_have_capability(CAPNG_PERMITTED, CAP_NET_BIND_SERVICE) != bool_arg(argv, 2)) {
+ printf("[FAIL]\tWrong permitted state%s\n", atsec);
+ return 1;
+ }
+ if (capng_have_capability(CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 3)) {
+ printf("[FAIL]\tWrong inheritable state%s\n", atsec);
+ return 1;
+ }
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != bool_arg(argv, 4)) {
+ printf("[FAIL]\tWrong ambient state%s\n", atsec);
+ return 1;
+ }
+
+ printf("[OK]\tCapabilities after execve were correct\n");
+ return 0;
+}
diff --git a/cpu-hotplug/Makefile b/cpu-hotplug/Makefile
index e9c28d8..fe1f991 100644
--- a/cpu-hotplug/Makefile
+++ b/cpu-hotplug/Makefile
@@ -1,9 +1,10 @@
all:
-run_tests:
- @/bin/bash ./on-off-test.sh || echo "cpu-hotplug selftests: [FAIL]"
+TEST_PROGS := cpu-on-off-test.sh
+
+include ../lib.mk
run_full_test:
- @/bin/bash ./on-off-test.sh -a || echo "cpu-hotplug selftests: [FAIL]"
+ @/bin/bash ./cpu-on-off-test.sh -a || echo "cpu-hotplug selftests: [FAIL]"
clean:
diff --git a/cpu-hotplug/on-off-test.sh b/cpu-hotplug/cpu-on-off-test.sh
index 98b1d65..98b1d65 100644..100755
--- a/cpu-hotplug/on-off-test.sh
+++ b/cpu-hotplug/cpu-on-off-test.sh
diff --git a/efivarfs/.gitignore b/efivarfs/.gitignore
new file mode 100644
index 0000000..3361849
--- /dev/null
+++ b/efivarfs/.gitignore
@@ -0,0 +1,2 @@
+create-read
+open-unlink
diff --git a/efivarfs/Makefile b/efivarfs/Makefile
index 29e8c6b..736c3dd 100644
--- a/efivarfs/Makefile
+++ b/efivarfs/Makefile
@@ -1,12 +1,13 @@
-CC = $(CROSS_COMPILE)gcc
CFLAGS = -Wall
test_objs = open-unlink create-read
all: $(test_objs)
-run_tests: all
- @/bin/bash ./efivarfs.sh || echo "efivarfs selftests: [FAIL]"
+TEST_PROGS := efivarfs.sh
+TEST_FILES := $(test_objs)
+
+include ../lib.mk
clean:
rm -f $(test_objs)
diff --git a/efivarfs/efivarfs.sh b/efivarfs/efivarfs.sh
index 77edcdc..77edcdc 100644..100755
--- a/efivarfs/efivarfs.sh
+++ b/efivarfs/efivarfs.sh
diff --git a/exec/Makefile b/exec/Makefile
index 66dfc2c..4e400eb 100644
--- a/exec/Makefile
+++ b/exec/Makefile
@@ -1,4 +1,3 @@
-CC = $(CROSS_COMPILE)gcc
CFLAGS = -Wall
BINARIES = execveat
DEPS = execveat.symlink execveat.denatured script subdir
@@ -18,8 +17,10 @@ execveat.denatured: execveat
%: %.c
$(CC) $(CFLAGS) -o $@ $^
-run_tests: all
- ./execveat
+TEST_PROGS := execveat
+TEST_FILES := $(DEPS)
+
+include ../lib.mk
clean:
rm -rf $(BINARIES) $(DEPS) subdir.moved execveat.moved xxxxx*
diff --git a/exec/execveat.c b/exec/execveat.c
index e238c95..8d5d1d2 100644
--- a/exec/execveat.c
+++ b/exec/execveat.c
@@ -30,7 +30,7 @@ static int execveat_(int fd, const char *path, char **argv, char **envp,
#ifdef __NR_execveat
return syscall(__NR_execveat, fd, path, argv, envp, flags);
#else
- errno = -ENOSYS;
+ errno = ENOSYS;
return -1;
#endif
}
@@ -234,6 +234,14 @@ static int run_tests(void)
int fd_cloexec = open_or_die("execveat", O_RDONLY|O_CLOEXEC);
int fd_script_cloexec = open_or_die("script", O_RDONLY|O_CLOEXEC);
+ /* Check if we have execveat at all, and bail early if not */
+ errno = 0;
+ execveat_(-1, NULL, NULL, NULL, 0);
+ if (errno == ENOSYS) {
+ printf("[FAIL] ENOSYS calling execveat - no kernel support?\n");
+ return 1;
+ }
+
/* Change file position to confirm it doesn't affect anything */
lseek(fd, 10, SEEK_SET);
diff --git a/firmware/Makefile b/firmware/Makefile
index e23cce0..9bf8223 100644
--- a/firmware/Makefile
+++ b/firmware/Makefile
@@ -3,25 +3,9 @@
# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
all:
-fw_filesystem:
- @if /bin/sh ./fw_filesystem.sh ; then \
- echo "fw_filesystem: ok"; \
- else \
- echo "fw_filesystem: [FAIL]"; \
- exit 1; \
- fi
+TEST_PROGS := fw_filesystem.sh fw_userhelper.sh
-fw_userhelper:
- @if /bin/sh ./fw_userhelper.sh ; then \
- echo "fw_userhelper: ok"; \
- else \
- echo "fw_userhelper: [FAIL]"; \
- exit 1; \
- fi
-
-run_tests: all fw_filesystem fw_userhelper
+include ../lib.mk
# Nothing to clean up.
clean:
-
-.PHONY: all clean run_tests fw_filesystem fw_userhelper
diff --git a/firmware/fw_filesystem.sh b/firmware/fw_filesystem.sh
index 3fc6c10..c4366dc 100644..100755
--- a/firmware/fw_filesystem.sh
+++ b/firmware/fw_filesystem.sh
@@ -9,7 +9,15 @@ modprobe test_firmware
DIR=/sys/devices/virtual/misc/test_firmware
-OLD_TIMEOUT=$(cat /sys/class/firmware/timeout)
+# CONFIG_FW_LOADER_USER_HELPER has a sysfs class under /sys/class/firmware/
+# These days no one enables CONFIG_FW_LOADER_USER_HELPER so check for that
+# as an indicator for CONFIG_FW_LOADER_USER_HELPER.
+HAS_FW_LOADER_USER_HELPER=$(if [ -d /sys/class/firmware/ ]; then echo yes; else echo no; fi)
+
+if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
+ OLD_TIMEOUT=$(cat /sys/class/firmware/timeout)
+fi
+
OLD_FWPATH=$(cat /sys/module/firmware_class/parameters/path)
FWPATH=$(mktemp -d)
@@ -17,7 +25,9 @@ FW="$FWPATH/test-firmware.bin"
test_finish()
{
- echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout
+ if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
+ echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout
+ fi
echo -n "$OLD_PATH" >/sys/module/firmware_class/parameters/path
rm -f "$FW"
rmdir "$FWPATH"
@@ -25,8 +35,11 @@ test_finish()
trap "test_finish" EXIT
-# Turn down the timeout so failures don't take so long.
-echo 1 >/sys/class/firmware/timeout
+if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
+ # Turn down the timeout so failures don't take so long.
+ echo 1 >/sys/class/firmware/timeout
+fi
+
# Set the kernel search path.
echo -n "$FWPATH" >/sys/module/firmware_class/parameters/path
@@ -41,7 +54,9 @@ if diff -q "$FW" /dev/test_firmware >/dev/null ; then
echo "$0: firmware was not expected to match" >&2
exit 1
else
- echo "$0: timeout works"
+ if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
+ echo "$0: timeout works"
+ fi
fi
# This should succeed via kernel load or will fail after 1 second after
diff --git a/firmware/fw_userhelper.sh b/firmware/fw_userhelper.sh
index 6efbade..b9983f8 100644..100755
--- a/firmware/fw_userhelper.sh
+++ b/firmware/fw_userhelper.sh
@@ -9,7 +9,17 @@ modprobe test_firmware
DIR=/sys/devices/virtual/misc/test_firmware
-OLD_TIMEOUT=$(cat /sys/class/firmware/timeout)
+# CONFIG_FW_LOADER_USER_HELPER has a sysfs class under /sys/class/firmware/
+# These days no one enables CONFIG_FW_LOADER_USER_HELPER so check for that
+# as an indicator for CONFIG_FW_LOADER_USER_HELPER.
+HAS_FW_LOADER_USER_HELPER=$(if [ -d /sys/class/firmware/ ]; then echo yes; else echo no; fi)
+
+if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
+ OLD_TIMEOUT=$(cat /sys/class/firmware/timeout)
+else
+ echo "usermode helper disabled so ignoring test"
+ exit 0
+fi
FWPATH=$(mktemp -d)
FW="$FWPATH/test-firmware.bin"
diff --git a/ftrace/Makefile b/ftrace/Makefile
index 76cc9f1..4e6ed13 100644
--- a/ftrace/Makefile
+++ b/ftrace/Makefile
@@ -1,7 +1,9 @@
all:
-run_tests:
- @/bin/sh ./ftracetest || echo "ftrace selftests: [FAIL]"
+TEST_PROGS := ftracetest
+TEST_DIRS := test.d
+
+include ../lib.mk
clean:
rm -rf logs/*
diff --git a/ftrace/test.d/00basic/basic4.tc b/ftrace/test.d/00basic/basic4.tc
index fd9c49a..aa51f6c 100644
--- a/ftrace/test.d/00basic/basic4.tc
+++ b/ftrace/test.d/00basic/basic4.tc
@@ -2,4 +2,4 @@
# description: Basic event tracing check
test -f available_events -a -f set_event -a -d events
# check scheduler events are available
-grep -q sched available_events && exit 0 || exit -1 \ No newline at end of file
+grep -q sched available_events && exit 0 || exit $FAIL
diff --git a/ftrace/test.d/event/event-enable.tc b/ftrace/test.d/event/event-enable.tc
index 668616d..87eb9d6 100644
--- a/ftrace/test.d/event/event-enable.tc
+++ b/ftrace/test.d/event/event-enable.tc
@@ -9,7 +9,11 @@ do_reset() {
fail() { #msg
do_reset
echo $1
- exit -1
+ exit $FAIL
+}
+
+yield() {
+ ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
}
if [ ! -f set_event -o ! -d events/sched ]; then
@@ -21,7 +25,8 @@ reset_tracer
do_reset
echo 'sched:sched_switch' > set_event
-usleep 1
+
+yield
count=`cat trace | grep sched_switch | wc -l`
if [ $count -eq 0 ]; then
@@ -31,7 +36,8 @@ fi
do_reset
echo 1 > events/sched/sched_switch/enable
-usleep 1
+
+yield
count=`cat trace | grep sched_switch | wc -l`
if [ $count -eq 0 ]; then
@@ -41,7 +47,8 @@ fi
do_reset
echo 0 > events/sched/sched_switch/enable
-usleep 1
+
+yield
count=`cat trace | grep sched_switch | wc -l`
if [ $count -ne 0 ]; then
diff --git a/ftrace/test.d/event/subsystem-enable.tc b/ftrace/test.d/event/subsystem-enable.tc
index 655c415..ced27ef 100644
--- a/ftrace/test.d/event/subsystem-enable.tc
+++ b/ftrace/test.d/event/subsystem-enable.tc
@@ -9,7 +9,11 @@ do_reset() {
fail() { #msg
do_reset
echo $1
- exit -1
+ exit $FAIL
+}
+
+yield() {
+ ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
}
if [ ! -f set_event -o ! -d events/sched ]; then
@@ -21,7 +25,8 @@ reset_tracer
do_reset
echo 'sched:*' > set_event
-usleep 1
+
+yield
count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
if [ $count -lt 3 ]; then
@@ -31,7 +36,8 @@ fi
do_reset
echo 1 > events/sched/enable
-usleep 1
+
+yield
count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
if [ $count -lt 3 ]; then
@@ -41,7 +47,8 @@ fi
do_reset
echo 0 > events/sched/enable
-usleep 1
+
+yield
count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
if [ $count -ne 0 ]; then
diff --git a/ftrace/test.d/event/toplevel-enable.tc b/ftrace/test.d/event/toplevel-enable.tc
index 4808457..0bb5df3 100644
--- a/ftrace/test.d/event/toplevel-enable.tc
+++ b/ftrace/test.d/event/toplevel-enable.tc
@@ -9,7 +9,11 @@ do_reset() {
fail() { #msg
do_reset
echo $1
- exit -1
+ exit $FAIL
+}
+
+yield() {
+ ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
}
if [ ! -f available_events -o ! -f set_event -o ! -d events ]; then
@@ -21,6 +25,9 @@ reset_tracer
do_reset
echo '*:*' > set_event
+
+yield
+
count=`cat trace | grep -v ^# | wc -l`
if [ $count -eq 0 ]; then
fail "none of events are recorded"
@@ -29,6 +36,9 @@ fi
do_reset
echo 1 > events/enable
+
+yield
+
count=`cat trace | grep -v ^# | wc -l`
if [ $count -eq 0 ]; then
fail "none of events are recorded"
@@ -37,6 +47,9 @@ fi
do_reset
echo 0 > events/enable
+
+yield
+
count=`cat trace | grep -v ^# | wc -l`
if [ $count -ne 0 ]; then
fail "any of events should not be recorded"
diff --git a/ftrace/test.d/ftrace/fgraph-filter-stack.tc b/ftrace/test.d/ftrace/fgraph-filter-stack.tc
index c15e018..15c2dba 100644
--- a/ftrace/test.d/ftrace/fgraph-filter-stack.tc
+++ b/ftrace/test.d/ftrace/fgraph-filter-stack.tc
@@ -16,7 +16,9 @@ fi
do_reset() {
reset_tracer
- echo 0 > /proc/sys/kernel/stack_tracer_enabled
+ if [ -e /proc/sys/kernel/stack_tracer_enabled ]; then
+ echo 0 > /proc/sys/kernel/stack_tracer_enabled
+ fi
enable_tracing
clear_trace
echo > set_ftrace_filter
@@ -25,7 +27,7 @@ do_reset() {
fail() { # msg
do_reset
echo $1
- exit -1
+ exit $FAIL
}
disable_tracing
diff --git a/ftrace/test.d/ftrace/fgraph-filter.tc b/ftrace/test.d/ftrace/fgraph-filter.tc
index 6af5f63..0ab2189 100644
--- a/ftrace/test.d/ftrace/fgraph-filter.tc
+++ b/ftrace/test.d/ftrace/fgraph-filter.tc
@@ -17,7 +17,7 @@ do_reset() {
fail() { # msg
do_reset
echo $1
- exit -1
+ exit $FAIL
}
disable_tracing
diff --git a/ftrace/test.d/ftrace/func_profiler.tc b/ftrace/test.d/ftrace/func_profiler.tc
index 2e719cb..7808336 100644
--- a/ftrace/test.d/ftrace/func_profiler.tc
+++ b/ftrace/test.d/ftrace/func_profiler.tc
@@ -31,7 +31,7 @@ fail() { # mesg
reset_tracer
echo > set_ftrace_filter
echo $1
- exit -1
+ exit $FAIL
}
echo "Testing function tracer with profiler:"
diff --git a/ftrace/test.d/kprobe/add_and_remove.tc b/ftrace/test.d/kprobe/add_and_remove.tc
index a5a4262..c3843ed 100644
--- a/ftrace/test.d/kprobe/add_and_remove.tc
+++ b/ftrace/test.d/kprobe/add_and_remove.tc
@@ -5,7 +5,7 @@
echo 0 > events/enable
echo > kprobe_events
-echo p:myevent do_fork > kprobe_events
+echo p:myevent _do_fork > kprobe_events
grep myevent kprobe_events
test -d events/kprobes/myevent
echo > kprobe_events
diff --git a/ftrace/test.d/kprobe/busy_check.tc b/ftrace/test.d/kprobe/busy_check.tc
index d8c7bb6..74507db 100644
--- a/ftrace/test.d/kprobe/busy_check.tc
+++ b/ftrace/test.d/kprobe/busy_check.tc
@@ -5,7 +5,7 @@
echo 0 > events/enable
echo > kprobe_events
-echo p:myevent do_fork > kprobe_events
+echo p:myevent _do_fork > kprobe_events
test -d events/kprobes/myevent
echo 1 > events/kprobes/myevent/enable
echo > kprobe_events && exit 1 # this must fail
diff --git a/ftrace/test.d/kprobe/kprobe_args.tc b/ftrace/test.d/kprobe/kprobe_args.tc
index c45ee27..64949d4 100644
--- a/ftrace/test.d/kprobe/kprobe_args.tc
+++ b/ftrace/test.d/kprobe/kprobe_args.tc
@@ -5,7 +5,7 @@
echo 0 > events/enable
echo > kprobe_events
-echo 'p:testprobe do_fork $stack $stack0 +0($stack)' > kprobe_events
+echo 'p:testprobe _do_fork $stack $stack0 +0($stack)' > kprobe_events
grep testprobe kprobe_events
test -d events/kprobes/testprobe
echo 1 > events/kprobes/testprobe/enable
diff --git a/ftrace/test.d/kprobe/kprobe_ftrace.tc b/ftrace/test.d/kprobe/kprobe_ftrace.tc
index ab41d2b..d6f2f49 100644
--- a/ftrace/test.d/kprobe/kprobe_ftrace.tc
+++ b/ftrace/test.d/kprobe/kprobe_ftrace.tc
@@ -6,31 +6,31 @@ grep function available_tracers || exit_unsupported # this is configurable
# prepare
echo nop > current_tracer
-echo do_fork > set_ftrace_filter
+echo _do_fork > set_ftrace_filter
echo 0 > events/enable
echo > kprobe_events
-echo 'p:testprobe do_fork' > kprobe_events
+echo 'p:testprobe _do_fork' > kprobe_events
# kprobe on / ftrace off
echo 1 > events/kprobes/testprobe/enable
echo > trace
( echo "forked")
grep testprobe trace
-! grep 'do_fork <-' trace
+! grep '_do_fork <-' trace
# kprobe on / ftrace on
echo function > current_tracer
echo > trace
( echo "forked")
grep testprobe trace
-grep 'do_fork <-' trace
+grep '_do_fork <-' trace
# kprobe off / ftrace on
echo 0 > events/kprobes/testprobe/enable
echo > trace
( echo "forked")
! grep testprobe trace
-grep 'do_fork <-' trace
+grep '_do_fork <-' trace
# kprobe on / ftrace on
echo 1 > events/kprobes/testprobe/enable
@@ -38,14 +38,14 @@ echo function > current_tracer
echo > trace
( echo "forked")
grep testprobe trace
-grep 'do_fork <-' trace
+grep '_do_fork <-' trace
# kprobe on / ftrace off
echo nop > current_tracer
echo > trace
( echo "forked")
grep testprobe trace
-! grep 'do_fork <-' trace
+! grep '_do_fork <-' trace
# cleanup
echo nop > current_tracer
diff --git a/ftrace/test.d/kprobe/kretprobe_args.tc b/ftrace/test.d/kprobe/kretprobe_args.tc
index 3171798..0d09546 100644
--- a/ftrace/test.d/kprobe/kretprobe_args.tc
+++ b/ftrace/test.d/kprobe/kretprobe_args.tc
@@ -5,7 +5,7 @@
echo 0 > events/enable
echo > kprobe_events
-echo 'r:testprobe2 do_fork $retval' > kprobe_events
+echo 'r:testprobe2 _do_fork $retval' > kprobe_events
grep testprobe2 kprobe_events
test -d events/kprobes/testprobe2
echo 1 > events/kprobes/testprobe2/enable
diff --git a/futex/Makefile b/futex/Makefile
new file mode 100644
index 0000000..6a17529
--- /dev/null
+++ b/futex/Makefile
@@ -0,0 +1,29 @@
+SUBDIRS := functional
+
+TEST_PROGS := run.sh
+
+.PHONY: all clean
+all:
+ for DIR in $(SUBDIRS); do $(MAKE) -C $$DIR $@ ; done
+
+include ../lib.mk
+
+override define RUN_TESTS
+ ./run.sh
+endef
+
+override define INSTALL_RULE
+ mkdir -p $(INSTALL_PATH)
+ install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
+
+ @for SUBDIR in $(SUBDIRS); do \
+ $(MAKE) -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \
+ done;
+endef
+
+override define EMIT_TESTS
+ echo "./run.sh"
+endef
+
+clean:
+ for DIR in $(SUBDIRS); do $(MAKE) -C $$DIR $@ ; done
diff --git a/futex/README b/futex/README
new file mode 100644
index 0000000..3224a04
--- /dev/null
+++ b/futex/README
@@ -0,0 +1,62 @@
+Futex Test
+==========
+Futex Test is intended to thoroughly test the Linux kernel futex system call
+API.
+
+Functional tests shall test the documented behavior of the futex operation
+code under test. This includes checking for proper behavior under normal use,
+odd corner cases, regression tests, and abject abuse and misuse.
+
+Futextest will also provide example implementation of mutual exclusion
+primitives. These can be used as is in user applications or can serve as
+examples for system libraries. These will likely be added to either a new lib/
+directory or purely as header files under include/, I'm leaning toward the
+latter.
+
+Quick Start
+-----------
+# make
+# ./run.sh
+
+Design and Implementation Goals
+-------------------------------
+o Tests should be as self contained as is practical so as to facilitate sharing
+ the individual tests on mailing list discussions and bug reports.
+o The build system shall remain as simple as possible, avoiding any archive or
+ shared object building and linking.
+o Where possible, any helper functions or other package-wide code shall be
+ implemented in header files, avoiding the need to compile intermediate object
+ files.
+o External dependendencies shall remain as minimal as possible. Currently gcc
+ and glibc are the only dependencies.
+o Tests return 0 for success and < 0 for failure.
+
+Output Formatting
+-----------------
+Test output shall be easily parsable by both human and machine. Title and
+results are printed to stdout, while intermediate ERROR or FAIL messages are
+sent to stderr. Tests shall support the -c option to print PASS, FAIL, and
+ERROR strings in color for easy visual parsing. Output shall conform to the
+following format:
+
+test_name: Description of the test
+ Arguments: arg1=val1 #units specified for clarity where appropriate
+ ERROR: Description of unexpected error
+ FAIL: Reason for test failure
+ # FIXME: Perhaps an " INFO: informational message" option would be
+ # useful here. Using -v to toggle it them on and off, as with -c.
+ # there may be multiple ERROR or FAIL messages
+Result: (PASS|FAIL|ERROR)
+
+Naming
+------
+o FIXME: decide on a sane test naming scheme. Currently the tests are named
+ based on the primary futex operation they test. Eventually this will become a
+ problem as we intend to write multiple tests which collide in this namespace.
+ Perhaps something like "wait-wake-1" "wait-wake-2" is adequate, leaving the
+ detailed description in the test source and the output.
+
+Coding Style
+------------
+o The Futex Test project adheres to the coding standards set forth by Linux
+ kernel as defined in the Linux source Documentation/CodingStyle.
diff --git a/futex/functional/.gitignore b/futex/functional/.gitignore
new file mode 100644
index 0000000..a09f570
--- /dev/null
+++ b/futex/functional/.gitignore
@@ -0,0 +1,7 @@
+futex_requeue_pi
+futex_requeue_pi_mismatched_ops
+futex_requeue_pi_signal_restart
+futex_wait_private_mapped_file
+futex_wait_timeout
+futex_wait_uninitialized_heap
+futex_wait_wouldblock
diff --git a/futex/functional/Makefile b/futex/functional/Makefile
new file mode 100644
index 0000000..9d6b75e
--- /dev/null
+++ b/futex/functional/Makefile
@@ -0,0 +1,25 @@
+INCLUDES := -I../include -I../../
+CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES)
+LDFLAGS := $(LDFLAGS) -pthread -lrt
+
+HEADERS := ../include/futextest.h
+TARGETS := \
+ futex_wait_timeout \
+ futex_wait_wouldblock \
+ futex_requeue_pi \
+ futex_requeue_pi_signal_restart \
+ futex_requeue_pi_mismatched_ops \
+ futex_wait_uninitialized_heap \
+ futex_wait_private_mapped_file
+
+TEST_PROGS := $(TARGETS) run.sh
+
+.PHONY: all clean
+all: $(TARGETS)
+
+$(TARGETS): $(HEADERS)
+
+include ../../lib.mk
+
+clean:
+ rm -f $(TARGETS)
diff --git a/futex/functional/futex_requeue_pi.c b/futex/functional/futex_requeue_pi.c
new file mode 100644
index 0000000..3da06ad
--- /dev/null
+++ b/futex/functional/futex_requeue_pi.c
@@ -0,0 +1,409 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2006-2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * This test excercises the futex syscall op codes needed for requeuing
+ * priority inheritance aware POSIX condition variables and mutexes.
+ *
+ * AUTHORS
+ * Sripathi Kodi <sripathik@in.ibm.com>
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2008-Jan-13: Initial version by Sripathi Kodi <sripathik@in.ibm.com>
+ * 2009-Nov-6: futex test adaptation by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <limits.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <string.h>
+#include "atomic.h"
+#include "futextest.h"
+#include "logging.h"
+
+#define MAX_WAKE_ITERS 1000
+#define THREAD_MAX 10
+#define SIGNAL_PERIOD_US 100
+
+atomic_t waiters_blocked = ATOMIC_INITIALIZER;
+atomic_t waiters_woken = ATOMIC_INITIALIZER;
+
+futex_t f1 = FUTEX_INITIALIZER;
+futex_t f2 = FUTEX_INITIALIZER;
+futex_t wake_complete = FUTEX_INITIALIZER;
+
+/* Test option defaults */
+static long timeout_ns;
+static int broadcast;
+static int owner;
+static int locked;
+
+struct thread_arg {
+ long id;
+ struct timespec *timeout;
+ int lock;
+ int ret;
+};
+#define THREAD_ARG_INITIALIZER { 0, NULL, 0, 0 }
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -b Broadcast wakeup (all waiters)\n");
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -l Lock the pi futex across requeue\n");
+ printf(" -o Use a third party pi futex owner during requeue (cancels -l)\n");
+ printf(" -t N Timeout in nanoseconds (default: 0)\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
+ int policy, int prio)
+{
+ int ret;
+ struct sched_param schedp;
+ pthread_attr_t attr;
+
+ pthread_attr_init(&attr);
+ memset(&schedp, 0, sizeof(schedp));
+
+ ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
+ if (ret) {
+ error("pthread_attr_setinheritsched\n", ret);
+ return -1;
+ }
+
+ ret = pthread_attr_setschedpolicy(&attr, policy);
+ if (ret) {
+ error("pthread_attr_setschedpolicy\n", ret);
+ return -1;
+ }
+
+ schedp.sched_priority = prio;
+ ret = pthread_attr_setschedparam(&attr, &schedp);
+ if (ret) {
+ error("pthread_attr_setschedparam\n", ret);
+ return -1;
+ }
+
+ ret = pthread_create(pth, &attr, func, arg);
+ if (ret) {
+ error("pthread_create\n", ret);
+ return -1;
+ }
+ return 0;
+}
+
+
+void *waiterfn(void *arg)
+{
+ struct thread_arg *args = (struct thread_arg *)arg;
+ futex_t old_val;
+
+ info("Waiter %ld: running\n", args->id);
+ /* Each thread sleeps for a different amount of time
+ * This is to avoid races, because we don't lock the
+ * external mutex here */
+ usleep(1000 * (long)args->id);
+
+ old_val = f1;
+ atomic_inc(&waiters_blocked);
+ info("Calling futex_wait_requeue_pi: %p (%u) -> %p\n",
+ &f1, f1, &f2);
+ args->ret = futex_wait_requeue_pi(&f1, old_val, &f2, args->timeout,
+ FUTEX_PRIVATE_FLAG);
+
+ info("waiter %ld woke with %d %s\n", args->id, args->ret,
+ args->ret < 0 ? strerror(errno) : "");
+ atomic_inc(&waiters_woken);
+ if (args->ret < 0) {
+ if (args->timeout && errno == ETIMEDOUT)
+ args->ret = 0;
+ else {
+ args->ret = RET_ERROR;
+ error("futex_wait_requeue_pi\n", errno);
+ }
+ futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
+ }
+ futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+
+ info("Waiter %ld: exiting with %d\n", args->id, args->ret);
+ pthread_exit((void *)&args->ret);
+}
+
+void *broadcast_wakerfn(void *arg)
+{
+ struct thread_arg *args = (struct thread_arg *)arg;
+ int nr_requeue = INT_MAX;
+ int task_count = 0;
+ futex_t old_val;
+ int nr_wake = 1;
+ int i = 0;
+
+ info("Waker: waiting for waiters to block\n");
+ while (waiters_blocked.val < THREAD_MAX)
+ usleep(1000);
+ usleep(1000);
+
+ info("Waker: Calling broadcast\n");
+ if (args->lock) {
+ info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", f2, &f2);
+ futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
+ }
+ continue_requeue:
+ old_val = f1;
+ args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2, nr_wake, nr_requeue,
+ FUTEX_PRIVATE_FLAG);
+ if (args->ret < 0) {
+ args->ret = RET_ERROR;
+ error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
+ } else if (++i < MAX_WAKE_ITERS) {
+ task_count += args->ret;
+ if (task_count < THREAD_MAX - waiters_woken.val)
+ goto continue_requeue;
+ } else {
+ error("max broadcast iterations (%d) reached with %d/%d tasks woken or requeued\n",
+ 0, MAX_WAKE_ITERS, task_count, THREAD_MAX);
+ args->ret = RET_ERROR;
+ }
+
+ futex_wake(&wake_complete, 1, FUTEX_PRIVATE_FLAG);
+
+ if (args->lock)
+ futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+
+ if (args->ret > 0)
+ args->ret = task_count;
+
+ info("Waker: exiting with %d\n", args->ret);
+ pthread_exit((void *)&args->ret);
+}
+
+void *signal_wakerfn(void *arg)
+{
+ struct thread_arg *args = (struct thread_arg *)arg;
+ unsigned int old_val;
+ int nr_requeue = 0;
+ int task_count = 0;
+ int nr_wake = 1;
+ int i = 0;
+
+ info("Waker: waiting for waiters to block\n");
+ while (waiters_blocked.val < THREAD_MAX)
+ usleep(1000);
+ usleep(1000);
+
+ while (task_count < THREAD_MAX && waiters_woken.val < THREAD_MAX) {
+ info("task_count: %d, waiters_woken: %d\n",
+ task_count, waiters_woken.val);
+ if (args->lock) {
+ info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n",
+ f2, &f2);
+ futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
+ }
+ info("Waker: Calling signal\n");
+ /* cond_signal */
+ old_val = f1;
+ args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2,
+ nr_wake, nr_requeue,
+ FUTEX_PRIVATE_FLAG);
+ if (args->ret < 0)
+ args->ret = -errno;
+ info("futex: %x\n", f2);
+ if (args->lock) {
+ info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n",
+ f2, &f2);
+ futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+ }
+ info("futex: %x\n", f2);
+ if (args->ret < 0) {
+ error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
+ args->ret = RET_ERROR;
+ break;
+ }
+
+ task_count += args->ret;
+ usleep(SIGNAL_PERIOD_US);
+ i++;
+ /* we have to loop at least THREAD_MAX times */
+ if (i > MAX_WAKE_ITERS + THREAD_MAX) {
+ error("max signaling iterations (%d) reached, giving up on pending waiters.\n",
+ 0, MAX_WAKE_ITERS + THREAD_MAX);
+ args->ret = RET_ERROR;
+ break;
+ }
+ }
+
+ futex_wake(&wake_complete, 1, FUTEX_PRIVATE_FLAG);
+
+ if (args->ret >= 0)
+ args->ret = task_count;
+
+ info("Waker: exiting with %d\n", args->ret);
+ info("Waker: waiters_woken: %d\n", waiters_woken.val);
+ pthread_exit((void *)&args->ret);
+}
+
+void *third_party_blocker(void *arg)
+{
+ struct thread_arg *args = (struct thread_arg *)arg;
+ int ret2 = 0;
+
+ args->ret = futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
+ if (args->ret)
+ goto out;
+ args->ret = futex_wait(&wake_complete, wake_complete, NULL,
+ FUTEX_PRIVATE_FLAG);
+ ret2 = futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+
+ out:
+ if (args->ret || ret2) {
+ error("third_party_blocker() futex error", 0);
+ args->ret = RET_ERROR;
+ }
+
+ pthread_exit((void *)&args->ret);
+}
+
+int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns)
+{
+ void *(*wakerfn)(void *) = signal_wakerfn;
+ struct thread_arg blocker_arg = THREAD_ARG_INITIALIZER;
+ struct thread_arg waker_arg = THREAD_ARG_INITIALIZER;
+ pthread_t waiter[THREAD_MAX], waker, blocker;
+ struct timespec ts, *tsp = NULL;
+ struct thread_arg args[THREAD_MAX];
+ int *waiter_ret;
+ int i, ret = RET_PASS;
+
+ if (timeout_ns) {
+ time_t secs;
+
+ info("timeout_ns = %ld\n", timeout_ns);
+ ret = clock_gettime(CLOCK_MONOTONIC, &ts);
+ secs = (ts.tv_nsec + timeout_ns) / 1000000000;
+ ts.tv_nsec = ((int64_t)ts.tv_nsec + timeout_ns) % 1000000000;
+ ts.tv_sec += secs;
+ info("ts.tv_sec = %ld\n", ts.tv_sec);
+ info("ts.tv_nsec = %ld\n", ts.tv_nsec);
+ tsp = &ts;
+ }
+
+ if (broadcast)
+ wakerfn = broadcast_wakerfn;
+
+ if (third_party_owner) {
+ if (create_rt_thread(&blocker, third_party_blocker,
+ (void *)&blocker_arg, SCHED_FIFO, 1)) {
+ error("Creating third party blocker thread failed\n",
+ errno);
+ ret = RET_ERROR;
+ goto out;
+ }
+ }
+
+ atomic_set(&waiters_woken, 0);
+ for (i = 0; i < THREAD_MAX; i++) {
+ args[i].id = i;
+ args[i].timeout = tsp;
+ info("Starting thread %d\n", i);
+ if (create_rt_thread(&waiter[i], waiterfn, (void *)&args[i],
+ SCHED_FIFO, 1)) {
+ error("Creating waiting thread failed\n", errno);
+ ret = RET_ERROR;
+ goto out;
+ }
+ }
+ waker_arg.lock = lock;
+ if (create_rt_thread(&waker, wakerfn, (void *)&waker_arg,
+ SCHED_FIFO, 1)) {
+ error("Creating waker thread failed\n", errno);
+ ret = RET_ERROR;
+ goto out;
+ }
+
+ /* Wait for threads to finish */
+ /* Store the first error or failure encountered in waiter_ret */
+ waiter_ret = &args[0].ret;
+ for (i = 0; i < THREAD_MAX; i++)
+ pthread_join(waiter[i],
+ *waiter_ret ? NULL : (void **)&waiter_ret);
+
+ if (third_party_owner)
+ pthread_join(blocker, NULL);
+ pthread_join(waker, NULL);
+
+out:
+ if (!ret) {
+ if (*waiter_ret)
+ ret = *waiter_ret;
+ else if (waker_arg.ret < 0)
+ ret = waker_arg.ret;
+ else if (blocker_arg.ret)
+ ret = blocker_arg.ret;
+ }
+
+ return ret;
+}
+
+int main(int argc, char *argv[])
+{
+ int c, ret;
+
+ while ((c = getopt(argc, argv, "bchlot:v:")) != -1) {
+ switch (c) {
+ case 'b':
+ broadcast = 1;
+ break;
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'l':
+ locked = 1;
+ break;
+ case 'o':
+ owner = 1;
+ locked = 0;
+ break;
+ case 't':
+ timeout_ns = atoi(optarg);
+ break;
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ printf("%s: Test requeue functionality\n", basename(argv[0]));
+ printf("\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n",
+ broadcast, locked, owner, timeout_ns);
+
+ /*
+ * FIXME: unit_test is obsolete now that we parse options and the
+ * various style of runs are done by run.sh - simplify the code and move
+ * unit_test into main()
+ */
+ ret = unit_test(broadcast, locked, owner, timeout_ns);
+
+ print_result(ret);
+ return ret;
+}
diff --git a/futex/functional/futex_requeue_pi_mismatched_ops.c b/futex/functional/futex_requeue_pi_mismatched_ops.c
new file mode 100644
index 0000000..d5e4f2c
--- /dev/null
+++ b/futex/functional/futex_requeue_pi_mismatched_ops.c
@@ -0,0 +1,135 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * 1. Block a thread using FUTEX_WAIT
+ * 2. Attempt to use FUTEX_CMP_REQUEUE_PI on the futex from 1.
+ * 3. The kernel must detect the mismatch and return -EINVAL.
+ *
+ * AUTHOR
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2009-Nov-9: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <getopt.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "futextest.h"
+#include "logging.h"
+
+futex_t f1 = FUTEX_INITIALIZER;
+futex_t f2 = FUTEX_INITIALIZER;
+int child_ret = 0;
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+void *blocking_child(void *arg)
+{
+ child_ret = futex_wait(&f1, f1, NULL, FUTEX_PRIVATE_FLAG);
+ if (child_ret < 0) {
+ child_ret = -errno;
+ error("futex_wait\n", errno);
+ }
+ return (void *)&child_ret;
+}
+
+int main(int argc, char *argv[])
+{
+ int ret = RET_PASS;
+ pthread_t child;
+ int c;
+
+ while ((c = getopt(argc, argv, "chv:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ printf("%s: Detect mismatched requeue_pi operations\n",
+ basename(argv[0]));
+
+ if (pthread_create(&child, NULL, blocking_child, NULL)) {
+ error("pthread_create\n", errno);
+ ret = RET_ERROR;
+ goto out;
+ }
+ /* Allow the child to block in the kernel. */
+ sleep(1);
+
+ /*
+ * The kernel should detect the waiter did not setup the
+ * q->requeue_pi_key and return -EINVAL. If it does not,
+ * it likely gave the lock to the child, which is now hung
+ * in the kernel.
+ */
+ ret = futex_cmp_requeue_pi(&f1, f1, &f2, 1, 0, FUTEX_PRIVATE_FLAG);
+ if (ret < 0) {
+ if (errno == EINVAL) {
+ /*
+ * The kernel correctly detected the mismatched
+ * requeue_pi target and aborted. Wake the child with
+ * FUTEX_WAKE.
+ */
+ ret = futex_wake(&f1, 1, FUTEX_PRIVATE_FLAG);
+ if (ret == 1) {
+ ret = RET_PASS;
+ } else if (ret < 0) {
+ error("futex_wake\n", errno);
+ ret = RET_ERROR;
+ } else {
+ error("futex_wake did not wake the child\n", 0);
+ ret = RET_ERROR;
+ }
+ } else {
+ error("futex_cmp_requeue_pi\n", errno);
+ ret = RET_ERROR;
+ }
+ } else if (ret > 0) {
+ fail("futex_cmp_requeue_pi failed to detect the mismatch\n");
+ ret = RET_FAIL;
+ } else {
+ error("futex_cmp_requeue_pi found no waiters\n", 0);
+ ret = RET_ERROR;
+ }
+
+ pthread_join(child, NULL);
+
+ if (!ret)
+ ret = child_ret;
+
+ out:
+ /* If the kernel crashes, we shouldn't return at all. */
+ print_result(ret);
+ return ret;
+}
diff --git a/futex/functional/futex_requeue_pi_signal_restart.c b/futex/functional/futex_requeue_pi_signal_restart.c
new file mode 100644
index 0000000..3d7dc6a
--- /dev/null
+++ b/futex/functional/futex_requeue_pi_signal_restart.c
@@ -0,0 +1,223 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2006-2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * This test exercises the futex_wait_requeue_pi() signal handling both
+ * before and after the requeue. The first should be restarted by the
+ * kernel. The latter should return EWOULDBLOCK to the waiter.
+ *
+ * AUTHORS
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2008-May-5: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "atomic.h"
+#include "futextest.h"
+#include "logging.h"
+
+#define DELAY_US 100
+
+futex_t f1 = FUTEX_INITIALIZER;
+futex_t f2 = FUTEX_INITIALIZER;
+atomic_t requeued = ATOMIC_INITIALIZER;
+
+int waiter_ret = 0;
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
+ int policy, int prio)
+{
+ struct sched_param schedp;
+ pthread_attr_t attr;
+ int ret;
+
+ pthread_attr_init(&attr);
+ memset(&schedp, 0, sizeof(schedp));
+
+ ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
+ if (ret) {
+ error("pthread_attr_setinheritsched\n", ret);
+ return -1;
+ }
+
+ ret = pthread_attr_setschedpolicy(&attr, policy);
+ if (ret) {
+ error("pthread_attr_setschedpolicy\n", ret);
+ return -1;
+ }
+
+ schedp.sched_priority = prio;
+ ret = pthread_attr_setschedparam(&attr, &schedp);
+ if (ret) {
+ error("pthread_attr_setschedparam\n", ret);
+ return -1;
+ }
+
+ ret = pthread_create(pth, &attr, func, arg);
+ if (ret) {
+ error("pthread_create\n", ret);
+ return -1;
+ }
+ return 0;
+}
+
+void handle_signal(int signo)
+{
+ info("signal received %s requeue\n",
+ requeued.val ? "after" : "prior to");
+}
+
+void *waiterfn(void *arg)
+{
+ unsigned int old_val;
+ int res;
+
+ waiter_ret = RET_PASS;
+
+ info("Waiter running\n");
+ info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2);
+ old_val = f1;
+ res = futex_wait_requeue_pi(&f1, old_val, &(f2), NULL,
+ FUTEX_PRIVATE_FLAG);
+ if (!requeued.val || errno != EWOULDBLOCK) {
+ fail("unexpected return from futex_wait_requeue_pi: %d (%s)\n",
+ res, strerror(errno));
+ info("w2:futex: %x\n", f2);
+ if (!res)
+ futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+ waiter_ret = RET_FAIL;
+ }
+
+ info("Waiter exiting with %d\n", waiter_ret);
+ pthread_exit(NULL);
+}
+
+
+int main(int argc, char *argv[])
+{
+ unsigned int old_val;
+ struct sigaction sa;
+ pthread_t waiter;
+ int c, res, ret = RET_PASS;
+
+ while ((c = getopt(argc, argv, "chv:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ printf("%s: Test signal handling during requeue_pi\n",
+ basename(argv[0]));
+ printf("\tArguments: <none>\n");
+
+ sa.sa_handler = handle_signal;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = 0;
+ if (sigaction(SIGUSR1, &sa, NULL)) {
+ error("sigaction\n", errno);
+ exit(1);
+ }
+
+ info("m1:f2: %x\n", f2);
+ info("Creating waiter\n");
+ res = create_rt_thread(&waiter, waiterfn, NULL, SCHED_FIFO, 1);
+ if (res) {
+ error("Creating waiting thread failed", res);
+ ret = RET_ERROR;
+ goto out;
+ }
+
+ info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2);
+ info("m2:f2: %x\n", f2);
+ futex_lock_pi(&f2, 0, 0, FUTEX_PRIVATE_FLAG);
+ info("m3:f2: %x\n", f2);
+
+ while (1) {
+ /*
+ * signal the waiter before requeue, waiter should automatically
+ * restart futex_wait_requeue_pi() in the kernel. Wait for the
+ * waiter to block on f1 again.
+ */
+ info("Issuing SIGUSR1 to waiter\n");
+ pthread_kill(waiter, SIGUSR1);
+ usleep(DELAY_US);
+
+ info("Requeueing waiter via FUTEX_CMP_REQUEUE_PI\n");
+ old_val = f1;
+ res = futex_cmp_requeue_pi(&f1, old_val, &(f2), 1, 0,
+ FUTEX_PRIVATE_FLAG);
+ /*
+ * If res is non-zero, we either requeued the waiter or hit an
+ * error, break out and handle it. If it is zero, then the
+ * signal may have hit before the the waiter was blocked on f1.
+ * Try again.
+ */
+ if (res > 0) {
+ atomic_set(&requeued, 1);
+ break;
+ } else if (res < 0) {
+ error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
+ ret = RET_ERROR;
+ break;
+ }
+ }
+ info("m4:f2: %x\n", f2);
+
+ /*
+ * Signal the waiter after requeue, waiter should return from
+ * futex_wait_requeue_pi() with EWOULDBLOCK. Join the thread here so the
+ * futex_unlock_pi() can't happen before the signal wakeup is detected
+ * in the kernel.
+ */
+ info("Issuing SIGUSR1 to waiter\n");
+ pthread_kill(waiter, SIGUSR1);
+ info("Waiting for waiter to return\n");
+ pthread_join(waiter, NULL);
+
+ info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", f2, &f2);
+ futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+ info("m5:f2: %x\n", f2);
+
+ out:
+ if (ret == RET_PASS && waiter_ret)
+ ret = waiter_ret;
+
+ print_result(ret);
+ return ret;
+}
diff --git a/futex/functional/futex_wait_private_mapped_file.c b/futex/functional/futex_wait_private_mapped_file.c
new file mode 100644
index 0000000..5f687f2
--- /dev/null
+++ b/futex/functional/futex_wait_private_mapped_file.c
@@ -0,0 +1,125 @@
+/******************************************************************************
+ *
+ * Copyright FUJITSU LIMITED 2010
+ * Copyright KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * Internally, Futex has two handling mode, anon and file. The private file
+ * mapping is special. At first it behave as file, but after write anything
+ * it behave as anon. This test is intent to test such case.
+ *
+ * AUTHOR
+ * KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ * HISTORY
+ * 2010-Jan-6: Initial version by KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ *****************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/futex.h>
+#include <pthread.h>
+#include <libgen.h>
+#include <signal.h>
+
+#include "logging.h"
+#include "futextest.h"
+
+#define PAGE_SZ 4096
+
+char pad[PAGE_SZ] = {1};
+futex_t val = 1;
+char pad2[PAGE_SZ] = {1};
+
+#define WAKE_WAIT_US 3000000
+struct timespec wait_timeout = { .tv_sec = 5, .tv_nsec = 0};
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+void *thr_futex_wait(void *arg)
+{
+ int ret;
+
+ info("futex wait\n");
+ ret = futex_wait(&val, 1, &wait_timeout, 0);
+ if (ret && errno != EWOULDBLOCK && errno != ETIMEDOUT) {
+ error("futex error.\n", errno);
+ print_result(RET_ERROR);
+ exit(RET_ERROR);
+ }
+
+ if (ret && errno == ETIMEDOUT)
+ fail("waiter timedout\n");
+
+ info("futex_wait: ret = %d, errno = %d\n", ret, errno);
+
+ return NULL;
+}
+
+int main(int argc, char **argv)
+{
+ pthread_t thr;
+ int ret = RET_PASS;
+ int res;
+ int c;
+
+ while ((c = getopt(argc, argv, "chv:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ printf("%s: Test the futex value of private file mappings in FUTEX_WAIT\n",
+ basename(argv[0]));
+
+ ret = pthread_create(&thr, NULL, thr_futex_wait, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "pthread_create error\n");
+ ret = RET_ERROR;
+ goto out;
+ }
+
+ info("wait a while\n");
+ usleep(WAKE_WAIT_US);
+ val = 2;
+ res = futex_wake(&val, 1, 0);
+ info("futex_wake %d\n", res);
+ if (res != 1) {
+ fail("FUTEX_WAKE didn't find the waiting thread.\n");
+ ret = RET_FAIL;
+ }
+
+ info("join\n");
+ pthread_join(thr, NULL);
+
+ out:
+ print_result(ret);
+ return ret;
+}
diff --git a/futex/functional/futex_wait_timeout.c b/futex/functional/futex_wait_timeout.c
new file mode 100644
index 0000000..ab428ca
--- /dev/null
+++ b/futex/functional/futex_wait_timeout.c
@@ -0,0 +1,86 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * Block on a futex and wait for timeout.
+ *
+ * AUTHOR
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "futextest.h"
+#include "logging.h"
+
+static long timeout_ns = 100000; /* 100us default timeout */
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -t N Timeout in nanoseconds (default: 100,000)\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+int main(int argc, char *argv[])
+{
+ futex_t f1 = FUTEX_INITIALIZER;
+ struct timespec to;
+ int res, ret = RET_PASS;
+ int c;
+
+ while ((c = getopt(argc, argv, "cht:v:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 't':
+ timeout_ns = atoi(optarg);
+ break;
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ printf("%s: Block on a futex and wait for timeout\n",
+ basename(argv[0]));
+ printf("\tArguments: timeout=%ldns\n", timeout_ns);
+
+ /* initialize timeout */
+ to.tv_sec = 0;
+ to.tv_nsec = timeout_ns;
+
+ info("Calling futex_wait on f1: %u @ %p\n", f1, &f1);
+ res = futex_wait(&f1, f1, &to, FUTEX_PRIVATE_FLAG);
+ if (!res || errno != ETIMEDOUT) {
+ fail("futex_wait returned %d\n", ret < 0 ? errno : ret);
+ ret = RET_FAIL;
+ }
+
+ print_result(ret);
+ return ret;
+}
diff --git a/futex/functional/futex_wait_uninitialized_heap.c b/futex/functional/futex_wait_uninitialized_heap.c
new file mode 100644
index 0000000..fe7aee9
--- /dev/null
+++ b/futex/functional/futex_wait_uninitialized_heap.c
@@ -0,0 +1,124 @@
+/******************************************************************************
+ *
+ * Copyright FUJITSU LIMITED 2010
+ * Copyright KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * Wait on uninitialized heap. It shold be zero and FUTEX_WAIT should
+ * return immediately. This test is intent to test zero page handling in
+ * futex.
+ *
+ * AUTHOR
+ * KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ * HISTORY
+ * 2010-Jan-6: Initial version by KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ *****************************************************************************/
+
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/futex.h>
+#include <libgen.h>
+
+#include "logging.h"
+#include "futextest.h"
+
+#define WAIT_US 5000000
+
+static int child_blocked = 1;
+static int child_ret;
+void *buf;
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+void *wait_thread(void *arg)
+{
+ int res;
+
+ child_ret = RET_PASS;
+ res = futex_wait(buf, 1, NULL, 0);
+ child_blocked = 0;
+
+ if (res != 0 && errno != EWOULDBLOCK) {
+ error("futex failure\n", errno);
+ child_ret = RET_ERROR;
+ }
+ pthread_exit(NULL);
+}
+
+int main(int argc, char **argv)
+{
+ int c, ret = RET_PASS;
+ long page_size;
+ pthread_t thr;
+
+ while ((c = getopt(argc, argv, "chv:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ page_size = sysconf(_SC_PAGESIZE);
+
+ buf = mmap(NULL, page_size, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+ if (buf == (void *)-1) {
+ error("mmap\n", errno);
+ exit(1);
+ }
+
+ printf("%s: Test the uninitialized futex value in FUTEX_WAIT\n",
+ basename(argv[0]));
+
+
+ ret = pthread_create(&thr, NULL, wait_thread, NULL);
+ if (ret) {
+ error("pthread_create\n", errno);
+ ret = RET_ERROR;
+ goto out;
+ }
+
+ info("waiting %dus for child to return\n", WAIT_US);
+ usleep(WAIT_US);
+
+ ret = child_ret;
+ if (child_blocked) {
+ fail("child blocked in kernel\n");
+ ret = RET_FAIL;
+ }
+
+ out:
+ print_result(ret);
+ return ret;
+}
diff --git a/futex/functional/futex_wait_wouldblock.c b/futex/functional/futex_wait_wouldblock.c
new file mode 100644
index 0000000..b6b0274
--- /dev/null
+++ b/futex/functional/futex_wait_wouldblock.c
@@ -0,0 +1,79 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * Test if FUTEX_WAIT op returns -EWOULDBLOCK if the futex value differs
+ * from the expected one.
+ *
+ * AUTHOR
+ * Gowrishankar <gowrishankar.m@in.ibm.com>
+ *
+ * HISTORY
+ * 2009-Nov-14: Initial version by Gowrishankar <gowrishankar.m@in.ibm.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "futextest.h"
+#include "logging.h"
+
+#define timeout_ns 100000
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+int main(int argc, char *argv[])
+{
+ struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns};
+ futex_t f1 = FUTEX_INITIALIZER;
+ int res, ret = RET_PASS;
+ int c;
+
+ while ((c = getopt(argc, argv, "cht:v:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ printf("%s: Test the unexpected futex value in FUTEX_WAIT\n",
+ basename(argv[0]));
+
+ info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
+ res = futex_wait(&f1, f1+1, &to, FUTEX_PRIVATE_FLAG);
+ if (!res || errno != EWOULDBLOCK) {
+ fail("futex_wait returned: %d %s\n",
+ res ? errno : res, res ? strerror(errno) : "");
+ ret = RET_FAIL;
+ }
+
+ print_result(ret);
+ return ret;
+}
diff --git a/futex/functional/run.sh b/futex/functional/run.sh
new file mode 100755
index 0000000..e87dbe2
--- /dev/null
+++ b/futex/functional/run.sh
@@ -0,0 +1,79 @@
+#!/bin/sh
+
+###############################################################################
+#
+# Copyright © International Business Machines Corp., 2009
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# DESCRIPTION
+# Run tests in the current directory.
+#
+# AUTHOR
+# Darren Hart <dvhart@linux.intel.com>
+#
+# HISTORY
+# 2009-Nov-9: Initial version by Darren Hart <dvhart@linux.intel.com>
+# 2010-Jan-6: Add futex_wait_uninitialized_heap and futex_wait_private_mapped_file
+# by KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+#
+###############################################################################
+
+# Test for a color capable console
+if [ -z "$USE_COLOR" ]; then
+ tput setf 7
+ if [ $? -eq 0 ]; then
+ USE_COLOR=1
+ tput sgr0
+ fi
+fi
+if [ "$USE_COLOR" -eq 1 ]; then
+ COLOR="-c"
+fi
+
+
+echo
+# requeue pi testing
+# without timeouts
+./futex_requeue_pi $COLOR
+./futex_requeue_pi $COLOR -b
+./futex_requeue_pi $COLOR -b -l
+./futex_requeue_pi $COLOR -b -o
+./futex_requeue_pi $COLOR -l
+./futex_requeue_pi $COLOR -o
+# with timeouts
+./futex_requeue_pi $COLOR -b -l -t 5000
+./futex_requeue_pi $COLOR -l -t 5000
+./futex_requeue_pi $COLOR -b -l -t 500000
+./futex_requeue_pi $COLOR -l -t 500000
+./futex_requeue_pi $COLOR -b -t 5000
+./futex_requeue_pi $COLOR -t 5000
+./futex_requeue_pi $COLOR -b -t 500000
+./futex_requeue_pi $COLOR -t 500000
+./futex_requeue_pi $COLOR -b -o -t 5000
+./futex_requeue_pi $COLOR -l -t 5000
+./futex_requeue_pi $COLOR -b -o -t 500000
+./futex_requeue_pi $COLOR -l -t 500000
+# with long timeout
+./futex_requeue_pi $COLOR -b -l -t 2000000000
+./futex_requeue_pi $COLOR -l -t 2000000000
+
+
+echo
+./futex_requeue_pi_mismatched_ops $COLOR
+
+echo
+./futex_requeue_pi_signal_restart $COLOR
+
+echo
+./futex_wait_timeout $COLOR
+
+echo
+./futex_wait_wouldblock $COLOR
+
+echo
+./futex_wait_uninitialized_heap $COLOR
+./futex_wait_private_mapped_file $COLOR
diff --git a/futex/include/atomic.h b/futex/include/atomic.h
new file mode 100644
index 0000000..f861da3
--- /dev/null
+++ b/futex/include/atomic.h
@@ -0,0 +1,83 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * GCC atomic builtin wrappers
+ * http://gcc.gnu.org/onlinedocs/gcc-4.1.0/gcc/Atomic-Builtins.html
+ *
+ * AUTHOR
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2009-Nov-17: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#ifndef _ATOMIC_H
+#define _ATOMIC_H
+
+typedef struct {
+ volatile int val;
+} atomic_t;
+
+#define ATOMIC_INITIALIZER { 0 }
+
+/**
+ * atomic_cmpxchg() - Atomic compare and exchange
+ * @uaddr: The address of the futex to be modified
+ * @oldval: The expected value of the futex
+ * @newval: The new value to try and assign the futex
+ *
+ * Return the old value of addr->val.
+ */
+static inline int
+atomic_cmpxchg(atomic_t *addr, int oldval, int newval)
+{
+ return __sync_val_compare_and_swap(&addr->val, oldval, newval);
+}
+
+/**
+ * atomic_inc() - Atomic incrememnt
+ * @addr: Address of the variable to increment
+ *
+ * Return the new value of addr->val.
+ */
+static inline int
+atomic_inc(atomic_t *addr)
+{
+ return __sync_add_and_fetch(&addr->val, 1);
+}
+
+/**
+ * atomic_dec() - Atomic decrement
+ * @addr: Address of the variable to decrement
+ *
+ * Return the new value of addr-val.
+ */
+static inline int
+atomic_dec(atomic_t *addr)
+{
+ return __sync_sub_and_fetch(&addr->val, 1);
+}
+
+/**
+ * atomic_set() - Atomic set
+ * @addr: Address of the variable to set
+ * @newval: New value for the atomic_t
+ *
+ * Return the new value of addr->val.
+ */
+static inline int
+atomic_set(atomic_t *addr, int newval)
+{
+ addr->val = newval;
+ return newval;
+}
+
+#endif
diff --git a/futex/include/futextest.h b/futex/include/futextest.h
new file mode 100644
index 0000000..b98c3ab
--- /dev/null
+++ b/futex/include/futextest.h
@@ -0,0 +1,266 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * Glibc independent futex library for testing kernel functionality.
+ *
+ * AUTHOR
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#ifndef _FUTEXTEST_H
+#define _FUTEXTEST_H
+
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <linux/futex.h>
+
+typedef volatile u_int32_t futex_t;
+#define FUTEX_INITIALIZER 0
+
+/* Define the newer op codes if the system header file is not up to date. */
+#ifndef FUTEX_WAIT_BITSET
+#define FUTEX_WAIT_BITSET 9
+#endif
+#ifndef FUTEX_WAKE_BITSET
+#define FUTEX_WAKE_BITSET 10
+#endif
+#ifndef FUTEX_WAIT_REQUEUE_PI
+#define FUTEX_WAIT_REQUEUE_PI 11
+#endif
+#ifndef FUTEX_CMP_REQUEUE_PI
+#define FUTEX_CMP_REQUEUE_PI 12
+#endif
+#ifndef FUTEX_WAIT_REQUEUE_PI_PRIVATE
+#define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \
+ FUTEX_PRIVATE_FLAG)
+#endif
+#ifndef FUTEX_REQUEUE_PI_PRIVATE
+#define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \
+ FUTEX_PRIVATE_FLAG)
+#endif
+
+/**
+ * futex() - SYS_futex syscall wrapper
+ * @uaddr: address of first futex
+ * @op: futex op code
+ * @val: typically expected value of uaddr, but varies by op
+ * @timeout: typically an absolute struct timespec (except where noted
+ * otherwise). Overloaded by some ops
+ * @uaddr2: address of second futex for some ops\
+ * @val3: varies by op
+ * @opflags: flags to be bitwise OR'd with op, such as FUTEX_PRIVATE_FLAG
+ *
+ * futex() is used by all the following futex op wrappers. It can also be
+ * used for misuse and abuse testing. Generally, the specific op wrappers
+ * should be used instead. It is a macro instead of an static inline function as
+ * some of the types over overloaded (timeout is used for nr_requeue for
+ * example).
+ *
+ * These argument descriptions are the defaults for all
+ * like-named arguments in the following wrappers except where noted below.
+ */
+#define futex(uaddr, op, val, timeout, uaddr2, val3, opflags) \
+ syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3)
+
+/**
+ * futex_wait() - block on uaddr with optional timeout
+ * @timeout: relative timeout
+ */
+static inline int
+futex_wait(futex_t *uaddr, futex_t val, struct timespec *timeout, int opflags)
+{
+ return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags);
+}
+
+/**
+ * futex_wake() - wake one or more tasks blocked on uaddr
+ * @nr_wake: wake up to this many tasks
+ */
+static inline int
+futex_wake(futex_t *uaddr, int nr_wake, int opflags)
+{
+ return futex(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags);
+}
+
+/**
+ * futex_wait_bitset() - block on uaddr with bitset
+ * @bitset: bitset to be used with futex_wake_bitset
+ */
+static inline int
+futex_wait_bitset(futex_t *uaddr, futex_t val, struct timespec *timeout,
+ u_int32_t bitset, int opflags)
+{
+ return futex(uaddr, FUTEX_WAIT_BITSET, val, timeout, NULL, bitset,
+ opflags);
+}
+
+/**
+ * futex_wake_bitset() - wake one or more tasks blocked on uaddr with bitset
+ * @bitset: bitset to compare with that used in futex_wait_bitset
+ */
+static inline int
+futex_wake_bitset(futex_t *uaddr, int nr_wake, u_int32_t bitset, int opflags)
+{
+ return futex(uaddr, FUTEX_WAKE_BITSET, nr_wake, NULL, NULL, bitset,
+ opflags);
+}
+
+/**
+ * futex_lock_pi() - block on uaddr as a PI mutex
+ * @detect: whether (1) or not (0) to perform deadlock detection
+ */
+static inline int
+futex_lock_pi(futex_t *uaddr, struct timespec *timeout, int detect,
+ int opflags)
+{
+ return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags);
+}
+
+/**
+ * futex_unlock_pi() - release uaddr as a PI mutex, waking the top waiter
+ */
+static inline int
+futex_unlock_pi(futex_t *uaddr, int opflags)
+{
+ return futex(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags);
+}
+
+/**
+ * futex_wake_op() - FIXME: COME UP WITH A GOOD ONE LINE DESCRIPTION
+ */
+static inline int
+futex_wake_op(futex_t *uaddr, futex_t *uaddr2, int nr_wake, int nr_wake2,
+ int wake_op, int opflags)
+{
+ return futex(uaddr, FUTEX_WAKE_OP, nr_wake, nr_wake2, uaddr2, wake_op,
+ opflags);
+}
+
+/**
+ * futex_requeue() - requeue without expected value comparison, deprecated
+ * @nr_wake: wake up to this many tasks
+ * @nr_requeue: requeue up to this many tasks
+ *
+ * Due to its inherently racy implementation, futex_requeue() is deprecated in
+ * favor of futex_cmp_requeue().
+ */
+static inline int
+futex_requeue(futex_t *uaddr, futex_t *uaddr2, int nr_wake, int nr_requeue,
+ int opflags)
+{
+ return futex(uaddr, FUTEX_REQUEUE, nr_wake, nr_requeue, uaddr2, 0,
+ opflags);
+}
+
+/**
+ * futex_cmp_requeue() - requeue tasks from uaddr to uaddr2
+ * @nr_wake: wake up to this many tasks
+ * @nr_requeue: requeue up to this many tasks
+ */
+static inline int
+futex_cmp_requeue(futex_t *uaddr, futex_t val, futex_t *uaddr2, int nr_wake,
+ int nr_requeue, int opflags)
+{
+ return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2,
+ val, opflags);
+}
+
+/**
+ * futex_wait_requeue_pi() - block on uaddr and prepare to requeue to uaddr2
+ * @uaddr: non-PI futex source
+ * @uaddr2: PI futex target
+ *
+ * This is the first half of the requeue_pi mechanism. It shall always be
+ * paired with futex_cmp_requeue_pi().
+ */
+static inline int
+futex_wait_requeue_pi(futex_t *uaddr, futex_t val, futex_t *uaddr2,
+ struct timespec *timeout, int opflags)
+{
+ return futex(uaddr, FUTEX_WAIT_REQUEUE_PI, val, timeout, uaddr2, 0,
+ opflags);
+}
+
+/**
+ * futex_cmp_requeue_pi() - requeue tasks from uaddr to uaddr2 (PI aware)
+ * @uaddr: non-PI futex source
+ * @uaddr2: PI futex target
+ * @nr_wake: wake up to this many tasks
+ * @nr_requeue: requeue up to this many tasks
+ */
+static inline int
+futex_cmp_requeue_pi(futex_t *uaddr, futex_t val, futex_t *uaddr2, int nr_wake,
+ int nr_requeue, int opflags)
+{
+ return futex(uaddr, FUTEX_CMP_REQUEUE_PI, nr_wake, nr_requeue, uaddr2,
+ val, opflags);
+}
+
+/**
+ * futex_cmpxchg() - atomic compare and exchange
+ * @uaddr: The address of the futex to be modified
+ * @oldval: The expected value of the futex
+ * @newval: The new value to try and assign the futex
+ *
+ * Implement cmpxchg using gcc atomic builtins.
+ * http://gcc.gnu.org/onlinedocs/gcc-4.1.0/gcc/Atomic-Builtins.html
+ *
+ * Return the old futex value.
+ */
+static inline u_int32_t
+futex_cmpxchg(futex_t *uaddr, u_int32_t oldval, u_int32_t newval)
+{
+ return __sync_val_compare_and_swap(uaddr, oldval, newval);
+}
+
+/**
+ * futex_dec() - atomic decrement of the futex value
+ * @uaddr: The address of the futex to be modified
+ *
+ * Return the new futex value.
+ */
+static inline u_int32_t
+futex_dec(futex_t *uaddr)
+{
+ return __sync_sub_and_fetch(uaddr, 1);
+}
+
+/**
+ * futex_inc() - atomic increment of the futex value
+ * @uaddr: the address of the futex to be modified
+ *
+ * Return the new futex value.
+ */
+static inline u_int32_t
+futex_inc(futex_t *uaddr)
+{
+ return __sync_add_and_fetch(uaddr, 1);
+}
+
+/**
+ * futex_set() - atomic decrement of the futex value
+ * @uaddr: the address of the futex to be modified
+ * @newval: New value for the atomic_t
+ *
+ * Return the new futex value.
+ */
+static inline u_int32_t
+futex_set(futex_t *uaddr, u_int32_t newval)
+{
+ *uaddr = newval;
+ return newval;
+}
+
+#endif
diff --git a/futex/include/logging.h b/futex/include/logging.h
new file mode 100644
index 0000000..014aa01
--- /dev/null
+++ b/futex/include/logging.h
@@ -0,0 +1,153 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * Glibc independent futex library for testing kernel functionality.
+ *
+ * AUTHOR
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#ifndef _LOGGING_H
+#define _LOGGING_H
+
+#include <string.h>
+#include <unistd.h>
+#include <linux/futex.h>
+#include "kselftest.h"
+
+/*
+ * Define PASS, ERROR, and FAIL strings with and without color escape
+ * sequences, default to no color.
+ */
+#define ESC 0x1B, '['
+#define BRIGHT '1'
+#define GREEN '3', '2'
+#define YELLOW '3', '3'
+#define RED '3', '1'
+#define ESCEND 'm'
+#define BRIGHT_GREEN ESC, BRIGHT, ';', GREEN, ESCEND
+#define BRIGHT_YELLOW ESC, BRIGHT, ';', YELLOW, ESCEND
+#define BRIGHT_RED ESC, BRIGHT, ';', RED, ESCEND
+#define RESET_COLOR ESC, '0', 'm'
+static const char PASS_COLOR[] = {BRIGHT_GREEN, ' ', 'P', 'A', 'S', 'S',
+ RESET_COLOR, 0};
+static const char ERROR_COLOR[] = {BRIGHT_YELLOW, 'E', 'R', 'R', 'O', 'R',
+ RESET_COLOR, 0};
+static const char FAIL_COLOR[] = {BRIGHT_RED, ' ', 'F', 'A', 'I', 'L',
+ RESET_COLOR, 0};
+static const char INFO_NORMAL[] = " INFO";
+static const char PASS_NORMAL[] = " PASS";
+static const char ERROR_NORMAL[] = "ERROR";
+static const char FAIL_NORMAL[] = " FAIL";
+const char *INFO = INFO_NORMAL;
+const char *PASS = PASS_NORMAL;
+const char *ERROR = ERROR_NORMAL;
+const char *FAIL = FAIL_NORMAL;
+
+/* Verbosity setting for INFO messages */
+#define VQUIET 0
+#define VCRITICAL 1
+#define VINFO 2
+#define VMAX VINFO
+int _verbose = VCRITICAL;
+
+/* Functional test return codes */
+#define RET_PASS 0
+#define RET_ERROR -1
+#define RET_FAIL -2
+
+/**
+ * log_color() - Use colored output for PASS, ERROR, and FAIL strings
+ * @use_color: use color (1) or not (0)
+ */
+void log_color(int use_color)
+{
+ if (use_color) {
+ PASS = PASS_COLOR;
+ ERROR = ERROR_COLOR;
+ FAIL = FAIL_COLOR;
+ } else {
+ PASS = PASS_NORMAL;
+ ERROR = ERROR_NORMAL;
+ FAIL = FAIL_NORMAL;
+ }
+}
+
+/**
+ * log_verbosity() - Set verbosity of test output
+ * @verbose: Enable (1) verbose output or not (0)
+ *
+ * Currently setting verbose=1 will enable INFO messages and 0 will disable
+ * them. FAIL and ERROR messages are always displayed.
+ */
+void log_verbosity(int level)
+{
+ if (level > VMAX)
+ level = VMAX;
+ else if (level < 0)
+ level = 0;
+ _verbose = level;
+}
+
+/**
+ * print_result() - Print standard PASS | ERROR | FAIL results
+ * @ret: the return value to be considered: 0 | RET_ERROR | RET_FAIL
+ *
+ * print_result() is primarily intended for functional tests.
+ */
+void print_result(int ret)
+{
+ const char *result = "Unknown return code";
+
+ switch (ret) {
+ case RET_PASS:
+ ksft_inc_pass_cnt();
+ result = PASS;
+ break;
+ case RET_ERROR:
+ result = ERROR;
+ break;
+ case RET_FAIL:
+ ksft_inc_fail_cnt();
+ result = FAIL;
+ break;
+ }
+ printf("Result: %s\n", result);
+}
+
+/* log level macros */
+#define info(message, vargs...) \
+do { \
+ if (_verbose >= VINFO) \
+ fprintf(stderr, "\t%s: "message, INFO, ##vargs); \
+} while (0)
+
+#define error(message, err, args...) \
+do { \
+ if (_verbose >= VCRITICAL) {\
+ if (err) \
+ fprintf(stderr, "\t%s: %s: "message, \
+ ERROR, strerror(err), ##args); \
+ else \
+ fprintf(stderr, "\t%s: "message, ERROR, ##args); \
+ } \
+} while (0)
+
+#define fail(message, args...) \
+do { \
+ if (_verbose >= VCRITICAL) \
+ fprintf(stderr, "\t%s: "message, FAIL, ##args); \
+} while (0)
+
+#endif
diff --git a/futex/run.sh b/futex/run.sh
new file mode 100755
index 0000000..4126312
--- /dev/null
+++ b/futex/run.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+###############################################################################
+#
+# Copyright © International Business Machines Corp., 2009
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# DESCRIPTION
+# Run all tests under the functional, performance, and stress directories.
+# Format and summarize the results.
+#
+# AUTHOR
+# Darren Hart <dvhart@linux.intel.com>
+#
+# HISTORY
+# 2009-Nov-9: Initial version by Darren Hart <dvhart@linux.intel.com>
+#
+###############################################################################
+
+# Test for a color capable shell and pass the result to the subdir scripts
+USE_COLOR=0
+tput setf 7
+if [ $? -eq 0 ]; then
+ USE_COLOR=1
+ tput sgr0
+fi
+export USE_COLOR
+
+(cd functional; ./run.sh)
diff --git a/gen_kselftest_tar.sh b/gen_kselftest_tar.sh
new file mode 100755
index 0000000..17d5bd0
--- /dev/null
+++ b/gen_kselftest_tar.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+#
+# gen_kselftest_tar
+# Generate kselftest tarball
+# Author: Shuah Khan <shuahkh@osg.samsung.com>
+# Copyright (C) 2015 Samsung Electronics Co., Ltd.
+
+# This software may be freely redistributed under the terms of the GNU
+# General Public License (GPLv2).
+
+# main
+main()
+{
+ if [ "$#" -eq 0 ]; then
+ echo "$0: Generating default compression gzip"
+ copts="cvzf"
+ ext=".tar.gz"
+ else
+ case "$1" in
+ tar)
+ copts="cvf"
+ ext=".tar"
+ ;;
+ targz)
+ copts="cvzf"
+ ext=".tar.gz"
+ ;;
+ tarbz2)
+ copts="cvjf"
+ ext=".tar.bz2"
+ ;;
+ tarxz)
+ copts="cvJf"
+ ext=".tar.xz"
+ ;;
+ *)
+ echo "Unknown tarball format $1"
+ exit 1
+ ;;
+ esac
+ fi
+
+ install_dir=./kselftest
+
+# Run install using INSTALL_KSFT_PATH override to generate install
+# directory
+./kselftest_install.sh
+tar $copts kselftest${ext} $install_dir
+echo "Kselftest archive kselftest${ext} created!"
+
+# clean up install directory
+rm -rf kselftest
+}
+
+main "$@"
diff --git a/ipc/Makefile b/ipc/Makefile
index 74bbefd..25d2e70 100644
--- a/ipc/Makefile
+++ b/ipc/Makefile
@@ -12,14 +12,11 @@ endif
CFLAGS += -I../../../../usr/include/
all:
-ifeq ($(ARCH),x86)
- gcc $(CFLAGS) msgque.c -o msgque_test
-else
- echo "Not an x86 target, can't build msgque selftest"
-endif
+ $(CC) $(CFLAGS) msgque.c -o msgque_test
+
+TEST_PROGS := msgque_test
-run_tests: all
- ./msgque_test
+include ../lib.mk
clean:
rm -fr ./msgque_test
diff --git a/kcmp/Makefile b/kcmp/Makefile
index ff0eefd..2ae7450 100644
--- a/kcmp/Makefile
+++ b/kcmp/Makefile
@@ -1,10 +1,10 @@
-CC := $(CROSS_COMPILE)$(CC)
CFLAGS += -I../../../../usr/include/
all: kcmp_test
-run_tests: all
- @./kcmp_test || echo "kcmp_test: [FAIL]"
+TEST_PROGS := kcmp_test
+
+include ../lib.mk
clean:
$(RM) kcmp_test kcmp-test-file
diff --git a/kselftest.h b/kselftest.h
index 572c888..ef1c80d 100644
--- a/kselftest.h
+++ b/kselftest.h
@@ -13,6 +13,13 @@
#include <stdlib.h>
#include <unistd.h>
+/* define kselftest exit codes */
+#define KSFT_PASS 0
+#define KSFT_FAIL 1
+#define KSFT_XFAIL 2
+#define KSFT_XPASS 3
+#define KSFT_SKIP 4
+
/* counters */
struct ksft_count {
unsigned int ksft_pass;
@@ -40,23 +47,23 @@ static inline void ksft_print_cnts(void)
static inline int ksft_exit_pass(void)
{
- exit(0);
+ exit(KSFT_PASS);
}
static inline int ksft_exit_fail(void)
{
- exit(1);
+ exit(KSFT_FAIL);
}
static inline int ksft_exit_xfail(void)
{
- exit(2);
+ exit(KSFT_XFAIL);
}
static inline int ksft_exit_xpass(void)
{
- exit(3);
+ exit(KSFT_XPASS);
}
static inline int ksft_exit_skip(void)
{
- exit(4);
+ exit(KSFT_SKIP);
}
#endif /* __KSELFTEST_H */
diff --git a/kselftest_install.sh b/kselftest_install.sh
new file mode 100755
index 0000000..1555fbd
--- /dev/null
+++ b/kselftest_install.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+#
+# Kselftest Install
+# Install kselftest tests
+# Author: Shuah Khan <shuahkh@osg.samsung.com>
+# Copyright (C) 2015 Samsung Electronics Co., Ltd.
+
+# This software may be freely redistributed under the terms of the GNU
+# General Public License (GPLv2).
+
+install_loc=`pwd`
+
+main()
+{
+ if [ $(basename $install_loc) != "selftests" ]; then
+ echo "$0: Please run it in selftests directory ..."
+ exit 1;
+ fi
+ if [ "$#" -eq 0 ]; then
+ echo "$0: Installing in default location - $install_loc ..."
+ elif [ ! -d "$1" ]; then
+ echo "$0: $1 doesn't exist!!"
+ exit 1;
+ else
+ install_loc=$1
+ echo "$0: Installing in specified location - $install_loc ..."
+ fi
+
+ install_dir=$install_loc/kselftest
+
+# Create install directory
+ mkdir -p $install_dir
+# Build tests
+ INSTALL_PATH=$install_dir make install
+}
+
+main "$@"
diff --git a/lib.mk b/lib.mk
new file mode 100644
index 0000000..50a93f5
--- /dev/null
+++ b/lib.mk
@@ -0,0 +1,38 @@
+# This mimics the top-level Makefile. We do it explicitly here so that this
+# Makefile can operate with or without the kbuild infrastructure.
+CC := $(CROSS_COMPILE)gcc
+
+define RUN_TESTS
+ @for TEST in $(TEST_PROGS); do \
+ (./$$TEST && echo "selftests: $$TEST [PASS]") || echo "selftests: $$TEST [FAIL]"; \
+ done;
+endef
+
+run_tests: all
+ $(RUN_TESTS)
+
+define INSTALL_RULE
+ @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \
+ mkdir -p ${INSTALL_PATH}; \
+ echo "rsync -a $(TEST_DIRS) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/"; \
+ rsync -a $(TEST_DIRS) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/; \
+ fi
+endef
+
+install: all
+ifdef INSTALL_PATH
+ $(INSTALL_RULE)
+else
+ $(error Error: set INSTALL_PATH to use install)
+endif
+
+define EMIT_TESTS
+ @for TEST in $(TEST_PROGS); do \
+ echo "(./$$TEST && echo \"selftests: $$TEST [PASS]\") || echo \"selftests: $$TEST [FAIL]\""; \
+ done;
+endef
+
+emit_tests:
+ $(EMIT_TESTS)
+
+.PHONY: run_tests all clean install emit_tests
diff --git a/lib/Makefile b/lib/Makefile
new file mode 100644
index 0000000..47147b9
--- /dev/null
+++ b/lib/Makefile
@@ -0,0 +1,8 @@
+# Makefile for lib/ function selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := printf.sh
+
+include ../lib.mk
diff --git a/lib/printf.sh b/lib/printf.sh
new file mode 100644
index 0000000..4fdc70f
--- /dev/null
+++ b/lib/printf.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+# Runs printf infrastructure using test_printf kernel module
+
+if /sbin/modprobe -q test_printf; then
+ /sbin/modprobe -q -r test_printf
+ echo "printf: ok"
+else
+ echo "printf: [FAIL]"
+ exit 1
+fi
diff --git a/membarrier/.gitignore b/membarrier/.gitignore
new file mode 100644
index 0000000..020c44f
--- /dev/null
+++ b/membarrier/.gitignore
@@ -0,0 +1 @@
+membarrier_test
diff --git a/membarrier/Makefile b/membarrier/Makefile
new file mode 100644
index 0000000..a1a9708
--- /dev/null
+++ b/membarrier/Makefile
@@ -0,0 +1,10 @@
+CFLAGS += -g -I../../../../usr/include/
+
+TEST_PROGS := membarrier_test
+
+all: $(TEST_PROGS)
+
+include ../lib.mk
+
+clean:
+ $(RM) $(TEST_PROGS)
diff --git a/membarrier/membarrier_test.c b/membarrier/membarrier_test.c
new file mode 100644
index 0000000..535f0fe
--- /dev/null
+++ b/membarrier/membarrier_test.c
@@ -0,0 +1,118 @@
+#define _GNU_SOURCE
+#include <linux/membarrier.h>
+#include <syscall.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+
+#include "../kselftest.h"
+
+enum test_membarrier_status {
+ TEST_MEMBARRIER_PASS = 0,
+ TEST_MEMBARRIER_FAIL,
+ TEST_MEMBARRIER_SKIP,
+};
+
+static int sys_membarrier(int cmd, int flags)
+{
+ return syscall(__NR_membarrier, cmd, flags);
+}
+
+static enum test_membarrier_status test_membarrier_cmd_fail(void)
+{
+ int cmd = -1, flags = 0;
+
+ if (sys_membarrier(cmd, flags) != -1) {
+ printf("membarrier: Wrong command should fail but passed.\n");
+ return TEST_MEMBARRIER_FAIL;
+ }
+ return TEST_MEMBARRIER_PASS;
+}
+
+static enum test_membarrier_status test_membarrier_flags_fail(void)
+{
+ int cmd = MEMBARRIER_CMD_QUERY, flags = 1;
+
+ if (sys_membarrier(cmd, flags) != -1) {
+ printf("membarrier: Wrong flags should fail but passed.\n");
+ return TEST_MEMBARRIER_FAIL;
+ }
+ return TEST_MEMBARRIER_PASS;
+}
+
+static enum test_membarrier_status test_membarrier_success(void)
+{
+ int cmd = MEMBARRIER_CMD_SHARED, flags = 0;
+
+ if (sys_membarrier(cmd, flags) != 0) {
+ printf("membarrier: Executing MEMBARRIER_CMD_SHARED failed. %s.\n",
+ strerror(errno));
+ return TEST_MEMBARRIER_FAIL;
+ }
+
+ printf("membarrier: MEMBARRIER_CMD_SHARED success.\n");
+ return TEST_MEMBARRIER_PASS;
+}
+
+static enum test_membarrier_status test_membarrier(void)
+{
+ enum test_membarrier_status status;
+
+ status = test_membarrier_cmd_fail();
+ if (status)
+ return status;
+ status = test_membarrier_flags_fail();
+ if (status)
+ return status;
+ status = test_membarrier_success();
+ if (status)
+ return status;
+ return TEST_MEMBARRIER_PASS;
+}
+
+static enum test_membarrier_status test_membarrier_query(void)
+{
+ int flags = 0, ret;
+
+ printf("membarrier MEMBARRIER_CMD_QUERY ");
+ ret = sys_membarrier(MEMBARRIER_CMD_QUERY, flags);
+ if (ret < 0) {
+ printf("failed. %s.\n", strerror(errno));
+ switch (errno) {
+ case ENOSYS:
+ /*
+ * It is valid to build a kernel with
+ * CONFIG_MEMBARRIER=n. However, this skips the tests.
+ */
+ return TEST_MEMBARRIER_SKIP;
+ case EINVAL:
+ default:
+ return TEST_MEMBARRIER_FAIL;
+ }
+ }
+ if (!(ret & MEMBARRIER_CMD_SHARED)) {
+ printf("command MEMBARRIER_CMD_SHARED is not supported.\n");
+ return TEST_MEMBARRIER_FAIL;
+ }
+ printf("syscall available.\n");
+ return TEST_MEMBARRIER_PASS;
+}
+
+int main(int argc, char **argv)
+{
+ switch (test_membarrier_query()) {
+ case TEST_MEMBARRIER_FAIL:
+ return ksft_exit_fail();
+ case TEST_MEMBARRIER_SKIP:
+ return ksft_exit_skip();
+ }
+ switch (test_membarrier()) {
+ case TEST_MEMBARRIER_FAIL:
+ return ksft_exit_fail();
+ case TEST_MEMBARRIER_SKIP:
+ return ksft_exit_skip();
+ }
+
+ printf("membarrier: tests done!\n");
+ return ksft_exit_pass();
+}
diff --git a/memfd/Makefile b/memfd/Makefile
index b80cd10..fd396ac 100644
--- a/memfd/Makefile
+++ b/memfd/Makefile
@@ -1,17 +1,19 @@
+CC = $(CROSS_COMPILE)gcc
CFLAGS += -D_FILE_OFFSET_BITS=64
CFLAGS += -I../../../../include/uapi/
CFLAGS += -I../../../../include/
+CFLAGS += -I../../../../usr/include/
-all:
- gcc $(CFLAGS) memfd_test.c -o memfd_test
+TEST_PROGS := memfd_test
-run_tests: all
- gcc $(CFLAGS) memfd_test.c -o memfd_test
- @./memfd_test || echo "memfd_test: [FAIL]"
+all: $(TEST_PROGS)
-build_fuse:
- gcc $(CFLAGS) fuse_mnt.c `pkg-config fuse --cflags --libs` -o fuse_mnt
- gcc $(CFLAGS) fuse_test.c -o fuse_test
+include ../lib.mk
+
+build_fuse: fuse_mnt fuse_test
+
+fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags)
+fuse_mnt: LDFLAGS += $(shell pkg-config fuse --libs)
run_fuse: build_fuse
@./run_fuse_test.sh || echo "fuse_test: [FAIL]"
diff --git a/memfd/memfd_test.c b/memfd/memfd_test.c
index 0b9eafb..2654689 100644
--- a/memfd/memfd_test.c
+++ b/memfd/memfd_test.c
@@ -15,10 +15,11 @@
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/syscall.h>
+#include <sys/wait.h>
#include <unistd.h>
#define MFD_DEF_SIZE 8192
-#define STACK_SIZE 65535
+#define STACK_SIZE 65536
static int sys_memfd_create(const char *name,
unsigned int flags)
diff --git a/memfd/run_fuse_test.sh b/memfd/run_fuse_test.sh
index 69b930e..69b930e 100644..100755
--- a/memfd/run_fuse_test.sh
+++ b/memfd/run_fuse_test.sh
diff --git a/memory-hotplug/Makefile b/memory-hotplug/Makefile
index d46b8d4..afb2624 100644
--- a/memory-hotplug/Makefile
+++ b/memory-hotplug/Makefile
@@ -1,9 +1,12 @@
all:
-run_tests:
- @/bin/bash ./on-off-test.sh -r 2 || echo "memory-hotplug selftests: [FAIL]"
+include ../lib.mk
+
+TEST_PROGS := mem-on-off-test.sh
+override RUN_TESTS := ./mem-on-off-test.sh -r 2 || echo "selftests: memory-hotplug [FAIL]"
+override EMIT_TESTS := echo "$(RUN_TESTS)"
run_full_test:
- @/bin/bash ./on-off-test.sh || echo "memory-hotplug selftests: [FAIL]"
+ @/bin/bash ./mem-on-off-test.sh || echo "memory-hotplug selftests: [FAIL]"
clean:
diff --git a/memory-hotplug/on-off-test.sh b/memory-hotplug/mem-on-off-test.sh
index 6cddde0..6cddde0 100644..100755
--- a/memory-hotplug/on-off-test.sh
+++ b/memory-hotplug/mem-on-off-test.sh
diff --git a/mount/.gitignore b/mount/.gitignore
new file mode 100644
index 0000000..856ad41
--- /dev/null
+++ b/mount/.gitignore
@@ -0,0 +1 @@
+unprivileged-remount-test
diff --git a/mount/Makefile b/mount/Makefile
index 337d853..5e35c9c 100644
--- a/mount/Makefile
+++ b/mount/Makefile
@@ -1,17 +1,21 @@
# Makefile for mount selftests.
-
+CFLAGS = -Wall \
+ -O2
all: unprivileged-remount-test
unprivileged-remount-test: unprivileged-remount-test.c
- gcc -Wall -O2 unprivileged-remount-test.c -o unprivileged-remount-test
+ $(CC) $(CFLAGS) unprivileged-remount-test.c -o unprivileged-remount-test
-# Allow specific tests to be selected.
-test_unprivileged_remount: unprivileged-remount-test
- @if [ -f /proc/self/uid_map ] ; then ./unprivileged-remount-test ; fi
+include ../lib.mk
-run_tests: all test_unprivileged_remount
+TEST_PROGS := unprivileged-remount-test
+override RUN_TESTS := if [ -f /proc/self/uid_map ] ; \
+ then \
+ ./unprivileged-remount-test ; \
+ else \
+ echo "WARN: No /proc/self/uid_map exist, test skipped." ; \
+ fi
+override EMIT_TESTS := echo "$(RUN_TESTS)"
clean:
rm -f unprivileged-remount-test
-
-.PHONY: all test_unprivileged_remount
diff --git a/mqueue/Makefile b/mqueue/Makefile
index 8056e2e..eebac29 100644
--- a/mqueue/Makefile
+++ b/mqueue/Makefile
@@ -1,10 +1,20 @@
-all:
- gcc -O2 mq_open_tests.c -o mq_open_tests -lrt
- gcc -O2 -o mq_perf_tests mq_perf_tests.c -lrt -lpthread -lpopt
+CFLAGS += -O2
+LDLIBS = -lrt -lpthread -lpopt
+TEST_PROGS := mq_open_tests mq_perf_tests
-run_tests:
- @./mq_open_tests /test1 || echo "mq_open_tests: [FAIL]"
- @./mq_perf_tests || echo "mq_perf_tests: [FAIL]"
+all: $(TEST_PROGS)
+
+include ../lib.mk
+
+override define RUN_TESTS
+ @./mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]"
+ @./mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]"
+endef
+
+override define EMIT_TESTS
+ echo "./mq_open_tests /test1 || echo \"selftests: mq_open_tests [FAIL]\""
+ echo "./mq_perf_tests || echo \"selftests: mq_perf_tests [FAIL]\""
+endef
clean:
rm -f mq_open_tests mq_perf_tests
diff --git a/mqueue/mq_open_tests.c b/mqueue/mq_open_tests.c
index 9c1a5d3..e0a74bd 100644
--- a/mqueue/mq_open_tests.c
+++ b/mqueue/mq_open_tests.c
@@ -31,6 +31,7 @@
#include <sys/resource.h>
#include <sys/stat.h>
#include <mqueue.h>
+#include <error.h>
static char *usage =
"Usage:\n"
diff --git a/mqueue/mq_perf_tests.c b/mqueue/mq_perf_tests.c
index 8519e9e..8188f72 100644
--- a/mqueue/mq_perf_tests.c
+++ b/mqueue/mq_perf_tests.c
@@ -37,6 +37,7 @@
#include <sys/stat.h>
#include <mqueue.h>
#include <popt.h>
+#include <error.h>
static char *usage =
"Usage:\n"
diff --git a/net/Makefile b/net/Makefile
index 62f22cc..fac4782 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -1,6 +1,5 @@
# Makefile for net selftests
-CC = $(CROSS_COMPILE)gcc
CFLAGS = -Wall -O2 -g
CFLAGS += -I../../../../usr/include/
@@ -11,9 +10,10 @@ all: $(NET_PROGS)
%: %.c
$(CC) $(CFLAGS) -o $@ $^
-run_tests: all
- @/bin/sh ./run_netsocktests || echo "sockettests: [FAIL]"
- @/bin/sh ./run_afpackettests || echo "afpackettests: [FAIL]"
- ./test_bpf.sh
+TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh
+TEST_FILES := $(NET_PROGS)
+
+include ../lib.mk
+
clean:
$(RM) $(NET_PROGS)
diff --git a/net/psock_fanout.c b/net/psock_fanout.c
index 6f67333..4124593 100644
--- a/net/psock_fanout.c
+++ b/net/psock_fanout.c
@@ -19,6 +19,8 @@
* - PACKET_FANOUT_LB
* - PACKET_FANOUT_CPU
* - PACKET_FANOUT_ROLLOVER
+ * - PACKET_FANOUT_CBPF
+ * - PACKET_FANOUT_EBPF
*
* Todo:
* - functionality: PACKET_FANOUT_FLAG_DEFRAG
@@ -44,7 +46,9 @@
#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
+#include <linux/unistd.h> /* for __NR_bpf */
#include <linux/filter.h>
+#include <linux/bpf.h>
#include <linux/if_packet.h>
#include <net/ethernet.h>
#include <netinet/ip.h>
@@ -91,6 +95,51 @@ static int sock_fanout_open(uint16_t typeflags, int num_packets)
return fd;
}
+static void sock_fanout_set_ebpf(int fd)
+{
+ const int len_off = __builtin_offsetof(struct __sk_buff, len);
+ struct bpf_insn prog[] = {
+ { BPF_ALU64 | BPF_MOV | BPF_X, 6, 1, 0, 0 },
+ { BPF_LDX | BPF_W | BPF_MEM, 0, 6, len_off, 0 },
+ { BPF_JMP | BPF_JGE | BPF_K, 0, 0, 1, DATA_LEN },
+ { BPF_JMP | BPF_JA | BPF_K, 0, 0, 4, 0 },
+ { BPF_LD | BPF_B | BPF_ABS, 0, 0, 0, 0x50 },
+ { BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 2, DATA_CHAR },
+ { BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1, DATA_CHAR_1 },
+ { BPF_ALU | BPF_MOV | BPF_K, 0, 0, 0, 0 },
+ { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
+ };
+ char log_buf[512];
+ union bpf_attr attr;
+ int pfd;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ attr.insns = (unsigned long) prog;
+ attr.insn_cnt = sizeof(prog) / sizeof(prog[0]);
+ attr.license = (unsigned long) "GPL";
+ attr.log_buf = (unsigned long) log_buf,
+ attr.log_size = sizeof(log_buf),
+ attr.log_level = 1,
+
+ pfd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+ if (pfd < 0) {
+ perror("bpf");
+ fprintf(stderr, "bpf verifier:\n%s\n", log_buf);
+ exit(1);
+ }
+
+ if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) {
+ perror("fanout data ebpf");
+ exit(1);
+ }
+
+ if (close(pfd)) {
+ perror("close ebpf");
+ exit(1);
+ }
+}
+
static char *sock_fanout_open_ring(int fd)
{
struct tpacket_req req = {
@@ -115,8 +164,8 @@ static char *sock_fanout_open_ring(int fd)
ring = mmap(0, req.tp_block_size * req.tp_block_nr,
PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
- if (!ring) {
- fprintf(stderr, "packetsock ring mmap\n");
+ if (ring == MAP_FAILED) {
+ perror("packetsock ring mmap");
exit(1);
}
@@ -209,6 +258,7 @@ static int test_datapath(uint16_t typeflags, int port_off,
{
const int expect0[] = { 0, 0 };
char *rings[2];
+ uint8_t type = typeflags & 0xFF;
int fds[2], fds_udp[2][2], ret;
fprintf(stderr, "test: datapath 0x%hx\n", typeflags);
@@ -219,6 +269,11 @@ static int test_datapath(uint16_t typeflags, int port_off,
fprintf(stderr, "ERROR: failed open\n");
exit(1);
}
+ if (type == PACKET_FANOUT_CBPF)
+ sock_setfilter(fds[0], SOL_PACKET, PACKET_FANOUT_DATA);
+ else if (type == PACKET_FANOUT_EBPF)
+ sock_fanout_set_ebpf(fds[0]);
+
rings[0] = sock_fanout_open_ring(fds[0]);
rings[1] = sock_fanout_open_ring(fds[1]);
pair_udp_open(fds_udp[0], PORT_BASE);
@@ -227,11 +282,11 @@ static int test_datapath(uint16_t typeflags, int port_off,
/* Send data, but not enough to overflow a queue */
pair_udp_send(fds_udp[0], 15);
- pair_udp_send(fds_udp[1], 5);
+ pair_udp_send_char(fds_udp[1], 5, DATA_CHAR_1);
ret = sock_fanout_read(fds, rings, expect1);
/* Send more data, overflow the queue */
- pair_udp_send(fds_udp[0], 15);
+ pair_udp_send_char(fds_udp[0], 15, DATA_CHAR_1);
/* TODO: ensure consistent order between expect1 and expect2 */
ret |= sock_fanout_read(fds, rings, expect2);
@@ -272,9 +327,10 @@ int main(int argc, char **argv)
const int expect_hash[2][2] = { { 15, 5 }, { 20, 5 } };
const int expect_hash_rb[2][2] = { { 15, 5 }, { 20, 15 } };
const int expect_lb[2][2] = { { 10, 10 }, { 18, 17 } };
- const int expect_rb[2][2] = { { 20, 0 }, { 20, 15 } };
+ const int expect_rb[2][2] = { { 15, 5 }, { 20, 15 } };
const int expect_cpu0[2][2] = { { 20, 0 }, { 20, 0 } };
const int expect_cpu1[2][2] = { { 0, 20 }, { 0, 20 } };
+ const int expect_bpf[2][2] = { { 15, 5 }, { 15, 20 } };
int port_off = 2, tries = 5, ret;
test_control_single();
@@ -296,6 +352,11 @@ int main(int argc, char **argv)
ret |= test_datapath(PACKET_FANOUT_ROLLOVER,
port_off, expect_rb[0], expect_rb[1]);
+ ret |= test_datapath(PACKET_FANOUT_CBPF,
+ port_off, expect_bpf[0], expect_bpf[1]);
+ ret |= test_datapath(PACKET_FANOUT_EBPF,
+ port_off, expect_bpf[0], expect_bpf[1]);
+
set_cpuaffinity(0);
ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
expect_cpu0[0], expect_cpu0[1]);
diff --git a/net/psock_lib.h b/net/psock_lib.h
index 37da54a..24bc7ec 100644
--- a/net/psock_lib.h
+++ b/net/psock_lib.h
@@ -30,6 +30,7 @@
#define DATA_LEN 100
#define DATA_CHAR 'a'
+#define DATA_CHAR_1 'b'
#define PORT_BASE 8000
@@ -37,29 +38,36 @@
# define __maybe_unused __attribute__ ((__unused__))
#endif
-static __maybe_unused void pair_udp_setfilter(int fd)
+static __maybe_unused void sock_setfilter(int fd, int lvl, int optnum)
{
struct sock_filter bpf_filter[] = {
{ 0x80, 0, 0, 0x00000000 }, /* LD pktlen */
- { 0x35, 0, 5, DATA_LEN }, /* JGE DATA_LEN [f goto nomatch]*/
+ { 0x35, 0, 4, DATA_LEN }, /* JGE DATA_LEN [f goto nomatch]*/
{ 0x30, 0, 0, 0x00000050 }, /* LD ip[80] */
- { 0x15, 0, 3, DATA_CHAR }, /* JEQ DATA_CHAR [f goto nomatch]*/
- { 0x30, 0, 0, 0x00000051 }, /* LD ip[81] */
- { 0x15, 0, 1, DATA_CHAR }, /* JEQ DATA_CHAR [f goto nomatch]*/
+ { 0x15, 1, 0, DATA_CHAR }, /* JEQ DATA_CHAR [t goto match]*/
+ { 0x15, 0, 1, DATA_CHAR_1}, /* JEQ DATA_CHAR_1 [t goto match]*/
{ 0x06, 0, 0, 0x00000060 }, /* RET match */
{ 0x06, 0, 0, 0x00000000 }, /* RET no match */
};
struct sock_fprog bpf_prog;
+ if (lvl == SOL_PACKET && optnum == PACKET_FANOUT_DATA)
+ bpf_filter[5].code = 0x16; /* RET A */
+
bpf_prog.filter = bpf_filter;
bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter);
- if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_prog,
+ if (setsockopt(fd, lvl, optnum, &bpf_prog,
sizeof(bpf_prog))) {
perror("setsockopt SO_ATTACH_FILTER");
exit(1);
}
}
+static __maybe_unused void pair_udp_setfilter(int fd)
+{
+ sock_setfilter(fd, SOL_SOCKET, SO_ATTACH_FILTER);
+}
+
static __maybe_unused void pair_udp_open(int fds[], uint16_t port)
{
struct sockaddr_in saddr, daddr;
@@ -96,11 +104,11 @@ static __maybe_unused void pair_udp_open(int fds[], uint16_t port)
}
}
-static __maybe_unused void pair_udp_send(int fds[], int num)
+static __maybe_unused void pair_udp_send_char(int fds[], int num, char payload)
{
char buf[DATA_LEN], rbuf[DATA_LEN];
- memset(buf, DATA_CHAR, sizeof(buf));
+ memset(buf, payload, sizeof(buf));
while (num--) {
/* Should really handle EINTR and EAGAIN */
if (write(fds[0], buf, sizeof(buf)) != sizeof(buf)) {
@@ -118,6 +126,11 @@ static __maybe_unused void pair_udp_send(int fds[], int num)
}
}
+static __maybe_unused void pair_udp_send(int fds[], int num)
+{
+ return pair_udp_send_char(fds, num, DATA_CHAR);
+}
+
static __maybe_unused void pair_udp_close(int fds[])
{
close(fds[0]);
diff --git a/net/run_afpackettests b/net/run_afpackettests
index 5246e78..5246e78 100644..100755
--- a/net/run_afpackettests
+++ b/net/run_afpackettests
diff --git a/net/run_netsocktests b/net/run_netsocktests
index c09a682..c09a682 100644..100755
--- a/net/run_netsocktests
+++ b/net/run_netsocktests
diff --git a/powerpc/Makefile b/powerpc/Makefile
index 1d5e7ad..0c2706b 100644
--- a/powerpc/Makefile
+++ b/powerpc/Makefile
@@ -8,27 +8,51 @@ ifeq ($(ARCH),powerpc)
GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown")
-CC := $(CROSS_COMPILE)$(CC)
CFLAGS := -Wall -O2 -flto -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS)
-export CC CFLAGS
+export CFLAGS
-TARGETS = pmu copyloops mm tm primitives stringloops
+SUB_DIRS = benchmarks \
+ copyloops \
+ dscr \
+ mm \
+ pmu \
+ primitives \
+ stringloops \
+ switch_endian \
+ syscalls \
+ tm \
+ vphn
endif
-all: $(TARGETS)
+all: $(SUB_DIRS)
-$(TARGETS):
+$(SUB_DIRS):
$(MAKE) -k -C $@ all
-run_tests: all
- @for TARGET in $(TARGETS); do \
+include ../lib.mk
+
+override define RUN_TESTS
+ @for TARGET in $(SUB_DIRS); do \
$(MAKE) -C $$TARGET run_tests; \
done;
+endef
+
+override define INSTALL_RULE
+ @for TARGET in $(SUB_DIRS); do \
+ $(MAKE) -C $$TARGET install; \
+ done;
+endef
+
+override define EMIT_TESTS
+ @for TARGET in $(SUB_DIRS); do \
+ $(MAKE) -s -C $$TARGET emit_tests; \
+ done;
+endef
clean:
- @for TARGET in $(TARGETS); do \
+ @for TARGET in $(SUB_DIRS); do \
$(MAKE) -C $$TARGET clean; \
done;
rm -f tags
@@ -36,4 +60,4 @@ clean:
tags:
find . -name '*.c' -o -name '*.h' | xargs ctags
-.PHONY: all run_tests clean tags $(TARGETS)
+.PHONY: tags $(SUB_DIRS)
diff --git a/powerpc/benchmarks/.gitignore b/powerpc/benchmarks/.gitignore
new file mode 100644
index 0000000..b4709ea
--- /dev/null
+++ b/powerpc/benchmarks/.gitignore
@@ -0,0 +1 @@
+gettimeofday
diff --git a/powerpc/benchmarks/Makefile b/powerpc/benchmarks/Makefile
new file mode 100644
index 0000000..5fa4870
--- /dev/null
+++ b/powerpc/benchmarks/Makefile
@@ -0,0 +1,12 @@
+TEST_PROGS := gettimeofday
+
+CFLAGS += -O2
+
+all: $(TEST_PROGS)
+
+$(TEST_PROGS): ../harness.c
+
+include ../../lib.mk
+
+clean:
+ rm -f $(TEST_PROGS) *.o
diff --git a/powerpc/benchmarks/gettimeofday.c b/powerpc/benchmarks/gettimeofday.c
new file mode 100644
index 0000000..3af3c21
--- /dev/null
+++ b/powerpc/benchmarks/gettimeofday.c
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2015, Anton Blanchard, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <sys/time.h>
+#include <stdio.h>
+
+#include "utils.h"
+
+static int test_gettimeofday(void)
+{
+ int i;
+
+ struct timeval tv_start, tv_end;
+
+ gettimeofday(&tv_start, NULL);
+
+ for(i = 0; i < 100000000; i++) {
+ gettimeofday(&tv_end, NULL);
+ }
+
+ printf("time = %.6f\n", tv_end.tv_sec - tv_start.tv_sec + (tv_end.tv_usec - tv_start.tv_usec) * 1e-6);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_gettimeofday, "gettimeofday");
+}
diff --git a/powerpc/copyloops/Makefile b/powerpc/copyloops/Makefile
index 6f2d3be..384843e 100644
--- a/powerpc/copyloops/Makefile
+++ b/powerpc/copyloops/Makefile
@@ -2,28 +2,24 @@
CFLAGS += -m64
CFLAGS += -I$(CURDIR)
CFLAGS += -D SELFTEST
+CFLAGS += -maltivec
# Use our CFLAGS for the implicit .S rule
ASFLAGS = $(CFLAGS)
-PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7
+TEST_PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7
EXTRA_SOURCES := validate.c ../harness.c
-all: $(PROGS)
+all: $(TEST_PROGS)
copyuser_64: CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_base
copyuser_power7: CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_power7
memcpy_64: CPPFLAGS += -D COPY_LOOP=test_memcpy
memcpy_power7: CPPFLAGS += -D COPY_LOOP=test_memcpy_power7
-$(PROGS): $(EXTRA_SOURCES)
+$(TEST_PROGS): $(EXTRA_SOURCES)
-run_tests: all
- @-for PROG in $(PROGS); do \
- ./$$PROG; \
- done;
+include ../../lib.mk
clean:
- rm -f $(PROGS) *.o
-
-.PHONY: all run_tests clean
+ rm -f $(TEST_PROGS) *.o
diff --git a/powerpc/copyloops/asm/ppc_asm.h b/powerpc/copyloops/asm/ppc_asm.h
index d1dc374..50ae7d2 100644
--- a/powerpc/copyloops/asm/ppc_asm.h
+++ b/powerpc/copyloops/asm/ppc_asm.h
@@ -4,39 +4,6 @@
#define r1 1
-#define vr0 0
-#define vr1 1
-#define vr2 2
-#define vr3 3
-#define vr4 4
-#define vr5 5
-#define vr6 6
-#define vr7 7
-#define vr8 8
-#define vr9 9
-#define vr10 10
-#define vr11 11
-#define vr12 12
-#define vr13 13
-#define vr14 14
-#define vr15 15
-#define vr16 16
-#define vr17 17
-#define vr18 18
-#define vr19 19
-#define vr20 20
-#define vr21 21
-#define vr22 22
-#define vr23 23
-#define vr24 24
-#define vr25 25
-#define vr26 26
-#define vr27 27
-#define vr28 28
-#define vr29 29
-#define vr30 30
-#define vr31 31
-
#define R14 r14
#define R15 r15
#define R16 r16
diff --git a/powerpc/dscr/.gitignore b/powerpc/dscr/.gitignore
new file mode 100644
index 0000000..b585c6c
--- /dev/null
+++ b/powerpc/dscr/.gitignore
@@ -0,0 +1,7 @@
+dscr_default_test
+dscr_explicit_test
+dscr_inherit_exec_test
+dscr_inherit_test
+dscr_sysfs_test
+dscr_sysfs_thread_test
+dscr_user_test
diff --git a/powerpc/dscr/Makefile b/powerpc/dscr/Makefile
new file mode 100644
index 0000000..49327ee
--- /dev/null
+++ b/powerpc/dscr/Makefile
@@ -0,0 +1,14 @@
+TEST_PROGS := dscr_default_test dscr_explicit_test dscr_user_test \
+ dscr_inherit_test dscr_inherit_exec_test dscr_sysfs_test \
+ dscr_sysfs_thread_test
+
+dscr_default_test: LDLIBS += -lpthread
+
+all: $(TEST_PROGS)
+
+$(TEST_PROGS): ../harness.c
+
+include ../../lib.mk
+
+clean:
+ rm -f $(TEST_PROGS) *.o
diff --git a/powerpc/dscr/dscr.h b/powerpc/dscr/dscr.h
new file mode 100644
index 0000000..a36af1b
--- /dev/null
+++ b/powerpc/dscr/dscr.h
@@ -0,0 +1,127 @@
+/*
+ * POWER Data Stream Control Register (DSCR)
+ *
+ * This header file contains helper functions and macros
+ * required for all the DSCR related test cases.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#ifndef _SELFTESTS_POWERPC_DSCR_DSCR_H
+#define _SELFTESTS_POWERPC_DSCR_DSCR_H
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <pthread.h>
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+
+#include "utils.h"
+
+#define SPRN_DSCR 0x11 /* Privilege state SPR */
+#define SPRN_DSCR_USR 0x03 /* Problem state SPR */
+#define THREADS 100 /* Max threads */
+#define COUNT 100 /* Max iterations */
+#define DSCR_MAX 16 /* Max DSCR value */
+#define LEN_MAX 100 /* Max name length */
+
+#define DSCR_DEFAULT "/sys/devices/system/cpu/dscr_default"
+#define CPU_PATH "/sys/devices/system/cpu/"
+
+#define rmb() asm volatile("lwsync":::"memory")
+#define wmb() asm volatile("lwsync":::"memory")
+
+#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
+
+/* Prilvilege state DSCR access */
+inline unsigned long get_dscr(void)
+{
+ unsigned long ret;
+
+ asm volatile("mfspr %0,%1" : "=r" (ret): "i" (SPRN_DSCR));
+
+ return ret;
+}
+
+inline void set_dscr(unsigned long val)
+{
+ asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+}
+
+/* Problem state DSCR access */
+inline unsigned long get_dscr_usr(void)
+{
+ unsigned long ret;
+
+ asm volatile("mfspr %0,%1" : "=r" (ret): "i" (SPRN_DSCR_USR));
+
+ return ret;
+}
+
+inline void set_dscr_usr(unsigned long val)
+{
+ asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR_USR));
+}
+
+/* Default DSCR access */
+unsigned long get_default_dscr(void)
+{
+ int fd = -1, ret;
+ char buf[16];
+ unsigned long val;
+
+ if (fd == -1) {
+ fd = open(DSCR_DEFAULT, O_RDONLY);
+ if (fd == -1) {
+ perror("open() failed");
+ exit(1);
+ }
+ }
+ memset(buf, 0, sizeof(buf));
+ lseek(fd, 0, SEEK_SET);
+ ret = read(fd, buf, sizeof(buf));
+ if (ret == -1) {
+ perror("read() failed");
+ exit(1);
+ }
+ sscanf(buf, "%lx", &val);
+ close(fd);
+ return val;
+}
+
+void set_default_dscr(unsigned long val)
+{
+ int fd = -1, ret;
+ char buf[16];
+
+ if (fd == -1) {
+ fd = open(DSCR_DEFAULT, O_RDWR);
+ if (fd == -1) {
+ perror("open() failed");
+ exit(1);
+ }
+ }
+ sprintf(buf, "%lx\n", val);
+ ret = write(fd, buf, strlen(buf));
+ if (ret == -1) {
+ perror("write() failed");
+ exit(1);
+ }
+ close(fd);
+}
+
+double uniform_deviate(int seed)
+{
+ return seed * (1.0 / (RAND_MAX + 1.0));
+}
+#endif /* _SELFTESTS_POWERPC_DSCR_DSCR_H */
diff --git a/powerpc/dscr/dscr_default_test.c b/powerpc/dscr/dscr_default_test.c
new file mode 100644
index 0000000..df17c3b
--- /dev/null
+++ b/powerpc/dscr/dscr_default_test.c
@@ -0,0 +1,127 @@
+/*
+ * POWER Data Stream Control Register (DSCR) default test
+ *
+ * This test modifies the system wide default DSCR through
+ * it's sysfs interface and then verifies that all threads
+ * see the correct changed DSCR value immediately.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+static unsigned long dscr; /* System DSCR default */
+static unsigned long sequence;
+static unsigned long result[THREADS];
+
+static void *do_test(void *in)
+{
+ unsigned long thread = (unsigned long)in;
+ unsigned long i;
+
+ for (i = 0; i < COUNT; i++) {
+ unsigned long d, cur_dscr, cur_dscr_usr;
+ unsigned long s1, s2;
+
+ s1 = ACCESS_ONCE(sequence);
+ if (s1 & 1)
+ continue;
+ rmb();
+
+ d = dscr;
+ cur_dscr = get_dscr();
+ cur_dscr_usr = get_dscr_usr();
+
+ rmb();
+ s2 = sequence;
+
+ if (s1 != s2)
+ continue;
+
+ if (cur_dscr != d) {
+ fprintf(stderr, "thread %ld kernel DSCR should be %ld "
+ "but is %ld\n", thread, d, cur_dscr);
+ result[thread] = 1;
+ pthread_exit(&result[thread]);
+ }
+
+ if (cur_dscr_usr != d) {
+ fprintf(stderr, "thread %ld user DSCR should be %ld "
+ "but is %ld\n", thread, d, cur_dscr_usr);
+ result[thread] = 1;
+ pthread_exit(&result[thread]);
+ }
+ }
+ result[thread] = 0;
+ pthread_exit(&result[thread]);
+}
+
+int dscr_default(void)
+{
+ pthread_t threads[THREADS];
+ unsigned long i, *status[THREADS];
+ unsigned long orig_dscr_default;
+
+ orig_dscr_default = get_default_dscr();
+
+ /* Initial DSCR default */
+ dscr = 1;
+ set_default_dscr(dscr);
+
+ /* Spawn all testing threads */
+ for (i = 0; i < THREADS; i++) {
+ if (pthread_create(&threads[i], NULL, do_test, (void *)i)) {
+ perror("pthread_create() failed");
+ goto fail;
+ }
+ }
+
+ srand(getpid());
+
+ /* Keep changing the DSCR default */
+ for (i = 0; i < COUNT; i++) {
+ double ret = uniform_deviate(rand());
+
+ if (ret < 0.0001) {
+ sequence++;
+ wmb();
+
+ dscr++;
+ if (dscr > DSCR_MAX)
+ dscr = 0;
+
+ set_default_dscr(dscr);
+
+ wmb();
+ sequence++;
+ }
+ }
+
+ /* Individual testing thread exit status */
+ for (i = 0; i < THREADS; i++) {
+ if (pthread_join(threads[i], (void **)&(status[i]))) {
+ perror("pthread_join() failed");
+ goto fail;
+ }
+
+ if (*status[i]) {
+ printf("%ldth thread failed to join with %ld status\n",
+ i, *status[i]);
+ goto fail;
+ }
+ }
+ set_default_dscr(orig_dscr_default);
+ return 0;
+fail:
+ set_default_dscr(orig_dscr_default);
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(dscr_default, "dscr_default_test");
+}
diff --git a/powerpc/dscr/dscr_explicit_test.c b/powerpc/dscr/dscr_explicit_test.c
new file mode 100644
index 0000000..ad9c3ec
--- /dev/null
+++ b/powerpc/dscr/dscr_explicit_test.c
@@ -0,0 +1,71 @@
+/*
+ * POWER Data Stream Control Register (DSCR) explicit test
+ *
+ * This test modifies the DSCR value using mtspr instruction and
+ * verifies the change with mfspr instruction. It uses both the
+ * privilege state SPR and the problem state SPR for this purpose.
+ *
+ * When using the privilege state SPR, the instructions such as
+ * mfspr or mtspr are priviledged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be exuecuted
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+int dscr_explicit(void)
+{
+ unsigned long i, dscr = 0;
+
+ srand(getpid());
+ set_dscr(dscr);
+
+ for (i = 0; i < COUNT; i++) {
+ unsigned long cur_dscr, cur_dscr_usr;
+ double ret = uniform_deviate(rand());
+
+ if (ret < 0.001) {
+ dscr++;
+ if (dscr > DSCR_MAX)
+ dscr = 0;
+
+ set_dscr(dscr);
+ }
+
+ cur_dscr = get_dscr();
+ if (cur_dscr != dscr) {
+ fprintf(stderr, "Kernel DSCR should be %ld but "
+ "is %ld\n", dscr, cur_dscr);
+ return 1;
+ }
+
+ ret = uniform_deviate(rand());
+ if (ret < 0.001) {
+ dscr++;
+ if (dscr > DSCR_MAX)
+ dscr = 0;
+
+ set_dscr_usr(dscr);
+ }
+
+ cur_dscr_usr = get_dscr_usr();
+ if (cur_dscr_usr != dscr) {
+ fprintf(stderr, "User DSCR should be %ld but "
+ "is %ld\n", dscr, cur_dscr_usr);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(dscr_explicit, "dscr_explicit_test");
+}
diff --git a/powerpc/dscr/dscr_inherit_exec_test.c b/powerpc/dscr/dscr_inherit_exec_test.c
new file mode 100644
index 0000000..8265504
--- /dev/null
+++ b/powerpc/dscr/dscr_inherit_exec_test.c
@@ -0,0 +1,117 @@
+/*
+ * POWER Data Stream Control Register (DSCR) fork exec test
+ *
+ * This testcase modifies the DSCR using mtspr, forks & execs and
+ * verifies that the child is using the changed DSCR using mfspr.
+ *
+ * When using the privilege state SPR, the instructions such as
+ * mfspr or mtspr are priviledged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be exuecuted
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+static char prog[LEN_MAX];
+
+static void do_exec(unsigned long parent_dscr)
+{
+ unsigned long cur_dscr, cur_dscr_usr;
+
+ cur_dscr = get_dscr();
+ cur_dscr_usr = get_dscr_usr();
+
+ if (cur_dscr != parent_dscr) {
+ fprintf(stderr, "Parent DSCR %ld was not inherited "
+ "over exec (kernel value)\n", parent_dscr);
+ exit(1);
+ }
+
+ if (cur_dscr_usr != parent_dscr) {
+ fprintf(stderr, "Parent DSCR %ld was not inherited "
+ "over exec (user value)\n", parent_dscr);
+ exit(1);
+ }
+ exit(0);
+}
+
+int dscr_inherit_exec(void)
+{
+ unsigned long i, dscr = 0;
+ pid_t pid;
+
+ for (i = 0; i < COUNT; i++) {
+ dscr++;
+ if (dscr > DSCR_MAX)
+ dscr = 0;
+
+ if (dscr == get_default_dscr())
+ continue;
+
+ if (i % 2 == 0)
+ set_dscr_usr(dscr);
+ else
+ set_dscr(dscr);
+
+ /*
+ * XXX: Force a context switch out so that DSCR
+ * current value is copied into the thread struct
+ * which is required for the child to inherit the
+ * changed value.
+ */
+ sleep(1);
+
+ pid = fork();
+ if (pid == -1) {
+ perror("fork() failed");
+ exit(1);
+ } else if (pid) {
+ int status;
+
+ if (waitpid(pid, &status, 0) == -1) {
+ perror("waitpid() failed");
+ exit(1);
+ }
+
+ if (!WIFEXITED(status)) {
+ fprintf(stderr, "Child didn't exit cleanly\n");
+ exit(1);
+ }
+
+ if (WEXITSTATUS(status) != 0) {
+ fprintf(stderr, "Child didn't exit cleanly\n");
+ return 1;
+ }
+ } else {
+ char dscr_str[16];
+
+ sprintf(dscr_str, "%ld", dscr);
+ execlp(prog, prog, "exec", dscr_str, NULL);
+ exit(1);
+ }
+ }
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ if (argc == 3 && !strcmp(argv[1], "exec")) {
+ unsigned long parent_dscr;
+
+ parent_dscr = atoi(argv[2]);
+ do_exec(parent_dscr);
+ } else if (argc != 1) {
+ fprintf(stderr, "Usage: %s\n", argv[0]);
+ exit(1);
+ }
+
+ strncpy(prog, argv[0], strlen(argv[0]));
+ return test_harness(dscr_inherit_exec, "dscr_inherit_exec_test");
+}
diff --git a/powerpc/dscr/dscr_inherit_test.c b/powerpc/dscr/dscr_inherit_test.c
new file mode 100644
index 0000000..4e414ca
--- /dev/null
+++ b/powerpc/dscr/dscr_inherit_test.c
@@ -0,0 +1,95 @@
+/*
+ * POWER Data Stream Control Register (DSCR) fork test
+ *
+ * This testcase modifies the DSCR using mtspr, forks and then
+ * verifies that the child process has the correct changed DSCR
+ * value using mfspr.
+ *
+ * When using the privilege state SPR, the instructions such as
+ * mfspr or mtspr are priviledged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be exuecuted
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+int dscr_inherit(void)
+{
+ unsigned long i, dscr = 0;
+ pid_t pid;
+
+ srand(getpid());
+ set_dscr(dscr);
+
+ for (i = 0; i < COUNT; i++) {
+ unsigned long cur_dscr, cur_dscr_usr;
+
+ dscr++;
+ if (dscr > DSCR_MAX)
+ dscr = 0;
+
+ if (i % 2 == 0)
+ set_dscr_usr(dscr);
+ else
+ set_dscr(dscr);
+
+ /*
+ * XXX: Force a context switch out so that DSCR
+ * current value is copied into the thread struct
+ * which is required for the child to inherit the
+ * changed value.
+ */
+ sleep(1);
+
+ pid = fork();
+ if (pid == -1) {
+ perror("fork() failed");
+ exit(1);
+ } else if (pid) {
+ int status;
+
+ if (waitpid(pid, &status, 0) == -1) {
+ perror("waitpid() failed");
+ exit(1);
+ }
+
+ if (!WIFEXITED(status)) {
+ fprintf(stderr, "Child didn't exit cleanly\n");
+ exit(1);
+ }
+
+ if (WEXITSTATUS(status) != 0) {
+ fprintf(stderr, "Child didn't exit cleanly\n");
+ return 1;
+ }
+ } else {
+ cur_dscr = get_dscr();
+ if (cur_dscr != dscr) {
+ fprintf(stderr, "Kernel DSCR should be %ld "
+ "but is %ld\n", dscr, cur_dscr);
+ exit(1);
+ }
+
+ cur_dscr_usr = get_dscr_usr();
+ if (cur_dscr_usr != dscr) {
+ fprintf(stderr, "User DSCR should be %ld "
+ "but is %ld\n", dscr, cur_dscr_usr);
+ exit(1);
+ }
+ exit(0);
+ }
+ }
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(dscr_inherit, "dscr_inherit_test");
+}
diff --git a/powerpc/dscr/dscr_sysfs_test.c b/powerpc/dscr/dscr_sysfs_test.c
new file mode 100644
index 0000000..17fb1b4
--- /dev/null
+++ b/powerpc/dscr/dscr_sysfs_test.c
@@ -0,0 +1,97 @@
+/*
+ * POWER Data Stream Control Register (DSCR) sysfs interface test
+ *
+ * This test updates to system wide DSCR default through the sysfs interface
+ * and then verifies that all the CPU specific DSCR defaults are updated as
+ * well verified from their sysfs interfaces.
+ *
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+static int check_cpu_dscr_default(char *file, unsigned long val)
+{
+ char buf[10];
+ int fd, rc;
+
+ fd = open(file, O_RDWR);
+ if (fd == -1) {
+ perror("open() failed");
+ return 1;
+ }
+
+ rc = read(fd, buf, sizeof(buf));
+ if (rc == -1) {
+ perror("read() failed");
+ return 1;
+ }
+ close(fd);
+
+ buf[rc] = '\0';
+ if (strtol(buf, NULL, 16) != val) {
+ printf("DSCR match failed: %ld (system) %ld (cpu)\n",
+ val, strtol(buf, NULL, 16));
+ return 1;
+ }
+ return 0;
+}
+
+static int check_all_cpu_dscr_defaults(unsigned long val)
+{
+ DIR *sysfs;
+ struct dirent *dp;
+ char file[LEN_MAX];
+
+ sysfs = opendir(CPU_PATH);
+ if (!sysfs) {
+ perror("opendir() failed");
+ return 1;
+ }
+
+ while ((dp = readdir(sysfs))) {
+ if (!(dp->d_type & DT_DIR))
+ continue;
+ if (!strcmp(dp->d_name, "cpuidle"))
+ continue;
+ if (!strstr(dp->d_name, "cpu"))
+ continue;
+
+ sprintf(file, "%s%s/dscr", CPU_PATH, dp->d_name);
+ if (access(file, F_OK))
+ continue;
+
+ if (check_cpu_dscr_default(file, val))
+ return 1;
+ }
+ closedir(sysfs);
+ return 0;
+}
+
+int dscr_sysfs(void)
+{
+ unsigned long orig_dscr_default;
+ int i, j;
+
+ orig_dscr_default = get_default_dscr();
+ for (i = 0; i < COUNT; i++) {
+ for (j = 0; j < DSCR_MAX; j++) {
+ set_default_dscr(j);
+ if (check_all_cpu_dscr_defaults(j))
+ goto fail;
+ }
+ }
+ set_default_dscr(orig_dscr_default);
+ return 0;
+fail:
+ set_default_dscr(orig_dscr_default);
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(dscr_sysfs, "dscr_sysfs_test");
+}
diff --git a/powerpc/dscr/dscr_sysfs_thread_test.c b/powerpc/dscr/dscr_sysfs_thread_test.c
new file mode 100644
index 0000000..ad97b59
--- /dev/null
+++ b/powerpc/dscr/dscr_sysfs_thread_test.c
@@ -0,0 +1,80 @@
+/*
+ * POWER Data Stream Control Register (DSCR) sysfs thread test
+ *
+ * This test updates the system wide DSCR default value through
+ * sysfs interface which should then update all the CPU specific
+ * DSCR default values which must also be then visible to threads
+ * executing on individual CPUs on the system.
+ *
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#define _GNU_SOURCE
+#include "dscr.h"
+
+static int test_thread_dscr(unsigned long val)
+{
+ unsigned long cur_dscr, cur_dscr_usr;
+
+ cur_dscr = get_dscr();
+ cur_dscr_usr = get_dscr_usr();
+
+ if (val != cur_dscr) {
+ printf("[cpu %d] Kernel DSCR should be %ld but is %ld\n",
+ sched_getcpu(), val, cur_dscr);
+ return 1;
+ }
+
+ if (val != cur_dscr_usr) {
+ printf("[cpu %d] User DSCR should be %ld but is %ld\n",
+ sched_getcpu(), val, cur_dscr_usr);
+ return 1;
+ }
+ return 0;
+}
+
+static int check_cpu_dscr_thread(unsigned long val)
+{
+ cpu_set_t mask;
+ int cpu;
+
+ for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
+ CPU_ZERO(&mask);
+ CPU_SET(cpu, &mask);
+ if (sched_setaffinity(0, sizeof(mask), &mask))
+ continue;
+
+ if (test_thread_dscr(val))
+ return 1;
+ }
+ return 0;
+
+}
+
+int dscr_sysfs_thread(void)
+{
+ unsigned long orig_dscr_default;
+ int i, j;
+
+ orig_dscr_default = get_default_dscr();
+ for (i = 0; i < COUNT; i++) {
+ for (j = 0; j < DSCR_MAX; j++) {
+ set_default_dscr(j);
+ if (check_cpu_dscr_thread(j))
+ goto fail;
+ }
+ }
+ set_default_dscr(orig_dscr_default);
+ return 0;
+fail:
+ set_default_dscr(orig_dscr_default);
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(dscr_sysfs_thread, "dscr_sysfs_thread_test");
+}
diff --git a/powerpc/dscr/dscr_user_test.c b/powerpc/dscr/dscr_user_test.c
new file mode 100644
index 0000000..77d16b5
--- /dev/null
+++ b/powerpc/dscr/dscr_user_test.c
@@ -0,0 +1,61 @@
+/*
+ * POWER Data Stream Control Register (DSCR) SPR test
+ *
+ * This test modifies the DSCR value through both the SPR number
+ * based mtspr instruction and then makes sure that the same is
+ * reflected through mfspr instruction using either of the SPR
+ * numbers.
+ *
+ * When using the privilege state SPR, the instructions such as
+ * mfspr or mtspr are priviledged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be exuecuted
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright 2013, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+static int check_dscr(char *str)
+{
+ unsigned long cur_dscr, cur_dscr_usr;
+
+ cur_dscr = get_dscr();
+ cur_dscr_usr = get_dscr_usr();
+ if (cur_dscr != cur_dscr_usr) {
+ printf("%s set, kernel get %lx != user get %lx\n",
+ str, cur_dscr, cur_dscr_usr);
+ return 1;
+ }
+ return 0;
+}
+
+int dscr_user(void)
+{
+ int i;
+
+ check_dscr("");
+
+ for (i = 0; i < COUNT; i++) {
+ set_dscr(i);
+ if (check_dscr("kernel"))
+ return 1;
+ }
+
+ for (i = 0; i < COUNT; i++) {
+ set_dscr_usr(i);
+ if (check_dscr("user"))
+ return 1;
+ }
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(dscr_user, "dscr_user_test");
+}
diff --git a/powerpc/harness.c b/powerpc/harness.c
index 8ebc58a..f7997af 100644
--- a/powerpc/harness.c
+++ b/powerpc/harness.c
@@ -11,6 +11,10 @@
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
+#include <elf.h>
+#include <fcntl.h>
+#include <link.h>
+#include <sys/stat.h>
#include "subunit.h"
#include "utils.h"
@@ -112,3 +116,46 @@ int test_harness(int (test_function)(void), char *name)
return rc;
}
+
+static char auxv[4096];
+
+void *get_auxv_entry(int type)
+{
+ ElfW(auxv_t) *p;
+ void *result;
+ ssize_t num;
+ int fd;
+
+ fd = open("/proc/self/auxv", O_RDONLY);
+ if (fd == -1) {
+ perror("open");
+ return NULL;
+ }
+
+ result = NULL;
+
+ num = read(fd, auxv, sizeof(auxv));
+ if (num < 0) {
+ perror("read");
+ goto out;
+ }
+
+ if (num > sizeof(auxv)) {
+ printf("Overflowed auxv buffer\n");
+ goto out;
+ }
+
+ p = (ElfW(auxv_t) *)auxv;
+
+ while (p->a_type != AT_NULL) {
+ if (p->a_type == type) {
+ result = (void *)p->a_un.a_val;
+ break;
+ }
+
+ p++;
+ }
+out:
+ close(fd);
+ return result;
+}
diff --git a/powerpc/mm/Makefile b/powerpc/mm/Makefile
index a14c538..ee179e2 100644
--- a/powerpc/mm/Makefile
+++ b/powerpc/mm/Makefile
@@ -1,21 +1,17 @@
noarg:
$(MAKE) -C ../
-PROGS := hugetlb_vs_thp_test subpage_prot
+TEST_PROGS := hugetlb_vs_thp_test subpage_prot
+TEST_FILES := tempfile
-all: $(PROGS) tempfile
+all: $(TEST_PROGS) $(TEST_FILES)
-$(PROGS): ../harness.c
+$(TEST_PROGS): ../harness.c
-run_tests: all
- @-for PROG in $(PROGS); do \
- ./$$PROG; \
- done;
+include ../../lib.mk
tempfile:
dd if=/dev/zero of=tempfile bs=64k count=1
clean:
- rm -f $(PROGS) tempfile
-
-.PHONY: all run_tests clean
+ rm -f $(TEST_PROGS) tempfile
diff --git a/powerpc/mm/hugetlb_vs_thp_test.c b/powerpc/mm/hugetlb_vs_thp_test.c
index 3d8e5b0..4900367 100644
--- a/powerpc/mm/hugetlb_vs_thp_test.c
+++ b/powerpc/mm/hugetlb_vs_thp_test.c
@@ -21,9 +21,13 @@ static int test_body(void)
* Typically the mmap will fail because no huge pages are
* allocated on the system. But if there are huge pages
* allocated the mmap will succeed. That's fine too, we just
- * munmap here before continuing.
+ * munmap here before continuing. munmap() length of
+ * MAP_HUGETLB memory must be hugepage aligned.
*/
- munmap(addr, SIZE);
+ if (munmap(addr, SIZE)) {
+ perror("munmap");
+ return 1;
+ }
}
p = mmap(addr, SIZE, PROT_READ | PROT_WRITE,
diff --git a/powerpc/pmu/Makefile b/powerpc/pmu/Makefile
index c9f4263..a9099d9 100644
--- a/powerpc/pmu/Makefile
+++ b/powerpc/pmu/Makefile
@@ -1,38 +1,42 @@
noarg:
$(MAKE) -C ../
-PROGS := count_instructions l3_bank_test per_event_excludes
+TEST_PROGS := count_instructions l3_bank_test per_event_excludes
EXTRA_SOURCES := ../harness.c event.c lib.c
-SUB_TARGETS = ebb
+all: $(TEST_PROGS) ebb
-all: $(PROGS) $(SUB_TARGETS)
-
-$(PROGS): $(EXTRA_SOURCES)
+$(TEST_PROGS): $(EXTRA_SOURCES)
# loop.S can only be built 64-bit
count_instructions: loop.S count_instructions.c $(EXTRA_SOURCES)
$(CC) $(CFLAGS) -m64 -o $@ $^
-run_tests: all sub_run_tests
- @-for PROG in $(PROGS); do \
- ./$$PROG; \
- done;
+include ../../lib.mk
-clean: sub_clean
- rm -f $(PROGS) loop.o
+DEFAULT_RUN_TESTS := $(RUN_TESTS)
+override define RUN_TESTS
+ $(DEFAULT_RUN_TESTS)
+ $(MAKE) -C ebb run_tests
+endef
-$(SUB_TARGETS):
- $(MAKE) -k -C $@ all
+DEFAULT_EMIT_TESTS := $(EMIT_TESTS)
+override define EMIT_TESTS
+ $(DEFAULT_EMIT_TESTS)
+ $(MAKE) -s -C ebb emit_tests
+endef
-sub_run_tests: all
- @for TARGET in $(SUB_TARGETS); do \
- $(MAKE) -C $$TARGET run_tests; \
- done;
+DEFAULT_INSTALL_RULE := $(INSTALL_RULE)
+override define INSTALL_RULE
+ $(DEFAULT_INSTALL_RULE)
+ $(MAKE) -C ebb install
+endef
-sub_clean:
- @for TARGET in $(SUB_TARGETS); do \
- $(MAKE) -C $$TARGET clean; \
- done;
+clean:
+ rm -f $(TEST_PROGS) loop.o
+ $(MAKE) -C ebb clean
+
+ebb:
+ $(MAKE) -k -C $@ all
-.PHONY: all run_tests clean sub_run_tests sub_clean $(SUB_TARGETS)
+.PHONY: all run_tests clean ebb
diff --git a/powerpc/pmu/ebb/Makefile b/powerpc/pmu/ebb/Makefile
index 3dc4332..5cdc9db 100644
--- a/powerpc/pmu/ebb/Makefile
+++ b/powerpc/pmu/ebb/Makefile
@@ -4,7 +4,7 @@ noarg:
# The EBB handler is 64-bit code and everything links against it
CFLAGS += -m64
-PROGS := reg_access_test event_attributes_test cycles_test \
+TEST_PROGS := reg_access_test event_attributes_test cycles_test \
cycles_with_freeze_test pmc56_overflow_test \
ebb_vs_cpu_event_test cpu_event_vs_ebb_test \
cpu_event_pinned_vs_ebb_test task_event_vs_ebb_test \
@@ -16,18 +16,15 @@ PROGS := reg_access_test event_attributes_test cycles_test \
lost_exception_test no_handler_test \
cycles_with_mmcr2_test
-all: $(PROGS)
+all: $(TEST_PROGS)
-$(PROGS): ../../harness.c ../event.c ../lib.c ebb.c ebb_handler.S trace.c busy_loop.S
+$(TEST_PROGS): ../../harness.c ../event.c ../lib.c ebb.c ebb_handler.S trace.c busy_loop.S
instruction_count_test: ../loop.S
lost_exception_test: ../lib.c
-run_tests: all
- @-for PROG in $(PROGS); do \
- ./$$PROG; \
- done;
+include ../../../lib.mk
clean:
- rm -f $(PROGS)
+ rm -f $(TEST_PROGS)
diff --git a/powerpc/pmu/ebb/back_to_back_ebbs_test.c b/powerpc/pmu/ebb/back_to_back_ebbs_test.c
index 66ea765..94110b1 100644
--- a/powerpc/pmu/ebb/back_to_back_ebbs_test.c
+++ b/powerpc/pmu/ebb/back_to_back_ebbs_test.c
@@ -63,6 +63,8 @@ int back_to_back_ebbs(void)
{
struct event event;
+ SKIP_IF(!ebb_is_supported());
+
event_init_named(&event, 0x1001e, "cycles");
event_leader_ebb_init(&event);
diff --git a/powerpc/pmu/ebb/close_clears_pmcc_test.c b/powerpc/pmu/ebb/close_clears_pmcc_test.c
index 0f0423d..ac18cf6 100644
--- a/powerpc/pmu/ebb/close_clears_pmcc_test.c
+++ b/powerpc/pmu/ebb/close_clears_pmcc_test.c
@@ -20,6 +20,8 @@ int close_clears_pmcc(void)
{
struct event event;
+ SKIP_IF(!ebb_is_supported());
+
event_init_named(&event, 0x1001e, "cycles");
event_leader_ebb_init(&event);
diff --git a/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c b/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c
index d3ed64d..f0632e7 100644
--- a/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c
+++ b/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c
@@ -43,6 +43,8 @@ int cpu_event_pinned_vs_ebb(void)
int cpu, rc;
pid_t pid;
+ SKIP_IF(!ebb_is_supported());
+
cpu = pick_online_cpu();
FAIL_IF(cpu < 0);
FAIL_IF(bind_to_cpu(cpu));
diff --git a/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c b/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c
index 8b972c2..33e56a2 100644
--- a/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c
+++ b/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c
@@ -41,6 +41,8 @@ int cpu_event_vs_ebb(void)
int cpu, rc;
pid_t pid;
+ SKIP_IF(!ebb_is_supported());
+
cpu = pick_online_cpu();
FAIL_IF(cpu < 0);
FAIL_IF(bind_to_cpu(cpu));
diff --git a/powerpc/pmu/ebb/cycles_test.c b/powerpc/pmu/ebb/cycles_test.c
index 8590fc1..7c57a8d 100644
--- a/powerpc/pmu/ebb/cycles_test.c
+++ b/powerpc/pmu/ebb/cycles_test.c
@@ -16,6 +16,8 @@ int cycles(void)
{
struct event event;
+ SKIP_IF(!ebb_is_supported());
+
event_init_named(&event, 0x1001e, "cycles");
event_leader_ebb_init(&event);
diff --git a/powerpc/pmu/ebb/cycles_with_freeze_test.c b/powerpc/pmu/ebb/cycles_with_freeze_test.c
index 754b3f2..ecf5ee3 100644
--- a/powerpc/pmu/ebb/cycles_with_freeze_test.c
+++ b/powerpc/pmu/ebb/cycles_with_freeze_test.c
@@ -56,6 +56,8 @@ int cycles_with_freeze(void)
uint64_t val;
bool fc_cleared;
+ SKIP_IF(!ebb_is_supported());
+
event_init_named(&event, 0x1001e, "cycles");
event_leader_ebb_init(&event);
diff --git a/powerpc/pmu/ebb/cycles_with_mmcr2_test.c b/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
index d43029b..c0faba5 100644
--- a/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
+++ b/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
@@ -26,6 +26,8 @@ int cycles_with_mmcr2(void)
int i;
bool bad_mmcr2;
+ SKIP_IF(!ebb_is_supported());
+
event_init_named(&event, 0x1001e, "cycles");
event_leader_ebb_init(&event);
diff --git a/powerpc/pmu/ebb/ebb.c b/powerpc/pmu/ebb/ebb.c
index d7a72ce..9729d9f 100644
--- a/powerpc/pmu/ebb/ebb.c
+++ b/powerpc/pmu/ebb/ebb.c
@@ -13,6 +13,7 @@
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
+#include <linux/auxvec.h>
#include "trace.h"
#include "reg.h"
@@ -319,6 +320,16 @@ void ebb_global_disable(void)
mb();
}
+bool ebb_is_supported(void)
+{
+#ifdef PPC_FEATURE2_EBB
+ /* EBB requires at least POWER8 */
+ return ((long)get_auxv_entry(AT_HWCAP2) & PPC_FEATURE2_EBB);
+#else
+ return false;
+#endif
+}
+
void event_ebb_init(struct event *e)
{
e->attr.config |= (1ull << 63);
diff --git a/powerpc/pmu/ebb/ebb.h b/powerpc/pmu/ebb/ebb.h
index e44eee5..f87e761 100644
--- a/powerpc/pmu/ebb/ebb.h
+++ b/powerpc/pmu/ebb/ebb.h
@@ -52,6 +52,7 @@ void standard_ebb_callee(void);
int ebb_event_enable(struct event *e);
void ebb_global_enable(void);
void ebb_global_disable(void);
+bool ebb_is_supported(void);
void ebb_freeze_pmcs(void);
void ebb_unfreeze_pmcs(void);
void event_ebb_init(struct event *e);
diff --git a/powerpc/pmu/ebb/ebb_on_child_test.c b/powerpc/pmu/ebb/ebb_on_child_test.c
index c45f948..1e7b7fe 100644
--- a/powerpc/pmu/ebb/ebb_on_child_test.c
+++ b/powerpc/pmu/ebb/ebb_on_child_test.c
@@ -47,6 +47,8 @@ int ebb_on_child(void)
struct event event;
pid_t pid;
+ SKIP_IF(!ebb_is_supported());
+
FAIL_IF(pipe(read_pipe.fds) == -1);
FAIL_IF(pipe(write_pipe.fds) == -1);
diff --git a/powerpc/pmu/ebb/ebb_on_willing_child_test.c b/powerpc/pmu/ebb/ebb_on_willing_child_test.c
index 11acf1d..a991d2e 100644
--- a/powerpc/pmu/ebb/ebb_on_willing_child_test.c
+++ b/powerpc/pmu/ebb/ebb_on_willing_child_test.c
@@ -54,6 +54,8 @@ int ebb_on_willing_child(void)
struct event event;
pid_t pid;
+ SKIP_IF(!ebb_is_supported());
+
FAIL_IF(pipe(read_pipe.fds) == -1);
FAIL_IF(pipe(write_pipe.fds) == -1);
diff --git a/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c b/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c
index be4dd5a..af20a2b 100644
--- a/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c
+++ b/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c
@@ -41,6 +41,8 @@ int ebb_vs_cpu_event(void)
int cpu, rc;
pid_t pid;
+ SKIP_IF(!ebb_is_supported());
+
cpu = pick_online_cpu();
FAIL_IF(cpu < 0);
FAIL_IF(bind_to_cpu(cpu));
diff --git a/powerpc/pmu/ebb/event_attributes_test.c b/powerpc/pmu/ebb/event_attributes_test.c
index 7e78153..7762ab2 100644
--- a/powerpc/pmu/ebb/event_attributes_test.c
+++ b/powerpc/pmu/ebb/event_attributes_test.c
@@ -16,6 +16,8 @@ int event_attributes(void)
{
struct event event, leader;
+ SKIP_IF(!ebb_is_supported());
+
event_init(&event, 0x1001e);
event_leader_ebb_init(&event);
/* Expected to succeed */
diff --git a/powerpc/pmu/ebb/fork_cleanup_test.c b/powerpc/pmu/ebb/fork_cleanup_test.c
index 9e7af6e..167135b 100644
--- a/powerpc/pmu/ebb/fork_cleanup_test.c
+++ b/powerpc/pmu/ebb/fork_cleanup_test.c
@@ -44,6 +44,8 @@ int fork_cleanup(void)
{
pid_t pid;
+ SKIP_IF(!ebb_is_supported());
+
event_init_named(&event, 0x1001e, "cycles");
event_leader_ebb_init(&event);
diff --git a/powerpc/pmu/ebb/instruction_count_test.c b/powerpc/pmu/ebb/instruction_count_test.c
index f8190fa..5da3551 100644
--- a/powerpc/pmu/ebb/instruction_count_test.c
+++ b/powerpc/pmu/ebb/instruction_count_test.c
@@ -111,6 +111,8 @@ int instruction_count(void)
struct event event;
uint64_t overhead;
+ SKIP_IF(!ebb_is_supported());
+
event_init_named(&event, 0x400FA, "PM_RUN_INST_CMPL");
event_leader_ebb_init(&event);
event.attr.exclude_kernel = 1;
diff --git a/powerpc/pmu/ebb/lost_exception_test.c b/powerpc/pmu/ebb/lost_exception_test.c
index 0c9dd9b..eb8acb7 100644
--- a/powerpc/pmu/ebb/lost_exception_test.c
+++ b/powerpc/pmu/ebb/lost_exception_test.c
@@ -23,6 +23,8 @@ static int test_body(void)
int i, orig_period, max_period;
struct event event;
+ SKIP_IF(!ebb_is_supported());
+
/* We use PMC4 to make sure the kernel switches all counters correctly */
event_init_named(&event, 0x40002, "instructions");
event_leader_ebb_init(&event);
diff --git a/powerpc/pmu/ebb/multi_counter_test.c b/powerpc/pmu/ebb/multi_counter_test.c
index 67d78af..6ff8c8f 100644
--- a/powerpc/pmu/ebb/multi_counter_test.c
+++ b/powerpc/pmu/ebb/multi_counter_test.c
@@ -18,6 +18,8 @@ int multi_counter(void)
struct event events[6];
int i, group_fd;
+ SKIP_IF(!ebb_is_supported());
+
event_init_named(&events[0], 0x1001C, "PM_CMPLU_STALL_THRD");
event_init_named(&events[1], 0x2D016, "PM_CMPLU_STALL_FXU");
event_init_named(&events[2], 0x30006, "PM_CMPLU_STALL_OTHER_CMPL");
diff --git a/powerpc/pmu/ebb/multi_ebb_procs_test.c b/powerpc/pmu/ebb/multi_ebb_procs_test.c
index b8dc371..037cb61 100644
--- a/powerpc/pmu/ebb/multi_ebb_procs_test.c
+++ b/powerpc/pmu/ebb/multi_ebb_procs_test.c
@@ -79,6 +79,8 @@ int multi_ebb_procs(void)
pid_t pids[NR_CHILDREN];
int cpu, rc, i;
+ SKIP_IF(!ebb_is_supported());
+
cpu = pick_online_cpu();
FAIL_IF(cpu < 0);
FAIL_IF(bind_to_cpu(cpu));
diff --git a/powerpc/pmu/ebb/no_handler_test.c b/powerpc/pmu/ebb/no_handler_test.c
index 2f9bf8e..8341d77 100644
--- a/powerpc/pmu/ebb/no_handler_test.c
+++ b/powerpc/pmu/ebb/no_handler_test.c
@@ -19,6 +19,8 @@ static int no_handler_test(void)
u64 val;
int i;
+ SKIP_IF(!ebb_is_supported());
+
event_init_named(&event, 0x1001e, "cycles");
event_leader_ebb_init(&event);
diff --git a/powerpc/pmu/ebb/pmae_handling_test.c b/powerpc/pmu/ebb/pmae_handling_test.c
index 986500f..c5fa647 100644
--- a/powerpc/pmu/ebb/pmae_handling_test.c
+++ b/powerpc/pmu/ebb/pmae_handling_test.c
@@ -58,6 +58,8 @@ static int test_body(void)
{
struct event event;
+ SKIP_IF(!ebb_is_supported());
+
event_init_named(&event, 0x1001e, "cycles");
event_leader_ebb_init(&event);
diff --git a/powerpc/pmu/ebb/pmc56_overflow_test.c b/powerpc/pmu/ebb/pmc56_overflow_test.c
index a503fa7..c22860a 100644
--- a/powerpc/pmu/ebb/pmc56_overflow_test.c
+++ b/powerpc/pmu/ebb/pmc56_overflow_test.c
@@ -49,6 +49,8 @@ int pmc56_overflow(void)
{
struct event event;
+ SKIP_IF(!ebb_is_supported());
+
/* Use PMC2 so we set PMCjCE, which enables PMC5/6 */
event_init(&event, 0x2001e);
event_leader_ebb_init(&event);
diff --git a/powerpc/pmu/ebb/reg_access_test.c b/powerpc/pmu/ebb/reg_access_test.c
index 0cae66f..5b1188f 100644
--- a/powerpc/pmu/ebb/reg_access_test.c
+++ b/powerpc/pmu/ebb/reg_access_test.c
@@ -18,6 +18,8 @@ int reg_access(void)
{
uint64_t val, expected;
+ SKIP_IF(!ebb_is_supported());
+
expected = 0x8000000100000000ull;
mtspr(SPRN_BESCR, expected);
val = mfspr(SPRN_BESCR);
diff --git a/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c b/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c
index d56607e..1846f4e 100644
--- a/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c
+++ b/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c
@@ -42,6 +42,8 @@ int task_event_pinned_vs_ebb(void)
pid_t pid;
int rc;
+ SKIP_IF(!ebb_is_supported());
+
FAIL_IF(pipe(read_pipe.fds) == -1);
FAIL_IF(pipe(write_pipe.fds) == -1);
diff --git a/powerpc/pmu/ebb/task_event_vs_ebb_test.c b/powerpc/pmu/ebb/task_event_vs_ebb_test.c
index eba3219..e3bc6e9 100644
--- a/powerpc/pmu/ebb/task_event_vs_ebb_test.c
+++ b/powerpc/pmu/ebb/task_event_vs_ebb_test.c
@@ -40,6 +40,8 @@ int task_event_vs_ebb(void)
pid_t pid;
int rc;
+ SKIP_IF(!ebb_is_supported());
+
FAIL_IF(pipe(read_pipe.fds) == -1);
FAIL_IF(pipe(write_pipe.fds) == -1);
diff --git a/powerpc/pmu/lib.c b/powerpc/pmu/lib.c
index 9768dea..a07104c 100644
--- a/powerpc/pmu/lib.c
+++ b/powerpc/pmu/lib.c
@@ -5,15 +5,10 @@
#define _GNU_SOURCE /* For CPU_ZERO etc. */
-#include <elf.h>
#include <errno.h>
-#include <fcntl.h>
-#include <link.h>
#include <sched.h>
#include <setjmp.h>
#include <stdlib.h>
-#include <sys/stat.h>
-#include <sys/types.h>
#include <sys/wait.h>
#include "utils.h"
@@ -256,45 +251,3 @@ out:
return rc;
}
-static char auxv[4096];
-
-void *get_auxv_entry(int type)
-{
- ElfW(auxv_t) *p;
- void *result;
- ssize_t num;
- int fd;
-
- fd = open("/proc/self/auxv", O_RDONLY);
- if (fd == -1) {
- perror("open");
- return NULL;
- }
-
- result = NULL;
-
- num = read(fd, auxv, sizeof(auxv));
- if (num < 0) {
- perror("read");
- goto out;
- }
-
- if (num > sizeof(auxv)) {
- printf("Overflowed auxv buffer\n");
- goto out;
- }
-
- p = (ElfW(auxv_t) *)auxv;
-
- while (p->a_type != AT_NULL) {
- if (p->a_type == type) {
- result = (void *)p->a_un.a_val;
- break;
- }
-
- p++;
- }
-out:
- close(fd);
- return result;
-}
diff --git a/powerpc/pmu/lib.h b/powerpc/pmu/lib.h
index 0f0339c..ca5d72a 100644
--- a/powerpc/pmu/lib.h
+++ b/powerpc/pmu/lib.h
@@ -29,7 +29,6 @@ extern int notify_parent(union pipe write_pipe);
extern int notify_parent_of_error(union pipe write_pipe);
extern pid_t eat_cpu(int (test_function)(void));
extern bool require_paranoia_below(int level);
-extern void *get_auxv_entry(int type);
struct addr_range {
uint64_t first, last;
diff --git a/powerpc/primitives/Makefile b/powerpc/primitives/Makefile
index ea737ca..b68c622 100644
--- a/powerpc/primitives/Makefile
+++ b/powerpc/primitives/Makefile
@@ -1,17 +1,12 @@
CFLAGS += -I$(CURDIR)
-PROGS := load_unaligned_zeropad
+TEST_PROGS := load_unaligned_zeropad
-all: $(PROGS)
+all: $(TEST_PROGS)
-$(PROGS): ../harness.c
+$(TEST_PROGS): ../harness.c
-run_tests: all
- @-for PROG in $(PROGS); do \
- ./$$PROG; \
- done;
+include ../../lib.mk
clean:
- rm -f $(PROGS) *.o
-
-.PHONY: all run_tests clean
+ rm -f $(TEST_PROGS) *.o
diff --git a/powerpc/primitives/load_unaligned_zeropad.c b/powerpc/primitives/load_unaligned_zeropad.c
index d1b6475..6cae061 100644
--- a/powerpc/primitives/load_unaligned_zeropad.c
+++ b/powerpc/primitives/load_unaligned_zeropad.c
@@ -25,10 +25,19 @@
#define FIXUP_SECTION ".ex_fixup"
+static inline unsigned long __fls(unsigned long x);
+
#include "word-at-a-time.h"
#include "utils.h"
+static inline unsigned long __fls(unsigned long x)
+{
+ int lz;
+
+ asm (PPC_CNTLZL "%0,%1" : "=r" (lz) : "r" (x));
+ return sizeof(unsigned long) - 1 - lz;
+}
static int page_size;
static char *mem_region;
diff --git a/powerpc/stringloops/Makefile b/powerpc/stringloops/Makefile
index 506d773..2a728f4 100644
--- a/powerpc/stringloops/Makefile
+++ b/powerpc/stringloops/Makefile
@@ -2,19 +2,14 @@
CFLAGS += -m64
CFLAGS += -I$(CURDIR)
-PROGS := memcmp
+TEST_PROGS := memcmp
EXTRA_SOURCES := memcmp_64.S ../harness.c
-all: $(PROGS)
+all: $(TEST_PROGS)
-$(PROGS): $(EXTRA_SOURCES)
+$(TEST_PROGS): $(EXTRA_SOURCES)
-run_tests: all
- @-for PROG in $(PROGS); do \
- ./$$PROG; \
- done;
+include ../../lib.mk
clean:
- rm -f $(PROGS) *.o
-
-.PHONY: all run_tests clean
+ rm -f $(TEST_PROGS) *.o
diff --git a/powerpc/switch_endian/.gitignore b/powerpc/switch_endian/.gitignore
new file mode 100644
index 0000000..89e762e
--- /dev/null
+++ b/powerpc/switch_endian/.gitignore
@@ -0,0 +1,2 @@
+switch_endian_test
+check-reversed.S
diff --git a/powerpc/switch_endian/Makefile b/powerpc/switch_endian/Makefile
new file mode 100644
index 0000000..e21d106
--- /dev/null
+++ b/powerpc/switch_endian/Makefile
@@ -0,0 +1,18 @@
+TEST_PROGS := switch_endian_test
+
+ASFLAGS += -O2 -Wall -g -nostdlib -m64
+
+all: $(TEST_PROGS)
+
+switch_endian_test: check-reversed.S
+
+check-reversed.o: check.o
+ $(CROSS_COMPILE)objcopy -j .text --reverse-bytes=4 -O binary $< $@
+
+check-reversed.S: check-reversed.o
+ hexdump -v -e '/1 ".byte 0x%02X\n"' $< > $@
+
+include ../../lib.mk
+
+clean:
+ rm -f $(TEST_PROGS) *.o check-reversed.S
diff --git a/powerpc/switch_endian/check.S b/powerpc/switch_endian/check.S
new file mode 100644
index 0000000..e2484d2
--- /dev/null
+++ b/powerpc/switch_endian/check.S
@@ -0,0 +1,100 @@
+#include "common.h"
+
+/*
+ * Checks that registers contain what we expect, ie. they were not clobbered by
+ * the syscall.
+ *
+ * r15: pattern to check registers against.
+ *
+ * At the end r3 == 0 if everything's OK.
+ */
+ nop # guaranteed to be illegal in reverse-endian
+ mr r9,r15
+ cmpd r9,r3 # check r3
+ bne 1f
+ addi r9,r15,4 # check r4
+ cmpd r9,r4
+ bne 1f
+ lis r9,0x00FF # check CR
+ ori r9,r9,0xF000
+ mfcr r10
+ and r10,r10,r9
+ cmpw r9,r10
+ addi r9,r15,34
+ bne 1f
+ addi r9,r15,32 # check LR
+ mflr r10
+ cmpd r9,r10
+ bne 1f
+ addi r9,r15,5 # check r5
+ cmpd r9,r5
+ bne 1f
+ addi r9,r15,6 # check r6
+ cmpd r9,r6
+ bne 1f
+ addi r9,r15,7 # check r7
+ cmpd r9,r7
+ bne 1f
+ addi r9,r15,8 # check r8
+ cmpd r9,r8
+ bne 1f
+ addi r9,r15,13 # check r13
+ cmpd r9,r13
+ bne 1f
+ addi r9,r15,14 # check r14
+ cmpd r9,r14
+ bne 1f
+ addi r9,r15,16 # check r16
+ cmpd r9,r16
+ bne 1f
+ addi r9,r15,17 # check r17
+ cmpd r9,r17
+ bne 1f
+ addi r9,r15,18 # check r18
+ cmpd r9,r18
+ bne 1f
+ addi r9,r15,19 # check r19
+ cmpd r9,r19
+ bne 1f
+ addi r9,r15,20 # check r20
+ cmpd r9,r20
+ bne 1f
+ addi r9,r15,21 # check r21
+ cmpd r9,r21
+ bne 1f
+ addi r9,r15,22 # check r22
+ cmpd r9,r22
+ bne 1f
+ addi r9,r15,23 # check r23
+ cmpd r9,r23
+ bne 1f
+ addi r9,r15,24 # check r24
+ cmpd r9,r24
+ bne 1f
+ addi r9,r15,25 # check r25
+ cmpd r9,r25
+ bne 1f
+ addi r9,r15,26 # check r26
+ cmpd r9,r26
+ bne 1f
+ addi r9,r15,27 # check r27
+ cmpd r9,r27
+ bne 1f
+ addi r9,r15,28 # check r28
+ cmpd r9,r28
+ bne 1f
+ addi r9,r15,29 # check r29
+ cmpd r9,r29
+ bne 1f
+ addi r9,r15,30 # check r30
+ cmpd r9,r30
+ bne 1f
+ addi r9,r15,31 # check r31
+ cmpd r9,r31
+ bne 1f
+ b 2f
+1: mr r3, r9
+ li r0, __NR_exit
+ sc
+2: li r0, __NR_switch_endian
+ nop
diff --git a/powerpc/switch_endian/common.h b/powerpc/switch_endian/common.h
new file mode 100644
index 0000000..69e3996
--- /dev/null
+++ b/powerpc/switch_endian/common.h
@@ -0,0 +1,6 @@
+#include <ppc-asm.h>
+#include <asm/unistd.h>
+
+#ifndef __NR_switch_endian
+#define __NR_switch_endian 363
+#endif
diff --git a/powerpc/switch_endian/switch_endian_test.S b/powerpc/switch_endian/switch_endian_test.S
new file mode 100644
index 0000000..ef7c971
--- /dev/null
+++ b/powerpc/switch_endian/switch_endian_test.S
@@ -0,0 +1,81 @@
+#include "common.h"
+
+ .data
+ .balign 8
+message:
+ .ascii "success: switch_endian_test\n\0"
+
+ .section ".toc"
+ .balign 8
+pattern:
+ .llong 0x5555AAAA5555AAAA
+
+ .text
+FUNC_START(_start)
+ /* Load the pattern */
+ ld r15, pattern@TOC(%r2)
+
+ /* Setup CR, only CR2-CR4 are maintained */
+ lis r3, 0x00FF
+ ori r3, r3, 0xF000
+ mtcr r3
+
+ /* Load the pattern slightly modified into the registers */
+ mr r3, r15
+ addi r4, r15, 4
+
+ addi r5, r15, 32
+ mtlr r5
+
+ addi r5, r15, 5
+ addi r6, r15, 6
+ addi r7, r15, 7
+ addi r8, r15, 8
+
+ /* r9 - r12 are clobbered */
+
+ addi r13, r15, 13
+ addi r14, r15, 14
+
+ /* Skip r15 we're using it */
+
+ addi r16, r15, 16
+ addi r17, r15, 17
+ addi r18, r15, 18
+ addi r19, r15, 19
+ addi r20, r15, 20
+ addi r21, r15, 21
+ addi r22, r15, 22
+ addi r23, r15, 23
+ addi r24, r15, 24
+ addi r25, r15, 25
+ addi r26, r15, 26
+ addi r27, r15, 27
+ addi r28, r15, 28
+ addi r29, r15, 29
+ addi r30, r15, 30
+ addi r31, r15, 31
+
+ /*
+ * Call the syscall to switch endian.
+ * It clobbers r9-r12, XER, CTR and CR0-1,5-7.
+ */
+ li r0, __NR_switch_endian
+ sc
+
+#include "check-reversed.S"
+
+ /* Flip back, r0 already has the switch syscall number */
+ .long 0x02000044 /* sc */
+
+#include "check.S"
+
+ li r0, __NR_write
+ li r3, 1 /* stdout */
+ ld r4, message@got(%r2)
+ li r5, 28 /* strlen(message3) */
+ sc
+ li r0, __NR_exit
+ li r3, 0
+ sc
+ b .
diff --git a/powerpc/syscalls/.gitignore b/powerpc/syscalls/.gitignore
new file mode 100644
index 0000000..f0f3fcc
--- /dev/null
+++ b/powerpc/syscalls/.gitignore
@@ -0,0 +1 @@
+ipc_unmuxed
diff --git a/powerpc/syscalls/Makefile b/powerpc/syscalls/Makefile
new file mode 100644
index 0000000..b35c794
--- /dev/null
+++ b/powerpc/syscalls/Makefile
@@ -0,0 +1,12 @@
+TEST_PROGS := ipc_unmuxed
+
+CFLAGS += -I../../../../../usr/include
+
+all: $(TEST_PROGS)
+
+$(TEST_PROGS): ../harness.c
+
+include ../../lib.mk
+
+clean:
+ rm -f $(TEST_PROGS) *.o
diff --git a/powerpc/syscalls/ipc.h b/powerpc/syscalls/ipc.h
new file mode 100644
index 0000000..fbebc02
--- /dev/null
+++ b/powerpc/syscalls/ipc.h
@@ -0,0 +1,47 @@
+#ifdef __NR_semop
+DO_TEST(semop, __NR_semop)
+#endif
+
+#ifdef __NR_semget
+DO_TEST(semget, __NR_semget)
+#endif
+
+#ifdef __NR_semctl
+DO_TEST(semctl, __NR_semctl)
+#endif
+
+#ifdef __NR_semtimedop
+DO_TEST(semtimedop, __NR_semtimedop)
+#endif
+
+#ifdef __NR_msgsnd
+DO_TEST(msgsnd, __NR_msgsnd)
+#endif
+
+#ifdef __NR_msgrcv
+DO_TEST(msgrcv, __NR_msgrcv)
+#endif
+
+#ifdef __NR_msgget
+DO_TEST(msgget, __NR_msgget)
+#endif
+
+#ifdef __NR_msgctl
+DO_TEST(msgctl, __NR_msgctl)
+#endif
+
+#ifdef __NR_shmat
+DO_TEST(shmat, __NR_shmat)
+#endif
+
+#ifdef __NR_shmdt
+DO_TEST(shmdt, __NR_shmdt)
+#endif
+
+#ifdef __NR_shmget
+DO_TEST(shmget, __NR_shmget)
+#endif
+
+#ifdef __NR_shmctl
+DO_TEST(shmctl, __NR_shmctl)
+#endif
diff --git a/powerpc/syscalls/ipc_unmuxed.c b/powerpc/syscalls/ipc_unmuxed.c
new file mode 100644
index 0000000..2ac0270
--- /dev/null
+++ b/powerpc/syscalls/ipc_unmuxed.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2015, Michael Ellerman, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test simply tests that certain syscalls are implemented. It doesn't
+ * actually exercise their logic in any way.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#include "utils.h"
+
+
+#define DO_TEST(_name, _num) \
+static int test_##_name(void) \
+{ \
+ int rc; \
+ printf("Testing " #_name); \
+ errno = 0; \
+ rc = syscall(_num, -1, 0, 0, 0, 0, 0); \
+ printf("\treturned %d, errno %d\n", rc, errno); \
+ return errno == ENOSYS; \
+}
+
+#include "ipc.h"
+#undef DO_TEST
+
+static int ipc_unmuxed(void)
+{
+ int tests_done = 0;
+
+#define DO_TEST(_name, _num) \
+ FAIL_IF(test_##_name()); \
+ tests_done++;
+
+#include "ipc.h"
+#undef DO_TEST
+
+ /*
+ * If we ran no tests then it means none of the syscall numbers were
+ * defined, possibly because we were built against old headers. But it
+ * means we didn't really test anything, so instead of passing mark it
+ * as a skip to give the user a clue.
+ */
+ SKIP_IF(tests_done == 0);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(ipc_unmuxed, "ipc_unmuxed");
+}
diff --git a/powerpc/tm/.gitignore b/powerpc/tm/.gitignore
index 33d02cc..2699635 100644
--- a/powerpc/tm/.gitignore
+++ b/powerpc/tm/.gitignore
@@ -1 +1,2 @@
tm-resched-dscr
+tm-syscall
diff --git a/powerpc/tm/Makefile b/powerpc/tm/Makefile
index 2cede23..4bea62a 100644
--- a/powerpc/tm/Makefile
+++ b/powerpc/tm/Makefile
@@ -1,15 +1,13 @@
-PROGS := tm-resched-dscr
+TEST_PROGS := tm-resched-dscr tm-syscall
-all: $(PROGS)
+all: $(TEST_PROGS)
-$(PROGS): ../harness.c
+$(TEST_PROGS): ../harness.c
-run_tests: all
- @-for PROG in $(PROGS); do \
- ./$$PROG; \
- done;
+tm-syscall: tm-syscall-asm.S
+tm-syscall: CFLAGS += -mhtm -I../../../../../usr/include
-clean:
- rm -f $(PROGS) *.o
+include ../../lib.mk
-.PHONY: all run_tests clean
+clean:
+ rm -f $(TEST_PROGS) *.o
diff --git a/powerpc/tm/tm-syscall-asm.S b/powerpc/tm/tm-syscall-asm.S
new file mode 100644
index 0000000..431f61a
--- /dev/null
+++ b/powerpc/tm/tm-syscall-asm.S
@@ -0,0 +1,27 @@
+#include <ppc-asm.h>
+#include <asm/unistd.h>
+
+ .text
+FUNC_START(getppid_tm_active)
+ tbegin.
+ beq 1f
+ li r0, __NR_getppid
+ sc
+ tend.
+ blr
+1:
+ li r3, -1
+ blr
+
+FUNC_START(getppid_tm_suspended)
+ tbegin.
+ beq 1f
+ li r0, __NR_getppid
+ tsuspend.
+ sc
+ tresume.
+ tend.
+ blr
+1:
+ li r3, -1
+ blr
diff --git a/powerpc/tm/tm-syscall.c b/powerpc/tm/tm-syscall.c
new file mode 100644
index 0000000..e835bf7
--- /dev/null
+++ b/powerpc/tm/tm-syscall.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright 2015, Sam Bobroff, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Test the kernel's system call code to ensure that a system call
+ * made from within an active HTM transaction is aborted with the
+ * correct failure code.
+ * Conversely, ensure that a system call made from within a
+ * suspended transaction can succeed.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <asm/tm.h>
+#include <asm/cputable.h>
+#include <linux/auxvec.h>
+#include <sys/time.h>
+#include <stdlib.h>
+
+#include "utils.h"
+
+extern int getppid_tm_active(void);
+extern int getppid_tm_suspended(void);
+
+unsigned retries = 0;
+
+#define TEST_DURATION 10 /* seconds */
+#define TM_RETRIES 100
+
+long failure_code(void)
+{
+ return __builtin_get_texasru() >> 24;
+}
+
+bool failure_is_persistent(void)
+{
+ return (failure_code() & TM_CAUSE_PERSISTENT) == TM_CAUSE_PERSISTENT;
+}
+
+bool failure_is_syscall(void)
+{
+ return (failure_code() & TM_CAUSE_SYSCALL) == TM_CAUSE_SYSCALL;
+}
+
+pid_t getppid_tm(bool suspend)
+{
+ int i;
+ pid_t pid;
+
+ for (i = 0; i < TM_RETRIES; i++) {
+ if (suspend)
+ pid = getppid_tm_suspended();
+ else
+ pid = getppid_tm_active();
+
+ if (pid >= 0)
+ return pid;
+
+ if (failure_is_persistent()) {
+ if (failure_is_syscall())
+ return -1;
+
+ printf("Unexpected persistent transaction failure.\n");
+ printf("TEXASR 0x%016lx, TFIAR 0x%016lx.\n",
+ __builtin_get_texasr(), __builtin_get_tfiar());
+ exit(-1);
+ }
+
+ retries++;
+ }
+
+ printf("Exceeded limit of %d temporary transaction failures.\n", TM_RETRIES);
+ printf("TEXASR 0x%016lx, TFIAR 0x%016lx.\n",
+ __builtin_get_texasr(), __builtin_get_tfiar());
+
+ exit(-1);
+}
+
+static inline bool have_htm_nosc(void)
+{
+#ifdef PPC_FEATURE2_HTM_NOSC
+ return ((long)get_auxv_entry(AT_HWCAP2) & PPC_FEATURE2_HTM_NOSC);
+#else
+ printf("PPC_FEATURE2_HTM_NOSC not defined, can't check AT_HWCAP2\n");
+ return false;
+#endif
+}
+
+int tm_syscall(void)
+{
+ unsigned count = 0;
+ struct timeval end, now;
+
+ SKIP_IF(!have_htm_nosc());
+
+ setbuf(stdout, NULL);
+
+ printf("Testing transactional syscalls for %d seconds...\n", TEST_DURATION);
+
+ gettimeofday(&end, NULL);
+ now.tv_sec = TEST_DURATION;
+ now.tv_usec = 0;
+ timeradd(&end, &now, &end);
+
+ for (count = 0; timercmp(&now, &end, <); count++) {
+ /*
+ * Test a syscall within a suspended transaction and verify
+ * that it succeeds.
+ */
+ FAIL_IF(getppid_tm(true) == -1); /* Should succeed. */
+
+ /*
+ * Test a syscall within an active transaction and verify that
+ * it fails with the correct failure code.
+ */
+ FAIL_IF(getppid_tm(false) != -1); /* Should fail... */
+ FAIL_IF(!failure_is_persistent()); /* ...persistently... */
+ FAIL_IF(!failure_is_syscall()); /* ...with code syscall. */
+ gettimeofday(&now, 0);
+ }
+
+ printf("%d active and suspended transactions behaved correctly.\n", count);
+ printf("(There were %d transaction retries.)\n", retries);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(tm_syscall, "tm_syscall");
+}
diff --git a/powerpc/utils.h b/powerpc/utils.h
index a93777a..b7d4108 100644
--- a/powerpc/utils.h
+++ b/powerpc/utils.h
@@ -15,11 +15,12 @@ typedef signed long long s64;
/* Just for familiarity */
typedef uint32_t u32;
+typedef uint16_t u16;
typedef uint8_t u8;
int test_harness(int (test_function)(void), char *name);
-
+extern void *get_auxv_entry(int type);
/* Yes, this is evil */
#define FAIL_IF(x) \
diff --git a/powerpc/vphn/.gitignore b/powerpc/vphn/.gitignore
new file mode 100644
index 0000000..7c04395
--- /dev/null
+++ b/powerpc/vphn/.gitignore
@@ -0,0 +1 @@
+test-vphn
diff --git a/powerpc/vphn/Makefile b/powerpc/vphn/Makefile
new file mode 100644
index 0000000..a485f2e
--- /dev/null
+++ b/powerpc/vphn/Makefile
@@ -0,0 +1,12 @@
+TEST_PROGS := test-vphn
+
+CFLAGS += -m64
+
+all: $(TEST_PROGS)
+
+$(TEST_PROGS): ../harness.c
+
+include ../../lib.mk
+
+clean:
+ rm -f $(TEST_PROGS)
diff --git a/powerpc/vphn/test-vphn.c b/powerpc/vphn/test-vphn.c
new file mode 100644
index 0000000..5742f68
--- /dev/null
+++ b/powerpc/vphn/test-vphn.c
@@ -0,0 +1,410 @@
+#include <stdio.h>
+#include <byteswap.h>
+#include "utils.h"
+#include "subunit.h"
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define cpu_to_be32(x) bswap_32(x)
+#define be32_to_cpu(x) bswap_32(x)
+#define be16_to_cpup(x) bswap_16(*x)
+#define cpu_to_be64(x) bswap_64(x)
+#else
+#define cpu_to_be32(x) (x)
+#define be32_to_cpu(x) (x)
+#define be16_to_cpup(x) (*x)
+#define cpu_to_be64(x) (x)
+#endif
+
+#include "vphn.c"
+
+static struct test {
+ char *descr;
+ long input[VPHN_REGISTER_COUNT];
+ u32 expected[VPHN_ASSOC_BUFSIZE];
+} all_tests[] = {
+ {
+ "vphn: no data",
+ {
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ },
+ {
+ 0x00000000
+ }
+ },
+ {
+ "vphn: 1 x 16-bit value",
+ {
+ 0x8001ffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ },
+ {
+ 0x00000001,
+ 0x00000001
+ }
+ },
+ {
+ "vphn: 2 x 16-bit values",
+ {
+ 0x80018002ffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ },
+ {
+ 0x00000002,
+ 0x00000001,
+ 0x00000002
+ }
+ },
+ {
+ "vphn: 3 x 16-bit values",
+ {
+ 0x800180028003ffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ },
+ {
+ 0x00000003,
+ 0x00000001,
+ 0x00000002,
+ 0x00000003
+ }
+ },
+ {
+ "vphn: 4 x 16-bit values",
+ {
+ 0x8001800280038004,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ },
+ {
+ 0x00000004,
+ 0x00000001,
+ 0x00000002,
+ 0x00000003,
+ 0x00000004
+ }
+ },
+ {
+ /* Parsing the next 16-bit value out of the next 64-bit input
+ * value.
+ */
+ "vphn: 5 x 16-bit values",
+ {
+ 0x8001800280038004,
+ 0x8005ffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ },
+ {
+ 0x00000005,
+ 0x00000001,
+ 0x00000002,
+ 0x00000003,
+ 0x00000004,
+ 0x00000005
+ }
+ },
+ {
+ /* Parse at most 6 x 64-bit input values */
+ "vphn: 24 x 16-bit values",
+ {
+ 0x8001800280038004,
+ 0x8005800680078008,
+ 0x8009800a800b800c,
+ 0x800d800e800f8010,
+ 0x8011801280138014,
+ 0x8015801680178018
+ },
+ {
+ 0x00000018,
+ 0x00000001,
+ 0x00000002,
+ 0x00000003,
+ 0x00000004,
+ 0x00000005,
+ 0x00000006,
+ 0x00000007,
+ 0x00000008,
+ 0x00000009,
+ 0x0000000a,
+ 0x0000000b,
+ 0x0000000c,
+ 0x0000000d,
+ 0x0000000e,
+ 0x0000000f,
+ 0x00000010,
+ 0x00000011,
+ 0x00000012,
+ 0x00000013,
+ 0x00000014,
+ 0x00000015,
+ 0x00000016,
+ 0x00000017,
+ 0x00000018
+ }
+ },
+ {
+ "vphn: 1 x 32-bit value",
+ {
+ 0x00000001ffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff
+ },
+ {
+ 0x00000001,
+ 0x00000001
+ }
+ },
+ {
+ "vphn: 2 x 32-bit values",
+ {
+ 0x0000000100000002,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff
+ },
+ {
+ 0x00000002,
+ 0x00000001,
+ 0x00000002
+ }
+ },
+ {
+ /* Parsing the next 32-bit value out of the next 64-bit input
+ * value.
+ */
+ "vphn: 3 x 32-bit values",
+ {
+ 0x0000000100000002,
+ 0x00000003ffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff
+ },
+ {
+ 0x00000003,
+ 0x00000001,
+ 0x00000002,
+ 0x00000003
+ }
+ },
+ {
+ /* Parse at most 6 x 64-bit input values */
+ "vphn: 12 x 32-bit values",
+ {
+ 0x0000000100000002,
+ 0x0000000300000004,
+ 0x0000000500000006,
+ 0x0000000700000008,
+ 0x000000090000000a,
+ 0x0000000b0000000c
+ },
+ {
+ 0x0000000c,
+ 0x00000001,
+ 0x00000002,
+ 0x00000003,
+ 0x00000004,
+ 0x00000005,
+ 0x00000006,
+ 0x00000007,
+ 0x00000008,
+ 0x00000009,
+ 0x0000000a,
+ 0x0000000b,
+ 0x0000000c
+ }
+ },
+ {
+ "vphn: 16-bit value followed by 32-bit value",
+ {
+ 0x800100000002ffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff
+ },
+ {
+ 0x00000002,
+ 0x00000001,
+ 0x00000002
+ }
+ },
+ {
+ "vphn: 32-bit value followed by 16-bit value",
+ {
+ 0x000000018002ffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff
+ },
+ {
+ 0x00000002,
+ 0x00000001,
+ 0x00000002
+ }
+ },
+ {
+ /* Parse a 32-bit value split accross two consecutives 64-bit
+ * input values.
+ */
+ "vphn: 16-bit value followed by 2 x 32-bit values",
+ {
+ 0x8001000000020000,
+ 0x0003ffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff
+ },
+ {
+ 0x00000003,
+ 0x00000001,
+ 0x00000002,
+ 0x00000003,
+ 0x00000004,
+ 0x00000005
+ }
+ },
+ {
+ /* The lower bits in 0x0001ffff don't get mixed up with the
+ * 0xffff terminator.
+ */
+ "vphn: 32-bit value has all ones in 16 lower bits",
+ {
+ 0x0001ffff80028003,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff
+ },
+ {
+ 0x00000003,
+ 0x0001ffff,
+ 0x00000002,
+ 0x00000003
+ }
+ },
+ {
+ /* The following input doesn't follow the specification.
+ */
+ "vphn: last 32-bit value is truncated",
+ {
+ 0x0000000100000002,
+ 0x0000000300000004,
+ 0x0000000500000006,
+ 0x0000000700000008,
+ 0x000000090000000a,
+ 0x0000000b800c2bad
+ },
+ {
+ 0x0000000c,
+ 0x00000001,
+ 0x00000002,
+ 0x00000003,
+ 0x00000004,
+ 0x00000005,
+ 0x00000006,
+ 0x00000007,
+ 0x00000008,
+ 0x00000009,
+ 0x0000000a,
+ 0x0000000b,
+ 0x0000000c
+ }
+ },
+ {
+ "vphn: garbage after terminator",
+ {
+ 0xffff2bad2bad2bad,
+ 0x2bad2bad2bad2bad,
+ 0x2bad2bad2bad2bad,
+ 0x2bad2bad2bad2bad,
+ 0x2bad2bad2bad2bad,
+ 0x2bad2bad2bad2bad
+ },
+ {
+ 0x00000000
+ }
+ },
+ {
+ NULL
+ }
+};
+
+static int test_one(struct test *test)
+{
+ __be32 output[VPHN_ASSOC_BUFSIZE] = { 0 };
+ int i, len;
+
+ vphn_unpack_associativity(test->input, output);
+
+ len = be32_to_cpu(output[0]);
+ if (len != test->expected[0]) {
+ printf("expected %d elements, got %d\n", test->expected[0],
+ len);
+ return 1;
+ }
+
+ for (i = 1; i < len; i++) {
+ u32 val = be32_to_cpu(output[i]);
+ if (val != test->expected[i]) {
+ printf("element #%d is 0x%x, should be 0x%x\n", i, val,
+ test->expected[i]);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int test_vphn(void)
+{
+ static struct test *test;
+
+ for (test = all_tests; test->descr; test++) {
+ int ret;
+
+ ret = test_one(test);
+ test_finish(test->descr, ret);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ return test_harness(test_vphn, "test-vphn");
+}
diff --git a/powerpc/vphn/vphn.c b/powerpc/vphn/vphn.c
new file mode 120000
index 0000000..186b906
--- /dev/null
+++ b/powerpc/vphn/vphn.c
@@ -0,0 +1 @@
+../../../../../arch/powerpc/mm/vphn.c \ No newline at end of file
diff --git a/powerpc/vphn/vphn.h b/powerpc/vphn/vphn.h
new file mode 120000
index 0000000..7131efe
--- /dev/null
+++ b/powerpc/vphn/vphn.h
@@ -0,0 +1 @@
+../../../../../arch/powerpc/mm/vphn.h \ No newline at end of file
diff --git a/pstore/Makefile b/pstore/Makefile
new file mode 100644
index 0000000..bd7abe2
--- /dev/null
+++ b/pstore/Makefile
@@ -0,0 +1,15 @@
+# Makefile for pstore selftests.
+# Expects pstore backend is registered.
+
+all:
+
+TEST_PROGS := pstore_tests pstore_post_reboot_tests
+TEST_FILES := common_tests pstore_crash_test
+
+include ../lib.mk
+
+run_crash:
+ @sh pstore_crash_test || { echo "pstore_crash_test: [FAIL]"; exit 1; }
+
+clean:
+ rm -rf logs/* *uuid
diff --git a/pstore/common_tests b/pstore/common_tests
new file mode 100755
index 0000000..3ea64d7
--- /dev/null
+++ b/pstore/common_tests
@@ -0,0 +1,83 @@
+#!/bin/sh
+
+# common_tests - Shell script commonly used by pstore test scripts
+#
+# Copyright (C) Hitachi Ltd., 2015
+# Written by Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
+#
+# Released under the terms of the GPL v2.
+
+# Utilities
+errexit() { # message
+ echo "Error: $1" 1>&2
+ exit 1
+}
+
+absdir() { # file_path
+ (cd `dirname $1`; pwd)
+}
+
+show_result() { # result_value
+ if [ $1 -eq 0 ]; then
+ prlog "ok"
+ else
+ prlog "FAIL"
+ rc=1
+ fi
+}
+
+check_files_exist() { # type of pstorefs file
+ if [ -e ${1}-${backend}-0 ]; then
+ prlog "ok"
+ for f in `ls ${1}-${backend}-*`; do
+ prlog -e "\t${f}"
+ done
+ else
+ prlog "FAIL"
+ rc=1
+ fi
+}
+
+operate_files() { # tested value, files, operation
+ if [ $1 -eq 0 ]; then
+ prlog
+ for f in $2; do
+ prlog -ne "\t${f} ... "
+ # execute operation
+ $3 $f
+ show_result $?
+ done
+ else
+ prlog " ... FAIL"
+ rc=1
+ fi
+}
+
+# Parameters
+TEST_STRING_PATTERN="Testing pstore: uuid="
+UUID=`cat /proc/sys/kernel/random/uuid`
+TOP_DIR=`absdir $0`
+LOG_DIR=$TOP_DIR/logs/`date +%Y%m%d-%H%M%S`_${UUID}/
+REBOOT_FLAG=$TOP_DIR/reboot_flag
+
+# Preparing logs
+LOG_FILE=$LOG_DIR/`basename $0`.log
+mkdir -p $LOG_DIR || errexit "Failed to make a log directory: $LOG_DIR"
+date > $LOG_FILE
+prlog() { # messages
+ /bin/echo "$@" | tee -a $LOG_FILE
+}
+
+# Starting tests
+rc=0
+prlog "=== Pstore unit tests (`basename $0`) ==="
+prlog "UUID="$UUID
+
+prlog -n "Checking pstore backend is registered ... "
+backend=`cat /sys/module/pstore/parameters/backend`
+show_result $?
+prlog -e "\tbackend=${backend}"
+prlog -e "\tcmdline=`cat /proc/cmdline`"
+if [ $rc -ne 0 ]; then
+ exit 1
+fi
diff --git a/pstore/pstore_crash_test b/pstore/pstore_crash_test
new file mode 100755
index 0000000..1a4afe5
--- /dev/null
+++ b/pstore/pstore_crash_test
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+# pstore_crash_test - Pstore test shell script which causes crash and reboot
+#
+# Copyright (C) Hitachi Ltd., 2015
+# Written by Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
+#
+# Released under the terms of the GPL v2.
+
+# exit if pstore backend is not registered
+. ./common_tests
+
+prlog "Causing kernel crash ..."
+
+# enable all functions triggered by sysrq
+echo 1 > /proc/sys/kernel/sysrq
+# setting to reboot in 3 seconds after panic
+echo 3 > /proc/sys/kernel/panic
+
+# save uuid file by different name because next test execution will replace it.
+mv $TOP_DIR/uuid $TOP_DIR/prev_uuid
+
+# create a file as reboot flag
+touch $REBOOT_FLAG
+sync
+
+# cause crash
+# Note: If you use kdump and want to see kmesg-* files after reboot, you should
+# specify 'crash_kexec_post_notifiers' in 1st kernel's cmdline.
+echo c > /proc/sysrq-trigger
diff --git a/pstore/pstore_post_reboot_tests b/pstore/pstore_post_reboot_tests
new file mode 100755
index 0000000..6ccb154
--- /dev/null
+++ b/pstore/pstore_post_reboot_tests
@@ -0,0 +1,77 @@
+#!/bin/sh
+
+# pstore_post_reboot_tests - Check pstore's behavior after crash/reboot
+#
+# Copyright (C) Hitachi Ltd., 2015
+# Written by Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
+#
+# Released under the terms of the GPL v2.
+
+. ./common_tests
+
+if [ -e $REBOOT_FLAG ]; then
+ rm $REBOOT_FLAG
+else
+ prlog "pstore_crash_test has not been executed yet. we skip further tests."
+ exit 0
+fi
+
+prlog -n "Mounting pstore filesystem ... "
+mount_info=`grep pstore /proc/mounts`
+if [ $? -eq 0 ]; then
+ mount_point=`echo ${mount_info} | cut -d' ' -f2 | head -n1`
+ prlog "ok"
+else
+ mount none /sys/fs/pstore -t pstore
+ if [ $? -eq 0 ]; then
+ mount_point=`grep pstore /proc/mounts | cut -d' ' -f2 | head -n1`
+ prlog "ok"
+ else
+ prlog "FAIL"
+ exit 1
+ fi
+fi
+
+cd ${mount_point}
+
+prlog -n "Checking dmesg files exist in pstore filesystem ... "
+check_files_exist dmesg
+
+prlog -n "Checking console files exist in pstore filesystem ... "
+check_files_exist console
+
+prlog -n "Checking pmsg files exist in pstore filesystem ... "
+check_files_exist pmsg
+
+prlog -n "Checking dmesg files contain oops end marker"
+grep_end_trace() {
+ grep -q "\---\[ end trace" $1
+}
+files=`ls dmesg-${backend}-*`
+operate_files $? "$files" grep_end_trace
+
+prlog -n "Checking console file contains oops end marker ... "
+grep -q "\---\[ end trace" console-${backend}-0
+show_result $?
+
+prlog -n "Checking pmsg file properly keeps the content written before crash ... "
+prev_uuid=`cat $TOP_DIR/prev_uuid`
+if [ $? -eq 0 ]; then
+ nr_matched=`grep -c "$TEST_STRING_PATTERN" pmsg-${backend}-0`
+ if [ $nr_matched -eq 1 ]; then
+ grep -q "$TEST_STRING_PATTERN"$prev_uuid pmsg-${backend}-0
+ show_result $?
+ else
+ prlog "FAIL"
+ rc=1
+ fi
+else
+ prlog "FAIL"
+ rc=1
+fi
+
+prlog -n "Removing all files in pstore filesystem "
+files=`ls *-${backend}-*`
+operate_files $? "$files" rm
+
+exit $rc
diff --git a/pstore/pstore_tests b/pstore/pstore_tests
new file mode 100755
index 0000000..f25d2a3
--- /dev/null
+++ b/pstore/pstore_tests
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+# pstore_tests - Check pstore's behavior before crash/reboot
+#
+# Copyright (C) Hitachi Ltd., 2015
+# Written by Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
+#
+# Released under the terms of the GPL v2.
+
+. ./common_tests
+
+prlog -n "Checking pstore console is registered ... "
+dmesg | grep -q "console \[pstore"
+show_result $?
+
+prlog -n "Checking /dev/pmsg0 exists ... "
+test -e /dev/pmsg0
+show_result $?
+
+prlog -n "Writing unique string to /dev/pmsg0 ... "
+if [ -e "/dev/pmsg0" ]; then
+ echo "${TEST_STRING_PATTERN}""$UUID" > /dev/pmsg0
+ show_result $?
+ echo "$UUID" > $TOP_DIR/uuid
+else
+ prlog "FAIL"
+ rc=1
+fi
+
+exit $rc
diff --git a/ptrace/Makefile b/ptrace/Makefile
index 47ae2d3..453927f 100644
--- a/ptrace/Makefile
+++ b/ptrace/Makefile
@@ -6,5 +6,6 @@ all: peeksiginfo
clean:
rm -f peeksiginfo
-run_tests: all
- @./peeksiginfo || echo "peeksiginfo selftests: [FAIL]"
+TEST_PROGS := peeksiginfo
+
+include ../lib.mk
diff --git a/rcutorture/bin/configinit.sh b/rcutorture/bin/configinit.sh
index 15f1a17..3f81a10 100755
--- a/rcutorture/bin/configinit.sh
+++ b/rcutorture/bin/configinit.sh
@@ -66,7 +66,7 @@ make $buildloc $TORTURE_DEFCONFIG > $builddir/Make.defconfig.out 2>&1
mv $builddir/.config $builddir/.config.sav
sh $T/upd.sh < $builddir/.config.sav > $builddir/.config
cp $builddir/.config $builddir/.config.new
-yes '' | make $buildloc oldconfig > $builddir/Make.modconfig.out 2>&1
+yes '' | make $buildloc oldconfig > $builddir/Make.oldconfig.out 2> $builddir/Make.oldconfig.err
# verify new config matches specification.
configcheck.sh $builddir/.config $c
diff --git a/rcutorture/bin/kvm-recheck.sh b/rcutorture/bin/kvm-recheck.sh
index 4f5b20f..d86bdd6 100755
--- a/rcutorture/bin/kvm-recheck.sh
+++ b/rcutorture/bin/kvm-recheck.sh
@@ -43,6 +43,10 @@ do
if test -f "$i/console.log"
then
configcheck.sh $i/.config $i/ConfigFragment
+ if test -r $i/Make.oldconfig.err
+ then
+ cat $i/Make.oldconfig.err
+ fi
parse-build.sh $i/Make.out $configfile
parse-torture.sh $i/console.log $configfile
parse-console.sh $i/console.log $configfile
diff --git a/rcutorture/bin/kvm.sh b/rcutorture/bin/kvm.sh
index 368d64a..f648360 100755
--- a/rcutorture/bin/kvm.sh
+++ b/rcutorture/bin/kvm.sh
@@ -55,7 +55,7 @@ usage () {
echo " --bootargs kernel-boot-arguments"
echo " --bootimage relative-path-to-kernel-boot-image"
echo " --buildonly"
- echo " --configs \"config-file list\""
+ echo " --configs \"config-file list w/ repeat factor (3*TINY01)\""
echo " --cpus N"
echo " --datestamp string"
echo " --defconfig string"
@@ -75,7 +75,7 @@ usage () {
while test $# -gt 0
do
case "$1" in
- --bootargs)
+ --bootargs|--bootarg)
checkarg --bootargs "(list of kernel boot arguments)" "$#" "$2" '.*' '^--'
TORTURE_BOOTARGS="$2"
shift
@@ -88,7 +88,7 @@ do
--buildonly)
TORTURE_BUILDONLY=1
;;
- --configs)
+ --configs|--config)
checkarg --configs "(list of config files)" "$#" "$2" '^[^/]*$' '^--'
configs="$2"
shift
@@ -134,7 +134,7 @@ do
--no-initrd)
TORTURE_INITRD=""; export TORTURE_INITRD
;;
- --qemu-args)
+ --qemu-args|--qemu-arg)
checkarg --qemu-args "-qemu args" $# "$2" '^-' '^error'
TORTURE_QEMU_ARG="$2"
shift
@@ -178,13 +178,26 @@ fi
touch $T/cfgcpu
for CF in $configs
do
- if test -f "$CONFIGFRAG/$CF"
+ case $CF in
+ [0-9]\**|[0-9][0-9]\**|[0-9][0-9][0-9]\**)
+ config_reps=`echo $CF | sed -e 's/\*.*$//'`
+ CF1=`echo $CF | sed -e 's/^[^*]*\*//'`
+ ;;
+ *)
+ config_reps=1
+ CF1=$CF
+ ;;
+ esac
+ if test -f "$CONFIGFRAG/$CF1"
then
- cpu_count=`configNR_CPUS.sh $CONFIGFRAG/$CF`
- cpu_count=`configfrag_boot_cpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF" "$cpu_count"`
- echo $CF $cpu_count >> $T/cfgcpu
+ cpu_count=`configNR_CPUS.sh $CONFIGFRAG/$CF1`
+ cpu_count=`configfrag_boot_cpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF1" "$cpu_count"`
+ for ((cur_rep=0;cur_rep<$config_reps;cur_rep++))
+ do
+ echo $CF1 $cpu_count >> $T/cfgcpu
+ done
else
- echo "The --configs file $CF does not exist, terminating."
+ echo "The --configs file $CF1 does not exist, terminating."
exit 1
fi
done
@@ -310,7 +323,7 @@ function dump(first, pastlast)
cfr[jn] = cf[j] "." cfrep[cf[j]];
}
if (cpusr[jn] > ncpus && ncpus != 0)
- ovf = "(!)";
+ ovf = "-ovf";
else
ovf = "";
print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date`";
diff --git a/rcutorture/configs/lock/CFLIST b/rcutorture/configs/lock/CFLIST
index 6910b73..b9611c5 100644
--- a/rcutorture/configs/lock/CFLIST
+++ b/rcutorture/configs/lock/CFLIST
@@ -1,4 +1,6 @@
LOCK01
LOCK02
LOCK03
-LOCK04 \ No newline at end of file
+LOCK04
+LOCK05
+LOCK06
diff --git a/rcutorture/configs/lock/LOCK05 b/rcutorture/configs/lock/LOCK05
new file mode 100644
index 0000000..1d1da14
--- /dev/null
+++ b/rcutorture/configs/lock/LOCK05
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/rcutorture/configs/lock/LOCK05.boot b/rcutorture/configs/lock/LOCK05.boot
new file mode 100644
index 0000000..8ac3730
--- /dev/null
+++ b/rcutorture/configs/lock/LOCK05.boot
@@ -0,0 +1 @@
+locktorture.torture_type=rtmutex_lock
diff --git a/rcutorture/configs/lock/LOCK06 b/rcutorture/configs/lock/LOCK06
new file mode 100644
index 0000000..1d1da14
--- /dev/null
+++ b/rcutorture/configs/lock/LOCK06
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/rcutorture/configs/lock/LOCK06.boot b/rcutorture/configs/lock/LOCK06.boot
new file mode 100644
index 0000000..f92219c
--- /dev/null
+++ b/rcutorture/configs/lock/LOCK06.boot
@@ -0,0 +1 @@
+locktorture.torture_type=percpu_rwsem_lock
diff --git a/rcutorture/configs/rcu/CFcommon b/rcutorture/configs/rcu/CFcommon
index d2d2a86..f824b4c 100644
--- a/rcutorture/configs/rcu/CFcommon
+++ b/rcutorture/configs/rcu/CFcommon
@@ -1,2 +1,5 @@
CONFIG_RCU_TORTURE_TEST=y
CONFIG_PRINTK_TIME=y
+CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
+CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
+CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
diff --git a/rcutorture/configs/rcu/SRCU-N b/rcutorture/configs/rcu/SRCU-N
index 9fbb41b..1a087c3 100644
--- a/rcutorture/configs/rcu/SRCU-N
+++ b/rcutorture/configs/rcu/SRCU-N
@@ -5,3 +5,4 @@ CONFIG_HOTPLUG_CPU=y
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
+CONFIG_RCU_EXPERT=y
diff --git a/rcutorture/configs/rcu/SRCU-P b/rcutorture/configs/rcu/SRCU-P
index 4b6f272..4837430 100644
--- a/rcutorture/configs/rcu/SRCU-P
+++ b/rcutorture/configs/rcu/SRCU-P
@@ -5,3 +5,4 @@ CONFIG_HOTPLUG_CPU=y
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=y
+#CHECK#CONFIG_RCU_EXPERT=n
diff --git a/rcutorture/configs/rcu/SRCU-P.boot b/rcutorture/configs/rcu/SRCU-P.boot
index 238bfe3..84a7d51 100644
--- a/rcutorture/configs/rcu/SRCU-P.boot
+++ b/rcutorture/configs/rcu/SRCU-P.boot
@@ -1 +1 @@
-rcutorture.torture_type=srcu
+rcutorture.torture_type=srcud
diff --git a/rcutorture/configs/rcu/TASKS01 b/rcutorture/configs/rcu/TASKS01
index 97f0a0b..bafe94c 100644
--- a/rcutorture/configs/rcu/TASKS01
+++ b/rcutorture/configs/rcu/TASKS01
@@ -5,5 +5,6 @@ CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=y
CONFIG_DEBUG_LOCK_ALLOC=y
-CONFIG_PROVE_RCU=y
-CONFIG_TASKS_RCU=y
+CONFIG_PROVE_LOCKING=y
+#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_RCU_EXPERT=y
diff --git a/rcutorture/configs/rcu/TASKS02 b/rcutorture/configs/rcu/TASKS02
index 696d2ea..ad2be91 100644
--- a/rcutorture/configs/rcu/TASKS02
+++ b/rcutorture/configs/rcu/TASKS02
@@ -2,4 +2,3 @@ CONFIG_SMP=n
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
-CONFIG_TASKS_RCU=y
diff --git a/rcutorture/configs/rcu/TASKS03 b/rcutorture/configs/rcu/TASKS03
index 9c60da5..c70c51d 100644
--- a/rcutorture/configs/rcu/TASKS03
+++ b/rcutorture/configs/rcu/TASKS03
@@ -6,8 +6,8 @@ CONFIG_HIBERNATION=n
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=y
-CONFIG_TASKS_RCU=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=n
CONFIG_NO_HZ_FULL=y
CONFIG_NO_HZ_FULL_ALL=y
+#CHECK#CONFIG_RCU_EXPERT=n
diff --git a/rcutorture/configs/rcu/TINY02 b/rcutorture/configs/rcu/TINY02
index 36e41df..f1892e0 100644
--- a/rcutorture/configs/rcu/TINY02
+++ b/rcutorture/configs/rcu/TINY02
@@ -8,7 +8,7 @@ CONFIG_NO_HZ_IDLE=n
CONFIG_NO_HZ_FULL=n
CONFIG_RCU_TRACE=y
CONFIG_PROVE_LOCKING=y
-CONFIG_PROVE_RCU=y
+#CHECK#CONFIG_PROVE_RCU=y
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_PREEMPT_COUNT=y
diff --git a/rcutorture/configs/rcu/TINY02.boot b/rcutorture/configs/rcu/TINY02.boot
index 0f08027..6c1a292 100644
--- a/rcutorture/configs/rcu/TINY02.boot
+++ b/rcutorture/configs/rcu/TINY02.boot
@@ -1,2 +1,3 @@
rcupdate.rcu_self_test=1
rcupdate.rcu_self_test_bh=1
+rcutorture.torture_type=rcu_bh
diff --git a/rcutorture/configs/rcu/TREE01 b/rcutorture/configs/rcu/TREE01
index f8a10a7..f572b87 100644
--- a/rcutorture/configs/rcu/TREE01
+++ b/rcutorture/configs/rcu/TREE01
@@ -13,6 +13,6 @@ CONFIG_MAXSMP=y
CONFIG_RCU_NOCB_CPU=y
CONFIG_RCU_NOCB_CPU_ZERO=y
CONFIG_DEBUG_LOCK_ALLOC=n
-CONFIG_RCU_CPU_STALL_INFO=n
CONFIG_RCU_BOOST=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/rcutorture/configs/rcu/TREE02 b/rcutorture/configs/rcu/TREE02
index 629122f..ef6a22c 100644
--- a/rcutorture/configs/rcu/TREE02
+++ b/rcutorture/configs/rcu/TREE02
@@ -14,10 +14,9 @@ CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
CONFIG_RCU_FANOUT=3
CONFIG_RCU_FANOUT_LEAF=3
-CONFIG_RCU_FANOUT_EXACT=n
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=n
-CONFIG_RCU_CPU_STALL_INFO=n
CONFIG_RCU_BOOST=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/rcutorture/configs/rcu/TREE02-T b/rcutorture/configs/rcu/TREE02-T
index a25de47..917d251 100644
--- a/rcutorture/configs/rcu/TREE02-T
+++ b/rcutorture/configs/rcu/TREE02-T
@@ -14,10 +14,8 @@ CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
CONFIG_RCU_FANOUT=3
CONFIG_RCU_FANOUT_LEAF=3
-CONFIG_RCU_FANOUT_EXACT=n
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=n
-CONFIG_RCU_CPU_STALL_INFO=n
CONFIG_RCU_BOOST=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
diff --git a/rcutorture/configs/rcu/TREE03 b/rcutorture/configs/rcu/TREE03
index 53f24e0..7a17c50 100644
--- a/rcutorture/configs/rcu/TREE03
+++ b/rcutorture/configs/rcu/TREE03
@@ -1,5 +1,5 @@
CONFIG_SMP=y
-CONFIG_NR_CPUS=8
+CONFIG_NR_CPUS=16
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=y
@@ -9,12 +9,11 @@ CONFIG_NO_HZ_IDLE=n
CONFIG_NO_HZ_FULL=n
CONFIG_RCU_TRACE=y
CONFIG_HOTPLUG_CPU=y
-CONFIG_RCU_FANOUT=4
-CONFIG_RCU_FANOUT_LEAF=4
-CONFIG_RCU_FANOUT_EXACT=n
+CONFIG_RCU_FANOUT=2
+CONFIG_RCU_FANOUT_LEAF=2
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=n
-CONFIG_RCU_CPU_STALL_INFO=n
CONFIG_RCU_BOOST=y
CONFIG_RCU_KTHREAD_PRIO=2
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/rcutorture/configs/rcu/TREE03.boot b/rcutorture/configs/rcu/TREE03.boot
new file mode 100644
index 0000000..120c0c8
--- /dev/null
+++ b/rcutorture/configs/rcu/TREE03.boot
@@ -0,0 +1 @@
+rcutorture.onoff_interval=1 rcutorture.onoff_holdoff=30
diff --git a/rcutorture/configs/rcu/TREE04 b/rcutorture/configs/rcu/TREE04
index 0f84db3..39a2c6d 100644
--- a/rcutorture/configs/rcu/TREE04
+++ b/rcutorture/configs/rcu/TREE04
@@ -13,10 +13,9 @@ CONFIG_RCU_TRACE=y
CONFIG_HOTPLUG_CPU=n
CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
-CONFIG_RCU_FANOUT=2
-CONFIG_RCU_FANOUT_LEAF=2
-CONFIG_RCU_FANOUT_EXACT=n
+CONFIG_RCU_FANOUT=4
+CONFIG_RCU_FANOUT_LEAF=4
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=n
-CONFIG_RCU_CPU_STALL_INFO=y
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/rcutorture/configs/rcu/TREE05 b/rcutorture/configs/rcu/TREE05
index 212e3bf..1257d32 100644
--- a/rcutorture/configs/rcu/TREE05
+++ b/rcutorture/configs/rcu/TREE05
@@ -12,11 +12,10 @@ CONFIG_RCU_TRACE=n
CONFIG_HOTPLUG_CPU=y
CONFIG_RCU_FANOUT=6
CONFIG_RCU_FANOUT_LEAF=6
-CONFIG_RCU_FANOUT_EXACT=n
CONFIG_RCU_NOCB_CPU=y
CONFIG_RCU_NOCB_CPU_NONE=y
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=y
-CONFIG_PROVE_RCU=y
-CONFIG_RCU_CPU_STALL_INFO=n
+#CHECK#CONFIG_PROVE_RCU=y
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/rcutorture/configs/rcu/TREE06 b/rcutorture/configs/rcu/TREE06
index 7eee63b..d3e456b 100644
--- a/rcutorture/configs/rcu/TREE06
+++ b/rcutorture/configs/rcu/TREE06
@@ -14,10 +14,9 @@ CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
CONFIG_RCU_FANOUT=6
CONFIG_RCU_FANOUT_LEAF=6
-CONFIG_RCU_FANOUT_EXACT=y
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=y
-CONFIG_PROVE_RCU=y
-CONFIG_RCU_CPU_STALL_INFO=n
+#CHECK#CONFIG_PROVE_RCU=y
CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
+CONFIG_RCU_EXPERT=y
diff --git a/rcutorture/configs/rcu/TREE06.boot b/rcutorture/configs/rcu/TREE06.boot
index da9a03a..dd90f28 100644
--- a/rcutorture/configs/rcu/TREE06.boot
+++ b/rcutorture/configs/rcu/TREE06.boot
@@ -1,3 +1,4 @@
rcupdate.rcu_self_test=1
rcupdate.rcu_self_test_bh=1
rcupdate.rcu_self_test_sched=1
+rcutree.rcu_fanout_exact=1
diff --git a/rcutorture/configs/rcu/TREE07 b/rcutorture/configs/rcu/TREE07
index 92a97fa..3956b41 100644
--- a/rcutorture/configs/rcu/TREE07
+++ b/rcutorture/configs/rcu/TREE07
@@ -15,8 +15,7 @@ CONFIG_RCU_TRACE=y
CONFIG_HOTPLUG_CPU=y
CONFIG_RCU_FANOUT=2
CONFIG_RCU_FANOUT_LEAF=2
-CONFIG_RCU_FANOUT_EXACT=n
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=n
-CONFIG_RCU_CPU_STALL_INFO=y
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/rcutorture/configs/rcu/TREE08 b/rcutorture/configs/rcu/TREE08
index 5812027..bb9b0c1 100644
--- a/rcutorture/configs/rcu/TREE08
+++ b/rcutorture/configs/rcu/TREE08
@@ -1,5 +1,5 @@
CONFIG_SMP=y
-CONFIG_NR_CPUS=16
+CONFIG_NR_CPUS=8
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=y
@@ -13,13 +13,12 @@ CONFIG_HOTPLUG_CPU=n
CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
CONFIG_RCU_FANOUT=3
-CONFIG_RCU_FANOUT_EXACT=y
CONFIG_RCU_FANOUT_LEAF=2
CONFIG_RCU_NOCB_CPU=y
CONFIG_RCU_NOCB_CPU_ALL=y
CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_PROVE_LOCKING=y
-CONFIG_PROVE_RCU=y
-CONFIG_RCU_CPU_STALL_INFO=n
+#CHECK#CONFIG_PROVE_RCU=y
CONFIG_RCU_BOOST=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/rcutorture/configs/rcu/TREE08-T b/rcutorture/configs/rcu/TREE08-T
index 3eaecca..2ad13f0 100644
--- a/rcutorture/configs/rcu/TREE08-T
+++ b/rcutorture/configs/rcu/TREE08-T
@@ -13,11 +13,9 @@ CONFIG_HOTPLUG_CPU=n
CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
CONFIG_RCU_FANOUT=3
-CONFIG_RCU_FANOUT_EXACT=y
CONFIG_RCU_FANOUT_LEAF=2
CONFIG_RCU_NOCB_CPU=y
CONFIG_RCU_NOCB_CPU_ALL=y
CONFIG_DEBUG_LOCK_ALLOC=n
-CONFIG_RCU_CPU_STALL_INFO=n
CONFIG_RCU_BOOST=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
diff --git a/rcutorture/configs/rcu/TREE08-T.boot b/rcutorture/configs/rcu/TREE08-T.boot
new file mode 100644
index 0000000..883149b
--- /dev/null
+++ b/rcutorture/configs/rcu/TREE08-T.boot
@@ -0,0 +1 @@
+rcutree.rcu_fanout_exact=1
diff --git a/rcutorture/configs/rcu/TREE08.boot b/rcutorture/configs/rcu/TREE08.boot
index 2561daf..fb066dc 100644
--- a/rcutorture/configs/rcu/TREE08.boot
+++ b/rcutorture/configs/rcu/TREE08.boot
@@ -1,3 +1,4 @@
rcutorture.torture_type=sched
rcupdate.rcu_self_test=1
rcupdate.rcu_self_test_sched=1
+rcutree.rcu_fanout_exact=1
diff --git a/rcutorture/configs/rcu/TREE09 b/rcutorture/configs/rcu/TREE09
index 6076b36..6710e74 100644
--- a/rcutorture/configs/rcu/TREE09
+++ b/rcutorture/configs/rcu/TREE09
@@ -13,6 +13,6 @@ CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=n
-CONFIG_RCU_CPU_STALL_INFO=n
CONFIG_RCU_BOOST=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+#CHECK#CONFIG_RCU_EXPERT=n
diff --git a/rcutorture/doc/TREE_RCU-kconfig.txt b/rcutorture/doc/TREE_RCU-kconfig.txt
index ec03c88..657f3a0 100644
--- a/rcutorture/doc/TREE_RCU-kconfig.txt
+++ b/rcutorture/doc/TREE_RCU-kconfig.txt
@@ -12,13 +12,11 @@ CONFIG_NO_HZ_IDLE -- Do those not otherwise specified. (Groups of two.)
CONFIG_NO_HZ_FULL -- Do two, one with CONFIG_NO_HZ_FULL_SYSIDLE.
CONFIG_NO_HZ_FULL_SYSIDLE -- Do one.
CONFIG_PREEMPT -- Do half. (First three and #8.)
-CONFIG_PROVE_LOCKING -- Do all but two, covering CONFIG_PROVE_RCU and not.
-CONFIG_PROVE_RCU -- Do all but one under CONFIG_PROVE_LOCKING.
+CONFIG_PROVE_LOCKING -- Do several, covering CONFIG_DEBUG_LOCK_ALLOC=y and not.
+CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING.
CONFIG_RCU_BOOST -- one of PREEMPT_RCU.
CONFIG_RCU_KTHREAD_PRIO -- set to 2 for _BOOST testing.
-CONFIG_RCU_CPU_STALL_INFO -- Do one.
-CONFIG_RCU_FANOUT -- Cover hierarchy as currently, but overlap with others.
-CONFIG_RCU_FANOUT_EXACT -- Do one.
+CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others.
CONFIG_RCU_FANOUT_LEAF -- Do one non-default.
CONFIG_RCU_FAST_NO_HZ -- Do one, but not with CONFIG_RCU_NOCB_CPU_ALL.
CONFIG_RCU_NOCB_CPU -- Do three, see below.
@@ -27,28 +25,19 @@ CONFIG_RCU_NOCB_CPU_NONE -- Do one.
CONFIG_RCU_NOCB_CPU_ZERO -- Do one.
CONFIG_RCU_TRACE -- Do half.
CONFIG_SMP -- Need one !SMP for PREEMPT_RCU.
+!RCU_EXPERT -- Do a few, but these have to be vanilla configurations.
RCU-bh: Do one with PREEMPT and one with !PREEMPT.
RCU-sched: Do one with PREEMPT but not BOOST.
-Hierarchy:
-
-TREE01. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=8, CONFIG_RCU_FANOUT_EXACT=n.
-TREE02. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=3, CONFIG_RCU_FANOUT_EXACT=n,
- CONFIG_RCU_FANOUT_LEAF=3.
-TREE03. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=4, CONFIG_RCU_FANOUT_EXACT=n,
- CONFIG_RCU_FANOUT_LEAF=4.
-TREE04. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=2, CONFIG_RCU_FANOUT_EXACT=n,
- CONFIG_RCU_FANOUT_LEAF=2.
-TREE05. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=6, CONFIG_RCU_FANOUT_EXACT=n
- CONFIG_RCU_FANOUT_LEAF=6.
-TREE06. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=6, CONFIG_RCU_FANOUT_EXACT=y
- CONFIG_RCU_FANOUT_LEAF=6.
-TREE07. CONFIG_NR_CPUS=16, CONFIG_RCU_FANOUT=2, CONFIG_RCU_FANOUT_EXACT=n,
- CONFIG_RCU_FANOUT_LEAF=2.
-TREE08. CONFIG_NR_CPUS=16, CONFIG_RCU_FANOUT=3, CONFIG_RCU_FANOUT_EXACT=y,
- CONFIG_RCU_FANOUT_LEAF=2.
-TREE09. CONFIG_NR_CPUS=1.
+Boot parameters:
+
+nohz_full - do at least one.
+maxcpu -- do at least one.
+rcupdate.rcu_self_test_bh -- Do at least one each, offloaded and not.
+rcupdate.rcu_self_test_sched -- Do at least one each, offloaded and not.
+rcupdate.rcu_self_test -- Do at least one each, offloaded and not.
+rcutree.rcu_fanout_exact -- Do at least one.
Kconfig Parameters Ignored:
diff --git a/seccomp/.gitignore b/seccomp/.gitignore
new file mode 100644
index 0000000..346d83c
--- /dev/null
+++ b/seccomp/.gitignore
@@ -0,0 +1 @@
+seccomp_bpf
diff --git a/seccomp/Makefile b/seccomp/Makefile
new file mode 100644
index 0000000..8401e87
--- /dev/null
+++ b/seccomp/Makefile
@@ -0,0 +1,10 @@
+TEST_PROGS := seccomp_bpf
+CFLAGS += -Wl,-no-as-needed -Wall
+LDFLAGS += -lpthread
+
+all: $(TEST_PROGS)
+
+include ../lib.mk
+
+clean:
+ $(RM) $(TEST_PROGS)
diff --git a/seccomp/seccomp_bpf.c b/seccomp/seccomp_bpf.c
new file mode 100644
index 0000000..e38cc54
--- /dev/null
+++ b/seccomp/seccomp_bpf.c
@@ -0,0 +1,2208 @@
+/*
+ * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
+ * Use of this source code is governed by the GPLv2 license.
+ *
+ * Test code for seccomp bpf.
+ */
+
+#include <asm/siginfo.h>
+#define __have_siginfo_t 1
+#define __have_sigval_t 1
+#define __have_sigevent_t 1
+
+#include <errno.h>
+#include <linux/filter.h>
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/user.h>
+#include <linux/prctl.h>
+#include <linux/ptrace.h>
+#include <linux/seccomp.h>
+#include <pthread.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <time.h>
+#include <linux/elf.h>
+#include <sys/uio.h>
+#include <sys/utsname.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <sys/times.h>
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#include "test_harness.h"
+
+#ifndef PR_SET_PTRACER
+# define PR_SET_PTRACER 0x59616d61
+#endif
+
+#ifndef PR_SET_NO_NEW_PRIVS
+#define PR_SET_NO_NEW_PRIVS 38
+#define PR_GET_NO_NEW_PRIVS 39
+#endif
+
+#ifndef PR_SECCOMP_EXT
+#define PR_SECCOMP_EXT 43
+#endif
+
+#ifndef SECCOMP_EXT_ACT
+#define SECCOMP_EXT_ACT 1
+#endif
+
+#ifndef SECCOMP_EXT_ACT_TSYNC
+#define SECCOMP_EXT_ACT_TSYNC 1
+#endif
+
+#ifndef SECCOMP_MODE_STRICT
+#define SECCOMP_MODE_STRICT 1
+#endif
+
+#ifndef SECCOMP_MODE_FILTER
+#define SECCOMP_MODE_FILTER 2
+#endif
+
+#ifndef SECCOMP_RET_KILL
+#define SECCOMP_RET_KILL 0x00000000U /* kill the task immediately */
+#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */
+#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */
+#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */
+#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */
+
+/* Masks for the return value sections. */
+#define SECCOMP_RET_ACTION 0x7fff0000U
+#define SECCOMP_RET_DATA 0x0000ffffU
+
+struct seccomp_data {
+ int nr;
+ __u32 arch;
+ __u64 instruction_pointer;
+ __u64 args[6];
+};
+#endif
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
+#elif __BYTE_ORDER == __BIG_ENDIAN
+#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
+#else
+#error "wut? Unknown __BYTE_ORDER?!"
+#endif
+
+#define SIBLING_EXIT_UNKILLED 0xbadbeef
+#define SIBLING_EXIT_FAILURE 0xbadface
+#define SIBLING_EXIT_NEWPRIVS 0xbadfeed
+
+TEST(mode_strict_support)
+{
+ long ret;
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support CONFIG_SECCOMP");
+ }
+ syscall(__NR_exit, 1);
+}
+
+TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
+{
+ long ret;
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support CONFIG_SECCOMP");
+ }
+ syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
+ NULL, NULL, NULL);
+ EXPECT_FALSE(true) {
+ TH_LOG("Unreachable!");
+ }
+}
+
+/* Note! This doesn't test no new privs behavior */
+TEST(no_new_privs_support)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ EXPECT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+}
+
+/* Tests kernel support by checking for a copy_from_user() fault on * NULL. */
+TEST(mode_filter_support)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EFAULT, errno) {
+ TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
+ }
+}
+
+TEST(mode_filter_without_nnp)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
+ ASSERT_LE(0, ret) {
+ TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
+ }
+ errno = 0;
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ /* Succeeds with CAP_SYS_ADMIN, fails without */
+ /* TODO(wad) check caps not euid */
+ if (geteuid()) {
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EACCES, errno);
+ } else {
+ EXPECT_EQ(0, ret);
+ }
+}
+
+#define MAX_INSNS_PER_PATH 32768
+
+TEST(filter_size_limits)
+{
+ int i;
+ int count = BPF_MAXINSNS + 1;
+ struct sock_filter allow[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_filter *filter;
+ struct sock_fprog prog = { };
+ long ret;
+
+ filter = calloc(count, sizeof(*filter));
+ ASSERT_NE(NULL, filter);
+
+ for (i = 0; i < count; i++)
+ filter[i] = allow[0];
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ prog.filter = filter;
+ prog.len = count;
+
+ /* Too many filter instructions in a single filter. */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_NE(0, ret) {
+ TH_LOG("Installing %d insn filter was allowed", prog.len);
+ }
+
+ /* One less is okay, though. */
+ prog.len -= 1;
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
+ }
+}
+
+TEST(filter_chain_limits)
+{
+ int i;
+ int count = BPF_MAXINSNS;
+ struct sock_filter allow[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_filter *filter;
+ struct sock_fprog prog = { };
+ long ret;
+
+ filter = calloc(count, sizeof(*filter));
+ ASSERT_NE(NULL, filter);
+
+ for (i = 0; i < count; i++)
+ filter[i] = allow[0];
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ prog.filter = filter;
+ prog.len = 1;
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ prog.len = count;
+
+ /* Too many total filter instructions. */
+ for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ if (ret != 0)
+ break;
+ }
+ ASSERT_NE(0, ret) {
+ TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
+ i, count, i * (count + 4));
+ }
+}
+
+TEST(mode_filter_cannot_move_to_strict)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno);
+}
+
+
+TEST(mode_filter_get_seccomp)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+ EXPECT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+ EXPECT_EQ(2, ret);
+}
+
+
+TEST(ALLOW_all)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+}
+
+TEST(empty_prog)
+{
+ struct sock_filter filter[] = {
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno);
+}
+
+TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+ EXPECT_EQ(0, syscall(__NR_getpid)) {
+ TH_LOG("getpid() shouldn't ever return");
+ }
+}
+
+/* return code >= 0x80000000 is unused. */
+TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+ EXPECT_EQ(0, syscall(__NR_getpid)) {
+ TH_LOG("getpid() shouldn't ever return");
+ }
+}
+
+TEST_SIGNAL(KILL_all, SIGSYS)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+}
+
+TEST_SIGNAL(KILL_one, SIGSYS)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+ pid_t parent = getppid();
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* getpid() should never return. */
+ EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
+{
+ void *fatal_address;
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ /* Only both with lower 32-bit for now. */
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
+ (unsigned long)&fatal_address, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+ pid_t parent = getppid();
+ struct tms timebuf;
+ clock_t clock = times(&timebuf);
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ EXPECT_LE(clock, syscall(__NR_times, &timebuf));
+ /* times() should never return. */
+ EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
+}
+
+TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
+{
+#ifndef __NR_mmap2
+ int sysno = __NR_mmap;
+#else
+ int sysno = __NR_mmap2;
+#endif
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ /* Only both with lower 32-bit for now. */
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+ pid_t parent = getppid();
+ int fd;
+ void *map1, *map2;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+
+ fd = open("/dev/zero", O_RDONLY);
+ ASSERT_NE(-1, fd);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ map1 = (void *)syscall(sysno,
+ NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE, fd, PAGE_SIZE);
+ EXPECT_NE(MAP_FAILED, map1);
+ /* mmap2() should never return. */
+ map2 = (void *)syscall(sysno,
+ NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
+ EXPECT_EQ(MAP_FAILED, map2);
+
+ /* The test failed, so clean up the resources. */
+ munmap(map1, PAGE_SIZE);
+ munmap(map2, PAGE_SIZE);
+ close(fd);
+}
+
+/* TODO(wad) add 64-bit versus 32-bit arg tests. */
+TEST(arg_out_of_range)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno);
+}
+
+TEST(ERRNO_valid)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | E2BIG),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+ pid_t parent = getppid();
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ EXPECT_EQ(-1, read(0, NULL, 0));
+ EXPECT_EQ(E2BIG, errno);
+}
+
+TEST(ERRNO_zero)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+ pid_t parent = getppid();
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* "errno" of 0 is ok. */
+ EXPECT_EQ(0, read(0, NULL, 0));
+}
+
+TEST(ERRNO_capped)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 4096),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+ pid_t parent = getppid();
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ EXPECT_EQ(-1, read(0, NULL, 0));
+ EXPECT_EQ(4095, errno);
+}
+
+FIXTURE_DATA(TRAP) {
+ struct sock_fprog prog;
+};
+
+FIXTURE_SETUP(TRAP)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+
+ memset(&self->prog, 0, sizeof(self->prog));
+ self->prog.filter = malloc(sizeof(filter));
+ ASSERT_NE(NULL, self->prog.filter);
+ memcpy(self->prog.filter, filter, sizeof(filter));
+ self->prog.len = (unsigned short)ARRAY_SIZE(filter);
+}
+
+FIXTURE_TEARDOWN(TRAP)
+{
+ if (self->prog.filter)
+ free(self->prog.filter);
+}
+
+TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
+ ASSERT_EQ(0, ret);
+ syscall(__NR_getpid);
+}
+
+/* Ensure that SIGSYS overrides SIG_IGN */
+TEST_F_SIGNAL(TRAP, ign, SIGSYS)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ signal(SIGSYS, SIG_IGN);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
+ ASSERT_EQ(0, ret);
+ syscall(__NR_getpid);
+}
+
+static struct siginfo TRAP_info;
+static volatile int TRAP_nr;
+static void TRAP_action(int nr, siginfo_t *info, void *void_context)
+{
+ memcpy(&TRAP_info, info, sizeof(TRAP_info));
+ TRAP_nr = nr;
+}
+
+TEST_F(TRAP, handler)
+{
+ int ret, test;
+ struct sigaction act;
+ sigset_t mask;
+
+ memset(&act, 0, sizeof(act));
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGSYS);
+
+ act.sa_sigaction = &TRAP_action;
+ act.sa_flags = SA_SIGINFO;
+ ret = sigaction(SIGSYS, &act, NULL);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("sigaction failed");
+ }
+ ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("sigprocmask failed");
+ }
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
+ ASSERT_EQ(0, ret);
+ TRAP_nr = 0;
+ memset(&TRAP_info, 0, sizeof(TRAP_info));
+ /* Expect the registers to be rolled back. (nr = error) may vary
+ * based on arch. */
+ ret = syscall(__NR_getpid);
+ /* Silence gcc warning about volatile. */
+ test = TRAP_nr;
+ EXPECT_EQ(SIGSYS, test);
+ struct local_sigsys {
+ void *_call_addr; /* calling user insn */
+ int _syscall; /* triggering system call number */
+ unsigned int _arch; /* AUDIT_ARCH_* of syscall */
+ } *sigsys = (struct local_sigsys *)
+#ifdef si_syscall
+ &(TRAP_info.si_call_addr);
+#else
+ &TRAP_info.si_pid;
+#endif
+ EXPECT_EQ(__NR_getpid, sigsys->_syscall);
+ /* Make sure arch is non-zero. */
+ EXPECT_NE(0, sigsys->_arch);
+ EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
+}
+
+FIXTURE_DATA(precedence) {
+ struct sock_fprog allow;
+ struct sock_fprog trace;
+ struct sock_fprog error;
+ struct sock_fprog trap;
+ struct sock_fprog kill;
+};
+
+FIXTURE_SETUP(precedence)
+{
+ struct sock_filter allow_insns[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_filter trace_insns[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
+ };
+ struct sock_filter error_insns[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
+ };
+ struct sock_filter trap_insns[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
+ };
+ struct sock_filter kill_insns[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ };
+
+ memset(self, 0, sizeof(*self));
+#define FILTER_ALLOC(_x) \
+ self->_x.filter = malloc(sizeof(_x##_insns)); \
+ ASSERT_NE(NULL, self->_x.filter); \
+ memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
+ self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
+ FILTER_ALLOC(allow);
+ FILTER_ALLOC(trace);
+ FILTER_ALLOC(error);
+ FILTER_ALLOC(trap);
+ FILTER_ALLOC(kill);
+}
+
+FIXTURE_TEARDOWN(precedence)
+{
+#define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
+ FILTER_FREE(allow);
+ FILTER_FREE(trace);
+ FILTER_FREE(error);
+ FILTER_FREE(trap);
+ FILTER_FREE(kill);
+}
+
+TEST_F(precedence, allow_ok)
+{
+ pid_t parent, res = 0;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ res = syscall(__NR_getppid);
+ EXPECT_EQ(parent, res);
+}
+
+TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
+{
+ pid_t parent, res = 0;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ res = syscall(__NR_getppid);
+ EXPECT_EQ(parent, res);
+ /* getpid() should never return. */
+ res = syscall(__NR_getpid);
+ EXPECT_EQ(0, res);
+}
+
+TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* getpid() should never return. */
+ EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* getpid() should never return. */
+ EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* getpid() should never return. */
+ EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, errno_is_third)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, errno_is_third_in_any_order)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, trace_is_fourth)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* No ptracer */
+ EXPECT_EQ(-1, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, trace_is_fourth_in_any_order)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* No ptracer */
+ EXPECT_EQ(-1, syscall(__NR_getpid));
+}
+
+#ifndef PTRACE_O_TRACESECCOMP
+#define PTRACE_O_TRACESECCOMP 0x00000080
+#endif
+
+/* Catch the Ubuntu 12.04 value error. */
+#if PTRACE_EVENT_SECCOMP != 7
+#undef PTRACE_EVENT_SECCOMP
+#endif
+
+#ifndef PTRACE_EVENT_SECCOMP
+#define PTRACE_EVENT_SECCOMP 7
+#endif
+
+#define IS_SECCOMP_EVENT(status) ((status >> 16) == PTRACE_EVENT_SECCOMP)
+bool tracer_running;
+void tracer_stop(int sig)
+{
+ tracer_running = false;
+}
+
+typedef void tracer_func_t(struct __test_metadata *_metadata,
+ pid_t tracee, int status, void *args);
+
+void tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
+ tracer_func_t tracer_func, void *args)
+{
+ int ret = -1;
+ struct sigaction action = {
+ .sa_handler = tracer_stop,
+ };
+
+ /* Allow external shutdown. */
+ tracer_running = true;
+ ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
+
+ errno = 0;
+ while (ret == -1 && errno != EINVAL)
+ ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
+ ASSERT_EQ(0, ret) {
+ kill(tracee, SIGKILL);
+ }
+ /* Wait for attach stop */
+ wait(NULL);
+
+ ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, PTRACE_O_TRACESECCOMP);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
+ kill(tracee, SIGKILL);
+ }
+ ptrace(PTRACE_CONT, tracee, NULL, 0);
+
+ /* Unblock the tracee */
+ ASSERT_EQ(1, write(fd, "A", 1));
+ ASSERT_EQ(0, close(fd));
+
+ /* Run until we're shut down. Must assert to stop execution. */
+ while (tracer_running) {
+ int status;
+
+ if (wait(&status) != tracee)
+ continue;
+ if (WIFSIGNALED(status) || WIFEXITED(status))
+ /* Child is dead. Time to go. */
+ return;
+
+ /* Make sure this is a seccomp event. */
+ ASSERT_EQ(true, IS_SECCOMP_EVENT(status));
+
+ tracer_func(_metadata, tracee, status, args);
+
+ ret = ptrace(PTRACE_CONT, tracee, NULL, NULL);
+ ASSERT_EQ(0, ret);
+ }
+ /* Directly report the status of our test harness results. */
+ syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
+}
+
+/* Common tracer setup/teardown functions. */
+void cont_handler(int num)
+{ }
+pid_t setup_trace_fixture(struct __test_metadata *_metadata,
+ tracer_func_t func, void *args)
+{
+ char sync;
+ int pipefd[2];
+ pid_t tracer_pid;
+ pid_t tracee = getpid();
+
+ /* Setup a pipe for clean synchronization. */
+ ASSERT_EQ(0, pipe(pipefd));
+
+ /* Fork a child which we'll promote to tracer */
+ tracer_pid = fork();
+ ASSERT_LE(0, tracer_pid);
+ signal(SIGALRM, cont_handler);
+ if (tracer_pid == 0) {
+ close(pipefd[0]);
+ tracer(_metadata, pipefd[1], tracee, func, args);
+ syscall(__NR_exit, 0);
+ }
+ close(pipefd[1]);
+ prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
+ read(pipefd[0], &sync, 1);
+ close(pipefd[0]);
+
+ return tracer_pid;
+}
+void teardown_trace_fixture(struct __test_metadata *_metadata,
+ pid_t tracer)
+{
+ if (tracer) {
+ int status;
+ /*
+ * Extract the exit code from the other process and
+ * adopt it for ourselves in case its asserts failed.
+ */
+ ASSERT_EQ(0, kill(tracer, SIGUSR1));
+ ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
+ if (WEXITSTATUS(status))
+ _metadata->passed = 0;
+ }
+}
+
+/* "poke" tracer arguments and function. */
+struct tracer_args_poke_t {
+ unsigned long poke_addr;
+};
+
+void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
+ void *args)
+{
+ int ret;
+ unsigned long msg;
+ struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
+
+ ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
+ EXPECT_EQ(0, ret);
+ /* If this fails, don't try to recover. */
+ ASSERT_EQ(0x1001, msg) {
+ kill(tracee, SIGKILL);
+ }
+ /*
+ * Poke in the message.
+ * Registers are not touched to try to keep this relatively arch
+ * agnostic.
+ */
+ ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
+ EXPECT_EQ(0, ret);
+}
+
+FIXTURE_DATA(TRACE_poke) {
+ struct sock_fprog prog;
+ pid_t tracer;
+ long poked;
+ struct tracer_args_poke_t tracer_args;
+};
+
+FIXTURE_SETUP(TRACE_poke)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+
+ self->poked = 0;
+ memset(&self->prog, 0, sizeof(self->prog));
+ self->prog.filter = malloc(sizeof(filter));
+ ASSERT_NE(NULL, self->prog.filter);
+ memcpy(self->prog.filter, filter, sizeof(filter));
+ self->prog.len = (unsigned short)ARRAY_SIZE(filter);
+
+ /* Set up tracer args. */
+ self->tracer_args.poke_addr = (unsigned long)&self->poked;
+
+ /* Launch tracer. */
+ self->tracer = setup_trace_fixture(_metadata, tracer_poke,
+ &self->tracer_args);
+}
+
+FIXTURE_TEARDOWN(TRACE_poke)
+{
+ teardown_trace_fixture(_metadata, self->tracer);
+ if (self->prog.filter)
+ free(self->prog.filter);
+}
+
+TEST_F(TRACE_poke, read_has_side_effects)
+{
+ ssize_t ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(0, self->poked);
+ ret = read(-1, NULL, 0);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(0x1001, self->poked);
+}
+
+TEST_F(TRACE_poke, getpid_runs_normally)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(0, self->poked);
+ EXPECT_NE(0, syscall(__NR_getpid));
+ EXPECT_EQ(0, self->poked);
+}
+
+#if defined(__x86_64__)
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM orig_rax
+# define SYSCALL_RET rax
+#elif defined(__i386__)
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM orig_eax
+# define SYSCALL_RET eax
+#elif defined(__arm__)
+# define ARCH_REGS struct pt_regs
+# define SYSCALL_NUM ARM_r7
+# define SYSCALL_RET ARM_r0
+#elif defined(__aarch64__)
+# define ARCH_REGS struct user_pt_regs
+# define SYSCALL_NUM regs[8]
+# define SYSCALL_RET regs[0]
+#elif defined(__powerpc__)
+# define ARCH_REGS struct pt_regs
+# define SYSCALL_NUM gpr[0]
+# define SYSCALL_RET gpr[3]
+#elif defined(__s390__)
+# define ARCH_REGS s390_regs
+# define SYSCALL_NUM gprs[2]
+# define SYSCALL_RET gprs[2]
+#else
+# error "Do not know how to find your architecture's registers and syscalls"
+#endif
+
+/* Architecture-specific syscall fetching routine. */
+int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
+{
+ struct iovec iov;
+ ARCH_REGS regs;
+
+ iov.iov_base = &regs;
+ iov.iov_len = sizeof(regs);
+ EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
+ TH_LOG("PTRACE_GETREGSET failed");
+ return -1;
+ }
+
+ return regs.SYSCALL_NUM;
+}
+
+/* Architecture-specific syscall changing routine. */
+void change_syscall(struct __test_metadata *_metadata,
+ pid_t tracee, int syscall)
+{
+ struct iovec iov;
+ int ret;
+ ARCH_REGS regs;
+
+ iov.iov_base = &regs;
+ iov.iov_len = sizeof(regs);
+ ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
+ EXPECT_EQ(0, ret);
+
+#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
+ defined(__s390__)
+ {
+ regs.SYSCALL_NUM = syscall;
+ }
+
+#elif defined(__arm__)
+# ifndef PTRACE_SET_SYSCALL
+# define PTRACE_SET_SYSCALL 23
+# endif
+ {
+ ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
+ EXPECT_EQ(0, ret);
+ }
+
+#elif defined(__aarch64__)
+# ifndef NT_ARM_SYSTEM_CALL
+# define NT_ARM_SYSTEM_CALL 0x404
+# endif
+ {
+ iov.iov_base = &syscall;
+ iov.iov_len = sizeof(syscall);
+ ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
+ &iov);
+ EXPECT_EQ(0, ret);
+ }
+
+#else
+ ASSERT_EQ(1, 0) {
+ TH_LOG("How is the syscall changed on this architecture?");
+ }
+#endif
+
+ /* If syscall is skipped, change return value. */
+ if (syscall == -1)
+ regs.SYSCALL_RET = 1;
+
+ iov.iov_base = &regs;
+ iov.iov_len = sizeof(regs);
+ ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
+ EXPECT_EQ(0, ret);
+}
+
+void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
+ int status, void *args)
+{
+ int ret;
+ unsigned long msg;
+
+ /* Make sure we got the right message. */
+ ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
+ EXPECT_EQ(0, ret);
+
+ /* Validate and take action on expected syscalls. */
+ switch (msg) {
+ case 0x1002:
+ /* change getpid to getppid. */
+ EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
+ change_syscall(_metadata, tracee, __NR_getppid);
+ break;
+ case 0x1003:
+ /* skip gettid. */
+ EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
+ change_syscall(_metadata, tracee, -1);
+ break;
+ case 0x1004:
+ /* do nothing (allow getppid) */
+ EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
+ break;
+ default:
+ EXPECT_EQ(0, msg) {
+ TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
+ kill(tracee, SIGKILL);
+ }
+ }
+
+}
+
+FIXTURE_DATA(TRACE_syscall) {
+ struct sock_fprog prog;
+ pid_t tracer, mytid, mypid, parent;
+};
+
+FIXTURE_SETUP(TRACE_syscall)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+
+ memset(&self->prog, 0, sizeof(self->prog));
+ self->prog.filter = malloc(sizeof(filter));
+ ASSERT_NE(NULL, self->prog.filter);
+ memcpy(self->prog.filter, filter, sizeof(filter));
+ self->prog.len = (unsigned short)ARRAY_SIZE(filter);
+
+ /* Prepare some testable syscall results. */
+ self->mytid = syscall(__NR_gettid);
+ ASSERT_GT(self->mytid, 0);
+ ASSERT_NE(self->mytid, 1) {
+ TH_LOG("Running this test as init is not supported. :)");
+ }
+
+ self->mypid = getpid();
+ ASSERT_GT(self->mypid, 0);
+ ASSERT_EQ(self->mytid, self->mypid);
+
+ self->parent = getppid();
+ ASSERT_GT(self->parent, 0);
+ ASSERT_NE(self->parent, self->mypid);
+
+ /* Launch tracer. */
+ self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL);
+}
+
+FIXTURE_TEARDOWN(TRACE_syscall)
+{
+ teardown_trace_fixture(_metadata, self->tracer);
+ if (self->prog.filter)
+ free(self->prog.filter);
+}
+
+TEST_F(TRACE_syscall, syscall_allowed)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* getppid works as expected (no changes). */
+ EXPECT_EQ(self->parent, syscall(__NR_getppid));
+ EXPECT_NE(self->mypid, syscall(__NR_getppid));
+}
+
+TEST_F(TRACE_syscall, syscall_redirected)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* getpid has been redirected to getppid as expected. */
+ EXPECT_EQ(self->parent, syscall(__NR_getpid));
+ EXPECT_NE(self->mypid, syscall(__NR_getpid));
+}
+
+TEST_F(TRACE_syscall, syscall_dropped)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* gettid has been skipped and an altered return value stored. */
+ EXPECT_EQ(1, syscall(__NR_gettid));
+ EXPECT_NE(self->mytid, syscall(__NR_gettid));
+}
+
+#ifndef __NR_seccomp
+# if defined(__i386__)
+# define __NR_seccomp 354
+# elif defined(__x86_64__)
+# define __NR_seccomp 317
+# elif defined(__arm__)
+# define __NR_seccomp 383
+# elif defined(__aarch64__)
+# define __NR_seccomp 277
+# elif defined(__powerpc__)
+# define __NR_seccomp 358
+# elif defined(__s390__)
+# define __NR_seccomp 348
+# else
+# warning "seccomp syscall number unknown for this architecture"
+# define __NR_seccomp 0xffff
+# endif
+#endif
+
+#ifndef SECCOMP_SET_MODE_STRICT
+#define SECCOMP_SET_MODE_STRICT 0
+#endif
+
+#ifndef SECCOMP_SET_MODE_FILTER
+#define SECCOMP_SET_MODE_FILTER 1
+#endif
+
+#ifndef SECCOMP_FLAG_FILTER_TSYNC
+#define SECCOMP_FLAG_FILTER_TSYNC 1
+#endif
+
+#ifndef seccomp
+int seccomp(unsigned int op, unsigned int flags, struct sock_fprog *filter)
+{
+ errno = 0;
+ return syscall(__NR_seccomp, op, flags, filter);
+}
+#endif
+
+TEST(seccomp_syscall)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ /* Reject insane operation. */
+ ret = seccomp(-1, 0, &prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Did not reject crazy op value!");
+ }
+
+ /* Reject strict with flags or pointer. */
+ ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Did not reject mode strict with flags!");
+ }
+ ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Did not reject mode strict with uargs!");
+ }
+
+ /* Reject insane args for filter. */
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Did not reject crazy filter flags!");
+ }
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
+ EXPECT_EQ(EFAULT, errno) {
+ TH_LOG("Did not reject NULL filter!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
+ EXPECT_EQ(0, errno) {
+ TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
+ strerror(errno));
+ }
+}
+
+TEST(seccomp_syscall_mode_lock)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ EXPECT_EQ(0, ret) {
+ TH_LOG("Could not install filter!");
+ }
+
+ /* Make sure neither entry point will switch to strict. */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Switched to mode strict!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Switched to mode strict!");
+ }
+}
+
+TEST(TSYNC_first)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+ &prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ EXPECT_EQ(0, ret) {
+ TH_LOG("Could not install initial filter with TSYNC!");
+ }
+}
+
+#define TSYNC_SIBLINGS 2
+struct tsync_sibling {
+ pthread_t tid;
+ pid_t system_tid;
+ sem_t *started;
+ pthread_cond_t *cond;
+ pthread_mutex_t *mutex;
+ int diverge;
+ int num_waits;
+ struct sock_fprog *prog;
+ struct __test_metadata *metadata;
+};
+
+FIXTURE_DATA(TSYNC) {
+ struct sock_fprog root_prog, apply_prog;
+ struct tsync_sibling sibling[TSYNC_SIBLINGS];
+ sem_t started;
+ pthread_cond_t cond;
+ pthread_mutex_t mutex;
+ int sibling_count;
+};
+
+FIXTURE_SETUP(TSYNC)
+{
+ struct sock_filter root_filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_filter apply_filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+
+ memset(&self->root_prog, 0, sizeof(self->root_prog));
+ memset(&self->apply_prog, 0, sizeof(self->apply_prog));
+ memset(&self->sibling, 0, sizeof(self->sibling));
+ self->root_prog.filter = malloc(sizeof(root_filter));
+ ASSERT_NE(NULL, self->root_prog.filter);
+ memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
+ self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
+
+ self->apply_prog.filter = malloc(sizeof(apply_filter));
+ ASSERT_NE(NULL, self->apply_prog.filter);
+ memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
+ self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
+
+ self->sibling_count = 0;
+ pthread_mutex_init(&self->mutex, NULL);
+ pthread_cond_init(&self->cond, NULL);
+ sem_init(&self->started, 0, 0);
+ self->sibling[0].tid = 0;
+ self->sibling[0].cond = &self->cond;
+ self->sibling[0].started = &self->started;
+ self->sibling[0].mutex = &self->mutex;
+ self->sibling[0].diverge = 0;
+ self->sibling[0].num_waits = 1;
+ self->sibling[0].prog = &self->root_prog;
+ self->sibling[0].metadata = _metadata;
+ self->sibling[1].tid = 0;
+ self->sibling[1].cond = &self->cond;
+ self->sibling[1].started = &self->started;
+ self->sibling[1].mutex = &self->mutex;
+ self->sibling[1].diverge = 0;
+ self->sibling[1].prog = &self->root_prog;
+ self->sibling[1].num_waits = 1;
+ self->sibling[1].metadata = _metadata;
+}
+
+FIXTURE_TEARDOWN(TSYNC)
+{
+ int sib = 0;
+
+ if (self->root_prog.filter)
+ free(self->root_prog.filter);
+ if (self->apply_prog.filter)
+ free(self->apply_prog.filter);
+
+ for ( ; sib < self->sibling_count; ++sib) {
+ struct tsync_sibling *s = &self->sibling[sib];
+ void *status;
+
+ if (!s->tid)
+ continue;
+ if (pthread_kill(s->tid, 0)) {
+ pthread_cancel(s->tid);
+ pthread_join(s->tid, &status);
+ }
+ }
+ pthread_mutex_destroy(&self->mutex);
+ pthread_cond_destroy(&self->cond);
+ sem_destroy(&self->started);
+}
+
+void *tsync_sibling(void *data)
+{
+ long ret = 0;
+ struct tsync_sibling *me = data;
+
+ me->system_tid = syscall(__NR_gettid);
+
+ pthread_mutex_lock(me->mutex);
+ if (me->diverge) {
+ /* Just re-apply the root prog to fork the tree */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
+ me->prog, 0, 0);
+ }
+ sem_post(me->started);
+ /* Return outside of started so parent notices failures. */
+ if (ret) {
+ pthread_mutex_unlock(me->mutex);
+ return (void *)SIBLING_EXIT_FAILURE;
+ }
+ do {
+ pthread_cond_wait(me->cond, me->mutex);
+ me->num_waits = me->num_waits - 1;
+ } while (me->num_waits);
+ pthread_mutex_unlock(me->mutex);
+
+ ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
+ if (!ret)
+ return (void *)SIBLING_EXIT_NEWPRIVS;
+ read(0, NULL, 0);
+ return (void *)SIBLING_EXIT_UNKILLED;
+}
+
+void tsync_start_sibling(struct tsync_sibling *sibling)
+{
+ pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
+}
+
+TEST_F(TSYNC, siblings_fail_prctl)
+{
+ long ret;
+ void *status;
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ /* Check prctl failure detection by requesting sib 0 diverge. */
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ ASSERT_EQ(0, ret) {
+ TH_LOG("setting filter failed");
+ }
+
+ self->sibling[0].diverge = 1;
+ tsync_start_sibling(&self->sibling[0]);
+ tsync_start_sibling(&self->sibling[1]);
+
+ while (self->sibling_count < TSYNC_SIBLINGS) {
+ sem_wait(&self->started);
+ self->sibling_count++;
+ }
+
+ /* Signal the threads to clean up*/
+ pthread_mutex_lock(&self->mutex);
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+
+ /* Ensure diverging sibling failed to call prctl. */
+ pthread_join(self->sibling[0].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
+ pthread_join(self->sibling[1].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+}
+
+TEST_F(TSYNC, two_siblings_with_ancestor)
+{
+ long ret;
+ void *status;
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
+ }
+ tsync_start_sibling(&self->sibling[0]);
+ tsync_start_sibling(&self->sibling[1]);
+
+ while (self->sibling_count < TSYNC_SIBLINGS) {
+ sem_wait(&self->started);
+ self->sibling_count++;
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+ &self->apply_prog);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Could install filter on all threads!");
+ }
+ /* Tell the siblings to test the policy */
+ pthread_mutex_lock(&self->mutex);
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+ /* Ensure they are both killed and don't exit cleanly. */
+ pthread_join(self->sibling[0].tid, &status);
+ EXPECT_EQ(0x0, (long)status);
+ pthread_join(self->sibling[1].tid, &status);
+ EXPECT_EQ(0x0, (long)status);
+}
+
+TEST_F(TSYNC, two_sibling_want_nnp)
+{
+ void *status;
+
+ /* start siblings before any prctl() operations */
+ tsync_start_sibling(&self->sibling[0]);
+ tsync_start_sibling(&self->sibling[1]);
+ while (self->sibling_count < TSYNC_SIBLINGS) {
+ sem_wait(&self->started);
+ self->sibling_count++;
+ }
+
+ /* Tell the siblings to test no policy */
+ pthread_mutex_lock(&self->mutex);
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+
+ /* Ensure they are both upset about lacking nnp. */
+ pthread_join(self->sibling[0].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
+ pthread_join(self->sibling[1].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
+}
+
+TEST_F(TSYNC, two_siblings_with_no_filter)
+{
+ long ret;
+ void *status;
+
+ /* start siblings before any prctl() operations */
+ tsync_start_sibling(&self->sibling[0]);
+ tsync_start_sibling(&self->sibling[1]);
+ while (self->sibling_count < TSYNC_SIBLINGS) {
+ sem_wait(&self->started);
+ self->sibling_count++;
+ }
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+ &self->apply_prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Could install filter on all threads!");
+ }
+
+ /* Tell the siblings to test the policy */
+ pthread_mutex_lock(&self->mutex);
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+
+ /* Ensure they are both killed and don't exit cleanly. */
+ pthread_join(self->sibling[0].tid, &status);
+ EXPECT_EQ(0x0, (long)status);
+ pthread_join(self->sibling[1].tid, &status);
+ EXPECT_EQ(0x0, (long)status);
+}
+
+TEST_F(TSYNC, two_siblings_with_one_divergence)
+{
+ long ret;
+ void *status;
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
+ }
+ self->sibling[0].diverge = 1;
+ tsync_start_sibling(&self->sibling[0]);
+ tsync_start_sibling(&self->sibling[1]);
+
+ while (self->sibling_count < TSYNC_SIBLINGS) {
+ sem_wait(&self->started);
+ self->sibling_count++;
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+ &self->apply_prog);
+ ASSERT_EQ(self->sibling[0].system_tid, ret) {
+ TH_LOG("Did not fail on diverged sibling.");
+ }
+
+ /* Wake the threads */
+ pthread_mutex_lock(&self->mutex);
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+
+ /* Ensure they are both unkilled. */
+ pthread_join(self->sibling[0].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+ pthread_join(self->sibling[1].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+}
+
+TEST_F(TSYNC, two_siblings_not_under_filter)
+{
+ long ret, sib;
+ void *status;
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ /*
+ * Sibling 0 will have its own seccomp policy
+ * and Sibling 1 will not be under seccomp at
+ * all. Sibling 1 will enter seccomp and 0
+ * will cause failure.
+ */
+ self->sibling[0].diverge = 1;
+ tsync_start_sibling(&self->sibling[0]);
+ tsync_start_sibling(&self->sibling[1]);
+
+ while (self->sibling_count < TSYNC_SIBLINGS) {
+ sem_wait(&self->started);
+ self->sibling_count++;
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+ &self->apply_prog);
+ ASSERT_EQ(ret, self->sibling[0].system_tid) {
+ TH_LOG("Did not fail on diverged sibling.");
+ }
+ sib = 1;
+ if (ret == self->sibling[0].system_tid)
+ sib = 0;
+
+ pthread_mutex_lock(&self->mutex);
+
+ /* Increment the other siblings num_waits so we can clean up
+ * the one we just saw.
+ */
+ self->sibling[!sib].num_waits += 1;
+
+ /* Signal the thread to clean up*/
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+ pthread_join(self->sibling[sib].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+ /* Poll for actual task death. pthread_join doesn't guarantee it. */
+ while (!kill(self->sibling[sib].system_tid, 0))
+ sleep(0.1);
+ /* Switch to the remaining sibling */
+ sib = !sib;
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+ &self->apply_prog);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Expected the remaining sibling to sync");
+ };
+
+ pthread_mutex_lock(&self->mutex);
+
+ /* If remaining sibling didn't have a chance to wake up during
+ * the first broadcast, manually reduce the num_waits now.
+ */
+ if (self->sibling[sib].num_waits > 1)
+ self->sibling[sib].num_waits = 1;
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+ pthread_join(self->sibling[sib].tid, &status);
+ EXPECT_EQ(0, (long)status);
+ /* Poll for actual task death. pthread_join doesn't guarantee it. */
+ while (!kill(self->sibling[sib].system_tid, 0))
+ sleep(0.1);
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+ &self->apply_prog);
+ ASSERT_EQ(0, ret); /* just us chickens */
+}
+
+/* Make sure restarted syscalls are seen directly as "restart_syscall". */
+TEST(syscall_restart)
+{
+ long ret;
+ unsigned long msg;
+ pid_t child_pid;
+ int pipefd[2];
+ int status;
+ siginfo_t info = { };
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+
+#ifdef __NR_sigreturn
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
+#endif
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 4, 0),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
+
+ /* Allow __NR_write for easy logging. */
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ /* The nanosleep jump target. */
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
+ /* The restart_syscall jump target. */
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+#if defined(__arm__)
+ struct utsname utsbuf;
+#endif
+
+ ASSERT_EQ(0, pipe(pipefd));
+
+ child_pid = fork();
+ ASSERT_LE(0, child_pid);
+ if (child_pid == 0) {
+ /* Child uses EXPECT not ASSERT to deliver status correctly. */
+ char buf = ' ';
+ struct timespec timeout = { };
+
+ /* Attach parent as tracer and stop. */
+ EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
+ EXPECT_EQ(0, raise(SIGSTOP));
+
+ EXPECT_EQ(0, close(pipefd[1]));
+
+ EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ EXPECT_EQ(0, ret) {
+ TH_LOG("Failed to install filter!");
+ }
+
+ EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
+ TH_LOG("Failed to read() sync from parent");
+ }
+ EXPECT_EQ('.', buf) {
+ TH_LOG("Failed to get sync data from read()");
+ }
+
+ /* Start nanosleep to be interrupted. */
+ timeout.tv_sec = 1;
+ errno = 0;
+ EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
+ TH_LOG("Call to nanosleep() failed (errno %d)", errno);
+ }
+
+ /* Read final sync from parent. */
+ EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
+ TH_LOG("Failed final read() from parent");
+ }
+ EXPECT_EQ('!', buf) {
+ TH_LOG("Failed to get final data from read()");
+ }
+
+ /* Directly report the status of our test harness results. */
+ syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
+ : EXIT_FAILURE);
+ }
+ EXPECT_EQ(0, close(pipefd[0]));
+
+ /* Attach to child, setup options, and release. */
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ ASSERT_EQ(true, WIFSTOPPED(status));
+ ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
+ PTRACE_O_TRACESECCOMP));
+ ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+ ASSERT_EQ(1, write(pipefd[1], ".", 1));
+
+ /* Wait for nanosleep() to start. */
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ ASSERT_EQ(true, WIFSTOPPED(status));
+ ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
+ ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
+ ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
+ ASSERT_EQ(0x100, msg);
+ EXPECT_EQ(__NR_nanosleep, get_syscall(_metadata, child_pid));
+
+ /* Might as well check siginfo for sanity while we're here. */
+ ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
+ ASSERT_EQ(SIGTRAP, info.si_signo);
+ ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
+ EXPECT_EQ(0, info.si_errno);
+ EXPECT_EQ(getuid(), info.si_uid);
+ /* Verify signal delivery came from child (seccomp-triggered). */
+ EXPECT_EQ(child_pid, info.si_pid);
+
+ /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
+ ASSERT_EQ(0, kill(child_pid, SIGSTOP));
+ ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ ASSERT_EQ(true, WIFSTOPPED(status));
+ ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
+ /* Verify signal delivery came from parent now. */
+ ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
+ EXPECT_EQ(getpid(), info.si_pid);
+
+ /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
+ ASSERT_EQ(0, kill(child_pid, SIGCONT));
+ ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ ASSERT_EQ(true, WIFSTOPPED(status));
+ ASSERT_EQ(SIGCONT, WSTOPSIG(status));
+ ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+
+ /* Wait for restart_syscall() to start. */
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ ASSERT_EQ(true, WIFSTOPPED(status));
+ ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
+ ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
+ ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
+
+ ASSERT_EQ(0x200, msg);
+ ret = get_syscall(_metadata, child_pid);
+#if defined(__arm__)
+ /*
+ * FIXME:
+ * - native ARM registers do NOT expose true syscall.
+ * - compat ARM registers on ARM64 DO expose true syscall.
+ */
+ ASSERT_EQ(0, uname(&utsbuf));
+ if (strncmp(utsbuf.machine, "arm", 3) == 0) {
+ EXPECT_EQ(__NR_nanosleep, ret);
+ } else
+#endif
+ {
+ EXPECT_EQ(__NR_restart_syscall, ret);
+ }
+
+ /* Write again to end test. */
+ ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+ ASSERT_EQ(1, write(pipefd[1], "!", 1));
+ EXPECT_EQ(0, close(pipefd[1]));
+
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ if (WIFSIGNALED(status) || WEXITSTATUS(status))
+ _metadata->passed = 0;
+}
+
+/*
+ * TODO:
+ * - add microbenchmarks
+ * - expand NNP testing
+ * - better arch-specific TRACE and TRAP handlers.
+ * - endianness checking when appropriate
+ * - 64-bit arg prodding
+ * - arch value testing (x86 modes especially)
+ * - ...
+ */
+
+TEST_HARNESS_MAIN
diff --git a/seccomp/test_harness.h b/seccomp/test_harness.h
new file mode 100644
index 0000000..fb28416
--- /dev/null
+++ b/seccomp/test_harness.h
@@ -0,0 +1,534 @@
+/*
+ * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
+ * Use of this source code is governed by the GPLv2 license.
+ *
+ * test_harness.h: simple C unit test helper.
+ *
+ * Usage:
+ * #include "test_harness.h"
+ * TEST(standalone_test) {
+ * do_some_stuff;
+ * EXPECT_GT(10, stuff) {
+ * stuff_state_t state;
+ * enumerate_stuff_state(&state);
+ * TH_LOG("expectation failed with state: %s", state.msg);
+ * }
+ * more_stuff;
+ * ASSERT_NE(some_stuff, NULL) TH_LOG("how did it happen?!");
+ * last_stuff;
+ * EXPECT_EQ(0, last_stuff);
+ * }
+ *
+ * FIXTURE(my_fixture) {
+ * mytype_t *data;
+ * int awesomeness_level;
+ * };
+ * FIXTURE_SETUP(my_fixture) {
+ * self->data = mytype_new();
+ * ASSERT_NE(NULL, self->data);
+ * }
+ * FIXTURE_TEARDOWN(my_fixture) {
+ * mytype_free(self->data);
+ * }
+ * TEST_F(my_fixture, data_is_good) {
+ * EXPECT_EQ(1, is_my_data_good(self->data));
+ * }
+ *
+ * TEST_HARNESS_MAIN
+ *
+ * API inspired by code.google.com/p/googletest
+ */
+#ifndef TEST_HARNESS_H_
+#define TEST_HARNESS_H_
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+/* All exported functionality should be declared through this macro. */
+#define TEST_API(x) _##x
+
+/*
+ * Exported APIs
+ */
+
+/* TEST(name) { implementation }
+ * Defines a test by name.
+ * Names must be unique and tests must not be run in parallel. The
+ * implementation containing block is a function and scoping should be treated
+ * as such. Returning early may be performed with a bare "return;" statement.
+ *
+ * EXPECT_* and ASSERT_* are valid in a TEST() { } context.
+ */
+#define TEST TEST_API(TEST)
+
+/* TEST_SIGNAL(name, signal) { implementation }
+ * Defines a test by name and the expected term signal.
+ * Names must be unique and tests must not be run in parallel. The
+ * implementation containing block is a function and scoping should be treated
+ * as such. Returning early may be performed with a bare "return;" statement.
+ *
+ * EXPECT_* and ASSERT_* are valid in a TEST() { } context.
+ */
+#define TEST_SIGNAL TEST_API(TEST_SIGNAL)
+
+/* FIXTURE(datatype name) {
+ * type property1;
+ * ...
+ * };
+ * Defines the data provided to TEST_F()-defined tests as |self|. It should be
+ * populated and cleaned up using FIXTURE_SETUP and FIXTURE_TEARDOWN.
+ */
+#define FIXTURE TEST_API(FIXTURE)
+
+/* FIXTURE_DATA(datatype name)
+ * This call may be used when the type of the fixture data
+ * is needed. In general, this should not be needed unless
+ * the |self| is being passed to a helper directly.
+ */
+#define FIXTURE_DATA TEST_API(FIXTURE_DATA)
+
+/* FIXTURE_SETUP(fixture name) { implementation }
+ * Populates the required "setup" function for a fixture. An instance of the
+ * datatype defined with _FIXTURE_DATA will be exposed as |self| for the
+ * implementation.
+ *
+ * ASSERT_* are valid for use in this context and will prempt the execution
+ * of any dependent fixture tests.
+ *
+ * A bare "return;" statement may be used to return early.
+ */
+#define FIXTURE_SETUP TEST_API(FIXTURE_SETUP)
+
+/* FIXTURE_TEARDOWN(fixture name) { implementation }
+ * Populates the required "teardown" function for a fixture. An instance of the
+ * datatype defined with _FIXTURE_DATA will be exposed as |self| for the
+ * implementation to clean up.
+ *
+ * A bare "return;" statement may be used to return early.
+ */
+#define FIXTURE_TEARDOWN TEST_API(FIXTURE_TEARDOWN)
+
+/* TEST_F(fixture, name) { implementation }
+ * Defines a test that depends on a fixture (e.g., is part of a test case).
+ * Very similar to TEST() except that |self| is the setup instance of fixture's
+ * datatype exposed for use by the implementation.
+ */
+#define TEST_F TEST_API(TEST_F)
+
+#define TEST_F_SIGNAL TEST_API(TEST_F_SIGNAL)
+
+/* Use once to append a main() to the test file. E.g.,
+ * TEST_HARNESS_MAIN
+ */
+#define TEST_HARNESS_MAIN TEST_API(TEST_HARNESS_MAIN)
+
+/*
+ * Operators for use in TEST and TEST_F.
+ * ASSERT_* calls will stop test execution immediately.
+ * EXPECT_* calls will emit a failure warning, note it, and continue.
+ */
+
+/* ASSERT_EQ(expected, measured): expected == measured */
+#define ASSERT_EQ TEST_API(ASSERT_EQ)
+/* ASSERT_NE(expected, measured): expected != measured */
+#define ASSERT_NE TEST_API(ASSERT_NE)
+/* ASSERT_LT(expected, measured): expected < measured */
+#define ASSERT_LT TEST_API(ASSERT_LT)
+/* ASSERT_LE(expected, measured): expected <= measured */
+#define ASSERT_LE TEST_API(ASSERT_LE)
+/* ASSERT_GT(expected, measured): expected > measured */
+#define ASSERT_GT TEST_API(ASSERT_GT)
+/* ASSERT_GE(expected, measured): expected >= measured */
+#define ASSERT_GE TEST_API(ASSERT_GE)
+/* ASSERT_NULL(measured): NULL == measured */
+#define ASSERT_NULL TEST_API(ASSERT_NULL)
+/* ASSERT_TRUE(measured): measured != 0 */
+#define ASSERT_TRUE TEST_API(ASSERT_TRUE)
+/* ASSERT_FALSE(measured): measured == 0 */
+#define ASSERT_FALSE TEST_API(ASSERT_FALSE)
+/* ASSERT_STREQ(expected, measured): !strcmp(expected, measured) */
+#define ASSERT_STREQ TEST_API(ASSERT_STREQ)
+/* ASSERT_STRNE(expected, measured): strcmp(expected, measured) */
+#define ASSERT_STRNE TEST_API(ASSERT_STRNE)
+/* EXPECT_EQ(expected, measured): expected == measured */
+#define EXPECT_EQ TEST_API(EXPECT_EQ)
+/* EXPECT_NE(expected, measured): expected != measured */
+#define EXPECT_NE TEST_API(EXPECT_NE)
+/* EXPECT_LT(expected, measured): expected < measured */
+#define EXPECT_LT TEST_API(EXPECT_LT)
+/* EXPECT_LE(expected, measured): expected <= measured */
+#define EXPECT_LE TEST_API(EXPECT_LE)
+/* EXPECT_GT(expected, measured): expected > measured */
+#define EXPECT_GT TEST_API(EXPECT_GT)
+/* EXPECT_GE(expected, measured): expected >= measured */
+#define EXPECT_GE TEST_API(EXPECT_GE)
+/* EXPECT_NULL(measured): NULL == measured */
+#define EXPECT_NULL TEST_API(EXPECT_NULL)
+/* EXPECT_TRUE(measured): 0 != measured */
+#define EXPECT_TRUE TEST_API(EXPECT_TRUE)
+/* EXPECT_FALSE(measured): 0 == measured */
+#define EXPECT_FALSE TEST_API(EXPECT_FALSE)
+/* EXPECT_STREQ(expected, measured): !strcmp(expected, measured) */
+#define EXPECT_STREQ TEST_API(EXPECT_STREQ)
+/* EXPECT_STRNE(expected, measured): strcmp(expected, measured) */
+#define EXPECT_STRNE TEST_API(EXPECT_STRNE)
+
+/* TH_LOG(format, ...)
+ * Optional debug logging function available for use in tests.
+ * Logging may be enabled or disabled by defining TH_LOG_ENABLED.
+ * E.g., #define TH_LOG_ENABLED 1
+ * If no definition is provided, logging is enabled by default.
+ */
+#define TH_LOG TEST_API(TH_LOG)
+
+/*
+ * Internal implementation.
+ *
+ */
+
+/* Utilities exposed to the test definitions */
+#ifndef TH_LOG_STREAM
+# define TH_LOG_STREAM stderr
+#endif
+
+#ifndef TH_LOG_ENABLED
+# define TH_LOG_ENABLED 1
+#endif
+
+#define _TH_LOG(fmt, ...) do { \
+ if (TH_LOG_ENABLED) \
+ __TH_LOG(fmt, ##__VA_ARGS__); \
+} while (0)
+
+/* Unconditional logger for internal use. */
+#define __TH_LOG(fmt, ...) \
+ fprintf(TH_LOG_STREAM, "%s:%d:%s:" fmt "\n", \
+ __FILE__, __LINE__, _metadata->name, ##__VA_ARGS__)
+
+/* Defines the test function and creates the registration stub. */
+#define _TEST(test_name) __TEST_IMPL(test_name, -1)
+
+#define _TEST_SIGNAL(test_name, signal) __TEST_IMPL(test_name, signal)
+
+#define __TEST_IMPL(test_name, _signal) \
+ static void test_name(struct __test_metadata *_metadata); \
+ static struct __test_metadata _##test_name##_object = \
+ { name: "global." #test_name, \
+ fn: &test_name, termsig: _signal }; \
+ static void __attribute__((constructor)) _register_##test_name(void) \
+ { \
+ __register_test(&_##test_name##_object); \
+ } \
+ static void test_name( \
+ struct __test_metadata __attribute__((unused)) *_metadata)
+
+/* Wraps the struct name so we have one less argument to pass around. */
+#define _FIXTURE_DATA(fixture_name) struct _test_data_##fixture_name
+
+/* Called once per fixture to setup the data and register. */
+#define _FIXTURE(fixture_name) \
+ static void __attribute__((constructor)) \
+ _register_##fixture_name##_data(void) \
+ { \
+ __fixture_count++; \
+ } \
+ _FIXTURE_DATA(fixture_name)
+
+/* Prepares the setup function for the fixture. |_metadata| is included
+ * so that ASSERT_* work as a convenience.
+ */
+#define _FIXTURE_SETUP(fixture_name) \
+ void fixture_name##_setup( \
+ struct __test_metadata __attribute__((unused)) *_metadata, \
+ _FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+#define _FIXTURE_TEARDOWN(fixture_name) \
+ void fixture_name##_teardown( \
+ struct __test_metadata __attribute__((unused)) *_metadata, \
+ _FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+
+/* Emits test registration and helpers for fixture-based test
+ * cases.
+ * TODO(wad) register fixtures on dedicated test lists.
+ */
+#define _TEST_F(fixture_name, test_name) \
+ __TEST_F_IMPL(fixture_name, test_name, -1)
+
+#define _TEST_F_SIGNAL(fixture_name, test_name, signal) \
+ __TEST_F_IMPL(fixture_name, test_name, signal)
+
+#define __TEST_F_IMPL(fixture_name, test_name, signal) \
+ static void fixture_name##_##test_name( \
+ struct __test_metadata *_metadata, \
+ _FIXTURE_DATA(fixture_name) *self); \
+ static inline void wrapper_##fixture_name##_##test_name( \
+ struct __test_metadata *_metadata) \
+ { \
+ /* fixture data is alloced, setup, and torn down per call. */ \
+ _FIXTURE_DATA(fixture_name) self; \
+ memset(&self, 0, sizeof(_FIXTURE_DATA(fixture_name))); \
+ fixture_name##_setup(_metadata, &self); \
+ /* Let setup failure terminate early. */ \
+ if (!_metadata->passed) \
+ return; \
+ fixture_name##_##test_name(_metadata, &self); \
+ fixture_name##_teardown(_metadata, &self); \
+ } \
+ static struct __test_metadata \
+ _##fixture_name##_##test_name##_object = { \
+ name: #fixture_name "." #test_name, \
+ fn: &wrapper_##fixture_name##_##test_name, \
+ termsig: signal, \
+ }; \
+ static void __attribute__((constructor)) \
+ _register_##fixture_name##_##test_name(void) \
+ { \
+ __register_test(&_##fixture_name##_##test_name##_object); \
+ } \
+ static void fixture_name##_##test_name( \
+ struct __test_metadata __attribute__((unused)) *_metadata, \
+ _FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+
+/* Exports a simple wrapper to run the test harness. */
+#define _TEST_HARNESS_MAIN \
+ static void __attribute__((constructor)) \
+ __constructor_order_last(void) \
+ { \
+ if (!__constructor_order) \
+ __constructor_order = _CONSTRUCTOR_ORDER_BACKWARD; \
+ } \
+ int main(int argc, char **argv) { \
+ return test_harness_run(argc, argv); \
+ }
+
+#define _ASSERT_EQ(_expected, _seen) \
+ __EXPECT(_expected, _seen, ==, 1)
+#define _ASSERT_NE(_expected, _seen) \
+ __EXPECT(_expected, _seen, !=, 1)
+#define _ASSERT_LT(_expected, _seen) \
+ __EXPECT(_expected, _seen, <, 1)
+#define _ASSERT_LE(_expected, _seen) \
+ __EXPECT(_expected, _seen, <=, 1)
+#define _ASSERT_GT(_expected, _seen) \
+ __EXPECT(_expected, _seen, >, 1)
+#define _ASSERT_GE(_expected, _seen) \
+ __EXPECT(_expected, _seen, >=, 1)
+#define _ASSERT_NULL(_seen) \
+ __EXPECT(NULL, _seen, ==, 1)
+
+#define _ASSERT_TRUE(_seen) \
+ _ASSERT_NE(0, _seen)
+#define _ASSERT_FALSE(_seen) \
+ _ASSERT_EQ(0, _seen)
+#define _ASSERT_STREQ(_expected, _seen) \
+ __EXPECT_STR(_expected, _seen, ==, 1)
+#define _ASSERT_STRNE(_expected, _seen) \
+ __EXPECT_STR(_expected, _seen, !=, 1)
+
+#define _EXPECT_EQ(_expected, _seen) \
+ __EXPECT(_expected, _seen, ==, 0)
+#define _EXPECT_NE(_expected, _seen) \
+ __EXPECT(_expected, _seen, !=, 0)
+#define _EXPECT_LT(_expected, _seen) \
+ __EXPECT(_expected, _seen, <, 0)
+#define _EXPECT_LE(_expected, _seen) \
+ __EXPECT(_expected, _seen, <=, 0)
+#define _EXPECT_GT(_expected, _seen) \
+ __EXPECT(_expected, _seen, >, 0)
+#define _EXPECT_GE(_expected, _seen) \
+ __EXPECT(_expected, _seen, >=, 0)
+
+#define _EXPECT_NULL(_seen) \
+ __EXPECT(NULL, _seen, ==, 0)
+#define _EXPECT_TRUE(_seen) \
+ _EXPECT_NE(0, _seen)
+#define _EXPECT_FALSE(_seen) \
+ _EXPECT_EQ(0, _seen)
+
+#define _EXPECT_STREQ(_expected, _seen) \
+ __EXPECT_STR(_expected, _seen, ==, 0)
+#define _EXPECT_STRNE(_expected, _seen) \
+ __EXPECT_STR(_expected, _seen, !=, 0)
+
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
+
+/* Support an optional handler after and ASSERT_* or EXPECT_*. The approach is
+ * not thread-safe, but it should be fine in most sane test scenarios.
+ *
+ * Using __bail(), which optionally abort()s, is the easiest way to early
+ * return while still providing an optional block to the API consumer.
+ */
+#define OPTIONAL_HANDLER(_assert) \
+ for (; _metadata->trigger; _metadata->trigger = __bail(_assert))
+
+#define __EXPECT(_expected, _seen, _t, _assert) do { \
+ /* Avoid multiple evaluation of the cases */ \
+ __typeof__(_expected) __exp = (_expected); \
+ __typeof__(_seen) __seen = (_seen); \
+ if (!(__exp _t __seen)) { \
+ unsigned long long __exp_print = (unsigned long long)__exp; \
+ unsigned long long __seen_print = (unsigned long long)__seen; \
+ __TH_LOG("Expected %s (%llu) %s %s (%llu)", \
+ #_expected, __exp_print, #_t, \
+ #_seen, __seen_print); \
+ _metadata->passed = 0; \
+ /* Ensure the optional handler is triggered */ \
+ _metadata->trigger = 1; \
+ } \
+} while (0); OPTIONAL_HANDLER(_assert)
+
+#define __EXPECT_STR(_expected, _seen, _t, _assert) do { \
+ const char *__exp = (_expected); \
+ const char *__seen = (_seen); \
+ if (!(strcmp(__exp, __seen) _t 0)) { \
+ __TH_LOG("Expected '%s' %s '%s'.", __exp, #_t, __seen); \
+ _metadata->passed = 0; \
+ _metadata->trigger = 1; \
+ } \
+} while (0); OPTIONAL_HANDLER(_assert)
+
+/* Contains all the information for test execution and status checking. */
+struct __test_metadata {
+ const char *name;
+ void (*fn)(struct __test_metadata *);
+ int termsig;
+ int passed;
+ int trigger; /* extra handler after the evaluation */
+ struct __test_metadata *prev, *next;
+};
+
+/* Storage for the (global) tests to be run. */
+static struct __test_metadata *__test_list;
+static unsigned int __test_count;
+static unsigned int __fixture_count;
+static int __constructor_order;
+
+#define _CONSTRUCTOR_ORDER_FORWARD 1
+#define _CONSTRUCTOR_ORDER_BACKWARD -1
+
+/*
+ * Since constructors are called in reverse order, reverse the test
+ * list so tests are run in source declaration order.
+ * https://gcc.gnu.org/onlinedocs/gccint/Initialization.html
+ * However, it seems not all toolchains do this correctly, so use
+ * __constructor_order to detect which direction is called first
+ * and adjust list building logic to get things running in the right
+ * direction.
+ */
+static inline void __register_test(struct __test_metadata *t)
+{
+ __test_count++;
+ /* Circular linked list where only prev is circular. */
+ if (__test_list == NULL) {
+ __test_list = t;
+ t->next = NULL;
+ t->prev = t;
+ return;
+ }
+ if (__constructor_order == _CONSTRUCTOR_ORDER_FORWARD) {
+ t->next = NULL;
+ t->prev = __test_list->prev;
+ t->prev->next = t;
+ __test_list->prev = t;
+ } else {
+ t->next = __test_list;
+ t->next->prev = t;
+ t->prev = t;
+ __test_list = t;
+ }
+}
+
+static inline int __bail(int for_realz)
+{
+ if (for_realz)
+ abort();
+ return 0;
+}
+
+void __run_test(struct __test_metadata *t)
+{
+ pid_t child_pid;
+ int status;
+
+ t->passed = 1;
+ t->trigger = 0;
+ printf("[ RUN ] %s\n", t->name);
+ child_pid = fork();
+ if (child_pid < 0) {
+ printf("ERROR SPAWNING TEST CHILD\n");
+ t->passed = 0;
+ } else if (child_pid == 0) {
+ t->fn(t);
+ _exit(t->passed);
+ } else {
+ /* TODO(wad) add timeout support. */
+ waitpid(child_pid, &status, 0);
+ if (WIFEXITED(status)) {
+ t->passed = t->termsig == -1 ? WEXITSTATUS(status) : 0;
+ if (t->termsig != -1) {
+ fprintf(TH_LOG_STREAM,
+ "%s: Test exited normally "
+ "instead of by signal (code: %d)\n",
+ t->name,
+ WEXITSTATUS(status));
+ }
+ } else if (WIFSIGNALED(status)) {
+ t->passed = 0;
+ if (WTERMSIG(status) == SIGABRT) {
+ fprintf(TH_LOG_STREAM,
+ "%s: Test terminated by assertion\n",
+ t->name);
+ } else if (WTERMSIG(status) == t->termsig) {
+ t->passed = 1;
+ } else {
+ fprintf(TH_LOG_STREAM,
+ "%s: Test terminated unexpectedly "
+ "by signal %d\n",
+ t->name,
+ WTERMSIG(status));
+ }
+ } else {
+ fprintf(TH_LOG_STREAM,
+ "%s: Test ended in some other way [%u]\n",
+ t->name,
+ status);
+ }
+ }
+ printf("[ %4s ] %s\n", (t->passed ? "OK" : "FAIL"), t->name);
+}
+
+static int test_harness_run(int __attribute__((unused)) argc,
+ char __attribute__((unused)) **argv)
+{
+ struct __test_metadata *t;
+ int ret = 0;
+ unsigned int count = 0;
+ unsigned int pass_count = 0;
+
+ /* TODO(wad) add optional arguments similar to gtest. */
+ printf("[==========] Running %u tests from %u test cases.\n",
+ __test_count, __fixture_count + 1);
+ for (t = __test_list; t; t = t->next) {
+ count++;
+ __run_test(t);
+ if (t->passed)
+ pass_count++;
+ else
+ ret = 1;
+ }
+ printf("[==========] %u / %u tests passed.\n", pass_count, count);
+ printf("[ %s ]\n", (ret ? "FAILED" : "PASSED"));
+ return ret;
+}
+
+static void __attribute__((constructor)) __constructor_order_first(void)
+{
+ if (!__constructor_order)
+ __constructor_order = _CONSTRUCTOR_ORDER_FORWARD;
+}
+
+#endif /* TEST_HARNESS_H_ */
diff --git a/size/Makefile b/size/Makefile
index 04dc25e..bbd0b53 100644
--- a/size/Makefile
+++ b/size/Makefile
@@ -1,12 +1,11 @@
-CC = $(CROSS_COMPILE)gcc
-
all: get_size
get_size: get_size.c
$(CC) -static -ffreestanding -nostartfiles -s $< -o $@
-run_tests: all
- ./get_size
+TEST_PROGS := get_size
+
+include ../lib.mk
clean:
$(RM) get_size
diff --git a/static_keys/Makefile b/static_keys/Makefile
new file mode 100644
index 0000000..9cdadf3
--- /dev/null
+++ b/static_keys/Makefile
@@ -0,0 +1,8 @@
+# Makefile for static keys selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := test_static_keys.sh
+
+include ../lib.mk
diff --git a/static_keys/test_static_keys.sh b/static_keys/test_static_keys.sh
new file mode 100755
index 0000000..1261e3f
--- /dev/null
+++ b/static_keys/test_static_keys.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# Runs static keys kernel module tests
+
+if /sbin/modprobe -q test_static_key_base; then
+ if /sbin/modprobe -q test_static_keys; then
+ echo "static_key: ok"
+ /sbin/modprobe -q -r test_static_keys
+ /sbin/modprobe -q -r test_static_key_base
+ else
+ echo "static_keys: [FAIL]"
+ /sbin/modprobe -q -r test_static_key_base
+ fi
+else
+ echo "static_key: [FAIL]"
+ exit 1
+fi
diff --git a/sysctl/Makefile b/sysctl/Makefile
index 0a92ada..b3c33e0 100644
--- a/sysctl/Makefile
+++ b/sysctl/Makefile
@@ -4,16 +4,10 @@
# No binaries, but make sure arg-less "make" doesn't trigger "run_tests".
all:
-# Allow specific tests to be selected.
-test_num:
- @/bin/sh ./run_numerictests
+TEST_PROGS := run_numerictests run_stringtests
+TEST_FILES := common_tests
-test_string:
- @/bin/sh ./run_stringtests
-
-run_tests: all test_num test_string
+include ../lib.mk
# Nothing to clean up.
clean:
-
-.PHONY: all run_tests clean test_num test_string
diff --git a/sysctl/run_numerictests b/sysctl/run_numerictests
index 8510f93..8510f93 100644..100755
--- a/sysctl/run_numerictests
+++ b/sysctl/run_numerictests
diff --git a/sysctl/run_stringtests b/sysctl/run_stringtests
index 90a9293..90a9293 100644..100755
--- a/sysctl/run_stringtests
+++ b/sysctl/run_stringtests
diff --git a/timers/.gitignore b/timers/.gitignore
new file mode 100644
index 0000000..ced9981
--- /dev/null
+++ b/timers/.gitignore
@@ -0,0 +1,18 @@
+alarmtimer-suspend
+change_skew
+clocksource-switch
+inconsistency-check
+leap-a-day
+leapcrash
+mqueue-lat
+nanosleep
+nsleep-lat
+posix_timers
+raw_skew
+rtctest
+set-2038
+set-tai
+set-timer-lat
+skew_consistency
+threadtest
+valid-adjtimex
diff --git a/timers/Makefile b/timers/Makefile
index eb2859f..4a1be1b 100644
--- a/timers/Makefile
+++ b/timers/Makefile
@@ -1,8 +1,37 @@
-all:
- gcc posix_timers.c -o posix_timers -lrt
+CC = $(CROSS_COMPILE)gcc
+BUILD_FLAGS = -DKTEST
+CFLAGS += -O3 -Wl,-no-as-needed -Wall $(BUILD_FLAGS)
+LDFLAGS += -lrt -lpthread
-run_tests: all
- ./posix_timers
+# these are all "safe" tests that don't modify
+# system time or require escalated privledges
+TEST_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \
+ inconsistency-check raw_skew threadtest rtctest
+
+TEST_PROGS_EXTENDED = alarmtimer-suspend valid-adjtimex adjtick change_skew \
+ skew_consistency clocksource-switch leap-a-day \
+ leapcrash set-tai set-2038
+
+bins = $(TEST_PROGS) $(TEST_PROGS_EXTENDED)
+
+all: ${bins}
+
+include ../lib.mk
+
+# these tests require escalated privledges
+# and may modify the system time or trigger
+# other behavior like suspend
+run_destructive_tests: run_tests
+ ./alarmtimer-suspend
+ ./valid-adjtimex
+ ./adjtick
+ ./change_skew
+ ./skew_consistency
+ ./clocksource-switch
+ ./leap-a-day -s -i 10
+ ./leapcrash
+ ./set-tai
+ ./set-2038
clean:
- rm -f ./posix_timers
+ rm -f ${bins}
diff --git a/timers/adjtick.c b/timers/adjtick.c
new file mode 100644
index 0000000..9887fd5
--- /dev/null
+++ b/timers/adjtick.c
@@ -0,0 +1,221 @@
+/* adjtimex() tick adjustment test
+ * by: John Stultz <john.stultz@linaro.org>
+ * (C) Copyright Linaro Limited 2015
+ * Licensed under the GPLv2
+ *
+ * To build:
+ * $ gcc adjtick.c -o adjtick -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+ exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+ exit(1);
+}
+#endif
+
+#define CLOCK_MONOTONIC_RAW 4
+
+#define NSEC_PER_SEC 1000000000LL
+#define USEC_PER_SEC 1000000
+
+#define MILLION 1000000
+
+long systick;
+
+long long llabs(long long val)
+{
+ if (val < 0)
+ val = -val;
+ return val;
+}
+
+unsigned long long ts_to_nsec(struct timespec ts)
+{
+ return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
+}
+
+struct timespec nsec_to_ts(long long ns)
+{
+ struct timespec ts;
+
+ ts.tv_sec = ns/NSEC_PER_SEC;
+ ts.tv_nsec = ns%NSEC_PER_SEC;
+
+ return ts;
+}
+
+long long diff_timespec(struct timespec start, struct timespec end)
+{
+ long long start_ns, end_ns;
+
+ start_ns = ts_to_nsec(start);
+ end_ns = ts_to_nsec(end);
+
+ return end_ns - start_ns;
+}
+
+void get_monotonic_and_raw(struct timespec *mon, struct timespec *raw)
+{
+ struct timespec start, mid, end;
+ long long diff = 0, tmp;
+ int i;
+
+ clock_gettime(CLOCK_MONOTONIC, mon);
+ clock_gettime(CLOCK_MONOTONIC_RAW, raw);
+
+ /* Try to get a more tightly bound pairing */
+ for (i = 0; i < 3; i++) {
+ long long newdiff;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ clock_gettime(CLOCK_MONOTONIC_RAW, &mid);
+ clock_gettime(CLOCK_MONOTONIC, &end);
+
+ newdiff = diff_timespec(start, end);
+ if (diff == 0 || newdiff < diff) {
+ diff = newdiff;
+ *raw = mid;
+ tmp = (ts_to_nsec(start) + ts_to_nsec(end))/2;
+ *mon = nsec_to_ts(tmp);
+ }
+ }
+}
+
+long long get_ppm_drift(void)
+{
+ struct timespec mon_start, raw_start, mon_end, raw_end;
+ long long delta1, delta2, eppm;
+
+ get_monotonic_and_raw(&mon_start, &raw_start);
+
+ sleep(15);
+
+ get_monotonic_and_raw(&mon_end, &raw_end);
+
+ delta1 = diff_timespec(mon_start, mon_end);
+ delta2 = diff_timespec(raw_start, raw_end);
+
+ eppm = (delta1*MILLION)/delta2 - MILLION;
+
+ return eppm;
+}
+
+int check_tick_adj(long tickval)
+{
+ long long eppm, ppm;
+ struct timex tx1;
+
+ tx1.modes = ADJ_TICK;
+ tx1.modes |= ADJ_OFFSET;
+ tx1.modes |= ADJ_FREQUENCY;
+ tx1.modes |= ADJ_STATUS;
+
+ tx1.status = STA_PLL;
+ tx1.offset = 0;
+ tx1.freq = 0;
+ tx1.tick = tickval;
+
+ adjtimex(&tx1);
+
+ sleep(1);
+
+ ppm = ((long long)tickval * MILLION)/systick - MILLION;
+ printf("Estimating tick (act: %ld usec, %lld ppm): ", tickval, ppm);
+
+ eppm = get_ppm_drift();
+ printf("%lld usec, %lld ppm", systick + (systick * eppm / MILLION), eppm);
+
+ tx1.modes = 0;
+ adjtimex(&tx1);
+
+ if (tx1.offset || tx1.freq || tx1.tick != tickval) {
+ printf(" [ERROR]\n");
+ printf("\tUnexpected adjtimex return values, make sure ntpd is not running.\n");
+ return -1;
+ }
+
+ /*
+ * Here we use 100ppm difference as an error bound.
+ * We likely should see better, but some coarse clocksources
+ * cannot match the HZ tick size accurately, so we have a
+ * internal correction factor that doesn't scale exactly
+ * with the adjustment, resulting in > 10ppm error during
+ * a 10% adjustment. 100ppm also gives us more breathing
+ * room for interruptions during the measurement.
+ */
+ if (llabs(eppm - ppm) > 100) {
+ printf(" [FAILED]\n");
+ return -1;
+ }
+ printf(" [OK]\n");
+
+ return 0;
+}
+
+int main(int argv, char **argc)
+{
+ struct timespec raw;
+ long tick, max, interval, err;
+ struct timex tx1;
+
+ err = 0;
+ setbuf(stdout, NULL);
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &raw)) {
+ printf("ERR: NO CLOCK_MONOTONIC_RAW\n");
+ return -1;
+ }
+
+ printf("Each iteration takes about 15 seconds\n");
+
+ systick = sysconf(_SC_CLK_TCK);
+ systick = USEC_PER_SEC/sysconf(_SC_CLK_TCK);
+ max = systick/10; /* +/- 10% */
+ interval = max/4; /* in 4 steps each side */
+
+ for (tick = (systick - max); tick < (systick + max); tick += interval) {
+ if (check_tick_adj(tick)) {
+ err = 1;
+ break;
+ }
+ }
+
+ /* Reset things to zero */
+ tx1.modes = ADJ_TICK;
+ tx1.modes |= ADJ_OFFSET;
+ tx1.modes |= ADJ_FREQUENCY;
+
+ tx1.offset = 0;
+ tx1.freq = 0;
+ tx1.tick = systick;
+
+ adjtimex(&tx1);
+
+ if (err)
+ return ksft_exit_fail();
+
+ return ksft_exit_pass();
+}
diff --git a/timers/alarmtimer-suspend.c b/timers/alarmtimer-suspend.c
new file mode 100644
index 0000000..72cacf5
--- /dev/null
+++ b/timers/alarmtimer-suspend.c
@@ -0,0 +1,189 @@
+/* alarmtimer suspend test
+ * John Stultz (john.stultz@linaro.org)
+ * (C) Copyright Linaro 2013
+ * Licensed under the GPLv2
+ *
+ * This test makes sure the alarmtimer & RTC wakeup code is
+ * functioning.
+ *
+ * To build:
+ * $ gcc alarmtimer-suspend.c -o alarmtimer-suspend -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <time.h>
+#include <string.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <pthread.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+ exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+ exit(1);
+}
+#endif
+
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 1
+#define CLOCK_PROCESS_CPUTIME_ID 2
+#define CLOCK_THREAD_CPUTIME_ID 3
+#define CLOCK_MONOTONIC_RAW 4
+#define CLOCK_REALTIME_COARSE 5
+#define CLOCK_MONOTONIC_COARSE 6
+#define CLOCK_BOOTTIME 7
+#define CLOCK_REALTIME_ALARM 8
+#define CLOCK_BOOTTIME_ALARM 9
+#define CLOCK_HWSPECIFIC 10
+#define CLOCK_TAI 11
+#define NR_CLOCKIDS 12
+
+
+#define NSEC_PER_SEC 1000000000ULL
+#define UNREASONABLE_LAT (NSEC_PER_SEC * 5) /* hopefully we resume in 5 secs */
+
+#define SUSPEND_SECS 15
+int alarmcount;
+int alarm_clock_id;
+struct timespec start_time;
+
+
+char *clockstring(int clockid)
+{
+ switch (clockid) {
+ case CLOCK_REALTIME:
+ return "CLOCK_REALTIME";
+ case CLOCK_MONOTONIC:
+ return "CLOCK_MONOTONIC";
+ case CLOCK_PROCESS_CPUTIME_ID:
+ return "CLOCK_PROCESS_CPUTIME_ID";
+ case CLOCK_THREAD_CPUTIME_ID:
+ return "CLOCK_THREAD_CPUTIME_ID";
+ case CLOCK_MONOTONIC_RAW:
+ return "CLOCK_MONOTONIC_RAW";
+ case CLOCK_REALTIME_COARSE:
+ return "CLOCK_REALTIME_COARSE";
+ case CLOCK_MONOTONIC_COARSE:
+ return "CLOCK_MONOTONIC_COARSE";
+ case CLOCK_BOOTTIME:
+ return "CLOCK_BOOTTIME";
+ case CLOCK_REALTIME_ALARM:
+ return "CLOCK_REALTIME_ALARM";
+ case CLOCK_BOOTTIME_ALARM:
+ return "CLOCK_BOOTTIME_ALARM";
+ case CLOCK_TAI:
+ return "CLOCK_TAI";
+ };
+ return "UNKNOWN_CLOCKID";
+}
+
+
+long long timespec_sub(struct timespec a, struct timespec b)
+{
+ long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;
+
+ ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec;
+ return ret;
+}
+
+int final_ret = 0;
+
+void sigalarm(int signo)
+{
+ long long delta_ns;
+ struct timespec ts;
+
+ clock_gettime(alarm_clock_id, &ts);
+ alarmcount++;
+
+ delta_ns = timespec_sub(start_time, ts);
+ delta_ns -= NSEC_PER_SEC * SUSPEND_SECS * alarmcount;
+
+ printf("ALARM(%i): %ld:%ld latency: %lld ns ", alarmcount, ts.tv_sec,
+ ts.tv_nsec, delta_ns);
+
+ if (delta_ns > UNREASONABLE_LAT) {
+ printf("[FAIL]\n");
+ final_ret = -1;
+ } else
+ printf("[OK]\n");
+
+}
+
+int main(void)
+{
+ timer_t tm1;
+ struct itimerspec its1, its2;
+ struct sigevent se;
+ struct sigaction act;
+ int signum = SIGRTMAX;
+
+ /* Set up signal handler: */
+ sigfillset(&act.sa_mask);
+ act.sa_flags = 0;
+ act.sa_handler = sigalarm;
+ sigaction(signum, &act, NULL);
+
+ /* Set up timer: */
+ memset(&se, 0, sizeof(se));
+ se.sigev_notify = SIGEV_SIGNAL;
+ se.sigev_signo = signum;
+ se.sigev_value.sival_int = 0;
+
+ for (alarm_clock_id = CLOCK_REALTIME_ALARM;
+ alarm_clock_id <= CLOCK_BOOTTIME_ALARM;
+ alarm_clock_id++) {
+
+ alarmcount = 0;
+ if (timer_create(alarm_clock_id, &se, &tm1) == -1) {
+ printf("timer_create failled, %s unspported?\n",
+ clockstring(alarm_clock_id));
+ break;
+ }
+
+ clock_gettime(alarm_clock_id, &start_time);
+ printf("Start time (%s): %ld:%ld\n", clockstring(alarm_clock_id),
+ start_time.tv_sec, start_time.tv_nsec);
+ printf("Setting alarm for every %i seconds\n", SUSPEND_SECS);
+ its1.it_value = start_time;
+ its1.it_value.tv_sec += SUSPEND_SECS;
+ its1.it_interval.tv_sec = SUSPEND_SECS;
+ its1.it_interval.tv_nsec = 0;
+
+ timer_settime(tm1, TIMER_ABSTIME, &its1, &its2);
+
+ while (alarmcount < 5)
+ sleep(1); /* First 5 alarms, do nothing */
+
+ printf("Starting suspend loops\n");
+ while (alarmcount < 10) {
+ int ret;
+
+ sleep(3);
+ ret = system("echo mem > /sys/power/state");
+ if (ret)
+ break;
+ }
+ timer_delete(tm1);
+ }
+ if (final_ret)
+ return ksft_exit_fail();
+ return ksft_exit_pass();
+}
diff --git a/timers/change_skew.c b/timers/change_skew.c
new file mode 100644
index 0000000..cb19689
--- /dev/null
+++ b/timers/change_skew.c
@@ -0,0 +1,107 @@
+/* ADJ_FREQ Skew change test
+ * by: john stultz (johnstul@us.ibm.com)
+ * (C) Copyright IBM 2012
+ * Licensed under the GPLv2
+ *
+ * NOTE: This is a meta-test which cranks the ADJ_FREQ knob and
+ * then uses other tests to detect problems. Thus this test requires
+ * that the raw_skew, inconsistency-check and nanosleep tests be
+ * present in the same directory it is run from.
+ *
+ * To build:
+ * $ gcc change_skew.c -o change_skew -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+ exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+ exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000LL
+
+
+int change_skew_test(int ppm)
+{
+ struct timex tx;
+ int ret;
+
+ tx.modes = ADJ_FREQUENCY;
+ tx.freq = ppm << 16;
+
+ ret = adjtimex(&tx);
+ if (ret < 0) {
+ printf("Error adjusting freq\n");
+ return ret;
+ }
+
+ ret = system("./raw_skew");
+ ret |= system("./inconsistency-check");
+ ret |= system("./nanosleep");
+
+ return ret;
+}
+
+
+int main(int argv, char **argc)
+{
+ struct timex tx;
+ int i, ret;
+
+ int ppm[5] = {0, 250, 500, -250, -500};
+
+ /* Kill ntpd */
+ ret = system("killall -9 ntpd");
+
+ /* Make sure there's no offset adjustment going on */
+ tx.modes = ADJ_OFFSET;
+ tx.offset = 0;
+ ret = adjtimex(&tx);
+
+ if (ret < 0) {
+ printf("Maybe you're not running as root?\n");
+ return -1;
+ }
+
+ for (i = 0; i < 5; i++) {
+ printf("Using %i ppm adjustment\n", ppm[i]);
+ ret = change_skew_test(ppm[i]);
+ if (ret)
+ break;
+ }
+
+ /* Set things back */
+ tx.modes = ADJ_FREQUENCY;
+ tx.offset = 0;
+ adjtimex(&tx);
+
+ if (ret) {
+ printf("[FAIL]");
+ return ksft_exit_fail();
+ }
+ printf("[OK]");
+ return ksft_exit_pass();
+}
diff --git a/timers/clocksource-switch.c b/timers/clocksource-switch.c
new file mode 100644
index 0000000..627ec74
--- /dev/null
+++ b/timers/clocksource-switch.c
@@ -0,0 +1,179 @@
+/* Clocksource change test
+ * by: john stultz (johnstul@us.ibm.com)
+ * (C) Copyright IBM 2012
+ * Licensed under the GPLv2
+ *
+ * NOTE: This is a meta-test which quickly changes the clocksourc and
+ * then uses other tests to detect problems. Thus this test requires
+ * that the inconsistency-check and nanosleep tests be present in the
+ * same directory it is run from.
+ *
+ * To build:
+ * $ gcc clocksource-switch.c -o clocksource-switch -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/wait.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+ exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+ exit(1);
+}
+#endif
+
+
+int get_clocksources(char list[][30])
+{
+ int fd, i;
+ size_t size;
+ char buf[512];
+ char *head, *tmp;
+
+ fd = open("/sys/devices/system/clocksource/clocksource0/available_clocksource", O_RDONLY);
+
+ size = read(fd, buf, 512);
+
+ close(fd);
+
+ for (i = 0; i < 30; i++)
+ list[i][0] = '\0';
+
+ head = buf;
+ i = 0;
+ while (head - buf < size) {
+ /* Find the next space */
+ for (tmp = head; *tmp != ' '; tmp++) {
+ if (*tmp == '\n')
+ break;
+ if (*tmp == '\0')
+ break;
+ }
+ *tmp = '\0';
+ strcpy(list[i], head);
+ head = tmp + 1;
+ i++;
+ }
+
+ return i-1;
+}
+
+int get_cur_clocksource(char *buf, size_t size)
+{
+ int fd;
+
+ fd = open("/sys/devices/system/clocksource/clocksource0/current_clocksource", O_RDONLY);
+
+ size = read(fd, buf, size);
+
+ return 0;
+}
+
+int change_clocksource(char *clocksource)
+{
+ int fd;
+ size_t size;
+
+ fd = open("/sys/devices/system/clocksource/clocksource0/current_clocksource", O_WRONLY);
+
+ if (fd < 0)
+ return -1;
+
+ size = write(fd, clocksource, strlen(clocksource));
+
+ if (size < 0)
+ return -1;
+
+ close(fd);
+ return 0;
+}
+
+
+int run_tests(int secs)
+{
+ int ret;
+ char buf[255];
+
+ sprintf(buf, "./inconsistency-check -t %i", secs);
+ ret = system(buf);
+ if (ret)
+ return ret;
+ ret = system("./nanosleep");
+ return ret;
+}
+
+
+char clocksource_list[10][30];
+
+int main(int argv, char **argc)
+{
+ char orig_clk[512];
+ int count, i, status;
+ pid_t pid;
+
+ get_cur_clocksource(orig_clk, 512);
+
+ count = get_clocksources(clocksource_list);
+
+ if (change_clocksource(clocksource_list[0])) {
+ printf("Error: You probably need to run this as root\n");
+ return -1;
+ }
+
+ /* Check everything is sane before we start switching asyncrhonously */
+ for (i = 0; i < count; i++) {
+ printf("Validating clocksource %s\n", clocksource_list[i]);
+ if (change_clocksource(clocksource_list[i])) {
+ status = -1;
+ goto out;
+ }
+ if (run_tests(5)) {
+ status = -1;
+ goto out;
+ }
+ }
+
+
+ printf("Running Asyncrhonous Switching Tests...\n");
+ pid = fork();
+ if (!pid)
+ return run_tests(60);
+
+ while (pid != waitpid(pid, &status, WNOHANG))
+ for (i = 0; i < count; i++)
+ if (change_clocksource(clocksource_list[i])) {
+ status = -1;
+ goto out;
+ }
+out:
+ change_clocksource(orig_clk);
+
+ if (status)
+ return ksft_exit_fail();
+ return ksft_exit_pass();
+}
diff --git a/timers/inconsistency-check.c b/timers/inconsistency-check.c
new file mode 100644
index 0000000..caf1bc9
--- /dev/null
+++ b/timers/inconsistency-check.c
@@ -0,0 +1,204 @@
+/* Time inconsistency check test
+ * by: john stultz (johnstul@us.ibm.com)
+ * (C) Copyright IBM 2003, 2004, 2005, 2012
+ * (C) Copyright Linaro Limited 2015
+ * Licensed under the GPLv2
+ *
+ * To build:
+ * $ gcc inconsistency-check.c -o inconsistency-check -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+ exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+ exit(1);
+}
+#endif
+
+#define CALLS_PER_LOOP 64
+#define NSEC_PER_SEC 1000000000ULL
+
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 1
+#define CLOCK_PROCESS_CPUTIME_ID 2
+#define CLOCK_THREAD_CPUTIME_ID 3
+#define CLOCK_MONOTONIC_RAW 4
+#define CLOCK_REALTIME_COARSE 5
+#define CLOCK_MONOTONIC_COARSE 6
+#define CLOCK_BOOTTIME 7
+#define CLOCK_REALTIME_ALARM 8
+#define CLOCK_BOOTTIME_ALARM 9
+#define CLOCK_HWSPECIFIC 10
+#define CLOCK_TAI 11
+#define NR_CLOCKIDS 12
+
+char *clockstring(int clockid)
+{
+ switch (clockid) {
+ case CLOCK_REALTIME:
+ return "CLOCK_REALTIME";
+ case CLOCK_MONOTONIC:
+ return "CLOCK_MONOTONIC";
+ case CLOCK_PROCESS_CPUTIME_ID:
+ return "CLOCK_PROCESS_CPUTIME_ID";
+ case CLOCK_THREAD_CPUTIME_ID:
+ return "CLOCK_THREAD_CPUTIME_ID";
+ case CLOCK_MONOTONIC_RAW:
+ return "CLOCK_MONOTONIC_RAW";
+ case CLOCK_REALTIME_COARSE:
+ return "CLOCK_REALTIME_COARSE";
+ case CLOCK_MONOTONIC_COARSE:
+ return "CLOCK_MONOTONIC_COARSE";
+ case CLOCK_BOOTTIME:
+ return "CLOCK_BOOTTIME";
+ case CLOCK_REALTIME_ALARM:
+ return "CLOCK_REALTIME_ALARM";
+ case CLOCK_BOOTTIME_ALARM:
+ return "CLOCK_BOOTTIME_ALARM";
+ case CLOCK_TAI:
+ return "CLOCK_TAI";
+ };
+ return "UNKNOWN_CLOCKID";
+}
+
+/* returns 1 if a <= b, 0 otherwise */
+static inline int in_order(struct timespec a, struct timespec b)
+{
+ /* use unsigned to avoid false positives on 2038 rollover */
+ if ((unsigned long)a.tv_sec < (unsigned long)b.tv_sec)
+ return 1;
+ if ((unsigned long)a.tv_sec > (unsigned long)b.tv_sec)
+ return 0;
+ if (a.tv_nsec > b.tv_nsec)
+ return 0;
+ return 1;
+}
+
+
+
+int consistency_test(int clock_type, unsigned long seconds)
+{
+ struct timespec list[CALLS_PER_LOOP];
+ int i, inconsistent;
+ long now, then;
+ time_t t;
+ char *start_str;
+
+ clock_gettime(clock_type, &list[0]);
+ now = then = list[0].tv_sec;
+
+ /* timestamp start of test */
+ t = time(0);
+ start_str = ctime(&t);
+
+ while (seconds == -1 || now - then < seconds) {
+ inconsistent = 0;
+
+ /* Fill list */
+ for (i = 0; i < CALLS_PER_LOOP; i++)
+ clock_gettime(clock_type, &list[i]);
+
+ /* Check for inconsistencies */
+ for (i = 0; i < CALLS_PER_LOOP - 1; i++)
+ if (!in_order(list[i], list[i+1]))
+ inconsistent = i;
+
+ /* display inconsistency */
+ if (inconsistent) {
+ unsigned long long delta;
+
+ printf("\%s\n", start_str);
+ for (i = 0; i < CALLS_PER_LOOP; i++) {
+ if (i == inconsistent)
+ printf("--------------------\n");
+ printf("%lu:%lu\n", list[i].tv_sec,
+ list[i].tv_nsec);
+ if (i == inconsistent + 1)
+ printf("--------------------\n");
+ }
+ delta = list[inconsistent].tv_sec * NSEC_PER_SEC;
+ delta += list[inconsistent].tv_nsec;
+ delta -= list[inconsistent+1].tv_sec * NSEC_PER_SEC;
+ delta -= list[inconsistent+1].tv_nsec;
+ printf("Delta: %llu ns\n", delta);
+ fflush(0);
+ /* timestamp inconsistency*/
+ t = time(0);
+ printf("%s\n", ctime(&t));
+ printf("[FAILED]\n");
+ return -1;
+ }
+ now = list[0].tv_sec;
+ }
+ printf("[OK]\n");
+ return 0;
+}
+
+
+int main(int argc, char *argv[])
+{
+ int clockid, opt;
+ int userclock = CLOCK_REALTIME;
+ int maxclocks = NR_CLOCKIDS;
+ int runtime = 10;
+ struct timespec ts;
+
+ /* Process arguments */
+ while ((opt = getopt(argc, argv, "t:c:")) != -1) {
+ switch (opt) {
+ case 't':
+ runtime = atoi(optarg);
+ break;
+ case 'c':
+ userclock = atoi(optarg);
+ maxclocks = userclock + 1;
+ break;
+ default:
+ printf("Usage: %s [-t <secs>] [-c <clockid>]\n", argv[0]);
+ printf(" -t: Number of seconds to run\n");
+ printf(" -c: clockid to use (default, all clockids)\n");
+ exit(-1);
+ }
+ }
+
+ setbuf(stdout, NULL);
+
+ for (clockid = userclock; clockid < maxclocks; clockid++) {
+
+ if (clockid == CLOCK_HWSPECIFIC)
+ continue;
+
+ if (!clock_gettime(clockid, &ts)) {
+ printf("Consistent %-30s ", clockstring(clockid));
+ if (consistency_test(clockid, runtime))
+ return ksft_exit_fail();
+ }
+ }
+ return ksft_exit_pass();
+}
diff --git a/timers/leap-a-day.c b/timers/leap-a-day.c
new file mode 100644
index 0000000..fb46ad6
--- /dev/null
+++ b/timers/leap-a-day.c
@@ -0,0 +1,388 @@
+/* Leap second stress test
+ * by: John Stultz (john.stultz@linaro.org)
+ * (C) Copyright IBM 2012
+ * (C) Copyright 2013, 2015 Linaro Limited
+ * Licensed under the GPLv2
+ *
+ * This test signals the kernel to insert a leap second
+ * every day at midnight GMT. This allows for stessing the
+ * kernel's leap-second behavior, as well as how well applications
+ * handle the leap-second discontinuity.
+ *
+ * Usage: leap-a-day [-s] [-i <num>]
+ *
+ * Options:
+ * -s: Each iteration, set the date to 10 seconds before midnight GMT.
+ * This speeds up the number of leapsecond transitions tested,
+ * but because it calls settimeofday frequently, advancing the
+ * time by 24 hours every ~16 seconds, it may cause application
+ * disruption.
+ *
+ * -i: Number of iterations to run (default: infinite)
+ *
+ * Other notes: Disabling NTP prior to running this is advised, as the two
+ * may conflict in their commands to the kernel.
+ *
+ * To build:
+ * $ gcc leap-a-day.c -o leap-a-day -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <sys/errno.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+ exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+ exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000ULL
+#define CLOCK_TAI 11
+
+time_t next_leap;
+int error_found;
+
+/* returns 1 if a <= b, 0 otherwise */
+static inline int in_order(struct timespec a, struct timespec b)
+{
+ if (a.tv_sec < b.tv_sec)
+ return 1;
+ if (a.tv_sec > b.tv_sec)
+ return 0;
+ if (a.tv_nsec > b.tv_nsec)
+ return 0;
+ return 1;
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{
+ ts.tv_nsec += ns;
+ while (ts.tv_nsec >= NSEC_PER_SEC) {
+ ts.tv_nsec -= NSEC_PER_SEC;
+ ts.tv_sec++;
+ }
+ return ts;
+}
+
+char *time_state_str(int state)
+{
+ switch (state) {
+ case TIME_OK: return "TIME_OK";
+ case TIME_INS: return "TIME_INS";
+ case TIME_DEL: return "TIME_DEL";
+ case TIME_OOP: return "TIME_OOP";
+ case TIME_WAIT: return "TIME_WAIT";
+ case TIME_BAD: return "TIME_BAD";
+ }
+ return "ERROR";
+}
+
+/* clear NTP time_status & time_state */
+int clear_time_state(void)
+{
+ struct timex tx;
+ int ret;
+
+ /*
+ * We have to call adjtime twice here, as kernels
+ * prior to 6b1859dba01c7 (included in 3.5 and
+ * -stable), had an issue with the state machine
+ * and wouldn't clear the STA_INS/DEL flag directly.
+ */
+ tx.modes = ADJ_STATUS;
+ tx.status = STA_PLL;
+ ret = adjtimex(&tx);
+
+ /* Clear maxerror, as it can cause UNSYNC to be set */
+ tx.modes = ADJ_MAXERROR;
+ tx.maxerror = 0;
+ ret = adjtimex(&tx);
+
+ /* Clear the status */
+ tx.modes = ADJ_STATUS;
+ tx.status = 0;
+ ret = adjtimex(&tx);
+
+ return ret;
+}
+
+/* Make sure we cleanup on ctrl-c */
+void handler(int unused)
+{
+ clear_time_state();
+ exit(0);
+}
+
+void sigalarm(int signo)
+{
+ struct timex tx;
+ int ret;
+
+ tx.modes = 0;
+ ret = adjtimex(&tx);
+
+ if (tx.time.tv_sec < next_leap) {
+ printf("Error: Early timer expiration! (Should be %ld)\n", next_leap);
+ error_found = 1;
+ printf("adjtimex: %10ld sec + %6ld us (%i)\t%s\n",
+ tx.time.tv_sec,
+ tx.time.tv_usec,
+ tx.tai,
+ time_state_str(ret));
+ }
+ if (ret != TIME_WAIT) {
+ printf("Error: Timer seeing incorrect NTP state? (Should be TIME_WAIT)\n");
+ error_found = 1;
+ printf("adjtimex: %10ld sec + %6ld us (%i)\t%s\n",
+ tx.time.tv_sec,
+ tx.time.tv_usec,
+ tx.tai,
+ time_state_str(ret));
+ }
+}
+
+
+/* Test for known hrtimer failure */
+void test_hrtimer_failure(void)
+{
+ struct timespec now, target;
+
+ clock_gettime(CLOCK_REALTIME, &now);
+ target = timespec_add(now, NSEC_PER_SEC/2);
+ clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &target, NULL);
+ clock_gettime(CLOCK_REALTIME, &now);
+
+ if (!in_order(target, now)) {
+ printf("ERROR: hrtimer early expiration failure observed.\n");
+ error_found = 1;
+ }
+}
+
+int main(int argc, char **argv)
+{
+ timer_t tm1;
+ struct itimerspec its1;
+ struct sigevent se;
+ struct sigaction act;
+ int signum = SIGRTMAX;
+ int settime = 0;
+ int tai_time = 0;
+ int insert = 1;
+ int iterations = -1;
+ int opt;
+
+ /* Process arguments */
+ while ((opt = getopt(argc, argv, "sti:")) != -1) {
+ switch (opt) {
+ case 's':
+ printf("Setting time to speed up testing\n");
+ settime = 1;
+ break;
+ case 'i':
+ iterations = atoi(optarg);
+ break;
+ case 't':
+ tai_time = 1;
+ break;
+ default:
+ printf("Usage: %s [-s] [-i <iterations>]\n", argv[0]);
+ printf(" -s: Set time to right before leap second each iteration\n");
+ printf(" -i: Number of iterations\n");
+ printf(" -t: Print TAI time\n");
+ exit(-1);
+ }
+ }
+
+ /* Make sure TAI support is present if -t was used */
+ if (tai_time) {
+ struct timespec ts;
+
+ if (clock_gettime(CLOCK_TAI, &ts)) {
+ printf("System doesn't support CLOCK_TAI\n");
+ ksft_exit_fail();
+ }
+ }
+
+ signal(SIGINT, handler);
+ signal(SIGKILL, handler);
+
+ /* Set up timer signal handler: */
+ sigfillset(&act.sa_mask);
+ act.sa_flags = 0;
+ act.sa_handler = sigalarm;
+ sigaction(signum, &act, NULL);
+
+ if (iterations < 0)
+ printf("This runs continuously. Press ctrl-c to stop\n");
+ else
+ printf("Running for %i iterations. Press ctrl-c to stop\n", iterations);
+
+ printf("\n");
+ while (1) {
+ int ret;
+ struct timespec ts;
+ struct timex tx;
+ time_t now;
+
+ /* Get the current time */
+ clock_gettime(CLOCK_REALTIME, &ts);
+
+ /* Calculate the next possible leap second 23:59:60 GMT */
+ next_leap = ts.tv_sec;
+ next_leap += 86400 - (next_leap % 86400);
+
+ if (settime) {
+ struct timeval tv;
+
+ tv.tv_sec = next_leap - 10;
+ tv.tv_usec = 0;
+ settimeofday(&tv, NULL);
+ printf("Setting time to %s", ctime(&tv.tv_sec));
+ }
+
+ /* Reset NTP time state */
+ clear_time_state();
+
+ /* Set the leap second insert flag */
+ tx.modes = ADJ_STATUS;
+ if (insert)
+ tx.status = STA_INS;
+ else
+ tx.status = STA_DEL;
+ ret = adjtimex(&tx);
+ if (ret < 0) {
+ printf("Error: Problem setting STA_INS/STA_DEL!: %s\n",
+ time_state_str(ret));
+ return ksft_exit_fail();
+ }
+
+ /* Validate STA_INS was set */
+ tx.modes = 0;
+ ret = adjtimex(&tx);
+ if (tx.status != STA_INS && tx.status != STA_DEL) {
+ printf("Error: STA_INS/STA_DEL not set!: %s\n",
+ time_state_str(ret));
+ return ksft_exit_fail();
+ }
+
+ if (tai_time) {
+ printf("Using TAI time,"
+ " no inconsistencies should be seen!\n");
+ }
+
+ printf("Scheduling leap second for %s", ctime(&next_leap));
+
+ /* Set up timer */
+ printf("Setting timer for %ld - %s", next_leap, ctime(&next_leap));
+ memset(&se, 0, sizeof(se));
+ se.sigev_notify = SIGEV_SIGNAL;
+ se.sigev_signo = signum;
+ se.sigev_value.sival_int = 0;
+ if (timer_create(CLOCK_REALTIME, &se, &tm1) == -1) {
+ printf("Error: timer_create failed\n");
+ return ksft_exit_fail();
+ }
+ its1.it_value.tv_sec = next_leap;
+ its1.it_value.tv_nsec = 0;
+ its1.it_interval.tv_sec = 0;
+ its1.it_interval.tv_nsec = 0;
+ timer_settime(tm1, TIMER_ABSTIME, &its1, NULL);
+
+ /* Wake up 3 seconds before leap */
+ ts.tv_sec = next_leap - 3;
+ ts.tv_nsec = 0;
+
+
+ while (clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &ts, NULL))
+ printf("Something woke us up, returning to sleep\n");
+
+ /* Validate STA_INS is still set */
+ tx.modes = 0;
+ ret = adjtimex(&tx);
+ if (tx.status != STA_INS && tx.status != STA_DEL) {
+ printf("Something cleared STA_INS/STA_DEL, setting it again.\n");
+ tx.modes = ADJ_STATUS;
+ if (insert)
+ tx.status = STA_INS;
+ else
+ tx.status = STA_DEL;
+ ret = adjtimex(&tx);
+ }
+
+ /* Check adjtimex output every half second */
+ now = tx.time.tv_sec;
+ while (now < next_leap + 2) {
+ char buf[26];
+ struct timespec tai;
+ int ret;
+
+ tx.modes = 0;
+ ret = adjtimex(&tx);
+
+ if (tai_time) {
+ clock_gettime(CLOCK_TAI, &tai);
+ printf("%ld sec, %9ld ns\t%s\n",
+ tai.tv_sec,
+ tai.tv_nsec,
+ time_state_str(ret));
+ } else {
+ ctime_r(&tx.time.tv_sec, buf);
+ buf[strlen(buf)-1] = 0; /*remove trailing\n */
+
+ printf("%s + %6ld us (%i)\t%s\n",
+ buf,
+ tx.time.tv_usec,
+ tx.tai,
+ time_state_str(ret));
+ }
+ now = tx.time.tv_sec;
+ /* Sleep for another half second */
+ ts.tv_sec = 0;
+ ts.tv_nsec = NSEC_PER_SEC / 2;
+ clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL);
+ }
+ /* Switch to using other mode */
+ insert = !insert;
+
+ /* Note if kernel has known hrtimer failure */
+ test_hrtimer_failure();
+
+ printf("Leap complete\n");
+ if (error_found) {
+ printf("Errors observed\n");
+ clear_time_state();
+ return ksft_exit_fail();
+ }
+ printf("\n");
+ if ((iterations != -1) && !(--iterations))
+ break;
+ }
+
+ clear_time_state();
+ return ksft_exit_pass();
+}
diff --git a/timers/leapcrash.c b/timers/leapcrash.c
new file mode 100644
index 0000000..a1071bd
--- /dev/null
+++ b/timers/leapcrash.c
@@ -0,0 +1,120 @@
+/* Demo leapsecond deadlock
+ * by: John Stultz (john.stultz@linaro.org)
+ * (C) Copyright IBM 2012
+ * (C) Copyright 2013, 2015 Linaro Limited
+ * Licensed under the GPL
+ *
+ * This test demonstrates leapsecond deadlock that is possibe
+ * on kernels from 2.6.26 to 3.3.
+ *
+ * WARNING: THIS WILL LIKELY HARDHANG SYSTEMS AND MAY LOSE DATA
+ * RUN AT YOUR OWN RISK!
+ * To build:
+ * $ gcc leapcrash.c -o leapcrash -lrt
+ */
+
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+ exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+ exit(1);
+}
+#endif
+
+
+
+/* clear NTP time_status & time_state */
+int clear_time_state(void)
+{
+ struct timex tx;
+ int ret;
+
+ /*
+ * We have to call adjtime twice here, as kernels
+ * prior to 6b1859dba01c7 (included in 3.5 and
+ * -stable), had an issue with the state machine
+ * and wouldn't clear the STA_INS/DEL flag directly.
+ */
+ tx.modes = ADJ_STATUS;
+ tx.status = STA_PLL;
+ ret = adjtimex(&tx);
+
+ tx.modes = ADJ_STATUS;
+ tx.status = 0;
+ ret = adjtimex(&tx);
+
+ return ret;
+}
+
+/* Make sure we cleanup on ctrl-c */
+void handler(int unused)
+{
+ clear_time_state();
+ exit(0);
+}
+
+
+int main(void)
+{
+ struct timex tx;
+ struct timespec ts;
+ time_t next_leap;
+ int count = 0;
+
+ setbuf(stdout, NULL);
+
+ signal(SIGINT, handler);
+ signal(SIGKILL, handler);
+ printf("This runs for a few minutes. Press ctrl-c to stop\n");
+
+ clear_time_state();
+
+
+ /* Get the current time */
+ clock_gettime(CLOCK_REALTIME, &ts);
+
+ /* Calculate the next possible leap second 23:59:60 GMT */
+ next_leap = ts.tv_sec;
+ next_leap += 86400 - (next_leap % 86400);
+
+ for (count = 0; count < 20; count++) {
+ struct timeval tv;
+
+
+ /* set the time to 2 seconds before the leap */
+ tv.tv_sec = next_leap - 2;
+ tv.tv_usec = 0;
+ if (settimeofday(&tv, NULL)) {
+ printf("Error: You're likely not running with proper (ie: root) permissions\n");
+ return ksft_exit_fail();
+ }
+ tx.modes = 0;
+ adjtimex(&tx);
+
+ /* hammer on adjtime w/ STA_INS */
+ while (tx.time.tv_sec < next_leap + 1) {
+ /* Set the leap second insert flag */
+ tx.modes = ADJ_STATUS;
+ tx.status = STA_INS;
+ adjtimex(&tx);
+ }
+ clear_time_state();
+ printf(".");
+ }
+ printf("[OK]\n");
+ return ksft_exit_pass();
+}
diff --git a/timers/mqueue-lat.c b/timers/mqueue-lat.c
new file mode 100644
index 0000000..a2a3924
--- /dev/null
+++ b/timers/mqueue-lat.c
@@ -0,0 +1,124 @@
+/* Measure mqueue timeout latency
+ * by: john stultz (john.stultz@linaro.org)
+ * (C) Copyright Linaro 2013
+ *
+ * Inspired with permission from example test by:
+ * Romain Francoise <romain@orebokech.com>
+ * Licensed under the GPLv2
+ *
+ * To build:
+ * $ gcc mqueue-lat.c -o mqueue-lat -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include <errno.h>
+#include <mqueue.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+ exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+ exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000ULL
+
+#define TARGET_TIMEOUT 100000000 /* 100ms in nanoseconds */
+#define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */
+
+
+long long timespec_sub(struct timespec a, struct timespec b)
+{
+ long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;
+
+ ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec;
+ return ret;
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{
+ ts.tv_nsec += ns;
+ while (ts.tv_nsec >= NSEC_PER_SEC) {
+ ts.tv_nsec -= NSEC_PER_SEC;
+ ts.tv_sec++;
+ }
+ return ts;
+}
+
+int mqueue_lat_test(void)
+{
+
+ mqd_t q;
+ struct mq_attr attr;
+ struct timespec start, end, now, target;
+ int i, count, ret;
+
+ q = mq_open("/foo", O_CREAT | O_RDONLY, 0666, NULL);
+ if (q < 0) {
+ perror("mq_open");
+ return -1;
+ }
+ mq_getattr(q, &attr);
+
+
+ count = 100;
+ clock_gettime(CLOCK_MONOTONIC, &start);
+
+ for (i = 0; i < count; i++) {
+ char buf[attr.mq_msgsize];
+
+ clock_gettime(CLOCK_REALTIME, &now);
+ target = now;
+ target = timespec_add(now, TARGET_TIMEOUT); /* 100ms */
+
+ ret = mq_timedreceive(q, buf, sizeof(buf), NULL, &target);
+ if (ret < 0 && errno != ETIMEDOUT) {
+ perror("mq_timedreceive");
+ return -1;
+ }
+ }
+ clock_gettime(CLOCK_MONOTONIC, &end);
+
+ mq_close(q);
+
+ if ((timespec_sub(start, end)/count) > TARGET_TIMEOUT + UNRESONABLE_LATENCY)
+ return -1;
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int ret;
+
+ printf("Mqueue latency : ");
+
+ ret = mqueue_lat_test();
+ if (ret < 0) {
+ printf("[FAILED]\n");
+ return ksft_exit_fail();
+ }
+ printf("[OK]\n");
+ return ksft_exit_pass();
+}
diff --git a/timers/nanosleep.c b/timers/nanosleep.c
new file mode 100644
index 0000000..ff942ff
--- /dev/null
+++ b/timers/nanosleep.c
@@ -0,0 +1,175 @@
+/* Make sure timers don't return early
+ * by: john stultz (johnstul@us.ibm.com)
+ * John Stultz (john.stultz@linaro.org)
+ * (C) Copyright IBM 2012
+ * (C) Copyright Linaro 2013 2015
+ * Licensed under the GPLv2
+ *
+ * To build:
+ * $ gcc nanosleep.c -o nanosleep -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+ exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+ exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000ULL
+
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 1
+#define CLOCK_PROCESS_CPUTIME_ID 2
+#define CLOCK_THREAD_CPUTIME_ID 3
+#define CLOCK_MONOTONIC_RAW 4
+#define CLOCK_REALTIME_COARSE 5
+#define CLOCK_MONOTONIC_COARSE 6
+#define CLOCK_BOOTTIME 7
+#define CLOCK_REALTIME_ALARM 8
+#define CLOCK_BOOTTIME_ALARM 9
+#define CLOCK_HWSPECIFIC 10
+#define CLOCK_TAI 11
+#define NR_CLOCKIDS 12
+
+#define UNSUPPORTED 0xf00f
+
+char *clockstring(int clockid)
+{
+ switch (clockid) {
+ case CLOCK_REALTIME:
+ return "CLOCK_REALTIME";
+ case CLOCK_MONOTONIC:
+ return "CLOCK_MONOTONIC";
+ case CLOCK_PROCESS_CPUTIME_ID:
+ return "CLOCK_PROCESS_CPUTIME_ID";
+ case CLOCK_THREAD_CPUTIME_ID:
+ return "CLOCK_THREAD_CPUTIME_ID";
+ case CLOCK_MONOTONIC_RAW:
+ return "CLOCK_MONOTONIC_RAW";
+ case CLOCK_REALTIME_COARSE:
+ return "CLOCK_REALTIME_COARSE";
+ case CLOCK_MONOTONIC_COARSE:
+ return "CLOCK_MONOTONIC_COARSE";
+ case CLOCK_BOOTTIME:
+ return "CLOCK_BOOTTIME";
+ case CLOCK_REALTIME_ALARM:
+ return "CLOCK_REALTIME_ALARM";
+ case CLOCK_BOOTTIME_ALARM:
+ return "CLOCK_BOOTTIME_ALARM";
+ case CLOCK_TAI:
+ return "CLOCK_TAI";
+ };
+ return "UNKNOWN_CLOCKID";
+}
+
+/* returns 1 if a <= b, 0 otherwise */
+static inline int in_order(struct timespec a, struct timespec b)
+{
+ if (a.tv_sec < b.tv_sec)
+ return 1;
+ if (a.tv_sec > b.tv_sec)
+ return 0;
+ if (a.tv_nsec > b.tv_nsec)
+ return 0;
+ return 1;
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{
+ ts.tv_nsec += ns;
+ while (ts.tv_nsec >= NSEC_PER_SEC) {
+ ts.tv_nsec -= NSEC_PER_SEC;
+ ts.tv_sec++;
+ }
+ return ts;
+}
+
+int nanosleep_test(int clockid, long long ns)
+{
+ struct timespec now, target, rel;
+
+ /* First check abs time */
+ if (clock_gettime(clockid, &now))
+ return UNSUPPORTED;
+ target = timespec_add(now, ns);
+
+ if (clock_nanosleep(clockid, TIMER_ABSTIME, &target, NULL))
+ return UNSUPPORTED;
+ clock_gettime(clockid, &now);
+
+ if (!in_order(target, now))
+ return -1;
+
+ /* Second check reltime */
+ clock_gettime(clockid, &now);
+ rel.tv_sec = 0;
+ rel.tv_nsec = 0;
+ rel = timespec_add(rel, ns);
+ target = timespec_add(now, ns);
+ clock_nanosleep(clockid, 0, &rel, NULL);
+ clock_gettime(clockid, &now);
+
+ if (!in_order(target, now))
+ return -1;
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ long long length;
+ int clockid, ret;
+
+ for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) {
+
+ /* Skip cputime clockids since nanosleep won't increment cputime */
+ if (clockid == CLOCK_PROCESS_CPUTIME_ID ||
+ clockid == CLOCK_THREAD_CPUTIME_ID ||
+ clockid == CLOCK_HWSPECIFIC)
+ continue;
+
+ printf("Nanosleep %-31s ", clockstring(clockid));
+
+ length = 10;
+ while (length <= (NSEC_PER_SEC * 10)) {
+ ret = nanosleep_test(clockid, length);
+ if (ret == UNSUPPORTED) {
+ printf("[UNSUPPORTED]\n");
+ goto next;
+ }
+ if (ret < 0) {
+ printf("[FAILED]\n");
+ return ksft_exit_fail();
+ }
+ length *= 100;
+ }
+ printf("[OK]\n");
+next:
+ ret = 0;
+ }
+ return ksft_exit_pass();
+}
diff --git a/timers/nsleep-lat.c b/timers/nsleep-lat.c
new file mode 100644
index 0000000..2d7898f
--- /dev/null
+++ b/timers/nsleep-lat.c
@@ -0,0 +1,190 @@
+/* Measure nanosleep timer latency
+ * by: john stultz (john.stultz@linaro.org)
+ * (C) Copyright Linaro 2013
+ * Licensed under the GPLv2
+ *
+ * To build:
+ * $ gcc nsleep-lat.c -o nsleep-lat -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+ exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+ exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000ULL
+
+#define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */
+
+
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 1
+#define CLOCK_PROCESS_CPUTIME_ID 2
+#define CLOCK_THREAD_CPUTIME_ID 3
+#define CLOCK_MONOTONIC_RAW 4
+#define CLOCK_REALTIME_COARSE 5
+#define CLOCK_MONOTONIC_COARSE 6
+#define CLOCK_BOOTTIME 7
+#define CLOCK_REALTIME_ALARM 8
+#define CLOCK_BOOTTIME_ALARM 9
+#define CLOCK_HWSPECIFIC 10
+#define CLOCK_TAI 11
+#define NR_CLOCKIDS 12
+
+#define UNSUPPORTED 0xf00f
+
+char *clockstring(int clockid)
+{
+ switch (clockid) {
+ case CLOCK_REALTIME:
+ return "CLOCK_REALTIME";
+ case CLOCK_MONOTONIC:
+ return "CLOCK_MONOTONIC";
+ case CLOCK_PROCESS_CPUTIME_ID:
+ return "CLOCK_PROCESS_CPUTIME_ID";
+ case CLOCK_THREAD_CPUTIME_ID:
+ return "CLOCK_THREAD_CPUTIME_ID";
+ case CLOCK_MONOTONIC_RAW:
+ return "CLOCK_MONOTONIC_RAW";
+ case CLOCK_REALTIME_COARSE:
+ return "CLOCK_REALTIME_COARSE";
+ case CLOCK_MONOTONIC_COARSE:
+ return "CLOCK_MONOTONIC_COARSE";
+ case CLOCK_BOOTTIME:
+ return "CLOCK_BOOTTIME";
+ case CLOCK_REALTIME_ALARM:
+ return "CLOCK_REALTIME_ALARM";
+ case CLOCK_BOOTTIME_ALARM:
+ return "CLOCK_BOOTTIME_ALARM";
+ case CLOCK_TAI:
+ return "CLOCK_TAI";
+ };
+ return "UNKNOWN_CLOCKID";
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{
+ ts.tv_nsec += ns;
+ while (ts.tv_nsec >= NSEC_PER_SEC) {
+ ts.tv_nsec -= NSEC_PER_SEC;
+ ts.tv_sec++;
+ }
+ return ts;
+}
+
+
+long long timespec_sub(struct timespec a, struct timespec b)
+{
+ long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;
+
+ ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec;
+ return ret;
+}
+
+int nanosleep_lat_test(int clockid, long long ns)
+{
+ struct timespec start, end, target;
+ long long latency = 0;
+ int i, count;
+
+ target.tv_sec = ns/NSEC_PER_SEC;
+ target.tv_nsec = ns%NSEC_PER_SEC;
+
+ if (clock_gettime(clockid, &start))
+ return UNSUPPORTED;
+ if (clock_nanosleep(clockid, 0, &target, NULL))
+ return UNSUPPORTED;
+
+ count = 10;
+
+ /* First check relative latency */
+ clock_gettime(clockid, &start);
+ for (i = 0; i < count; i++)
+ clock_nanosleep(clockid, 0, &target, NULL);
+ clock_gettime(clockid, &end);
+
+ if (((timespec_sub(start, end)/count)-ns) > UNRESONABLE_LATENCY) {
+ printf("Large rel latency: %lld ns :", (timespec_sub(start, end)/count)-ns);
+ return -1;
+ }
+
+ /* Next check absolute latency */
+ for (i = 0; i < count; i++) {
+ clock_gettime(clockid, &start);
+ target = timespec_add(start, ns);
+ clock_nanosleep(clockid, TIMER_ABSTIME, &target, NULL);
+ clock_gettime(clockid, &end);
+ latency += timespec_sub(target, end);
+ }
+
+ if (latency/count > UNRESONABLE_LATENCY) {
+ printf("Large abs latency: %lld ns :", latency/count);
+ return -1;
+ }
+
+ return 0;
+}
+
+
+
+int main(int argc, char **argv)
+{
+ long long length;
+ int clockid, ret;
+
+ for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) {
+
+ /* Skip cputime clockids since nanosleep won't increment cputime */
+ if (clockid == CLOCK_PROCESS_CPUTIME_ID ||
+ clockid == CLOCK_THREAD_CPUTIME_ID ||
+ clockid == CLOCK_HWSPECIFIC)
+ continue;
+
+ printf("nsleep latency %-26s ", clockstring(clockid));
+
+ length = 10;
+ while (length <= (NSEC_PER_SEC * 10)) {
+ ret = nanosleep_lat_test(clockid, length);
+ if (ret)
+ break;
+ length *= 100;
+
+ }
+
+ if (ret == UNSUPPORTED) {
+ printf("[UNSUPPORTED]\n");
+ continue;
+ }
+ if (ret < 0) {
+ printf("[FAILED]\n");
+ return ksft_exit_fail();
+ }
+ printf("[OK]\n");
+ }
+ return ksft_exit_pass();
+}
diff --git a/timers/posix_timers.c b/timers/posix_timers.c
index f87d970..5a246a0 100644
--- a/timers/posix_timers.c
+++ b/timers/posix_timers.c
@@ -35,10 +35,11 @@ static void user_loop(void)
static void kernel_loop(void)
{
void *addr = sbrk(0);
+ int err = 0;
- while (!done) {
- brk(addr + 4096);
- brk(addr);
+ while (!done && !err) {
+ err = brk(addr + 4096);
+ err |= brk(addr);
}
}
@@ -190,8 +191,6 @@ static int check_timer_create(int which)
int main(int argc, char **argv)
{
- int err;
-
printf("Testing posix timers. False negative may happen on CPU execution \n");
printf("based timers if other threads run on the CPU...\n");
diff --git a/timers/raw_skew.c b/timers/raw_skew.c
new file mode 100644
index 0000000..30906bf
--- /dev/null
+++ b/timers/raw_skew.c
@@ -0,0 +1,154 @@
+/* CLOCK_MONOTONIC vs CLOCK_MONOTONIC_RAW skew test
+ * by: john stultz (johnstul@us.ibm.com)
+ * John Stultz <john.stultz@linaro.org>
+ * (C) Copyright IBM 2012
+ * (C) Copyright Linaro Limited 2015
+ * Licensed under the GPLv2
+ *
+ * To build:
+ * $ gcc raw_skew.c -o raw_skew -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+ exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+ exit(1);
+}
+#endif
+
+
+#define CLOCK_MONOTONIC_RAW 4
+#define NSEC_PER_SEC 1000000000LL
+
+#define shift_right(x, s) ({ \
+ __typeof__(x) __x = (x); \
+ __typeof__(s) __s = (s); \
+ __x < 0 ? -(-__x >> __s) : __x >> __s; \
+})
+
+long long llabs(long long val)
+{
+ if (val < 0)
+ val = -val;
+ return val;
+}
+
+unsigned long long ts_to_nsec(struct timespec ts)
+{
+ return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
+}
+
+struct timespec nsec_to_ts(long long ns)
+{
+ struct timespec ts;
+
+ ts.tv_sec = ns/NSEC_PER_SEC;
+ ts.tv_nsec = ns%NSEC_PER_SEC;
+ return ts;
+}
+
+long long diff_timespec(struct timespec start, struct timespec end)
+{
+ long long start_ns, end_ns;
+
+ start_ns = ts_to_nsec(start);
+ end_ns = ts_to_nsec(end);
+ return end_ns - start_ns;
+}
+
+void get_monotonic_and_raw(struct timespec *mon, struct timespec *raw)
+{
+ struct timespec start, mid, end;
+ long long diff = 0, tmp;
+ int i;
+
+ for (i = 0; i < 3; i++) {
+ long long newdiff;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ clock_gettime(CLOCK_MONOTONIC_RAW, &mid);
+ clock_gettime(CLOCK_MONOTONIC, &end);
+
+ newdiff = diff_timespec(start, end);
+ if (diff == 0 || newdiff < diff) {
+ diff = newdiff;
+ *raw = mid;
+ tmp = (ts_to_nsec(start) + ts_to_nsec(end))/2;
+ *mon = nsec_to_ts(tmp);
+ }
+ }
+}
+
+int main(int argv, char **argc)
+{
+ struct timespec mon, raw, start, end;
+ long long delta1, delta2, interval, eppm, ppm;
+ struct timex tx1, tx2;
+
+ setbuf(stdout, NULL);
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &raw)) {
+ printf("ERR: NO CLOCK_MONOTONIC_RAW\n");
+ return -1;
+ }
+
+ tx1.modes = 0;
+ adjtimex(&tx1);
+ get_monotonic_and_raw(&mon, &raw);
+ start = mon;
+ delta1 = diff_timespec(mon, raw);
+
+ if (tx1.offset)
+ printf("WARNING: ADJ_OFFSET in progress, this will cause inaccurate results\n");
+
+ printf("Estimating clock drift: ");
+ sleep(120);
+
+ get_monotonic_and_raw(&mon, &raw);
+ end = mon;
+ tx2.modes = 0;
+ adjtimex(&tx2);
+ delta2 = diff_timespec(mon, raw);
+
+ interval = diff_timespec(start, end);
+
+ /* calculate measured ppm between MONOTONIC and MONOTONIC_RAW */
+ eppm = ((delta2-delta1)*NSEC_PER_SEC)/interval;
+ eppm = -eppm;
+ printf("%lld.%i(est)", eppm/1000, abs((int)(eppm%1000)));
+
+ /* Avg the two actual freq samples adjtimex gave us */
+ ppm = (tx1.freq + tx2.freq) * 1000 / 2;
+ ppm = (long long)tx1.freq * 1000;
+ ppm = shift_right(ppm, 16);
+ printf(" %lld.%i(act)", ppm/1000, abs((int)(ppm%1000)));
+
+ if (llabs(eppm - ppm) > 1000) {
+ printf(" [FAILED]\n");
+ return ksft_exit_fail();
+ }
+ printf(" [OK]\n");
+ return ksft_exit_pass();
+}
diff --git a/timers/rtctest.c b/timers/rtctest.c
new file mode 100644
index 0000000..624bce5
--- /dev/null
+++ b/timers/rtctest.c
@@ -0,0 +1,271 @@
+/*
+ * Real Time Clock Driver Test/Example Program
+ *
+ * Compile with:
+ * gcc -s -Wall -Wstrict-prototypes rtctest.c -o rtctest
+ *
+ * Copyright (C) 1996, Paul Gortmaker.
+ *
+ * Released under the GNU General Public License, version 2,
+ * included herein by reference.
+ *
+ */
+
+#include <stdio.h>
+#include <linux/rtc.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+
+
+/*
+ * This expects the new RTC class driver framework, working with
+ * clocks that will often not be clones of what the PC-AT had.
+ * Use the command line to specify another RTC if you need one.
+ */
+static const char default_rtc[] = "/dev/rtc0";
+
+
+int main(int argc, char **argv)
+{
+ int i, fd, retval, irqcount = 0;
+ unsigned long tmp, data;
+ struct rtc_time rtc_tm;
+ const char *rtc = default_rtc;
+ struct timeval start, end, diff;
+
+ switch (argc) {
+ case 2:
+ rtc = argv[1];
+ /* FALLTHROUGH */
+ case 1:
+ break;
+ default:
+ fprintf(stderr, "usage: rtctest [rtcdev]\n");
+ return 1;
+ }
+
+ fd = open(rtc, O_RDONLY);
+
+ if (fd == -1) {
+ perror(rtc);
+ exit(errno);
+ }
+
+ fprintf(stderr, "\n\t\t\tRTC Driver Test Example.\n\n");
+
+ /* Turn on update interrupts (one per second) */
+ retval = ioctl(fd, RTC_UIE_ON, 0);
+ if (retval == -1) {
+ if (errno == EINVAL) {
+ fprintf(stderr,
+ "\n...Update IRQs not supported.\n");
+ goto test_READ;
+ }
+ perror("RTC_UIE_ON ioctl");
+ exit(errno);
+ }
+
+ fprintf(stderr, "Counting 5 update (1/sec) interrupts from reading %s:",
+ rtc);
+ fflush(stderr);
+ for (i=1; i<6; i++) {
+ /* This read will block */
+ retval = read(fd, &data, sizeof(unsigned long));
+ if (retval == -1) {
+ perror("read");
+ exit(errno);
+ }
+ fprintf(stderr, " %d",i);
+ fflush(stderr);
+ irqcount++;
+ }
+
+ fprintf(stderr, "\nAgain, from using select(2) on /dev/rtc:");
+ fflush(stderr);
+ for (i=1; i<6; i++) {
+ struct timeval tv = {5, 0}; /* 5 second timeout on select */
+ fd_set readfds;
+
+ FD_ZERO(&readfds);
+ FD_SET(fd, &readfds);
+ /* The select will wait until an RTC interrupt happens. */
+ retval = select(fd+1, &readfds, NULL, NULL, &tv);
+ if (retval == -1) {
+ perror("select");
+ exit(errno);
+ }
+ /* This read won't block unlike the select-less case above. */
+ retval = read(fd, &data, sizeof(unsigned long));
+ if (retval == -1) {
+ perror("read");
+ exit(errno);
+ }
+ fprintf(stderr, " %d",i);
+ fflush(stderr);
+ irqcount++;
+ }
+
+ /* Turn off update interrupts */
+ retval = ioctl(fd, RTC_UIE_OFF, 0);
+ if (retval == -1) {
+ perror("RTC_UIE_OFF ioctl");
+ exit(errno);
+ }
+
+test_READ:
+ /* Read the RTC time/date */
+ retval = ioctl(fd, RTC_RD_TIME, &rtc_tm);
+ if (retval == -1) {
+ perror("RTC_RD_TIME ioctl");
+ exit(errno);
+ }
+
+ fprintf(stderr, "\n\nCurrent RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n",
+ rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900,
+ rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
+
+ /* Set the alarm to 5 sec in the future, and check for rollover */
+ rtc_tm.tm_sec += 5;
+ if (rtc_tm.tm_sec >= 60) {
+ rtc_tm.tm_sec %= 60;
+ rtc_tm.tm_min++;
+ }
+ if (rtc_tm.tm_min == 60) {
+ rtc_tm.tm_min = 0;
+ rtc_tm.tm_hour++;
+ }
+ if (rtc_tm.tm_hour == 24)
+ rtc_tm.tm_hour = 0;
+
+ retval = ioctl(fd, RTC_ALM_SET, &rtc_tm);
+ if (retval == -1) {
+ if (errno == ENOTTY) {
+ fprintf(stderr,
+ "\n...Alarm IRQs not supported.\n");
+ goto test_PIE;
+ }
+ perror("RTC_ALM_SET ioctl");
+ exit(errno);
+ }
+
+ /* Read the current alarm settings */
+ retval = ioctl(fd, RTC_ALM_READ, &rtc_tm);
+ if (retval == -1) {
+ perror("RTC_ALM_READ ioctl");
+ exit(errno);
+ }
+
+ fprintf(stderr, "Alarm time now set to %02d:%02d:%02d.\n",
+ rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
+
+ /* Enable alarm interrupts */
+ retval = ioctl(fd, RTC_AIE_ON, 0);
+ if (retval == -1) {
+ perror("RTC_AIE_ON ioctl");
+ exit(errno);
+ }
+
+ fprintf(stderr, "Waiting 5 seconds for alarm...");
+ fflush(stderr);
+ /* This blocks until the alarm ring causes an interrupt */
+ retval = read(fd, &data, sizeof(unsigned long));
+ if (retval == -1) {
+ perror("read");
+ exit(errno);
+ }
+ irqcount++;
+ fprintf(stderr, " okay. Alarm rang.\n");
+
+ /* Disable alarm interrupts */
+ retval = ioctl(fd, RTC_AIE_OFF, 0);
+ if (retval == -1) {
+ perror("RTC_AIE_OFF ioctl");
+ exit(errno);
+ }
+
+test_PIE:
+ /* Read periodic IRQ rate */
+ retval = ioctl(fd, RTC_IRQP_READ, &tmp);
+ if (retval == -1) {
+ /* not all RTCs support periodic IRQs */
+ if (errno == ENOTTY) {
+ fprintf(stderr, "\nNo periodic IRQ support\n");
+ goto done;
+ }
+ perror("RTC_IRQP_READ ioctl");
+ exit(errno);
+ }
+ fprintf(stderr, "\nPeriodic IRQ rate is %ldHz.\n", tmp);
+
+ fprintf(stderr, "Counting 20 interrupts at:");
+ fflush(stderr);
+
+ /* The frequencies 128Hz, 256Hz, ... 8192Hz are only allowed for root. */
+ for (tmp=2; tmp<=64; tmp*=2) {
+
+ retval = ioctl(fd, RTC_IRQP_SET, tmp);
+ if (retval == -1) {
+ /* not all RTCs can change their periodic IRQ rate */
+ if (errno == ENOTTY) {
+ fprintf(stderr,
+ "\n...Periodic IRQ rate is fixed\n");
+ goto done;
+ }
+ perror("RTC_IRQP_SET ioctl");
+ exit(errno);
+ }
+
+ fprintf(stderr, "\n%ldHz:\t", tmp);
+ fflush(stderr);
+
+ /* Enable periodic interrupts */
+ retval = ioctl(fd, RTC_PIE_ON, 0);
+ if (retval == -1) {
+ perror("RTC_PIE_ON ioctl");
+ exit(errno);
+ }
+
+ for (i=1; i<21; i++) {
+ gettimeofday(&start, NULL);
+ /* This blocks */
+ retval = read(fd, &data, sizeof(unsigned long));
+ if (retval == -1) {
+ perror("read");
+ exit(errno);
+ }
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &diff);
+ if (diff.tv_sec > 0 ||
+ diff.tv_usec > ((1000000L / tmp) * 1.10)) {
+ fprintf(stderr, "\nPIE delta error: %ld.%06ld should be close to 0.%06ld\n",
+ diff.tv_sec, diff.tv_usec,
+ (1000000L / tmp));
+ fflush(stdout);
+ exit(-1);
+ }
+
+ fprintf(stderr, " %d",i);
+ fflush(stderr);
+ irqcount++;
+ }
+
+ /* Disable periodic interrupts */
+ retval = ioctl(fd, RTC_PIE_OFF, 0);
+ if (retval == -1) {
+ perror("RTC_PIE_OFF ioctl");
+ exit(errno);
+ }
+ }
+
+done:
+ fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n");
+
+ close(fd);
+
+ return 0;
+}
diff --git a/timers/set-2038.c b/timers/set-2038.c
new file mode 100644
index 0000000..c8a7e14
--- /dev/null
+++ b/timers/set-2038.c
@@ -0,0 +1,144 @@
+/* Time bounds setting test
+ * by: john stultz (johnstul@us.ibm.com)
+ * (C) Copyright IBM 2012
+ * Licensed under the GPLv2
+ *
+ * NOTE: This is a meta-test which sets the time to edge cases then
+ * uses other tests to detect problems. Thus this test requires that
+ * the inconsistency-check and nanosleep tests be present in the same
+ * directory it is run from.
+ *
+ * To build:
+ * $ gcc set-2038.c -o set-2038 -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/time.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+ exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+ exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000LL
+
+#define KTIME_MAX ((long long)~((unsigned long long)1 << 63))
+#define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC)
+
+#define YEAR_1901 (-0x7fffffffL)
+#define YEAR_1970 1
+#define YEAR_2038 0x7fffffffL /*overflows 32bit time_t */
+#define YEAR_2262 KTIME_SEC_MAX /*overflows 64bit ktime_t */
+#define YEAR_MAX ((long long)((1ULL<<63)-1)) /*overflows 64bit time_t */
+
+int is32bits(void)
+{
+ return (sizeof(long) == 4);
+}
+
+int settime(long long time)
+{
+ struct timeval now;
+ int ret;
+
+ now.tv_sec = (time_t)time;
+ now.tv_usec = 0;
+
+ ret = settimeofday(&now, NULL);
+
+ printf("Setting time to 0x%lx: %d\n", (long)time, ret);
+ return ret;
+}
+
+int do_tests(void)
+{
+ int ret;
+
+ ret = system("date");
+ ret = system("./inconsistency-check -c 0 -t 20");
+ ret |= system("./nanosleep");
+ ret |= system("./nsleep-lat");
+ return ret;
+
+}
+
+int main(int argc, char *argv[])
+{
+ int ret = 0;
+ int opt, dangerous = 0;
+ time_t start;
+
+ /* Process arguments */
+ while ((opt = getopt(argc, argv, "d")) != -1) {
+ switch (opt) {
+ case 'd':
+ dangerous = 1;
+ }
+ }
+
+ start = time(0);
+
+ /* First test that crazy values don't work */
+ if (!settime(YEAR_1901)) {
+ ret = -1;
+ goto out;
+ }
+ if (!settime(YEAR_MAX)) {
+ ret = -1;
+ goto out;
+ }
+ if (!is32bits() && !settime(YEAR_2262)) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Now test behavior near edges */
+ settime(YEAR_1970);
+ ret = do_tests();
+ if (ret)
+ goto out;
+
+ settime(YEAR_2038 - 600);
+ ret = do_tests();
+ if (ret)
+ goto out;
+
+ /* The rest of the tests can blowup on 32bit systems */
+ if (is32bits() && !dangerous)
+ goto out;
+ /* Test rollover behavior 32bit edge */
+ settime(YEAR_2038 - 10);
+ ret = do_tests();
+ if (ret)
+ goto out;
+
+ settime(YEAR_2262 - 600);
+ ret = do_tests();
+
+out:
+ /* restore clock */
+ settime(start);
+ if (ret)
+ return ksft_exit_fail();
+ return ksft_exit_pass();
+}
diff --git a/timers/set-tai.c b/timers/set-tai.c
new file mode 100644
index 0000000..dc88dbc
--- /dev/null
+++ b/timers/set-tai.c
@@ -0,0 +1,79 @@
+/* Set tai offset
+ * by: John Stultz <john.stultz@linaro.org>
+ * (C) Copyright Linaro 2013
+ * Licensed under the GPLv2
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+ exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+ exit(1);
+}
+#endif
+
+int set_tai(int offset)
+{
+ struct timex tx;
+
+ memset(&tx, 0, sizeof(tx));
+
+ tx.modes = ADJ_TAI;
+ tx.constant = offset;
+
+ return adjtimex(&tx);
+}
+
+int get_tai(void)
+{
+ struct timex tx;
+
+ memset(&tx, 0, sizeof(tx));
+
+ adjtimex(&tx);
+ return tx.tai;
+}
+
+int main(int argc, char **argv)
+{
+ int i, ret;
+
+ ret = get_tai();
+ printf("tai offset started at %i\n", ret);
+
+ printf("Checking tai offsets can be properly set: ");
+ for (i = 1; i <= 60; i++) {
+ ret = set_tai(i);
+ ret = get_tai();
+ if (ret != i) {
+ printf("[FAILED] expected: %i got %i\n", i, ret);
+ return ksft_exit_fail();
+ }
+ }
+ printf("[OK]\n");
+ return ksft_exit_pass();
+}
diff --git a/timers/set-timer-lat.c b/timers/set-timer-lat.c
new file mode 100644
index 0000000..4fc98c5
--- /dev/null
+++ b/timers/set-timer-lat.c
@@ -0,0 +1,216 @@
+/* set_timer latency test
+ * John Stultz (john.stultz@linaro.org)
+ * (C) Copyright Linaro 2014
+ * Licensed under the GPLv2
+ *
+ * This test makes sure the set_timer api is correct
+ *
+ * To build:
+ * $ gcc set-timer-lat.c -o set-timer-lat -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <time.h>
+#include <string.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <pthread.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+ exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+ exit(1);
+}
+#endif
+
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 1
+#define CLOCK_PROCESS_CPUTIME_ID 2
+#define CLOCK_THREAD_CPUTIME_ID 3
+#define CLOCK_MONOTONIC_RAW 4
+#define CLOCK_REALTIME_COARSE 5
+#define CLOCK_MONOTONIC_COARSE 6
+#define CLOCK_BOOTTIME 7
+#define CLOCK_REALTIME_ALARM 8
+#define CLOCK_BOOTTIME_ALARM 9
+#define CLOCK_HWSPECIFIC 10
+#define CLOCK_TAI 11
+#define NR_CLOCKIDS 12
+
+
+#define NSEC_PER_SEC 1000000000ULL
+#define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */
+
+#define TIMER_SECS 1
+int alarmcount;
+int clock_id;
+struct timespec start_time;
+long long max_latency_ns;
+
+char *clockstring(int clockid)
+{
+ switch (clockid) {
+ case CLOCK_REALTIME:
+ return "CLOCK_REALTIME";
+ case CLOCK_MONOTONIC:
+ return "CLOCK_MONOTONIC";
+ case CLOCK_PROCESS_CPUTIME_ID:
+ return "CLOCK_PROCESS_CPUTIME_ID";
+ case CLOCK_THREAD_CPUTIME_ID:
+ return "CLOCK_THREAD_CPUTIME_ID";
+ case CLOCK_MONOTONIC_RAW:
+ return "CLOCK_MONOTONIC_RAW";
+ case CLOCK_REALTIME_COARSE:
+ return "CLOCK_REALTIME_COARSE";
+ case CLOCK_MONOTONIC_COARSE:
+ return "CLOCK_MONOTONIC_COARSE";
+ case CLOCK_BOOTTIME:
+ return "CLOCK_BOOTTIME";
+ case CLOCK_REALTIME_ALARM:
+ return "CLOCK_REALTIME_ALARM";
+ case CLOCK_BOOTTIME_ALARM:
+ return "CLOCK_BOOTTIME_ALARM";
+ case CLOCK_TAI:
+ return "CLOCK_TAI";
+ };
+ return "UNKNOWN_CLOCKID";
+}
+
+
+long long timespec_sub(struct timespec a, struct timespec b)
+{
+ long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;
+
+ ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec;
+ return ret;
+}
+
+
+void sigalarm(int signo)
+{
+ long long delta_ns;
+ struct timespec ts;
+
+ clock_gettime(clock_id, &ts);
+ alarmcount++;
+
+ delta_ns = timespec_sub(start_time, ts);
+ delta_ns -= NSEC_PER_SEC * TIMER_SECS * alarmcount;
+
+ if (delta_ns < 0)
+ printf("%s timer fired early: FAIL\n", clockstring(clock_id));
+
+ if (delta_ns > max_latency_ns)
+ max_latency_ns = delta_ns;
+}
+
+int do_timer(int clock_id, int flags)
+{
+ struct sigevent se;
+ timer_t tm1;
+ struct itimerspec its1, its2;
+ int err;
+
+ /* Set up timer: */
+ memset(&se, 0, sizeof(se));
+ se.sigev_notify = SIGEV_SIGNAL;
+ se.sigev_signo = SIGRTMAX;
+ se.sigev_value.sival_int = 0;
+
+ max_latency_ns = 0;
+ alarmcount = 0;
+
+ err = timer_create(clock_id, &se, &tm1);
+ if (err) {
+ if ((clock_id == CLOCK_REALTIME_ALARM) ||
+ (clock_id == CLOCK_BOOTTIME_ALARM)) {
+ printf("%-22s %s missing CAP_WAKE_ALARM? : [UNSUPPORTED]\n",
+ clockstring(clock_id),
+ flags ? "ABSTIME":"RELTIME");
+ return 0;
+ }
+ printf("%s - timer_create() failed\n", clockstring(clock_id));
+ return -1;
+ }
+
+ clock_gettime(clock_id, &start_time);
+ if (flags) {
+ its1.it_value = start_time;
+ its1.it_value.tv_sec += TIMER_SECS;
+ } else {
+ its1.it_value.tv_sec = TIMER_SECS;
+ its1.it_value.tv_nsec = 0;
+ }
+ its1.it_interval.tv_sec = TIMER_SECS;
+ its1.it_interval.tv_nsec = 0;
+
+ err = timer_settime(tm1, flags, &its1, &its2);
+ if (err) {
+ printf("%s - timer_settime() failed\n", clockstring(clock_id));
+ return -1;
+ }
+
+ while (alarmcount < 5)
+ sleep(1);
+
+ printf("%-22s %s max latency: %10lld ns : ",
+ clockstring(clock_id),
+ flags ? "ABSTIME":"RELTIME",
+ max_latency_ns);
+
+ timer_delete(tm1);
+ if (max_latency_ns < UNRESONABLE_LATENCY) {
+ printf("[OK]\n");
+ return 0;
+ }
+ printf("[FAILED]\n");
+ return -1;
+}
+
+int main(void)
+{
+ struct sigaction act;
+ int signum = SIGRTMAX;
+ int ret = 0;
+
+ /* Set up signal handler: */
+ sigfillset(&act.sa_mask);
+ act.sa_flags = 0;
+ act.sa_handler = sigalarm;
+ sigaction(signum, &act, NULL);
+
+ printf("Setting timers for every %i seconds\n", TIMER_SECS);
+ for (clock_id = 0; clock_id < NR_CLOCKIDS; clock_id++) {
+
+ if ((clock_id == CLOCK_PROCESS_CPUTIME_ID) ||
+ (clock_id == CLOCK_THREAD_CPUTIME_ID) ||
+ (clock_id == CLOCK_MONOTONIC_RAW) ||
+ (clock_id == CLOCK_REALTIME_COARSE) ||
+ (clock_id == CLOCK_MONOTONIC_COARSE) ||
+ (clock_id == CLOCK_HWSPECIFIC))
+ continue;
+
+ ret |= do_timer(clock_id, TIMER_ABSTIME);
+ ret |= do_timer(clock_id, 0);
+ }
+ if (ret)
+ return ksft_exit_fail();
+ return ksft_exit_pass();
+}
diff --git a/timers/skew_consistency.c b/timers/skew_consistency.c
new file mode 100644
index 0000000..5562f84
--- /dev/null
+++ b/timers/skew_consistency.c
@@ -0,0 +1,89 @@
+/* ADJ_FREQ Skew consistency test
+ * by: john stultz (johnstul@us.ibm.com)
+ * (C) Copyright IBM 2012
+ * Licensed under the GPLv2
+ *
+ * NOTE: This is a meta-test which cranks the ADJ_FREQ knob back
+ * and forth and watches for consistency problems. Thus this test requires
+ * that the inconsistency-check tests be present in the same directory it
+ * is run from.
+ *
+ * To build:
+ * $ gcc skew_consistency.c -o skew_consistency -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/wait.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+ exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+ exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000LL
+
+int main(int argv, char **argc)
+{
+ struct timex tx;
+ int ret, ppm;
+ pid_t pid;
+
+
+ printf("Running Asyncrhonous Frequency Changing Tests...\n");
+
+ pid = fork();
+ if (!pid)
+ return system("./inconsistency-check -c 1 -t 600");
+
+ ppm = 500;
+ ret = 0;
+
+ while (pid != waitpid(pid, &ret, WNOHANG)) {
+ ppm = -ppm;
+ tx.modes = ADJ_FREQUENCY;
+ tx.freq = ppm << 16;
+ adjtimex(&tx);
+ usleep(500000);
+ }
+
+ /* Set things back */
+ tx.modes = ADJ_FREQUENCY;
+ tx.offset = 0;
+ adjtimex(&tx);
+
+
+ if (ret) {
+ printf("[FAILED]\n");
+ return ksft_exit_fail();
+ }
+ printf("[OK]\n");
+ return ksft_exit_pass();
+}
diff --git a/timers/threadtest.c b/timers/threadtest.c
new file mode 100644
index 0000000..e632e11
--- /dev/null
+++ b/timers/threadtest.c
@@ -0,0 +1,204 @@
+/* threadtest.c
+ * by: john stultz (johnstul@us.ibm.com)
+ * (C) Copyright IBM 2004, 2005, 2006, 2012
+ * Licensed under the GPLv2
+ *
+ * To build:
+ * $ gcc threadtest.c -o threadtest -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <pthread.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+ exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+ exit(1);
+}
+#endif
+
+
+/* serializes shared list access */
+pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
+/* serializes console output */
+pthread_mutex_t print_lock = PTHREAD_MUTEX_INITIALIZER;
+
+
+#define MAX_THREADS 128
+#define LISTSIZE 128
+
+int done = 0;
+
+struct timespec global_list[LISTSIZE];
+int listcount = 0;
+
+
+void checklist(struct timespec *list, int size)
+{
+ int i, j;
+ struct timespec *a, *b;
+
+ /* scan the list */
+ for (i = 0; i < size-1; i++) {
+ a = &list[i];
+ b = &list[i+1];
+
+ /* look for any time inconsistencies */
+ if ((b->tv_sec <= a->tv_sec) &&
+ (b->tv_nsec < a->tv_nsec)) {
+
+ /* flag other threads */
+ done = 1;
+
+ /*serialize printing to avoid junky output*/
+ pthread_mutex_lock(&print_lock);
+
+ /* dump the list */
+ printf("\n");
+ for (j = 0; j < size; j++) {
+ if (j == i)
+ printf("---------------\n");
+ printf("%lu:%lu\n", list[j].tv_sec, list[j].tv_nsec);
+ if (j == i+1)
+ printf("---------------\n");
+ }
+ printf("[FAILED]\n");
+
+ pthread_mutex_unlock(&print_lock);
+ }
+ }
+}
+
+/* The shared thread shares a global list
+ * that each thread fills while holding the lock.
+ * This stresses clock syncronization across cpus.
+ */
+void *shared_thread(void *arg)
+{
+ while (!done) {
+ /* protect the list */
+ pthread_mutex_lock(&list_lock);
+
+ /* see if we're ready to check the list */
+ if (listcount >= LISTSIZE) {
+ checklist(global_list, LISTSIZE);
+ listcount = 0;
+ }
+ clock_gettime(CLOCK_MONOTONIC, &global_list[listcount++]);
+
+ pthread_mutex_unlock(&list_lock);
+ }
+ return NULL;
+}
+
+
+/* Each independent thread fills in its own
+ * list. This stresses clock_gettime() lock contention.
+ */
+void *independent_thread(void *arg)
+{
+ struct timespec my_list[LISTSIZE];
+ int count;
+
+ while (!done) {
+ /* fill the list */
+ for (count = 0; count < LISTSIZE; count++)
+ clock_gettime(CLOCK_MONOTONIC, &my_list[count]);
+ checklist(my_list, LISTSIZE);
+ }
+ return NULL;
+}
+
+#define DEFAULT_THREAD_COUNT 8
+#define DEFAULT_RUNTIME 30
+
+int main(int argc, char **argv)
+{
+ int thread_count, i;
+ time_t start, now, runtime;
+ char buf[255];
+ pthread_t pth[MAX_THREADS];
+ int opt;
+ void *tret;
+ int ret = 0;
+ void *(*thread)(void *) = shared_thread;
+
+ thread_count = DEFAULT_THREAD_COUNT;
+ runtime = DEFAULT_RUNTIME;
+
+ /* Process arguments */
+ while ((opt = getopt(argc, argv, "t:n:i")) != -1) {
+ switch (opt) {
+ case 't':
+ runtime = atoi(optarg);
+ break;
+ case 'n':
+ thread_count = atoi(optarg);
+ break;
+ case 'i':
+ thread = independent_thread;
+ printf("using independent threads\n");
+ break;
+ default:
+ printf("Usage: %s [-t <secs>] [-n <numthreads>] [-i]\n", argv[0]);
+ printf(" -t: time to run\n");
+ printf(" -n: number of threads\n");
+ printf(" -i: use independent threads\n");
+ return -1;
+ }
+ }
+
+ if (thread_count > MAX_THREADS)
+ thread_count = MAX_THREADS;
+
+
+ setbuf(stdout, NULL);
+
+ start = time(0);
+ strftime(buf, 255, "%a, %d %b %Y %T %z", localtime(&start));
+ printf("%s\n", buf);
+ printf("Testing consistency with %i threads for %ld seconds: ", thread_count, runtime);
+
+ /* spawn */
+ for (i = 0; i < thread_count; i++)
+ pthread_create(&pth[i], 0, thread, 0);
+
+ while (time(&now) < start + runtime) {
+ sleep(1);
+ if (done) {
+ ret = 1;
+ strftime(buf, 255, "%a, %d %b %Y %T %z", localtime(&now));
+ printf("%s\n", buf);
+ goto out;
+ }
+ }
+ printf("[OK]\n");
+ done = 1;
+
+out:
+ /* wait */
+ for (i = 0; i < thread_count; i++)
+ pthread_join(pth[i], &tret);
+
+ /* die */
+ if (ret)
+ ksft_exit_fail();
+ return ksft_exit_pass();
+}
diff --git a/timers/valid-adjtimex.c b/timers/valid-adjtimex.c
new file mode 100644
index 0000000..e86d937
--- /dev/null
+++ b/timers/valid-adjtimex.c
@@ -0,0 +1,202 @@
+/* valid adjtimex test
+ * by: John Stultz <john.stultz@linaro.org>
+ * (C) Copyright Linaro 2015
+ * Licensed under the GPLv2
+ *
+ * This test validates adjtimex interface with valid
+ * and invalid test data.
+ *
+ * Usage: valid-adjtimex
+ *
+ * To build:
+ * $ gcc valid-adjtimex.c -o valid-adjtimex -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+ exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+ exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000L
+
+/* clear NTP time_status & time_state */
+int clear_time_state(void)
+{
+ struct timex tx;
+ int ret;
+
+ tx.modes = ADJ_STATUS;
+ tx.status = 0;
+ ret = adjtimex(&tx);
+ return ret;
+}
+
+#define NUM_FREQ_VALID 32
+#define NUM_FREQ_OUTOFRANGE 4
+#define NUM_FREQ_INVALID 2
+
+long valid_freq[NUM_FREQ_VALID] = {
+ -499<<16,
+ -450<<16,
+ -400<<16,
+ -350<<16,
+ -300<<16,
+ -250<<16,
+ -200<<16,
+ -150<<16,
+ -100<<16,
+ -75<<16,
+ -50<<16,
+ -25<<16,
+ -10<<16,
+ -5<<16,
+ -1<<16,
+ -1000,
+ 1<<16,
+ 5<<16,
+ 10<<16,
+ 25<<16,
+ 50<<16,
+ 75<<16,
+ 100<<16,
+ 150<<16,
+ 200<<16,
+ 250<<16,
+ 300<<16,
+ 350<<16,
+ 400<<16,
+ 450<<16,
+ 499<<16,
+};
+
+long outofrange_freq[NUM_FREQ_OUTOFRANGE] = {
+ -1000<<16,
+ -550<<16,
+ 550<<16,
+ 1000<<16,
+};
+
+#define LONG_MAX (~0UL>>1)
+#define LONG_MIN (-LONG_MAX - 1)
+
+long invalid_freq[NUM_FREQ_INVALID] = {
+ LONG_MAX,
+ LONG_MIN,
+};
+
+int validate_freq(void)
+{
+ struct timex tx;
+ int ret, pass = 0;
+ int i;
+
+ clear_time_state();
+
+ memset(&tx, 0, sizeof(struct timex));
+ /* Set the leap second insert flag */
+
+ printf("Testing ADJ_FREQ... ");
+ for (i = 0; i < NUM_FREQ_VALID; i++) {
+ tx.modes = ADJ_FREQUENCY;
+ tx.freq = valid_freq[i];
+
+ ret = adjtimex(&tx);
+ if (ret < 0) {
+ printf("[FAIL]\n");
+ printf("Error: adjtimex(ADJ_FREQ, %ld - %ld ppm\n",
+ valid_freq[i], valid_freq[i]>>16);
+ pass = -1;
+ goto out;
+ }
+ tx.modes = 0;
+ ret = adjtimex(&tx);
+ if (tx.freq != valid_freq[i]) {
+ printf("Warning: freq value %ld not what we set it (%ld)!\n",
+ tx.freq, valid_freq[i]);
+ }
+ }
+ for (i = 0; i < NUM_FREQ_OUTOFRANGE; i++) {
+ tx.modes = ADJ_FREQUENCY;
+ tx.freq = outofrange_freq[i];
+
+ ret = adjtimex(&tx);
+ if (ret < 0) {
+ printf("[FAIL]\n");
+ printf("Error: adjtimex(ADJ_FREQ, %ld - %ld ppm\n",
+ outofrange_freq[i], outofrange_freq[i]>>16);
+ pass = -1;
+ goto out;
+ }
+ tx.modes = 0;
+ ret = adjtimex(&tx);
+ if (tx.freq == outofrange_freq[i]) {
+ printf("[FAIL]\n");
+ printf("ERROR: out of range value %ld actually set!\n",
+ tx.freq);
+ pass = -1;
+ goto out;
+ }
+ }
+
+
+ if (sizeof(long) == 8) { /* this case only applies to 64bit systems */
+ for (i = 0; i < NUM_FREQ_INVALID; i++) {
+ tx.modes = ADJ_FREQUENCY;
+ tx.freq = invalid_freq[i];
+ ret = adjtimex(&tx);
+ if (ret >= 0) {
+ printf("[FAIL]\n");
+ printf("Error: No failure on invalid ADJ_FREQUENCY %ld\n",
+ invalid_freq[i]);
+ pass = -1;
+ goto out;
+ }
+ }
+ }
+
+ printf("[OK]\n");
+out:
+ /* reset freq to zero */
+ tx.modes = ADJ_FREQUENCY;
+ tx.freq = 0;
+ ret = adjtimex(&tx);
+
+ return pass;
+}
+
+
+int main(int argc, char **argv)
+{
+ if (validate_freq())
+ return ksft_exit_fail();
+
+ return ksft_exit_pass();
+}
diff --git a/user/Makefile b/user/Makefile
index 12c9d15..d401b63 100644
--- a/user/Makefile
+++ b/user/Makefile
@@ -3,5 +3,6 @@
# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
all:
-run_tests: all
- ./test_user_copy.sh
+TEST_PROGS := test_user_copy.sh
+
+include ../lib.mk
diff --git a/vm/Makefile b/vm/Makefile
index 077828c..e4bb1de 100644
--- a/vm/Makefile
+++ b/vm/Makefile
@@ -1,16 +1,29 @@
# Makefile for vm selftests
-CC = $(CROSS_COMPILE)gcc
-CFLAGS = -Wall
-BINARIES = hugepage-mmap hugepage-shm map_hugetlb thuge-gen hugetlbfstest
+CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
+BINARIES = compaction_test
+BINARIES += hugepage-mmap
+BINARIES += hugepage-shm
+BINARIES += map_hugetlb
+BINARIES += mlock2-tests
+BINARIES += on-fault-limit
+BINARIES += thuge-gen
BINARIES += transhuge-stress
+BINARIES += userfaultfd
all: $(BINARIES)
%: %.c
$(CC) $(CFLAGS) -o $@ $^ -lrt
+userfaultfd: userfaultfd.c ../../../../usr/include/linux/kernel.h
+ $(CC) $(CFLAGS) -O2 -o $@ $< -lpthread
-run_tests: all
- @/bin/sh ./run_vmtests || (echo "vmtests: [FAIL]"; exit 1)
+../../../../usr/include/linux/kernel.h:
+ make -C ../../../.. headers_install
+
+TEST_PROGS := run_vmtests
+TEST_FILES := $(BINARIES)
+
+include ../lib.mk
clean:
$(RM) $(BINARIES)
diff --git a/vm/compaction_test.c b/vm/compaction_test.c
new file mode 100644
index 0000000..932ff57
--- /dev/null
+++ b/vm/compaction_test.c
@@ -0,0 +1,225 @@
+/*
+ *
+ * A test for the patch "Allow compaction of unevictable pages".
+ * With this patch we should be able to allocate at least 1/4
+ * of RAM in huge pages. Without the patch much less is
+ * allocated.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+
+#define MAP_SIZE 1048576
+
+struct map_list {
+ void *map;
+ struct map_list *next;
+};
+
+int read_memory_info(unsigned long *memfree, unsigned long *hugepagesize)
+{
+ char buffer[256] = {0};
+ char *cmd = "cat /proc/meminfo | grep -i memfree | grep -o '[0-9]*'";
+ FILE *cmdfile = popen(cmd, "r");
+
+ if (!(fgets(buffer, sizeof(buffer), cmdfile))) {
+ perror("Failed to read meminfo\n");
+ return -1;
+ }
+
+ pclose(cmdfile);
+
+ *memfree = atoll(buffer);
+ cmd = "cat /proc/meminfo | grep -i hugepagesize | grep -o '[0-9]*'";
+ cmdfile = popen(cmd, "r");
+
+ if (!(fgets(buffer, sizeof(buffer), cmdfile))) {
+ perror("Failed to read meminfo\n");
+ return -1;
+ }
+
+ pclose(cmdfile);
+ *hugepagesize = atoll(buffer);
+
+ return 0;
+}
+
+int prereq(void)
+{
+ char allowed;
+ int fd;
+
+ fd = open("/proc/sys/vm/compact_unevictable_allowed",
+ O_RDONLY | O_NONBLOCK);
+ if (fd < 0) {
+ perror("Failed to open\n"
+ "/proc/sys/vm/compact_unevictable_allowed\n");
+ return -1;
+ }
+
+ if (read(fd, &allowed, sizeof(char)) != sizeof(char)) {
+ perror("Failed to read from\n"
+ "/proc/sys/vm/compact_unevictable_allowed\n");
+ close(fd);
+ return -1;
+ }
+
+ close(fd);
+ if (allowed == '1')
+ return 0;
+
+ return -1;
+}
+
+int check_compaction(unsigned long mem_free, unsigned int hugepage_size)
+{
+ int fd;
+ int compaction_index = 0;
+ char initial_nr_hugepages[10] = {0};
+ char nr_hugepages[10] = {0};
+
+ /* We want to test with 80% of available memory. Else, OOM killer comes
+ in to play */
+ mem_free = mem_free * 0.8;
+
+ fd = open("/proc/sys/vm/nr_hugepages", O_RDWR | O_NONBLOCK);
+ if (fd < 0) {
+ perror("Failed to open /proc/sys/vm/nr_hugepages");
+ return -1;
+ }
+
+ if (read(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) <= 0) {
+ perror("Failed to read from /proc/sys/vm/nr_hugepages");
+ goto close_fd;
+ }
+
+ /* Start with the initial condition of 0 huge pages*/
+ if (write(fd, "0", sizeof(char)) != sizeof(char)) {
+ perror("Failed to write to /proc/sys/vm/nr_hugepages\n");
+ goto close_fd;
+ }
+
+ lseek(fd, 0, SEEK_SET);
+
+ /* Request a large number of huge pages. The Kernel will allocate
+ as much as it can */
+ if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) {
+ perror("Failed to write to /proc/sys/vm/nr_hugepages\n");
+ goto close_fd;
+ }
+
+ lseek(fd, 0, SEEK_SET);
+
+ if (read(fd, nr_hugepages, sizeof(nr_hugepages)) <= 0) {
+ perror("Failed to read from /proc/sys/vm/nr_hugepages\n");
+ goto close_fd;
+ }
+
+ /* We should have been able to request at least 1/3 rd of the memory in
+ huge pages */
+ compaction_index = mem_free/(atoi(nr_hugepages) * hugepage_size);
+
+ if (compaction_index > 3) {
+ printf("No of huge pages allocated = %d\n",
+ (atoi(nr_hugepages)));
+ fprintf(stderr, "ERROR: Less that 1/%d of memory is available\n"
+ "as huge pages\n", compaction_index);
+ goto close_fd;
+ }
+
+ printf("No of huge pages allocated = %d\n",
+ (atoi(nr_hugepages)));
+
+ if (write(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages))
+ != strlen(initial_nr_hugepages)) {
+ perror("Failed to write to /proc/sys/vm/nr_hugepages\n");
+ goto close_fd;
+ }
+
+ close(fd);
+ return 0;
+
+ close_fd:
+ close(fd);
+ printf("Not OK. Compaction test failed.");
+ return -1;
+}
+
+
+int main(int argc, char **argv)
+{
+ struct rlimit lim;
+ struct map_list *list, *entry;
+ size_t page_size, i;
+ void *map = NULL;
+ unsigned long mem_free = 0;
+ unsigned long hugepage_size = 0;
+ unsigned long mem_fragmentable = 0;
+
+ if (prereq() != 0) {
+ printf("Either the sysctl compact_unevictable_allowed is not\n"
+ "set to 1 or couldn't read the proc file.\n"
+ "Skipping the test\n");
+ return 0;
+ }
+
+ lim.rlim_cur = RLIM_INFINITY;
+ lim.rlim_max = RLIM_INFINITY;
+ if (setrlimit(RLIMIT_MEMLOCK, &lim)) {
+ perror("Failed to set rlimit:\n");
+ return -1;
+ }
+
+ page_size = getpagesize();
+
+ list = NULL;
+
+ if (read_memory_info(&mem_free, &hugepage_size) != 0) {
+ printf("ERROR: Cannot read meminfo\n");
+ return -1;
+ }
+
+ mem_fragmentable = mem_free * 0.8 / 1024;
+
+ while (mem_fragmentable > 0) {
+ map = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_LOCKED, -1, 0);
+ if (map == MAP_FAILED)
+ break;
+
+ entry = malloc(sizeof(struct map_list));
+ if (!entry) {
+ munmap(map, MAP_SIZE);
+ break;
+ }
+ entry->map = map;
+ entry->next = list;
+ list = entry;
+
+ /* Write something (in this case the address of the map) to
+ * ensure that KSM can't merge the mapped pages
+ */
+ for (i = 0; i < MAP_SIZE; i += page_size)
+ *(unsigned long *)(map + i) = (unsigned long)map + i;
+
+ mem_fragmentable--;
+ }
+
+ for (entry = list; entry != NULL; entry = entry->next) {
+ munmap(entry->map, MAP_SIZE);
+ if (!entry->next)
+ break;
+ entry = entry->next;
+ }
+
+ if (check_compaction(mem_free, hugepage_size) == 0)
+ return 0;
+
+ return -1;
+}
diff --git a/vm/hugetlbfstest.c b/vm/hugetlbfstest.c
deleted file mode 100644
index ea40ff8..0000000
--- a/vm/hugetlbfstest.c
+++ /dev/null
@@ -1,84 +0,0 @@
-#define _GNU_SOURCE
-#include <assert.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-typedef unsigned long long u64;
-
-static size_t length = 1 << 24;
-
-static u64 read_rss(void)
-{
- char buf[4096], *s = buf;
- int i, fd;
- u64 rss;
-
- fd = open("/proc/self/statm", O_RDONLY);
- assert(fd > 2);
- memset(buf, 0, sizeof(buf));
- read(fd, buf, sizeof(buf) - 1);
- for (i = 0; i < 1; i++)
- s = strchr(s, ' ') + 1;
- rss = strtoull(s, NULL, 10);
- return rss << 12; /* assumes 4k pagesize */
-}
-
-static void do_mmap(int fd, int extra_flags, int unmap)
-{
- int *p;
- int flags = MAP_PRIVATE | MAP_POPULATE | extra_flags;
- u64 before, after;
-
- before = read_rss();
- p = mmap(NULL, length, PROT_READ | PROT_WRITE, flags, fd, 0);
- assert(p != MAP_FAILED ||
- !"mmap returned an unexpected error");
- after = read_rss();
- assert(llabs(after - before - length) < 0x40000 ||
- !"rss didn't grow as expected");
- if (!unmap)
- return;
- munmap(p, length);
- after = read_rss();
- assert(llabs(after - before) < 0x40000 ||
- !"rss didn't shrink as expected");
-}
-
-static int open_file(const char *path)
-{
- int fd, err;
-
- unlink(path);
- fd = open(path, O_CREAT | O_RDWR | O_TRUNC | O_EXCL
- | O_LARGEFILE | O_CLOEXEC, 0600);
- assert(fd > 2);
- unlink(path);
- err = ftruncate(fd, length);
- assert(!err);
- return fd;
-}
-
-int main(void)
-{
- int hugefd, fd;
-
- fd = open_file("/dev/shm/hugetlbhog");
- hugefd = open_file("/hugepages/hugetlbhog");
-
- system("echo 100 > /proc/sys/vm/nr_hugepages");
- do_mmap(-1, MAP_ANONYMOUS, 1);
- do_mmap(fd, 0, 1);
- do_mmap(-1, MAP_ANONYMOUS | MAP_HUGETLB, 1);
- do_mmap(hugefd, 0, 1);
- do_mmap(hugefd, MAP_HUGETLB, 1);
- /* Leak the last one to test do_exit() */
- do_mmap(-1, MAP_ANONYMOUS | MAP_HUGETLB, 0);
- printf("oll korrekt.\n");
- return 0;
-}
diff --git a/vm/map_hugetlb.c b/vm/map_hugetlb.c
index ac56639..addcd6f 100644
--- a/vm/map_hugetlb.c
+++ b/vm/map_hugetlb.c
@@ -73,7 +73,11 @@ int main(void)
write_bytes(addr);
ret = read_bytes(addr);
- munmap(addr, LENGTH);
+ /* munmap() length of MAP_HUGETLB memory must be hugepage aligned */
+ if (munmap(addr, LENGTH)) {
+ perror("munmap");
+ exit(1);
+ }
return ret;
}
diff --git a/vm/mlock2-tests.c b/vm/mlock2-tests.c
new file mode 100644
index 0000000..02ca5e0
--- /dev/null
+++ b/vm/mlock2-tests.c
@@ -0,0 +1,737 @@
+#define _GNU_SOURCE
+#include <sys/mman.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <syscall.h>
+#include <errno.h>
+#include <stdbool.h>
+
+#ifndef MLOCK_ONFAULT
+#define MLOCK_ONFAULT 1
+#endif
+
+#ifndef MCL_ONFAULT
+#define MCL_ONFAULT (MCL_FUTURE << 1)
+#endif
+
+static int mlock2_(void *start, size_t len, int flags)
+{
+#ifdef __NR_mlock2
+ return syscall(__NR_mlock2, start, len, flags);
+#else
+ errno = ENOSYS;
+ return -1;
+#endif
+}
+
+struct vm_boundaries {
+ unsigned long start;
+ unsigned long end;
+};
+
+static int get_vm_area(unsigned long addr, struct vm_boundaries *area)
+{
+ FILE *file;
+ int ret = 1;
+ char line[1024] = {0};
+ char *end_addr;
+ char *stop;
+ unsigned long start;
+ unsigned long end;
+
+ if (!area)
+ return ret;
+
+ file = fopen("/proc/self/maps", "r");
+ if (!file) {
+ perror("fopen");
+ return ret;
+ }
+
+ memset(area, 0, sizeof(struct vm_boundaries));
+
+ while(fgets(line, 1024, file)) {
+ end_addr = strchr(line, '-');
+ if (!end_addr) {
+ printf("cannot parse /proc/self/maps\n");
+ goto out;
+ }
+ *end_addr = '\0';
+ end_addr++;
+ stop = strchr(end_addr, ' ');
+ if (!stop) {
+ printf("cannot parse /proc/self/maps\n");
+ goto out;
+ }
+ stop = '\0';
+
+ sscanf(line, "%lx", &start);
+ sscanf(end_addr, "%lx", &end);
+
+ if (start <= addr && end > addr) {
+ area->start = start;
+ area->end = end;
+ ret = 0;
+ goto out;
+ }
+ }
+out:
+ fclose(file);
+ return ret;
+}
+
+static uint64_t get_pageflags(unsigned long addr)
+{
+ FILE *file;
+ uint64_t pfn;
+ unsigned long offset;
+
+ file = fopen("/proc/self/pagemap", "r");
+ if (!file) {
+ perror("fopen pagemap");
+ _exit(1);
+ }
+
+ offset = addr / getpagesize() * sizeof(pfn);
+
+ if (fseek(file, offset, SEEK_SET)) {
+ perror("fseek pagemap");
+ _exit(1);
+ }
+
+ if (fread(&pfn, sizeof(pfn), 1, file) != 1) {
+ perror("fread pagemap");
+ _exit(1);
+ }
+
+ fclose(file);
+ return pfn;
+}
+
+static uint64_t get_kpageflags(unsigned long pfn)
+{
+ uint64_t flags;
+ FILE *file;
+
+ file = fopen("/proc/kpageflags", "r");
+ if (!file) {
+ perror("fopen kpageflags");
+ _exit(1);
+ }
+
+ if (fseek(file, pfn * sizeof(flags), SEEK_SET)) {
+ perror("fseek kpageflags");
+ _exit(1);
+ }
+
+ if (fread(&flags, sizeof(flags), 1, file) != 1) {
+ perror("fread kpageflags");
+ _exit(1);
+ }
+
+ fclose(file);
+ return flags;
+}
+
+static FILE *seek_to_smaps_entry(unsigned long addr)
+{
+ FILE *file;
+ char *line = NULL;
+ size_t size = 0;
+ unsigned long start, end;
+ char perms[5];
+ unsigned long offset;
+ char dev[32];
+ unsigned long inode;
+ char path[BUFSIZ];
+
+ file = fopen("/proc/self/smaps", "r");
+ if (!file) {
+ perror("fopen smaps");
+ _exit(1);
+ }
+
+ while (getline(&line, &size, file) > 0) {
+ if (sscanf(line, "%lx-%lx %s %lx %s %lu %s\n",
+ &start, &end, perms, &offset, dev, &inode, path) < 6)
+ goto next;
+
+ if (start <= addr && addr < end)
+ goto out;
+
+next:
+ free(line);
+ line = NULL;
+ size = 0;
+ }
+
+ fclose(file);
+ file = NULL;
+
+out:
+ free(line);
+ return file;
+}
+
+#define VMFLAGS "VmFlags:"
+
+static bool is_vmflag_set(unsigned long addr, const char *vmflag)
+{
+ char *line = NULL;
+ char *flags;
+ size_t size = 0;
+ bool ret = false;
+ FILE *smaps;
+
+ smaps = seek_to_smaps_entry(addr);
+ if (!smaps) {
+ printf("Unable to parse /proc/self/smaps\n");
+ goto out;
+ }
+
+ while (getline(&line, &size, smaps) > 0) {
+ if (!strstr(line, VMFLAGS)) {
+ free(line);
+ line = NULL;
+ size = 0;
+ continue;
+ }
+
+ flags = line + strlen(VMFLAGS);
+ ret = (strstr(flags, vmflag) != NULL);
+ goto out;
+ }
+
+out:
+ free(line);
+ fclose(smaps);
+ return ret;
+}
+
+#define SIZE "Size:"
+#define RSS "Rss:"
+#define LOCKED "lo"
+
+static bool is_vma_lock_on_fault(unsigned long addr)
+{
+ bool ret = false;
+ bool locked;
+ FILE *smaps = NULL;
+ unsigned long vma_size, vma_rss;
+ char *line = NULL;
+ char *value;
+ size_t size = 0;
+
+ locked = is_vmflag_set(addr, LOCKED);
+ if (!locked)
+ goto out;
+
+ smaps = seek_to_smaps_entry(addr);
+ if (!smaps) {
+ printf("Unable to parse /proc/self/smaps\n");
+ goto out;
+ }
+
+ while (getline(&line, &size, smaps) > 0) {
+ if (!strstr(line, SIZE)) {
+ free(line);
+ line = NULL;
+ size = 0;
+ continue;
+ }
+
+ value = line + strlen(SIZE);
+ if (sscanf(value, "%lu kB", &vma_size) < 1) {
+ printf("Unable to parse smaps entry for Size\n");
+ goto out;
+ }
+ break;
+ }
+
+ while (getline(&line, &size, smaps) > 0) {
+ if (!strstr(line, RSS)) {
+ free(line);
+ line = NULL;
+ size = 0;
+ continue;
+ }
+
+ value = line + strlen(RSS);
+ if (sscanf(value, "%lu kB", &vma_rss) < 1) {
+ printf("Unable to parse smaps entry for Rss\n");
+ goto out;
+ }
+ break;
+ }
+
+ ret = locked && (vma_rss < vma_size);
+out:
+ free(line);
+ if (smaps)
+ fclose(smaps);
+ return ret;
+}
+
+#define PRESENT_BIT 0x8000000000000000ULL
+#define PFN_MASK 0x007FFFFFFFFFFFFFULL
+#define UNEVICTABLE_BIT (1UL << 18)
+
+static int lock_check(char *map)
+{
+ unsigned long page_size = getpagesize();
+ uint64_t page1_flags, page2_flags;
+
+ page1_flags = get_pageflags((unsigned long)map);
+ page2_flags = get_pageflags((unsigned long)map + page_size);
+
+ /* Both pages should be present */
+ if (((page1_flags & PRESENT_BIT) == 0) ||
+ ((page2_flags & PRESENT_BIT) == 0)) {
+ printf("Failed to make both pages present\n");
+ return 1;
+ }
+
+ page1_flags = get_kpageflags(page1_flags & PFN_MASK);
+ page2_flags = get_kpageflags(page2_flags & PFN_MASK);
+
+ /* Both pages should be unevictable */
+ if (((page1_flags & UNEVICTABLE_BIT) == 0) ||
+ ((page2_flags & UNEVICTABLE_BIT) == 0)) {
+ printf("Failed to make both pages unevictable\n");
+ return 1;
+ }
+
+ if (!is_vmflag_set((unsigned long)map, LOCKED)) {
+ printf("VMA flag %s is missing on page 1\n", LOCKED);
+ return 1;
+ }
+
+ if (!is_vmflag_set((unsigned long)map + page_size, LOCKED)) {
+ printf("VMA flag %s is missing on page 2\n", LOCKED);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int unlock_lock_check(char *map)
+{
+ unsigned long page_size = getpagesize();
+ uint64_t page1_flags, page2_flags;
+
+ page1_flags = get_pageflags((unsigned long)map);
+ page2_flags = get_pageflags((unsigned long)map + page_size);
+ page1_flags = get_kpageflags(page1_flags & PFN_MASK);
+ page2_flags = get_kpageflags(page2_flags & PFN_MASK);
+
+ if ((page1_flags & UNEVICTABLE_BIT) || (page2_flags & UNEVICTABLE_BIT)) {
+ printf("A page is still marked unevictable after unlock\n");
+ return 1;
+ }
+
+ if (is_vmflag_set((unsigned long)map, LOCKED)) {
+ printf("VMA flag %s is present on page 1 after unlock\n", LOCKED);
+ return 1;
+ }
+
+ if (is_vmflag_set((unsigned long)map + page_size, LOCKED)) {
+ printf("VMA flag %s is present on page 2 after unlock\n", LOCKED);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int test_mlock_lock()
+{
+ char *map;
+ int ret = 1;
+ unsigned long page_size = getpagesize();
+
+ map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (map == MAP_FAILED) {
+ perror("test_mlock_locked mmap");
+ goto out;
+ }
+
+ if (mlock2_(map, 2 * page_size, 0)) {
+ if (errno == ENOSYS) {
+ printf("Cannot call new mlock family, skipping test\n");
+ _exit(0);
+ }
+ perror("mlock2(0)");
+ goto unmap;
+ }
+
+ if (lock_check(map))
+ goto unmap;
+
+ /* Now unlock and recheck attributes */
+ if (munlock(map, 2 * page_size)) {
+ perror("munlock()");
+ goto unmap;
+ }
+
+ ret = unlock_lock_check(map);
+
+unmap:
+ munmap(map, 2 * page_size);
+out:
+ return ret;
+}
+
+static int onfault_check(char *map)
+{
+ unsigned long page_size = getpagesize();
+ uint64_t page1_flags, page2_flags;
+
+ page1_flags = get_pageflags((unsigned long)map);
+ page2_flags = get_pageflags((unsigned long)map + page_size);
+
+ /* Neither page should be present */
+ if ((page1_flags & PRESENT_BIT) || (page2_flags & PRESENT_BIT)) {
+ printf("Pages were made present by MLOCK_ONFAULT\n");
+ return 1;
+ }
+
+ *map = 'a';
+ page1_flags = get_pageflags((unsigned long)map);
+ page2_flags = get_pageflags((unsigned long)map + page_size);
+
+ /* Only page 1 should be present */
+ if ((page1_flags & PRESENT_BIT) == 0) {
+ printf("Page 1 is not present after fault\n");
+ return 1;
+ } else if (page2_flags & PRESENT_BIT) {
+ printf("Page 2 was made present\n");
+ return 1;
+ }
+
+ page1_flags = get_kpageflags(page1_flags & PFN_MASK);
+
+ /* Page 1 should be unevictable */
+ if ((page1_flags & UNEVICTABLE_BIT) == 0) {
+ printf("Failed to make faulted page unevictable\n");
+ return 1;
+ }
+
+ if (!is_vma_lock_on_fault((unsigned long)map)) {
+ printf("VMA is not marked for lock on fault\n");
+ return 1;
+ }
+
+ if (!is_vma_lock_on_fault((unsigned long)map + page_size)) {
+ printf("VMA is not marked for lock on fault\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+static int unlock_onfault_check(char *map)
+{
+ unsigned long page_size = getpagesize();
+ uint64_t page1_flags;
+
+ page1_flags = get_pageflags((unsigned long)map);
+ page1_flags = get_kpageflags(page1_flags & PFN_MASK);
+
+ if (page1_flags & UNEVICTABLE_BIT) {
+ printf("Page 1 is still marked unevictable after unlock\n");
+ return 1;
+ }
+
+ if (is_vma_lock_on_fault((unsigned long)map) ||
+ is_vma_lock_on_fault((unsigned long)map + page_size)) {
+ printf("VMA is still lock on fault after unlock\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+static int test_mlock_onfault()
+{
+ char *map;
+ int ret = 1;
+ unsigned long page_size = getpagesize();
+
+ map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (map == MAP_FAILED) {
+ perror("test_mlock_locked mmap");
+ goto out;
+ }
+
+ if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) {
+ if (errno == ENOSYS) {
+ printf("Cannot call new mlock family, skipping test\n");
+ _exit(0);
+ }
+ perror("mlock2(MLOCK_ONFAULT)");
+ goto unmap;
+ }
+
+ if (onfault_check(map))
+ goto unmap;
+
+ /* Now unlock and recheck attributes */
+ if (munlock(map, 2 * page_size)) {
+ if (errno == ENOSYS) {
+ printf("Cannot call new mlock family, skipping test\n");
+ _exit(0);
+ }
+ perror("munlock()");
+ goto unmap;
+ }
+
+ ret = unlock_onfault_check(map);
+unmap:
+ munmap(map, 2 * page_size);
+out:
+ return ret;
+}
+
+static int test_lock_onfault_of_present()
+{
+ char *map;
+ int ret = 1;
+ unsigned long page_size = getpagesize();
+ uint64_t page1_flags, page2_flags;
+
+ map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (map == MAP_FAILED) {
+ perror("test_mlock_locked mmap");
+ goto out;
+ }
+
+ *map = 'a';
+
+ if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) {
+ if (errno == ENOSYS) {
+ printf("Cannot call new mlock family, skipping test\n");
+ _exit(0);
+ }
+ perror("mlock2(MLOCK_ONFAULT)");
+ goto unmap;
+ }
+
+ page1_flags = get_pageflags((unsigned long)map);
+ page2_flags = get_pageflags((unsigned long)map + page_size);
+ page1_flags = get_kpageflags(page1_flags & PFN_MASK);
+ page2_flags = get_kpageflags(page2_flags & PFN_MASK);
+
+ /* Page 1 should be unevictable */
+ if ((page1_flags & UNEVICTABLE_BIT) == 0) {
+ printf("Failed to make present page unevictable\n");
+ goto unmap;
+ }
+
+ if (!is_vma_lock_on_fault((unsigned long)map) ||
+ !is_vma_lock_on_fault((unsigned long)map + page_size)) {
+ printf("VMA with present pages is not marked lock on fault\n");
+ goto unmap;
+ }
+ ret = 0;
+unmap:
+ munmap(map, 2 * page_size);
+out:
+ return ret;
+}
+
+static int test_munlockall()
+{
+ char *map;
+ int ret = 1;
+ unsigned long page_size = getpagesize();
+
+ map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+
+ if (map == MAP_FAILED) {
+ perror("test_munlockall mmap");
+ goto out;
+ }
+
+ if (mlockall(MCL_CURRENT)) {
+ perror("mlockall(MCL_CURRENT)");
+ goto out;
+ }
+
+ if (lock_check(map))
+ goto unmap;
+
+ if (munlockall()) {
+ perror("munlockall()");
+ goto unmap;
+ }
+
+ if (unlock_lock_check(map))
+ goto unmap;
+
+ munmap(map, 2 * page_size);
+
+ map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+
+ if (map == MAP_FAILED) {
+ perror("test_munlockall second mmap");
+ goto out;
+ }
+
+ if (mlockall(MCL_CURRENT | MCL_ONFAULT)) {
+ perror("mlockall(MCL_CURRENT | MCL_ONFAULT)");
+ goto unmap;
+ }
+
+ if (onfault_check(map))
+ goto unmap;
+
+ if (munlockall()) {
+ perror("munlockall()");
+ goto unmap;
+ }
+
+ if (unlock_onfault_check(map))
+ goto unmap;
+
+ if (mlockall(MCL_CURRENT | MCL_FUTURE)) {
+ perror("mlockall(MCL_CURRENT | MCL_FUTURE)");
+ goto out;
+ }
+
+ if (lock_check(map))
+ goto unmap;
+
+ if (munlockall()) {
+ perror("munlockall()");
+ goto unmap;
+ }
+
+ ret = unlock_lock_check(map);
+
+unmap:
+ munmap(map, 2 * page_size);
+out:
+ munlockall();
+ return ret;
+}
+
+static int test_vma_management(bool call_mlock)
+{
+ int ret = 1;
+ void *map;
+ unsigned long page_size = getpagesize();
+ struct vm_boundaries page1;
+ struct vm_boundaries page2;
+ struct vm_boundaries page3;
+
+ map = mmap(NULL, 3 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (map == MAP_FAILED) {
+ perror("mmap()");
+ return ret;
+ }
+
+ if (call_mlock && mlock2_(map, 3 * page_size, MLOCK_ONFAULT)) {
+ if (errno == ENOSYS) {
+ printf("Cannot call new mlock family, skipping test\n");
+ _exit(0);
+ }
+ perror("mlock(ONFAULT)\n");
+ goto out;
+ }
+
+ if (get_vm_area((unsigned long)map, &page1) ||
+ get_vm_area((unsigned long)map + page_size, &page2) ||
+ get_vm_area((unsigned long)map + page_size * 2, &page3)) {
+ printf("couldn't find mapping in /proc/self/maps\n");
+ goto out;
+ }
+
+ /*
+ * Before we unlock a portion, we need to that all three pages are in
+ * the same VMA. If they are not we abort this test (Note that this is
+ * not a failure)
+ */
+ if (page1.start != page2.start || page2.start != page3.start) {
+ printf("VMAs are not merged to start, aborting test\n");
+ ret = 0;
+ goto out;
+ }
+
+ if (munlock(map + page_size, page_size)) {
+ perror("munlock()");
+ goto out;
+ }
+
+ if (get_vm_area((unsigned long)map, &page1) ||
+ get_vm_area((unsigned long)map + page_size, &page2) ||
+ get_vm_area((unsigned long)map + page_size * 2, &page3)) {
+ printf("couldn't find mapping in /proc/self/maps\n");
+ goto out;
+ }
+
+ /* All three VMAs should be different */
+ if (page1.start == page2.start || page2.start == page3.start) {
+ printf("failed to split VMA for munlock\n");
+ goto out;
+ }
+
+ /* Now unlock the first and third page and check the VMAs again */
+ if (munlock(map, page_size * 3)) {
+ perror("munlock()");
+ goto out;
+ }
+
+ if (get_vm_area((unsigned long)map, &page1) ||
+ get_vm_area((unsigned long)map + page_size, &page2) ||
+ get_vm_area((unsigned long)map + page_size * 2, &page3)) {
+ printf("couldn't find mapping in /proc/self/maps\n");
+ goto out;
+ }
+
+ /* Now all three VMAs should be the same */
+ if (page1.start != page2.start || page2.start != page3.start) {
+ printf("failed to merge VMAs after munlock\n");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ munmap(map, 3 * page_size);
+ return ret;
+}
+
+static int test_mlockall(int (test_function)(bool call_mlock))
+{
+ int ret = 1;
+
+ if (mlockall(MCL_CURRENT | MCL_ONFAULT | MCL_FUTURE)) {
+ perror("mlockall");
+ return ret;
+ }
+
+ ret = test_function(false);
+ munlockall();
+ return ret;
+}
+
+int main(int argc, char **argv)
+{
+ int ret = 0;
+ ret += test_mlock_lock();
+ ret += test_mlock_onfault();
+ ret += test_munlockall();
+ ret += test_lock_onfault_of_present();
+ ret += test_vma_management(true);
+ ret += test_mlockall(test_vma_management);
+ return ret;
+}
diff --git a/vm/on-fault-limit.c b/vm/on-fault-limit.c
new file mode 100644
index 0000000..245accc
--- /dev/null
+++ b/vm/on-fault-limit.c
@@ -0,0 +1,47 @@
+#include <sys/mman.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifndef MCL_ONFAULT
+#define MCL_ONFAULT (MCL_FUTURE << 1)
+#endif
+
+static int test_limit(void)
+{
+ int ret = 1;
+ struct rlimit lims;
+ void *map;
+
+ if (getrlimit(RLIMIT_MEMLOCK, &lims)) {
+ perror("getrlimit");
+ return ret;
+ }
+
+ if (mlockall(MCL_CURRENT | MCL_ONFAULT | MCL_FUTURE)) {
+ perror("mlockall");
+ return ret;
+ }
+
+ map = mmap(NULL, 2 * lims.rlim_max, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, 0, 0);
+ if (map != MAP_FAILED)
+ printf("mmap should have failed, but didn't\n");
+ else {
+ ret = 0;
+ munmap(map, 2 * lims.rlim_max);
+ }
+
+ munlockall();
+ return ret;
+}
+
+int main(int argc, char **argv)
+{
+ int ret = 0;
+
+ ret += test_limit();
+ return ret;
+}
diff --git a/vm/run_vmtests b/vm/run_vmtests
index c87b681..e11968b 100644..100755
--- a/vm/run_vmtests
+++ b/vm/run_vmtests
@@ -20,13 +20,26 @@ done < /proc/meminfo
if [ -n "$freepgs" ] && [ -n "$pgsize" ]; then
nr_hugepgs=`cat /proc/sys/vm/nr_hugepages`
needpgs=`expr $needmem / $pgsize`
- if [ $freepgs -lt $needpgs ]; then
+ tries=2
+ while [ $tries -gt 0 ] && [ $freepgs -lt $needpgs ]; do
lackpgs=$(( $needpgs - $freepgs ))
+ echo 3 > /proc/sys/vm/drop_caches
echo $(( $lackpgs + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages
if [ $? -ne 0 ]; then
echo "Please run this test as root"
exit 1
fi
+ while read name size unit; do
+ if [ "$name" = "HugePages_Free:" ]; then
+ freepgs=$size
+ fi
+ done < /proc/meminfo
+ tries=$((tries - 1))
+ done
+ if [ $freepgs -lt $needpgs ]; then
+ printf "Not enough huge pages available (%d < %d)\n" \
+ $freepgs $needpgs
+ exit 1
fi
else
echo "no hugetlbfs support in kernel?"
@@ -75,10 +88,14 @@ else
echo "[PASS]"
fi
+echo "NOTE: The above hugetlb tests provide minimal coverage. Use"
+echo " https://github.com/libhugetlbfs/libhugetlbfs.git for"
+echo " hugetlb regression testing."
+
echo "--------------------"
-echo "running hugetlbfstest"
+echo "running userfaultfd"
echo "--------------------"
-./hugetlbfstest
+./userfaultfd 128 32
if [ $? -ne 0 ]; then
echo "[FAIL]"
exitcode=1
@@ -90,4 +107,38 @@ fi
umount $mnt
rm -rf $mnt
echo $nr_hugepgs > /proc/sys/vm/nr_hugepages
+
+echo "-----------------------"
+echo "running compaction_test"
+echo "-----------------------"
+./compaction_test
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
+echo "--------------------"
+echo "running on-fault-limit"
+echo "--------------------"
+sudo -u nobody ./on-fault-limit
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
+echo "--------------------"
+echo "running mlock2-tests"
+echo "--------------------"
+./mlock2-tests
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
exit $exitcode
diff --git a/vm/userfaultfd.c b/vm/userfaultfd.c
new file mode 100644
index 0000000..d77ed41
--- /dev/null
+++ b/vm/userfaultfd.c
@@ -0,0 +1,645 @@
+/*
+ * Stress userfaultfd syscall.
+ *
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * This test allocates two virtual areas and bounces the physical
+ * memory across the two virtual areas (from area_src to area_dst)
+ * using userfaultfd.
+ *
+ * There are three threads running per CPU:
+ *
+ * 1) one per-CPU thread takes a per-page pthread_mutex in a random
+ * page of the area_dst (while the physical page may still be in
+ * area_src), and increments a per-page counter in the same page,
+ * and checks its value against a verification region.
+ *
+ * 2) another per-CPU thread handles the userfaults generated by
+ * thread 1 above. userfaultfd blocking reads or poll() modes are
+ * exercised interleaved.
+ *
+ * 3) one last per-CPU thread transfers the memory in the background
+ * at maximum bandwidth (if not already transferred by thread
+ * 2). Each cpu thread takes cares of transferring a portion of the
+ * area.
+ *
+ * When all threads of type 3 completed the transfer, one bounce is
+ * complete. area_src and area_dst are then swapped. All threads are
+ * respawned and so the bounce is immediately restarted in the
+ * opposite direction.
+ *
+ * per-CPU threads 1 by triggering userfaults inside
+ * pthread_mutex_lock will also verify the atomicity of the memory
+ * transfer (UFFDIO_COPY).
+ *
+ * The program takes two parameters: the amounts of physical memory in
+ * megabytes (MiB) of the area and the number of bounces to execute.
+ *
+ * # 100MiB 99999 bounces
+ * ./userfaultfd 100 99999
+ *
+ * # 1GiB 99 bounces
+ * ./userfaultfd 1000 99
+ *
+ * # 10MiB-~6GiB 999 bounces, continue forever unless an error triggers
+ * while ./userfaultfd $[RANDOM % 6000 + 10] 999; do true; done
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+#include <signal.h>
+#include <poll.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <pthread.h>
+#include <linux/userfaultfd.h>
+
+#ifdef __NR_userfaultfd
+
+static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size;
+
+#define BOUNCE_RANDOM (1<<0)
+#define BOUNCE_RACINGFAULTS (1<<1)
+#define BOUNCE_VERIFY (1<<2)
+#define BOUNCE_POLL (1<<3)
+static int bounces;
+
+static unsigned long long *count_verify;
+static int uffd, finished, *pipefd;
+static char *area_src, *area_dst;
+static char *zeropage;
+pthread_attr_t attr;
+
+/* pthread_mutex_t starts at page offset 0 */
+#define area_mutex(___area, ___nr) \
+ ((pthread_mutex_t *) ((___area) + (___nr)*page_size))
+/*
+ * count is placed in the page after pthread_mutex_t naturally aligned
+ * to avoid non alignment faults on non-x86 archs.
+ */
+#define area_count(___area, ___nr) \
+ ((volatile unsigned long long *) ((unsigned long) \
+ ((___area) + (___nr)*page_size + \
+ sizeof(pthread_mutex_t) + \
+ sizeof(unsigned long long) - 1) & \
+ ~(unsigned long)(sizeof(unsigned long long) \
+ - 1)))
+
+static int my_bcmp(char *str1, char *str2, size_t n)
+{
+ unsigned long i;
+ for (i = 0; i < n; i++)
+ if (str1[i] != str2[i])
+ return 1;
+ return 0;
+}
+
+static void *locking_thread(void *arg)
+{
+ unsigned long cpu = (unsigned long) arg;
+ struct random_data rand;
+ unsigned long page_nr = *(&(page_nr)); /* uninitialized warning */
+ int32_t rand_nr;
+ unsigned long long count;
+ char randstate[64];
+ unsigned int seed;
+ time_t start;
+
+ if (bounces & BOUNCE_RANDOM) {
+ seed = (unsigned int) time(NULL) - bounces;
+ if (!(bounces & BOUNCE_RACINGFAULTS))
+ seed += cpu;
+ bzero(&rand, sizeof(rand));
+ bzero(&randstate, sizeof(randstate));
+ if (initstate_r(seed, randstate, sizeof(randstate), &rand))
+ fprintf(stderr, "srandom_r error\n"), exit(1);
+ } else {
+ page_nr = -bounces;
+ if (!(bounces & BOUNCE_RACINGFAULTS))
+ page_nr += cpu * nr_pages_per_cpu;
+ }
+
+ while (!finished) {
+ if (bounces & BOUNCE_RANDOM) {
+ if (random_r(&rand, &rand_nr))
+ fprintf(stderr, "random_r 1 error\n"), exit(1);
+ page_nr = rand_nr;
+ if (sizeof(page_nr) > sizeof(rand_nr)) {
+ if (random_r(&rand, &rand_nr))
+ fprintf(stderr, "random_r 2 error\n"), exit(1);
+ page_nr |= (((unsigned long) rand_nr) << 16) <<
+ 16;
+ }
+ } else
+ page_nr += 1;
+ page_nr %= nr_pages;
+
+ start = time(NULL);
+ if (bounces & BOUNCE_VERIFY) {
+ count = *area_count(area_dst, page_nr);
+ if (!count)
+ fprintf(stderr,
+ "page_nr %lu wrong count %Lu %Lu\n",
+ page_nr, count,
+ count_verify[page_nr]), exit(1);
+
+
+ /*
+ * We can't use bcmp (or memcmp) because that
+ * returns 0 erroneously if the memory is
+ * changing under it (even if the end of the
+ * page is never changing and always
+ * different).
+ */
+#if 1
+ if (!my_bcmp(area_dst + page_nr * page_size, zeropage,
+ page_size))
+ fprintf(stderr,
+ "my_bcmp page_nr %lu wrong count %Lu %Lu\n",
+ page_nr, count,
+ count_verify[page_nr]), exit(1);
+#else
+ unsigned long loops;
+
+ loops = 0;
+ /* uncomment the below line to test with mutex */
+ /* pthread_mutex_lock(area_mutex(area_dst, page_nr)); */
+ while (!bcmp(area_dst + page_nr * page_size, zeropage,
+ page_size)) {
+ loops += 1;
+ if (loops > 10)
+ break;
+ }
+ /* uncomment below line to test with mutex */
+ /* pthread_mutex_unlock(area_mutex(area_dst, page_nr)); */
+ if (loops) {
+ fprintf(stderr,
+ "page_nr %lu all zero thread %lu %p %lu\n",
+ page_nr, cpu, area_dst + page_nr * page_size,
+ loops);
+ if (loops > 10)
+ exit(1);
+ }
+#endif
+ }
+
+ pthread_mutex_lock(area_mutex(area_dst, page_nr));
+ count = *area_count(area_dst, page_nr);
+ if (count != count_verify[page_nr]) {
+ fprintf(stderr,
+ "page_nr %lu memory corruption %Lu %Lu\n",
+ page_nr, count,
+ count_verify[page_nr]), exit(1);
+ }
+ count++;
+ *area_count(area_dst, page_nr) = count_verify[page_nr] = count;
+ pthread_mutex_unlock(area_mutex(area_dst, page_nr));
+
+ if (time(NULL) - start > 1)
+ fprintf(stderr,
+ "userfault too slow %ld "
+ "possible false positive with overcommit\n",
+ time(NULL) - start);
+ }
+
+ return NULL;
+}
+
+static int copy_page(unsigned long offset)
+{
+ struct uffdio_copy uffdio_copy;
+
+ if (offset >= nr_pages * page_size)
+ fprintf(stderr, "unexpected offset %lu\n",
+ offset), exit(1);
+ uffdio_copy.dst = (unsigned long) area_dst + offset;
+ uffdio_copy.src = (unsigned long) area_src + offset;
+ uffdio_copy.len = page_size;
+ uffdio_copy.mode = 0;
+ uffdio_copy.copy = 0;
+ if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy)) {
+ /* real retval in ufdio_copy.copy */
+ if (uffdio_copy.copy != -EEXIST)
+ fprintf(stderr, "UFFDIO_COPY error %Ld\n",
+ uffdio_copy.copy), exit(1);
+ } else if (uffdio_copy.copy != page_size) {
+ fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n",
+ uffdio_copy.copy), exit(1);
+ } else
+ return 1;
+ return 0;
+}
+
+static void *uffd_poll_thread(void *arg)
+{
+ unsigned long cpu = (unsigned long) arg;
+ struct pollfd pollfd[2];
+ struct uffd_msg msg;
+ int ret;
+ unsigned long offset;
+ char tmp_chr;
+ unsigned long userfaults = 0;
+
+ pollfd[0].fd = uffd;
+ pollfd[0].events = POLLIN;
+ pollfd[1].fd = pipefd[cpu*2];
+ pollfd[1].events = POLLIN;
+
+ for (;;) {
+ ret = poll(pollfd, 2, -1);
+ if (!ret)
+ fprintf(stderr, "poll error %d\n", ret), exit(1);
+ if (ret < 0)
+ perror("poll"), exit(1);
+ if (pollfd[1].revents & POLLIN) {
+ if (read(pollfd[1].fd, &tmp_chr, 1) != 1)
+ fprintf(stderr, "read pipefd error\n"),
+ exit(1);
+ break;
+ }
+ if (!(pollfd[0].revents & POLLIN))
+ fprintf(stderr, "pollfd[0].revents %d\n",
+ pollfd[0].revents), exit(1);
+ ret = read(uffd, &msg, sizeof(msg));
+ if (ret < 0) {
+ if (errno == EAGAIN)
+ continue;
+ perror("nonblocking read error"), exit(1);
+ }
+ if (msg.event != UFFD_EVENT_PAGEFAULT)
+ fprintf(stderr, "unexpected msg event %u\n",
+ msg.event), exit(1);
+ if (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
+ fprintf(stderr, "unexpected write fault\n"), exit(1);
+ offset = (char *)(unsigned long)msg.arg.pagefault.address -
+ area_dst;
+ offset &= ~(page_size-1);
+ if (copy_page(offset))
+ userfaults++;
+ }
+ return (void *)userfaults;
+}
+
+pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static void *uffd_read_thread(void *arg)
+{
+ unsigned long *this_cpu_userfaults;
+ struct uffd_msg msg;
+ unsigned long offset;
+ int ret;
+
+ this_cpu_userfaults = (unsigned long *) arg;
+ *this_cpu_userfaults = 0;
+
+ pthread_mutex_unlock(&uffd_read_mutex);
+ /* from here cancellation is ok */
+
+ for (;;) {
+ ret = read(uffd, &msg, sizeof(msg));
+ if (ret != sizeof(msg)) {
+ if (ret < 0)
+ perror("blocking read error"), exit(1);
+ else
+ fprintf(stderr, "short read\n"), exit(1);
+ }
+ if (msg.event != UFFD_EVENT_PAGEFAULT)
+ fprintf(stderr, "unexpected msg event %u\n",
+ msg.event), exit(1);
+ if (bounces & BOUNCE_VERIFY &&
+ msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
+ fprintf(stderr, "unexpected write fault\n"), exit(1);
+ offset = (char *)(unsigned long)msg.arg.pagefault.address -
+ area_dst;
+ offset &= ~(page_size-1);
+ if (copy_page(offset))
+ (*this_cpu_userfaults)++;
+ }
+ return (void *)NULL;
+}
+
+static void *background_thread(void *arg)
+{
+ unsigned long cpu = (unsigned long) arg;
+ unsigned long page_nr;
+
+ for (page_nr = cpu * nr_pages_per_cpu;
+ page_nr < (cpu+1) * nr_pages_per_cpu;
+ page_nr++)
+ copy_page(page_nr * page_size);
+
+ return NULL;
+}
+
+static int stress(unsigned long *userfaults)
+{
+ unsigned long cpu;
+ pthread_t locking_threads[nr_cpus];
+ pthread_t uffd_threads[nr_cpus];
+ pthread_t background_threads[nr_cpus];
+ void **_userfaults = (void **) userfaults;
+
+ finished = 0;
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ if (pthread_create(&locking_threads[cpu], &attr,
+ locking_thread, (void *)cpu))
+ return 1;
+ if (bounces & BOUNCE_POLL) {
+ if (pthread_create(&uffd_threads[cpu], &attr,
+ uffd_poll_thread, (void *)cpu))
+ return 1;
+ } else {
+ if (pthread_create(&uffd_threads[cpu], &attr,
+ uffd_read_thread,
+ &_userfaults[cpu]))
+ return 1;
+ pthread_mutex_lock(&uffd_read_mutex);
+ }
+ if (pthread_create(&background_threads[cpu], &attr,
+ background_thread, (void *)cpu))
+ return 1;
+ }
+ for (cpu = 0; cpu < nr_cpus; cpu++)
+ if (pthread_join(background_threads[cpu], NULL))
+ return 1;
+
+ /*
+ * Be strict and immediately zap area_src, the whole area has
+ * been transferred already by the background treads. The
+ * area_src could then be faulted in in a racy way by still
+ * running uffdio_threads reading zeropages after we zapped
+ * area_src (but they're guaranteed to get -EEXIST from
+ * UFFDIO_COPY without writing zero pages into area_dst
+ * because the background threads already completed).
+ */
+ if (madvise(area_src, nr_pages * page_size, MADV_DONTNEED)) {
+ perror("madvise");
+ return 1;
+ }
+
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ char c;
+ if (bounces & BOUNCE_POLL) {
+ if (write(pipefd[cpu*2+1], &c, 1) != 1) {
+ fprintf(stderr, "pipefd write error\n");
+ return 1;
+ }
+ if (pthread_join(uffd_threads[cpu], &_userfaults[cpu]))
+ return 1;
+ } else {
+ if (pthread_cancel(uffd_threads[cpu]))
+ return 1;
+ if (pthread_join(uffd_threads[cpu], NULL))
+ return 1;
+ }
+ }
+
+ finished = 1;
+ for (cpu = 0; cpu < nr_cpus; cpu++)
+ if (pthread_join(locking_threads[cpu], NULL))
+ return 1;
+
+ return 0;
+}
+
+static int userfaultfd_stress(void)
+{
+ void *area;
+ char *tmp_area;
+ unsigned long nr;
+ struct uffdio_register uffdio_register;
+ struct uffdio_api uffdio_api;
+ unsigned long cpu;
+ int uffd_flags, err;
+ unsigned long userfaults[nr_cpus];
+
+ if (posix_memalign(&area, page_size, nr_pages * page_size)) {
+ fprintf(stderr, "out of memory\n");
+ return 1;
+ }
+ area_src = area;
+ if (posix_memalign(&area, page_size, nr_pages * page_size)) {
+ fprintf(stderr, "out of memory\n");
+ return 1;
+ }
+ area_dst = area;
+
+ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+ if (uffd < 0) {
+ fprintf(stderr,
+ "userfaultfd syscall not available in this kernel\n");
+ return 1;
+ }
+ uffd_flags = fcntl(uffd, F_GETFD, NULL);
+
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = 0;
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api)) {
+ fprintf(stderr, "UFFDIO_API\n");
+ return 1;
+ }
+ if (uffdio_api.api != UFFD_API) {
+ fprintf(stderr, "UFFDIO_API error %Lu\n", uffdio_api.api);
+ return 1;
+ }
+
+ count_verify = malloc(nr_pages * sizeof(unsigned long long));
+ if (!count_verify) {
+ perror("count_verify");
+ return 1;
+ }
+
+ for (nr = 0; nr < nr_pages; nr++) {
+ *area_mutex(area_src, nr) = (pthread_mutex_t)
+ PTHREAD_MUTEX_INITIALIZER;
+ count_verify[nr] = *area_count(area_src, nr) = 1;
+ /*
+ * In the transition between 255 to 256, powerpc will
+ * read out of order in my_bcmp and see both bytes as
+ * zero, so leave a placeholder below always non-zero
+ * after the count, to avoid my_bcmp to trigger false
+ * positives.
+ */
+ *(area_count(area_src, nr) + 1) = 1;
+ }
+
+ pipefd = malloc(sizeof(int) * nr_cpus * 2);
+ if (!pipefd) {
+ perror("pipefd");
+ return 1;
+ }
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ if (pipe2(&pipefd[cpu*2], O_CLOEXEC | O_NONBLOCK)) {
+ perror("pipe");
+ return 1;
+ }
+ }
+
+ if (posix_memalign(&area, page_size, page_size)) {
+ fprintf(stderr, "out of memory\n");
+ return 1;
+ }
+ zeropage = area;
+ bzero(zeropage, page_size);
+
+ pthread_mutex_lock(&uffd_read_mutex);
+
+ pthread_attr_init(&attr);
+ pthread_attr_setstacksize(&attr, 16*1024*1024);
+
+ err = 0;
+ while (bounces--) {
+ unsigned long expected_ioctls;
+
+ printf("bounces: %d, mode:", bounces);
+ if (bounces & BOUNCE_RANDOM)
+ printf(" rnd");
+ if (bounces & BOUNCE_RACINGFAULTS)
+ printf(" racing");
+ if (bounces & BOUNCE_VERIFY)
+ printf(" ver");
+ if (bounces & BOUNCE_POLL)
+ printf(" poll");
+ printf(", ");
+ fflush(stdout);
+
+ if (bounces & BOUNCE_POLL)
+ fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
+ else
+ fcntl(uffd, F_SETFL, uffd_flags & ~O_NONBLOCK);
+
+ /* register */
+ uffdio_register.range.start = (unsigned long) area_dst;
+ uffdio_register.range.len = nr_pages * page_size;
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
+ fprintf(stderr, "register failure\n");
+ return 1;
+ }
+ expected_ioctls = (1 << _UFFDIO_WAKE) |
+ (1 << _UFFDIO_COPY) |
+ (1 << _UFFDIO_ZEROPAGE);
+ if ((uffdio_register.ioctls & expected_ioctls) !=
+ expected_ioctls) {
+ fprintf(stderr,
+ "unexpected missing ioctl for anon memory\n");
+ return 1;
+ }
+
+ /*
+ * The madvise done previously isn't enough: some
+ * uffd_thread could have read userfaults (one of
+ * those already resolved by the background thread)
+ * and it may be in the process of calling
+ * UFFDIO_COPY. UFFDIO_COPY will read the zapped
+ * area_src and it would map a zero page in it (of
+ * course such a UFFDIO_COPY is perfectly safe as it'd
+ * return -EEXIST). The problem comes at the next
+ * bounce though: that racing UFFDIO_COPY would
+ * generate zeropages in the area_src, so invalidating
+ * the previous MADV_DONTNEED. Without this additional
+ * MADV_DONTNEED those zeropages leftovers in the
+ * area_src would lead to -EEXIST failure during the
+ * next bounce, effectively leaving a zeropage in the
+ * area_dst.
+ *
+ * Try to comment this out madvise to see the memory
+ * corruption being caught pretty quick.
+ *
+ * khugepaged is also inhibited to collapse THP after
+ * MADV_DONTNEED only after the UFFDIO_REGISTER, so it's
+ * required to MADV_DONTNEED here.
+ */
+ if (madvise(area_dst, nr_pages * page_size, MADV_DONTNEED)) {
+ perror("madvise 2");
+ return 1;
+ }
+
+ /* bounce pass */
+ if (stress(userfaults))
+ return 1;
+
+ /* unregister */
+ if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) {
+ fprintf(stderr, "register failure\n");
+ return 1;
+ }
+
+ /* verification */
+ if (bounces & BOUNCE_VERIFY) {
+ for (nr = 0; nr < nr_pages; nr++) {
+ if (*area_count(area_dst, nr) != count_verify[nr]) {
+ fprintf(stderr,
+ "error area_count %Lu %Lu %lu\n",
+ *area_count(area_src, nr),
+ count_verify[nr],
+ nr);
+ err = 1;
+ bounces = 0;
+ }
+ }
+ }
+
+ /* prepare next bounce */
+ tmp_area = area_src;
+ area_src = area_dst;
+ area_dst = tmp_area;
+
+ printf("userfaults:");
+ for (cpu = 0; cpu < nr_cpus; cpu++)
+ printf(" %lu", userfaults[cpu]);
+ printf("\n");
+ }
+
+ return err;
+}
+
+int main(int argc, char **argv)
+{
+ if (argc < 3)
+ fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
+ nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ page_size = sysconf(_SC_PAGE_SIZE);
+ if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2
+ > page_size)
+ fprintf(stderr, "Impossible to run this test\n"), exit(2);
+ nr_pages_per_cpu = atol(argv[1]) * 1024*1024 / page_size /
+ nr_cpus;
+ if (!nr_pages_per_cpu) {
+ fprintf(stderr, "invalid MiB\n");
+ fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
+ }
+ bounces = atoi(argv[2]);
+ if (bounces <= 0) {
+ fprintf(stderr, "invalid bounces\n");
+ fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
+ }
+ nr_pages = nr_pages_per_cpu * nr_cpus;
+ printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n",
+ nr_pages, nr_pages_per_cpu);
+ return userfaultfd_stress();
+}
+
+#else /* __NR_userfaultfd */
+
+#warning "missing __NR_userfaultfd definition"
+
+int main(void)
+{
+ printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n");
+ return 0;
+}
+
+#endif /* __NR_userfaultfd */
diff --git a/x86/.gitignore b/x86/.gitignore
new file mode 100644
index 0000000..15034fe
--- /dev/null
+++ b/x86/.gitignore
@@ -0,0 +1,2 @@
+*_32
+*_64
diff --git a/x86/Makefile b/x86/Makefile
new file mode 100644
index 0000000..eabcff4
--- /dev/null
+++ b/x86/Makefile
@@ -0,0 +1,65 @@
+all:
+
+include ../lib.mk
+
+.PHONY: all all_32 all_64 warn_32bit_failure clean
+
+TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs ldt_gdt syscall_nt ptrace_syscall
+TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn test_syscall_vdso unwind_vdso \
+ test_FCMOV test_FCOMI test_FISTTP
+
+TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
+BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32)
+BINARIES_64 := $(TARGETS_C_BOTHBITS:%=%_64)
+
+CFLAGS := -O2 -g -std=gnu99 -pthread -Wall
+
+UNAME_M := $(shell uname -m)
+CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
+
+ifeq ($(CAN_BUILD_I386),1)
+all: all_32
+TEST_PROGS += $(BINARIES_32)
+endif
+
+ifeq ($(CAN_BUILD_X86_64),1)
+all: all_64
+TEST_PROGS += $(BINARIES_64)
+endif
+
+all_32: $(BINARIES_32)
+
+all_64: $(BINARIES_64)
+
+clean:
+ $(RM) $(BINARIES_32) $(BINARIES_64)
+
+$(TARGETS_C_32BIT_ALL:%=%_32): %_32: %.c
+ $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl -lm
+
+$(TARGETS_C_BOTHBITS:%=%_64): %_64: %.c
+ $(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
+
+# x86_64 users should be encouraged to install 32-bit libraries
+ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),01)
+all: warn_32bit_failure
+
+warn_32bit_failure:
+ @echo "Warning: you seem to have a broken 32-bit build" 2>&1; \
+ echo "environment. This will reduce test coverage of 64-bit" 2>&1; \
+ echo "kernels. If you are using a Debian-like distribution," 2>&1; \
+ echo "try:"; 2>&1; \
+ echo ""; \
+ echo " apt-get install gcc-multilib libc6-i386 libc6-dev-i386"; \
+ echo ""; \
+ echo "If you are using a Fedora-like distribution, try:"; \
+ echo ""; \
+ echo " yum install glibc-devel.*i686"; \
+ exit 0;
+endif
+
+# Some tests have additional dependencies.
+sysret_ss_attrs_64: thunks.S
+ptrace_syscall_32: raw_syscall_helper_32.S
+test_syscall_vdso_32: thunks_32.S
diff --git a/x86/check_cc.sh b/x86/check_cc.sh
new file mode 100755
index 0000000..172d329
--- /dev/null
+++ b/x86/check_cc.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# check_cc.sh - Helper to test userspace compilation support
+# Copyright (c) 2015 Andrew Lutomirski
+# GPL v2
+
+CC="$1"
+TESTPROG="$2"
+shift 2
+
+if "$CC" -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then
+ echo 1
+else
+ echo 0
+fi
+
+exit 0
diff --git a/x86/entry_from_vm86.c b/x86/entry_from_vm86.c
new file mode 100644
index 0000000..d075ea0
--- /dev/null
+++ b/x86/entry_from_vm86.c
@@ -0,0 +1,238 @@
+/*
+ * entry_from_vm86.c - tests kernel entries from vm86 mode
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This exercises a few paths that need to special-case vm86 mode.
+ *
+ * GPL v2.
+ */
+
+#define _GNU_SOURCE
+
+#include <assert.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <err.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <sys/vm86.h>
+
+static unsigned long load_addr = 0x10000;
+static int nerrs = 0;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static sig_atomic_t got_signal;
+
+static void sighandler(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ if (ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_VM ||
+ (ctx->uc_mcontext.gregs[REG_CS] & 3) != 3) {
+ printf("[FAIL]\tSignal frame should not reflect vm86 mode\n");
+ nerrs++;
+ }
+
+ const char *signame;
+ if (sig == SIGSEGV)
+ signame = "SIGSEGV";
+ else if (sig == SIGILL)
+ signame = "SIGILL";
+ else
+ signame = "unexpected signal";
+
+ printf("[INFO]\t%s: FLAGS = 0x%lx, CS = 0x%hx\n", signame,
+ (unsigned long)ctx->uc_mcontext.gregs[REG_EFL],
+ (unsigned short)ctx->uc_mcontext.gregs[REG_CS]);
+
+ got_signal = 1;
+}
+
+asm (
+ ".pushsection .rodata\n\t"
+ ".type vmcode_bound, @object\n\t"
+ "vmcode:\n\t"
+ "vmcode_bound:\n\t"
+ ".code16\n\t"
+ "bound %ax, (2048)\n\t"
+ "int3\n\t"
+ "vmcode_sysenter:\n\t"
+ "sysenter\n\t"
+ "vmcode_syscall:\n\t"
+ "syscall\n\t"
+ "vmcode_sti:\n\t"
+ "sti\n\t"
+ "vmcode_int3:\n\t"
+ "int3\n\t"
+ "vmcode_int80:\n\t"
+ "int $0x80\n\t"
+ ".size vmcode, . - vmcode\n\t"
+ "end_vmcode:\n\t"
+ ".code32\n\t"
+ ".popsection"
+ );
+
+extern unsigned char vmcode[], end_vmcode[];
+extern unsigned char vmcode_bound[], vmcode_sysenter[], vmcode_syscall[],
+ vmcode_sti[], vmcode_int3[], vmcode_int80[];
+
+/* Returns false if the test was skipped. */
+static bool do_test(struct vm86plus_struct *v86, unsigned long eip,
+ unsigned int rettype, unsigned int retarg,
+ const char *text)
+{
+ long ret;
+
+ printf("[RUN]\t%s from vm86 mode\n", text);
+ v86->regs.eip = eip;
+ ret = vm86(VM86_ENTER, v86);
+
+ if (ret == -1 && (errno == ENOSYS || errno == EPERM)) {
+ printf("[SKIP]\tvm86 %s\n",
+ errno == ENOSYS ? "not supported" : "not allowed");
+ return false;
+ }
+
+ if (VM86_TYPE(ret) == VM86_INTx) {
+ char trapname[32];
+ int trapno = VM86_ARG(ret);
+ if (trapno == 13)
+ strcpy(trapname, "GP");
+ else if (trapno == 5)
+ strcpy(trapname, "BR");
+ else if (trapno == 14)
+ strcpy(trapname, "PF");
+ else
+ sprintf(trapname, "%d", trapno);
+
+ printf("[INFO]\tExited vm86 mode due to #%s\n", trapname);
+ } else if (VM86_TYPE(ret) == VM86_UNKNOWN) {
+ printf("[INFO]\tExited vm86 mode due to unhandled GP fault\n");
+ } else if (VM86_TYPE(ret) == VM86_TRAP) {
+ printf("[INFO]\tExited vm86 mode due to a trap (arg=%ld)\n",
+ VM86_ARG(ret));
+ } else if (VM86_TYPE(ret) == VM86_SIGNAL) {
+ printf("[INFO]\tExited vm86 mode due to a signal\n");
+ } else if (VM86_TYPE(ret) == VM86_STI) {
+ printf("[INFO]\tExited vm86 mode due to STI\n");
+ } else {
+ printf("[INFO]\tExited vm86 mode due to type %ld, arg %ld\n",
+ VM86_TYPE(ret), VM86_ARG(ret));
+ }
+
+ if (rettype == -1 ||
+ (VM86_TYPE(ret) == rettype && VM86_ARG(ret) == retarg)) {
+ printf("[OK]\tReturned correctly\n");
+ } else {
+ printf("[FAIL]\tIncorrect return reason\n");
+ nerrs++;
+ }
+
+ return true;
+}
+
+int main(void)
+{
+ struct vm86plus_struct v86;
+ unsigned char *addr = mmap((void *)load_addr, 4096,
+ PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1,0);
+ if (addr != (unsigned char *)load_addr)
+ err(1, "mmap");
+
+ memcpy(addr, vmcode, end_vmcode - vmcode);
+ addr[2048] = 2;
+ addr[2050] = 3;
+
+ memset(&v86, 0, sizeof(v86));
+
+ v86.regs.cs = load_addr / 16;
+ v86.regs.ss = load_addr / 16;
+ v86.regs.ds = load_addr / 16;
+ v86.regs.es = load_addr / 16;
+
+ assert((v86.regs.cs & 3) == 0); /* Looks like RPL = 0 */
+
+ /* #BR -- should deliver SIG??? */
+ do_test(&v86, vmcode_bound - vmcode, VM86_INTx, 5, "#BR");
+
+ /*
+ * SYSENTER -- should cause #GP or #UD depending on CPU.
+ * Expected return type -1 means that we shouldn't validate
+ * the vm86 return value. This will avoid problems on non-SEP
+ * CPUs.
+ */
+ sethandler(SIGILL, sighandler, 0);
+ do_test(&v86, vmcode_sysenter - vmcode, -1, 0, "SYSENTER");
+ clearhandler(SIGILL);
+
+ /*
+ * SYSCALL would be a disaster in VM86 mode. Fortunately,
+ * there is no kernel that both enables SYSCALL and sets
+ * EFER.SCE, so it's #UD on all systems. But vm86 is
+ * buggy (or has a "feature"), so the SIGILL will actually
+ * be delivered.
+ */
+ sethandler(SIGILL, sighandler, 0);
+ do_test(&v86, vmcode_syscall - vmcode, VM86_SIGNAL, 0, "SYSCALL");
+ clearhandler(SIGILL);
+
+ /* STI with VIP set */
+ v86.regs.eflags |= X86_EFLAGS_VIP;
+ v86.regs.eflags &= ~X86_EFLAGS_IF;
+ do_test(&v86, vmcode_sti - vmcode, VM86_STI, 0, "STI with VIP set");
+
+ /* INT3 -- should cause #BP */
+ do_test(&v86, vmcode_int3 - vmcode, VM86_TRAP, 3, "INT3");
+
+ /* INT80 -- should exit with "INTx 0x80" */
+ v86.regs.eax = (unsigned int)-1;
+ do_test(&v86, vmcode_int80 - vmcode, VM86_INTx, 0x80, "int80");
+
+ /* Execute a null pointer */
+ v86.regs.cs = 0;
+ v86.regs.ss = 0;
+ sethandler(SIGSEGV, sighandler, 0);
+ got_signal = 0;
+ if (do_test(&v86, 0, VM86_SIGNAL, 0, "Execute null pointer") &&
+ !got_signal) {
+ printf("[FAIL]\tDid not receive SIGSEGV\n");
+ nerrs++;
+ }
+ clearhandler(SIGSEGV);
+
+ /* Make sure nothing explodes if we fork. */
+ if (fork() > 0)
+ return 0;
+
+ return (nerrs == 0 ? 0 : 1);
+}
diff --git a/x86/ldt_gdt.c b/x86/ldt_gdt.c
new file mode 100644
index 0000000..31a3035
--- /dev/null
+++ b/x86/ldt_gdt.c
@@ -0,0 +1,576 @@
+/*
+ * ldt_gdt.c - Test cases for LDT and GDT access
+ * Copyright (c) 2015 Andrew Lutomirski
+ */
+
+#define _GNU_SOURCE
+#include <err.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <asm/ldt.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdbool.h>
+#include <pthread.h>
+#include <sched.h>
+#include <linux/futex.h>
+
+#define AR_ACCESSED (1<<8)
+
+#define AR_TYPE_RODATA (0 * (1<<9))
+#define AR_TYPE_RWDATA (1 * (1<<9))
+#define AR_TYPE_RODATA_EXPDOWN (2 * (1<<9))
+#define AR_TYPE_RWDATA_EXPDOWN (3 * (1<<9))
+#define AR_TYPE_XOCODE (4 * (1<<9))
+#define AR_TYPE_XRCODE (5 * (1<<9))
+#define AR_TYPE_XOCODE_CONF (6 * (1<<9))
+#define AR_TYPE_XRCODE_CONF (7 * (1<<9))
+
+#define AR_DPL3 (3 * (1<<13))
+
+#define AR_S (1 << 12)
+#define AR_P (1 << 15)
+#define AR_AVL (1 << 20)
+#define AR_L (1 << 21)
+#define AR_DB (1 << 22)
+#define AR_G (1 << 23)
+
+static int nerrs;
+
+static void check_invalid_segment(uint16_t index, int ldt)
+{
+ uint32_t has_limit = 0, has_ar = 0, limit, ar;
+ uint32_t selector = (index << 3) | (ldt << 2) | 3;
+
+ asm ("lsl %[selector], %[limit]\n\t"
+ "jnz 1f\n\t"
+ "movl $1, %[has_limit]\n\t"
+ "1:"
+ : [limit] "=r" (limit), [has_limit] "+rm" (has_limit)
+ : [selector] "r" (selector));
+ asm ("larl %[selector], %[ar]\n\t"
+ "jnz 1f\n\t"
+ "movl $1, %[has_ar]\n\t"
+ "1:"
+ : [ar] "=r" (ar), [has_ar] "+rm" (has_ar)
+ : [selector] "r" (selector));
+
+ if (has_limit || has_ar) {
+ printf("[FAIL]\t%s entry %hu is valid but should be invalid\n",
+ (ldt ? "LDT" : "GDT"), index);
+ nerrs++;
+ } else {
+ printf("[OK]\t%s entry %hu is invalid\n",
+ (ldt ? "LDT" : "GDT"), index);
+ }
+}
+
+static void check_valid_segment(uint16_t index, int ldt,
+ uint32_t expected_ar, uint32_t expected_limit,
+ bool verbose)
+{
+ uint32_t has_limit = 0, has_ar = 0, limit, ar;
+ uint32_t selector = (index << 3) | (ldt << 2) | 3;
+
+ asm ("lsl %[selector], %[limit]\n\t"
+ "jnz 1f\n\t"
+ "movl $1, %[has_limit]\n\t"
+ "1:"
+ : [limit] "=r" (limit), [has_limit] "+rm" (has_limit)
+ : [selector] "r" (selector));
+ asm ("larl %[selector], %[ar]\n\t"
+ "jnz 1f\n\t"
+ "movl $1, %[has_ar]\n\t"
+ "1:"
+ : [ar] "=r" (ar), [has_ar] "+rm" (has_ar)
+ : [selector] "r" (selector));
+
+ if (!has_limit || !has_ar) {
+ printf("[FAIL]\t%s entry %hu is invalid but should be valid\n",
+ (ldt ? "LDT" : "GDT"), index);
+ nerrs++;
+ return;
+ }
+
+ if (ar != expected_ar) {
+ printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n",
+ (ldt ? "LDT" : "GDT"), index, ar, expected_ar);
+ nerrs++;
+ } else if (limit != expected_limit) {
+ printf("[FAIL]\t%s entry %hu has limit 0x%08X but expected 0x%08X\n",
+ (ldt ? "LDT" : "GDT"), index, limit, expected_limit);
+ nerrs++;
+ } else if (verbose) {
+ printf("[OK]\t%s entry %hu has AR 0x%08X and limit 0x%08X\n",
+ (ldt ? "LDT" : "GDT"), index, ar, limit);
+ }
+}
+
+static bool install_valid_mode(const struct user_desc *desc, uint32_t ar,
+ bool oldmode)
+{
+ int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
+ desc, sizeof(*desc));
+ if (ret < -1)
+ errno = -ret;
+ if (ret == 0) {
+ uint32_t limit = desc->limit;
+ if (desc->limit_in_pages)
+ limit = (limit << 12) + 4095;
+ check_valid_segment(desc->entry_number, 1, ar, limit, true);
+ return true;
+ } else if (errno == ENOSYS) {
+ printf("[OK]\tmodify_ldt returned -ENOSYS\n");
+ return false;
+ } else {
+ if (desc->seg_32bit) {
+ printf("[FAIL]\tUnexpected modify_ldt failure %d\n",
+ errno);
+ nerrs++;
+ return false;
+ } else {
+ printf("[OK]\tmodify_ldt rejected 16 bit segment\n");
+ return false;
+ }
+ }
+}
+
+static bool install_valid(const struct user_desc *desc, uint32_t ar)
+{
+ return install_valid_mode(desc, ar, false);
+}
+
+static void install_invalid(const struct user_desc *desc, bool oldmode)
+{
+ int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
+ desc, sizeof(*desc));
+ if (ret < -1)
+ errno = -ret;
+ if (ret == 0) {
+ check_invalid_segment(desc->entry_number, 1);
+ } else if (errno == ENOSYS) {
+ printf("[OK]\tmodify_ldt returned -ENOSYS\n");
+ } else {
+ if (desc->seg_32bit) {
+ printf("[FAIL]\tUnexpected modify_ldt failure %d\n",
+ errno);
+ nerrs++;
+ } else {
+ printf("[OK]\tmodify_ldt rejected 16 bit segment\n");
+ }
+ }
+}
+
+static int safe_modify_ldt(int func, struct user_desc *ptr,
+ unsigned long bytecount)
+{
+ int ret = syscall(SYS_modify_ldt, 0x11, ptr, bytecount);
+ if (ret < -1)
+ errno = -ret;
+ return ret;
+}
+
+static void fail_install(struct user_desc *desc)
+{
+ if (safe_modify_ldt(0x11, desc, sizeof(*desc)) == 0) {
+ printf("[FAIL]\tmodify_ldt accepted a bad descriptor\n");
+ nerrs++;
+ } else if (errno == ENOSYS) {
+ printf("[OK]\tmodify_ldt returned -ENOSYS\n");
+ } else {
+ printf("[OK]\tmodify_ldt failure %d\n", errno);
+ }
+}
+
+static void do_simple_tests(void)
+{
+ struct user_desc desc = {
+ .entry_number = 0,
+ .base_addr = 0,
+ .limit = 10,
+ .seg_32bit = 1,
+ .contents = 2, /* Code, not conforming */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB);
+
+ desc.limit_in_pages = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_P | AR_DB | AR_G);
+
+ check_invalid_segment(1, 1);
+
+ desc.entry_number = 2;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_P | AR_DB | AR_G);
+
+ check_invalid_segment(1, 1);
+
+ desc.base_addr = 0xf0000000;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_P | AR_DB | AR_G);
+
+ desc.useable = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_P | AR_DB | AR_G | AR_AVL);
+
+ desc.seg_not_present = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_DB | AR_G | AR_AVL);
+
+ desc.seg_32bit = 0;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_G | AR_AVL);
+
+ desc.seg_32bit = 1;
+ desc.contents = 0;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA |
+ AR_S | AR_DB | AR_G | AR_AVL);
+
+ desc.read_exec_only = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA |
+ AR_S | AR_DB | AR_G | AR_AVL);
+
+ desc.contents = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA_EXPDOWN |
+ AR_S | AR_DB | AR_G | AR_AVL);
+
+ desc.read_exec_only = 0;
+ desc.limit_in_pages = 0;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA_EXPDOWN |
+ AR_S | AR_DB | AR_AVL);
+
+ desc.contents = 3;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE_CONF |
+ AR_S | AR_DB | AR_AVL);
+
+ desc.read_exec_only = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE_CONF |
+ AR_S | AR_DB | AR_AVL);
+
+ desc.read_exec_only = 0;
+ desc.contents = 2;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_DB | AR_AVL);
+
+ desc.read_exec_only = 1;
+
+#ifdef __x86_64__
+ desc.lm = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE |
+ AR_S | AR_DB | AR_AVL);
+ desc.lm = 0;
+#endif
+
+ bool entry1_okay = install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE |
+ AR_S | AR_DB | AR_AVL);
+
+ if (entry1_okay) {
+ printf("[RUN]\tTest fork\n");
+ pid_t child = fork();
+ if (child == 0) {
+ nerrs = 0;
+ check_valid_segment(desc.entry_number, 1,
+ AR_DPL3 | AR_TYPE_XOCODE |
+ AR_S | AR_DB | AR_AVL, desc.limit,
+ true);
+ check_invalid_segment(1, 1);
+ exit(nerrs ? 1 : 0);
+ } else {
+ int status;
+ if (waitpid(child, &status, 0) != child ||
+ !WIFEXITED(status)) {
+ printf("[FAIL]\tChild died\n");
+ nerrs++;
+ } else if (WEXITSTATUS(status) != 0) {
+ printf("[FAIL]\tChild failed\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tChild succeeded\n");
+ }
+ }
+
+ printf("[RUN]\tTest size\n");
+ int i;
+ for (i = 0; i < 8192; i++) {
+ desc.entry_number = i;
+ desc.limit = i;
+ if (safe_modify_ldt(0x11, &desc, sizeof(desc)) != 0) {
+ printf("[FAIL]\tFailed to install entry %d\n", i);
+ nerrs++;
+ break;
+ }
+ }
+ for (int j = 0; j < i; j++) {
+ check_valid_segment(j, 1, AR_DPL3 | AR_TYPE_XOCODE |
+ AR_S | AR_DB | AR_AVL, j, false);
+ }
+ printf("[DONE]\tSize test\n");
+ } else {
+ printf("[SKIP]\tSkipping fork and size tests because we have no LDT\n");
+ }
+
+ /* Test entry_number too high. */
+ desc.entry_number = 8192;
+ fail_install(&desc);
+
+ /* Test deletion and actions mistakeable for deletion. */
+ memset(&desc, 0, sizeof(desc));
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P);
+
+ desc.seg_not_present = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S);
+
+ desc.seg_not_present = 0;
+ desc.read_exec_only = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S | AR_P);
+
+ desc.read_exec_only = 0;
+ desc.seg_not_present = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S);
+
+ desc.read_exec_only = 1;
+ desc.limit = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S);
+
+ desc.limit = 0;
+ desc.base_addr = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S);
+
+ desc.base_addr = 0;
+ install_invalid(&desc, false);
+
+ desc.seg_not_present = 0;
+ desc.read_exec_only = 0;
+ desc.seg_32bit = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB);
+ install_invalid(&desc, true);
+}
+
+/*
+ * 0: thread is idle
+ * 1: thread armed
+ * 2: thread should clear LDT entry 0
+ * 3: thread should exit
+ */
+static volatile unsigned int ftx;
+
+static void *threadproc(void *ctx)
+{
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(1, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+ err(1, "sched_setaffinity to CPU 1"); /* should never fail */
+
+ while (1) {
+ syscall(SYS_futex, &ftx, FUTEX_WAIT, 0, NULL, NULL, 0);
+ while (ftx != 2) {
+ if (ftx >= 3)
+ return NULL;
+ }
+
+ /* clear LDT entry 0 */
+ const struct user_desc desc = {};
+ if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) != 0)
+ err(1, "modify_ldt");
+
+ /* If ftx == 2, set it to zero. If ftx == 100, quit. */
+ unsigned int x = -2;
+ asm volatile ("lock xaddl %[x], %[ftx]" :
+ [x] "+r" (x), [ftx] "+m" (ftx));
+ if (x != 2)
+ return NULL;
+ }
+}
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
+{
+ siglongjmp(jmpbuf, 1);
+}
+
+static void do_multicpu_tests(void)
+{
+ cpu_set_t cpuset;
+ pthread_t thread;
+ int failures = 0, iters = 5, i;
+ unsigned short orig_ss;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(1, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
+ printf("[SKIP]\tCannot set affinity to CPU 1\n");
+ return;
+ }
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
+ printf("[SKIP]\tCannot set affinity to CPU 0\n");
+ return;
+ }
+
+ sethandler(SIGSEGV, sigsegv, 0);
+#ifdef __i386__
+ /* True 32-bit kernels send SIGILL instead of SIGSEGV on IRET faults. */
+ sethandler(SIGILL, sigsegv, 0);
+#endif
+
+ printf("[RUN]\tCross-CPU LDT invalidation\n");
+
+ if (pthread_create(&thread, 0, threadproc, 0) != 0)
+ err(1, "pthread_create");
+
+ asm volatile ("mov %%ss, %0" : "=rm" (orig_ss));
+
+ for (i = 0; i < 5; i++) {
+ if (sigsetjmp(jmpbuf, 1) != 0)
+ continue;
+
+ /* Make sure the thread is ready after the last test. */
+ while (ftx != 0)
+ ;
+
+ struct user_desc desc = {
+ .entry_number = 0,
+ .base_addr = 0,
+ .limit = 0xfffff,
+ .seg_32bit = 1,
+ .contents = 0, /* Data */
+ .read_exec_only = 0,
+ .limit_in_pages = 1,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+
+ if (safe_modify_ldt(0x11, &desc, sizeof(desc)) != 0) {
+ if (errno != ENOSYS)
+ err(1, "modify_ldt");
+ printf("[SKIP]\tmodify_ldt unavailable\n");
+ break;
+ }
+
+ /* Arm the thread. */
+ ftx = 1;
+ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+
+ asm volatile ("mov %0, %%ss" : : "r" (0x7));
+
+ /* Go! */
+ ftx = 2;
+
+ while (ftx != 0)
+ ;
+
+ /*
+ * On success, modify_ldt will segfault us synchronously,
+ * and we'll escape via siglongjmp.
+ */
+
+ failures++;
+ asm volatile ("mov %0, %%ss" : : "rm" (orig_ss));
+ };
+
+ ftx = 100; /* Kill the thread. */
+ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+
+ if (pthread_join(thread, NULL) != 0)
+ err(1, "pthread_join");
+
+ if (failures) {
+ printf("[FAIL]\t%d of %d iterations failed\n", failures, iters);
+ nerrs++;
+ } else {
+ printf("[OK]\tAll %d iterations succeeded\n", iters);
+ }
+}
+
+static int finish_exec_test(void)
+{
+ /*
+ * In a sensible world, this would be check_invalid_segment(0, 1);
+ * For better or for worse, though, the LDT is inherited across exec.
+ * We can probably change this safely, but for now we test it.
+ */
+ check_valid_segment(0, 1,
+ AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB,
+ 42, true);
+
+ return nerrs ? 1 : 0;
+}
+
+static void do_exec_test(void)
+{
+ printf("[RUN]\tTest exec\n");
+
+ struct user_desc desc = {
+ .entry_number = 0,
+ .base_addr = 0,
+ .limit = 42,
+ .seg_32bit = 1,
+ .contents = 2, /* Code, not conforming */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB);
+
+ pid_t child = fork();
+ if (child == 0) {
+ execl("/proc/self/exe", "ldt_gdt_test_exec", NULL);
+ printf("[FAIL]\tCould not exec self\n");
+ exit(1); /* exec failed */
+ } else {
+ int status;
+ if (waitpid(child, &status, 0) != child ||
+ !WIFEXITED(status)) {
+ printf("[FAIL]\tChild died\n");
+ nerrs++;
+ } else if (WEXITSTATUS(status) != 0) {
+ printf("[FAIL]\tChild failed\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tChild succeeded\n");
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ if (argc == 1 && !strcmp(argv[0], "ldt_gdt_test_exec"))
+ return finish_exec_test();
+
+ do_simple_tests();
+
+ do_multicpu_tests();
+
+ do_exec_test();
+
+ return nerrs ? 1 : 0;
+}
diff --git a/x86/ptrace_syscall.c b/x86/ptrace_syscall.c
new file mode 100644
index 0000000..5105b49
--- /dev/null
+++ b/x86/ptrace_syscall.c
@@ -0,0 +1,294 @@
+#define _GNU_SOURCE
+
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <sys/user.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <err.h>
+#include <string.h>
+#include <asm/ptrace-abi.h>
+#include <sys/auxv.h>
+
+/* Bitness-agnostic defines for user_regs_struct fields. */
+#ifdef __x86_64__
+# define user_syscall_nr orig_rax
+# define user_arg0 rdi
+# define user_arg1 rsi
+# define user_arg2 rdx
+# define user_arg3 r10
+# define user_arg4 r8
+# define user_arg5 r9
+# define user_ip rip
+# define user_ax rax
+#else
+# define user_syscall_nr orig_eax
+# define user_arg0 ebx
+# define user_arg1 ecx
+# define user_arg2 edx
+# define user_arg3 esi
+# define user_arg4 edi
+# define user_arg5 ebp
+# define user_ip eip
+# define user_ax eax
+#endif
+
+static int nerrs = 0;
+
+struct syscall_args32 {
+ uint32_t nr, arg0, arg1, arg2, arg3, arg4, arg5;
+};
+
+#ifdef __i386__
+extern void sys32_helper(struct syscall_args32 *, void *);
+extern void int80_and_ret(void);
+#endif
+
+/*
+ * Helper to invoke int80 with controlled regs and capture the final regs.
+ */
+static void do_full_int80(struct syscall_args32 *args)
+{
+#ifdef __x86_64__
+ register unsigned long bp asm("bp") = args->arg5;
+ asm volatile ("int $0x80"
+ : "+a" (args->nr),
+ "+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2),
+ "+S" (args->arg3), "+D" (args->arg4), "+r" (bp));
+ args->arg5 = bp;
+#else
+ sys32_helper(args, int80_and_ret);
+#endif
+}
+
+#ifdef __i386__
+static void (*vsyscall32)(void);
+
+/*
+ * Nasty helper to invoke AT_SYSINFO (i.e. __kernel_vsyscall) with
+ * controlled regs and capture the final regs. This is so nasty that it
+ * crashes my copy of gdb :)
+ */
+static void do_full_vsyscall32(struct syscall_args32 *args)
+{
+ sys32_helper(args, vsyscall32);
+}
+#endif
+
+static siginfo_t wait_trap(pid_t chld)
+{
+ siginfo_t si;
+ if (waitid(P_PID, chld, &si, WEXITED|WSTOPPED) != 0)
+ err(1, "waitid");
+ if (si.si_pid != chld)
+ errx(1, "got unexpected pid in event\n");
+ if (si.si_code != CLD_TRAPPED)
+ errx(1, "got unexpected event type %d\n", si.si_code);
+ return si;
+}
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+#ifdef __x86_64__
+# define REG_BP REG_RBP
+#else
+# define REG_BP REG_EBP
+#endif
+
+static void empty_handler(int sig, siginfo_t *si, void *ctx_void)
+{
+}
+
+static void test_sys32_regs(void (*do_syscall)(struct syscall_args32 *))
+{
+ struct syscall_args32 args = {
+ .nr = 224, /* gettid */
+ .arg0 = 10, .arg1 = 11, .arg2 = 12,
+ .arg3 = 13, .arg4 = 14, .arg5 = 15,
+ };
+
+ do_syscall(&args);
+
+ if (args.nr != getpid() ||
+ args.arg0 != 10 || args.arg1 != 11 || args.arg2 != 12 ||
+ args.arg3 != 13 || args.arg4 != 14 || args.arg5 != 15) {
+ printf("[FAIL]\tgetpid() failed to preseve regs\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tgetpid() preserves regs\n");
+ }
+
+ sethandler(SIGUSR1, empty_handler, 0);
+
+ args.nr = 37; /* kill */
+ args.arg0 = getpid();
+ args.arg1 = SIGUSR1;
+ do_syscall(&args);
+ if (args.nr != 0 ||
+ args.arg0 != getpid() || args.arg1 != SIGUSR1 || args.arg2 != 12 ||
+ args.arg3 != 13 || args.arg4 != 14 || args.arg5 != 15) {
+ printf("[FAIL]\tkill(getpid(), SIGUSR1) failed to preseve regs\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tkill(getpid(), SIGUSR1) preserves regs\n");
+ }
+ clearhandler(SIGUSR1);
+}
+
+static void test_ptrace_syscall_restart(void)
+{
+ printf("[RUN]\tptrace-induced syscall restart\n");
+ pid_t chld = fork();
+ if (chld < 0)
+ err(1, "fork");
+
+ if (chld == 0) {
+ if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0)
+ err(1, "PTRACE_TRACEME");
+
+ printf("\tChild will make one syscall\n");
+ raise(SIGSTOP);
+
+ syscall(SYS_gettid, 10, 11, 12, 13, 14, 15);
+ _exit(0);
+ }
+
+ int status;
+
+ /* Wait for SIGSTOP. */
+ if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status))
+ err(1, "waitpid");
+
+ struct user_regs_struct regs;
+
+ printf("[RUN]\tSYSEMU\n");
+ if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
+ err(1, "PTRACE_SYSCALL");
+ wait_trap(chld);
+
+ if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_GETREGS");
+
+ if (regs.user_syscall_nr != SYS_gettid ||
+ regs.user_arg0 != 10 || regs.user_arg1 != 11 ||
+ regs.user_arg2 != 12 || regs.user_arg3 != 13 ||
+ regs.user_arg4 != 14 || regs.user_arg5 != 15) {
+ printf("[FAIL]\tInitial args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
+ nerrs++;
+ } else {
+ printf("[OK]\tInitial nr and args are correct\n");
+ }
+
+ printf("[RUN]\tRestart the syscall (ip = 0x%lx)\n",
+ (unsigned long)regs.user_ip);
+
+ /*
+ * This does exactly what it appears to do if syscall is int80 or
+ * SYSCALL64. For SYSCALL32 or SYSENTER, though, this is highly
+ * magical. It needs to work so that ptrace and syscall restart
+ * work as expected.
+ */
+ regs.user_ax = regs.user_syscall_nr;
+ regs.user_ip -= 2;
+ if (ptrace(PTRACE_SETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_SETREGS");
+
+ if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
+ err(1, "PTRACE_SYSCALL");
+ wait_trap(chld);
+
+ if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_GETREGS");
+
+ if (regs.user_syscall_nr != SYS_gettid ||
+ regs.user_arg0 != 10 || regs.user_arg1 != 11 ||
+ regs.user_arg2 != 12 || regs.user_arg3 != 13 ||
+ regs.user_arg4 != 14 || regs.user_arg5 != 15) {
+ printf("[FAIL]\tRestart nr or args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
+ nerrs++;
+ } else {
+ printf("[OK]\tRestarted nr and args are correct\n");
+ }
+
+ printf("[RUN]\tChange nr and args and restart the syscall (ip = 0x%lx)\n",
+ (unsigned long)regs.user_ip);
+
+ regs.user_ax = SYS_getpid;
+ regs.user_arg0 = 20;
+ regs.user_arg1 = 21;
+ regs.user_arg2 = 22;
+ regs.user_arg3 = 23;
+ regs.user_arg4 = 24;
+ regs.user_arg5 = 25;
+ regs.user_ip -= 2;
+
+ if (ptrace(PTRACE_SETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_SETREGS");
+
+ if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
+ err(1, "PTRACE_SYSCALL");
+ wait_trap(chld);
+
+ if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_GETREGS");
+
+ if (regs.user_syscall_nr != SYS_getpid ||
+ regs.user_arg0 != 20 || regs.user_arg1 != 21 || regs.user_arg2 != 22 ||
+ regs.user_arg3 != 23 || regs.user_arg4 != 24 || regs.user_arg5 != 25) {
+ printf("[FAIL]\tRestart nr or args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
+ nerrs++;
+ } else {
+ printf("[OK]\tReplacement nr and args are correct\n");
+ }
+
+ if (ptrace(PTRACE_CONT, chld, 0, 0) != 0)
+ err(1, "PTRACE_CONT");
+ if (waitpid(chld, &status, 0) != chld)
+ err(1, "waitpid");
+ if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+ printf("[FAIL]\tChild failed\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tChild exited cleanly\n");
+ }
+}
+
+int main()
+{
+ printf("[RUN]\tCheck int80 return regs\n");
+ test_sys32_regs(do_full_int80);
+
+#if defined(__i386__) && (!defined(__GLIBC__) || __GLIBC__ > 2 || __GLIBC_MINOR__ >= 16)
+ vsyscall32 = (void *)getauxval(AT_SYSINFO);
+ printf("[RUN]\tCheck AT_SYSINFO return regs\n");
+ test_sys32_regs(do_full_vsyscall32);
+#endif
+
+ test_ptrace_syscall_restart();
+
+ return 0;
+}
diff --git a/x86/raw_syscall_helper_32.S b/x86/raw_syscall_helper_32.S
new file mode 100644
index 0000000..534e71e
--- /dev/null
+++ b/x86/raw_syscall_helper_32.S
@@ -0,0 +1,46 @@
+.global sys32_helper
+sys32_helper:
+ /* Args: syscall_args_32*, function pointer */
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 5*4(%esp), %eax /* pointer to args struct */
+
+ movl 1*4(%eax), %ebx
+ movl 2*4(%eax), %ecx
+ movl 3*4(%eax), %edx
+ movl 4*4(%eax), %esi
+ movl 5*4(%eax), %edi
+ movl 6*4(%eax), %ebp
+ movl 0*4(%eax), %eax
+
+ call *(6*4)(%esp) /* Do the syscall */
+
+ /* Now we need to recover without losing any reg values */
+ pushl %eax
+ movl 6*4(%esp), %eax
+ popl 0*4(%eax)
+ movl %ebx, 1*4(%eax)
+ movl %ecx, 2*4(%eax)
+ movl %edx, 3*4(%eax)
+ movl %esi, 4*4(%eax)
+ movl %edi, 5*4(%eax)
+ movl %ebp, 6*4(%eax)
+
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+
+ .type sys32_helper, @function
+ .size sys32_helper, .-sys32_helper
+
+.global int80_and_ret
+int80_and_ret:
+ int $0x80
+ ret
+
+ .type int80_and_ret, @function
+ .size int80_and_ret, .-int80_and_ret
diff --git a/x86/sigreturn.c b/x86/sigreturn.c
new file mode 100644
index 0000000..b5aa1ba
--- /dev/null
+++ b/x86/sigreturn.c
@@ -0,0 +1,684 @@
+/*
+ * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * This is a series of tests that exercises the sigreturn(2) syscall and
+ * the IRET / SYSRET paths in the kernel.
+ *
+ * For now, this focuses on the effects of unusual CS and SS values,
+ * and it has a bunch of tests to make sure that ESP/RSP is restored
+ * properly.
+ *
+ * The basic idea behind these tests is to raise(SIGUSR1) to create a
+ * sigcontext frame, plug in the values to be tested, and then return,
+ * which implicitly invokes sigreturn(2) and programs the user context
+ * as desired.
+ *
+ * For tests for which we expect sigreturn and the subsequent return to
+ * user mode to succeed, we return to a short trampoline that generates
+ * SIGTRAP so that the meat of the tests can be ordinary C code in a
+ * SIGTRAP handler.
+ *
+ * The inner workings of each test is documented below.
+ *
+ * Do not run on outdated, unpatched kernels at risk of nasty crashes.
+ */
+
+#define _GNU_SOURCE
+
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <asm/ldt.h>
+#include <err.h>
+#include <setjmp.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/ptrace.h>
+#include <sys/user.h>
+
+/*
+ * In principle, this test can run on Linux emulation layers (e.g.
+ * Illumos "LX branded zones"). Solaris-based kernels reserve LDT
+ * entries 0-5 for their own internal purposes, so start our LDT
+ * allocations above that reservation. (The tests don't pass on LX
+ * branded zones, but at least this lets them run.)
+ */
+#define LDT_OFFSET 6
+
+/* An aligned stack accessible through some of our segments. */
+static unsigned char stack16[65536] __attribute__((aligned(4096)));
+
+/*
+ * An aligned int3 instruction used as a trampoline. Some of the tests
+ * want to fish out their ss values, so this trampoline copies ss to eax
+ * before the int3.
+ */
+asm (".pushsection .text\n\t"
+ ".type int3, @function\n\t"
+ ".align 4096\n\t"
+ "int3:\n\t"
+ "mov %ss,%eax\n\t"
+ "int3\n\t"
+ ".size int3, . - int3\n\t"
+ ".align 4096, 0xcc\n\t"
+ ".popsection");
+extern char int3[4096];
+
+/*
+ * At startup, we prepapre:
+ *
+ * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero
+ * descriptor or out of bounds).
+ * - code16_sel: A 16-bit LDT code segment pointing to int3.
+ * - data16_sel: A 16-bit LDT data segment pointing to stack16.
+ * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3.
+ * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16.
+ * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16.
+ * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to
+ * stack16.
+ *
+ * For no particularly good reason, xyz_sel is a selector value with the
+ * RPL and LDT bits filled in, whereas xyz_idx is just an index into the
+ * descriptor table. These variables will be zero if their respective
+ * segments could not be allocated.
+ */
+static unsigned short ldt_nonexistent_sel;
+static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel;
+
+static unsigned short gdt_data16_idx, gdt_npdata32_idx;
+
+static unsigned short GDT3(int idx)
+{
+ return (idx << 3) | 3;
+}
+
+static unsigned short LDT3(int idx)
+{
+ return (idx << 3) | 7;
+}
+
+/* Our sigaltstack scratch space. */
+static char altstack_data[SIGSTKSZ];
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void add_ldt(const struct user_desc *desc, unsigned short *var,
+ const char *name)
+{
+ if (syscall(SYS_modify_ldt, 1, desc, sizeof(*desc)) == 0) {
+ *var = LDT3(desc->entry_number);
+ } else {
+ printf("[NOTE]\tFailed to create %s segment\n", name);
+ *var = 0;
+ }
+}
+
+static void setup_ldt(void)
+{
+ if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16))
+ errx(1, "stack16 is too high\n");
+ if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3))
+ errx(1, "int3 is too high\n");
+
+ ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2);
+
+ const struct user_desc code16_desc = {
+ .entry_number = LDT_OFFSET + 0,
+ .base_addr = (unsigned long)int3,
+ .limit = 4095,
+ .seg_32bit = 0,
+ .contents = 2, /* Code, not conforming */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+ add_ldt(&code16_desc, &code16_sel, "code16");
+
+ const struct user_desc data16_desc = {
+ .entry_number = LDT_OFFSET + 1,
+ .base_addr = (unsigned long)stack16,
+ .limit = 0xffff,
+ .seg_32bit = 0,
+ .contents = 0, /* Data, grow-up */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+ add_ldt(&data16_desc, &data16_sel, "data16");
+
+ const struct user_desc npcode32_desc = {
+ .entry_number = LDT_OFFSET + 3,
+ .base_addr = (unsigned long)int3,
+ .limit = 4095,
+ .seg_32bit = 1,
+ .contents = 2, /* Code, not conforming */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 1,
+ .useable = 0
+ };
+ add_ldt(&npcode32_desc, &npcode32_sel, "npcode32");
+
+ const struct user_desc npdata32_desc = {
+ .entry_number = LDT_OFFSET + 4,
+ .base_addr = (unsigned long)stack16,
+ .limit = 0xffff,
+ .seg_32bit = 1,
+ .contents = 0, /* Data, grow-up */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 1,
+ .useable = 0
+ };
+ add_ldt(&npdata32_desc, &npdata32_sel, "npdata32");
+
+ struct user_desc gdt_data16_desc = {
+ .entry_number = -1,
+ .base_addr = (unsigned long)stack16,
+ .limit = 0xffff,
+ .seg_32bit = 0,
+ .contents = 0, /* Data, grow-up */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+
+ if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) {
+ /*
+ * This probably indicates vulnerability to CVE-2014-8133.
+ * Merely getting here isn't definitive, though, and we'll
+ * diagnose the problem for real later on.
+ */
+ printf("[WARN]\tset_thread_area allocated data16 at index %d\n",
+ gdt_data16_desc.entry_number);
+ gdt_data16_idx = gdt_data16_desc.entry_number;
+ } else {
+ printf("[OK]\tset_thread_area refused 16-bit data\n");
+ }
+
+ struct user_desc gdt_npdata32_desc = {
+ .entry_number = -1,
+ .base_addr = (unsigned long)stack16,
+ .limit = 0xffff,
+ .seg_32bit = 1,
+ .contents = 0, /* Data, grow-up */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 1,
+ .useable = 0
+ };
+
+ if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) {
+ /*
+ * As a hardening measure, newer kernels don't allow this.
+ */
+ printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n",
+ gdt_npdata32_desc.entry_number);
+ gdt_npdata32_idx = gdt_npdata32_desc.entry_number;
+ } else {
+ printf("[OK]\tset_thread_area refused 16-bit data\n");
+ }
+}
+
+/* State used by our signal handlers. */
+static gregset_t initial_regs, requested_regs, resulting_regs;
+
+/* Instructions for the SIGUSR1 handler. */
+static volatile unsigned short sig_cs, sig_ss;
+static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno;
+
+/* Abstractions for some 32-bit vs 64-bit differences. */
+#ifdef __x86_64__
+# define REG_IP REG_RIP
+# define REG_SP REG_RSP
+# define REG_AX REG_RAX
+
+struct selectors {
+ unsigned short cs, gs, fs, ss;
+};
+
+static unsigned short *ssptr(ucontext_t *ctx)
+{
+ struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
+ return &sels->ss;
+}
+
+static unsigned short *csptr(ucontext_t *ctx)
+{
+ struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
+ return &sels->cs;
+}
+#else
+# define REG_IP REG_EIP
+# define REG_SP REG_ESP
+# define REG_AX REG_EAX
+
+static greg_t *ssptr(ucontext_t *ctx)
+{
+ return &ctx->uc_mcontext.gregs[REG_SS];
+}
+
+static greg_t *csptr(ucontext_t *ctx)
+{
+ return &ctx->uc_mcontext.gregs[REG_CS];
+}
+#endif
+
+/* Number of errors in the current test case. */
+static volatile sig_atomic_t nerrs;
+
+/*
+ * SIGUSR1 handler. Sets CS and SS as requested and points IP to the
+ * int3 trampoline. Sets SP to a large known value so that we can see
+ * whether the value round-trips back to user mode correctly.
+ */
+static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+
+ *csptr(ctx) = sig_cs;
+ *ssptr(ctx) = sig_ss;
+
+ ctx->uc_mcontext.gregs[REG_IP] =
+ sig_cs == code16_sel ? 0 : (unsigned long)&int3;
+ ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
+ ctx->uc_mcontext.gregs[REG_AX] = 0;
+
+ memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+ requested_regs[REG_AX] = *ssptr(ctx); /* The asm code does this. */
+
+ return;
+}
+
+/*
+ * Called after a successful sigreturn. Restores our state so that
+ * the original raise(SIGUSR1) returns.
+ */
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ sig_err = ctx->uc_mcontext.gregs[REG_ERR];
+ sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO];
+
+ unsigned short ss;
+ asm ("mov %%ss,%0" : "=r" (ss));
+
+ greg_t asm_ss = ctx->uc_mcontext.gregs[REG_AX];
+ if (asm_ss != sig_ss && sig == SIGTRAP) {
+ /* Sanity check failure. */
+ printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n",
+ ss, *ssptr(ctx), (unsigned long long)asm_ss);
+ nerrs++;
+ }
+
+ memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+ memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));
+
+ sig_trapped = sig;
+}
+
+/*
+ * Checks a given selector for its code bitness or returns -1 if it's not
+ * a usable code segment selector.
+ */
+int cs_bitness(unsigned short cs)
+{
+ uint32_t valid = 0, ar;
+ asm ("lar %[cs], %[ar]\n\t"
+ "jnz 1f\n\t"
+ "mov $1, %[valid]\n\t"
+ "1:"
+ : [ar] "=r" (ar), [valid] "+rm" (valid)
+ : [cs] "r" (cs));
+
+ if (!valid)
+ return -1;
+
+ bool db = (ar & (1 << 22));
+ bool l = (ar & (1 << 21));
+
+ if (!(ar & (1<<11)))
+ return -1; /* Not code. */
+
+ if (l && !db)
+ return 64;
+ else if (!l && db)
+ return 32;
+ else if (!l && !db)
+ return 16;
+ else
+ return -1; /* Unknown bitness. */
+}
+
+/* Finds a usable code segment of the requested bitness. */
+int find_cs(int bitness)
+{
+ unsigned short my_cs;
+
+ asm ("mov %%cs,%0" : "=r" (my_cs));
+
+ if (cs_bitness(my_cs) == bitness)
+ return my_cs;
+ if (cs_bitness(my_cs + (2 << 3)) == bitness)
+ return my_cs + (2 << 3);
+ if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness)
+ return my_cs - (2 << 3);
+ if (cs_bitness(code16_sel) == bitness)
+ return code16_sel;
+
+ printf("[WARN]\tCould not find %d-bit CS\n", bitness);
+ return -1;
+}
+
+static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
+{
+ int cs = find_cs(cs_bits);
+ if (cs == -1) {
+ printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n",
+ cs_bits, use_16bit_ss ? 16 : 32);
+ return 0;
+ }
+
+ if (force_ss != -1) {
+ sig_ss = force_ss;
+ } else {
+ if (use_16bit_ss) {
+ if (!data16_sel) {
+ printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n",
+ cs_bits);
+ return 0;
+ }
+ sig_ss = data16_sel;
+ } else {
+ asm volatile ("mov %%ss,%0" : "=r" (sig_ss));
+ }
+ }
+
+ sig_cs = cs;
+
+ printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n",
+ cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss,
+ (sig_ss & 4) ? "" : ", GDT");
+
+ raise(SIGUSR1);
+
+ nerrs = 0;
+
+ /*
+ * Check that each register had an acceptable value when the
+ * int3 trampoline was invoked.
+ */
+ for (int i = 0; i < NGREG; i++) {
+ greg_t req = requested_regs[i], res = resulting_regs[i];
+ if (i == REG_TRAPNO || i == REG_IP)
+ continue; /* don't care */
+ if (i == REG_SP) {
+ printf("\tSP: %llx -> %llx\n", (unsigned long long)req,
+ (unsigned long long)res);
+
+ /*
+ * In many circumstances, the high 32 bits of rsp
+ * are zeroed. For example, we could be a real
+ * 32-bit program, or we could hit any of a number
+ * of poorly-documented IRET or segmented ESP
+ * oddities. If this happens, it's okay.
+ */
+ if (res == (req & 0xFFFFFFFF))
+ continue; /* OK; not expected to work */
+ }
+
+ bool ignore_reg = false;
+#if __i386__
+ if (i == REG_UESP)
+ ignore_reg = true;
+#else
+ if (i == REG_CSGSFS) {
+ struct selectors *req_sels =
+ (void *)&requested_regs[REG_CSGSFS];
+ struct selectors *res_sels =
+ (void *)&resulting_regs[REG_CSGSFS];
+ if (req_sels->cs != res_sels->cs) {
+ printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n",
+ req_sels->cs, res_sels->cs);
+ nerrs++;
+ }
+
+ if (req_sels->ss != res_sels->ss) {
+ printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n",
+ req_sels->ss, res_sels->ss);
+ nerrs++;
+ }
+
+ continue;
+ }
+#endif
+
+ /* Sanity check on the kernel */
+ if (i == REG_AX && requested_regs[i] != resulting_regs[i]) {
+ printf("[FAIL]\tAX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n",
+ (unsigned long long)requested_regs[i],
+ (unsigned long long)resulting_regs[i]);
+ nerrs++;
+ continue;
+ }
+
+ if (requested_regs[i] != resulting_regs[i] && !ignore_reg) {
+ /*
+ * SP is particularly interesting here. The
+ * usual cause of failures is that we hit the
+ * nasty IRET case of returning to a 16-bit SS,
+ * in which case bits 16:31 of the *kernel*
+ * stack pointer persist in ESP.
+ */
+ printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n",
+ i, (unsigned long long)requested_regs[i],
+ (unsigned long long)resulting_regs[i]);
+ nerrs++;
+ }
+ }
+
+ if (nerrs == 0)
+ printf("[OK]\tall registers okay\n");
+
+ return nerrs;
+}
+
+static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs)
+{
+ int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs;
+ if (cs == -1)
+ return 0;
+
+ sig_cs = cs;
+ sig_ss = ss;
+
+ printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n",
+ cs_bits, sig_cs, sig_ss);
+
+ sig_trapped = 0;
+ raise(SIGUSR1);
+ if (sig_trapped) {
+ char errdesc[32] = "";
+ if (sig_err) {
+ const char *src = (sig_err & 1) ? " EXT" : "";
+ const char *table;
+ if ((sig_err & 0x6) == 0x0)
+ table = "GDT";
+ else if ((sig_err & 0x6) == 0x4)
+ table = "LDT";
+ else if ((sig_err & 0x6) == 0x2)
+ table = "IDT";
+ else
+ table = "???";
+
+ sprintf(errdesc, "%s%s index %d, ",
+ table, src, sig_err >> 3);
+ }
+
+ char trapname[32];
+ if (sig_trapno == 13)
+ strcpy(trapname, "GP");
+ else if (sig_trapno == 11)
+ strcpy(trapname, "NP");
+ else if (sig_trapno == 12)
+ strcpy(trapname, "SS");
+ else if (sig_trapno == 32)
+ strcpy(trapname, "IRET"); /* X86_TRAP_IRET */
+ else
+ sprintf(trapname, "%d", sig_trapno);
+
+ printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n",
+ trapname, (unsigned long)sig_err,
+ errdesc, strsignal(sig_trapped));
+ return 0;
+ } else {
+ printf("[FAIL]\tDid not get SIGSEGV\n");
+ return 1;
+ }
+}
+
+int main()
+{
+ int total_nerrs = 0;
+ unsigned short my_cs, my_ss;
+
+ asm volatile ("mov %%cs,%0" : "=r" (my_cs));
+ asm volatile ("mov %%ss,%0" : "=r" (my_ss));
+ setup_ldt();
+
+ stack_t stack = {
+ .ss_sp = altstack_data,
+ .ss_size = SIGSTKSZ,
+ };
+ if (sigaltstack(&stack, NULL) != 0)
+ err(1, "sigaltstack");
+
+ sethandler(SIGUSR1, sigusr1, 0);
+ sethandler(SIGTRAP, sigtrap, SA_ONSTACK);
+
+ /* Easy cases: return to a 32-bit SS in each possible CS bitness. */
+ total_nerrs += test_valid_sigreturn(64, false, -1);
+ total_nerrs += test_valid_sigreturn(32, false, -1);
+ total_nerrs += test_valid_sigreturn(16, false, -1);
+
+ /*
+ * Test easy espfix cases: return to a 16-bit LDT SS in each possible
+ * CS bitness. NB: with a long mode CS, the SS bitness is irrelevant.
+ *
+ * This catches the original missing-espfix-on-64-bit-kernels issue
+ * as well as CVE-2014-8134.
+ */
+ total_nerrs += test_valid_sigreturn(64, true, -1);
+ total_nerrs += test_valid_sigreturn(32, true, -1);
+ total_nerrs += test_valid_sigreturn(16, true, -1);
+
+ if (gdt_data16_idx) {
+ /*
+ * For performance reasons, Linux skips espfix if SS points
+ * to the GDT. If we were able to allocate a 16-bit SS in
+ * the GDT, see if it leaks parts of the kernel stack pointer.
+ *
+ * This tests for CVE-2014-8133.
+ */
+ total_nerrs += test_valid_sigreturn(64, true,
+ GDT3(gdt_data16_idx));
+ total_nerrs += test_valid_sigreturn(32, true,
+ GDT3(gdt_data16_idx));
+ total_nerrs += test_valid_sigreturn(16, true,
+ GDT3(gdt_data16_idx));
+ }
+
+ /*
+ * We're done testing valid sigreturn cases. Now we test states
+ * for which sigreturn itself will succeed but the subsequent
+ * entry to user mode will fail.
+ *
+ * Depending on the failure mode and the kernel bitness, these
+ * entry failures can generate SIGSEGV, SIGBUS, or SIGILL.
+ */
+ clearhandler(SIGTRAP);
+ sethandler(SIGSEGV, sigtrap, SA_ONSTACK);
+ sethandler(SIGBUS, sigtrap, SA_ONSTACK);
+ sethandler(SIGILL, sigtrap, SA_ONSTACK); /* 32-bit kernels do this */
+
+ /* Easy failures: invalid SS, resulting in #GP(0) */
+ test_bad_iret(64, ldt_nonexistent_sel, -1);
+ test_bad_iret(32, ldt_nonexistent_sel, -1);
+ test_bad_iret(16, ldt_nonexistent_sel, -1);
+
+ /* These fail because SS isn't a data segment, resulting in #GP(SS) */
+ test_bad_iret(64, my_cs, -1);
+ test_bad_iret(32, my_cs, -1);
+ test_bad_iret(16, my_cs, -1);
+
+ /* Try to return to a not-present code segment, triggering #NP(SS). */
+ test_bad_iret(32, my_ss, npcode32_sel);
+
+ /*
+ * Try to return to a not-present but otherwise valid data segment.
+ * This will cause IRET to fail with #SS on the espfix stack. This
+ * exercises CVE-2014-9322.
+ *
+ * Note that, if espfix is enabled, 64-bit Linux will lose track
+ * of the actual cause of failure and report #GP(0) instead.
+ * This would be very difficult for Linux to avoid, because
+ * espfix64 causes IRET failures to be promoted to #DF, so the
+ * original exception frame is never pushed onto the stack.
+ */
+ test_bad_iret(32, npdata32_sel, -1);
+
+ /*
+ * Try to return to a not-present but otherwise valid data
+ * segment without invoking espfix. Newer kernels don't allow
+ * this to happen in the first place. On older kernels, though,
+ * this can trigger CVE-2014-9322.
+ */
+ if (gdt_npdata32_idx)
+ test_bad_iret(32, GDT3(gdt_npdata32_idx), -1);
+
+ return total_nerrs ? 1 : 0;
+}
diff --git a/x86/single_step_syscall.c b/x86/single_step_syscall.c
new file mode 100644
index 0000000..50c2635
--- /dev/null
+++ b/x86/single_step_syscall.c
@@ -0,0 +1,181 @@
+/*
+ * single_step_syscall.c - single-steps various x86 syscalls
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * This is a very simple series of tests that makes system calls with
+ * the TF flag set. This exercises some nasty kernel code in the
+ * SYSENTER case: SYSENTER does not clear TF, so SYSENTER with TF set
+ * immediately issues #DB from CPL 0. This requires special handling in
+ * the kernel.
+ */
+
+#define _GNU_SOURCE
+
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <asm/ldt.h>
+#include <err.h>
+#include <setjmp.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/ptrace.h>
+#include <sys/user.h>
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static volatile sig_atomic_t sig_traps;
+
+#ifdef __x86_64__
+# define REG_IP REG_RIP
+# define WIDTH "q"
+#else
+# define REG_IP REG_EIP
+# define WIDTH "l"
+#endif
+
+static unsigned long get_eflags(void)
+{
+ unsigned long eflags;
+ asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
+ return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+ asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
+ : : "rm" (eflags) : "flags");
+}
+
+#define X86_EFLAGS_TF (1UL << 8)
+
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ if (get_eflags() & X86_EFLAGS_TF) {
+ set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+ printf("[WARN]\tSIGTRAP handler had TF set\n");
+ _exit(1);
+ }
+
+ sig_traps++;
+
+ if (sig_traps == 10000 || sig_traps == 10001) {
+ printf("[WARN]\tHit %d SIGTRAPs with si_addr 0x%lx, ip 0x%lx\n",
+ (int)sig_traps,
+ (unsigned long)info->si_addr,
+ (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
+ }
+}
+
+static void check_result(void)
+{
+ unsigned long new_eflags = get_eflags();
+ set_eflags(new_eflags & ~X86_EFLAGS_TF);
+
+ if (!sig_traps) {
+ printf("[FAIL]\tNo SIGTRAP\n");
+ exit(1);
+ }
+
+ if (!(new_eflags & X86_EFLAGS_TF)) {
+ printf("[FAIL]\tTF was cleared\n");
+ exit(1);
+ }
+
+ printf("[OK]\tSurvived with TF set and %d traps\n", (int)sig_traps);
+ sig_traps = 0;
+}
+
+int main()
+{
+ int tmp;
+
+ sethandler(SIGTRAP, sigtrap, 0);
+
+ printf("[RUN]\tSet TF and check nop\n");
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ asm volatile ("nop");
+ check_result();
+
+#ifdef __x86_64__
+ printf("[RUN]\tSet TF and check syscall-less opportunistic sysret\n");
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ extern unsigned char post_nop[];
+ asm volatile ("pushf" WIDTH "\n\t"
+ "pop" WIDTH " %%r11\n\t"
+ "nop\n\t"
+ "post_nop:"
+ : : "c" (post_nop) : "r11");
+ check_result();
+#endif
+
+ printf("[RUN]\tSet TF and check int80\n");
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid));
+ check_result();
+
+ /*
+ * This test is particularly interesting if fast syscalls use
+ * SYSENTER: it triggers a nasty design flaw in SYSENTER.
+ * Specifically, SYSENTER does not clear TF, so either SYSENTER
+ * or the next instruction traps at CPL0. (Of course, Intel
+ * mostly forgot to document exactly what happens here.) So we
+ * get a CPL0 fault with usergs (on 64-bit kernels) and possibly
+ * no stack. The only sane way the kernel can possibly handle
+ * it is to clear TF on return from the #DB handler, but this
+ * happens way too early to set TF in the saved pt_regs, so the
+ * kernel has to do something clever to avoid losing track of
+ * the TF bit.
+ *
+ * Needless to say, we've had bugs in this area.
+ */
+ syscall(SYS_getpid); /* Force symbol binding without TF set. */
+ printf("[RUN]\tSet TF and check a fast syscall\n");
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ syscall(SYS_getpid);
+ check_result();
+
+ /* Now make sure that another fast syscall doesn't set TF again. */
+ printf("[RUN]\tFast syscall with TF cleared\n");
+ fflush(stdout); /* Force a syscall */
+ if (get_eflags() & X86_EFLAGS_TF) {
+ printf("[FAIL]\tTF is now set\n");
+ exit(1);
+ }
+ if (sig_traps) {
+ printf("[FAIL]\tGot SIGTRAP\n");
+ exit(1);
+ }
+ printf("[OK]\tNothing unexpected happened\n");
+
+ return 0;
+}
diff --git a/x86/syscall_arg_fault.c b/x86/syscall_arg_fault.c
new file mode 100644
index 0000000..7db4fc9
--- /dev/null
+++ b/x86/syscall_arg_fault.c
@@ -0,0 +1,130 @@
+/*
+ * syscall_arg_fault.c - tests faults 32-bit fast syscall stack args
+ * Copyright (c) 2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <err.h>
+#include <setjmp.h>
+#include <errno.h>
+
+/* Our sigaltstack scratch space. */
+static unsigned char altstack_data[SIGSTKSZ];
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static volatile sig_atomic_t sig_traps;
+static sigjmp_buf jmpbuf;
+
+static volatile sig_atomic_t n_errs;
+
+static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ if (ctx->uc_mcontext.gregs[REG_EAX] != -EFAULT) {
+ printf("[FAIL]\tAX had the wrong value: 0x%x\n",
+ ctx->uc_mcontext.gregs[REG_EAX]);
+ n_errs++;
+ } else {
+ printf("[OK]\tSeems okay\n");
+ }
+
+ siglongjmp(jmpbuf, 1);
+}
+
+static void sigill(int sig, siginfo_t *info, void *ctx_void)
+{
+ printf("[SKIP]\tIllegal instruction\n");
+ siglongjmp(jmpbuf, 1);
+}
+
+int main()
+{
+ stack_t stack = {
+ .ss_sp = altstack_data,
+ .ss_size = SIGSTKSZ,
+ };
+ if (sigaltstack(&stack, NULL) != 0)
+ err(1, "sigaltstack");
+
+ sethandler(SIGSEGV, sigsegv, SA_ONSTACK);
+ sethandler(SIGILL, sigill, SA_ONSTACK);
+
+ /*
+ * Exercise another nasty special case. The 32-bit SYSCALL
+ * and SYSENTER instructions (even in compat mode) each
+ * clobber one register. A Linux system call has a syscall
+ * number and six arguments, and the user stack pointer
+ * needs to live in some register on return. That means
+ * that we need eight registers, but SYSCALL and SYSENTER
+ * only preserve seven registers. As a result, one argument
+ * ends up on the stack. The stack is user memory, which
+ * means that the kernel can fail to read it.
+ *
+ * The 32-bit fast system calls don't have a defined ABI:
+ * we're supposed to invoke them through the vDSO. So we'll
+ * fudge it: we set all regs to invalid pointer values and
+ * invoke the entry instruction. The return will fail no
+ * matter what, and we completely lose our program state,
+ * but we can fix it up with a signal handler.
+ */
+
+ printf("[RUN]\tSYSENTER with invalid state\n");
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ asm volatile (
+ "movl $-1, %%eax\n\t"
+ "movl $-1, %%ebx\n\t"
+ "movl $-1, %%ecx\n\t"
+ "movl $-1, %%edx\n\t"
+ "movl $-1, %%esi\n\t"
+ "movl $-1, %%edi\n\t"
+ "movl $-1, %%ebp\n\t"
+ "movl $-1, %%esp\n\t"
+ "sysenter"
+ : : : "memory", "flags");
+ }
+
+ printf("[RUN]\tSYSCALL with invalid state\n");
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ asm volatile (
+ "movl $-1, %%eax\n\t"
+ "movl $-1, %%ebx\n\t"
+ "movl $-1, %%ecx\n\t"
+ "movl $-1, %%edx\n\t"
+ "movl $-1, %%esi\n\t"
+ "movl $-1, %%edi\n\t"
+ "movl $-1, %%ebp\n\t"
+ "movl $-1, %%esp\n\t"
+ "syscall\n\t"
+ "pushl $0" /* make sure we segfault cleanly */
+ : : : "memory", "flags");
+ }
+
+ return 0;
+}
diff --git a/x86/syscall_nt.c b/x86/syscall_nt.c
new file mode 100644
index 0000000..60c06af
--- /dev/null
+++ b/x86/syscall_nt.c
@@ -0,0 +1,54 @@
+/*
+ * syscall_nt.c - checks syscalls with NT set
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Some obscure user-space code requires the ability to make system calls
+ * with FLAGS.NT set. Make sure it works.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <asm/processor-flags.h>
+
+#ifdef __x86_64__
+# define WIDTH "q"
+#else
+# define WIDTH "l"
+#endif
+
+static unsigned long get_eflags(void)
+{
+ unsigned long eflags;
+ asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
+ return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+ asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
+ : : "rm" (eflags) : "flags");
+}
+
+int main()
+{
+ printf("[RUN]\tSet NT and issue a syscall\n");
+ set_eflags(get_eflags() | X86_EFLAGS_NT);
+ syscall(SYS_getpid);
+ if (get_eflags() & X86_EFLAGS_NT) {
+ printf("[OK]\tThe syscall worked and NT is still set\n");
+ return 0;
+ } else {
+ printf("[FAIL]\tThe syscall worked but NT was cleared\n");
+ return 1;
+ }
+}
diff --git a/x86/sysret_ss_attrs.c b/x86/sysret_ss_attrs.c
new file mode 100644
index 0000000..ce42d5a
--- /dev/null
+++ b/x86/sysret_ss_attrs.c
@@ -0,0 +1,112 @@
+/*
+ * sysret_ss_attrs.c - test that syscalls return valid hidden SS attributes
+ * Copyright (c) 2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * On AMD CPUs, SYSRET can return with a valid SS descriptor with with
+ * the hidden attributes set to an unusable state. Make sure the kernel
+ * doesn't let this happen.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <err.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <pthread.h>
+
+static void *threadproc(void *ctx)
+{
+ /*
+ * Do our best to cause sleeps on this CPU to exit the kernel and
+ * re-enter with SS = 0.
+ */
+ while (true)
+ ;
+
+ return NULL;
+}
+
+#ifdef __x86_64__
+extern unsigned long call32_from_64(void *stack, void (*function)(void));
+
+asm (".pushsection .text\n\t"
+ ".code32\n\t"
+ "test_ss:\n\t"
+ "pushl $0\n\t"
+ "popl %eax\n\t"
+ "ret\n\t"
+ ".code64");
+extern void test_ss(void);
+#endif
+
+int main()
+{
+ /*
+ * Start a busy-looping thread on the same CPU we're on.
+ * For simplicity, just stick everything to CPU 0. This will
+ * fail in some containers, but that's probably okay.
+ */
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+ printf("[WARN]\tsched_setaffinity failed\n");
+
+ pthread_t thread;
+ if (pthread_create(&thread, 0, threadproc, 0) != 0)
+ err(1, "pthread_create");
+
+#ifdef __x86_64__
+ unsigned char *stack32 = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+ MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE,
+ -1, 0);
+ if (stack32 == MAP_FAILED)
+ err(1, "mmap");
+#endif
+
+ printf("[RUN]\tSyscalls followed by SS validation\n");
+
+ for (int i = 0; i < 1000; i++) {
+ /*
+ * Go to sleep and return using sysret (if we're 64-bit
+ * or we're 32-bit on AMD on a 64-bit kernel). On AMD CPUs,
+ * SYSRET doesn't fix up the cached SS descriptor, so the
+ * kernel needs some kind of workaround to make sure that we
+ * end the system call with a valid stack segment. This
+ * can be a confusing failure because the SS *selector*
+ * is the same regardless.
+ */
+ usleep(2);
+
+#ifdef __x86_64__
+ /*
+ * On 32-bit, just doing a syscall through glibc is enough
+ * to cause a crash if our cached SS descriptor is invalid.
+ * On 64-bit, it's not, so try extra hard.
+ */
+ call32_from_64(stack32 + 4088, test_ss);
+#endif
+ }
+
+ printf("[OK]\tWe survived\n");
+
+#ifdef __x86_64__
+ munmap(stack32, 4096);
+#endif
+
+ return 0;
+}
diff --git a/x86/test_FCMOV.c b/x86/test_FCMOV.c
new file mode 100644
index 0000000..4adcca0
--- /dev/null
+++ b/x86/test_FCMOV.c
@@ -0,0 +1,93 @@
+#undef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#undef __USE_GNU
+#define __USE_GNU 1
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+
+#define TEST(insn) \
+long double __attribute__((noinline)) insn(long flags) \
+{ \
+ long double out; \
+ asm ("\n" \
+ " push %1""\n" \
+ " popf""\n" \
+ " fldpi""\n" \
+ " fld1""\n" \
+ " " #insn " %%st(1), %%st" "\n" \
+ " ffree %%st(1)" "\n" \
+ : "=t" (out) \
+ : "r" (flags) \
+ ); \
+ return out; \
+}
+
+TEST(fcmovb)
+TEST(fcmove)
+TEST(fcmovbe)
+TEST(fcmovu)
+TEST(fcmovnb)
+TEST(fcmovne)
+TEST(fcmovnbe)
+TEST(fcmovnu)
+
+enum {
+ CF = 1 << 0,
+ PF = 1 << 2,
+ ZF = 1 << 6,
+};
+
+void sighandler(int sig)
+{
+ printf("[FAIL]\tGot signal %d, exiting\n", sig);
+ exit(1);
+}
+
+int main(int argc, char **argv, char **envp)
+{
+ int err = 0;
+
+ /* SIGILL triggers on 32-bit kernels w/o fcomi emulation
+ * when run with "no387 nofxsr". Other signals are caught
+ * just in case.
+ */
+ signal(SIGILL, sighandler);
+ signal(SIGFPE, sighandler);
+ signal(SIGSEGV, sighandler);
+
+ printf("[RUN]\tTesting fcmovCC instructions\n");
+ /* If fcmovCC() returns 1.0, the move wasn't done */
+ err |= !(fcmovb(0) == 1.0); err |= !(fcmovnb(0) != 1.0);
+ err |= !(fcmove(0) == 1.0); err |= !(fcmovne(0) != 1.0);
+ err |= !(fcmovbe(0) == 1.0); err |= !(fcmovnbe(0) != 1.0);
+ err |= !(fcmovu(0) == 1.0); err |= !(fcmovnu(0) != 1.0);
+
+ err |= !(fcmovb(CF) != 1.0); err |= !(fcmovnb(CF) == 1.0);
+ err |= !(fcmove(CF) == 1.0); err |= !(fcmovne(CF) != 1.0);
+ err |= !(fcmovbe(CF) != 1.0); err |= !(fcmovnbe(CF) == 1.0);
+ err |= !(fcmovu(CF) == 1.0); err |= !(fcmovnu(CF) != 1.0);
+
+ err |= !(fcmovb(ZF) == 1.0); err |= !(fcmovnb(ZF) != 1.0);
+ err |= !(fcmove(ZF) != 1.0); err |= !(fcmovne(ZF) == 1.0);
+ err |= !(fcmovbe(ZF) != 1.0); err |= !(fcmovnbe(ZF) == 1.0);
+ err |= !(fcmovu(ZF) == 1.0); err |= !(fcmovnu(ZF) != 1.0);
+
+ err |= !(fcmovb(PF) == 1.0); err |= !(fcmovnb(PF) != 1.0);
+ err |= !(fcmove(PF) == 1.0); err |= !(fcmovne(PF) != 1.0);
+ err |= !(fcmovbe(PF) == 1.0); err |= !(fcmovnbe(PF) != 1.0);
+ err |= !(fcmovu(PF) != 1.0); err |= !(fcmovnu(PF) == 1.0);
+
+ if (!err)
+ printf("[OK]\tfcmovCC\n");
+ else
+ printf("[FAIL]\tfcmovCC errors: %d\n", err);
+
+ return err;
+}
diff --git a/x86/test_FCOMI.c b/x86/test_FCOMI.c
new file mode 100644
index 0000000..db4933e
--- /dev/null
+++ b/x86/test_FCOMI.c
@@ -0,0 +1,331 @@
+#undef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#undef __USE_GNU
+#define __USE_GNU 1
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <fenv.h>
+
+enum {
+ CF = 1 << 0,
+ PF = 1 << 2,
+ ZF = 1 << 6,
+ ARITH = CF | PF | ZF,
+};
+
+long res_fcomi_pi_1;
+long res_fcomi_1_pi;
+long res_fcomi_1_1;
+long res_fcomi_nan_1;
+/* sNaN is s|111 1111 1|1xx xxxx xxxx xxxx xxxx xxxx */
+/* qNaN is s|111 1111 1|0xx xxxx xxxx xxxx xxxx xxxx (some x must be nonzero) */
+int snan = 0x7fc11111;
+int qnan = 0x7f811111;
+unsigned short snan1[5];
+/* sNaN80 is s|111 1111 1111 1111 |10xx xx...xx (some x must be nonzero) */
+unsigned short snan80[5] = { 0x1111, 0x1111, 0x1111, 0x8111, 0x7fff };
+
+int test(long flags)
+{
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+ asm ("\n"
+
+ " push %0""\n"
+ " popf""\n"
+ " fld1""\n"
+ " fldpi""\n"
+ " fcomi %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " ffree %%st(1)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_1_pi""\n"
+
+ " push %0""\n"
+ " popf""\n"
+ " fldpi""\n"
+ " fld1""\n"
+ " fcomi %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " ffree %%st(1)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_pi_1""\n"
+
+ " push %0""\n"
+ " popf""\n"
+ " fld1""\n"
+ " fld1""\n"
+ " fcomi %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " ffree %%st(1)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_1_1""\n"
+ :
+ : "r" (flags)
+ );
+ if ((res_fcomi_1_pi & ARITH) != (0)) {
+ printf("[BAD]\tfcomi_1_pi with flags:%lx\n", flags);
+ return 1;
+ }
+ if ((res_fcomi_pi_1 & ARITH) != (CF)) {
+ printf("[BAD]\tfcomi_pi_1 with flags:%lx->%lx\n", flags, res_fcomi_pi_1 & ARITH);
+ return 1;
+ }
+ if ((res_fcomi_1_1 & ARITH) != (ZF)) {
+ printf("[BAD]\tfcomi_1_1 with flags:%lx\n", flags);
+ return 1;
+ }
+ if (fetestexcept(FE_INVALID) != 0) {
+ printf("[BAD]\tFE_INVALID is set in %s\n", __func__);
+ return 1;
+ }
+ return 0;
+}
+
+int test_qnan(long flags)
+{
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+ asm ("\n"
+ " push %0""\n"
+ " popf""\n"
+ " flds qnan""\n"
+ " fld1""\n"
+ " fnclex""\n" // fld of a qnan raised FE_INVALID, clear it
+ " fcomi %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " ffree %%st(1)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_nan_1""\n"
+ :
+ : "r" (flags)
+ );
+ if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) {
+ printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags);
+ return 1;
+ }
+ if (fetestexcept(FE_INVALID) != FE_INVALID) {
+ printf("[BAD]\tFE_INVALID is not set in %s\n", __func__);
+ return 1;
+ }
+ return 0;
+}
+
+int testu_qnan(long flags)
+{
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+ asm ("\n"
+ " push %0""\n"
+ " popf""\n"
+ " flds qnan""\n"
+ " fld1""\n"
+ " fnclex""\n" // fld of a qnan raised FE_INVALID, clear it
+ " fucomi %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " ffree %%st(1)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_nan_1""\n"
+ :
+ : "r" (flags)
+ );
+ if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) {
+ printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags);
+ return 1;
+ }
+ if (fetestexcept(FE_INVALID) != 0) {
+ printf("[BAD]\tFE_INVALID is set in %s\n", __func__);
+ return 1;
+ }
+ return 0;
+}
+
+int testu_snan(long flags)
+{
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+ asm ("\n"
+ " push %0""\n"
+ " popf""\n"
+// " flds snan""\n" // WRONG, this will convert 32-bit fp snan to a *qnan* in 80-bit fp register!
+// " fstpt snan1""\n" // if uncommented, it prints "snan1:7fff c111 1100 0000 0000" - c111, not 8111!
+// " fnclex""\n" // flds of a snan raised FE_INVALID, clear it
+ " fldt snan80""\n" // fldt never raise FE_INVALID
+ " fld1""\n"
+ " fucomi %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " ffree %%st(1)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_nan_1""\n"
+ :
+ : "r" (flags)
+ );
+ if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) {
+ printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags);
+ return 1;
+ }
+// printf("snan:%x snan1:%04x %04x %04x %04x %04x\n", snan, snan1[4], snan1[3], snan1[2], snan1[1], snan1[0]);
+ if (fetestexcept(FE_INVALID) != FE_INVALID) {
+ printf("[BAD]\tFE_INVALID is not set in %s\n", __func__);
+ return 1;
+ }
+ return 0;
+}
+
+int testp(long flags)
+{
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+ asm ("\n"
+
+ " push %0""\n"
+ " popf""\n"
+ " fld1""\n"
+ " fldpi""\n"
+ " fcomip %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_1_pi""\n"
+
+ " push %0""\n"
+ " popf""\n"
+ " fldpi""\n"
+ " fld1""\n"
+ " fcomip %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_pi_1""\n"
+
+ " push %0""\n"
+ " popf""\n"
+ " fld1""\n"
+ " fld1""\n"
+ " fcomip %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_1_1""\n"
+ :
+ : "r" (flags)
+ );
+ if ((res_fcomi_1_pi & ARITH) != (0)) {
+ printf("[BAD]\tfcomi_1_pi with flags:%lx\n", flags);
+ return 1;
+ }
+ if ((res_fcomi_pi_1 & ARITH) != (CF)) {
+ printf("[BAD]\tfcomi_pi_1 with flags:%lx->%lx\n", flags, res_fcomi_pi_1 & ARITH);
+ return 1;
+ }
+ if ((res_fcomi_1_1 & ARITH) != (ZF)) {
+ printf("[BAD]\tfcomi_1_1 with flags:%lx\n", flags);
+ return 1;
+ }
+ if (fetestexcept(FE_INVALID) != 0) {
+ printf("[BAD]\tFE_INVALID is set in %s\n", __func__);
+ return 1;
+ }
+ return 0;
+}
+
+int testp_qnan(long flags)
+{
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+ asm ("\n"
+ " push %0""\n"
+ " popf""\n"
+ " flds qnan""\n"
+ " fld1""\n"
+ " fnclex""\n" // fld of a qnan raised FE_INVALID, clear it
+ " fcomip %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_nan_1""\n"
+ :
+ : "r" (flags)
+ );
+ if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) {
+ printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags);
+ return 1;
+ }
+ if (fetestexcept(FE_INVALID) != FE_INVALID) {
+ printf("[BAD]\tFE_INVALID is not set in %s\n", __func__);
+ return 1;
+ }
+ return 0;
+}
+
+int testup_qnan(long flags)
+{
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+ asm ("\n"
+ " push %0""\n"
+ " popf""\n"
+ " flds qnan""\n"
+ " fld1""\n"
+ " fnclex""\n" // fld of a qnan raised FE_INVALID, clear it
+ " fucomip %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_nan_1""\n"
+ :
+ : "r" (flags)
+ );
+ if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) {
+ printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags);
+ return 1;
+ }
+ if (fetestexcept(FE_INVALID) != 0) {
+ printf("[BAD]\tFE_INVALID is set in %s\n", __func__);
+ return 1;
+ }
+ return 0;
+}
+
+void sighandler(int sig)
+{
+ printf("[FAIL]\tGot signal %d, exiting\n", sig);
+ exit(1);
+}
+
+int main(int argc, char **argv, char **envp)
+{
+ int err = 0;
+
+ /* SIGILL triggers on 32-bit kernels w/o fcomi emulation
+ * when run with "no387 nofxsr". Other signals are caught
+ * just in case.
+ */
+ signal(SIGILL, sighandler);
+ signal(SIGFPE, sighandler);
+ signal(SIGSEGV, sighandler);
+
+ printf("[RUN]\tTesting f[u]comi[p] instructions\n");
+ err |= test(0);
+ err |= test_qnan(0);
+ err |= testu_qnan(0);
+ err |= testu_snan(0);
+ err |= test(CF|ZF|PF);
+ err |= test_qnan(CF|ZF|PF);
+ err |= testu_qnan(CF|ZF|PF);
+ err |= testu_snan(CF|ZF|PF);
+ err |= testp(0);
+ err |= testp_qnan(0);
+ err |= testup_qnan(0);
+ err |= testp(CF|ZF|PF);
+ err |= testp_qnan(CF|ZF|PF);
+ err |= testup_qnan(CF|ZF|PF);
+ if (!err)
+ printf("[OK]\tf[u]comi[p]\n");
+ else
+ printf("[FAIL]\tf[u]comi[p] errors: %d\n", err);
+
+ return err;
+}
diff --git a/x86/test_FISTTP.c b/x86/test_FISTTP.c
new file mode 100644
index 0000000..b8e61a0
--- /dev/null
+++ b/x86/test_FISTTP.c
@@ -0,0 +1,137 @@
+#undef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#undef __USE_GNU
+#define __USE_GNU 1
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <fenv.h>
+
+unsigned long long res64 = -1;
+unsigned int res32 = -1;
+unsigned short res16 = -1;
+
+int test(void)
+{
+ int ex;
+
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+ asm volatile ("\n"
+ " fld1""\n"
+ " fisttp res16""\n"
+ " fld1""\n"
+ " fisttpl res32""\n"
+ " fld1""\n"
+ " fisttpll res64""\n"
+ : : : "memory"
+ );
+ if (res16 != 1 || res32 != 1 || res64 != 1) {
+ printf("[BAD]\tfisttp 1\n");
+ return 1;
+ }
+ ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+ if (ex != 0) {
+ printf("[BAD]\tfisttp 1: wrong exception state\n");
+ return 1;
+ }
+
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+ asm volatile ("\n"
+ " fldpi""\n"
+ " fisttp res16""\n"
+ " fldpi""\n"
+ " fisttpl res32""\n"
+ " fldpi""\n"
+ " fisttpll res64""\n"
+ : : : "memory"
+ );
+ if (res16 != 3 || res32 != 3 || res64 != 3) {
+ printf("[BAD]\tfisttp pi\n");
+ return 1;
+ }
+ ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+ if (ex != FE_INEXACT) {
+ printf("[BAD]\tfisttp pi: wrong exception state\n");
+ return 1;
+ }
+
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+ asm volatile ("\n"
+ " fldpi""\n"
+ " fchs""\n"
+ " fisttp res16""\n"
+ " fldpi""\n"
+ " fchs""\n"
+ " fisttpl res32""\n"
+ " fldpi""\n"
+ " fchs""\n"
+ " fisttpll res64""\n"
+ : : : "memory"
+ );
+ if (res16 != 0xfffd || res32 != 0xfffffffd || res64 != 0xfffffffffffffffdULL) {
+ printf("[BAD]\tfisttp -pi\n");
+ return 1;
+ }
+ ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+ if (ex != FE_INEXACT) {
+ printf("[BAD]\tfisttp -pi: wrong exception state\n");
+ return 1;
+ }
+
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+ asm volatile ("\n"
+ " fldln2""\n"
+ " fisttp res16""\n"
+ " fldln2""\n"
+ " fisttpl res32""\n"
+ " fldln2""\n"
+ " fisttpll res64""\n"
+ : : : "memory"
+ );
+ /* Test truncation to zero (round-to-nearest would give 1 here) */
+ if (res16 != 0 || res32 != 0 || res64 != 0) {
+ printf("[BAD]\tfisttp ln2\n");
+ return 1;
+ }
+ ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+ if (ex != FE_INEXACT) {
+ printf("[BAD]\tfisttp ln2: wrong exception state\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+void sighandler(int sig)
+{
+ printf("[FAIL]\tGot signal %d, exiting\n", sig);
+ exit(1);
+}
+
+int main(int argc, char **argv, char **envp)
+{
+ int err = 0;
+
+ /* SIGILL triggers on 32-bit kernels w/o fisttp emulation
+ * when run with "no387 nofxsr". Other signals are caught
+ * just in case.
+ */
+ signal(SIGILL, sighandler);
+ signal(SIGFPE, sighandler);
+ signal(SIGSEGV, sighandler);
+
+ printf("[RUN]\tTesting fisttp instructions\n");
+ err |= test();
+ if (!err)
+ printf("[OK]\tfisttp\n");
+ else
+ printf("[FAIL]\tfisttp errors: %d\n", err);
+
+ return err;
+}
diff --git a/x86/test_syscall_vdso.c b/x86/test_syscall_vdso.c
new file mode 100644
index 0000000..4037035
--- /dev/null
+++ b/x86/test_syscall_vdso.c
@@ -0,0 +1,401 @@
+/*
+ * 32-bit syscall ABI conformance test.
+ *
+ * Copyright (c) 2015 Denys Vlasenko
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+/*
+ * Can be built statically:
+ * gcc -Os -Wall -static -m32 test_syscall_vdso.c thunks_32.S
+ */
+#undef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#undef __USE_GNU
+#define __USE_GNU 1
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <elf.h>
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+
+#if !defined(__i386__)
+int main(int argc, char **argv, char **envp)
+{
+ printf("[SKIP]\tNot a 32-bit x86 userspace\n");
+ return 0;
+}
+#else
+
+long syscall_addr;
+long get_syscall(char **envp)
+{
+ Elf32_auxv_t *auxv;
+ while (*envp++ != NULL)
+ continue;
+ for (auxv = (void *)envp; auxv->a_type != AT_NULL; auxv++)
+ if (auxv->a_type == AT_SYSINFO)
+ return auxv->a_un.a_val;
+ printf("[WARN]\tAT_SYSINFO not supplied\n");
+ return 0;
+}
+
+asm (
+ " .pushsection .text\n"
+ " .global int80\n"
+ "int80:\n"
+ " int $0x80\n"
+ " ret\n"
+ " .popsection\n"
+);
+extern char int80;
+
+struct regs64 {
+ uint64_t rax, rbx, rcx, rdx;
+ uint64_t rsi, rdi, rbp, rsp;
+ uint64_t r8, r9, r10, r11;
+ uint64_t r12, r13, r14, r15;
+};
+struct regs64 regs64;
+int kernel_is_64bit;
+
+asm (
+ " .pushsection .text\n"
+ " .code64\n"
+ "get_regs64:\n"
+ " push %rax\n"
+ " mov $regs64, %eax\n"
+ " pop 0*8(%rax)\n"
+ " movq %rbx, 1*8(%rax)\n"
+ " movq %rcx, 2*8(%rax)\n"
+ " movq %rdx, 3*8(%rax)\n"
+ " movq %rsi, 4*8(%rax)\n"
+ " movq %rdi, 5*8(%rax)\n"
+ " movq %rbp, 6*8(%rax)\n"
+ " movq %rsp, 7*8(%rax)\n"
+ " movq %r8, 8*8(%rax)\n"
+ " movq %r9, 9*8(%rax)\n"
+ " movq %r10, 10*8(%rax)\n"
+ " movq %r11, 11*8(%rax)\n"
+ " movq %r12, 12*8(%rax)\n"
+ " movq %r13, 13*8(%rax)\n"
+ " movq %r14, 14*8(%rax)\n"
+ " movq %r15, 15*8(%rax)\n"
+ " ret\n"
+ "poison_regs64:\n"
+ " movq $0x7f7f7f7f, %r8\n"
+ " shl $32, %r8\n"
+ " orq $0x7f7f7f7f, %r8\n"
+ " movq %r8, %r9\n"
+ " movq %r8, %r10\n"
+ " movq %r8, %r11\n"
+ " movq %r8, %r12\n"
+ " movq %r8, %r13\n"
+ " movq %r8, %r14\n"
+ " movq %r8, %r15\n"
+ " ret\n"
+ " .code32\n"
+ " .popsection\n"
+);
+extern void get_regs64(void);
+extern void poison_regs64(void);
+extern unsigned long call64_from_32(void (*function)(void));
+void print_regs64(void)
+{
+ if (!kernel_is_64bit)
+ return;
+ printf("ax:%016llx bx:%016llx cx:%016llx dx:%016llx\n", regs64.rax, regs64.rbx, regs64.rcx, regs64.rdx);
+ printf("si:%016llx di:%016llx bp:%016llx sp:%016llx\n", regs64.rsi, regs64.rdi, regs64.rbp, regs64.rsp);
+ printf(" 8:%016llx 9:%016llx 10:%016llx 11:%016llx\n", regs64.r8 , regs64.r9 , regs64.r10, regs64.r11);
+ printf("12:%016llx 13:%016llx 14:%016llx 15:%016llx\n", regs64.r12, regs64.r13, regs64.r14, regs64.r15);
+}
+
+int check_regs64(void)
+{
+ int err = 0;
+ int num = 8;
+ uint64_t *r64 = &regs64.r8;
+
+ if (!kernel_is_64bit)
+ return 0;
+
+ do {
+ if (*r64 == 0x7f7f7f7f7f7f7f7fULL)
+ continue; /* register did not change */
+ if (syscall_addr != (long)&int80) {
+ /*
+ * Non-INT80 syscall entrypoints are allowed to clobber R8+ regs:
+ * either clear them to 0, or for R11, load EFLAGS.
+ */
+ if (*r64 == 0)
+ continue;
+ if (num == 11) {
+ printf("[NOTE]\tR11 has changed:%016llx - assuming clobbered by SYSRET insn\n", *r64);
+ continue;
+ }
+ } else {
+ /* INT80 syscall entrypoint can be used by
+ * 64-bit programs too, unlike SYSCALL/SYSENTER.
+ * Therefore it must preserve R12+
+ * (they are callee-saved registers in 64-bit C ABI).
+ *
+ * This was probably historically not intended,
+ * but R8..11 are clobbered (cleared to 0).
+ * IOW: they are the only registers which aren't
+ * preserved across INT80 syscall.
+ */
+ if (*r64 == 0 && num <= 11)
+ continue;
+ }
+ printf("[FAIL]\tR%d has changed:%016llx\n", num, *r64);
+ err++;
+ } while (r64++, ++num < 16);
+
+ if (!err)
+ printf("[OK]\tR8..R15 did not leak kernel data\n");
+ return err;
+}
+
+int nfds;
+fd_set rfds;
+fd_set wfds;
+fd_set efds;
+struct timespec timeout;
+sigset_t sigmask;
+struct {
+ sigset_t *sp;
+ int sz;
+} sigmask_desc;
+
+void prep_args()
+{
+ nfds = 42;
+ FD_ZERO(&rfds);
+ FD_ZERO(&wfds);
+ FD_ZERO(&efds);
+ FD_SET(0, &rfds);
+ FD_SET(1, &wfds);
+ FD_SET(2, &efds);
+ timeout.tv_sec = 0;
+ timeout.tv_nsec = 123;
+ sigemptyset(&sigmask);
+ sigaddset(&sigmask, SIGINT);
+ sigaddset(&sigmask, SIGUSR2);
+ sigaddset(&sigmask, SIGRTMAX);
+ sigmask_desc.sp = &sigmask;
+ sigmask_desc.sz = 8; /* bytes */
+}
+
+static void print_flags(const char *name, unsigned long r)
+{
+ static const char *bitarray[] = {
+ "\n" ,"c\n" ,/* Carry Flag */
+ "0 " ,"1 " ,/* Bit 1 - always on */
+ "" ,"p " ,/* Parity Flag */
+ "0 " ,"3? " ,
+ "" ,"a " ,/* Auxiliary carry Flag */
+ "0 " ,"5? " ,
+ "" ,"z " ,/* Zero Flag */
+ "" ,"s " ,/* Sign Flag */
+ "" ,"t " ,/* Trap Flag */
+ "" ,"i " ,/* Interrupt Flag */
+ "" ,"d " ,/* Direction Flag */
+ "" ,"o " ,/* Overflow Flag */
+ "0 " ,"1 " ,/* I/O Privilege Level (2 bits) */
+ "0" ,"1" ,/* I/O Privilege Level (2 bits) */
+ "" ,"n " ,/* Nested Task */
+ "0 " ,"15? ",
+ "" ,"r " ,/* Resume Flag */
+ "" ,"v " ,/* Virtual Mode */
+ "" ,"ac " ,/* Alignment Check/Access Control */
+ "" ,"vif ",/* Virtual Interrupt Flag */
+ "" ,"vip ",/* Virtual Interrupt Pending */
+ "" ,"id " ,/* CPUID detection */
+ NULL
+ };
+ const char **bitstr;
+ int bit;
+
+ printf("%s=%016lx ", name, r);
+ bitstr = bitarray + 42;
+ bit = 21;
+ if ((r >> 22) != 0)
+ printf("(extra bits are set) ");
+ do {
+ if (bitstr[(r >> bit) & 1][0])
+ fputs(bitstr[(r >> bit) & 1], stdout);
+ bitstr -= 2;
+ bit--;
+ } while (bit >= 0);
+}
+
+int run_syscall(void)
+{
+ long flags, bad_arg;
+
+ prep_args();
+
+ if (kernel_is_64bit)
+ call64_from_32(poison_regs64);
+ /*print_regs64();*/
+
+ asm("\n"
+ /* Try 6-arg syscall: pselect. It should return quickly */
+ " push %%ebp\n"
+ " mov $308, %%eax\n" /* PSELECT */
+ " mov nfds, %%ebx\n" /* ebx arg1 */
+ " mov $rfds, %%ecx\n" /* ecx arg2 */
+ " mov $wfds, %%edx\n" /* edx arg3 */
+ " mov $efds, %%esi\n" /* esi arg4 */
+ " mov $timeout, %%edi\n" /* edi arg5 */
+ " mov $sigmask_desc, %%ebp\n" /* %ebp arg6 */
+ " push $0x200ed7\n" /* set almost all flags */
+ " popf\n" /* except TF, IOPL, NT, RF, VM, AC, VIF, VIP */
+ " call *syscall_addr\n"
+ /* Check that registers are not clobbered */
+ " pushf\n"
+ " pop %%eax\n"
+ " cld\n"
+ " cmp nfds, %%ebx\n" /* ebx arg1 */
+ " mov $1, %%ebx\n"
+ " jne 1f\n"
+ " cmp $rfds, %%ecx\n" /* ecx arg2 */
+ " mov $2, %%ebx\n"
+ " jne 1f\n"
+ " cmp $wfds, %%edx\n" /* edx arg3 */
+ " mov $3, %%ebx\n"
+ " jne 1f\n"
+ " cmp $efds, %%esi\n" /* esi arg4 */
+ " mov $4, %%ebx\n"
+ " jne 1f\n"
+ " cmp $timeout, %%edi\n" /* edi arg5 */
+ " mov $5, %%ebx\n"
+ " jne 1f\n"
+ " cmpl $sigmask_desc, %%ebp\n" /* %ebp arg6 */
+ " mov $6, %%ebx\n"
+ " jne 1f\n"
+ " mov $0, %%ebx\n"
+ "1:\n"
+ " pop %%ebp\n"
+ : "=a" (flags), "=b" (bad_arg)
+ :
+ : "cx", "dx", "si", "di"
+ );
+
+ if (kernel_is_64bit) {
+ memset(&regs64, 0x77, sizeof(regs64));
+ call64_from_32(get_regs64);
+ /*print_regs64();*/
+ }
+
+ /*
+ * On paravirt kernels, flags are not preserved across syscalls.
+ * Thus, we do not consider it a bug if some are changed.
+ * We just show ones which do.
+ */
+ if ((0x200ed7 ^ flags) != 0) {
+ print_flags("[WARN]\tFlags before", 0x200ed7);
+ print_flags("[WARN]\tFlags after", flags);
+ print_flags("[WARN]\tFlags change", (0x200ed7 ^ flags));
+ }
+
+ if (bad_arg) {
+ printf("[FAIL]\targ#%ld clobbered\n", bad_arg);
+ return 1;
+ }
+ printf("[OK]\tArguments are preserved across syscall\n");
+
+ return check_regs64();
+}
+
+int run_syscall_twice()
+{
+ int exitcode = 0;
+ long sv;
+
+ if (syscall_addr) {
+ printf("[RUN]\tExecuting 6-argument 32-bit syscall via VDSO\n");
+ exitcode = run_syscall();
+ }
+ sv = syscall_addr;
+ syscall_addr = (long)&int80;
+ printf("[RUN]\tExecuting 6-argument 32-bit syscall via INT 80\n");
+ exitcode += run_syscall();
+ syscall_addr = sv;
+ return exitcode;
+}
+
+void ptrace_me()
+{
+ pid_t pid;
+
+ fflush(NULL);
+ pid = fork();
+ if (pid < 0)
+ exit(1);
+ if (pid == 0) {
+ /* child */
+ if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) != 0)
+ exit(0);
+ raise(SIGSTOP);
+ return;
+ }
+ /* parent */
+ printf("[RUN]\tRunning tests under ptrace\n");
+ while (1) {
+ int status;
+ pid = waitpid(-1, &status, __WALL);
+ if (WIFEXITED(status))
+ exit(WEXITSTATUS(status));
+ if (WIFSIGNALED(status))
+ exit(WTERMSIG(status));
+ if (pid <= 0 || !WIFSTOPPED(status)) /* paranoia */
+ exit(255);
+ /*
+ * Note: we do not inject sig = WSTOPSIG(status).
+ * We probably should, but careful: do not inject SIGTRAP
+ * generated by syscall entry/exit stops.
+ * That kills the child.
+ */
+ ptrace(PTRACE_SYSCALL, pid, 0L, 0L /*sig*/);
+ }
+}
+
+int main(int argc, char **argv, char **envp)
+{
+ int exitcode = 0;
+ int cs;
+
+ asm("\n"
+ " movl %%cs, %%eax\n"
+ : "=a" (cs)
+ );
+ kernel_is_64bit = (cs == 0x23);
+ if (!kernel_is_64bit)
+ printf("[NOTE]\tNot a 64-bit kernel, won't test R8..R15 leaks\n");
+
+ /* This only works for non-static builds:
+ * syscall_addr = dlsym(dlopen("linux-gate.so.1", RTLD_NOW), "__kernel_vsyscall");
+ */
+ syscall_addr = get_syscall(envp);
+
+ exitcode += run_syscall_twice();
+ ptrace_me();
+ exitcode += run_syscall_twice();
+
+ return exitcode;
+}
+#endif
diff --git a/x86/thunks.S b/x86/thunks.S
new file mode 100644
index 0000000..ce8a995
--- /dev/null
+++ b/x86/thunks.S
@@ -0,0 +1,67 @@
+/*
+ * thunks.S - assembly helpers for mixed-bitness code
+ * Copyright (c) 2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * These are little helpers that make it easier to switch bitness on
+ * the fly.
+ */
+
+ .text
+
+ .global call32_from_64
+ .type call32_from_64, @function
+call32_from_64:
+ // rdi: stack to use
+ // esi: function to call
+
+ // Save registers
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+
+ // Switch stacks
+ mov %rsp,(%rdi)
+ mov %rdi,%rsp
+
+ // Switch to compatibility mode
+ pushq $0x23 /* USER32_CS */
+ pushq $1f
+ lretq
+
+1:
+ .code32
+ // Call the function
+ call *%esi
+ // Switch back to long mode
+ jmp $0x33,$1f
+ .code64
+
+1:
+ // Restore the stack
+ mov (%rsp),%rsp
+
+ // Restore registers
+ popfq
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+
+ ret
+
+.size call32_from_64, .-call32_from_64
diff --git a/x86/thunks_32.S b/x86/thunks_32.S
new file mode 100644
index 0000000..29b644b
--- /dev/null
+++ b/x86/thunks_32.S
@@ -0,0 +1,55 @@
+/*
+ * thunks_32.S - assembly helpers for mixed-bitness code
+ * Copyright (c) 2015 Denys Vlasenko
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * These are little helpers that make it easier to switch bitness on
+ * the fly.
+ */
+
+ .text
+ .code32
+
+ .global call64_from_32
+ .type call32_from_64, @function
+
+ // 4(%esp): function to call
+call64_from_32:
+ // Fetch function address
+ mov 4(%esp), %eax
+
+ // Save registers which are callee-clobbered by 64-bit ABI
+ push %ecx
+ push %edx
+ push %esi
+ push %edi
+
+ // Switch to long mode
+ jmp $0x33,$1f
+1: .code64
+
+ // Call the function
+ call *%rax
+
+ // Switch to compatibility mode
+ push $0x23 /* USER32_CS */
+ .code32; push $1f; .code64 /* hack: can't have X86_64_32S relocation in 32-bit ELF */
+ lretq
+1: .code32
+
+ pop %edi
+ pop %esi
+ pop %edx
+ pop %ecx
+
+ ret
+
+.size call64_from_32, .-call64_from_32
diff --git a/x86/trivial_32bit_program.c b/x86/trivial_32bit_program.c
new file mode 100644
index 0000000..fabdf0f
--- /dev/null
+++ b/x86/trivial_32bit_program.c
@@ -0,0 +1,18 @@
+/*
+ * Trivial program to check that we have a valid 32-bit build environment.
+ * Copyright (c) 2015 Andy Lutomirski
+ * GPL v2
+ */
+
+#ifndef __i386__
+# error wrong architecture
+#endif
+
+#include <stdio.h>
+
+int main()
+{
+ printf("\n");
+
+ return 0;
+}
diff --git a/x86/trivial_64bit_program.c b/x86/trivial_64bit_program.c
new file mode 100644
index 0000000..05c6a41
--- /dev/null
+++ b/x86/trivial_64bit_program.c
@@ -0,0 +1,18 @@
+/*
+ * Trivial program to check that we have a valid 64-bit build environment.
+ * Copyright (c) 2015 Andy Lutomirski
+ * GPL v2
+ */
+
+#ifndef __x86_64__
+# error wrong architecture
+#endif
+
+#include <stdio.h>
+
+int main()
+{
+ printf("\n");
+
+ return 0;
+}
diff --git a/x86/unwind_vdso.c b/x86/unwind_vdso.c
new file mode 100644
index 0000000..00a26a8
--- /dev/null
+++ b/x86/unwind_vdso.c
@@ -0,0 +1,211 @@
+/*
+ * unwind_vdso.c - tests unwind info for AT_SYSINFO in the vDSO
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * This tests __kernel_vsyscall's unwind info.
+ */
+
+#define _GNU_SOURCE
+
+#include <features.h>
+#include <stdio.h>
+
+#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ < 16
+
+int main()
+{
+ /* We need getauxval(). */
+ printf("[SKIP]\tGLIBC before 2.16 cannot compile this test\n");
+ return 0;
+}
+
+#else
+
+#include <sys/time.h>
+#include <stdlib.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <signal.h>
+#include <sys/ucontext.h>
+#include <err.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/ptrace.h>
+#include <sys/user.h>
+#include <sys/ucontext.h>
+#include <link.h>
+#include <sys/auxv.h>
+#include <dlfcn.h>
+#include <unwind.h>
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+#ifdef __x86_64__
+# define WIDTH "q"
+#else
+# define WIDTH "l"
+#endif
+
+static unsigned long get_eflags(void)
+{
+ unsigned long eflags;
+ asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
+ return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+ asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
+ : : "rm" (eflags) : "flags");
+}
+
+#define X86_EFLAGS_TF (1UL << 8)
+
+static volatile sig_atomic_t nerrs;
+static unsigned long sysinfo;
+static bool got_sysinfo = false;
+static unsigned long return_address;
+
+struct unwind_state {
+ unsigned long ip; /* trap source */
+ int depth; /* -1 until we hit the trap source */
+};
+
+_Unwind_Reason_Code trace_fn(struct _Unwind_Context * ctx, void *opaque)
+{
+ struct unwind_state *state = opaque;
+ unsigned long ip = _Unwind_GetIP(ctx);
+
+ if (state->depth == -1) {
+ if (ip == state->ip)
+ state->depth = 0;
+ else
+ return _URC_NO_REASON; /* Not there yet */
+ }
+ printf("\t 0x%lx\n", ip);
+
+ if (ip == return_address) {
+ /* Here we are. */
+ unsigned long eax = _Unwind_GetGR(ctx, 0);
+ unsigned long ecx = _Unwind_GetGR(ctx, 1);
+ unsigned long edx = _Unwind_GetGR(ctx, 2);
+ unsigned long ebx = _Unwind_GetGR(ctx, 3);
+ unsigned long ebp = _Unwind_GetGR(ctx, 5);
+ unsigned long esi = _Unwind_GetGR(ctx, 6);
+ unsigned long edi = _Unwind_GetGR(ctx, 7);
+ bool ok = (eax == SYS_getpid || eax == getpid()) &&
+ ebx == 1 && ecx == 2 && edx == 3 &&
+ esi == 4 && edi == 5 && ebp == 6;
+
+ if (!ok)
+ nerrs++;
+ printf("[%s]\t NR = %ld, args = %ld, %ld, %ld, %ld, %ld, %ld\n",
+ (ok ? "OK" : "FAIL"),
+ eax, ebx, ecx, edx, esi, edi, ebp);
+
+ return _URC_NORMAL_STOP;
+ } else {
+ state->depth++;
+ return _URC_NO_REASON;
+ }
+}
+
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t *)ctx_void;
+ struct unwind_state state;
+ unsigned long ip = ctx->uc_mcontext.gregs[REG_EIP];
+
+ if (!got_sysinfo && ip == sysinfo) {
+ got_sysinfo = true;
+
+ /* Find the return address. */
+ return_address = *(unsigned long *)(unsigned long)ctx->uc_mcontext.gregs[REG_ESP];
+
+ printf("\tIn vsyscall at 0x%lx, returning to 0x%lx\n",
+ ip, return_address);
+ }
+
+ if (!got_sysinfo)
+ return; /* Not there yet */
+
+ if (ip == return_address) {
+ ctx->uc_mcontext.gregs[REG_EFL] &= ~X86_EFLAGS_TF;
+ printf("\tVsyscall is done\n");
+ return;
+ }
+
+ printf("\tSIGTRAP at 0x%lx\n", ip);
+
+ state.ip = ip;
+ state.depth = -1;
+ _Unwind_Backtrace(trace_fn, &state);
+}
+
+int main()
+{
+ sysinfo = getauxval(AT_SYSINFO);
+ printf("\tAT_SYSINFO is 0x%lx\n", sysinfo);
+
+ Dl_info info;
+ if (!dladdr((void *)sysinfo, &info)) {
+ printf("[WARN]\tdladdr failed on AT_SYSINFO\n");
+ } else {
+ printf("[OK]\tAT_SYSINFO maps to %s, loaded at 0x%p\n",
+ info.dli_fname, info.dli_fbase);
+ }
+
+ sethandler(SIGTRAP, sigtrap, 0);
+
+ syscall(SYS_getpid); /* Force symbol binding without TF set. */
+ printf("[RUN]\tSet TF and check a fast syscall\n");
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ syscall(SYS_getpid, 1, 2, 3, 4, 5, 6);
+ if (!got_sysinfo) {
+ set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+
+ /*
+ * The most likely cause of this is that you're on Debian or
+ * a Debian-based distro, you're missing libc6-i686, and you're
+ * affected by libc/19006 (https://sourceware.org/PR19006).
+ */
+ printf("[WARN]\tsyscall(2) didn't enter AT_SYSINFO\n");
+ }
+
+ if (get_eflags() & X86_EFLAGS_TF) {
+ printf("[FAIL]\tTF is still set\n");
+ nerrs++;
+ }
+
+ if (nerrs) {
+ printf("[FAIL]\tThere were errors\n");
+ return 1;
+ } else {
+ printf("[OK]\tAll is well\n");
+ return 0;
+ }
+}
+
+#endif /* New enough libc */
diff --git a/zram/Makefile b/zram/Makefile
new file mode 100644
index 0000000..29d8034
--- /dev/null
+++ b/zram/Makefile
@@ -0,0 +1,9 @@
+all:
+
+TEST_PROGS := zram.sh
+TEST_FILES := zram01.sh zram02.sh zram_lib.sh
+
+include ../lib.mk
+
+clean:
+ $(RM) err.log
diff --git a/zram/README b/zram/README
new file mode 100644
index 0000000..eb17917
--- /dev/null
+++ b/zram/README
@@ -0,0 +1,40 @@
+zram: Compressed RAM based block devices
+----------------------------------------
+* Introduction
+
+The zram module creates RAM based block devices named /dev/zram<id>
+(<id> = 0, 1, ...). Pages written to these disks are compressed and stored
+in memory itself. These disks allow very fast I/O and compression provides
+good amounts of memory savings. Some of the usecases include /tmp storage,
+use as swap disks, various caches under /var and maybe many more :)
+
+Statistics for individual zram devices are exported through sysfs nodes at
+/sys/block/zram<id>/
+
+Kconfig required:
+CONFIG_ZRAM=y
+CONFIG_ZRAM_LZ4_COMPRESS=y
+CONFIG_ZPOOL=y
+CONFIG_ZSMALLOC=y
+
+ZRAM Testcases
+--------------
+zram_lib.sh: create library with initialization/cleanup functions
+zram.sh: For sanity check of CONFIG_ZRAM and to run zram01 and zram02
+
+Two functional tests: zram01 and zram02:
+zram01.sh: creates general purpose ram disks with ext4 filesystems
+zram02.sh: creates block device for swap
+
+Commands required for testing:
+ - bc
+ - dd
+ - free
+ - awk
+ - mkswap
+ - swapon
+ - swapoff
+ - mkfs/ mkfs.ext4
+
+For more information please refer:
+kernel-source-tree/Documentation/blockdev/zram.txt
diff --git a/zram/zram.sh b/zram/zram.sh
new file mode 100755
index 0000000..683a292
--- /dev/null
+++ b/zram/zram.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+TCID="zram.sh"
+
+. ./zram_lib.sh
+
+run_zram () {
+echo "--------------------"
+echo "running zram tests"
+echo "--------------------"
+./zram01.sh
+echo ""
+./zram02.sh
+}
+
+check_prereqs
+
+# check zram module exists
+MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko
+if [ -f $MODULE_PATH ]; then
+ run_zram
+elif [ -b /dev/zram0 ]; then
+ run_zram
+else
+ echo "$TCID : No zram.ko module or /dev/zram0 device file not found"
+ echo "$TCID : CONFIG_ZRAM is not set"
+ exit 1
+fi
diff --git a/zram/zram01.sh b/zram/zram01.sh
new file mode 100755
index 0000000..b9566a6
--- /dev/null
+++ b/zram/zram01.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+# Copyright (c) 2015 Oracle and/or its affiliates. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# Test creates several zram devices with different filesystems on them.
+# It fills each device with zeros and checks that compression works.
+#
+# Author: Alexey Kodanev <alexey.kodanev@oracle.com>
+# Modified: Naresh Kamboju <naresh.kamboju@linaro.org>
+
+TCID="zram01"
+ERR_CODE=0
+
+. ./zram_lib.sh
+
+# Test will create the following number of zram devices:
+dev_num=1
+# This is a list of parameters for zram devices.
+# Number of items must be equal to 'dev_num' parameter.
+zram_max_streams="2"
+
+# The zram sysfs node 'disksize' value can be either in bytes,
+# or you can use mem suffixes. But in some old kernels, mem
+# suffixes are not supported, for example, in RHEL6.6GA's kernel
+# layer, it uses strict_strtoull() to parse disksize which does
+# not support mem suffixes, in some newer kernels, they use
+# memparse() which supports mem suffixes. So here we just use
+# bytes to make sure everything works correctly.
+zram_sizes="2097152" # 2MB
+zram_mem_limits="2M"
+zram_filesystems="ext4"
+zram_algs="lzo"
+
+zram_fill_fs()
+{
+ local mem_free0=$(free -m | awk 'NR==2 {print $4}')
+
+ for i in $(seq 0 $(($dev_num - 1))); do
+ echo "fill zram$i..."
+ local b=0
+ while [ true ]; do
+ dd conv=notrunc if=/dev/zero of=zram${i}/file \
+ oflag=append count=1 bs=1024 status=none \
+ > /dev/null 2>&1 || break
+ b=$(($b + 1))
+ done
+ echo "zram$i can be filled with '$b' KB"
+ done
+
+ local mem_free1=$(free -m | awk 'NR==2 {print $4}')
+ local used_mem=$(($mem_free0 - $mem_free1))
+
+ local total_size=0
+ for sm in $zram_sizes; do
+ local s=$(echo $sm | sed 's/M//')
+ total_size=$(($total_size + $s))
+ done
+
+ echo "zram used ${used_mem}M, zram disk sizes ${total_size}M"
+
+ local v=$((100 * $total_size / $used_mem))
+
+ if [ "$v" -lt 100 ]; then
+ echo "FAIL compression ratio: 0.$v:1"
+ ERR_CODE=-1
+ zram_cleanup
+ return
+ fi
+
+ echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK"
+}
+
+check_prereqs
+zram_load
+zram_max_streams
+zram_compress_alg
+zram_set_disksizes
+zram_set_memlimit
+zram_makefs
+zram_mount
+
+zram_fill_fs
+zram_cleanup
+zram_unload
+
+if [ $ERR_CODE -ne 0 ]; then
+ echo "$TCID : [FAIL]"
+else
+ echo "$TCID : [PASS]"
+fi
diff --git a/zram/zram02.sh b/zram/zram02.sh
new file mode 100755
index 0000000..74569b8
--- /dev/null
+++ b/zram/zram02.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# Copyright (c) 2015 Oracle and/or its affiliates. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# Test checks that we can create swap zram device.
+#
+# Author: Alexey Kodanev <alexey.kodanev@oracle.com>
+# Modified: Naresh Kamboju <naresh.kamboju@linaro.org>
+
+TCID="zram02"
+ERR_CODE=0
+
+. ./zram_lib.sh
+
+# Test will create the following number of zram devices:
+dev_num=1
+# This is a list of parameters for zram devices.
+# Number of items must be equal to 'dev_num' parameter.
+zram_max_streams="2"
+
+# The zram sysfs node 'disksize' value can be either in bytes,
+# or you can use mem suffixes. But in some old kernels, mem
+# suffixes are not supported, for example, in RHEL6.6GA's kernel
+# layer, it uses strict_strtoull() to parse disksize which does
+# not support mem suffixes, in some newer kernels, they use
+# memparse() which supports mem suffixes. So here we just use
+# bytes to make sure everything works correctly.
+zram_sizes="1048576" # 1M
+zram_mem_limits="1M"
+
+check_prereqs
+zram_load
+zram_max_streams
+zram_set_disksizes
+zram_set_memlimit
+zram_makeswap
+zram_swapoff
+zram_cleanup
+zram_unload
+
+if [ $ERR_CODE -ne 0 ]; then
+ echo "$TCID : [FAIL]"
+else
+ echo "$TCID : [PASS]"
+fi
diff --git a/zram/zram_lib.sh b/zram/zram_lib.sh
new file mode 100755
index 0000000..f6a9c73
--- /dev/null
+++ b/zram/zram_lib.sh
@@ -0,0 +1,233 @@
+#!/bin/sh
+# Copyright (c) 2015 Oracle and/or its affiliates. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# Author: Alexey Kodanev <alexey.kodanev@oracle.com>
+# Modified: Naresh Kamboju <naresh.kamboju@linaro.org>
+
+MODULE=0
+dev_makeswap=-1
+dev_mounted=-1
+
+trap INT
+
+check_prereqs()
+{
+ local msg="skip all tests:"
+ local uid=$(id -u)
+
+ if [ $uid -ne 0 ]; then
+ echo $msg must be run as root >&2
+ exit 0
+ fi
+}
+
+zram_cleanup()
+{
+ echo "zram cleanup"
+ local i=
+ for i in $(seq 0 $dev_makeswap); do
+ swapoff /dev/zram$i
+ done
+
+ for i in $(seq 0 $dev_mounted); do
+ umount /dev/zram$i
+ done
+
+ for i in $(seq 0 $(($dev_num - 1))); do
+ echo 1 > /sys/block/zram${i}/reset
+ rm -rf zram$i
+ done
+
+}
+
+zram_unload()
+{
+ if [ $MODULE -ne 0 ] ; then
+ echo "zram rmmod zram"
+ rmmod zram > /dev/null 2>&1
+ fi
+}
+
+zram_load()
+{
+ # check zram module exists
+ MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko
+ if [ -f $MODULE_PATH ]; then
+ MODULE=1
+ echo "create '$dev_num' zram device(s)"
+ modprobe zram num_devices=$dev_num
+ if [ $? -ne 0 ]; then
+ echo "failed to insert zram module"
+ exit 1
+ fi
+
+ dev_num_created=$(ls /dev/zram* | wc -w)
+
+ if [ "$dev_num_created" -ne "$dev_num" ]; then
+ echo "unexpected num of devices: $dev_num_created"
+ ERR_CODE=-1
+ else
+ echo "zram load module successful"
+ fi
+ elif [ -b /dev/zram0 ]; then
+ echo "/dev/zram0 device file found: OK"
+ else
+ echo "ERROR: No zram.ko module or no /dev/zram0 device found"
+ echo "$TCID : CONFIG_ZRAM is not set"
+ exit 1
+ fi
+}
+
+zram_max_streams()
+{
+ echo "set max_comp_streams to zram device(s)"
+
+ local i=0
+ for max_s in $zram_max_streams; do
+ local sys_path="/sys/block/zram${i}/max_comp_streams"
+ echo $max_s > $sys_path || \
+ echo "FAIL failed to set '$max_s' to $sys_path"
+ sleep 1
+ local max_streams=$(cat $sys_path)
+
+ [ "$max_s" -ne "$max_streams" ] && \
+ echo "FAIL can't set max_streams '$max_s', get $max_stream"
+
+ i=$(($i + 1))
+ echo "$sys_path = '$max_streams' ($i/$dev_num)"
+ done
+
+ echo "zram max streams: OK"
+}
+
+zram_compress_alg()
+{
+ echo "test that we can set compression algorithm"
+
+ local algs=$(cat /sys/block/zram0/comp_algorithm)
+ echo "supported algs: $algs"
+ local i=0
+ for alg in $zram_algs; do
+ local sys_path="/sys/block/zram${i}/comp_algorithm"
+ echo "$alg" > $sys_path || \
+ echo "FAIL can't set '$alg' to $sys_path"
+ i=$(($i + 1))
+ echo "$sys_path = '$alg' ($i/$dev_num)"
+ done
+
+ echo "zram set compression algorithm: OK"
+}
+
+zram_set_disksizes()
+{
+ echo "set disk size to zram device(s)"
+ local i=0
+ for ds in $zram_sizes; do
+ local sys_path="/sys/block/zram${i}/disksize"
+ echo "$ds" > $sys_path || \
+ echo "FAIL can't set '$ds' to $sys_path"
+
+ i=$(($i + 1))
+ echo "$sys_path = '$ds' ($i/$dev_num)"
+ done
+
+ echo "zram set disksizes: OK"
+}
+
+zram_set_memlimit()
+{
+ echo "set memory limit to zram device(s)"
+
+ local i=0
+ for ds in $zram_mem_limits; do
+ local sys_path="/sys/block/zram${i}/mem_limit"
+ echo "$ds" > $sys_path || \
+ echo "FAIL can't set '$ds' to $sys_path"
+
+ i=$(($i + 1))
+ echo "$sys_path = '$ds' ($i/$dev_num)"
+ done
+
+ echo "zram set memory limit: OK"
+}
+
+zram_makeswap()
+{
+ echo "make swap with zram device(s)"
+ local i=0
+ for i in $(seq 0 $(($dev_num - 1))); do
+ mkswap /dev/zram$i > err.log 2>&1
+ if [ $? -ne 0 ]; then
+ cat err.log
+ echo "FAIL mkswap /dev/zram$1 failed"
+ fi
+
+ swapon /dev/zram$i > err.log 2>&1
+ if [ $? -ne 0 ]; then
+ cat err.log
+ echo "FAIL swapon /dev/zram$1 failed"
+ fi
+
+ echo "done with /dev/zram$i"
+ dev_makeswap=$i
+ done
+
+ echo "zram making zram mkswap and swapon: OK"
+}
+
+zram_swapoff()
+{
+ local i=
+ for i in $(seq 0 $dev_makeswap); do
+ swapoff /dev/zram$i > err.log 2>&1
+ if [ $? -ne 0 ]; then
+ cat err.log
+ echo "FAIL swapoff /dev/zram$i failed"
+ fi
+ done
+ dev_makeswap=-1
+
+ echo "zram swapoff: OK"
+}
+
+zram_makefs()
+{
+ local i=0
+ for fs in $zram_filesystems; do
+ # if requested fs not supported default it to ext2
+ which mkfs.$fs > /dev/null 2>&1 || fs=ext2
+
+ echo "make $fs filesystem on /dev/zram$i"
+ mkfs.$fs /dev/zram$i > err.log 2>&1
+ if [ $? -ne 0 ]; then
+ cat err.log
+ echo "FAIL failed to make $fs on /dev/zram$i"
+ fi
+ i=$(($i + 1))
+ echo "zram mkfs.$fs: OK"
+ done
+}
+
+zram_mount()
+{
+ local i=0
+ for i in $(seq 0 $(($dev_num - 1))); do
+ echo "mount /dev/zram$i"
+ mkdir zram$i
+ mount /dev/zram$i zram$i > /dev/null || \
+ echo "FAIL mount /dev/zram$i failed"
+ dev_mounted=$i
+ done
+
+ echo "zram mount of zram device(s): OK"
+}