From 683131c4e268334e803ed23560301181381ba294 Mon Sep 17 00:00:00 2001 From: Milosz Wasilewski Date: Tue, 6 Oct 2015 23:02:08 +0100 Subject: Update to commit: 9ffecb10283508260936b96022d4ee43a7798b4c from repo: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git branch: master --- Makefile | 50 + breakpoints/Makefile | 19 +- capabilities/.gitignore | 2 + capabilities/Makefile | 18 + capabilities/test_execve.c | 427 ++++ capabilities/validate_cap.c | 73 + cpu-hotplug/Makefile | 7 +- cpu-hotplug/cpu-on-off-test.sh | 269 +++ cpu-hotplug/on-off-test.sh | 269 --- efivarfs/Makefile | 7 +- efivarfs/efivarfs.sh | 0 exec/Makefile | 7 +- exec/execveat.c | 10 +- firmware/Makefile | 20 +- firmware/fw_filesystem.sh | 25 +- firmware/fw_userhelper.sh | 12 +- ftrace/Makefile | 6 +- ftrace/test.d/00basic/basic4.tc | 2 +- ftrace/test.d/event/event-enable.tc | 15 +- ftrace/test.d/event/subsystem-enable.tc | 15 +- ftrace/test.d/event/toplevel-enable.tc | 15 +- ftrace/test.d/ftrace/fgraph-filter-stack.tc | 6 +- ftrace/test.d/ftrace/fgraph-filter.tc | 2 +- ftrace/test.d/ftrace/func_profiler.tc | 2 +- futex/Makefile | 29 + futex/README | 62 + futex/functional/.gitignore | 7 + futex/functional/Makefile | 25 + futex/functional/futex_requeue_pi.c | 409 ++++ futex/functional/futex_requeue_pi_mismatched_ops.c | 135 ++ futex/functional/futex_requeue_pi_signal_restart.c | 223 ++ futex/functional/futex_wait_private_mapped_file.c | 125 ++ futex/functional/futex_wait_timeout.c | 86 + futex/functional/futex_wait_uninitialized_heap.c | 124 ++ futex/functional/futex_wait_wouldblock.c | 79 + futex/functional/run.sh | 79 + futex/include/atomic.h | 83 + futex/include/futextest.h | 266 +++ futex/include/logging.h | 153 ++ futex/run.sh | 33 + gen_kselftest_tar.sh | 55 + ipc/Makefile | 11 +- kcmp/Makefile | 6 +- kselftest.h | 17 +- kselftest_install.sh | 37 + lib.mk | 38 + membarrier/.gitignore | 1 + membarrier/Makefile | 10 + membarrier/membarrier_test.c | 118 ++ memfd/Makefile | 14 +- memory-hotplug/Makefile | 9 +- memory-hotplug/mem-on-off-test.sh | 238 +++ memory-hotplug/on-off-test.sh | 238 --- mount/.gitignore | 1 + mount/Makefile | 20 +- mqueue/Makefile | 22 +- net/Makefile | 10 +- net/psock_fanout.c | 71 +- net/psock_lib.h | 29 +- net/run_afpackettests | 0 net/run_netsocktests | 0 powerpc/Makefile | 32 +- powerpc/copyloops/Makefile | 16 +- powerpc/copyloops/asm/ppc_asm.h | 33 - powerpc/dscr/.gitignore | 7 + powerpc/dscr/Makefile | 14 + powerpc/dscr/dscr.h | 127 ++ powerpc/dscr/dscr_default_test.c | 127 ++ powerpc/dscr/dscr_explicit_test.c | 71 + powerpc/dscr/dscr_inherit_exec_test.c | 117 ++ powerpc/dscr/dscr_inherit_test.c | 95 + powerpc/dscr/dscr_sysfs_test.c | 97 + powerpc/dscr/dscr_sysfs_thread_test.c | 80 + powerpc/dscr/dscr_user_test.c | 61 + powerpc/harness.c | 47 + powerpc/mm/Makefile | 16 +- powerpc/mm/hugetlb_vs_thp_test.c | 8 +- powerpc/pmu/Makefile | 48 +- powerpc/pmu/ebb/Makefile | 13 +- powerpc/pmu/lib.c | 47 - powerpc/pmu/lib.h | 1 - powerpc/primitives/Makefile | 15 +- powerpc/stringloops/Makefile | 15 +- powerpc/switch_endian/.gitignore | 2 + powerpc/switch_endian/Makefile | 18 + powerpc/switch_endian/check.S | 100 + powerpc/switch_endian/common.h | 6 + powerpc/switch_endian/switch_endian_test.S | 81 + powerpc/tm/.gitignore | 1 + powerpc/tm/Makefile | 18 +- powerpc/tm/tm-syscall-asm.S | 27 + powerpc/tm/tm-syscall.c | 122 ++ powerpc/utils.h | 3 +- powerpc/vphn/.gitignore | 1 + powerpc/vphn/Makefile | 12 + powerpc/vphn/test-vphn.c | 410 ++++ powerpc/vphn/vphn.c | 1 + powerpc/vphn/vphn.h | 1 + ptrace/Makefile | 5 +- rcutorture/bin/configinit.sh | 2 +- rcutorture/bin/kvm-recheck.sh | 4 + rcutorture/bin/kvm.sh | 27 +- rcutorture/configs/rcu/CFcommon | 3 + rcutorture/configs/rcu/SRCU-N | 1 + rcutorture/configs/rcu/SRCU-P | 1 + rcutorture/configs/rcu/SRCU-P.boot | 2 +- rcutorture/configs/rcu/TASKS01 | 5 +- rcutorture/configs/rcu/TASKS02 | 1 - rcutorture/configs/rcu/TASKS03 | 2 +- rcutorture/configs/rcu/TINY02 | 2 +- rcutorture/configs/rcu/TINY02.boot | 1 + rcutorture/configs/rcu/TREE01 | 2 +- rcutorture/configs/rcu/TREE02 | 3 +- rcutorture/configs/rcu/TREE02-T | 2 - rcutorture/configs/rcu/TREE03 | 9 +- rcutorture/configs/rcu/TREE03.boot | 1 + rcutorture/configs/rcu/TREE04 | 7 +- rcutorture/configs/rcu/TREE05 | 5 +- rcutorture/configs/rcu/TREE06 | 5 +- rcutorture/configs/rcu/TREE06.boot | 1 + rcutorture/configs/rcu/TREE07 | 3 +- rcutorture/configs/rcu/TREE08 | 7 +- rcutorture/configs/rcu/TREE08-T | 2 - rcutorture/configs/rcu/TREE08-T.boot | 1 + rcutorture/configs/rcu/TREE08.boot | 1 + rcutorture/configs/rcu/TREE09 | 2 +- rcutorture/doc/TREE_RCU-kconfig.txt | 35 +- seccomp/.gitignore | 1 + seccomp/Makefile | 10 + seccomp/seccomp_bpf.c | 2157 ++++++++++++++++++++ seccomp/test_harness.h | 534 +++++ size/Makefile | 7 +- static_keys/Makefile | 8 + static_keys/test_static_keys.sh | 16 + sysctl/Makefile | 12 +- sysctl/run_numerictests | 0 sysctl/run_stringtests | 0 timers/.gitignore | 18 + timers/Makefile | 38 +- timers/alarmtimer-suspend.c | 189 ++ timers/change_skew.c | 107 + timers/clocksource-switch.c | 179 ++ timers/inconsistency-check.c | 204 ++ timers/leap-a-day.c | 388 ++++ timers/leapcrash.c | 120 ++ timers/mqueue-lat.c | 124 ++ timers/nanosleep.c | 174 ++ timers/nsleep-lat.c | 190 ++ timers/posix_timers.c | 9 +- timers/raw_skew.c | 154 ++ timers/rtctest.c | 271 +++ timers/set-2038.c | 144 ++ timers/set-tai.c | 79 + timers/set-timer-lat.c | 216 ++ timers/skew_consistency.c | 89 + timers/threadtest.c | 204 ++ timers/valid-adjtimex.c | 202 ++ user/Makefile | 5 +- vm/Makefile | 21 +- vm/compaction_test.c | 225 ++ vm/hugetlbfstest.c | 84 - vm/map_hugetlb.c | 6 +- vm/run_vmtests | 20 +- vm/userfaultfd.c | 645 ++++++ x86/.gitignore | 2 + x86/Makefile | 62 + x86/check_cc.sh | 16 + x86/entry_from_vm86.c | 234 +++ x86/ldt_gdt.c | 576 ++++++ x86/sigreturn.c | 684 +++++++ x86/single_step_syscall.c | 181 ++ x86/syscall_arg_fault.c | 130 ++ x86/syscall_nt.c | 54 + x86/sysret_ss_attrs.c | 112 + x86/thunks.S | 67 + x86/trivial_32bit_program.c | 18 + x86/trivial_64bit_program.c | 18 + zram/Makefile | 9 + zram/README | 40 + zram/zram.sh | 27 + zram/zram01.sh | 99 + zram/zram02.sh | 54 + zram/zram_lib.sh | 233 +++ 183 files changed, 14654 insertions(+), 985 deletions(-) create mode 100644 capabilities/.gitignore create mode 100644 capabilities/Makefile create mode 100644 capabilities/test_execve.c create mode 100644 capabilities/validate_cap.c create mode 100755 cpu-hotplug/cpu-on-off-test.sh delete mode 100644 cpu-hotplug/on-off-test.sh mode change 100644 => 100755 efivarfs/efivarfs.sh mode change 100644 => 100755 firmware/fw_filesystem.sh mode change 100644 => 100755 firmware/fw_userhelper.sh create mode 100644 futex/Makefile create mode 100644 futex/README create mode 100644 futex/functional/.gitignore create mode 100644 futex/functional/Makefile create mode 100644 futex/functional/futex_requeue_pi.c create mode 100644 futex/functional/futex_requeue_pi_mismatched_ops.c create mode 100644 futex/functional/futex_requeue_pi_signal_restart.c create mode 100644 futex/functional/futex_wait_private_mapped_file.c create mode 100644 futex/functional/futex_wait_timeout.c create mode 100644 futex/functional/futex_wait_uninitialized_heap.c create mode 100644 futex/functional/futex_wait_wouldblock.c create mode 100755 futex/functional/run.sh create mode 100644 futex/include/atomic.h create mode 100644 futex/include/futextest.h create mode 100644 futex/include/logging.h create mode 100755 futex/run.sh create mode 100755 gen_kselftest_tar.sh create mode 100755 kselftest_install.sh create mode 100644 lib.mk create mode 100644 membarrier/.gitignore create mode 100644 membarrier/Makefile create mode 100644 membarrier/membarrier_test.c create mode 100755 memory-hotplug/mem-on-off-test.sh delete mode 100644 memory-hotplug/on-off-test.sh create mode 100644 mount/.gitignore mode change 100644 => 100755 net/run_afpackettests mode change 100644 => 100755 net/run_netsocktests create mode 100644 powerpc/dscr/.gitignore create mode 100644 powerpc/dscr/Makefile create mode 100644 powerpc/dscr/dscr.h create mode 100644 powerpc/dscr/dscr_default_test.c create mode 100644 powerpc/dscr/dscr_explicit_test.c create mode 100644 powerpc/dscr/dscr_inherit_exec_test.c create mode 100644 powerpc/dscr/dscr_inherit_test.c create mode 100644 powerpc/dscr/dscr_sysfs_test.c create mode 100644 powerpc/dscr/dscr_sysfs_thread_test.c create mode 100644 powerpc/dscr/dscr_user_test.c create mode 100644 powerpc/switch_endian/.gitignore create mode 100644 powerpc/switch_endian/Makefile create mode 100644 powerpc/switch_endian/check.S create mode 100644 powerpc/switch_endian/common.h create mode 100644 powerpc/switch_endian/switch_endian_test.S create mode 100644 powerpc/tm/tm-syscall-asm.S create mode 100644 powerpc/tm/tm-syscall.c create mode 100644 powerpc/vphn/.gitignore create mode 100644 powerpc/vphn/Makefile create mode 100644 powerpc/vphn/test-vphn.c create mode 120000 powerpc/vphn/vphn.c create mode 120000 powerpc/vphn/vphn.h create mode 100644 rcutorture/configs/rcu/TREE03.boot create mode 100644 rcutorture/configs/rcu/TREE08-T.boot create mode 100644 seccomp/.gitignore create mode 100644 seccomp/Makefile create mode 100644 seccomp/seccomp_bpf.c create mode 100644 seccomp/test_harness.h create mode 100644 static_keys/Makefile create mode 100644 static_keys/test_static_keys.sh mode change 100644 => 100755 sysctl/run_numerictests mode change 100644 => 100755 sysctl/run_stringtests create mode 100644 timers/.gitignore create mode 100644 timers/alarmtimer-suspend.c create mode 100644 timers/change_skew.c create mode 100644 timers/clocksource-switch.c create mode 100644 timers/inconsistency-check.c create mode 100644 timers/leap-a-day.c create mode 100644 timers/leapcrash.c create mode 100644 timers/mqueue-lat.c create mode 100644 timers/nanosleep.c create mode 100644 timers/nsleep-lat.c create mode 100644 timers/raw_skew.c create mode 100644 timers/rtctest.c create mode 100644 timers/set-2038.c create mode 100644 timers/set-tai.c create mode 100644 timers/set-timer-lat.c create mode 100644 timers/skew_consistency.c create mode 100644 timers/threadtest.c create mode 100644 timers/valid-adjtimex.c create mode 100644 vm/compaction_test.c delete mode 100644 vm/hugetlbfstest.c mode change 100644 => 100755 vm/run_vmtests create mode 100644 vm/userfaultfd.c create mode 100644 x86/.gitignore create mode 100644 x86/Makefile create mode 100755 x86/check_cc.sh create mode 100644 x86/entry_from_vm86.c create mode 100644 x86/ldt_gdt.c create mode 100644 x86/sigreturn.c create mode 100644 x86/single_step_syscall.c create mode 100644 x86/syscall_arg_fault.c create mode 100644 x86/syscall_nt.c create mode 100644 x86/sysret_ss_attrs.c create mode 100644 x86/thunks.S create mode 100644 x86/trivial_32bit_program.c create mode 100644 x86/trivial_64bit_program.c create mode 100644 zram/Makefile create mode 100644 zram/README create mode 100755 zram/zram.sh create mode 100755 zram/zram01.sh create mode 100755 zram/zram02.sh create mode 100755 zram/zram_lib.sh diff --git a/Makefile b/Makefile index 4e51122..cfe1213 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,9 @@ TARGETS += efivarfs TARGETS += exec TARGETS += firmware TARGETS += ftrace +TARGETS += futex TARGETS += kcmp +TARGETS += membarrier TARGETS += memfd TARGETS += memory-hotplug TARGETS += mount @@ -12,16 +14,32 @@ TARGETS += mqueue TARGETS += net TARGETS += powerpc TARGETS += ptrace +TARGETS += seccomp TARGETS += size +TARGETS += static_keys TARGETS += sysctl +ifneq (1, $(quicktest)) TARGETS += timers +endif TARGETS += user TARGETS += vm +TARGETS += x86 +TARGETS += zram #Please keep the TARGETS list alphabetically sorted +# Run "make quicktest=1 run_tests" or +# "make quicktest=1 kselftest from top level Makefile TARGETS_HOTPLUG = cpu-hotplug TARGETS_HOTPLUG += memory-hotplug +# Clear LDFLAGS and MAKEFLAGS if called from main +# Makefile to avoid test build failures when test +# Makefile doesn't have explicit build rules. +ifeq (1,$(MAKELEVEL)) +override LDFLAGS = +override MAKEFLAGS = +endif + all: for TARGET in $(TARGETS); do \ make -C $$TARGET; \ @@ -47,7 +65,39 @@ clean_hotplug: make -C $$TARGET clean; \ done; +INSTALL_PATH ?= install +INSTALL_PATH := $(abspath $(INSTALL_PATH)) +ALL_SCRIPT := $(INSTALL_PATH)/run_kselftest.sh + +install: +ifdef INSTALL_PATH + @# Ask all targets to install their files + mkdir -p $(INSTALL_PATH) + for TARGET in $(TARGETS); do \ + make -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \ + done; + + @# Ask all targets to emit their test scripts + echo "#!/bin/bash" > $(ALL_SCRIPT) + echo "cd \$$(dirname \$$0)" >> $(ALL_SCRIPT) + echo "ROOT=\$$PWD" >> $(ALL_SCRIPT) + + for TARGET in $(TARGETS); do \ + echo "echo ; echo Running tests in $$TARGET" >> $(ALL_SCRIPT); \ + echo "echo ========================================" >> $(ALL_SCRIPT); \ + echo "cd $$TARGET" >> $(ALL_SCRIPT); \ + make -s --no-print-directory -C $$TARGET emit_tests >> $(ALL_SCRIPT); \ + echo "cd \$$ROOT" >> $(ALL_SCRIPT); \ + done; + + chmod u+x $(ALL_SCRIPT) +else + $(error Error: set INSTALL_PATH to use install) +endif + clean: for TARGET in $(TARGETS); do \ make -C $$TARGET clean; \ done; + +.PHONY: install diff --git a/breakpoints/Makefile b/breakpoints/Makefile index e18b42b..d27108b 100644 --- a/breakpoints/Makefile +++ b/breakpoints/Makefile @@ -1,23 +1,14 @@ # Taken from perf makefile uname_M := $(shell uname -m 2>/dev/null || echo not) -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/) -ifeq ($(ARCH),i386) - ARCH := x86 -endif -ifeq ($(ARCH),x86_64) - ARCH := x86 -endif - +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) -all: ifeq ($(ARCH),x86) - gcc breakpoint_test.c -o breakpoint_test -else - echo "Not an x86 target, can't build breakpoints selftests" +TEST_PROGS := breakpoint_test endif -run_tests: - @./breakpoint_test || echo "breakpoints selftests: [FAIL]" +all: + +include ../lib.mk clean: rm -fr breakpoint_test diff --git a/capabilities/.gitignore b/capabilities/.gitignore new file mode 100644 index 0000000..b732dd0 --- /dev/null +++ b/capabilities/.gitignore @@ -0,0 +1,2 @@ +test_execve +validate_cap diff --git a/capabilities/Makefile b/capabilities/Makefile new file mode 100644 index 0000000..8c8f0c1 --- /dev/null +++ b/capabilities/Makefile @@ -0,0 +1,18 @@ +all: + +include ../lib.mk + +.PHONY: all clean + +TARGETS := validate_cap test_execve +TEST_PROGS := test_execve + +CFLAGS := -O2 -g -std=gnu99 -Wall -lcap-ng + +all: $(TARGETS) + +clean: + $(RM) $(TARGETS) + +$(TARGETS): %: %.c + $(CC) -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl diff --git a/capabilities/test_execve.c b/capabilities/test_execve.c new file mode 100644 index 0000000..10a21a9 --- /dev/null +++ b/capabilities/test_execve.c @@ -0,0 +1,427 @@ +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef PR_CAP_AMBIENT +#define PR_CAP_AMBIENT 47 +# define PR_CAP_AMBIENT_IS_SET 1 +# define PR_CAP_AMBIENT_RAISE 2 +# define PR_CAP_AMBIENT_LOWER 3 +# define PR_CAP_AMBIENT_CLEAR_ALL 4 +#endif + +static int nerrs; + +static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap) +{ + char buf[4096]; + int fd; + ssize_t written; + int buf_len; + + buf_len = vsnprintf(buf, sizeof(buf), fmt, ap); + if (buf_len < 0) { + err(1, "vsnprintf failed"); + } + if (buf_len >= sizeof(buf)) { + errx(1, "vsnprintf output truncated"); + } + + fd = open(filename, O_WRONLY); + if (fd < 0) { + if ((errno == ENOENT) && enoent_ok) + return; + err(1, "open of %s failed", filename); + } + written = write(fd, buf, buf_len); + if (written != buf_len) { + if (written >= 0) { + errx(1, "short write to %s", filename); + } else { + err(1, "write to %s failed", filename); + } + } + if (close(fd) != 0) { + err(1, "close of %s failed", filename); + } +} + +static void maybe_write_file(char *filename, char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vmaybe_write_file(true, filename, fmt, ap); + va_end(ap); +} + +static void write_file(char *filename, char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vmaybe_write_file(false, filename, fmt, ap); + va_end(ap); +} + +static bool create_and_enter_ns(uid_t inner_uid) +{ + uid_t outer_uid; + gid_t outer_gid; + int i; + bool have_outer_privilege; + + outer_uid = getuid(); + outer_gid = getgid(); + + /* + * TODO: If we're already root, we could skip creating the userns. + */ + + if (unshare(CLONE_NEWNS) == 0) { + printf("[NOTE]\tUsing global UIDs for tests\n"); + if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) != 0) + err(1, "PR_SET_KEEPCAPS"); + if (setresuid(inner_uid, inner_uid, -1) != 0) + err(1, "setresuid"); + + // Re-enable effective caps + capng_get_caps_process(); + for (i = 0; i < CAP_LAST_CAP; i++) + if (capng_have_capability(CAPNG_PERMITTED, i)) + capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, i); + if (capng_apply(CAPNG_SELECT_CAPS) != 0) + err(1, "capng_apply"); + + have_outer_privilege = true; + } else if (unshare(CLONE_NEWUSER | CLONE_NEWNS) == 0) { + printf("[NOTE]\tUsing a user namespace for tests\n"); + maybe_write_file("/proc/self/setgroups", "deny"); + write_file("/proc/self/uid_map", "%d %d 1", inner_uid, outer_uid); + write_file("/proc/self/gid_map", "0 %d 1", outer_gid); + + have_outer_privilege = false; + } else { + errx(1, "must be root or be able to create a userns"); + } + + if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL) != 0) + err(1, "remount everything private"); + + return have_outer_privilege; +} + +static void chdir_to_tmpfs(void) +{ + char cwd[PATH_MAX]; + if (getcwd(cwd, sizeof(cwd)) != cwd) + err(1, "getcwd"); + + if (mount("private_tmp", ".", "tmpfs", 0, "mode=0777") != 0) + err(1, "mount private tmpfs"); + + if (chdir(cwd) != 0) + err(1, "chdir to private tmpfs"); + + if (umount2(".", MNT_DETACH) != 0) + err(1, "detach private tmpfs"); +} + +static void copy_fromat_to(int fromfd, const char *fromname, const char *toname) +{ + int from = openat(fromfd, fromname, O_RDONLY); + if (from == -1) + err(1, "open copy source"); + + int to = open(toname, O_CREAT | O_WRONLY | O_EXCL, 0700); + + while (true) { + char buf[4096]; + ssize_t sz = read(from, buf, sizeof(buf)); + if (sz == 0) + break; + if (sz < 0) + err(1, "read"); + + if (write(to, buf, sz) != sz) + err(1, "write"); /* no short writes on tmpfs */ + } + + close(from); + close(to); +} + +static bool fork_wait(void) +{ + pid_t child = fork(); + if (child == 0) { + nerrs = 0; + return true; + } else if (child > 0) { + int status; + if (waitpid(child, &status, 0) != child || + !WIFEXITED(status)) { + printf("[FAIL]\tChild died\n"); + nerrs++; + } else if (WEXITSTATUS(status) != 0) { + printf("[FAIL]\tChild failed\n"); + nerrs++; + } else { + printf("[OK]\tChild succeeded\n"); + } + + return false; + } else { + err(1, "fork"); + } +} + +static void exec_other_validate_cap(const char *name, + bool eff, bool perm, bool inh, bool ambient) +{ + execl(name, name, (eff ? "1" : "0"), + (perm ? "1" : "0"), (inh ? "1" : "0"), (ambient ? "1" : "0"), + NULL); + err(1, "execl"); +} + +static void exec_validate_cap(bool eff, bool perm, bool inh, bool ambient) +{ + exec_other_validate_cap("./validate_cap", eff, perm, inh, ambient); +} + +static int do_tests(int uid, const char *our_path) +{ + bool have_outer_privilege = create_and_enter_ns(uid); + + int ourpath_fd = open(our_path, O_RDONLY | O_DIRECTORY); + if (ourpath_fd == -1) + err(1, "open '%s'", our_path); + + chdir_to_tmpfs(); + + copy_fromat_to(ourpath_fd, "validate_cap", "validate_cap"); + + if (have_outer_privilege) { + uid_t gid = getegid(); + + copy_fromat_to(ourpath_fd, "validate_cap", + "validate_cap_suidroot"); + if (chown("validate_cap_suidroot", 0, -1) != 0) + err(1, "chown"); + if (chmod("validate_cap_suidroot", S_ISUID | 0700) != 0) + err(1, "chmod"); + + copy_fromat_to(ourpath_fd, "validate_cap", + "validate_cap_suidnonroot"); + if (chown("validate_cap_suidnonroot", uid + 1, -1) != 0) + err(1, "chown"); + if (chmod("validate_cap_suidnonroot", S_ISUID | 0700) != 0) + err(1, "chmod"); + + copy_fromat_to(ourpath_fd, "validate_cap", + "validate_cap_sgidroot"); + if (chown("validate_cap_sgidroot", -1, 0) != 0) + err(1, "chown"); + if (chmod("validate_cap_sgidroot", S_ISGID | 0710) != 0) + err(1, "chmod"); + + copy_fromat_to(ourpath_fd, "validate_cap", + "validate_cap_sgidnonroot"); + if (chown("validate_cap_sgidnonroot", -1, gid + 1) != 0) + err(1, "chown"); + if (chmod("validate_cap_sgidnonroot", S_ISGID | 0710) != 0) + err(1, "chmod"); +} + + capng_get_caps_process(); + + /* Make sure that i starts out clear */ + capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE); + if (capng_apply(CAPNG_SELECT_CAPS) != 0) + err(1, "capng_apply"); + + if (uid == 0) { + printf("[RUN]\tRoot => ep\n"); + if (fork_wait()) + exec_validate_cap(true, true, false, false); + } else { + printf("[RUN]\tNon-root => no caps\n"); + if (fork_wait()) + exec_validate_cap(false, false, false, false); + } + + printf("[OK]\tCheck cap_ambient manipulation rules\n"); + + /* We should not be able to add ambient caps yet. */ + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != -1 || errno != EPERM) { + if (errno == EINVAL) + printf("[FAIL]\tPR_CAP_AMBIENT_RAISE isn't supported\n"); + else + printf("[FAIL]\tPR_CAP_AMBIENT_RAISE should have failed eith EPERM on a non-inheritable cap\n"); + return 1; + } + printf("[OK]\tPR_CAP_AMBIENT_RAISE failed on non-inheritable cap\n"); + + capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_RAW); + capng_update(CAPNG_DROP, CAPNG_PERMITTED, CAP_NET_RAW); + capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, CAP_NET_RAW); + if (capng_apply(CAPNG_SELECT_CAPS) != 0) + err(1, "capng_apply"); + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_RAW, 0, 0, 0) != -1 || errno != EPERM) { + printf("[FAIL]\tPR_CAP_AMBIENT_RAISE should have failed on a non-permitted cap\n"); + return 1; + } + printf("[OK]\tPR_CAP_AMBIENT_RAISE failed on non-permitted cap\n"); + + capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE); + if (capng_apply(CAPNG_SELECT_CAPS) != 0) + err(1, "capng_apply"); + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) { + printf("[FAIL]\tPR_CAP_AMBIENT_RAISE should have succeeded\n"); + return 1; + } + printf("[OK]\tPR_CAP_AMBIENT_RAISE worked\n"); + + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 1) { + printf("[FAIL]\tPR_CAP_AMBIENT_IS_SET is broken\n"); + return 1; + } + + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0, 0) != 0) + err(1, "PR_CAP_AMBIENT_CLEAR_ALL"); + + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) { + printf("[FAIL]\tPR_CAP_AMBIENT_CLEAR_ALL didn't work\n"); + return 1; + } + + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) + err(1, "PR_CAP_AMBIENT_RAISE"); + + capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE); + if (capng_apply(CAPNG_SELECT_CAPS) != 0) + err(1, "capng_apply"); + + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) { + printf("[FAIL]\tDropping I should have dropped A\n"); + return 1; + } + + printf("[OK]\tBasic manipulation appears to work\n"); + + capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE); + if (capng_apply(CAPNG_SELECT_CAPS) != 0) + err(1, "capng_apply"); + if (uid == 0) { + printf("[RUN]\tRoot +i => eip\n"); + if (fork_wait()) + exec_validate_cap(true, true, true, false); + } else { + printf("[RUN]\tNon-root +i => i\n"); + if (fork_wait()) + exec_validate_cap(false, false, true, false); + } + + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) + err(1, "PR_CAP_AMBIENT_RAISE"); + + printf("[RUN]\tUID %d +ia => eipa\n", uid); + if (fork_wait()) + exec_validate_cap(true, true, true, true); + + /* The remaining tests need real privilege */ + + if (!have_outer_privilege) { + printf("[SKIP]\tSUID/SGID tests (needs privilege)\n"); + goto done; + } + + if (uid == 0) { + printf("[RUN]\tRoot +ia, suidroot => eipa\n"); + if (fork_wait()) + exec_other_validate_cap("./validate_cap_suidroot", + true, true, true, true); + + printf("[RUN]\tRoot +ia, suidnonroot => ip\n"); + if (fork_wait()) + exec_other_validate_cap("./validate_cap_suidnonroot", + false, true, true, false); + + printf("[RUN]\tRoot +ia, sgidroot => eipa\n"); + if (fork_wait()) + exec_other_validate_cap("./validate_cap_sgidroot", + true, true, true, true); + + if (fork_wait()) { + printf("[RUN]\tRoot, gid != 0, +ia, sgidroot => eip\n"); + if (setresgid(1, 1, 1) != 0) + err(1, "setresgid"); + exec_other_validate_cap("./validate_cap_sgidroot", + true, true, true, false); + } + + printf("[RUN]\tRoot +ia, sgidnonroot => eip\n"); + if (fork_wait()) + exec_other_validate_cap("./validate_cap_sgidnonroot", + true, true, true, false); + } else { + printf("[RUN]\tNon-root +ia, sgidnonroot => i\n"); + exec_other_validate_cap("./validate_cap_sgidnonroot", + false, false, true, false); + + if (fork_wait()) { + printf("[RUN]\tNon-root +ia, sgidroot => i\n"); + if (setresgid(1, 1, 1) != 0) + err(1, "setresgid"); + exec_other_validate_cap("./validate_cap_sgidroot", + false, false, true, false); + } + } + +done: + return nerrs ? 1 : 0; +} + +int main(int argc, char **argv) +{ + char *tmp1, *tmp2, *our_path; + + /* Find our path */ + tmp1 = strdup(argv[0]); + if (!tmp1) + err(1, "strdup"); + tmp2 = dirname(tmp1); + our_path = strdup(tmp2); + if (!our_path) + err(1, "strdup"); + free(tmp1); + + if (fork_wait()) { + printf("[RUN]\t+++ Tests with uid == 0 +++\n"); + return do_tests(0, our_path); + } + + if (fork_wait()) { + printf("[RUN]\t+++ Tests with uid != 0 +++\n"); + return do_tests(1, our_path); + } + + return nerrs ? 1 : 0; +} diff --git a/capabilities/validate_cap.c b/capabilities/validate_cap.c new file mode 100644 index 0000000..dd3c45f --- /dev/null +++ b/capabilities/validate_cap.c @@ -0,0 +1,73 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef PR_CAP_AMBIENT +#define PR_CAP_AMBIENT 47 +# define PR_CAP_AMBIENT_IS_SET 1 +# define PR_CAP_AMBIENT_RAISE 2 +# define PR_CAP_AMBIENT_LOWER 3 +# define PR_CAP_AMBIENT_CLEAR_ALL 4 +#endif + +#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 19) +# define HAVE_GETAUXVAL +#endif + +static bool bool_arg(char **argv, int i) +{ + if (!strcmp(argv[i], "0")) + return false; + else if (!strcmp(argv[i], "1")) + return true; + else + errx(1, "wrong argv[%d]", i); +} + +int main(int argc, char **argv) +{ + const char *atsec = ""; + + /* + * Be careful just in case a setgid or setcapped copy of this + * helper gets out. + */ + + if (argc != 5) + errx(1, "wrong argc"); + +#ifdef HAVE_GETAUXVAL + if (getauxval(AT_SECURE)) + atsec = " (AT_SECURE is set)"; + else + atsec = " (AT_SECURE is not set)"; +#endif + + capng_get_caps_process(); + + if (capng_have_capability(CAPNG_EFFECTIVE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 1)) { + printf("[FAIL]\tWrong effective state%s\n", atsec); + return 1; + } + if (capng_have_capability(CAPNG_PERMITTED, CAP_NET_BIND_SERVICE) != bool_arg(argv, 2)) { + printf("[FAIL]\tWrong permitted state%s\n", atsec); + return 1; + } + if (capng_have_capability(CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 3)) { + printf("[FAIL]\tWrong inheritable state%s\n", atsec); + return 1; + } + + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != bool_arg(argv, 4)) { + printf("[FAIL]\tWrong ambient state%s\n", atsec); + return 1; + } + + printf("[OK]\tCapabilities after execve were correct\n"); + return 0; +} diff --git a/cpu-hotplug/Makefile b/cpu-hotplug/Makefile index e9c28d8..fe1f991 100644 --- a/cpu-hotplug/Makefile +++ b/cpu-hotplug/Makefile @@ -1,9 +1,10 @@ all: -run_tests: - @/bin/bash ./on-off-test.sh || echo "cpu-hotplug selftests: [FAIL]" +TEST_PROGS := cpu-on-off-test.sh + +include ../lib.mk run_full_test: - @/bin/bash ./on-off-test.sh -a || echo "cpu-hotplug selftests: [FAIL]" + @/bin/bash ./cpu-on-off-test.sh -a || echo "cpu-hotplug selftests: [FAIL]" clean: diff --git a/cpu-hotplug/cpu-on-off-test.sh b/cpu-hotplug/cpu-on-off-test.sh new file mode 100755 index 0000000..98b1d65 --- /dev/null +++ b/cpu-hotplug/cpu-on-off-test.sh @@ -0,0 +1,269 @@ +#!/bin/bash + +SYSFS= + +prerequisite() +{ + msg="skip all tests:" + + if [ $UID != 0 ]; then + echo $msg must be run as root >&2 + exit 0 + fi + + taskset -p 01 $$ + + SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'` + + if [ ! -d "$SYSFS" ]; then + echo $msg sysfs is not mounted >&2 + exit 0 + fi + + if ! ls $SYSFS/devices/system/cpu/cpu* > /dev/null 2>&1; then + echo $msg cpu hotplug is not supported >&2 + exit 0 + fi + + echo "CPU online/offline summary:" + online_cpus=`cat $SYSFS/devices/system/cpu/online` + online_max=${online_cpus##*-} + echo -e "\t Cpus in online state: $online_cpus" + + offline_cpus=`cat $SYSFS/devices/system/cpu/offline` + if [[ "a$offline_cpus" = "a" ]]; then + offline_cpus=0 + else + offline_max=${offline_cpus##*-} + fi + echo -e "\t Cpus in offline state: $offline_cpus" +} + +# +# list all hot-pluggable CPUs +# +hotpluggable_cpus() +{ + local state=${1:-.\*} + + for cpu in $SYSFS/devices/system/cpu/cpu*; do + if [ -f $cpu/online ] && grep -q $state $cpu/online; then + echo ${cpu##/*/cpu} + fi + done +} + +hotplaggable_offline_cpus() +{ + hotpluggable_cpus 0 +} + +hotpluggable_online_cpus() +{ + hotpluggable_cpus 1 +} + +cpu_is_online() +{ + grep -q 1 $SYSFS/devices/system/cpu/cpu$1/online +} + +cpu_is_offline() +{ + grep -q 0 $SYSFS/devices/system/cpu/cpu$1/online +} + +online_cpu() +{ + echo 1 > $SYSFS/devices/system/cpu/cpu$1/online +} + +offline_cpu() +{ + echo 0 > $SYSFS/devices/system/cpu/cpu$1/online +} + +online_cpu_expect_success() +{ + local cpu=$1 + + if ! online_cpu $cpu; then + echo $FUNCNAME $cpu: unexpected fail >&2 + elif ! cpu_is_online $cpu; then + echo $FUNCNAME $cpu: unexpected offline >&2 + fi +} + +online_cpu_expect_fail() +{ + local cpu=$1 + + if online_cpu $cpu 2> /dev/null; then + echo $FUNCNAME $cpu: unexpected success >&2 + elif ! cpu_is_offline $cpu; then + echo $FUNCNAME $cpu: unexpected online >&2 + fi +} + +offline_cpu_expect_success() +{ + local cpu=$1 + + if ! offline_cpu $cpu; then + echo $FUNCNAME $cpu: unexpected fail >&2 + elif ! cpu_is_offline $cpu; then + echo $FUNCNAME $cpu: unexpected offline >&2 + fi +} + +offline_cpu_expect_fail() +{ + local cpu=$1 + + if offline_cpu $cpu 2> /dev/null; then + echo $FUNCNAME $cpu: unexpected success >&2 + elif ! cpu_is_online $cpu; then + echo $FUNCNAME $cpu: unexpected offline >&2 + fi +} + +error=-12 +allcpus=0 +priority=0 +online_cpus=0 +online_max=0 +offline_cpus=0 +offline_max=0 + +while getopts e:ahp: opt; do + case $opt in + e) + error=$OPTARG + ;; + a) + allcpus=1 + ;; + h) + echo "Usage $0 [ -a ] [ -e errno ] [ -p notifier-priority ]" + echo -e "\t default offline one cpu" + echo -e "\t run with -a option to offline all cpus" + exit + ;; + p) + priority=$OPTARG + ;; + esac +done + +if ! [ "$error" -ge -4095 -a "$error" -lt 0 ]; then + echo "error code must be -4095 <= errno < 0" >&2 + exit 1 +fi + +prerequisite + +# +# Safe test (default) - offline and online one cpu +# +if [ $allcpus -eq 0 ]; then + echo "Limited scope test: one hotplug cpu" + echo -e "\t (leaves cpu in the original state):" + echo -e "\t online to offline to online: cpu $online_max" + offline_cpu_expect_success $online_max + online_cpu_expect_success $online_max + + if [[ $offline_cpus -gt 0 ]]; then + echo -e "\t offline to online to offline: cpu $offline_max" + online_cpu_expect_success $offline_max + offline_cpu_expect_success $offline_max + fi + exit 0 +else + echo "Full scope test: all hotplug cpus" + echo -e "\t online all offline cpus" + echo -e "\t offline all online cpus" + echo -e "\t online all offline cpus" +fi + +# +# Online all hot-pluggable CPUs +# +for cpu in `hotplaggable_offline_cpus`; do + online_cpu_expect_success $cpu +done + +# +# Offline all hot-pluggable CPUs +# +for cpu in `hotpluggable_online_cpus`; do + offline_cpu_expect_success $cpu +done + +# +# Online all hot-pluggable CPUs again +# +for cpu in `hotplaggable_offline_cpus`; do + online_cpu_expect_success $cpu +done + +# +# Test with cpu notifier error injection +# + +DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'` +NOTIFIER_ERR_INJECT_DIR=$DEBUGFS/notifier-error-inject/cpu + +prerequisite_extra() +{ + msg="skip extra tests:" + + /sbin/modprobe -q -r cpu-notifier-error-inject + /sbin/modprobe -q cpu-notifier-error-inject priority=$priority + + if [ ! -d "$DEBUGFS" ]; then + echo $msg debugfs is not mounted >&2 + exit 0 + fi + + if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then + echo $msg cpu-notifier-error-inject module is not available >&2 + exit 0 + fi +} + +prerequisite_extra + +# +# Offline all hot-pluggable CPUs +# +echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error +for cpu in `hotpluggable_online_cpus`; do + offline_cpu_expect_success $cpu +done + +# +# Test CPU hot-add error handling (offline => online) +# +echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_UP_PREPARE/error +for cpu in `hotplaggable_offline_cpus`; do + online_cpu_expect_fail $cpu +done + +# +# Online all hot-pluggable CPUs +# +echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_UP_PREPARE/error +for cpu in `hotplaggable_offline_cpus`; do + online_cpu_expect_success $cpu +done + +# +# Test CPU hot-remove error handling (online => offline) +# +echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error +for cpu in `hotpluggable_online_cpus`; do + offline_cpu_expect_fail $cpu +done + +echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error +/sbin/modprobe -q -r cpu-notifier-error-inject diff --git a/cpu-hotplug/on-off-test.sh b/cpu-hotplug/on-off-test.sh deleted file mode 100644 index 98b1d65..0000000 --- a/cpu-hotplug/on-off-test.sh +++ /dev/null @@ -1,269 +0,0 @@ -#!/bin/bash - -SYSFS= - -prerequisite() -{ - msg="skip all tests:" - - if [ $UID != 0 ]; then - echo $msg must be run as root >&2 - exit 0 - fi - - taskset -p 01 $$ - - SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'` - - if [ ! -d "$SYSFS" ]; then - echo $msg sysfs is not mounted >&2 - exit 0 - fi - - if ! ls $SYSFS/devices/system/cpu/cpu* > /dev/null 2>&1; then - echo $msg cpu hotplug is not supported >&2 - exit 0 - fi - - echo "CPU online/offline summary:" - online_cpus=`cat $SYSFS/devices/system/cpu/online` - online_max=${online_cpus##*-} - echo -e "\t Cpus in online state: $online_cpus" - - offline_cpus=`cat $SYSFS/devices/system/cpu/offline` - if [[ "a$offline_cpus" = "a" ]]; then - offline_cpus=0 - else - offline_max=${offline_cpus##*-} - fi - echo -e "\t Cpus in offline state: $offline_cpus" -} - -# -# list all hot-pluggable CPUs -# -hotpluggable_cpus() -{ - local state=${1:-.\*} - - for cpu in $SYSFS/devices/system/cpu/cpu*; do - if [ -f $cpu/online ] && grep -q $state $cpu/online; then - echo ${cpu##/*/cpu} - fi - done -} - -hotplaggable_offline_cpus() -{ - hotpluggable_cpus 0 -} - -hotpluggable_online_cpus() -{ - hotpluggable_cpus 1 -} - -cpu_is_online() -{ - grep -q 1 $SYSFS/devices/system/cpu/cpu$1/online -} - -cpu_is_offline() -{ - grep -q 0 $SYSFS/devices/system/cpu/cpu$1/online -} - -online_cpu() -{ - echo 1 > $SYSFS/devices/system/cpu/cpu$1/online -} - -offline_cpu() -{ - echo 0 > $SYSFS/devices/system/cpu/cpu$1/online -} - -online_cpu_expect_success() -{ - local cpu=$1 - - if ! online_cpu $cpu; then - echo $FUNCNAME $cpu: unexpected fail >&2 - elif ! cpu_is_online $cpu; then - echo $FUNCNAME $cpu: unexpected offline >&2 - fi -} - -online_cpu_expect_fail() -{ - local cpu=$1 - - if online_cpu $cpu 2> /dev/null; then - echo $FUNCNAME $cpu: unexpected success >&2 - elif ! cpu_is_offline $cpu; then - echo $FUNCNAME $cpu: unexpected online >&2 - fi -} - -offline_cpu_expect_success() -{ - local cpu=$1 - - if ! offline_cpu $cpu; then - echo $FUNCNAME $cpu: unexpected fail >&2 - elif ! cpu_is_offline $cpu; then - echo $FUNCNAME $cpu: unexpected offline >&2 - fi -} - -offline_cpu_expect_fail() -{ - local cpu=$1 - - if offline_cpu $cpu 2> /dev/null; then - echo $FUNCNAME $cpu: unexpected success >&2 - elif ! cpu_is_online $cpu; then - echo $FUNCNAME $cpu: unexpected offline >&2 - fi -} - -error=-12 -allcpus=0 -priority=0 -online_cpus=0 -online_max=0 -offline_cpus=0 -offline_max=0 - -while getopts e:ahp: opt; do - case $opt in - e) - error=$OPTARG - ;; - a) - allcpus=1 - ;; - h) - echo "Usage $0 [ -a ] [ -e errno ] [ -p notifier-priority ]" - echo -e "\t default offline one cpu" - echo -e "\t run with -a option to offline all cpus" - exit - ;; - p) - priority=$OPTARG - ;; - esac -done - -if ! [ "$error" -ge -4095 -a "$error" -lt 0 ]; then - echo "error code must be -4095 <= errno < 0" >&2 - exit 1 -fi - -prerequisite - -# -# Safe test (default) - offline and online one cpu -# -if [ $allcpus -eq 0 ]; then - echo "Limited scope test: one hotplug cpu" - echo -e "\t (leaves cpu in the original state):" - echo -e "\t online to offline to online: cpu $online_max" - offline_cpu_expect_success $online_max - online_cpu_expect_success $online_max - - if [[ $offline_cpus -gt 0 ]]; then - echo -e "\t offline to online to offline: cpu $offline_max" - online_cpu_expect_success $offline_max - offline_cpu_expect_success $offline_max - fi - exit 0 -else - echo "Full scope test: all hotplug cpus" - echo -e "\t online all offline cpus" - echo -e "\t offline all online cpus" - echo -e "\t online all offline cpus" -fi - -# -# Online all hot-pluggable CPUs -# -for cpu in `hotplaggable_offline_cpus`; do - online_cpu_expect_success $cpu -done - -# -# Offline all hot-pluggable CPUs -# -for cpu in `hotpluggable_online_cpus`; do - offline_cpu_expect_success $cpu -done - -# -# Online all hot-pluggable CPUs again -# -for cpu in `hotplaggable_offline_cpus`; do - online_cpu_expect_success $cpu -done - -# -# Test with cpu notifier error injection -# - -DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'` -NOTIFIER_ERR_INJECT_DIR=$DEBUGFS/notifier-error-inject/cpu - -prerequisite_extra() -{ - msg="skip extra tests:" - - /sbin/modprobe -q -r cpu-notifier-error-inject - /sbin/modprobe -q cpu-notifier-error-inject priority=$priority - - if [ ! -d "$DEBUGFS" ]; then - echo $msg debugfs is not mounted >&2 - exit 0 - fi - - if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then - echo $msg cpu-notifier-error-inject module is not available >&2 - exit 0 - fi -} - -prerequisite_extra - -# -# Offline all hot-pluggable CPUs -# -echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error -for cpu in `hotpluggable_online_cpus`; do - offline_cpu_expect_success $cpu -done - -# -# Test CPU hot-add error handling (offline => online) -# -echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_UP_PREPARE/error -for cpu in `hotplaggable_offline_cpus`; do - online_cpu_expect_fail $cpu -done - -# -# Online all hot-pluggable CPUs -# -echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_UP_PREPARE/error -for cpu in `hotplaggable_offline_cpus`; do - online_cpu_expect_success $cpu -done - -# -# Test CPU hot-remove error handling (online => offline) -# -echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error -for cpu in `hotpluggable_online_cpus`; do - offline_cpu_expect_fail $cpu -done - -echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error -/sbin/modprobe -q -r cpu-notifier-error-inject diff --git a/efivarfs/Makefile b/efivarfs/Makefile index 29e8c6b..736c3dd 100644 --- a/efivarfs/Makefile +++ b/efivarfs/Makefile @@ -1,12 +1,13 @@ -CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall test_objs = open-unlink create-read all: $(test_objs) -run_tests: all - @/bin/bash ./efivarfs.sh || echo "efivarfs selftests: [FAIL]" +TEST_PROGS := efivarfs.sh +TEST_FILES := $(test_objs) + +include ../lib.mk clean: rm -f $(test_objs) diff --git a/efivarfs/efivarfs.sh b/efivarfs/efivarfs.sh old mode 100644 new mode 100755 diff --git a/exec/Makefile b/exec/Makefile index 66dfc2c..4e400eb 100644 --- a/exec/Makefile +++ b/exec/Makefile @@ -1,4 +1,3 @@ -CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall BINARIES = execveat DEPS = execveat.symlink execveat.denatured script subdir @@ -18,8 +17,10 @@ execveat.denatured: execveat %: %.c $(CC) $(CFLAGS) -o $@ $^ -run_tests: all - ./execveat +TEST_PROGS := execveat +TEST_FILES := $(DEPS) + +include ../lib.mk clean: rm -rf $(BINARIES) $(DEPS) subdir.moved execveat.moved xxxxx* diff --git a/exec/execveat.c b/exec/execveat.c index e238c95..8d5d1d2 100644 --- a/exec/execveat.c +++ b/exec/execveat.c @@ -30,7 +30,7 @@ static int execveat_(int fd, const char *path, char **argv, char **envp, #ifdef __NR_execveat return syscall(__NR_execveat, fd, path, argv, envp, flags); #else - errno = -ENOSYS; + errno = ENOSYS; return -1; #endif } @@ -234,6 +234,14 @@ static int run_tests(void) int fd_cloexec = open_or_die("execveat", O_RDONLY|O_CLOEXEC); int fd_script_cloexec = open_or_die("script", O_RDONLY|O_CLOEXEC); + /* Check if we have execveat at all, and bail early if not */ + errno = 0; + execveat_(-1, NULL, NULL, NULL, 0); + if (errno == ENOSYS) { + printf("[FAIL] ENOSYS calling execveat - no kernel support?\n"); + return 1; + } + /* Change file position to confirm it doesn't affect anything */ lseek(fd, 10, SEEK_SET); diff --git a/firmware/Makefile b/firmware/Makefile index e23cce0..9bf8223 100644 --- a/firmware/Makefile +++ b/firmware/Makefile @@ -3,25 +3,9 @@ # No binaries, but make sure arg-less "make" doesn't trigger "run_tests" all: -fw_filesystem: - @if /bin/sh ./fw_filesystem.sh ; then \ - echo "fw_filesystem: ok"; \ - else \ - echo "fw_filesystem: [FAIL]"; \ - exit 1; \ - fi +TEST_PROGS := fw_filesystem.sh fw_userhelper.sh -fw_userhelper: - @if /bin/sh ./fw_userhelper.sh ; then \ - echo "fw_userhelper: ok"; \ - else \ - echo "fw_userhelper: [FAIL]"; \ - exit 1; \ - fi - -run_tests: all fw_filesystem fw_userhelper +include ../lib.mk # Nothing to clean up. clean: - -.PHONY: all clean run_tests fw_filesystem fw_userhelper diff --git a/firmware/fw_filesystem.sh b/firmware/fw_filesystem.sh old mode 100644 new mode 100755 index 3fc6c10..c4366dc --- a/firmware/fw_filesystem.sh +++ b/firmware/fw_filesystem.sh @@ -9,7 +9,15 @@ modprobe test_firmware DIR=/sys/devices/virtual/misc/test_firmware -OLD_TIMEOUT=$(cat /sys/class/firmware/timeout) +# CONFIG_FW_LOADER_USER_HELPER has a sysfs class under /sys/class/firmware/ +# These days no one enables CONFIG_FW_LOADER_USER_HELPER so check for that +# as an indicator for CONFIG_FW_LOADER_USER_HELPER. +HAS_FW_LOADER_USER_HELPER=$(if [ -d /sys/class/firmware/ ]; then echo yes; else echo no; fi) + +if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then + OLD_TIMEOUT=$(cat /sys/class/firmware/timeout) +fi + OLD_FWPATH=$(cat /sys/module/firmware_class/parameters/path) FWPATH=$(mktemp -d) @@ -17,7 +25,9 @@ FW="$FWPATH/test-firmware.bin" test_finish() { - echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout + if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then + echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout + fi echo -n "$OLD_PATH" >/sys/module/firmware_class/parameters/path rm -f "$FW" rmdir "$FWPATH" @@ -25,8 +35,11 @@ test_finish() trap "test_finish" EXIT -# Turn down the timeout so failures don't take so long. -echo 1 >/sys/class/firmware/timeout +if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then + # Turn down the timeout so failures don't take so long. + echo 1 >/sys/class/firmware/timeout +fi + # Set the kernel search path. echo -n "$FWPATH" >/sys/module/firmware_class/parameters/path @@ -41,7 +54,9 @@ if diff -q "$FW" /dev/test_firmware >/dev/null ; then echo "$0: firmware was not expected to match" >&2 exit 1 else - echo "$0: timeout works" + if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then + echo "$0: timeout works" + fi fi # This should succeed via kernel load or will fail after 1 second after diff --git a/firmware/fw_userhelper.sh b/firmware/fw_userhelper.sh old mode 100644 new mode 100755 index 6efbade..b9983f8 --- a/firmware/fw_userhelper.sh +++ b/firmware/fw_userhelper.sh @@ -9,7 +9,17 @@ modprobe test_firmware DIR=/sys/devices/virtual/misc/test_firmware -OLD_TIMEOUT=$(cat /sys/class/firmware/timeout) +# CONFIG_FW_LOADER_USER_HELPER has a sysfs class under /sys/class/firmware/ +# These days no one enables CONFIG_FW_LOADER_USER_HELPER so check for that +# as an indicator for CONFIG_FW_LOADER_USER_HELPER. +HAS_FW_LOADER_USER_HELPER=$(if [ -d /sys/class/firmware/ ]; then echo yes; else echo no; fi) + +if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then + OLD_TIMEOUT=$(cat /sys/class/firmware/timeout) +else + echo "usermode helper disabled so ignoring test" + exit 0 +fi FWPATH=$(mktemp -d) FW="$FWPATH/test-firmware.bin" diff --git a/ftrace/Makefile b/ftrace/Makefile index 76cc9f1..4e6ed13 100644 --- a/ftrace/Makefile +++ b/ftrace/Makefile @@ -1,7 +1,9 @@ all: -run_tests: - @/bin/sh ./ftracetest || echo "ftrace selftests: [FAIL]" +TEST_PROGS := ftracetest +TEST_DIRS := test.d + +include ../lib.mk clean: rm -rf logs/* diff --git a/ftrace/test.d/00basic/basic4.tc b/ftrace/test.d/00basic/basic4.tc index fd9c49a..aa51f6c 100644 --- a/ftrace/test.d/00basic/basic4.tc +++ b/ftrace/test.d/00basic/basic4.tc @@ -2,4 +2,4 @@ # description: Basic event tracing check test -f available_events -a -f set_event -a -d events # check scheduler events are available -grep -q sched available_events && exit 0 || exit -1 \ No newline at end of file +grep -q sched available_events && exit 0 || exit $FAIL diff --git a/ftrace/test.d/event/event-enable.tc b/ftrace/test.d/event/event-enable.tc index 668616d..87eb9d6 100644 --- a/ftrace/test.d/event/event-enable.tc +++ b/ftrace/test.d/event/event-enable.tc @@ -9,7 +9,11 @@ do_reset() { fail() { #msg do_reset echo $1 - exit -1 + exit $FAIL +} + +yield() { + ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1 } if [ ! -f set_event -o ! -d events/sched ]; then @@ -21,7 +25,8 @@ reset_tracer do_reset echo 'sched:sched_switch' > set_event -usleep 1 + +yield count=`cat trace | grep sched_switch | wc -l` if [ $count -eq 0 ]; then @@ -31,7 +36,8 @@ fi do_reset echo 1 > events/sched/sched_switch/enable -usleep 1 + +yield count=`cat trace | grep sched_switch | wc -l` if [ $count -eq 0 ]; then @@ -41,7 +47,8 @@ fi do_reset echo 0 > events/sched/sched_switch/enable -usleep 1 + +yield count=`cat trace | grep sched_switch | wc -l` if [ $count -ne 0 ]; then diff --git a/ftrace/test.d/event/subsystem-enable.tc b/ftrace/test.d/event/subsystem-enable.tc index 655c415..ced27ef 100644 --- a/ftrace/test.d/event/subsystem-enable.tc +++ b/ftrace/test.d/event/subsystem-enable.tc @@ -9,7 +9,11 @@ do_reset() { fail() { #msg do_reset echo $1 - exit -1 + exit $FAIL +} + +yield() { + ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1 } if [ ! -f set_event -o ! -d events/sched ]; then @@ -21,7 +25,8 @@ reset_tracer do_reset echo 'sched:*' > set_event -usleep 1 + +yield count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` if [ $count -lt 3 ]; then @@ -31,7 +36,8 @@ fi do_reset echo 1 > events/sched/enable -usleep 1 + +yield count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` if [ $count -lt 3 ]; then @@ -41,7 +47,8 @@ fi do_reset echo 0 > events/sched/enable -usleep 1 + +yield count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` if [ $count -ne 0 ]; then diff --git a/ftrace/test.d/event/toplevel-enable.tc b/ftrace/test.d/event/toplevel-enable.tc index 4808457..0bb5df3 100644 --- a/ftrace/test.d/event/toplevel-enable.tc +++ b/ftrace/test.d/event/toplevel-enable.tc @@ -9,7 +9,11 @@ do_reset() { fail() { #msg do_reset echo $1 - exit -1 + exit $FAIL +} + +yield() { + ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1 } if [ ! -f available_events -o ! -f set_event -o ! -d events ]; then @@ -21,6 +25,9 @@ reset_tracer do_reset echo '*:*' > set_event + +yield + count=`cat trace | grep -v ^# | wc -l` if [ $count -eq 0 ]; then fail "none of events are recorded" @@ -29,6 +36,9 @@ fi do_reset echo 1 > events/enable + +yield + count=`cat trace | grep -v ^# | wc -l` if [ $count -eq 0 ]; then fail "none of events are recorded" @@ -37,6 +47,9 @@ fi do_reset echo 0 > events/enable + +yield + count=`cat trace | grep -v ^# | wc -l` if [ $count -ne 0 ]; then fail "any of events should not be recorded" diff --git a/ftrace/test.d/ftrace/fgraph-filter-stack.tc b/ftrace/test.d/ftrace/fgraph-filter-stack.tc index c15e018..15c2dba 100644 --- a/ftrace/test.d/ftrace/fgraph-filter-stack.tc +++ b/ftrace/test.d/ftrace/fgraph-filter-stack.tc @@ -16,7 +16,9 @@ fi do_reset() { reset_tracer - echo 0 > /proc/sys/kernel/stack_tracer_enabled + if [ -e /proc/sys/kernel/stack_tracer_enabled ]; then + echo 0 > /proc/sys/kernel/stack_tracer_enabled + fi enable_tracing clear_trace echo > set_ftrace_filter @@ -25,7 +27,7 @@ do_reset() { fail() { # msg do_reset echo $1 - exit -1 + exit $FAIL } disable_tracing diff --git a/ftrace/test.d/ftrace/fgraph-filter.tc b/ftrace/test.d/ftrace/fgraph-filter.tc index 6af5f63..0ab2189 100644 --- a/ftrace/test.d/ftrace/fgraph-filter.tc +++ b/ftrace/test.d/ftrace/fgraph-filter.tc @@ -17,7 +17,7 @@ do_reset() { fail() { # msg do_reset echo $1 - exit -1 + exit $FAIL } disable_tracing diff --git a/ftrace/test.d/ftrace/func_profiler.tc b/ftrace/test.d/ftrace/func_profiler.tc index 2e719cb..7808336 100644 --- a/ftrace/test.d/ftrace/func_profiler.tc +++ b/ftrace/test.d/ftrace/func_profiler.tc @@ -31,7 +31,7 @@ fail() { # mesg reset_tracer echo > set_ftrace_filter echo $1 - exit -1 + exit $FAIL } echo "Testing function tracer with profiler:" diff --git a/futex/Makefile b/futex/Makefile new file mode 100644 index 0000000..6a17529 --- /dev/null +++ b/futex/Makefile @@ -0,0 +1,29 @@ +SUBDIRS := functional + +TEST_PROGS := run.sh + +.PHONY: all clean +all: + for DIR in $(SUBDIRS); do $(MAKE) -C $$DIR $@ ; done + +include ../lib.mk + +override define RUN_TESTS + ./run.sh +endef + +override define INSTALL_RULE + mkdir -p $(INSTALL_PATH) + install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) + + @for SUBDIR in $(SUBDIRS); do \ + $(MAKE) -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \ + done; +endef + +override define EMIT_TESTS + echo "./run.sh" +endef + +clean: + for DIR in $(SUBDIRS); do $(MAKE) -C $$DIR $@ ; done diff --git a/futex/README b/futex/README new file mode 100644 index 0000000..3224a04 --- /dev/null +++ b/futex/README @@ -0,0 +1,62 @@ +Futex Test +========== +Futex Test is intended to thoroughly test the Linux kernel futex system call +API. + +Functional tests shall test the documented behavior of the futex operation +code under test. This includes checking for proper behavior under normal use, +odd corner cases, regression tests, and abject abuse and misuse. + +Futextest will also provide example implementation of mutual exclusion +primitives. These can be used as is in user applications or can serve as +examples for system libraries. These will likely be added to either a new lib/ +directory or purely as header files under include/, I'm leaning toward the +latter. + +Quick Start +----------- +# make +# ./run.sh + +Design and Implementation Goals +------------------------------- +o Tests should be as self contained as is practical so as to facilitate sharing + the individual tests on mailing list discussions and bug reports. +o The build system shall remain as simple as possible, avoiding any archive or + shared object building and linking. +o Where possible, any helper functions or other package-wide code shall be + implemented in header files, avoiding the need to compile intermediate object + files. +o External dependendencies shall remain as minimal as possible. Currently gcc + and glibc are the only dependencies. +o Tests return 0 for success and < 0 for failure. + +Output Formatting +----------------- +Test output shall be easily parsable by both human and machine. Title and +results are printed to stdout, while intermediate ERROR or FAIL messages are +sent to stderr. Tests shall support the -c option to print PASS, FAIL, and +ERROR strings in color for easy visual parsing. Output shall conform to the +following format: + +test_name: Description of the test + Arguments: arg1=val1 #units specified for clarity where appropriate + ERROR: Description of unexpected error + FAIL: Reason for test failure + # FIXME: Perhaps an " INFO: informational message" option would be + # useful here. Using -v to toggle it them on and off, as with -c. + # there may be multiple ERROR or FAIL messages +Result: (PASS|FAIL|ERROR) + +Naming +------ +o FIXME: decide on a sane test naming scheme. Currently the tests are named + based on the primary futex operation they test. Eventually this will become a + problem as we intend to write multiple tests which collide in this namespace. + Perhaps something like "wait-wake-1" "wait-wake-2" is adequate, leaving the + detailed description in the test source and the output. + +Coding Style +------------ +o The Futex Test project adheres to the coding standards set forth by Linux + kernel as defined in the Linux source Documentation/CodingStyle. diff --git a/futex/functional/.gitignore b/futex/functional/.gitignore new file mode 100644 index 0000000..a09f570 --- /dev/null +++ b/futex/functional/.gitignore @@ -0,0 +1,7 @@ +futex_requeue_pi +futex_requeue_pi_mismatched_ops +futex_requeue_pi_signal_restart +futex_wait_private_mapped_file +futex_wait_timeout +futex_wait_uninitialized_heap +futex_wait_wouldblock diff --git a/futex/functional/Makefile b/futex/functional/Makefile new file mode 100644 index 0000000..9d6b75e --- /dev/null +++ b/futex/functional/Makefile @@ -0,0 +1,25 @@ +INCLUDES := -I../include -I../../ +CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES) +LDFLAGS := $(LDFLAGS) -pthread -lrt + +HEADERS := ../include/futextest.h +TARGETS := \ + futex_wait_timeout \ + futex_wait_wouldblock \ + futex_requeue_pi \ + futex_requeue_pi_signal_restart \ + futex_requeue_pi_mismatched_ops \ + futex_wait_uninitialized_heap \ + futex_wait_private_mapped_file + +TEST_PROGS := $(TARGETS) run.sh + +.PHONY: all clean +all: $(TARGETS) + +$(TARGETS): $(HEADERS) + +include ../../lib.mk + +clean: + rm -f $(TARGETS) diff --git a/futex/functional/futex_requeue_pi.c b/futex/functional/futex_requeue_pi.c new file mode 100644 index 0000000..3da06ad --- /dev/null +++ b/futex/functional/futex_requeue_pi.c @@ -0,0 +1,409 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2006-2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * This test excercises the futex syscall op codes needed for requeuing + * priority inheritance aware POSIX condition variables and mutexes. + * + * AUTHORS + * Sripathi Kodi + * Darren Hart + * + * HISTORY + * 2008-Jan-13: Initial version by Sripathi Kodi + * 2009-Nov-6: futex test adaptation by Darren Hart + * + *****************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include "atomic.h" +#include "futextest.h" +#include "logging.h" + +#define MAX_WAKE_ITERS 1000 +#define THREAD_MAX 10 +#define SIGNAL_PERIOD_US 100 + +atomic_t waiters_blocked = ATOMIC_INITIALIZER; +atomic_t waiters_woken = ATOMIC_INITIALIZER; + +futex_t f1 = FUTEX_INITIALIZER; +futex_t f2 = FUTEX_INITIALIZER; +futex_t wake_complete = FUTEX_INITIALIZER; + +/* Test option defaults */ +static long timeout_ns; +static int broadcast; +static int owner; +static int locked; + +struct thread_arg { + long id; + struct timespec *timeout; + int lock; + int ret; +}; +#define THREAD_ARG_INITIALIZER { 0, NULL, 0, 0 } + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -b Broadcast wakeup (all waiters)\n"); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -l Lock the pi futex across requeue\n"); + printf(" -o Use a third party pi futex owner during requeue (cancels -l)\n"); + printf(" -t N Timeout in nanoseconds (default: 0)\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg, + int policy, int prio) +{ + int ret; + struct sched_param schedp; + pthread_attr_t attr; + + pthread_attr_init(&attr); + memset(&schedp, 0, sizeof(schedp)); + + ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED); + if (ret) { + error("pthread_attr_setinheritsched\n", ret); + return -1; + } + + ret = pthread_attr_setschedpolicy(&attr, policy); + if (ret) { + error("pthread_attr_setschedpolicy\n", ret); + return -1; + } + + schedp.sched_priority = prio; + ret = pthread_attr_setschedparam(&attr, &schedp); + if (ret) { + error("pthread_attr_setschedparam\n", ret); + return -1; + } + + ret = pthread_create(pth, &attr, func, arg); + if (ret) { + error("pthread_create\n", ret); + return -1; + } + return 0; +} + + +void *waiterfn(void *arg) +{ + struct thread_arg *args = (struct thread_arg *)arg; + futex_t old_val; + + info("Waiter %ld: running\n", args->id); + /* Each thread sleeps for a different amount of time + * This is to avoid races, because we don't lock the + * external mutex here */ + usleep(1000 * (long)args->id); + + old_val = f1; + atomic_inc(&waiters_blocked); + info("Calling futex_wait_requeue_pi: %p (%u) -> %p\n", + &f1, f1, &f2); + args->ret = futex_wait_requeue_pi(&f1, old_val, &f2, args->timeout, + FUTEX_PRIVATE_FLAG); + + info("waiter %ld woke with %d %s\n", args->id, args->ret, + args->ret < 0 ? strerror(errno) : ""); + atomic_inc(&waiters_woken); + if (args->ret < 0) { + if (args->timeout && errno == ETIMEDOUT) + args->ret = 0; + else { + args->ret = RET_ERROR; + error("futex_wait_requeue_pi\n", errno); + } + futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG); + } + futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); + + info("Waiter %ld: exiting with %d\n", args->id, args->ret); + pthread_exit((void *)&args->ret); +} + +void *broadcast_wakerfn(void *arg) +{ + struct thread_arg *args = (struct thread_arg *)arg; + int nr_requeue = INT_MAX; + int task_count = 0; + futex_t old_val; + int nr_wake = 1; + int i = 0; + + info("Waker: waiting for waiters to block\n"); + while (waiters_blocked.val < THREAD_MAX) + usleep(1000); + usleep(1000); + + info("Waker: Calling broadcast\n"); + if (args->lock) { + info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", f2, &f2); + futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG); + } + continue_requeue: + old_val = f1; + args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2, nr_wake, nr_requeue, + FUTEX_PRIVATE_FLAG); + if (args->ret < 0) { + args->ret = RET_ERROR; + error("FUTEX_CMP_REQUEUE_PI failed\n", errno); + } else if (++i < MAX_WAKE_ITERS) { + task_count += args->ret; + if (task_count < THREAD_MAX - waiters_woken.val) + goto continue_requeue; + } else { + error("max broadcast iterations (%d) reached with %d/%d tasks woken or requeued\n", + 0, MAX_WAKE_ITERS, task_count, THREAD_MAX); + args->ret = RET_ERROR; + } + + futex_wake(&wake_complete, 1, FUTEX_PRIVATE_FLAG); + + if (args->lock) + futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); + + if (args->ret > 0) + args->ret = task_count; + + info("Waker: exiting with %d\n", args->ret); + pthread_exit((void *)&args->ret); +} + +void *signal_wakerfn(void *arg) +{ + struct thread_arg *args = (struct thread_arg *)arg; + unsigned int old_val; + int nr_requeue = 0; + int task_count = 0; + int nr_wake = 1; + int i = 0; + + info("Waker: waiting for waiters to block\n"); + while (waiters_blocked.val < THREAD_MAX) + usleep(1000); + usleep(1000); + + while (task_count < THREAD_MAX && waiters_woken.val < THREAD_MAX) { + info("task_count: %d, waiters_woken: %d\n", + task_count, waiters_woken.val); + if (args->lock) { + info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", + f2, &f2); + futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG); + } + info("Waker: Calling signal\n"); + /* cond_signal */ + old_val = f1; + args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2, + nr_wake, nr_requeue, + FUTEX_PRIVATE_FLAG); + if (args->ret < 0) + args->ret = -errno; + info("futex: %x\n", f2); + if (args->lock) { + info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", + f2, &f2); + futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); + } + info("futex: %x\n", f2); + if (args->ret < 0) { + error("FUTEX_CMP_REQUEUE_PI failed\n", errno); + args->ret = RET_ERROR; + break; + } + + task_count += args->ret; + usleep(SIGNAL_PERIOD_US); + i++; + /* we have to loop at least THREAD_MAX times */ + if (i > MAX_WAKE_ITERS + THREAD_MAX) { + error("max signaling iterations (%d) reached, giving up on pending waiters.\n", + 0, MAX_WAKE_ITERS + THREAD_MAX); + args->ret = RET_ERROR; + break; + } + } + + futex_wake(&wake_complete, 1, FUTEX_PRIVATE_FLAG); + + if (args->ret >= 0) + args->ret = task_count; + + info("Waker: exiting with %d\n", args->ret); + info("Waker: waiters_woken: %d\n", waiters_woken.val); + pthread_exit((void *)&args->ret); +} + +void *third_party_blocker(void *arg) +{ + struct thread_arg *args = (struct thread_arg *)arg; + int ret2 = 0; + + args->ret = futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG); + if (args->ret) + goto out; + args->ret = futex_wait(&wake_complete, wake_complete, NULL, + FUTEX_PRIVATE_FLAG); + ret2 = futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); + + out: + if (args->ret || ret2) { + error("third_party_blocker() futex error", 0); + args->ret = RET_ERROR; + } + + pthread_exit((void *)&args->ret); +} + +int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns) +{ + void *(*wakerfn)(void *) = signal_wakerfn; + struct thread_arg blocker_arg = THREAD_ARG_INITIALIZER; + struct thread_arg waker_arg = THREAD_ARG_INITIALIZER; + pthread_t waiter[THREAD_MAX], waker, blocker; + struct timespec ts, *tsp = NULL; + struct thread_arg args[THREAD_MAX]; + int *waiter_ret; + int i, ret = RET_PASS; + + if (timeout_ns) { + time_t secs; + + info("timeout_ns = %ld\n", timeout_ns); + ret = clock_gettime(CLOCK_MONOTONIC, &ts); + secs = (ts.tv_nsec + timeout_ns) / 1000000000; + ts.tv_nsec = ((int64_t)ts.tv_nsec + timeout_ns) % 1000000000; + ts.tv_sec += secs; + info("ts.tv_sec = %ld\n", ts.tv_sec); + info("ts.tv_nsec = %ld\n", ts.tv_nsec); + tsp = &ts; + } + + if (broadcast) + wakerfn = broadcast_wakerfn; + + if (third_party_owner) { + if (create_rt_thread(&blocker, third_party_blocker, + (void *)&blocker_arg, SCHED_FIFO, 1)) { + error("Creating third party blocker thread failed\n", + errno); + ret = RET_ERROR; + goto out; + } + } + + atomic_set(&waiters_woken, 0); + for (i = 0; i < THREAD_MAX; i++) { + args[i].id = i; + args[i].timeout = tsp; + info("Starting thread %d\n", i); + if (create_rt_thread(&waiter[i], waiterfn, (void *)&args[i], + SCHED_FIFO, 1)) { + error("Creating waiting thread failed\n", errno); + ret = RET_ERROR; + goto out; + } + } + waker_arg.lock = lock; + if (create_rt_thread(&waker, wakerfn, (void *)&waker_arg, + SCHED_FIFO, 1)) { + error("Creating waker thread failed\n", errno); + ret = RET_ERROR; + goto out; + } + + /* Wait for threads to finish */ + /* Store the first error or failure encountered in waiter_ret */ + waiter_ret = &args[0].ret; + for (i = 0; i < THREAD_MAX; i++) + pthread_join(waiter[i], + *waiter_ret ? NULL : (void **)&waiter_ret); + + if (third_party_owner) + pthread_join(blocker, NULL); + pthread_join(waker, NULL); + +out: + if (!ret) { + if (*waiter_ret) + ret = *waiter_ret; + else if (waker_arg.ret < 0) + ret = waker_arg.ret; + else if (blocker_arg.ret) + ret = blocker_arg.ret; + } + + return ret; +} + +int main(int argc, char *argv[]) +{ + int c, ret; + + while ((c = getopt(argc, argv, "bchlot:v:")) != -1) { + switch (c) { + case 'b': + broadcast = 1; + break; + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'l': + locked = 1; + break; + case 'o': + owner = 1; + locked = 0; + break; + case 't': + timeout_ns = atoi(optarg); + break; + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + printf("%s: Test requeue functionality\n", basename(argv[0])); + printf("\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n", + broadcast, locked, owner, timeout_ns); + + /* + * FIXME: unit_test is obsolete now that we parse options and the + * various style of runs are done by run.sh - simplify the code and move + * unit_test into main() + */ + ret = unit_test(broadcast, locked, owner, timeout_ns); + + print_result(ret); + return ret; +} diff --git a/futex/functional/futex_requeue_pi_mismatched_ops.c b/futex/functional/futex_requeue_pi_mismatched_ops.c new file mode 100644 index 0000000..d5e4f2c --- /dev/null +++ b/futex/functional/futex_requeue_pi_mismatched_ops.c @@ -0,0 +1,135 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * 1. Block a thread using FUTEX_WAIT + * 2. Attempt to use FUTEX_CMP_REQUEUE_PI on the futex from 1. + * 3. The kernel must detect the mismatch and return -EINVAL. + * + * AUTHOR + * Darren Hart + * + * HISTORY + * 2009-Nov-9: Initial version by Darren Hart + * + *****************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include "futextest.h" +#include "logging.h" + +futex_t f1 = FUTEX_INITIALIZER; +futex_t f2 = FUTEX_INITIALIZER; +int child_ret = 0; + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +void *blocking_child(void *arg) +{ + child_ret = futex_wait(&f1, f1, NULL, FUTEX_PRIVATE_FLAG); + if (child_ret < 0) { + child_ret = -errno; + error("futex_wait\n", errno); + } + return (void *)&child_ret; +} + +int main(int argc, char *argv[]) +{ + int ret = RET_PASS; + pthread_t child; + int c; + + while ((c = getopt(argc, argv, "chv:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + printf("%s: Detect mismatched requeue_pi operations\n", + basename(argv[0])); + + if (pthread_create(&child, NULL, blocking_child, NULL)) { + error("pthread_create\n", errno); + ret = RET_ERROR; + goto out; + } + /* Allow the child to block in the kernel. */ + sleep(1); + + /* + * The kernel should detect the waiter did not setup the + * q->requeue_pi_key and return -EINVAL. If it does not, + * it likely gave the lock to the child, which is now hung + * in the kernel. + */ + ret = futex_cmp_requeue_pi(&f1, f1, &f2, 1, 0, FUTEX_PRIVATE_FLAG); + if (ret < 0) { + if (errno == EINVAL) { + /* + * The kernel correctly detected the mismatched + * requeue_pi target and aborted. Wake the child with + * FUTEX_WAKE. + */ + ret = futex_wake(&f1, 1, FUTEX_PRIVATE_FLAG); + if (ret == 1) { + ret = RET_PASS; + } else if (ret < 0) { + error("futex_wake\n", errno); + ret = RET_ERROR; + } else { + error("futex_wake did not wake the child\n", 0); + ret = RET_ERROR; + } + } else { + error("futex_cmp_requeue_pi\n", errno); + ret = RET_ERROR; + } + } else if (ret > 0) { + fail("futex_cmp_requeue_pi failed to detect the mismatch\n"); + ret = RET_FAIL; + } else { + error("futex_cmp_requeue_pi found no waiters\n", 0); + ret = RET_ERROR; + } + + pthread_join(child, NULL); + + if (!ret) + ret = child_ret; + + out: + /* If the kernel crashes, we shouldn't return at all. */ + print_result(ret); + return ret; +} diff --git a/futex/functional/futex_requeue_pi_signal_restart.c b/futex/functional/futex_requeue_pi_signal_restart.c new file mode 100644 index 0000000..3d7dc6a --- /dev/null +++ b/futex/functional/futex_requeue_pi_signal_restart.c @@ -0,0 +1,223 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2006-2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * This test exercises the futex_wait_requeue_pi() signal handling both + * before and after the requeue. The first should be restarted by the + * kernel. The latter should return EWOULDBLOCK to the waiter. + * + * AUTHORS + * Darren Hart + * + * HISTORY + * 2008-May-5: Initial version by Darren Hart + * + *****************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "atomic.h" +#include "futextest.h" +#include "logging.h" + +#define DELAY_US 100 + +futex_t f1 = FUTEX_INITIALIZER; +futex_t f2 = FUTEX_INITIALIZER; +atomic_t requeued = ATOMIC_INITIALIZER; + +int waiter_ret = 0; + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg, + int policy, int prio) +{ + struct sched_param schedp; + pthread_attr_t attr; + int ret; + + pthread_attr_init(&attr); + memset(&schedp, 0, sizeof(schedp)); + + ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED); + if (ret) { + error("pthread_attr_setinheritsched\n", ret); + return -1; + } + + ret = pthread_attr_setschedpolicy(&attr, policy); + if (ret) { + error("pthread_attr_setschedpolicy\n", ret); + return -1; + } + + schedp.sched_priority = prio; + ret = pthread_attr_setschedparam(&attr, &schedp); + if (ret) { + error("pthread_attr_setschedparam\n", ret); + return -1; + } + + ret = pthread_create(pth, &attr, func, arg); + if (ret) { + error("pthread_create\n", ret); + return -1; + } + return 0; +} + +void handle_signal(int signo) +{ + info("signal received %s requeue\n", + requeued.val ? "after" : "prior to"); +} + +void *waiterfn(void *arg) +{ + unsigned int old_val; + int res; + + waiter_ret = RET_PASS; + + info("Waiter running\n"); + info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2); + old_val = f1; + res = futex_wait_requeue_pi(&f1, old_val, &(f2), NULL, + FUTEX_PRIVATE_FLAG); + if (!requeued.val || errno != EWOULDBLOCK) { + fail("unexpected return from futex_wait_requeue_pi: %d (%s)\n", + res, strerror(errno)); + info("w2:futex: %x\n", f2); + if (!res) + futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); + waiter_ret = RET_FAIL; + } + + info("Waiter exiting with %d\n", waiter_ret); + pthread_exit(NULL); +} + + +int main(int argc, char *argv[]) +{ + unsigned int old_val; + struct sigaction sa; + pthread_t waiter; + int c, res, ret = RET_PASS; + + while ((c = getopt(argc, argv, "chv:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + printf("%s: Test signal handling during requeue_pi\n", + basename(argv[0])); + printf("\tArguments: \n"); + + sa.sa_handler = handle_signal; + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + if (sigaction(SIGUSR1, &sa, NULL)) { + error("sigaction\n", errno); + exit(1); + } + + info("m1:f2: %x\n", f2); + info("Creating waiter\n"); + res = create_rt_thread(&waiter, waiterfn, NULL, SCHED_FIFO, 1); + if (res) { + error("Creating waiting thread failed", res); + ret = RET_ERROR; + goto out; + } + + info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2); + info("m2:f2: %x\n", f2); + futex_lock_pi(&f2, 0, 0, FUTEX_PRIVATE_FLAG); + info("m3:f2: %x\n", f2); + + while (1) { + /* + * signal the waiter before requeue, waiter should automatically + * restart futex_wait_requeue_pi() in the kernel. Wait for the + * waiter to block on f1 again. + */ + info("Issuing SIGUSR1 to waiter\n"); + pthread_kill(waiter, SIGUSR1); + usleep(DELAY_US); + + info("Requeueing waiter via FUTEX_CMP_REQUEUE_PI\n"); + old_val = f1; + res = futex_cmp_requeue_pi(&f1, old_val, &(f2), 1, 0, + FUTEX_PRIVATE_FLAG); + /* + * If res is non-zero, we either requeued the waiter or hit an + * error, break out and handle it. If it is zero, then the + * signal may have hit before the the waiter was blocked on f1. + * Try again. + */ + if (res > 0) { + atomic_set(&requeued, 1); + break; + } else if (res < 0) { + error("FUTEX_CMP_REQUEUE_PI failed\n", errno); + ret = RET_ERROR; + break; + } + } + info("m4:f2: %x\n", f2); + + /* + * Signal the waiter after requeue, waiter should return from + * futex_wait_requeue_pi() with EWOULDBLOCK. Join the thread here so the + * futex_unlock_pi() can't happen before the signal wakeup is detected + * in the kernel. + */ + info("Issuing SIGUSR1 to waiter\n"); + pthread_kill(waiter, SIGUSR1); + info("Waiting for waiter to return\n"); + pthread_join(waiter, NULL); + + info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", f2, &f2); + futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); + info("m5:f2: %x\n", f2); + + out: + if (ret == RET_PASS && waiter_ret) + ret = waiter_ret; + + print_result(ret); + return ret; +} diff --git a/futex/functional/futex_wait_private_mapped_file.c b/futex/functional/futex_wait_private_mapped_file.c new file mode 100644 index 0000000..5f687f2 --- /dev/null +++ b/futex/functional/futex_wait_private_mapped_file.c @@ -0,0 +1,125 @@ +/****************************************************************************** + * + * Copyright FUJITSU LIMITED 2010 + * Copyright KOSAKI Motohiro + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * Internally, Futex has two handling mode, anon and file. The private file + * mapping is special. At first it behave as file, but after write anything + * it behave as anon. This test is intent to test such case. + * + * AUTHOR + * KOSAKI Motohiro + * + * HISTORY + * 2010-Jan-6: Initial version by KOSAKI Motohiro + * + *****************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "logging.h" +#include "futextest.h" + +#define PAGE_SZ 4096 + +char pad[PAGE_SZ] = {1}; +futex_t val = 1; +char pad2[PAGE_SZ] = {1}; + +#define WAKE_WAIT_US 3000000 +struct timespec wait_timeout = { .tv_sec = 5, .tv_nsec = 0}; + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +void *thr_futex_wait(void *arg) +{ + int ret; + + info("futex wait\n"); + ret = futex_wait(&val, 1, &wait_timeout, 0); + if (ret && errno != EWOULDBLOCK && errno != ETIMEDOUT) { + error("futex error.\n", errno); + print_result(RET_ERROR); + exit(RET_ERROR); + } + + if (ret && errno == ETIMEDOUT) + fail("waiter timedout\n"); + + info("futex_wait: ret = %d, errno = %d\n", ret, errno); + + return NULL; +} + +int main(int argc, char **argv) +{ + pthread_t thr; + int ret = RET_PASS; + int res; + int c; + + while ((c = getopt(argc, argv, "chv:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + printf("%s: Test the futex value of private file mappings in FUTEX_WAIT\n", + basename(argv[0])); + + ret = pthread_create(&thr, NULL, thr_futex_wait, NULL); + if (ret < 0) { + fprintf(stderr, "pthread_create error\n"); + ret = RET_ERROR; + goto out; + } + + info("wait a while\n"); + usleep(WAKE_WAIT_US); + val = 2; + res = futex_wake(&val, 1, 0); + info("futex_wake %d\n", res); + if (res != 1) { + fail("FUTEX_WAKE didn't find the waiting thread.\n"); + ret = RET_FAIL; + } + + info("join\n"); + pthread_join(thr, NULL); + + out: + print_result(ret); + return ret; +} diff --git a/futex/functional/futex_wait_timeout.c b/futex/functional/futex_wait_timeout.c new file mode 100644 index 0000000..ab428ca --- /dev/null +++ b/futex/functional/futex_wait_timeout.c @@ -0,0 +1,86 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * Block on a futex and wait for timeout. + * + * AUTHOR + * Darren Hart + * + * HISTORY + * 2009-Nov-6: Initial version by Darren Hart + * + *****************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include "futextest.h" +#include "logging.h" + +static long timeout_ns = 100000; /* 100us default timeout */ + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -t N Timeout in nanoseconds (default: 100,000)\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +int main(int argc, char *argv[]) +{ + futex_t f1 = FUTEX_INITIALIZER; + struct timespec to; + int res, ret = RET_PASS; + int c; + + while ((c = getopt(argc, argv, "cht:v:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 't': + timeout_ns = atoi(optarg); + break; + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + printf("%s: Block on a futex and wait for timeout\n", + basename(argv[0])); + printf("\tArguments: timeout=%ldns\n", timeout_ns); + + /* initialize timeout */ + to.tv_sec = 0; + to.tv_nsec = timeout_ns; + + info("Calling futex_wait on f1: %u @ %p\n", f1, &f1); + res = futex_wait(&f1, f1, &to, FUTEX_PRIVATE_FLAG); + if (!res || errno != ETIMEDOUT) { + fail("futex_wait returned %d\n", ret < 0 ? errno : ret); + ret = RET_FAIL; + } + + print_result(ret); + return ret; +} diff --git a/futex/functional/futex_wait_uninitialized_heap.c b/futex/functional/futex_wait_uninitialized_heap.c new file mode 100644 index 0000000..fe7aee9 --- /dev/null +++ b/futex/functional/futex_wait_uninitialized_heap.c @@ -0,0 +1,124 @@ +/****************************************************************************** + * + * Copyright FUJITSU LIMITED 2010 + * Copyright KOSAKI Motohiro + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * Wait on uninitialized heap. It shold be zero and FUTEX_WAIT should + * return immediately. This test is intent to test zero page handling in + * futex. + * + * AUTHOR + * KOSAKI Motohiro + * + * HISTORY + * 2010-Jan-6: Initial version by KOSAKI Motohiro + * + *****************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "logging.h" +#include "futextest.h" + +#define WAIT_US 5000000 + +static int child_blocked = 1; +static int child_ret; +void *buf; + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +void *wait_thread(void *arg) +{ + int res; + + child_ret = RET_PASS; + res = futex_wait(buf, 1, NULL, 0); + child_blocked = 0; + + if (res != 0 && errno != EWOULDBLOCK) { + error("futex failure\n", errno); + child_ret = RET_ERROR; + } + pthread_exit(NULL); +} + +int main(int argc, char **argv) +{ + int c, ret = RET_PASS; + long page_size; + pthread_t thr; + + while ((c = getopt(argc, argv, "chv:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + page_size = sysconf(_SC_PAGESIZE); + + buf = mmap(NULL, page_size, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); + if (buf == (void *)-1) { + error("mmap\n", errno); + exit(1); + } + + printf("%s: Test the uninitialized futex value in FUTEX_WAIT\n", + basename(argv[0])); + + + ret = pthread_create(&thr, NULL, wait_thread, NULL); + if (ret) { + error("pthread_create\n", errno); + ret = RET_ERROR; + goto out; + } + + info("waiting %dus for child to return\n", WAIT_US); + usleep(WAIT_US); + + ret = child_ret; + if (child_blocked) { + fail("child blocked in kernel\n"); + ret = RET_FAIL; + } + + out: + print_result(ret); + return ret; +} diff --git a/futex/functional/futex_wait_wouldblock.c b/futex/functional/futex_wait_wouldblock.c new file mode 100644 index 0000000..b6b0274 --- /dev/null +++ b/futex/functional/futex_wait_wouldblock.c @@ -0,0 +1,79 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * Test if FUTEX_WAIT op returns -EWOULDBLOCK if the futex value differs + * from the expected one. + * + * AUTHOR + * Gowrishankar + * + * HISTORY + * 2009-Nov-14: Initial version by Gowrishankar + * + *****************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include "futextest.h" +#include "logging.h" + +#define timeout_ns 100000 + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +int main(int argc, char *argv[]) +{ + struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns}; + futex_t f1 = FUTEX_INITIALIZER; + int res, ret = RET_PASS; + int c; + + while ((c = getopt(argc, argv, "cht:v:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + printf("%s: Test the unexpected futex value in FUTEX_WAIT\n", + basename(argv[0])); + + info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); + res = futex_wait(&f1, f1+1, &to, FUTEX_PRIVATE_FLAG); + if (!res || errno != EWOULDBLOCK) { + fail("futex_wait returned: %d %s\n", + res ? errno : res, res ? strerror(errno) : ""); + ret = RET_FAIL; + } + + print_result(ret); + return ret; +} diff --git a/futex/functional/run.sh b/futex/functional/run.sh new file mode 100755 index 0000000..e87dbe2 --- /dev/null +++ b/futex/functional/run.sh @@ -0,0 +1,79 @@ +#!/bin/sh + +############################################################################### +# +# Copyright © International Business Machines Corp., 2009 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# DESCRIPTION +# Run tests in the current directory. +# +# AUTHOR +# Darren Hart +# +# HISTORY +# 2009-Nov-9: Initial version by Darren Hart +# 2010-Jan-6: Add futex_wait_uninitialized_heap and futex_wait_private_mapped_file +# by KOSAKI Motohiro +# +############################################################################### + +# Test for a color capable console +if [ -z "$USE_COLOR" ]; then + tput setf 7 + if [ $? -eq 0 ]; then + USE_COLOR=1 + tput sgr0 + fi +fi +if [ "$USE_COLOR" -eq 1 ]; then + COLOR="-c" +fi + + +echo +# requeue pi testing +# without timeouts +./futex_requeue_pi $COLOR +./futex_requeue_pi $COLOR -b +./futex_requeue_pi $COLOR -b -l +./futex_requeue_pi $COLOR -b -o +./futex_requeue_pi $COLOR -l +./futex_requeue_pi $COLOR -o +# with timeouts +./futex_requeue_pi $COLOR -b -l -t 5000 +./futex_requeue_pi $COLOR -l -t 5000 +./futex_requeue_pi $COLOR -b -l -t 500000 +./futex_requeue_pi $COLOR -l -t 500000 +./futex_requeue_pi $COLOR -b -t 5000 +./futex_requeue_pi $COLOR -t 5000 +./futex_requeue_pi $COLOR -b -t 500000 +./futex_requeue_pi $COLOR -t 500000 +./futex_requeue_pi $COLOR -b -o -t 5000 +./futex_requeue_pi $COLOR -l -t 5000 +./futex_requeue_pi $COLOR -b -o -t 500000 +./futex_requeue_pi $COLOR -l -t 500000 +# with long timeout +./futex_requeue_pi $COLOR -b -l -t 2000000000 +./futex_requeue_pi $COLOR -l -t 2000000000 + + +echo +./futex_requeue_pi_mismatched_ops $COLOR + +echo +./futex_requeue_pi_signal_restart $COLOR + +echo +./futex_wait_timeout $COLOR + +echo +./futex_wait_wouldblock $COLOR + +echo +./futex_wait_uninitialized_heap $COLOR +./futex_wait_private_mapped_file $COLOR diff --git a/futex/include/atomic.h b/futex/include/atomic.h new file mode 100644 index 0000000..f861da3 --- /dev/null +++ b/futex/include/atomic.h @@ -0,0 +1,83 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * GCC atomic builtin wrappers + * http://gcc.gnu.org/onlinedocs/gcc-4.1.0/gcc/Atomic-Builtins.html + * + * AUTHOR + * Darren Hart + * + * HISTORY + * 2009-Nov-17: Initial version by Darren Hart + * + *****************************************************************************/ + +#ifndef _ATOMIC_H +#define _ATOMIC_H + +typedef struct { + volatile int val; +} atomic_t; + +#define ATOMIC_INITIALIZER { 0 } + +/** + * atomic_cmpxchg() - Atomic compare and exchange + * @uaddr: The address of the futex to be modified + * @oldval: The expected value of the futex + * @newval: The new value to try and assign the futex + * + * Return the old value of addr->val. + */ +static inline int +atomic_cmpxchg(atomic_t *addr, int oldval, int newval) +{ + return __sync_val_compare_and_swap(&addr->val, oldval, newval); +} + +/** + * atomic_inc() - Atomic incrememnt + * @addr: Address of the variable to increment + * + * Return the new value of addr->val. + */ +static inline int +atomic_inc(atomic_t *addr) +{ + return __sync_add_and_fetch(&addr->val, 1); +} + +/** + * atomic_dec() - Atomic decrement + * @addr: Address of the variable to decrement + * + * Return the new value of addr-val. + */ +static inline int +atomic_dec(atomic_t *addr) +{ + return __sync_sub_and_fetch(&addr->val, 1); +} + +/** + * atomic_set() - Atomic set + * @addr: Address of the variable to set + * @newval: New value for the atomic_t + * + * Return the new value of addr->val. + */ +static inline int +atomic_set(atomic_t *addr, int newval) +{ + addr->val = newval; + return newval; +} + +#endif diff --git a/futex/include/futextest.h b/futex/include/futextest.h new file mode 100644 index 0000000..b98c3ab --- /dev/null +++ b/futex/include/futextest.h @@ -0,0 +1,266 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * Glibc independent futex library for testing kernel functionality. + * + * AUTHOR + * Darren Hart + * + * HISTORY + * 2009-Nov-6: Initial version by Darren Hart + * + *****************************************************************************/ + +#ifndef _FUTEXTEST_H +#define _FUTEXTEST_H + +#include +#include +#include +#include + +typedef volatile u_int32_t futex_t; +#define FUTEX_INITIALIZER 0 + +/* Define the newer op codes if the system header file is not up to date. */ +#ifndef FUTEX_WAIT_BITSET +#define FUTEX_WAIT_BITSET 9 +#endif +#ifndef FUTEX_WAKE_BITSET +#define FUTEX_WAKE_BITSET 10 +#endif +#ifndef FUTEX_WAIT_REQUEUE_PI +#define FUTEX_WAIT_REQUEUE_PI 11 +#endif +#ifndef FUTEX_CMP_REQUEUE_PI +#define FUTEX_CMP_REQUEUE_PI 12 +#endif +#ifndef FUTEX_WAIT_REQUEUE_PI_PRIVATE +#define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) +#endif +#ifndef FUTEX_REQUEUE_PI_PRIVATE +#define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) +#endif + +/** + * futex() - SYS_futex syscall wrapper + * @uaddr: address of first futex + * @op: futex op code + * @val: typically expected value of uaddr, but varies by op + * @timeout: typically an absolute struct timespec (except where noted + * otherwise). Overloaded by some ops + * @uaddr2: address of second futex for some ops\ + * @val3: varies by op + * @opflags: flags to be bitwise OR'd with op, such as FUTEX_PRIVATE_FLAG + * + * futex() is used by all the following futex op wrappers. It can also be + * used for misuse and abuse testing. Generally, the specific op wrappers + * should be used instead. It is a macro instead of an static inline function as + * some of the types over overloaded (timeout is used for nr_requeue for + * example). + * + * These argument descriptions are the defaults for all + * like-named arguments in the following wrappers except where noted below. + */ +#define futex(uaddr, op, val, timeout, uaddr2, val3, opflags) \ + syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3) + +/** + * futex_wait() - block on uaddr with optional timeout + * @timeout: relative timeout + */ +static inline int +futex_wait(futex_t *uaddr, futex_t val, struct timespec *timeout, int opflags) +{ + return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags); +} + +/** + * futex_wake() - wake one or more tasks blocked on uaddr + * @nr_wake: wake up to this many tasks + */ +static inline int +futex_wake(futex_t *uaddr, int nr_wake, int opflags) +{ + return futex(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags); +} + +/** + * futex_wait_bitset() - block on uaddr with bitset + * @bitset: bitset to be used with futex_wake_bitset + */ +static inline int +futex_wait_bitset(futex_t *uaddr, futex_t val, struct timespec *timeout, + u_int32_t bitset, int opflags) +{ + return futex(uaddr, FUTEX_WAIT_BITSET, val, timeout, NULL, bitset, + opflags); +} + +/** + * futex_wake_bitset() - wake one or more tasks blocked on uaddr with bitset + * @bitset: bitset to compare with that used in futex_wait_bitset + */ +static inline int +futex_wake_bitset(futex_t *uaddr, int nr_wake, u_int32_t bitset, int opflags) +{ + return futex(uaddr, FUTEX_WAKE_BITSET, nr_wake, NULL, NULL, bitset, + opflags); +} + +/** + * futex_lock_pi() - block on uaddr as a PI mutex + * @detect: whether (1) or not (0) to perform deadlock detection + */ +static inline int +futex_lock_pi(futex_t *uaddr, struct timespec *timeout, int detect, + int opflags) +{ + return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags); +} + +/** + * futex_unlock_pi() - release uaddr as a PI mutex, waking the top waiter + */ +static inline int +futex_unlock_pi(futex_t *uaddr, int opflags) +{ + return futex(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags); +} + +/** + * futex_wake_op() - FIXME: COME UP WITH A GOOD ONE LINE DESCRIPTION + */ +static inline int +futex_wake_op(futex_t *uaddr, futex_t *uaddr2, int nr_wake, int nr_wake2, + int wake_op, int opflags) +{ + return futex(uaddr, FUTEX_WAKE_OP, nr_wake, nr_wake2, uaddr2, wake_op, + opflags); +} + +/** + * futex_requeue() - requeue without expected value comparison, deprecated + * @nr_wake: wake up to this many tasks + * @nr_requeue: requeue up to this many tasks + * + * Due to its inherently racy implementation, futex_requeue() is deprecated in + * favor of futex_cmp_requeue(). + */ +static inline int +futex_requeue(futex_t *uaddr, futex_t *uaddr2, int nr_wake, int nr_requeue, + int opflags) +{ + return futex(uaddr, FUTEX_REQUEUE, nr_wake, nr_requeue, uaddr2, 0, + opflags); +} + +/** + * futex_cmp_requeue() - requeue tasks from uaddr to uaddr2 + * @nr_wake: wake up to this many tasks + * @nr_requeue: requeue up to this many tasks + */ +static inline int +futex_cmp_requeue(futex_t *uaddr, futex_t val, futex_t *uaddr2, int nr_wake, + int nr_requeue, int opflags) +{ + return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2, + val, opflags); +} + +/** + * futex_wait_requeue_pi() - block on uaddr and prepare to requeue to uaddr2 + * @uaddr: non-PI futex source + * @uaddr2: PI futex target + * + * This is the first half of the requeue_pi mechanism. It shall always be + * paired with futex_cmp_requeue_pi(). + */ +static inline int +futex_wait_requeue_pi(futex_t *uaddr, futex_t val, futex_t *uaddr2, + struct timespec *timeout, int opflags) +{ + return futex(uaddr, FUTEX_WAIT_REQUEUE_PI, val, timeout, uaddr2, 0, + opflags); +} + +/** + * futex_cmp_requeue_pi() - requeue tasks from uaddr to uaddr2 (PI aware) + * @uaddr: non-PI futex source + * @uaddr2: PI futex target + * @nr_wake: wake up to this many tasks + * @nr_requeue: requeue up to this many tasks + */ +static inline int +futex_cmp_requeue_pi(futex_t *uaddr, futex_t val, futex_t *uaddr2, int nr_wake, + int nr_requeue, int opflags) +{ + return futex(uaddr, FUTEX_CMP_REQUEUE_PI, nr_wake, nr_requeue, uaddr2, + val, opflags); +} + +/** + * futex_cmpxchg() - atomic compare and exchange + * @uaddr: The address of the futex to be modified + * @oldval: The expected value of the futex + * @newval: The new value to try and assign the futex + * + * Implement cmpxchg using gcc atomic builtins. + * http://gcc.gnu.org/onlinedocs/gcc-4.1.0/gcc/Atomic-Builtins.html + * + * Return the old futex value. + */ +static inline u_int32_t +futex_cmpxchg(futex_t *uaddr, u_int32_t oldval, u_int32_t newval) +{ + return __sync_val_compare_and_swap(uaddr, oldval, newval); +} + +/** + * futex_dec() - atomic decrement of the futex value + * @uaddr: The address of the futex to be modified + * + * Return the new futex value. + */ +static inline u_int32_t +futex_dec(futex_t *uaddr) +{ + return __sync_sub_and_fetch(uaddr, 1); +} + +/** + * futex_inc() - atomic increment of the futex value + * @uaddr: the address of the futex to be modified + * + * Return the new futex value. + */ +static inline u_int32_t +futex_inc(futex_t *uaddr) +{ + return __sync_add_and_fetch(uaddr, 1); +} + +/** + * futex_set() - atomic decrement of the futex value + * @uaddr: the address of the futex to be modified + * @newval: New value for the atomic_t + * + * Return the new futex value. + */ +static inline u_int32_t +futex_set(futex_t *uaddr, u_int32_t newval) +{ + *uaddr = newval; + return newval; +} + +#endif diff --git a/futex/include/logging.h b/futex/include/logging.h new file mode 100644 index 0000000..014aa01 --- /dev/null +++ b/futex/include/logging.h @@ -0,0 +1,153 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * Glibc independent futex library for testing kernel functionality. + * + * AUTHOR + * Darren Hart + * + * HISTORY + * 2009-Nov-6: Initial version by Darren Hart + * + *****************************************************************************/ + +#ifndef _LOGGING_H +#define _LOGGING_H + +#include +#include +#include +#include "kselftest.h" + +/* + * Define PASS, ERROR, and FAIL strings with and without color escape + * sequences, default to no color. + */ +#define ESC 0x1B, '[' +#define BRIGHT '1' +#define GREEN '3', '2' +#define YELLOW '3', '3' +#define RED '3', '1' +#define ESCEND 'm' +#define BRIGHT_GREEN ESC, BRIGHT, ';', GREEN, ESCEND +#define BRIGHT_YELLOW ESC, BRIGHT, ';', YELLOW, ESCEND +#define BRIGHT_RED ESC, BRIGHT, ';', RED, ESCEND +#define RESET_COLOR ESC, '0', 'm' +static const char PASS_COLOR[] = {BRIGHT_GREEN, ' ', 'P', 'A', 'S', 'S', + RESET_COLOR, 0}; +static const char ERROR_COLOR[] = {BRIGHT_YELLOW, 'E', 'R', 'R', 'O', 'R', + RESET_COLOR, 0}; +static const char FAIL_COLOR[] = {BRIGHT_RED, ' ', 'F', 'A', 'I', 'L', + RESET_COLOR, 0}; +static const char INFO_NORMAL[] = " INFO"; +static const char PASS_NORMAL[] = " PASS"; +static const char ERROR_NORMAL[] = "ERROR"; +static const char FAIL_NORMAL[] = " FAIL"; +const char *INFO = INFO_NORMAL; +const char *PASS = PASS_NORMAL; +const char *ERROR = ERROR_NORMAL; +const char *FAIL = FAIL_NORMAL; + +/* Verbosity setting for INFO messages */ +#define VQUIET 0 +#define VCRITICAL 1 +#define VINFO 2 +#define VMAX VINFO +int _verbose = VCRITICAL; + +/* Functional test return codes */ +#define RET_PASS 0 +#define RET_ERROR -1 +#define RET_FAIL -2 + +/** + * log_color() - Use colored output for PASS, ERROR, and FAIL strings + * @use_color: use color (1) or not (0) + */ +void log_color(int use_color) +{ + if (use_color) { + PASS = PASS_COLOR; + ERROR = ERROR_COLOR; + FAIL = FAIL_COLOR; + } else { + PASS = PASS_NORMAL; + ERROR = ERROR_NORMAL; + FAIL = FAIL_NORMAL; + } +} + +/** + * log_verbosity() - Set verbosity of test output + * @verbose: Enable (1) verbose output or not (0) + * + * Currently setting verbose=1 will enable INFO messages and 0 will disable + * them. FAIL and ERROR messages are always displayed. + */ +void log_verbosity(int level) +{ + if (level > VMAX) + level = VMAX; + else if (level < 0) + level = 0; + _verbose = level; +} + +/** + * print_result() - Print standard PASS | ERROR | FAIL results + * @ret: the return value to be considered: 0 | RET_ERROR | RET_FAIL + * + * print_result() is primarily intended for functional tests. + */ +void print_result(int ret) +{ + const char *result = "Unknown return code"; + + switch (ret) { + case RET_PASS: + ksft_inc_pass_cnt(); + result = PASS; + break; + case RET_ERROR: + result = ERROR; + break; + case RET_FAIL: + ksft_inc_fail_cnt(); + result = FAIL; + break; + } + printf("Result: %s\n", result); +} + +/* log level macros */ +#define info(message, vargs...) \ +do { \ + if (_verbose >= VINFO) \ + fprintf(stderr, "\t%s: "message, INFO, ##vargs); \ +} while (0) + +#define error(message, err, args...) \ +do { \ + if (_verbose >= VCRITICAL) {\ + if (err) \ + fprintf(stderr, "\t%s: %s: "message, \ + ERROR, strerror(err), ##args); \ + else \ + fprintf(stderr, "\t%s: "message, ERROR, ##args); \ + } \ +} while (0) + +#define fail(message, args...) \ +do { \ + if (_verbose >= VCRITICAL) \ + fprintf(stderr, "\t%s: "message, FAIL, ##args); \ +} while (0) + +#endif diff --git a/futex/run.sh b/futex/run.sh new file mode 100755 index 0000000..4126312 --- /dev/null +++ b/futex/run.sh @@ -0,0 +1,33 @@ +#!/bin/sh + +############################################################################### +# +# Copyright © International Business Machines Corp., 2009 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# DESCRIPTION +# Run all tests under the functional, performance, and stress directories. +# Format and summarize the results. +# +# AUTHOR +# Darren Hart +# +# HISTORY +# 2009-Nov-9: Initial version by Darren Hart +# +############################################################################### + +# Test for a color capable shell and pass the result to the subdir scripts +USE_COLOR=0 +tput setf 7 +if [ $? -eq 0 ]; then + USE_COLOR=1 + tput sgr0 +fi +export USE_COLOR + +(cd functional; ./run.sh) diff --git a/gen_kselftest_tar.sh b/gen_kselftest_tar.sh new file mode 100755 index 0000000..17d5bd0 --- /dev/null +++ b/gen_kselftest_tar.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# +# gen_kselftest_tar +# Generate kselftest tarball +# Author: Shuah Khan +# Copyright (C) 2015 Samsung Electronics Co., Ltd. + +# This software may be freely redistributed under the terms of the GNU +# General Public License (GPLv2). + +# main +main() +{ + if [ "$#" -eq 0 ]; then + echo "$0: Generating default compression gzip" + copts="cvzf" + ext=".tar.gz" + else + case "$1" in + tar) + copts="cvf" + ext=".tar" + ;; + targz) + copts="cvzf" + ext=".tar.gz" + ;; + tarbz2) + copts="cvjf" + ext=".tar.bz2" + ;; + tarxz) + copts="cvJf" + ext=".tar.xz" + ;; + *) + echo "Unknown tarball format $1" + exit 1 + ;; + esac + fi + + install_dir=./kselftest + +# Run install using INSTALL_KSFT_PATH override to generate install +# directory +./kselftest_install.sh +tar $copts kselftest${ext} $install_dir +echo "Kselftest archive kselftest${ext} created!" + +# clean up install directory +rm -rf kselftest +} + +main "$@" diff --git a/ipc/Makefile b/ipc/Makefile index 74bbefd..25d2e70 100644 --- a/ipc/Makefile +++ b/ipc/Makefile @@ -12,14 +12,11 @@ endif CFLAGS += -I../../../../usr/include/ all: -ifeq ($(ARCH),x86) - gcc $(CFLAGS) msgque.c -o msgque_test -else - echo "Not an x86 target, can't build msgque selftest" -endif + $(CC) $(CFLAGS) msgque.c -o msgque_test + +TEST_PROGS := msgque_test -run_tests: all - ./msgque_test +include ../lib.mk clean: rm -fr ./msgque_test diff --git a/kcmp/Makefile b/kcmp/Makefile index ff0eefd..2ae7450 100644 --- a/kcmp/Makefile +++ b/kcmp/Makefile @@ -1,10 +1,10 @@ -CC := $(CROSS_COMPILE)$(CC) CFLAGS += -I../../../../usr/include/ all: kcmp_test -run_tests: all - @./kcmp_test || echo "kcmp_test: [FAIL]" +TEST_PROGS := kcmp_test + +include ../lib.mk clean: $(RM) kcmp_test kcmp-test-file diff --git a/kselftest.h b/kselftest.h index 572c888..ef1c80d 100644 --- a/kselftest.h +++ b/kselftest.h @@ -13,6 +13,13 @@ #include #include +/* define kselftest exit codes */ +#define KSFT_PASS 0 +#define KSFT_FAIL 1 +#define KSFT_XFAIL 2 +#define KSFT_XPASS 3 +#define KSFT_SKIP 4 + /* counters */ struct ksft_count { unsigned int ksft_pass; @@ -40,23 +47,23 @@ static inline void ksft_print_cnts(void) static inline int ksft_exit_pass(void) { - exit(0); + exit(KSFT_PASS); } static inline int ksft_exit_fail(void) { - exit(1); + exit(KSFT_FAIL); } static inline int ksft_exit_xfail(void) { - exit(2); + exit(KSFT_XFAIL); } static inline int ksft_exit_xpass(void) { - exit(3); + exit(KSFT_XPASS); } static inline int ksft_exit_skip(void) { - exit(4); + exit(KSFT_SKIP); } #endif /* __KSELFTEST_H */ diff --git a/kselftest_install.sh b/kselftest_install.sh new file mode 100755 index 0000000..1555fbd --- /dev/null +++ b/kselftest_install.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# +# Kselftest Install +# Install kselftest tests +# Author: Shuah Khan +# Copyright (C) 2015 Samsung Electronics Co., Ltd. + +# This software may be freely redistributed under the terms of the GNU +# General Public License (GPLv2). + +install_loc=`pwd` + +main() +{ + if [ $(basename $install_loc) != "selftests" ]; then + echo "$0: Please run it in selftests directory ..." + exit 1; + fi + if [ "$#" -eq 0 ]; then + echo "$0: Installing in default location - $install_loc ..." + elif [ ! -d "$1" ]; then + echo "$0: $1 doesn't exist!!" + exit 1; + else + install_loc=$1 + echo "$0: Installing in specified location - $install_loc ..." + fi + + install_dir=$install_loc/kselftest + +# Create install directory + mkdir -p $install_dir +# Build tests + INSTALL_PATH=$install_dir make install +} + +main "$@" diff --git a/lib.mk b/lib.mk new file mode 100644 index 0000000..50a93f5 --- /dev/null +++ b/lib.mk @@ -0,0 +1,38 @@ +# This mimics the top-level Makefile. We do it explicitly here so that this +# Makefile can operate with or without the kbuild infrastructure. +CC := $(CROSS_COMPILE)gcc + +define RUN_TESTS + @for TEST in $(TEST_PROGS); do \ + (./$$TEST && echo "selftests: $$TEST [PASS]") || echo "selftests: $$TEST [FAIL]"; \ + done; +endef + +run_tests: all + $(RUN_TESTS) + +define INSTALL_RULE + @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \ + mkdir -p ${INSTALL_PATH}; \ + echo "rsync -a $(TEST_DIRS) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/"; \ + rsync -a $(TEST_DIRS) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/; \ + fi +endef + +install: all +ifdef INSTALL_PATH + $(INSTALL_RULE) +else + $(error Error: set INSTALL_PATH to use install) +endif + +define EMIT_TESTS + @for TEST in $(TEST_PROGS); do \ + echo "(./$$TEST && echo \"selftests: $$TEST [PASS]\") || echo \"selftests: $$TEST [FAIL]\""; \ + done; +endef + +emit_tests: + $(EMIT_TESTS) + +.PHONY: run_tests all clean install emit_tests diff --git a/membarrier/.gitignore b/membarrier/.gitignore new file mode 100644 index 0000000..020c44f --- /dev/null +++ b/membarrier/.gitignore @@ -0,0 +1 @@ +membarrier_test diff --git a/membarrier/Makefile b/membarrier/Makefile new file mode 100644 index 0000000..a1a9708 --- /dev/null +++ b/membarrier/Makefile @@ -0,0 +1,10 @@ +CFLAGS += -g -I../../../../usr/include/ + +TEST_PROGS := membarrier_test + +all: $(TEST_PROGS) + +include ../lib.mk + +clean: + $(RM) $(TEST_PROGS) diff --git a/membarrier/membarrier_test.c b/membarrier/membarrier_test.c new file mode 100644 index 0000000..535f0fe --- /dev/null +++ b/membarrier/membarrier_test.c @@ -0,0 +1,118 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include + +#include "../kselftest.h" + +enum test_membarrier_status { + TEST_MEMBARRIER_PASS = 0, + TEST_MEMBARRIER_FAIL, + TEST_MEMBARRIER_SKIP, +}; + +static int sys_membarrier(int cmd, int flags) +{ + return syscall(__NR_membarrier, cmd, flags); +} + +static enum test_membarrier_status test_membarrier_cmd_fail(void) +{ + int cmd = -1, flags = 0; + + if (sys_membarrier(cmd, flags) != -1) { + printf("membarrier: Wrong command should fail but passed.\n"); + return TEST_MEMBARRIER_FAIL; + } + return TEST_MEMBARRIER_PASS; +} + +static enum test_membarrier_status test_membarrier_flags_fail(void) +{ + int cmd = MEMBARRIER_CMD_QUERY, flags = 1; + + if (sys_membarrier(cmd, flags) != -1) { + printf("membarrier: Wrong flags should fail but passed.\n"); + return TEST_MEMBARRIER_FAIL; + } + return TEST_MEMBARRIER_PASS; +} + +static enum test_membarrier_status test_membarrier_success(void) +{ + int cmd = MEMBARRIER_CMD_SHARED, flags = 0; + + if (sys_membarrier(cmd, flags) != 0) { + printf("membarrier: Executing MEMBARRIER_CMD_SHARED failed. %s.\n", + strerror(errno)); + return TEST_MEMBARRIER_FAIL; + } + + printf("membarrier: MEMBARRIER_CMD_SHARED success.\n"); + return TEST_MEMBARRIER_PASS; +} + +static enum test_membarrier_status test_membarrier(void) +{ + enum test_membarrier_status status; + + status = test_membarrier_cmd_fail(); + if (status) + return status; + status = test_membarrier_flags_fail(); + if (status) + return status; + status = test_membarrier_success(); + if (status) + return status; + return TEST_MEMBARRIER_PASS; +} + +static enum test_membarrier_status test_membarrier_query(void) +{ + int flags = 0, ret; + + printf("membarrier MEMBARRIER_CMD_QUERY "); + ret = sys_membarrier(MEMBARRIER_CMD_QUERY, flags); + if (ret < 0) { + printf("failed. %s.\n", strerror(errno)); + switch (errno) { + case ENOSYS: + /* + * It is valid to build a kernel with + * CONFIG_MEMBARRIER=n. However, this skips the tests. + */ + return TEST_MEMBARRIER_SKIP; + case EINVAL: + default: + return TEST_MEMBARRIER_FAIL; + } + } + if (!(ret & MEMBARRIER_CMD_SHARED)) { + printf("command MEMBARRIER_CMD_SHARED is not supported.\n"); + return TEST_MEMBARRIER_FAIL; + } + printf("syscall available.\n"); + return TEST_MEMBARRIER_PASS; +} + +int main(int argc, char **argv) +{ + switch (test_membarrier_query()) { + case TEST_MEMBARRIER_FAIL: + return ksft_exit_fail(); + case TEST_MEMBARRIER_SKIP: + return ksft_exit_skip(); + } + switch (test_membarrier()) { + case TEST_MEMBARRIER_FAIL: + return ksft_exit_fail(); + case TEST_MEMBARRIER_SKIP: + return ksft_exit_skip(); + } + + printf("membarrier: tests done!\n"); + return ksft_exit_pass(); +} diff --git a/memfd/Makefile b/memfd/Makefile index b80cd10..3e7eb79 100644 --- a/memfd/Makefile +++ b/memfd/Makefile @@ -1,17 +1,19 @@ +CC = $(CROSS_COMPILE)gcc CFLAGS += -D_FILE_OFFSET_BITS=64 CFLAGS += -I../../../../include/uapi/ CFLAGS += -I../../../../include/ +CFLAGS += -I../../../../usr/include/ all: - gcc $(CFLAGS) memfd_test.c -o memfd_test + $(CC) $(CFLAGS) memfd_test.c -o memfd_test -run_tests: all - gcc $(CFLAGS) memfd_test.c -o memfd_test - @./memfd_test || echo "memfd_test: [FAIL]" +TEST_PROGS := memfd_test + +include ../lib.mk build_fuse: - gcc $(CFLAGS) fuse_mnt.c `pkg-config fuse --cflags --libs` -o fuse_mnt - gcc $(CFLAGS) fuse_test.c -o fuse_test + $(CC) $(CFLAGS) fuse_mnt.c `pkg-config fuse --cflags --libs` -o fuse_mnt + $(CC) $(CFLAGS) fuse_test.c -o fuse_test run_fuse: build_fuse @./run_fuse_test.sh || echo "fuse_test: [FAIL]" diff --git a/memory-hotplug/Makefile b/memory-hotplug/Makefile index d46b8d4..afb2624 100644 --- a/memory-hotplug/Makefile +++ b/memory-hotplug/Makefile @@ -1,9 +1,12 @@ all: -run_tests: - @/bin/bash ./on-off-test.sh -r 2 || echo "memory-hotplug selftests: [FAIL]" +include ../lib.mk + +TEST_PROGS := mem-on-off-test.sh +override RUN_TESTS := ./mem-on-off-test.sh -r 2 || echo "selftests: memory-hotplug [FAIL]" +override EMIT_TESTS := echo "$(RUN_TESTS)" run_full_test: - @/bin/bash ./on-off-test.sh || echo "memory-hotplug selftests: [FAIL]" + @/bin/bash ./mem-on-off-test.sh || echo "memory-hotplug selftests: [FAIL]" clean: diff --git a/memory-hotplug/mem-on-off-test.sh b/memory-hotplug/mem-on-off-test.sh new file mode 100755 index 0000000..6cddde0 --- /dev/null +++ b/memory-hotplug/mem-on-off-test.sh @@ -0,0 +1,238 @@ +#!/bin/bash + +SYSFS= + +prerequisite() +{ + msg="skip all tests:" + + if [ $UID != 0 ]; then + echo $msg must be run as root >&2 + exit 0 + fi + + SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'` + + if [ ! -d "$SYSFS" ]; then + echo $msg sysfs is not mounted >&2 + exit 0 + fi + + if ! ls $SYSFS/devices/system/memory/memory* > /dev/null 2>&1; then + echo $msg memory hotplug is not supported >&2 + exit 0 + fi +} + +# +# list all hot-pluggable memory +# +hotpluggable_memory() +{ + local state=${1:-.\*} + + for memory in $SYSFS/devices/system/memory/memory*; do + if grep -q 1 $memory/removable && + grep -q $state $memory/state; then + echo ${memory##/*/memory} + fi + done +} + +hotplaggable_offline_memory() +{ + hotpluggable_memory offline +} + +hotpluggable_online_memory() +{ + hotpluggable_memory online +} + +memory_is_online() +{ + grep -q online $SYSFS/devices/system/memory/memory$1/state +} + +memory_is_offline() +{ + grep -q offline $SYSFS/devices/system/memory/memory$1/state +} + +online_memory() +{ + echo online > $SYSFS/devices/system/memory/memory$1/state +} + +offline_memory() +{ + echo offline > $SYSFS/devices/system/memory/memory$1/state +} + +online_memory_expect_success() +{ + local memory=$1 + + if ! online_memory $memory; then + echo $FUNCNAME $memory: unexpected fail >&2 + elif ! memory_is_online $memory; then + echo $FUNCNAME $memory: unexpected offline >&2 + fi +} + +online_memory_expect_fail() +{ + local memory=$1 + + if online_memory $memory 2> /dev/null; then + echo $FUNCNAME $memory: unexpected success >&2 + elif ! memory_is_offline $memory; then + echo $FUNCNAME $memory: unexpected online >&2 + fi +} + +offline_memory_expect_success() +{ + local memory=$1 + + if ! offline_memory $memory; then + echo $FUNCNAME $memory: unexpected fail >&2 + elif ! memory_is_offline $memory; then + echo $FUNCNAME $memory: unexpected offline >&2 + fi +} + +offline_memory_expect_fail() +{ + local memory=$1 + + if offline_memory $memory 2> /dev/null; then + echo $FUNCNAME $memory: unexpected success >&2 + elif ! memory_is_online $memory; then + echo $FUNCNAME $memory: unexpected offline >&2 + fi +} + +error=-12 +priority=0 +ratio=10 + +while getopts e:hp:r: opt; do + case $opt in + e) + error=$OPTARG + ;; + h) + echo "Usage $0 [ -e errno ] [ -p notifier-priority ] [ -r percent-of-memory-to-offline ]" + exit + ;; + p) + priority=$OPTARG + ;; + r) + ratio=$OPTARG + ;; + esac +done + +if ! [ "$error" -ge -4095 -a "$error" -lt 0 ]; then + echo "error code must be -4095 <= errno < 0" >&2 + exit 1 +fi + +prerequisite + +echo "Test scope: $ratio% hotplug memory" +echo -e "\t online all hotplug memory in offline state" +echo -e "\t offline $ratio% hotplug memory in online state" +echo -e "\t online all hotplug memory in offline state" + +# +# Online all hot-pluggable memory +# +for memory in `hotplaggable_offline_memory`; do + echo offline-online $memory + online_memory_expect_success $memory +done + +# +# Offline $ratio percent of hot-pluggable memory +# +for memory in `hotpluggable_online_memory`; do + if [ $((RANDOM % 100)) -lt $ratio ]; then + echo online-offline $memory + offline_memory_expect_success $memory + fi +done + +# +# Online all hot-pluggable memory again +# +for memory in `hotplaggable_offline_memory`; do + echo offline-online $memory + online_memory_expect_success $memory +done + +# +# Test with memory notifier error injection +# + +DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'` +NOTIFIER_ERR_INJECT_DIR=$DEBUGFS/notifier-error-inject/memory + +prerequisite_extra() +{ + msg="skip extra tests:" + + /sbin/modprobe -q -r memory-notifier-error-inject + /sbin/modprobe -q memory-notifier-error-inject priority=$priority + + if [ ! -d "$DEBUGFS" ]; then + echo $msg debugfs is not mounted >&2 + exit 0 + fi + + if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then + echo $msg memory-notifier-error-inject module is not available >&2 + exit 0 + fi +} + +prerequisite_extra + +# +# Offline $ratio percent of hot-pluggable memory +# +echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error +for memory in `hotpluggable_online_memory`; do + if [ $((RANDOM % 100)) -lt $ratio ]; then + offline_memory_expect_success $memory + fi +done + +# +# Test memory hot-add error handling (offline => online) +# +echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error +for memory in `hotplaggable_offline_memory`; do + online_memory_expect_fail $memory +done + +# +# Online all hot-pluggable memory +# +echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error +for memory in `hotplaggable_offline_memory`; do + online_memory_expect_success $memory +done + +# +# Test memory hot-remove error handling (online => offline) +# +echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error +for memory in `hotpluggable_online_memory`; do + offline_memory_expect_fail $memory +done + +echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error +/sbin/modprobe -q -r memory-notifier-error-inject diff --git a/memory-hotplug/on-off-test.sh b/memory-hotplug/on-off-test.sh deleted file mode 100644 index 6cddde0..0000000 --- a/memory-hotplug/on-off-test.sh +++ /dev/null @@ -1,238 +0,0 @@ -#!/bin/bash - -SYSFS= - -prerequisite() -{ - msg="skip all tests:" - - if [ $UID != 0 ]; then - echo $msg must be run as root >&2 - exit 0 - fi - - SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'` - - if [ ! -d "$SYSFS" ]; then - echo $msg sysfs is not mounted >&2 - exit 0 - fi - - if ! ls $SYSFS/devices/system/memory/memory* > /dev/null 2>&1; then - echo $msg memory hotplug is not supported >&2 - exit 0 - fi -} - -# -# list all hot-pluggable memory -# -hotpluggable_memory() -{ - local state=${1:-.\*} - - for memory in $SYSFS/devices/system/memory/memory*; do - if grep -q 1 $memory/removable && - grep -q $state $memory/state; then - echo ${memory##/*/memory} - fi - done -} - -hotplaggable_offline_memory() -{ - hotpluggable_memory offline -} - -hotpluggable_online_memory() -{ - hotpluggable_memory online -} - -memory_is_online() -{ - grep -q online $SYSFS/devices/system/memory/memory$1/state -} - -memory_is_offline() -{ - grep -q offline $SYSFS/devices/system/memory/memory$1/state -} - -online_memory() -{ - echo online > $SYSFS/devices/system/memory/memory$1/state -} - -offline_memory() -{ - echo offline > $SYSFS/devices/system/memory/memory$1/state -} - -online_memory_expect_success() -{ - local memory=$1 - - if ! online_memory $memory; then - echo $FUNCNAME $memory: unexpected fail >&2 - elif ! memory_is_online $memory; then - echo $FUNCNAME $memory: unexpected offline >&2 - fi -} - -online_memory_expect_fail() -{ - local memory=$1 - - if online_memory $memory 2> /dev/null; then - echo $FUNCNAME $memory: unexpected success >&2 - elif ! memory_is_offline $memory; then - echo $FUNCNAME $memory: unexpected online >&2 - fi -} - -offline_memory_expect_success() -{ - local memory=$1 - - if ! offline_memory $memory; then - echo $FUNCNAME $memory: unexpected fail >&2 - elif ! memory_is_offline $memory; then - echo $FUNCNAME $memory: unexpected offline >&2 - fi -} - -offline_memory_expect_fail() -{ - local memory=$1 - - if offline_memory $memory 2> /dev/null; then - echo $FUNCNAME $memory: unexpected success >&2 - elif ! memory_is_online $memory; then - echo $FUNCNAME $memory: unexpected offline >&2 - fi -} - -error=-12 -priority=0 -ratio=10 - -while getopts e:hp:r: opt; do - case $opt in - e) - error=$OPTARG - ;; - h) - echo "Usage $0 [ -e errno ] [ -p notifier-priority ] [ -r percent-of-memory-to-offline ]" - exit - ;; - p) - priority=$OPTARG - ;; - r) - ratio=$OPTARG - ;; - esac -done - -if ! [ "$error" -ge -4095 -a "$error" -lt 0 ]; then - echo "error code must be -4095 <= errno < 0" >&2 - exit 1 -fi - -prerequisite - -echo "Test scope: $ratio% hotplug memory" -echo -e "\t online all hotplug memory in offline state" -echo -e "\t offline $ratio% hotplug memory in online state" -echo -e "\t online all hotplug memory in offline state" - -# -# Online all hot-pluggable memory -# -for memory in `hotplaggable_offline_memory`; do - echo offline-online $memory - online_memory_expect_success $memory -done - -# -# Offline $ratio percent of hot-pluggable memory -# -for memory in `hotpluggable_online_memory`; do - if [ $((RANDOM % 100)) -lt $ratio ]; then - echo online-offline $memory - offline_memory_expect_success $memory - fi -done - -# -# Online all hot-pluggable memory again -# -for memory in `hotplaggable_offline_memory`; do - echo offline-online $memory - online_memory_expect_success $memory -done - -# -# Test with memory notifier error injection -# - -DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'` -NOTIFIER_ERR_INJECT_DIR=$DEBUGFS/notifier-error-inject/memory - -prerequisite_extra() -{ - msg="skip extra tests:" - - /sbin/modprobe -q -r memory-notifier-error-inject - /sbin/modprobe -q memory-notifier-error-inject priority=$priority - - if [ ! -d "$DEBUGFS" ]; then - echo $msg debugfs is not mounted >&2 - exit 0 - fi - - if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then - echo $msg memory-notifier-error-inject module is not available >&2 - exit 0 - fi -} - -prerequisite_extra - -# -# Offline $ratio percent of hot-pluggable memory -# -echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error -for memory in `hotpluggable_online_memory`; do - if [ $((RANDOM % 100)) -lt $ratio ]; then - offline_memory_expect_success $memory - fi -done - -# -# Test memory hot-add error handling (offline => online) -# -echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error -for memory in `hotplaggable_offline_memory`; do - online_memory_expect_fail $memory -done - -# -# Online all hot-pluggable memory -# -echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error -for memory in `hotplaggable_offline_memory`; do - online_memory_expect_success $memory -done - -# -# Test memory hot-remove error handling (online => offline) -# -echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error -for memory in `hotpluggable_online_memory`; do - offline_memory_expect_fail $memory -done - -echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error -/sbin/modprobe -q -r memory-notifier-error-inject diff --git a/mount/.gitignore b/mount/.gitignore new file mode 100644 index 0000000..856ad41 --- /dev/null +++ b/mount/.gitignore @@ -0,0 +1 @@ +unprivileged-remount-test diff --git a/mount/Makefile b/mount/Makefile index 337d853..5e35c9c 100644 --- a/mount/Makefile +++ b/mount/Makefile @@ -1,17 +1,21 @@ # Makefile for mount selftests. - +CFLAGS = -Wall \ + -O2 all: unprivileged-remount-test unprivileged-remount-test: unprivileged-remount-test.c - gcc -Wall -O2 unprivileged-remount-test.c -o unprivileged-remount-test + $(CC) $(CFLAGS) unprivileged-remount-test.c -o unprivileged-remount-test -# Allow specific tests to be selected. -test_unprivileged_remount: unprivileged-remount-test - @if [ -f /proc/self/uid_map ] ; then ./unprivileged-remount-test ; fi +include ../lib.mk -run_tests: all test_unprivileged_remount +TEST_PROGS := unprivileged-remount-test +override RUN_TESTS := if [ -f /proc/self/uid_map ] ; \ + then \ + ./unprivileged-remount-test ; \ + else \ + echo "WARN: No /proc/self/uid_map exist, test skipped." ; \ + fi +override EMIT_TESTS := echo "$(RUN_TESTS)" clean: rm -f unprivileged-remount-test - -.PHONY: all test_unprivileged_remount diff --git a/mqueue/Makefile b/mqueue/Makefile index 8056e2e..eebac29 100644 --- a/mqueue/Makefile +++ b/mqueue/Makefile @@ -1,10 +1,20 @@ -all: - gcc -O2 mq_open_tests.c -o mq_open_tests -lrt - gcc -O2 -o mq_perf_tests mq_perf_tests.c -lrt -lpthread -lpopt +CFLAGS += -O2 +LDLIBS = -lrt -lpthread -lpopt +TEST_PROGS := mq_open_tests mq_perf_tests -run_tests: - @./mq_open_tests /test1 || echo "mq_open_tests: [FAIL]" - @./mq_perf_tests || echo "mq_perf_tests: [FAIL]" +all: $(TEST_PROGS) + +include ../lib.mk + +override define RUN_TESTS + @./mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]" + @./mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]" +endef + +override define EMIT_TESTS + echo "./mq_open_tests /test1 || echo \"selftests: mq_open_tests [FAIL]\"" + echo "./mq_perf_tests || echo \"selftests: mq_perf_tests [FAIL]\"" +endef clean: rm -f mq_open_tests mq_perf_tests diff --git a/net/Makefile b/net/Makefile index 62f22cc..fac4782 100644 --- a/net/Makefile +++ b/net/Makefile @@ -1,6 +1,5 @@ # Makefile for net selftests -CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall -O2 -g CFLAGS += -I../../../../usr/include/ @@ -11,9 +10,10 @@ all: $(NET_PROGS) %: %.c $(CC) $(CFLAGS) -o $@ $^ -run_tests: all - @/bin/sh ./run_netsocktests || echo "sockettests: [FAIL]" - @/bin/sh ./run_afpackettests || echo "afpackettests: [FAIL]" - ./test_bpf.sh +TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh +TEST_FILES := $(NET_PROGS) + +include ../lib.mk + clean: $(RM) $(NET_PROGS) diff --git a/net/psock_fanout.c b/net/psock_fanout.c index 6f67333..4124593 100644 --- a/net/psock_fanout.c +++ b/net/psock_fanout.c @@ -19,6 +19,8 @@ * - PACKET_FANOUT_LB * - PACKET_FANOUT_CPU * - PACKET_FANOUT_ROLLOVER + * - PACKET_FANOUT_CBPF + * - PACKET_FANOUT_EBPF * * Todo: * - functionality: PACKET_FANOUT_FLAG_DEFRAG @@ -44,7 +46,9 @@ #include #include #include +#include /* for __NR_bpf */ #include +#include #include #include #include @@ -91,6 +95,51 @@ static int sock_fanout_open(uint16_t typeflags, int num_packets) return fd; } +static void sock_fanout_set_ebpf(int fd) +{ + const int len_off = __builtin_offsetof(struct __sk_buff, len); + struct bpf_insn prog[] = { + { BPF_ALU64 | BPF_MOV | BPF_X, 6, 1, 0, 0 }, + { BPF_LDX | BPF_W | BPF_MEM, 0, 6, len_off, 0 }, + { BPF_JMP | BPF_JGE | BPF_K, 0, 0, 1, DATA_LEN }, + { BPF_JMP | BPF_JA | BPF_K, 0, 0, 4, 0 }, + { BPF_LD | BPF_B | BPF_ABS, 0, 0, 0, 0x50 }, + { BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 2, DATA_CHAR }, + { BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1, DATA_CHAR_1 }, + { BPF_ALU | BPF_MOV | BPF_K, 0, 0, 0, 0 }, + { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 } + }; + char log_buf[512]; + union bpf_attr attr; + int pfd; + + memset(&attr, 0, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.insns = (unsigned long) prog; + attr.insn_cnt = sizeof(prog) / sizeof(prog[0]); + attr.license = (unsigned long) "GPL"; + attr.log_buf = (unsigned long) log_buf, + attr.log_size = sizeof(log_buf), + attr.log_level = 1, + + pfd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); + if (pfd < 0) { + perror("bpf"); + fprintf(stderr, "bpf verifier:\n%s\n", log_buf); + exit(1); + } + + if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) { + perror("fanout data ebpf"); + exit(1); + } + + if (close(pfd)) { + perror("close ebpf"); + exit(1); + } +} + static char *sock_fanout_open_ring(int fd) { struct tpacket_req req = { @@ -115,8 +164,8 @@ static char *sock_fanout_open_ring(int fd) ring = mmap(0, req.tp_block_size * req.tp_block_nr, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if (!ring) { - fprintf(stderr, "packetsock ring mmap\n"); + if (ring == MAP_FAILED) { + perror("packetsock ring mmap"); exit(1); } @@ -209,6 +258,7 @@ static int test_datapath(uint16_t typeflags, int port_off, { const int expect0[] = { 0, 0 }; char *rings[2]; + uint8_t type = typeflags & 0xFF; int fds[2], fds_udp[2][2], ret; fprintf(stderr, "test: datapath 0x%hx\n", typeflags); @@ -219,6 +269,11 @@ static int test_datapath(uint16_t typeflags, int port_off, fprintf(stderr, "ERROR: failed open\n"); exit(1); } + if (type == PACKET_FANOUT_CBPF) + sock_setfilter(fds[0], SOL_PACKET, PACKET_FANOUT_DATA); + else if (type == PACKET_FANOUT_EBPF) + sock_fanout_set_ebpf(fds[0]); + rings[0] = sock_fanout_open_ring(fds[0]); rings[1] = sock_fanout_open_ring(fds[1]); pair_udp_open(fds_udp[0], PORT_BASE); @@ -227,11 +282,11 @@ static int test_datapath(uint16_t typeflags, int port_off, /* Send data, but not enough to overflow a queue */ pair_udp_send(fds_udp[0], 15); - pair_udp_send(fds_udp[1], 5); + pair_udp_send_char(fds_udp[1], 5, DATA_CHAR_1); ret = sock_fanout_read(fds, rings, expect1); /* Send more data, overflow the queue */ - pair_udp_send(fds_udp[0], 15); + pair_udp_send_char(fds_udp[0], 15, DATA_CHAR_1); /* TODO: ensure consistent order between expect1 and expect2 */ ret |= sock_fanout_read(fds, rings, expect2); @@ -272,9 +327,10 @@ int main(int argc, char **argv) const int expect_hash[2][2] = { { 15, 5 }, { 20, 5 } }; const int expect_hash_rb[2][2] = { { 15, 5 }, { 20, 15 } }; const int expect_lb[2][2] = { { 10, 10 }, { 18, 17 } }; - const int expect_rb[2][2] = { { 20, 0 }, { 20, 15 } }; + const int expect_rb[2][2] = { { 15, 5 }, { 20, 15 } }; const int expect_cpu0[2][2] = { { 20, 0 }, { 20, 0 } }; const int expect_cpu1[2][2] = { { 0, 20 }, { 0, 20 } }; + const int expect_bpf[2][2] = { { 15, 5 }, { 15, 20 } }; int port_off = 2, tries = 5, ret; test_control_single(); @@ -296,6 +352,11 @@ int main(int argc, char **argv) ret |= test_datapath(PACKET_FANOUT_ROLLOVER, port_off, expect_rb[0], expect_rb[1]); + ret |= test_datapath(PACKET_FANOUT_CBPF, + port_off, expect_bpf[0], expect_bpf[1]); + ret |= test_datapath(PACKET_FANOUT_EBPF, + port_off, expect_bpf[0], expect_bpf[1]); + set_cpuaffinity(0); ret |= test_datapath(PACKET_FANOUT_CPU, port_off, expect_cpu0[0], expect_cpu0[1]); diff --git a/net/psock_lib.h b/net/psock_lib.h index 37da54a..24bc7ec 100644 --- a/net/psock_lib.h +++ b/net/psock_lib.h @@ -30,6 +30,7 @@ #define DATA_LEN 100 #define DATA_CHAR 'a' +#define DATA_CHAR_1 'b' #define PORT_BASE 8000 @@ -37,29 +38,36 @@ # define __maybe_unused __attribute__ ((__unused__)) #endif -static __maybe_unused void pair_udp_setfilter(int fd) +static __maybe_unused void sock_setfilter(int fd, int lvl, int optnum) { struct sock_filter bpf_filter[] = { { 0x80, 0, 0, 0x00000000 }, /* LD pktlen */ - { 0x35, 0, 5, DATA_LEN }, /* JGE DATA_LEN [f goto nomatch]*/ + { 0x35, 0, 4, DATA_LEN }, /* JGE DATA_LEN [f goto nomatch]*/ { 0x30, 0, 0, 0x00000050 }, /* LD ip[80] */ - { 0x15, 0, 3, DATA_CHAR }, /* JEQ DATA_CHAR [f goto nomatch]*/ - { 0x30, 0, 0, 0x00000051 }, /* LD ip[81] */ - { 0x15, 0, 1, DATA_CHAR }, /* JEQ DATA_CHAR [f goto nomatch]*/ + { 0x15, 1, 0, DATA_CHAR }, /* JEQ DATA_CHAR [t goto match]*/ + { 0x15, 0, 1, DATA_CHAR_1}, /* JEQ DATA_CHAR_1 [t goto match]*/ { 0x06, 0, 0, 0x00000060 }, /* RET match */ { 0x06, 0, 0, 0x00000000 }, /* RET no match */ }; struct sock_fprog bpf_prog; + if (lvl == SOL_PACKET && optnum == PACKET_FANOUT_DATA) + bpf_filter[5].code = 0x16; /* RET A */ + bpf_prog.filter = bpf_filter; bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter); - if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_prog, + if (setsockopt(fd, lvl, optnum, &bpf_prog, sizeof(bpf_prog))) { perror("setsockopt SO_ATTACH_FILTER"); exit(1); } } +static __maybe_unused void pair_udp_setfilter(int fd) +{ + sock_setfilter(fd, SOL_SOCKET, SO_ATTACH_FILTER); +} + static __maybe_unused void pair_udp_open(int fds[], uint16_t port) { struct sockaddr_in saddr, daddr; @@ -96,11 +104,11 @@ static __maybe_unused void pair_udp_open(int fds[], uint16_t port) } } -static __maybe_unused void pair_udp_send(int fds[], int num) +static __maybe_unused void pair_udp_send_char(int fds[], int num, char payload) { char buf[DATA_LEN], rbuf[DATA_LEN]; - memset(buf, DATA_CHAR, sizeof(buf)); + memset(buf, payload, sizeof(buf)); while (num--) { /* Should really handle EINTR and EAGAIN */ if (write(fds[0], buf, sizeof(buf)) != sizeof(buf)) { @@ -118,6 +126,11 @@ static __maybe_unused void pair_udp_send(int fds[], int num) } } +static __maybe_unused void pair_udp_send(int fds[], int num) +{ + return pair_udp_send_char(fds, num, DATA_CHAR); +} + static __maybe_unused void pair_udp_close(int fds[]) { close(fds[0]); diff --git a/net/run_afpackettests b/net/run_afpackettests old mode 100644 new mode 100755 diff --git a/net/run_netsocktests b/net/run_netsocktests old mode 100644 new mode 100755 diff --git a/powerpc/Makefile b/powerpc/Makefile index 1d5e7ad..03ca2e6 100644 --- a/powerpc/Makefile +++ b/powerpc/Makefile @@ -8,27 +8,41 @@ ifeq ($(ARCH),powerpc) GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown") -CC := $(CROSS_COMPILE)$(CC) CFLAGS := -Wall -O2 -flto -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS) -export CC CFLAGS +export CFLAGS -TARGETS = pmu copyloops mm tm primitives stringloops +SUB_DIRS = pmu copyloops mm tm primitives stringloops vphn switch_endian dscr endif -all: $(TARGETS) +all: $(SUB_DIRS) -$(TARGETS): +$(SUB_DIRS): $(MAKE) -k -C $@ all -run_tests: all - @for TARGET in $(TARGETS); do \ +include ../lib.mk + +override define RUN_TESTS + @for TARGET in $(SUB_DIRS); do \ $(MAKE) -C $$TARGET run_tests; \ done; +endef + +override define INSTALL_RULE + @for TARGET in $(SUB_DIRS); do \ + $(MAKE) -C $$TARGET install; \ + done; +endef + +override define EMIT_TESTS + @for TARGET in $(SUB_DIRS); do \ + $(MAKE) -s -C $$TARGET emit_tests; \ + done; +endef clean: - @for TARGET in $(TARGETS); do \ + @for TARGET in $(SUB_DIRS); do \ $(MAKE) -C $$TARGET clean; \ done; rm -f tags @@ -36,4 +50,4 @@ clean: tags: find . -name '*.c' -o -name '*.h' | xargs ctags -.PHONY: all run_tests clean tags $(TARGETS) +.PHONY: tags $(SUB_DIRS) diff --git a/powerpc/copyloops/Makefile b/powerpc/copyloops/Makefile index 6f2d3be..384843e 100644 --- a/powerpc/copyloops/Makefile +++ b/powerpc/copyloops/Makefile @@ -2,28 +2,24 @@ CFLAGS += -m64 CFLAGS += -I$(CURDIR) CFLAGS += -D SELFTEST +CFLAGS += -maltivec # Use our CFLAGS for the implicit .S rule ASFLAGS = $(CFLAGS) -PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7 +TEST_PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7 EXTRA_SOURCES := validate.c ../harness.c -all: $(PROGS) +all: $(TEST_PROGS) copyuser_64: CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_base copyuser_power7: CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_power7 memcpy_64: CPPFLAGS += -D COPY_LOOP=test_memcpy memcpy_power7: CPPFLAGS += -D COPY_LOOP=test_memcpy_power7 -$(PROGS): $(EXTRA_SOURCES) +$(TEST_PROGS): $(EXTRA_SOURCES) -run_tests: all - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +include ../../lib.mk clean: - rm -f $(PROGS) *.o - -.PHONY: all run_tests clean + rm -f $(TEST_PROGS) *.o diff --git a/powerpc/copyloops/asm/ppc_asm.h b/powerpc/copyloops/asm/ppc_asm.h index d1dc374..50ae7d2 100644 --- a/powerpc/copyloops/asm/ppc_asm.h +++ b/powerpc/copyloops/asm/ppc_asm.h @@ -4,39 +4,6 @@ #define r1 1 -#define vr0 0 -#define vr1 1 -#define vr2 2 -#define vr3 3 -#define vr4 4 -#define vr5 5 -#define vr6 6 -#define vr7 7 -#define vr8 8 -#define vr9 9 -#define vr10 10 -#define vr11 11 -#define vr12 12 -#define vr13 13 -#define vr14 14 -#define vr15 15 -#define vr16 16 -#define vr17 17 -#define vr18 18 -#define vr19 19 -#define vr20 20 -#define vr21 21 -#define vr22 22 -#define vr23 23 -#define vr24 24 -#define vr25 25 -#define vr26 26 -#define vr27 27 -#define vr28 28 -#define vr29 29 -#define vr30 30 -#define vr31 31 - #define R14 r14 #define R15 r15 #define R16 r16 diff --git a/powerpc/dscr/.gitignore b/powerpc/dscr/.gitignore new file mode 100644 index 0000000..b585c6c --- /dev/null +++ b/powerpc/dscr/.gitignore @@ -0,0 +1,7 @@ +dscr_default_test +dscr_explicit_test +dscr_inherit_exec_test +dscr_inherit_test +dscr_sysfs_test +dscr_sysfs_thread_test +dscr_user_test diff --git a/powerpc/dscr/Makefile b/powerpc/dscr/Makefile new file mode 100644 index 0000000..49327ee --- /dev/null +++ b/powerpc/dscr/Makefile @@ -0,0 +1,14 @@ +TEST_PROGS := dscr_default_test dscr_explicit_test dscr_user_test \ + dscr_inherit_test dscr_inherit_exec_test dscr_sysfs_test \ + dscr_sysfs_thread_test + +dscr_default_test: LDLIBS += -lpthread + +all: $(TEST_PROGS) + +$(TEST_PROGS): ../harness.c + +include ../../lib.mk + +clean: + rm -f $(TEST_PROGS) *.o diff --git a/powerpc/dscr/dscr.h b/powerpc/dscr/dscr.h new file mode 100644 index 0000000..a36af1b --- /dev/null +++ b/powerpc/dscr/dscr.h @@ -0,0 +1,127 @@ +/* + * POWER Data Stream Control Register (DSCR) + * + * This header file contains helper functions and macros + * required for all the DSCR related test cases. + * + * Copyright 2012, Anton Blanchard, IBM Corporation. + * Copyright 2015, Anshuman Khandual, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ +#ifndef _SELFTESTS_POWERPC_DSCR_DSCR_H +#define _SELFTESTS_POWERPC_DSCR_DSCR_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" + +#define SPRN_DSCR 0x11 /* Privilege state SPR */ +#define SPRN_DSCR_USR 0x03 /* Problem state SPR */ +#define THREADS 100 /* Max threads */ +#define COUNT 100 /* Max iterations */ +#define DSCR_MAX 16 /* Max DSCR value */ +#define LEN_MAX 100 /* Max name length */ + +#define DSCR_DEFAULT "/sys/devices/system/cpu/dscr_default" +#define CPU_PATH "/sys/devices/system/cpu/" + +#define rmb() asm volatile("lwsync":::"memory") +#define wmb() asm volatile("lwsync":::"memory") + +#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) + +/* Prilvilege state DSCR access */ +inline unsigned long get_dscr(void) +{ + unsigned long ret; + + asm volatile("mfspr %0,%1" : "=r" (ret): "i" (SPRN_DSCR)); + + return ret; +} + +inline void set_dscr(unsigned long val) +{ + asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); +} + +/* Problem state DSCR access */ +inline unsigned long get_dscr_usr(void) +{ + unsigned long ret; + + asm volatile("mfspr %0,%1" : "=r" (ret): "i" (SPRN_DSCR_USR)); + + return ret; +} + +inline void set_dscr_usr(unsigned long val) +{ + asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR_USR)); +} + +/* Default DSCR access */ +unsigned long get_default_dscr(void) +{ + int fd = -1, ret; + char buf[16]; + unsigned long val; + + if (fd == -1) { + fd = open(DSCR_DEFAULT, O_RDONLY); + if (fd == -1) { + perror("open() failed"); + exit(1); + } + } + memset(buf, 0, sizeof(buf)); + lseek(fd, 0, SEEK_SET); + ret = read(fd, buf, sizeof(buf)); + if (ret == -1) { + perror("read() failed"); + exit(1); + } + sscanf(buf, "%lx", &val); + close(fd); + return val; +} + +void set_default_dscr(unsigned long val) +{ + int fd = -1, ret; + char buf[16]; + + if (fd == -1) { + fd = open(DSCR_DEFAULT, O_RDWR); + if (fd == -1) { + perror("open() failed"); + exit(1); + } + } + sprintf(buf, "%lx\n", val); + ret = write(fd, buf, strlen(buf)); + if (ret == -1) { + perror("write() failed"); + exit(1); + } + close(fd); +} + +double uniform_deviate(int seed) +{ + return seed * (1.0 / (RAND_MAX + 1.0)); +} +#endif /* _SELFTESTS_POWERPC_DSCR_DSCR_H */ diff --git a/powerpc/dscr/dscr_default_test.c b/powerpc/dscr/dscr_default_test.c new file mode 100644 index 0000000..df17c3b --- /dev/null +++ b/powerpc/dscr/dscr_default_test.c @@ -0,0 +1,127 @@ +/* + * POWER Data Stream Control Register (DSCR) default test + * + * This test modifies the system wide default DSCR through + * it's sysfs interface and then verifies that all threads + * see the correct changed DSCR value immediately. + * + * Copyright 2012, Anton Blanchard, IBM Corporation. + * Copyright 2015, Anshuman Khandual, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ +#include "dscr.h" + +static unsigned long dscr; /* System DSCR default */ +static unsigned long sequence; +static unsigned long result[THREADS]; + +static void *do_test(void *in) +{ + unsigned long thread = (unsigned long)in; + unsigned long i; + + for (i = 0; i < COUNT; i++) { + unsigned long d, cur_dscr, cur_dscr_usr; + unsigned long s1, s2; + + s1 = ACCESS_ONCE(sequence); + if (s1 & 1) + continue; + rmb(); + + d = dscr; + cur_dscr = get_dscr(); + cur_dscr_usr = get_dscr_usr(); + + rmb(); + s2 = sequence; + + if (s1 != s2) + continue; + + if (cur_dscr != d) { + fprintf(stderr, "thread %ld kernel DSCR should be %ld " + "but is %ld\n", thread, d, cur_dscr); + result[thread] = 1; + pthread_exit(&result[thread]); + } + + if (cur_dscr_usr != d) { + fprintf(stderr, "thread %ld user DSCR should be %ld " + "but is %ld\n", thread, d, cur_dscr_usr); + result[thread] = 1; + pthread_exit(&result[thread]); + } + } + result[thread] = 0; + pthread_exit(&result[thread]); +} + +int dscr_default(void) +{ + pthread_t threads[THREADS]; + unsigned long i, *status[THREADS]; + unsigned long orig_dscr_default; + + orig_dscr_default = get_default_dscr(); + + /* Initial DSCR default */ + dscr = 1; + set_default_dscr(dscr); + + /* Spawn all testing threads */ + for (i = 0; i < THREADS; i++) { + if (pthread_create(&threads[i], NULL, do_test, (void *)i)) { + perror("pthread_create() failed"); + goto fail; + } + } + + srand(getpid()); + + /* Keep changing the DSCR default */ + for (i = 0; i < COUNT; i++) { + double ret = uniform_deviate(rand()); + + if (ret < 0.0001) { + sequence++; + wmb(); + + dscr++; + if (dscr > DSCR_MAX) + dscr = 0; + + set_default_dscr(dscr); + + wmb(); + sequence++; + } + } + + /* Individual testing thread exit status */ + for (i = 0; i < THREADS; i++) { + if (pthread_join(threads[i], (void **)&(status[i]))) { + perror("pthread_join() failed"); + goto fail; + } + + if (*status[i]) { + printf("%ldth thread failed to join with %ld status\n", + i, *status[i]); + goto fail; + } + } + set_default_dscr(orig_dscr_default); + return 0; +fail: + set_default_dscr(orig_dscr_default); + return 1; +} + +int main(int argc, char *argv[]) +{ + return test_harness(dscr_default, "dscr_default_test"); +} diff --git a/powerpc/dscr/dscr_explicit_test.c b/powerpc/dscr/dscr_explicit_test.c new file mode 100644 index 0000000..ad9c3ec --- /dev/null +++ b/powerpc/dscr/dscr_explicit_test.c @@ -0,0 +1,71 @@ +/* + * POWER Data Stream Control Register (DSCR) explicit test + * + * This test modifies the DSCR value using mtspr instruction and + * verifies the change with mfspr instruction. It uses both the + * privilege state SPR and the problem state SPR for this purpose. + * + * When using the privilege state SPR, the instructions such as + * mfspr or mtspr are priviledged and the kernel emulates them + * for us. Instructions using problem state SPR can be exuecuted + * directly without any emulation if the HW supports them. Else + * they also get emulated by the kernel. + * + * Copyright 2012, Anton Blanchard, IBM Corporation. + * Copyright 2015, Anshuman Khandual, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ +#include "dscr.h" + +int dscr_explicit(void) +{ + unsigned long i, dscr = 0; + + srand(getpid()); + set_dscr(dscr); + + for (i = 0; i < COUNT; i++) { + unsigned long cur_dscr, cur_dscr_usr; + double ret = uniform_deviate(rand()); + + if (ret < 0.001) { + dscr++; + if (dscr > DSCR_MAX) + dscr = 0; + + set_dscr(dscr); + } + + cur_dscr = get_dscr(); + if (cur_dscr != dscr) { + fprintf(stderr, "Kernel DSCR should be %ld but " + "is %ld\n", dscr, cur_dscr); + return 1; + } + + ret = uniform_deviate(rand()); + if (ret < 0.001) { + dscr++; + if (dscr > DSCR_MAX) + dscr = 0; + + set_dscr_usr(dscr); + } + + cur_dscr_usr = get_dscr_usr(); + if (cur_dscr_usr != dscr) { + fprintf(stderr, "User DSCR should be %ld but " + "is %ld\n", dscr, cur_dscr_usr); + return 1; + } + } + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(dscr_explicit, "dscr_explicit_test"); +} diff --git a/powerpc/dscr/dscr_inherit_exec_test.c b/powerpc/dscr/dscr_inherit_exec_test.c new file mode 100644 index 0000000..8265504 --- /dev/null +++ b/powerpc/dscr/dscr_inherit_exec_test.c @@ -0,0 +1,117 @@ +/* + * POWER Data Stream Control Register (DSCR) fork exec test + * + * This testcase modifies the DSCR using mtspr, forks & execs and + * verifies that the child is using the changed DSCR using mfspr. + * + * When using the privilege state SPR, the instructions such as + * mfspr or mtspr are priviledged and the kernel emulates them + * for us. Instructions using problem state SPR can be exuecuted + * directly without any emulation if the HW supports them. Else + * they also get emulated by the kernel. + * + * Copyright 2012, Anton Blanchard, IBM Corporation. + * Copyright 2015, Anshuman Khandual, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ +#include "dscr.h" + +static char prog[LEN_MAX]; + +static void do_exec(unsigned long parent_dscr) +{ + unsigned long cur_dscr, cur_dscr_usr; + + cur_dscr = get_dscr(); + cur_dscr_usr = get_dscr_usr(); + + if (cur_dscr != parent_dscr) { + fprintf(stderr, "Parent DSCR %ld was not inherited " + "over exec (kernel value)\n", parent_dscr); + exit(1); + } + + if (cur_dscr_usr != parent_dscr) { + fprintf(stderr, "Parent DSCR %ld was not inherited " + "over exec (user value)\n", parent_dscr); + exit(1); + } + exit(0); +} + +int dscr_inherit_exec(void) +{ + unsigned long i, dscr = 0; + pid_t pid; + + for (i = 0; i < COUNT; i++) { + dscr++; + if (dscr > DSCR_MAX) + dscr = 0; + + if (dscr == get_default_dscr()) + continue; + + if (i % 2 == 0) + set_dscr_usr(dscr); + else + set_dscr(dscr); + + /* + * XXX: Force a context switch out so that DSCR + * current value is copied into the thread struct + * which is required for the child to inherit the + * changed value. + */ + sleep(1); + + pid = fork(); + if (pid == -1) { + perror("fork() failed"); + exit(1); + } else if (pid) { + int status; + + if (waitpid(pid, &status, 0) == -1) { + perror("waitpid() failed"); + exit(1); + } + + if (!WIFEXITED(status)) { + fprintf(stderr, "Child didn't exit cleanly\n"); + exit(1); + } + + if (WEXITSTATUS(status) != 0) { + fprintf(stderr, "Child didn't exit cleanly\n"); + return 1; + } + } else { + char dscr_str[16]; + + sprintf(dscr_str, "%ld", dscr); + execlp(prog, prog, "exec", dscr_str, NULL); + exit(1); + } + } + return 0; +} + +int main(int argc, char *argv[]) +{ + if (argc == 3 && !strcmp(argv[1], "exec")) { + unsigned long parent_dscr; + + parent_dscr = atoi(argv[2]); + do_exec(parent_dscr); + } else if (argc != 1) { + fprintf(stderr, "Usage: %s\n", argv[0]); + exit(1); + } + + strncpy(prog, argv[0], strlen(argv[0])); + return test_harness(dscr_inherit_exec, "dscr_inherit_exec_test"); +} diff --git a/powerpc/dscr/dscr_inherit_test.c b/powerpc/dscr/dscr_inherit_test.c new file mode 100644 index 0000000..4e414ca --- /dev/null +++ b/powerpc/dscr/dscr_inherit_test.c @@ -0,0 +1,95 @@ +/* + * POWER Data Stream Control Register (DSCR) fork test + * + * This testcase modifies the DSCR using mtspr, forks and then + * verifies that the child process has the correct changed DSCR + * value using mfspr. + * + * When using the privilege state SPR, the instructions such as + * mfspr or mtspr are priviledged and the kernel emulates them + * for us. Instructions using problem state SPR can be exuecuted + * directly without any emulation if the HW supports them. Else + * they also get emulated by the kernel. + * + * Copyright 2012, Anton Blanchard, IBM Corporation. + * Copyright 2015, Anshuman Khandual, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ +#include "dscr.h" + +int dscr_inherit(void) +{ + unsigned long i, dscr = 0; + pid_t pid; + + srand(getpid()); + set_dscr(dscr); + + for (i = 0; i < COUNT; i++) { + unsigned long cur_dscr, cur_dscr_usr; + + dscr++; + if (dscr > DSCR_MAX) + dscr = 0; + + if (i % 2 == 0) + set_dscr_usr(dscr); + else + set_dscr(dscr); + + /* + * XXX: Force a context switch out so that DSCR + * current value is copied into the thread struct + * which is required for the child to inherit the + * changed value. + */ + sleep(1); + + pid = fork(); + if (pid == -1) { + perror("fork() failed"); + exit(1); + } else if (pid) { + int status; + + if (waitpid(pid, &status, 0) == -1) { + perror("waitpid() failed"); + exit(1); + } + + if (!WIFEXITED(status)) { + fprintf(stderr, "Child didn't exit cleanly\n"); + exit(1); + } + + if (WEXITSTATUS(status) != 0) { + fprintf(stderr, "Child didn't exit cleanly\n"); + return 1; + } + } else { + cur_dscr = get_dscr(); + if (cur_dscr != dscr) { + fprintf(stderr, "Kernel DSCR should be %ld " + "but is %ld\n", dscr, cur_dscr); + exit(1); + } + + cur_dscr_usr = get_dscr_usr(); + if (cur_dscr_usr != dscr) { + fprintf(stderr, "User DSCR should be %ld " + "but is %ld\n", dscr, cur_dscr_usr); + exit(1); + } + exit(0); + } + } + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(dscr_inherit, "dscr_inherit_test"); +} diff --git a/powerpc/dscr/dscr_sysfs_test.c b/powerpc/dscr/dscr_sysfs_test.c new file mode 100644 index 0000000..17fb1b4 --- /dev/null +++ b/powerpc/dscr/dscr_sysfs_test.c @@ -0,0 +1,97 @@ +/* + * POWER Data Stream Control Register (DSCR) sysfs interface test + * + * This test updates to system wide DSCR default through the sysfs interface + * and then verifies that all the CPU specific DSCR defaults are updated as + * well verified from their sysfs interfaces. + * + * Copyright 2015, Anshuman Khandual, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ +#include "dscr.h" + +static int check_cpu_dscr_default(char *file, unsigned long val) +{ + char buf[10]; + int fd, rc; + + fd = open(file, O_RDWR); + if (fd == -1) { + perror("open() failed"); + return 1; + } + + rc = read(fd, buf, sizeof(buf)); + if (rc == -1) { + perror("read() failed"); + return 1; + } + close(fd); + + buf[rc] = '\0'; + if (strtol(buf, NULL, 16) != val) { + printf("DSCR match failed: %ld (system) %ld (cpu)\n", + val, strtol(buf, NULL, 16)); + return 1; + } + return 0; +} + +static int check_all_cpu_dscr_defaults(unsigned long val) +{ + DIR *sysfs; + struct dirent *dp; + char file[LEN_MAX]; + + sysfs = opendir(CPU_PATH); + if (!sysfs) { + perror("opendir() failed"); + return 1; + } + + while ((dp = readdir(sysfs))) { + if (!(dp->d_type & DT_DIR)) + continue; + if (!strcmp(dp->d_name, "cpuidle")) + continue; + if (!strstr(dp->d_name, "cpu")) + continue; + + sprintf(file, "%s%s/dscr", CPU_PATH, dp->d_name); + if (access(file, F_OK)) + continue; + + if (check_cpu_dscr_default(file, val)) + return 1; + } + closedir(sysfs); + return 0; +} + +int dscr_sysfs(void) +{ + unsigned long orig_dscr_default; + int i, j; + + orig_dscr_default = get_default_dscr(); + for (i = 0; i < COUNT; i++) { + for (j = 0; j < DSCR_MAX; j++) { + set_default_dscr(j); + if (check_all_cpu_dscr_defaults(j)) + goto fail; + } + } + set_default_dscr(orig_dscr_default); + return 0; +fail: + set_default_dscr(orig_dscr_default); + return 1; +} + +int main(int argc, char *argv[]) +{ + return test_harness(dscr_sysfs, "dscr_sysfs_test"); +} diff --git a/powerpc/dscr/dscr_sysfs_thread_test.c b/powerpc/dscr/dscr_sysfs_thread_test.c new file mode 100644 index 0000000..ad97b59 --- /dev/null +++ b/powerpc/dscr/dscr_sysfs_thread_test.c @@ -0,0 +1,80 @@ +/* + * POWER Data Stream Control Register (DSCR) sysfs thread test + * + * This test updates the system wide DSCR default value through + * sysfs interface which should then update all the CPU specific + * DSCR default values which must also be then visible to threads + * executing on individual CPUs on the system. + * + * Copyright 2015, Anshuman Khandual, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ +#define _GNU_SOURCE +#include "dscr.h" + +static int test_thread_dscr(unsigned long val) +{ + unsigned long cur_dscr, cur_dscr_usr; + + cur_dscr = get_dscr(); + cur_dscr_usr = get_dscr_usr(); + + if (val != cur_dscr) { + printf("[cpu %d] Kernel DSCR should be %ld but is %ld\n", + sched_getcpu(), val, cur_dscr); + return 1; + } + + if (val != cur_dscr_usr) { + printf("[cpu %d] User DSCR should be %ld but is %ld\n", + sched_getcpu(), val, cur_dscr_usr); + return 1; + } + return 0; +} + +static int check_cpu_dscr_thread(unsigned long val) +{ + cpu_set_t mask; + int cpu; + + for (cpu = 0; cpu < CPU_SETSIZE; cpu++) { + CPU_ZERO(&mask); + CPU_SET(cpu, &mask); + if (sched_setaffinity(0, sizeof(mask), &mask)) + continue; + + if (test_thread_dscr(val)) + return 1; + } + return 0; + +} + +int dscr_sysfs_thread(void) +{ + unsigned long orig_dscr_default; + int i, j; + + orig_dscr_default = get_default_dscr(); + for (i = 0; i < COUNT; i++) { + for (j = 0; j < DSCR_MAX; j++) { + set_default_dscr(j); + if (check_cpu_dscr_thread(j)) + goto fail; + } + } + set_default_dscr(orig_dscr_default); + return 0; +fail: + set_default_dscr(orig_dscr_default); + return 1; +} + +int main(int argc, char *argv[]) +{ + return test_harness(dscr_sysfs_thread, "dscr_sysfs_thread_test"); +} diff --git a/powerpc/dscr/dscr_user_test.c b/powerpc/dscr/dscr_user_test.c new file mode 100644 index 0000000..77d16b5 --- /dev/null +++ b/powerpc/dscr/dscr_user_test.c @@ -0,0 +1,61 @@ +/* + * POWER Data Stream Control Register (DSCR) SPR test + * + * This test modifies the DSCR value through both the SPR number + * based mtspr instruction and then makes sure that the same is + * reflected through mfspr instruction using either of the SPR + * numbers. + * + * When using the privilege state SPR, the instructions such as + * mfspr or mtspr are priviledged and the kernel emulates them + * for us. Instructions using problem state SPR can be exuecuted + * directly without any emulation if the HW supports them. Else + * they also get emulated by the kernel. + * + * Copyright 2013, Anton Blanchard, IBM Corporation. + * Copyright 2015, Anshuman Khandual, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ +#include "dscr.h" + +static int check_dscr(char *str) +{ + unsigned long cur_dscr, cur_dscr_usr; + + cur_dscr = get_dscr(); + cur_dscr_usr = get_dscr_usr(); + if (cur_dscr != cur_dscr_usr) { + printf("%s set, kernel get %lx != user get %lx\n", + str, cur_dscr, cur_dscr_usr); + return 1; + } + return 0; +} + +int dscr_user(void) +{ + int i; + + check_dscr(""); + + for (i = 0; i < COUNT; i++) { + set_dscr(i); + if (check_dscr("kernel")) + return 1; + } + + for (i = 0; i < COUNT; i++) { + set_dscr_usr(i); + if (check_dscr("user")) + return 1; + } + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(dscr_user, "dscr_user_test"); +} diff --git a/powerpc/harness.c b/powerpc/harness.c index 8ebc58a..f7997af 100644 --- a/powerpc/harness.c +++ b/powerpc/harness.c @@ -11,6 +11,10 @@ #include #include #include +#include +#include +#include +#include #include "subunit.h" #include "utils.h" @@ -112,3 +116,46 @@ int test_harness(int (test_function)(void), char *name) return rc; } + +static char auxv[4096]; + +void *get_auxv_entry(int type) +{ + ElfW(auxv_t) *p; + void *result; + ssize_t num; + int fd; + + fd = open("/proc/self/auxv", O_RDONLY); + if (fd == -1) { + perror("open"); + return NULL; + } + + result = NULL; + + num = read(fd, auxv, sizeof(auxv)); + if (num < 0) { + perror("read"); + goto out; + } + + if (num > sizeof(auxv)) { + printf("Overflowed auxv buffer\n"); + goto out; + } + + p = (ElfW(auxv_t) *)auxv; + + while (p->a_type != AT_NULL) { + if (p->a_type == type) { + result = (void *)p->a_un.a_val; + break; + } + + p++; + } +out: + close(fd); + return result; +} diff --git a/powerpc/mm/Makefile b/powerpc/mm/Makefile index a14c538..ee179e2 100644 --- a/powerpc/mm/Makefile +++ b/powerpc/mm/Makefile @@ -1,21 +1,17 @@ noarg: $(MAKE) -C ../ -PROGS := hugetlb_vs_thp_test subpage_prot +TEST_PROGS := hugetlb_vs_thp_test subpage_prot +TEST_FILES := tempfile -all: $(PROGS) tempfile +all: $(TEST_PROGS) $(TEST_FILES) -$(PROGS): ../harness.c +$(TEST_PROGS): ../harness.c -run_tests: all - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +include ../../lib.mk tempfile: dd if=/dev/zero of=tempfile bs=64k count=1 clean: - rm -f $(PROGS) tempfile - -.PHONY: all run_tests clean + rm -f $(TEST_PROGS) tempfile diff --git a/powerpc/mm/hugetlb_vs_thp_test.c b/powerpc/mm/hugetlb_vs_thp_test.c index 3d8e5b0..4900367 100644 --- a/powerpc/mm/hugetlb_vs_thp_test.c +++ b/powerpc/mm/hugetlb_vs_thp_test.c @@ -21,9 +21,13 @@ static int test_body(void) * Typically the mmap will fail because no huge pages are * allocated on the system. But if there are huge pages * allocated the mmap will succeed. That's fine too, we just - * munmap here before continuing. + * munmap here before continuing. munmap() length of + * MAP_HUGETLB memory must be hugepage aligned. */ - munmap(addr, SIZE); + if (munmap(addr, SIZE)) { + perror("munmap"); + return 1; + } } p = mmap(addr, SIZE, PROT_READ | PROT_WRITE, diff --git a/powerpc/pmu/Makefile b/powerpc/pmu/Makefile index c9f4263..a9099d9 100644 --- a/powerpc/pmu/Makefile +++ b/powerpc/pmu/Makefile @@ -1,38 +1,42 @@ noarg: $(MAKE) -C ../ -PROGS := count_instructions l3_bank_test per_event_excludes +TEST_PROGS := count_instructions l3_bank_test per_event_excludes EXTRA_SOURCES := ../harness.c event.c lib.c -SUB_TARGETS = ebb +all: $(TEST_PROGS) ebb -all: $(PROGS) $(SUB_TARGETS) - -$(PROGS): $(EXTRA_SOURCES) +$(TEST_PROGS): $(EXTRA_SOURCES) # loop.S can only be built 64-bit count_instructions: loop.S count_instructions.c $(EXTRA_SOURCES) $(CC) $(CFLAGS) -m64 -o $@ $^ -run_tests: all sub_run_tests - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +include ../../lib.mk -clean: sub_clean - rm -f $(PROGS) loop.o +DEFAULT_RUN_TESTS := $(RUN_TESTS) +override define RUN_TESTS + $(DEFAULT_RUN_TESTS) + $(MAKE) -C ebb run_tests +endef -$(SUB_TARGETS): - $(MAKE) -k -C $@ all +DEFAULT_EMIT_TESTS := $(EMIT_TESTS) +override define EMIT_TESTS + $(DEFAULT_EMIT_TESTS) + $(MAKE) -s -C ebb emit_tests +endef -sub_run_tests: all - @for TARGET in $(SUB_TARGETS); do \ - $(MAKE) -C $$TARGET run_tests; \ - done; +DEFAULT_INSTALL_RULE := $(INSTALL_RULE) +override define INSTALL_RULE + $(DEFAULT_INSTALL_RULE) + $(MAKE) -C ebb install +endef -sub_clean: - @for TARGET in $(SUB_TARGETS); do \ - $(MAKE) -C $$TARGET clean; \ - done; +clean: + rm -f $(TEST_PROGS) loop.o + $(MAKE) -C ebb clean + +ebb: + $(MAKE) -k -C $@ all -.PHONY: all run_tests clean sub_run_tests sub_clean $(SUB_TARGETS) +.PHONY: all run_tests clean ebb diff --git a/powerpc/pmu/ebb/Makefile b/powerpc/pmu/ebb/Makefile index 3dc4332..5cdc9db 100644 --- a/powerpc/pmu/ebb/Makefile +++ b/powerpc/pmu/ebb/Makefile @@ -4,7 +4,7 @@ noarg: # The EBB handler is 64-bit code and everything links against it CFLAGS += -m64 -PROGS := reg_access_test event_attributes_test cycles_test \ +TEST_PROGS := reg_access_test event_attributes_test cycles_test \ cycles_with_freeze_test pmc56_overflow_test \ ebb_vs_cpu_event_test cpu_event_vs_ebb_test \ cpu_event_pinned_vs_ebb_test task_event_vs_ebb_test \ @@ -16,18 +16,15 @@ PROGS := reg_access_test event_attributes_test cycles_test \ lost_exception_test no_handler_test \ cycles_with_mmcr2_test -all: $(PROGS) +all: $(TEST_PROGS) -$(PROGS): ../../harness.c ../event.c ../lib.c ebb.c ebb_handler.S trace.c busy_loop.S +$(TEST_PROGS): ../../harness.c ../event.c ../lib.c ebb.c ebb_handler.S trace.c busy_loop.S instruction_count_test: ../loop.S lost_exception_test: ../lib.c -run_tests: all - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +include ../../../lib.mk clean: - rm -f $(PROGS) + rm -f $(TEST_PROGS) diff --git a/powerpc/pmu/lib.c b/powerpc/pmu/lib.c index 9768dea..a07104c 100644 --- a/powerpc/pmu/lib.c +++ b/powerpc/pmu/lib.c @@ -5,15 +5,10 @@ #define _GNU_SOURCE /* For CPU_ZERO etc. */ -#include #include -#include -#include #include #include #include -#include -#include #include #include "utils.h" @@ -256,45 +251,3 @@ out: return rc; } -static char auxv[4096]; - -void *get_auxv_entry(int type) -{ - ElfW(auxv_t) *p; - void *result; - ssize_t num; - int fd; - - fd = open("/proc/self/auxv", O_RDONLY); - if (fd == -1) { - perror("open"); - return NULL; - } - - result = NULL; - - num = read(fd, auxv, sizeof(auxv)); - if (num < 0) { - perror("read"); - goto out; - } - - if (num > sizeof(auxv)) { - printf("Overflowed auxv buffer\n"); - goto out; - } - - p = (ElfW(auxv_t) *)auxv; - - while (p->a_type != AT_NULL) { - if (p->a_type == type) { - result = (void *)p->a_un.a_val; - break; - } - - p++; - } -out: - close(fd); - return result; -} diff --git a/powerpc/pmu/lib.h b/powerpc/pmu/lib.h index 0f0339c..ca5d72a 100644 --- a/powerpc/pmu/lib.h +++ b/powerpc/pmu/lib.h @@ -29,7 +29,6 @@ extern int notify_parent(union pipe write_pipe); extern int notify_parent_of_error(union pipe write_pipe); extern pid_t eat_cpu(int (test_function)(void)); extern bool require_paranoia_below(int level); -extern void *get_auxv_entry(int type); struct addr_range { uint64_t first, last; diff --git a/powerpc/primitives/Makefile b/powerpc/primitives/Makefile index ea737ca..b68c622 100644 --- a/powerpc/primitives/Makefile +++ b/powerpc/primitives/Makefile @@ -1,17 +1,12 @@ CFLAGS += -I$(CURDIR) -PROGS := load_unaligned_zeropad +TEST_PROGS := load_unaligned_zeropad -all: $(PROGS) +all: $(TEST_PROGS) -$(PROGS): ../harness.c +$(TEST_PROGS): ../harness.c -run_tests: all - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +include ../../lib.mk clean: - rm -f $(PROGS) *.o - -.PHONY: all run_tests clean + rm -f $(TEST_PROGS) *.o diff --git a/powerpc/stringloops/Makefile b/powerpc/stringloops/Makefile index 506d773..2a728f4 100644 --- a/powerpc/stringloops/Makefile +++ b/powerpc/stringloops/Makefile @@ -2,19 +2,14 @@ CFLAGS += -m64 CFLAGS += -I$(CURDIR) -PROGS := memcmp +TEST_PROGS := memcmp EXTRA_SOURCES := memcmp_64.S ../harness.c -all: $(PROGS) +all: $(TEST_PROGS) -$(PROGS): $(EXTRA_SOURCES) +$(TEST_PROGS): $(EXTRA_SOURCES) -run_tests: all - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +include ../../lib.mk clean: - rm -f $(PROGS) *.o - -.PHONY: all run_tests clean + rm -f $(TEST_PROGS) *.o diff --git a/powerpc/switch_endian/.gitignore b/powerpc/switch_endian/.gitignore new file mode 100644 index 0000000..89e762e --- /dev/null +++ b/powerpc/switch_endian/.gitignore @@ -0,0 +1,2 @@ +switch_endian_test +check-reversed.S diff --git a/powerpc/switch_endian/Makefile b/powerpc/switch_endian/Makefile new file mode 100644 index 0000000..e21d106 --- /dev/null +++ b/powerpc/switch_endian/Makefile @@ -0,0 +1,18 @@ +TEST_PROGS := switch_endian_test + +ASFLAGS += -O2 -Wall -g -nostdlib -m64 + +all: $(TEST_PROGS) + +switch_endian_test: check-reversed.S + +check-reversed.o: check.o + $(CROSS_COMPILE)objcopy -j .text --reverse-bytes=4 -O binary $< $@ + +check-reversed.S: check-reversed.o + hexdump -v -e '/1 ".byte 0x%02X\n"' $< > $@ + +include ../../lib.mk + +clean: + rm -f $(TEST_PROGS) *.o check-reversed.S diff --git a/powerpc/switch_endian/check.S b/powerpc/switch_endian/check.S new file mode 100644 index 0000000..e2484d2 --- /dev/null +++ b/powerpc/switch_endian/check.S @@ -0,0 +1,100 @@ +#include "common.h" + +/* + * Checks that registers contain what we expect, ie. they were not clobbered by + * the syscall. + * + * r15: pattern to check registers against. + * + * At the end r3 == 0 if everything's OK. + */ + nop # guaranteed to be illegal in reverse-endian + mr r9,r15 + cmpd r9,r3 # check r3 + bne 1f + addi r9,r15,4 # check r4 + cmpd r9,r4 + bne 1f + lis r9,0x00FF # check CR + ori r9,r9,0xF000 + mfcr r10 + and r10,r10,r9 + cmpw r9,r10 + addi r9,r15,34 + bne 1f + addi r9,r15,32 # check LR + mflr r10 + cmpd r9,r10 + bne 1f + addi r9,r15,5 # check r5 + cmpd r9,r5 + bne 1f + addi r9,r15,6 # check r6 + cmpd r9,r6 + bne 1f + addi r9,r15,7 # check r7 + cmpd r9,r7 + bne 1f + addi r9,r15,8 # check r8 + cmpd r9,r8 + bne 1f + addi r9,r15,13 # check r13 + cmpd r9,r13 + bne 1f + addi r9,r15,14 # check r14 + cmpd r9,r14 + bne 1f + addi r9,r15,16 # check r16 + cmpd r9,r16 + bne 1f + addi r9,r15,17 # check r17 + cmpd r9,r17 + bne 1f + addi r9,r15,18 # check r18 + cmpd r9,r18 + bne 1f + addi r9,r15,19 # check r19 + cmpd r9,r19 + bne 1f + addi r9,r15,20 # check r20 + cmpd r9,r20 + bne 1f + addi r9,r15,21 # check r21 + cmpd r9,r21 + bne 1f + addi r9,r15,22 # check r22 + cmpd r9,r22 + bne 1f + addi r9,r15,23 # check r23 + cmpd r9,r23 + bne 1f + addi r9,r15,24 # check r24 + cmpd r9,r24 + bne 1f + addi r9,r15,25 # check r25 + cmpd r9,r25 + bne 1f + addi r9,r15,26 # check r26 + cmpd r9,r26 + bne 1f + addi r9,r15,27 # check r27 + cmpd r9,r27 + bne 1f + addi r9,r15,28 # check r28 + cmpd r9,r28 + bne 1f + addi r9,r15,29 # check r29 + cmpd r9,r29 + bne 1f + addi r9,r15,30 # check r30 + cmpd r9,r30 + bne 1f + addi r9,r15,31 # check r31 + cmpd r9,r31 + bne 1f + b 2f +1: mr r3, r9 + li r0, __NR_exit + sc +2: li r0, __NR_switch_endian + nop diff --git a/powerpc/switch_endian/common.h b/powerpc/switch_endian/common.h new file mode 100644 index 0000000..69e3996 --- /dev/null +++ b/powerpc/switch_endian/common.h @@ -0,0 +1,6 @@ +#include +#include + +#ifndef __NR_switch_endian +#define __NR_switch_endian 363 +#endif diff --git a/powerpc/switch_endian/switch_endian_test.S b/powerpc/switch_endian/switch_endian_test.S new file mode 100644 index 0000000..ef7c971 --- /dev/null +++ b/powerpc/switch_endian/switch_endian_test.S @@ -0,0 +1,81 @@ +#include "common.h" + + .data + .balign 8 +message: + .ascii "success: switch_endian_test\n\0" + + .section ".toc" + .balign 8 +pattern: + .llong 0x5555AAAA5555AAAA + + .text +FUNC_START(_start) + /* Load the pattern */ + ld r15, pattern@TOC(%r2) + + /* Setup CR, only CR2-CR4 are maintained */ + lis r3, 0x00FF + ori r3, r3, 0xF000 + mtcr r3 + + /* Load the pattern slightly modified into the registers */ + mr r3, r15 + addi r4, r15, 4 + + addi r5, r15, 32 + mtlr r5 + + addi r5, r15, 5 + addi r6, r15, 6 + addi r7, r15, 7 + addi r8, r15, 8 + + /* r9 - r12 are clobbered */ + + addi r13, r15, 13 + addi r14, r15, 14 + + /* Skip r15 we're using it */ + + addi r16, r15, 16 + addi r17, r15, 17 + addi r18, r15, 18 + addi r19, r15, 19 + addi r20, r15, 20 + addi r21, r15, 21 + addi r22, r15, 22 + addi r23, r15, 23 + addi r24, r15, 24 + addi r25, r15, 25 + addi r26, r15, 26 + addi r27, r15, 27 + addi r28, r15, 28 + addi r29, r15, 29 + addi r30, r15, 30 + addi r31, r15, 31 + + /* + * Call the syscall to switch endian. + * It clobbers r9-r12, XER, CTR and CR0-1,5-7. + */ + li r0, __NR_switch_endian + sc + +#include "check-reversed.S" + + /* Flip back, r0 already has the switch syscall number */ + .long 0x02000044 /* sc */ + +#include "check.S" + + li r0, __NR_write + li r3, 1 /* stdout */ + ld r4, message@got(%r2) + li r5, 28 /* strlen(message3) */ + sc + li r0, __NR_exit + li r3, 0 + sc + b . diff --git a/powerpc/tm/.gitignore b/powerpc/tm/.gitignore index 33d02cc..2699635 100644 --- a/powerpc/tm/.gitignore +++ b/powerpc/tm/.gitignore @@ -1 +1,2 @@ tm-resched-dscr +tm-syscall diff --git a/powerpc/tm/Makefile b/powerpc/tm/Makefile index 2cede23..4bea62a 100644 --- a/powerpc/tm/Makefile +++ b/powerpc/tm/Makefile @@ -1,15 +1,13 @@ -PROGS := tm-resched-dscr +TEST_PROGS := tm-resched-dscr tm-syscall -all: $(PROGS) +all: $(TEST_PROGS) -$(PROGS): ../harness.c +$(TEST_PROGS): ../harness.c -run_tests: all - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +tm-syscall: tm-syscall-asm.S +tm-syscall: CFLAGS += -mhtm -I../../../../../usr/include -clean: - rm -f $(PROGS) *.o +include ../../lib.mk -.PHONY: all run_tests clean +clean: + rm -f $(TEST_PROGS) *.o diff --git a/powerpc/tm/tm-syscall-asm.S b/powerpc/tm/tm-syscall-asm.S new file mode 100644 index 0000000..431f61a --- /dev/null +++ b/powerpc/tm/tm-syscall-asm.S @@ -0,0 +1,27 @@ +#include +#include + + .text +FUNC_START(getppid_tm_active) + tbegin. + beq 1f + li r0, __NR_getppid + sc + tend. + blr +1: + li r3, -1 + blr + +FUNC_START(getppid_tm_suspended) + tbegin. + beq 1f + li r0, __NR_getppid + tsuspend. + sc + tresume. + tend. + blr +1: + li r3, -1 + blr diff --git a/powerpc/tm/tm-syscall.c b/powerpc/tm/tm-syscall.c new file mode 100644 index 0000000..1276e23 --- /dev/null +++ b/powerpc/tm/tm-syscall.c @@ -0,0 +1,122 @@ +/* + * Copyright 2015, Sam Bobroff, IBM Corp. + * Licensed under GPLv2. + * + * Test the kernel's system call code to ensure that a system call + * made from within an active HTM transaction is aborted with the + * correct failure code. + * Conversely, ensure that a system call made from within a + * suspended transaction can succeed. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" + +extern int getppid_tm_active(void); +extern int getppid_tm_suspended(void); + +unsigned retries = 0; + +#define TEST_DURATION 10 /* seconds */ +#define TM_RETRIES 100 + +long failure_code(void) +{ + return __builtin_get_texasru() >> 24; +} + +bool failure_is_persistent(void) +{ + return (failure_code() & TM_CAUSE_PERSISTENT) == TM_CAUSE_PERSISTENT; +} + +bool failure_is_syscall(void) +{ + return (failure_code() & TM_CAUSE_SYSCALL) == TM_CAUSE_SYSCALL; +} + +pid_t getppid_tm(bool suspend) +{ + int i; + pid_t pid; + + for (i = 0; i < TM_RETRIES; i++) { + if (suspend) + pid = getppid_tm_suspended(); + else + pid = getppid_tm_active(); + + if (pid >= 0) + return pid; + + if (failure_is_persistent()) { + if (failure_is_syscall()) + return -1; + + printf("Unexpected persistent transaction failure.\n"); + printf("TEXASR 0x%016lx, TFIAR 0x%016lx.\n", + __builtin_get_texasr(), __builtin_get_tfiar()); + exit(-1); + } + + retries++; + } + + printf("Exceeded limit of %d temporary transaction failures.\n", TM_RETRIES); + printf("TEXASR 0x%016lx, TFIAR 0x%016lx.\n", + __builtin_get_texasr(), __builtin_get_tfiar()); + + exit(-1); +} + +int tm_syscall(void) +{ + unsigned count = 0; + struct timeval end, now; + + SKIP_IF(!((long)get_auxv_entry(AT_HWCAP2) + & PPC_FEATURE2_HTM_NOSC)); + setbuf(stdout, NULL); + + printf("Testing transactional syscalls for %d seconds...\n", TEST_DURATION); + + gettimeofday(&end, NULL); + now.tv_sec = TEST_DURATION; + now.tv_usec = 0; + timeradd(&end, &now, &end); + + for (count = 0; timercmp(&now, &end, <); count++) { + /* + * Test a syscall within a suspended transaction and verify + * that it succeeds. + */ + FAIL_IF(getppid_tm(true) == -1); /* Should succeed. */ + + /* + * Test a syscall within an active transaction and verify that + * it fails with the correct failure code. + */ + FAIL_IF(getppid_tm(false) != -1); /* Should fail... */ + FAIL_IF(!failure_is_persistent()); /* ...persistently... */ + FAIL_IF(!failure_is_syscall()); /* ...with code syscall. */ + gettimeofday(&now, 0); + } + + printf("%d active and suspended transactions behaved correctly.\n", count); + printf("(There were %d transaction retries.)\n", retries); + + return 0; +} + +int main(void) +{ + return test_harness(tm_syscall, "tm_syscall"); +} diff --git a/powerpc/utils.h b/powerpc/utils.h index a93777a..b7d4108 100644 --- a/powerpc/utils.h +++ b/powerpc/utils.h @@ -15,11 +15,12 @@ typedef signed long long s64; /* Just for familiarity */ typedef uint32_t u32; +typedef uint16_t u16; typedef uint8_t u8; int test_harness(int (test_function)(void), char *name); - +extern void *get_auxv_entry(int type); /* Yes, this is evil */ #define FAIL_IF(x) \ diff --git a/powerpc/vphn/.gitignore b/powerpc/vphn/.gitignore new file mode 100644 index 0000000..7c04395 --- /dev/null +++ b/powerpc/vphn/.gitignore @@ -0,0 +1 @@ +test-vphn diff --git a/powerpc/vphn/Makefile b/powerpc/vphn/Makefile new file mode 100644 index 0000000..a485f2e --- /dev/null +++ b/powerpc/vphn/Makefile @@ -0,0 +1,12 @@ +TEST_PROGS := test-vphn + +CFLAGS += -m64 + +all: $(TEST_PROGS) + +$(TEST_PROGS): ../harness.c + +include ../../lib.mk + +clean: + rm -f $(TEST_PROGS) diff --git a/powerpc/vphn/test-vphn.c b/powerpc/vphn/test-vphn.c new file mode 100644 index 0000000..5742f68 --- /dev/null +++ b/powerpc/vphn/test-vphn.c @@ -0,0 +1,410 @@ +#include +#include +#include "utils.h" +#include "subunit.h" + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define cpu_to_be32(x) bswap_32(x) +#define be32_to_cpu(x) bswap_32(x) +#define be16_to_cpup(x) bswap_16(*x) +#define cpu_to_be64(x) bswap_64(x) +#else +#define cpu_to_be32(x) (x) +#define be32_to_cpu(x) (x) +#define be16_to_cpup(x) (*x) +#define cpu_to_be64(x) (x) +#endif + +#include "vphn.c" + +static struct test { + char *descr; + long input[VPHN_REGISTER_COUNT]; + u32 expected[VPHN_ASSOC_BUFSIZE]; +} all_tests[] = { + { + "vphn: no data", + { + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + }, + { + 0x00000000 + } + }, + { + "vphn: 1 x 16-bit value", + { + 0x8001ffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + }, + { + 0x00000001, + 0x00000001 + } + }, + { + "vphn: 2 x 16-bit values", + { + 0x80018002ffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + }, + { + 0x00000002, + 0x00000001, + 0x00000002 + } + }, + { + "vphn: 3 x 16-bit values", + { + 0x800180028003ffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + }, + { + 0x00000003, + 0x00000001, + 0x00000002, + 0x00000003 + } + }, + { + "vphn: 4 x 16-bit values", + { + 0x8001800280038004, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + }, + { + 0x00000004, + 0x00000001, + 0x00000002, + 0x00000003, + 0x00000004 + } + }, + { + /* Parsing the next 16-bit value out of the next 64-bit input + * value. + */ + "vphn: 5 x 16-bit values", + { + 0x8001800280038004, + 0x8005ffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + }, + { + 0x00000005, + 0x00000001, + 0x00000002, + 0x00000003, + 0x00000004, + 0x00000005 + } + }, + { + /* Parse at most 6 x 64-bit input values */ + "vphn: 24 x 16-bit values", + { + 0x8001800280038004, + 0x8005800680078008, + 0x8009800a800b800c, + 0x800d800e800f8010, + 0x8011801280138014, + 0x8015801680178018 + }, + { + 0x00000018, + 0x00000001, + 0x00000002, + 0x00000003, + 0x00000004, + 0x00000005, + 0x00000006, + 0x00000007, + 0x00000008, + 0x00000009, + 0x0000000a, + 0x0000000b, + 0x0000000c, + 0x0000000d, + 0x0000000e, + 0x0000000f, + 0x00000010, + 0x00000011, + 0x00000012, + 0x00000013, + 0x00000014, + 0x00000015, + 0x00000016, + 0x00000017, + 0x00000018 + } + }, + { + "vphn: 1 x 32-bit value", + { + 0x00000001ffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff + }, + { + 0x00000001, + 0x00000001 + } + }, + { + "vphn: 2 x 32-bit values", + { + 0x0000000100000002, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff + }, + { + 0x00000002, + 0x00000001, + 0x00000002 + } + }, + { + /* Parsing the next 32-bit value out of the next 64-bit input + * value. + */ + "vphn: 3 x 32-bit values", + { + 0x0000000100000002, + 0x00000003ffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff + }, + { + 0x00000003, + 0x00000001, + 0x00000002, + 0x00000003 + } + }, + { + /* Parse at most 6 x 64-bit input values */ + "vphn: 12 x 32-bit values", + { + 0x0000000100000002, + 0x0000000300000004, + 0x0000000500000006, + 0x0000000700000008, + 0x000000090000000a, + 0x0000000b0000000c + }, + { + 0x0000000c, + 0x00000001, + 0x00000002, + 0x00000003, + 0x00000004, + 0x00000005, + 0x00000006, + 0x00000007, + 0x00000008, + 0x00000009, + 0x0000000a, + 0x0000000b, + 0x0000000c + } + }, + { + "vphn: 16-bit value followed by 32-bit value", + { + 0x800100000002ffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff + }, + { + 0x00000002, + 0x00000001, + 0x00000002 + } + }, + { + "vphn: 32-bit value followed by 16-bit value", + { + 0x000000018002ffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff + }, + { + 0x00000002, + 0x00000001, + 0x00000002 + } + }, + { + /* Parse a 32-bit value split accross two consecutives 64-bit + * input values. + */ + "vphn: 16-bit value followed by 2 x 32-bit values", + { + 0x8001000000020000, + 0x0003ffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff + }, + { + 0x00000003, + 0x00000001, + 0x00000002, + 0x00000003, + 0x00000004, + 0x00000005 + } + }, + { + /* The lower bits in 0x0001ffff don't get mixed up with the + * 0xffff terminator. + */ + "vphn: 32-bit value has all ones in 16 lower bits", + { + 0x0001ffff80028003, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff + }, + { + 0x00000003, + 0x0001ffff, + 0x00000002, + 0x00000003 + } + }, + { + /* The following input doesn't follow the specification. + */ + "vphn: last 32-bit value is truncated", + { + 0x0000000100000002, + 0x0000000300000004, + 0x0000000500000006, + 0x0000000700000008, + 0x000000090000000a, + 0x0000000b800c2bad + }, + { + 0x0000000c, + 0x00000001, + 0x00000002, + 0x00000003, + 0x00000004, + 0x00000005, + 0x00000006, + 0x00000007, + 0x00000008, + 0x00000009, + 0x0000000a, + 0x0000000b, + 0x0000000c + } + }, + { + "vphn: garbage after terminator", + { + 0xffff2bad2bad2bad, + 0x2bad2bad2bad2bad, + 0x2bad2bad2bad2bad, + 0x2bad2bad2bad2bad, + 0x2bad2bad2bad2bad, + 0x2bad2bad2bad2bad + }, + { + 0x00000000 + } + }, + { + NULL + } +}; + +static int test_one(struct test *test) +{ + __be32 output[VPHN_ASSOC_BUFSIZE] = { 0 }; + int i, len; + + vphn_unpack_associativity(test->input, output); + + len = be32_to_cpu(output[0]); + if (len != test->expected[0]) { + printf("expected %d elements, got %d\n", test->expected[0], + len); + return 1; + } + + for (i = 1; i < len; i++) { + u32 val = be32_to_cpu(output[i]); + if (val != test->expected[i]) { + printf("element #%d is 0x%x, should be 0x%x\n", i, val, + test->expected[i]); + return 1; + } + } + + return 0; +} + +static int test_vphn(void) +{ + static struct test *test; + + for (test = all_tests; test->descr; test++) { + int ret; + + ret = test_one(test); + test_finish(test->descr, ret); + if (ret) + return ret; + } + + return 0; +} + +int main(int argc, char **argv) +{ + return test_harness(test_vphn, "test-vphn"); +} diff --git a/powerpc/vphn/vphn.c b/powerpc/vphn/vphn.c new file mode 120000 index 0000000..186b906 --- /dev/null +++ b/powerpc/vphn/vphn.c @@ -0,0 +1 @@ +../../../../../arch/powerpc/mm/vphn.c \ No newline at end of file diff --git a/powerpc/vphn/vphn.h b/powerpc/vphn/vphn.h new file mode 120000 index 0000000..7131efe --- /dev/null +++ b/powerpc/vphn/vphn.h @@ -0,0 +1 @@ +../../../../../arch/powerpc/mm/vphn.h \ No newline at end of file diff --git a/ptrace/Makefile b/ptrace/Makefile index 47ae2d3..453927f 100644 --- a/ptrace/Makefile +++ b/ptrace/Makefile @@ -6,5 +6,6 @@ all: peeksiginfo clean: rm -f peeksiginfo -run_tests: all - @./peeksiginfo || echo "peeksiginfo selftests: [FAIL]" +TEST_PROGS := peeksiginfo + +include ../lib.mk diff --git a/rcutorture/bin/configinit.sh b/rcutorture/bin/configinit.sh index 15f1a17..3f81a10 100755 --- a/rcutorture/bin/configinit.sh +++ b/rcutorture/bin/configinit.sh @@ -66,7 +66,7 @@ make $buildloc $TORTURE_DEFCONFIG > $builddir/Make.defconfig.out 2>&1 mv $builddir/.config $builddir/.config.sav sh $T/upd.sh < $builddir/.config.sav > $builddir/.config cp $builddir/.config $builddir/.config.new -yes '' | make $buildloc oldconfig > $builddir/Make.modconfig.out 2>&1 +yes '' | make $buildloc oldconfig > $builddir/Make.oldconfig.out 2> $builddir/Make.oldconfig.err # verify new config matches specification. configcheck.sh $builddir/.config $c diff --git a/rcutorture/bin/kvm-recheck.sh b/rcutorture/bin/kvm-recheck.sh index 4f5b20f..d86bdd6 100755 --- a/rcutorture/bin/kvm-recheck.sh +++ b/rcutorture/bin/kvm-recheck.sh @@ -43,6 +43,10 @@ do if test -f "$i/console.log" then configcheck.sh $i/.config $i/ConfigFragment + if test -r $i/Make.oldconfig.err + then + cat $i/Make.oldconfig.err + fi parse-build.sh $i/Make.out $configfile parse-torture.sh $i/console.log $configfile parse-console.sh $i/console.log $configfile diff --git a/rcutorture/bin/kvm.sh b/rcutorture/bin/kvm.sh index 368d64a..fbe2dbf 100755 --- a/rcutorture/bin/kvm.sh +++ b/rcutorture/bin/kvm.sh @@ -55,7 +55,7 @@ usage () { echo " --bootargs kernel-boot-arguments" echo " --bootimage relative-path-to-kernel-boot-image" echo " --buildonly" - echo " --configs \"config-file list\"" + echo " --configs \"config-file list w/ repeat factor (3*TINY01)\"" echo " --cpus N" echo " --datestamp string" echo " --defconfig string" @@ -178,13 +178,26 @@ fi touch $T/cfgcpu for CF in $configs do - if test -f "$CONFIGFRAG/$CF" + case $CF in + [0-9]\**|[0-9][0-9]\**|[0-9][0-9][0-9]\**) + config_reps=`echo $CF | sed -e 's/\*.*$//'` + CF1=`echo $CF | sed -e 's/^[^*]*\*//'` + ;; + *) + config_reps=1 + CF1=$CF + ;; + esac + if test -f "$CONFIGFRAG/$CF1" then - cpu_count=`configNR_CPUS.sh $CONFIGFRAG/$CF` - cpu_count=`configfrag_boot_cpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF" "$cpu_count"` - echo $CF $cpu_count >> $T/cfgcpu + cpu_count=`configNR_CPUS.sh $CONFIGFRAG/$CF1` + cpu_count=`configfrag_boot_cpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF1" "$cpu_count"` + for ((cur_rep=0;cur_rep<$config_reps;cur_rep++)) + do + echo $CF1 $cpu_count >> $T/cfgcpu + done else - echo "The --configs file $CF does not exist, terminating." + echo "The --configs file $CF1 does not exist, terminating." exit 1 fi done @@ -310,7 +323,7 @@ function dump(first, pastlast) cfr[jn] = cf[j] "." cfrep[cf[j]]; } if (cpusr[jn] > ncpus && ncpus != 0) - ovf = "(!)"; + ovf = "-ovf"; else ovf = ""; print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date`"; diff --git a/rcutorture/configs/rcu/CFcommon b/rcutorture/configs/rcu/CFcommon index d2d2a86..f824b4c 100644 --- a/rcutorture/configs/rcu/CFcommon +++ b/rcutorture/configs/rcu/CFcommon @@ -1,2 +1,5 @@ CONFIG_RCU_TORTURE_TEST=y CONFIG_PRINTK_TIME=y +CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y +CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y +CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y diff --git a/rcutorture/configs/rcu/SRCU-N b/rcutorture/configs/rcu/SRCU-N index 9fbb41b..1a087c3 100644 --- a/rcutorture/configs/rcu/SRCU-N +++ b/rcutorture/configs/rcu/SRCU-N @@ -5,3 +5,4 @@ CONFIG_HOTPLUG_CPU=y CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n +CONFIG_RCU_EXPERT=y diff --git a/rcutorture/configs/rcu/SRCU-P b/rcutorture/configs/rcu/SRCU-P index 4b6f272..4837430 100644 --- a/rcutorture/configs/rcu/SRCU-P +++ b/rcutorture/configs/rcu/SRCU-P @@ -5,3 +5,4 @@ CONFIG_HOTPLUG_CPU=y CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=y +#CHECK#CONFIG_RCU_EXPERT=n diff --git a/rcutorture/configs/rcu/SRCU-P.boot b/rcutorture/configs/rcu/SRCU-P.boot index 238bfe3..84a7d51 100644 --- a/rcutorture/configs/rcu/SRCU-P.boot +++ b/rcutorture/configs/rcu/SRCU-P.boot @@ -1 +1 @@ -rcutorture.torture_type=srcu +rcutorture.torture_type=srcud diff --git a/rcutorture/configs/rcu/TASKS01 b/rcutorture/configs/rcu/TASKS01 index 97f0a0b..bafe94c 100644 --- a/rcutorture/configs/rcu/TASKS01 +++ b/rcutorture/configs/rcu/TASKS01 @@ -5,5 +5,6 @@ CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=y CONFIG_DEBUG_LOCK_ALLOC=y -CONFIG_PROVE_RCU=y -CONFIG_TASKS_RCU=y +CONFIG_PROVE_LOCKING=y +#CHECK#CONFIG_PROVE_RCU=y +CONFIG_RCU_EXPERT=y diff --git a/rcutorture/configs/rcu/TASKS02 b/rcutorture/configs/rcu/TASKS02 index 696d2ea..ad2be91 100644 --- a/rcutorture/configs/rcu/TASKS02 +++ b/rcutorture/configs/rcu/TASKS02 @@ -2,4 +2,3 @@ CONFIG_SMP=n CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n -CONFIG_TASKS_RCU=y diff --git a/rcutorture/configs/rcu/TASKS03 b/rcutorture/configs/rcu/TASKS03 index 9c60da5..c70c51d 100644 --- a/rcutorture/configs/rcu/TASKS03 +++ b/rcutorture/configs/rcu/TASKS03 @@ -6,8 +6,8 @@ CONFIG_HIBERNATION=n CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=y -CONFIG_TASKS_RCU=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=y CONFIG_NO_HZ_FULL_ALL=y +#CHECK#CONFIG_RCU_EXPERT=n diff --git a/rcutorture/configs/rcu/TINY02 b/rcutorture/configs/rcu/TINY02 index 36e41df..f1892e0 100644 --- a/rcutorture/configs/rcu/TINY02 +++ b/rcutorture/configs/rcu/TINY02 @@ -8,7 +8,7 @@ CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=n CONFIG_RCU_TRACE=y CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y +#CHECK#CONFIG_PROVE_RCU=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_PREEMPT_COUNT=y diff --git a/rcutorture/configs/rcu/TINY02.boot b/rcutorture/configs/rcu/TINY02.boot index 0f08027..6c1a292 100644 --- a/rcutorture/configs/rcu/TINY02.boot +++ b/rcutorture/configs/rcu/TINY02.boot @@ -1,2 +1,3 @@ rcupdate.rcu_self_test=1 rcupdate.rcu_self_test_bh=1 +rcutorture.torture_type=rcu_bh diff --git a/rcutorture/configs/rcu/TREE01 b/rcutorture/configs/rcu/TREE01 index f8a10a7..f572b87 100644 --- a/rcutorture/configs/rcu/TREE01 +++ b/rcutorture/configs/rcu/TREE01 @@ -13,6 +13,6 @@ CONFIG_MAXSMP=y CONFIG_RCU_NOCB_CPU=y CONFIG_RCU_NOCB_CPU_ZERO=y CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y diff --git a/rcutorture/configs/rcu/TREE02 b/rcutorture/configs/rcu/TREE02 index 629122f..ef6a22c 100644 --- a/rcutorture/configs/rcu/TREE02 +++ b/rcutorture/configs/rcu/TREE02 @@ -14,10 +14,9 @@ CONFIG_SUSPEND=n CONFIG_HIBERNATION=n CONFIG_RCU_FANOUT=3 CONFIG_RCU_FANOUT_LEAF=3 -CONFIG_RCU_FANOUT_EXACT=n CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=n -CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y diff --git a/rcutorture/configs/rcu/TREE02-T b/rcutorture/configs/rcu/TREE02-T index a25de47..917d251 100644 --- a/rcutorture/configs/rcu/TREE02-T +++ b/rcutorture/configs/rcu/TREE02-T @@ -14,10 +14,8 @@ CONFIG_SUSPEND=n CONFIG_HIBERNATION=n CONFIG_RCU_FANOUT=3 CONFIG_RCU_FANOUT_LEAF=3 -CONFIG_RCU_FANOUT_EXACT=n CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=n -CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/rcutorture/configs/rcu/TREE03 b/rcutorture/configs/rcu/TREE03 index 53f24e0..7a17c50 100644 --- a/rcutorture/configs/rcu/TREE03 +++ b/rcutorture/configs/rcu/TREE03 @@ -1,5 +1,5 @@ CONFIG_SMP=y -CONFIG_NR_CPUS=8 +CONFIG_NR_CPUS=16 CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=y @@ -9,12 +9,11 @@ CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=n CONFIG_RCU_TRACE=y CONFIG_HOTPLUG_CPU=y -CONFIG_RCU_FANOUT=4 -CONFIG_RCU_FANOUT_LEAF=4 -CONFIG_RCU_FANOUT_EXACT=n +CONFIG_RCU_FANOUT=2 +CONFIG_RCU_FANOUT_LEAF=2 CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_BOOST=y CONFIG_RCU_KTHREAD_PRIO=2 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y diff --git a/rcutorture/configs/rcu/TREE03.boot b/rcutorture/configs/rcu/TREE03.boot new file mode 100644 index 0000000..120c0c8 --- /dev/null +++ b/rcutorture/configs/rcu/TREE03.boot @@ -0,0 +1 @@ +rcutorture.onoff_interval=1 rcutorture.onoff_holdoff=30 diff --git a/rcutorture/configs/rcu/TREE04 b/rcutorture/configs/rcu/TREE04 index 0f84db3..39a2c6d 100644 --- a/rcutorture/configs/rcu/TREE04 +++ b/rcutorture/configs/rcu/TREE04 @@ -13,10 +13,9 @@ CONFIG_RCU_TRACE=y CONFIG_HOTPLUG_CPU=n CONFIG_SUSPEND=n CONFIG_HIBERNATION=n -CONFIG_RCU_FANOUT=2 -CONFIG_RCU_FANOUT_LEAF=2 -CONFIG_RCU_FANOUT_EXACT=n +CONFIG_RCU_FANOUT=4 +CONFIG_RCU_FANOUT_LEAF=4 CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_RCU_CPU_STALL_INFO=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y diff --git a/rcutorture/configs/rcu/TREE05 b/rcutorture/configs/rcu/TREE05 index 212e3bf..1257d32 100644 --- a/rcutorture/configs/rcu/TREE05 +++ b/rcutorture/configs/rcu/TREE05 @@ -12,11 +12,10 @@ CONFIG_RCU_TRACE=n CONFIG_HOTPLUG_CPU=y CONFIG_RCU_FANOUT=6 CONFIG_RCU_FANOUT_LEAF=6 -CONFIG_RCU_FANOUT_EXACT=n CONFIG_RCU_NOCB_CPU=y CONFIG_RCU_NOCB_CPU_NONE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_RCU_CPU_STALL_INFO=n +#CHECK#CONFIG_PROVE_RCU=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y diff --git a/rcutorture/configs/rcu/TREE06 b/rcutorture/configs/rcu/TREE06 index 7eee63b..d3e456b 100644 --- a/rcutorture/configs/rcu/TREE06 +++ b/rcutorture/configs/rcu/TREE06 @@ -14,10 +14,9 @@ CONFIG_SUSPEND=n CONFIG_HIBERNATION=n CONFIG_RCU_FANOUT=6 CONFIG_RCU_FANOUT_LEAF=6 -CONFIG_RCU_FANOUT_EXACT=y CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_RCU_CPU_STALL_INFO=n +#CHECK#CONFIG_PROVE_RCU=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=y +CONFIG_RCU_EXPERT=y diff --git a/rcutorture/configs/rcu/TREE06.boot b/rcutorture/configs/rcu/TREE06.boot index da9a03a..dd90f28 100644 --- a/rcutorture/configs/rcu/TREE06.boot +++ b/rcutorture/configs/rcu/TREE06.boot @@ -1,3 +1,4 @@ rcupdate.rcu_self_test=1 rcupdate.rcu_self_test_bh=1 rcupdate.rcu_self_test_sched=1 +rcutree.rcu_fanout_exact=1 diff --git a/rcutorture/configs/rcu/TREE07 b/rcutorture/configs/rcu/TREE07 index 92a97fa..3956b41 100644 --- a/rcutorture/configs/rcu/TREE07 +++ b/rcutorture/configs/rcu/TREE07 @@ -15,8 +15,7 @@ CONFIG_RCU_TRACE=y CONFIG_HOTPLUG_CPU=y CONFIG_RCU_FANOUT=2 CONFIG_RCU_FANOUT_LEAF=2 -CONFIG_RCU_FANOUT_EXACT=n CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_RCU_CPU_STALL_INFO=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y diff --git a/rcutorture/configs/rcu/TREE08 b/rcutorture/configs/rcu/TREE08 index 5812027..bb9b0c1 100644 --- a/rcutorture/configs/rcu/TREE08 +++ b/rcutorture/configs/rcu/TREE08 @@ -1,5 +1,5 @@ CONFIG_SMP=y -CONFIG_NR_CPUS=16 +CONFIG_NR_CPUS=8 CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=y @@ -13,13 +13,12 @@ CONFIG_HOTPLUG_CPU=n CONFIG_SUSPEND=n CONFIG_HIBERNATION=n CONFIG_RCU_FANOUT=3 -CONFIG_RCU_FANOUT_EXACT=y CONFIG_RCU_FANOUT_LEAF=2 CONFIG_RCU_NOCB_CPU=y CONFIG_RCU_NOCB_CPU_ALL=y CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_RCU_CPU_STALL_INFO=n +#CHECK#CONFIG_PROVE_RCU=y CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y diff --git a/rcutorture/configs/rcu/TREE08-T b/rcutorture/configs/rcu/TREE08-T index 3eaecca..2ad13f0 100644 --- a/rcutorture/configs/rcu/TREE08-T +++ b/rcutorture/configs/rcu/TREE08-T @@ -13,11 +13,9 @@ CONFIG_HOTPLUG_CPU=n CONFIG_SUSPEND=n CONFIG_HIBERNATION=n CONFIG_RCU_FANOUT=3 -CONFIG_RCU_FANOUT_EXACT=y CONFIG_RCU_FANOUT_LEAF=2 CONFIG_RCU_NOCB_CPU=y CONFIG_RCU_NOCB_CPU_ALL=y CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/rcutorture/configs/rcu/TREE08-T.boot b/rcutorture/configs/rcu/TREE08-T.boot new file mode 100644 index 0000000..883149b --- /dev/null +++ b/rcutorture/configs/rcu/TREE08-T.boot @@ -0,0 +1 @@ +rcutree.rcu_fanout_exact=1 diff --git a/rcutorture/configs/rcu/TREE08.boot b/rcutorture/configs/rcu/TREE08.boot index 2561daf..fb066dc 100644 --- a/rcutorture/configs/rcu/TREE08.boot +++ b/rcutorture/configs/rcu/TREE08.boot @@ -1,3 +1,4 @@ rcutorture.torture_type=sched rcupdate.rcu_self_test=1 rcupdate.rcu_self_test_sched=1 +rcutree.rcu_fanout_exact=1 diff --git a/rcutorture/configs/rcu/TREE09 b/rcutorture/configs/rcu/TREE09 index 6076b36..6710e74 100644 --- a/rcutorture/configs/rcu/TREE09 +++ b/rcutorture/configs/rcu/TREE09 @@ -13,6 +13,6 @@ CONFIG_SUSPEND=n CONFIG_HIBERNATION=n CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +#CHECK#CONFIG_RCU_EXPERT=n diff --git a/rcutorture/doc/TREE_RCU-kconfig.txt b/rcutorture/doc/TREE_RCU-kconfig.txt index ec03c88..657f3a0 100644 --- a/rcutorture/doc/TREE_RCU-kconfig.txt +++ b/rcutorture/doc/TREE_RCU-kconfig.txt @@ -12,13 +12,11 @@ CONFIG_NO_HZ_IDLE -- Do those not otherwise specified. (Groups of two.) CONFIG_NO_HZ_FULL -- Do two, one with CONFIG_NO_HZ_FULL_SYSIDLE. CONFIG_NO_HZ_FULL_SYSIDLE -- Do one. CONFIG_PREEMPT -- Do half. (First three and #8.) -CONFIG_PROVE_LOCKING -- Do all but two, covering CONFIG_PROVE_RCU and not. -CONFIG_PROVE_RCU -- Do all but one under CONFIG_PROVE_LOCKING. +CONFIG_PROVE_LOCKING -- Do several, covering CONFIG_DEBUG_LOCK_ALLOC=y and not. +CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING. CONFIG_RCU_BOOST -- one of PREEMPT_RCU. CONFIG_RCU_KTHREAD_PRIO -- set to 2 for _BOOST testing. -CONFIG_RCU_CPU_STALL_INFO -- Do one. -CONFIG_RCU_FANOUT -- Cover hierarchy as currently, but overlap with others. -CONFIG_RCU_FANOUT_EXACT -- Do one. +CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others. CONFIG_RCU_FANOUT_LEAF -- Do one non-default. CONFIG_RCU_FAST_NO_HZ -- Do one, but not with CONFIG_RCU_NOCB_CPU_ALL. CONFIG_RCU_NOCB_CPU -- Do three, see below. @@ -27,28 +25,19 @@ CONFIG_RCU_NOCB_CPU_NONE -- Do one. CONFIG_RCU_NOCB_CPU_ZERO -- Do one. CONFIG_RCU_TRACE -- Do half. CONFIG_SMP -- Need one !SMP for PREEMPT_RCU. +!RCU_EXPERT -- Do a few, but these have to be vanilla configurations. RCU-bh: Do one with PREEMPT and one with !PREEMPT. RCU-sched: Do one with PREEMPT but not BOOST. -Hierarchy: - -TREE01. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=8, CONFIG_RCU_FANOUT_EXACT=n. -TREE02. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=3, CONFIG_RCU_FANOUT_EXACT=n, - CONFIG_RCU_FANOUT_LEAF=3. -TREE03. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=4, CONFIG_RCU_FANOUT_EXACT=n, - CONFIG_RCU_FANOUT_LEAF=4. -TREE04. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=2, CONFIG_RCU_FANOUT_EXACT=n, - CONFIG_RCU_FANOUT_LEAF=2. -TREE05. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=6, CONFIG_RCU_FANOUT_EXACT=n - CONFIG_RCU_FANOUT_LEAF=6. -TREE06. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=6, CONFIG_RCU_FANOUT_EXACT=y - CONFIG_RCU_FANOUT_LEAF=6. -TREE07. CONFIG_NR_CPUS=16, CONFIG_RCU_FANOUT=2, CONFIG_RCU_FANOUT_EXACT=n, - CONFIG_RCU_FANOUT_LEAF=2. -TREE08. CONFIG_NR_CPUS=16, CONFIG_RCU_FANOUT=3, CONFIG_RCU_FANOUT_EXACT=y, - CONFIG_RCU_FANOUT_LEAF=2. -TREE09. CONFIG_NR_CPUS=1. +Boot parameters: + +nohz_full - do at least one. +maxcpu -- do at least one. +rcupdate.rcu_self_test_bh -- Do at least one each, offloaded and not. +rcupdate.rcu_self_test_sched -- Do at least one each, offloaded and not. +rcupdate.rcu_self_test -- Do at least one each, offloaded and not. +rcutree.rcu_fanout_exact -- Do at least one. Kconfig Parameters Ignored: diff --git a/seccomp/.gitignore b/seccomp/.gitignore new file mode 100644 index 0000000..346d83c --- /dev/null +++ b/seccomp/.gitignore @@ -0,0 +1 @@ +seccomp_bpf diff --git a/seccomp/Makefile b/seccomp/Makefile new file mode 100644 index 0000000..8401e87 --- /dev/null +++ b/seccomp/Makefile @@ -0,0 +1,10 @@ +TEST_PROGS := seccomp_bpf +CFLAGS += -Wl,-no-as-needed -Wall +LDFLAGS += -lpthread + +all: $(TEST_PROGS) + +include ../lib.mk + +clean: + $(RM) $(TEST_PROGS) diff --git a/seccomp/seccomp_bpf.c b/seccomp/seccomp_bpf.c new file mode 100644 index 0000000..770f47a --- /dev/null +++ b/seccomp/seccomp_bpf.c @@ -0,0 +1,2157 @@ +/* + * Copyright (c) 2012 The Chromium OS Authors. All rights reserved. + * Use of this source code is governed by the GPLv2 license. + * + * Test code for seccomp bpf. + */ + +#include +#define __have_siginfo_t 1 +#define __have_sigval_t 1 +#define __have_sigevent_t 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define _GNU_SOURCE +#include +#include + +#include "test_harness.h" + +#ifndef PR_SET_PTRACER +# define PR_SET_PTRACER 0x59616d61 +#endif + +#ifndef PR_SET_NO_NEW_PRIVS +#define PR_SET_NO_NEW_PRIVS 38 +#define PR_GET_NO_NEW_PRIVS 39 +#endif + +#ifndef PR_SECCOMP_EXT +#define PR_SECCOMP_EXT 43 +#endif + +#ifndef SECCOMP_EXT_ACT +#define SECCOMP_EXT_ACT 1 +#endif + +#ifndef SECCOMP_EXT_ACT_TSYNC +#define SECCOMP_EXT_ACT_TSYNC 1 +#endif + +#ifndef SECCOMP_MODE_STRICT +#define SECCOMP_MODE_STRICT 1 +#endif + +#ifndef SECCOMP_MODE_FILTER +#define SECCOMP_MODE_FILTER 2 +#endif + +#ifndef SECCOMP_RET_KILL +#define SECCOMP_RET_KILL 0x00000000U /* kill the task immediately */ +#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ +#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ +#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ +#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ + +/* Masks for the return value sections. */ +#define SECCOMP_RET_ACTION 0x7fff0000U +#define SECCOMP_RET_DATA 0x0000ffffU + +struct seccomp_data { + int nr; + __u32 arch; + __u64 instruction_pointer; + __u64 args[6]; +}; +#endif + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n])) +#elif __BYTE_ORDER == __BIG_ENDIAN +#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32)) +#else +#error "wut? Unknown __BYTE_ORDER?!" +#endif + +#define SIBLING_EXIT_UNKILLED 0xbadbeef +#define SIBLING_EXIT_FAILURE 0xbadface +#define SIBLING_EXIT_NEWPRIVS 0xbadfeed + +TEST(mode_strict_support) +{ + long ret; + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support CONFIG_SECCOMP"); + } + syscall(__NR_exit, 1); +} + +TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL) +{ + long ret; + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support CONFIG_SECCOMP"); + } + syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER, + NULL, NULL, NULL); + EXPECT_FALSE(true) { + TH_LOG("Unreachable!"); + } +} + +/* Note! This doesn't test no new privs behavior */ +TEST(no_new_privs_support) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + EXPECT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } +} + +/* Tests kernel support by checking for a copy_from_user() fault on * NULL. */ +TEST(mode_filter_support) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EFAULT, errno) { + TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!"); + } +} + +TEST(mode_filter_without_nnp) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0); + ASSERT_LE(0, ret) { + TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS"); + } + errno = 0; + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + /* Succeeds with CAP_SYS_ADMIN, fails without */ + /* TODO(wad) check caps not euid */ + if (geteuid()) { + EXPECT_EQ(-1, ret); + EXPECT_EQ(EACCES, errno); + } else { + EXPECT_EQ(0, ret); + } +} + +#define MAX_INSNS_PER_PATH 32768 + +TEST(filter_size_limits) +{ + int i; + int count = BPF_MAXINSNS + 1; + struct sock_filter allow[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_filter *filter; + struct sock_fprog prog = { }; + long ret; + + filter = calloc(count, sizeof(*filter)); + ASSERT_NE(NULL, filter); + + for (i = 0; i < count; i++) + filter[i] = allow[0]; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + prog.filter = filter; + prog.len = count; + + /* Too many filter instructions in a single filter. */ + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_NE(0, ret) { + TH_LOG("Installing %d insn filter was allowed", prog.len); + } + + /* One less is okay, though. */ + prog.len -= 1; + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Installing %d insn filter wasn't allowed", prog.len); + } +} + +TEST(filter_chain_limits) +{ + int i; + int count = BPF_MAXINSNS; + struct sock_filter allow[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_filter *filter; + struct sock_fprog prog = { }; + long ret; + + filter = calloc(count, sizeof(*filter)); + ASSERT_NE(NULL, filter); + + for (i = 0; i < count; i++) + filter[i] = allow[0]; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + prog.filter = filter; + prog.len = 1; + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_EQ(0, ret); + + prog.len = count; + + /* Too many total filter instructions. */ + for (i = 0; i < MAX_INSNS_PER_PATH; i++) { + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + if (ret != 0) + break; + } + ASSERT_NE(0, ret) { + TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)", + i, count, i * (count + 4)); + } +} + +TEST(mode_filter_cannot_move_to_strict) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); +} + + +TEST(mode_filter_get_seccomp) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0); + EXPECT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0); + EXPECT_EQ(2, ret); +} + + +TEST(ALLOW_all) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); +} + +TEST(empty_prog) +{ + struct sock_filter filter[] = { + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); +} + +TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, 0x10000000U), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + EXPECT_EQ(0, syscall(__NR_getpid)) { + TH_LOG("getpid() shouldn't ever return"); + } +} + +/* return code >= 0x80000000 is unused. */ +TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, 0x90000000U), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + EXPECT_EQ(0, syscall(__NR_getpid)) { + TH_LOG("getpid() shouldn't ever return"); + } +} + +TEST_SIGNAL(KILL_all, SIGSYS) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); +} + +TEST_SIGNAL(KILL_one, SIGSYS) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* getpid() should never return. */ + EXPECT_EQ(0, syscall(__NR_getpid)); +} + +TEST_SIGNAL(KILL_one_arg_one, SIGSYS) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + /* Only both with lower 32-bit for now. */ + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + pid_t pid = getpid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + EXPECT_EQ(pid, syscall(__NR_getpid)); + /* getpid() should never return. */ + EXPECT_EQ(0, syscall(__NR_getpid, 0x0C0FFEE)); +} + +TEST_SIGNAL(KILL_one_arg_six, SIGSYS) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + /* Only both with lower 32-bit for now. */ + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + pid_t pid = getpid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + EXPECT_EQ(pid, syscall(__NR_getpid)); + /* getpid() should never return. */ + EXPECT_EQ(0, syscall(__NR_getpid, 1, 2, 3, 4, 5, 0x0C0FFEE)); +} + +/* TODO(wad) add 64-bit versus 32-bit arg tests. */ +TEST(arg_out_of_range) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); +} + +TEST(ERRNO_valid) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | E2BIG), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + EXPECT_EQ(-1, read(0, NULL, 0)); + EXPECT_EQ(E2BIG, errno); +} + +TEST(ERRNO_zero) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* "errno" of 0 is ok. */ + EXPECT_EQ(0, read(0, NULL, 0)); +} + +TEST(ERRNO_capped) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 4096), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + EXPECT_EQ(-1, read(0, NULL, 0)); + EXPECT_EQ(4095, errno); +} + +FIXTURE_DATA(TRAP) { + struct sock_fprog prog; +}; + +FIXTURE_SETUP(TRAP) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + + memset(&self->prog, 0, sizeof(self->prog)); + self->prog.filter = malloc(sizeof(filter)); + ASSERT_NE(NULL, self->prog.filter); + memcpy(self->prog.filter, filter, sizeof(filter)); + self->prog.len = (unsigned short)ARRAY_SIZE(filter); +} + +FIXTURE_TEARDOWN(TRAP) +{ + if (self->prog.filter) + free(self->prog.filter); +} + +TEST_F_SIGNAL(TRAP, dfl, SIGSYS) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); + ASSERT_EQ(0, ret); + syscall(__NR_getpid); +} + +/* Ensure that SIGSYS overrides SIG_IGN */ +TEST_F_SIGNAL(TRAP, ign, SIGSYS) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + signal(SIGSYS, SIG_IGN); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); + ASSERT_EQ(0, ret); + syscall(__NR_getpid); +} + +static struct siginfo TRAP_info; +static volatile int TRAP_nr; +static void TRAP_action(int nr, siginfo_t *info, void *void_context) +{ + memcpy(&TRAP_info, info, sizeof(TRAP_info)); + TRAP_nr = nr; +} + +TEST_F(TRAP, handler) +{ + int ret, test; + struct sigaction act; + sigset_t mask; + + memset(&act, 0, sizeof(act)); + sigemptyset(&mask); + sigaddset(&mask, SIGSYS); + + act.sa_sigaction = &TRAP_action; + act.sa_flags = SA_SIGINFO; + ret = sigaction(SIGSYS, &act, NULL); + ASSERT_EQ(0, ret) { + TH_LOG("sigaction failed"); + } + ret = sigprocmask(SIG_UNBLOCK, &mask, NULL); + ASSERT_EQ(0, ret) { + TH_LOG("sigprocmask failed"); + } + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); + ASSERT_EQ(0, ret); + TRAP_nr = 0; + memset(&TRAP_info, 0, sizeof(TRAP_info)); + /* Expect the registers to be rolled back. (nr = error) may vary + * based on arch. */ + ret = syscall(__NR_getpid); + /* Silence gcc warning about volatile. */ + test = TRAP_nr; + EXPECT_EQ(SIGSYS, test); + struct local_sigsys { + void *_call_addr; /* calling user insn */ + int _syscall; /* triggering system call number */ + unsigned int _arch; /* AUDIT_ARCH_* of syscall */ + } *sigsys = (struct local_sigsys *) +#ifdef si_syscall + &(TRAP_info.si_call_addr); +#else + &TRAP_info.si_pid; +#endif + EXPECT_EQ(__NR_getpid, sigsys->_syscall); + /* Make sure arch is non-zero. */ + EXPECT_NE(0, sigsys->_arch); + EXPECT_NE(0, (unsigned long)sigsys->_call_addr); +} + +FIXTURE_DATA(precedence) { + struct sock_fprog allow; + struct sock_fprog trace; + struct sock_fprog error; + struct sock_fprog trap; + struct sock_fprog kill; +}; + +FIXTURE_SETUP(precedence) +{ + struct sock_filter allow_insns[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_filter trace_insns[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE), + }; + struct sock_filter error_insns[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO), + }; + struct sock_filter trap_insns[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP), + }; + struct sock_filter kill_insns[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + }; + + memset(self, 0, sizeof(*self)); +#define FILTER_ALLOC(_x) \ + self->_x.filter = malloc(sizeof(_x##_insns)); \ + ASSERT_NE(NULL, self->_x.filter); \ + memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \ + self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns) + FILTER_ALLOC(allow); + FILTER_ALLOC(trace); + FILTER_ALLOC(error); + FILTER_ALLOC(trap); + FILTER_ALLOC(kill); +} + +FIXTURE_TEARDOWN(precedence) +{ +#define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter) + FILTER_FREE(allow); + FILTER_FREE(trace); + FILTER_FREE(error); + FILTER_FREE(trap); + FILTER_FREE(kill); +} + +TEST_F(precedence, allow_ok) +{ + pid_t parent, res = 0; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + res = syscall(__NR_getppid); + EXPECT_EQ(parent, res); +} + +TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS) +{ + pid_t parent, res = 0; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + res = syscall(__NR_getppid); + EXPECT_EQ(parent, res); + /* getpid() should never return. */ + res = syscall(__NR_getpid); + EXPECT_EQ(0, res); +} + +TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* getpid() should never return. */ + EXPECT_EQ(0, syscall(__NR_getpid)); +} + +TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* getpid() should never return. */ + EXPECT_EQ(0, syscall(__NR_getpid)); +} + +TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* getpid() should never return. */ + EXPECT_EQ(0, syscall(__NR_getpid)); +} + +TEST_F(precedence, errno_is_third) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + EXPECT_EQ(0, syscall(__NR_getpid)); +} + +TEST_F(precedence, errno_is_third_in_any_order) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + EXPECT_EQ(0, syscall(__NR_getpid)); +} + +TEST_F(precedence, trace_is_fourth) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* No ptracer */ + EXPECT_EQ(-1, syscall(__NR_getpid)); +} + +TEST_F(precedence, trace_is_fourth_in_any_order) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* No ptracer */ + EXPECT_EQ(-1, syscall(__NR_getpid)); +} + +#ifndef PTRACE_O_TRACESECCOMP +#define PTRACE_O_TRACESECCOMP 0x00000080 +#endif + +/* Catch the Ubuntu 12.04 value error. */ +#if PTRACE_EVENT_SECCOMP != 7 +#undef PTRACE_EVENT_SECCOMP +#endif + +#ifndef PTRACE_EVENT_SECCOMP +#define PTRACE_EVENT_SECCOMP 7 +#endif + +#define IS_SECCOMP_EVENT(status) ((status >> 16) == PTRACE_EVENT_SECCOMP) +bool tracer_running; +void tracer_stop(int sig) +{ + tracer_running = false; +} + +typedef void tracer_func_t(struct __test_metadata *_metadata, + pid_t tracee, int status, void *args); + +void tracer(struct __test_metadata *_metadata, int fd, pid_t tracee, + tracer_func_t tracer_func, void *args) +{ + int ret = -1; + struct sigaction action = { + .sa_handler = tracer_stop, + }; + + /* Allow external shutdown. */ + tracer_running = true; + ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL)); + + errno = 0; + while (ret == -1 && errno != EINVAL) + ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0); + ASSERT_EQ(0, ret) { + kill(tracee, SIGKILL); + } + /* Wait for attach stop */ + wait(NULL); + + ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, PTRACE_O_TRACESECCOMP); + ASSERT_EQ(0, ret) { + TH_LOG("Failed to set PTRACE_O_TRACESECCOMP"); + kill(tracee, SIGKILL); + } + ptrace(PTRACE_CONT, tracee, NULL, 0); + + /* Unblock the tracee */ + ASSERT_EQ(1, write(fd, "A", 1)); + ASSERT_EQ(0, close(fd)); + + /* Run until we're shut down. Must assert to stop execution. */ + while (tracer_running) { + int status; + + if (wait(&status) != tracee) + continue; + if (WIFSIGNALED(status) || WIFEXITED(status)) + /* Child is dead. Time to go. */ + return; + + /* Make sure this is a seccomp event. */ + ASSERT_EQ(true, IS_SECCOMP_EVENT(status)); + + tracer_func(_metadata, tracee, status, args); + + ret = ptrace(PTRACE_CONT, tracee, NULL, NULL); + ASSERT_EQ(0, ret); + } + /* Directly report the status of our test harness results. */ + syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); +} + +/* Common tracer setup/teardown functions. */ +void cont_handler(int num) +{ } +pid_t setup_trace_fixture(struct __test_metadata *_metadata, + tracer_func_t func, void *args) +{ + char sync; + int pipefd[2]; + pid_t tracer_pid; + pid_t tracee = getpid(); + + /* Setup a pipe for clean synchronization. */ + ASSERT_EQ(0, pipe(pipefd)); + + /* Fork a child which we'll promote to tracer */ + tracer_pid = fork(); + ASSERT_LE(0, tracer_pid); + signal(SIGALRM, cont_handler); + if (tracer_pid == 0) { + close(pipefd[0]); + tracer(_metadata, pipefd[1], tracee, func, args); + syscall(__NR_exit, 0); + } + close(pipefd[1]); + prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0); + read(pipefd[0], &sync, 1); + close(pipefd[0]); + + return tracer_pid; +} +void teardown_trace_fixture(struct __test_metadata *_metadata, + pid_t tracer) +{ + if (tracer) { + int status; + /* + * Extract the exit code from the other process and + * adopt it for ourselves in case its asserts failed. + */ + ASSERT_EQ(0, kill(tracer, SIGUSR1)); + ASSERT_EQ(tracer, waitpid(tracer, &status, 0)); + if (WEXITSTATUS(status)) + _metadata->passed = 0; + } +} + +/* "poke" tracer arguments and function. */ +struct tracer_args_poke_t { + unsigned long poke_addr; +}; + +void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status, + void *args) +{ + int ret; + unsigned long msg; + struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args; + + ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); + EXPECT_EQ(0, ret); + /* If this fails, don't try to recover. */ + ASSERT_EQ(0x1001, msg) { + kill(tracee, SIGKILL); + } + /* + * Poke in the message. + * Registers are not touched to try to keep this relatively arch + * agnostic. + */ + ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001); + EXPECT_EQ(0, ret); +} + +FIXTURE_DATA(TRACE_poke) { + struct sock_fprog prog; + pid_t tracer; + long poked; + struct tracer_args_poke_t tracer_args; +}; + +FIXTURE_SETUP(TRACE_poke) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + + self->poked = 0; + memset(&self->prog, 0, sizeof(self->prog)); + self->prog.filter = malloc(sizeof(filter)); + ASSERT_NE(NULL, self->prog.filter); + memcpy(self->prog.filter, filter, sizeof(filter)); + self->prog.len = (unsigned short)ARRAY_SIZE(filter); + + /* Set up tracer args. */ + self->tracer_args.poke_addr = (unsigned long)&self->poked; + + /* Launch tracer. */ + self->tracer = setup_trace_fixture(_metadata, tracer_poke, + &self->tracer_args); +} + +FIXTURE_TEARDOWN(TRACE_poke) +{ + teardown_trace_fixture(_metadata, self->tracer); + if (self->prog.filter) + free(self->prog.filter); +} + +TEST_F(TRACE_poke, read_has_side_effects) +{ + ssize_t ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); + ASSERT_EQ(0, ret); + + EXPECT_EQ(0, self->poked); + ret = read(-1, NULL, 0); + EXPECT_EQ(-1, ret); + EXPECT_EQ(0x1001, self->poked); +} + +TEST_F(TRACE_poke, getpid_runs_normally) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); + ASSERT_EQ(0, ret); + + EXPECT_EQ(0, self->poked); + EXPECT_NE(0, syscall(__NR_getpid)); + EXPECT_EQ(0, self->poked); +} + +#if defined(__x86_64__) +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM orig_rax +# define SYSCALL_RET rax +#elif defined(__i386__) +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM orig_eax +# define SYSCALL_RET eax +#elif defined(__arm__) +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM ARM_r7 +# define SYSCALL_RET ARM_r0 +#elif defined(__aarch64__) +# define ARCH_REGS struct user_pt_regs +# define SYSCALL_NUM regs[8] +# define SYSCALL_RET regs[0] +#elif defined(__powerpc__) +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM gpr[0] +# define SYSCALL_RET gpr[3] +#elif defined(__s390__) +# define ARCH_REGS s390_regs +# define SYSCALL_NUM gprs[2] +# define SYSCALL_RET gprs[2] +#else +# error "Do not know how to find your architecture's registers and syscalls" +#endif + +/* Architecture-specific syscall fetching routine. */ +int get_syscall(struct __test_metadata *_metadata, pid_t tracee) +{ + struct iovec iov; + ARCH_REGS regs; + + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); + EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) { + TH_LOG("PTRACE_GETREGSET failed"); + return -1; + } + + return regs.SYSCALL_NUM; +} + +/* Architecture-specific syscall changing routine. */ +void change_syscall(struct __test_metadata *_metadata, + pid_t tracee, int syscall) +{ + struct iovec iov; + int ret; + ARCH_REGS regs; + + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); + ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov); + EXPECT_EQ(0, ret); + +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(__powerpc__) || defined(__s390__) + { + regs.SYSCALL_NUM = syscall; + } + +#elif defined(__arm__) +# ifndef PTRACE_SET_SYSCALL +# define PTRACE_SET_SYSCALL 23 +# endif + { + ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall); + EXPECT_EQ(0, ret); + } + +#else + ASSERT_EQ(1, 0) { + TH_LOG("How is the syscall changed on this architecture?"); + } +#endif + + /* If syscall is skipped, change return value. */ + if (syscall == -1) + regs.SYSCALL_RET = 1; + + ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov); + EXPECT_EQ(0, ret); +} + +void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee, + int status, void *args) +{ + int ret; + unsigned long msg; + + /* Make sure we got the right message. */ + ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); + EXPECT_EQ(0, ret); + + /* Validate and take action on expected syscalls. */ + switch (msg) { + case 0x1002: + /* change getpid to getppid. */ + EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee)); + change_syscall(_metadata, tracee, __NR_getppid); + break; + case 0x1003: + /* skip gettid. */ + EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee)); + change_syscall(_metadata, tracee, -1); + break; + case 0x1004: + /* do nothing (allow getppid) */ + EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee)); + break; + default: + EXPECT_EQ(0, msg) { + TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg); + kill(tracee, SIGKILL); + } + } + +} + +FIXTURE_DATA(TRACE_syscall) { + struct sock_fprog prog; + pid_t tracer, mytid, mypid, parent; +}; + +FIXTURE_SETUP(TRACE_syscall) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + + memset(&self->prog, 0, sizeof(self->prog)); + self->prog.filter = malloc(sizeof(filter)); + ASSERT_NE(NULL, self->prog.filter); + memcpy(self->prog.filter, filter, sizeof(filter)); + self->prog.len = (unsigned short)ARRAY_SIZE(filter); + + /* Prepare some testable syscall results. */ + self->mytid = syscall(__NR_gettid); + ASSERT_GT(self->mytid, 0); + ASSERT_NE(self->mytid, 1) { + TH_LOG("Running this test as init is not supported. :)"); + } + + self->mypid = getpid(); + ASSERT_GT(self->mypid, 0); + ASSERT_EQ(self->mytid, self->mypid); + + self->parent = getppid(); + ASSERT_GT(self->parent, 0); + ASSERT_NE(self->parent, self->mypid); + + /* Launch tracer. */ + self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL); +} + +FIXTURE_TEARDOWN(TRACE_syscall) +{ + teardown_trace_fixture(_metadata, self->tracer); + if (self->prog.filter) + free(self->prog.filter); +} + +TEST_F(TRACE_syscall, syscall_allowed) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); + ASSERT_EQ(0, ret); + + /* getppid works as expected (no changes). */ + EXPECT_EQ(self->parent, syscall(__NR_getppid)); + EXPECT_NE(self->mypid, syscall(__NR_getppid)); +} + +TEST_F(TRACE_syscall, syscall_redirected) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); + ASSERT_EQ(0, ret); + + /* getpid has been redirected to getppid as expected. */ + EXPECT_EQ(self->parent, syscall(__NR_getpid)); + EXPECT_NE(self->mypid, syscall(__NR_getpid)); +} + +TEST_F(TRACE_syscall, syscall_dropped) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); + ASSERT_EQ(0, ret); + + /* gettid has been skipped and an altered return value stored. */ + EXPECT_EQ(1, syscall(__NR_gettid)); + EXPECT_NE(self->mytid, syscall(__NR_gettid)); +} + +#ifndef __NR_seccomp +# if defined(__i386__) +# define __NR_seccomp 354 +# elif defined(__x86_64__) +# define __NR_seccomp 317 +# elif defined(__arm__) +# define __NR_seccomp 383 +# elif defined(__aarch64__) +# define __NR_seccomp 277 +# elif defined(__powerpc__) +# define __NR_seccomp 358 +# elif defined(__s390__) +# define __NR_seccomp 348 +# else +# warning "seccomp syscall number unknown for this architecture" +# define __NR_seccomp 0xffff +# endif +#endif + +#ifndef SECCOMP_SET_MODE_STRICT +#define SECCOMP_SET_MODE_STRICT 0 +#endif + +#ifndef SECCOMP_SET_MODE_FILTER +#define SECCOMP_SET_MODE_FILTER 1 +#endif + +#ifndef SECCOMP_FLAG_FILTER_TSYNC +#define SECCOMP_FLAG_FILTER_TSYNC 1 +#endif + +#ifndef seccomp +int seccomp(unsigned int op, unsigned int flags, struct sock_fprog *filter) +{ + errno = 0; + return syscall(__NR_seccomp, op, flags, filter); +} +#endif + +TEST(seccomp_syscall) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + /* Reject insane operation. */ + ret = seccomp(-1, 0, &prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Did not reject crazy op value!"); + } + + /* Reject strict with flags or pointer. */ + ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Did not reject mode strict with flags!"); + } + ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Did not reject mode strict with uargs!"); + } + + /* Reject insane args for filter. */ + ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Did not reject crazy filter flags!"); + } + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL); + EXPECT_EQ(EFAULT, errno) { + TH_LOG("Did not reject NULL filter!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); + EXPECT_EQ(0, errno) { + TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s", + strerror(errno)); + } +} + +TEST(seccomp_syscall_mode_lock) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + EXPECT_EQ(0, ret) { + TH_LOG("Could not install filter!"); + } + + /* Make sure neither entry point will switch to strict. */ + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Switched to mode strict!"); + } + + ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Switched to mode strict!"); + } +} + +TEST(TSYNC_first) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + EXPECT_EQ(0, ret) { + TH_LOG("Could not install initial filter with TSYNC!"); + } +} + +#define TSYNC_SIBLINGS 2 +struct tsync_sibling { + pthread_t tid; + pid_t system_tid; + sem_t *started; + pthread_cond_t *cond; + pthread_mutex_t *mutex; + int diverge; + int num_waits; + struct sock_fprog *prog; + struct __test_metadata *metadata; +}; + +FIXTURE_DATA(TSYNC) { + struct sock_fprog root_prog, apply_prog; + struct tsync_sibling sibling[TSYNC_SIBLINGS]; + sem_t started; + pthread_cond_t cond; + pthread_mutex_t mutex; + int sibling_count; +}; + +FIXTURE_SETUP(TSYNC) +{ + struct sock_filter root_filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_filter apply_filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + + memset(&self->root_prog, 0, sizeof(self->root_prog)); + memset(&self->apply_prog, 0, sizeof(self->apply_prog)); + memset(&self->sibling, 0, sizeof(self->sibling)); + self->root_prog.filter = malloc(sizeof(root_filter)); + ASSERT_NE(NULL, self->root_prog.filter); + memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter)); + self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter); + + self->apply_prog.filter = malloc(sizeof(apply_filter)); + ASSERT_NE(NULL, self->apply_prog.filter); + memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter)); + self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter); + + self->sibling_count = 0; + pthread_mutex_init(&self->mutex, NULL); + pthread_cond_init(&self->cond, NULL); + sem_init(&self->started, 0, 0); + self->sibling[0].tid = 0; + self->sibling[0].cond = &self->cond; + self->sibling[0].started = &self->started; + self->sibling[0].mutex = &self->mutex; + self->sibling[0].diverge = 0; + self->sibling[0].num_waits = 1; + self->sibling[0].prog = &self->root_prog; + self->sibling[0].metadata = _metadata; + self->sibling[1].tid = 0; + self->sibling[1].cond = &self->cond; + self->sibling[1].started = &self->started; + self->sibling[1].mutex = &self->mutex; + self->sibling[1].diverge = 0; + self->sibling[1].prog = &self->root_prog; + self->sibling[1].num_waits = 1; + self->sibling[1].metadata = _metadata; +} + +FIXTURE_TEARDOWN(TSYNC) +{ + int sib = 0; + + if (self->root_prog.filter) + free(self->root_prog.filter); + if (self->apply_prog.filter) + free(self->apply_prog.filter); + + for ( ; sib < self->sibling_count; ++sib) { + struct tsync_sibling *s = &self->sibling[sib]; + void *status; + + if (!s->tid) + continue; + if (pthread_kill(s->tid, 0)) { + pthread_cancel(s->tid); + pthread_join(s->tid, &status); + } + } + pthread_mutex_destroy(&self->mutex); + pthread_cond_destroy(&self->cond); + sem_destroy(&self->started); +} + +void *tsync_sibling(void *data) +{ + long ret = 0; + struct tsync_sibling *me = data; + + me->system_tid = syscall(__NR_gettid); + + pthread_mutex_lock(me->mutex); + if (me->diverge) { + /* Just re-apply the root prog to fork the tree */ + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, + me->prog, 0, 0); + } + sem_post(me->started); + /* Return outside of started so parent notices failures. */ + if (ret) { + pthread_mutex_unlock(me->mutex); + return (void *)SIBLING_EXIT_FAILURE; + } + do { + pthread_cond_wait(me->cond, me->mutex); + me->num_waits = me->num_waits - 1; + } while (me->num_waits); + pthread_mutex_unlock(me->mutex); + + ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); + if (!ret) + return (void *)SIBLING_EXIT_NEWPRIVS; + read(0, NULL, 0); + return (void *)SIBLING_EXIT_UNKILLED; +} + +void tsync_start_sibling(struct tsync_sibling *sibling) +{ + pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling); +} + +TEST_F(TSYNC, siblings_fail_prctl) +{ + long ret; + void *status; + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + /* Check prctl failure detection by requesting sib 0 diverge. */ + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + ASSERT_EQ(0, ret) { + TH_LOG("setting filter failed"); + } + + self->sibling[0].diverge = 1; + tsync_start_sibling(&self->sibling[0]); + tsync_start_sibling(&self->sibling[1]); + + while (self->sibling_count < TSYNC_SIBLINGS) { + sem_wait(&self->started); + self->sibling_count++; + } + + /* Signal the threads to clean up*/ + pthread_mutex_lock(&self->mutex); + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + + /* Ensure diverging sibling failed to call prctl. */ + pthread_join(self->sibling[0].tid, &status); + EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status); + pthread_join(self->sibling[1].tid, &status); + EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); +} + +TEST_F(TSYNC, two_siblings_with_ancestor) +{ + long ret; + void *status; + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); + } + tsync_start_sibling(&self->sibling[0]); + tsync_start_sibling(&self->sibling[1]); + + while (self->sibling_count < TSYNC_SIBLINGS) { + sem_wait(&self->started); + self->sibling_count++; + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &self->apply_prog); + ASSERT_EQ(0, ret) { + TH_LOG("Could install filter on all threads!"); + } + /* Tell the siblings to test the policy */ + pthread_mutex_lock(&self->mutex); + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + /* Ensure they are both killed and don't exit cleanly. */ + pthread_join(self->sibling[0].tid, &status); + EXPECT_EQ(0x0, (long)status); + pthread_join(self->sibling[1].tid, &status); + EXPECT_EQ(0x0, (long)status); +} + +TEST_F(TSYNC, two_sibling_want_nnp) +{ + void *status; + + /* start siblings before any prctl() operations */ + tsync_start_sibling(&self->sibling[0]); + tsync_start_sibling(&self->sibling[1]); + while (self->sibling_count < TSYNC_SIBLINGS) { + sem_wait(&self->started); + self->sibling_count++; + } + + /* Tell the siblings to test no policy */ + pthread_mutex_lock(&self->mutex); + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + + /* Ensure they are both upset about lacking nnp. */ + pthread_join(self->sibling[0].tid, &status); + EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); + pthread_join(self->sibling[1].tid, &status); + EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); +} + +TEST_F(TSYNC, two_siblings_with_no_filter) +{ + long ret; + void *status; + + /* start siblings before any prctl() operations */ + tsync_start_sibling(&self->sibling[0]); + tsync_start_sibling(&self->sibling[1]); + while (self->sibling_count < TSYNC_SIBLINGS) { + sem_wait(&self->started); + self->sibling_count++; + } + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &self->apply_prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + ASSERT_EQ(0, ret) { + TH_LOG("Could install filter on all threads!"); + } + + /* Tell the siblings to test the policy */ + pthread_mutex_lock(&self->mutex); + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + + /* Ensure they are both killed and don't exit cleanly. */ + pthread_join(self->sibling[0].tid, &status); + EXPECT_EQ(0x0, (long)status); + pthread_join(self->sibling[1].tid, &status); + EXPECT_EQ(0x0, (long)status); +} + +TEST_F(TSYNC, two_siblings_with_one_divergence) +{ + long ret; + void *status; + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); + } + self->sibling[0].diverge = 1; + tsync_start_sibling(&self->sibling[0]); + tsync_start_sibling(&self->sibling[1]); + + while (self->sibling_count < TSYNC_SIBLINGS) { + sem_wait(&self->started); + self->sibling_count++; + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &self->apply_prog); + ASSERT_EQ(self->sibling[0].system_tid, ret) { + TH_LOG("Did not fail on diverged sibling."); + } + + /* Wake the threads */ + pthread_mutex_lock(&self->mutex); + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + + /* Ensure they are both unkilled. */ + pthread_join(self->sibling[0].tid, &status); + EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); + pthread_join(self->sibling[1].tid, &status); + EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); +} + +TEST_F(TSYNC, two_siblings_not_under_filter) +{ + long ret, sib; + void *status; + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + /* + * Sibling 0 will have its own seccomp policy + * and Sibling 1 will not be under seccomp at + * all. Sibling 1 will enter seccomp and 0 + * will cause failure. + */ + self->sibling[0].diverge = 1; + tsync_start_sibling(&self->sibling[0]); + tsync_start_sibling(&self->sibling[1]); + + while (self->sibling_count < TSYNC_SIBLINGS) { + sem_wait(&self->started); + self->sibling_count++; + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &self->apply_prog); + ASSERT_EQ(ret, self->sibling[0].system_tid) { + TH_LOG("Did not fail on diverged sibling."); + } + sib = 1; + if (ret == self->sibling[0].system_tid) + sib = 0; + + pthread_mutex_lock(&self->mutex); + + /* Increment the other siblings num_waits so we can clean up + * the one we just saw. + */ + self->sibling[!sib].num_waits += 1; + + /* Signal the thread to clean up*/ + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + pthread_join(self->sibling[sib].tid, &status); + EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); + /* Poll for actual task death. pthread_join doesn't guarantee it. */ + while (!kill(self->sibling[sib].system_tid, 0)) + sleep(0.1); + /* Switch to the remaining sibling */ + sib = !sib; + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &self->apply_prog); + ASSERT_EQ(0, ret) { + TH_LOG("Expected the remaining sibling to sync"); + }; + + pthread_mutex_lock(&self->mutex); + + /* If remaining sibling didn't have a chance to wake up during + * the first broadcast, manually reduce the num_waits now. + */ + if (self->sibling[sib].num_waits > 1) + self->sibling[sib].num_waits = 1; + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + pthread_join(self->sibling[sib].tid, &status); + EXPECT_EQ(0, (long)status); + /* Poll for actual task death. pthread_join doesn't guarantee it. */ + while (!kill(self->sibling[sib].system_tid, 0)) + sleep(0.1); + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &self->apply_prog); + ASSERT_EQ(0, ret); /* just us chickens */ +} + +/* Make sure restarted syscalls are seen directly as "restart_syscall". */ +TEST(syscall_restart) +{ + long ret; + unsigned long msg; + pid_t child_pid; + int pipefd[2]; + int status; + siginfo_t info = { }; + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + +#ifdef __NR_sigreturn + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0), +#endif + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_poll, 4, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0), + + /* Allow __NR_write for easy logging. */ + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100), /* poll */ + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200), /* restart */ + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + + ASSERT_EQ(0, pipe(pipefd)); + + child_pid = fork(); + ASSERT_LE(0, child_pid); + if (child_pid == 0) { + /* Child uses EXPECT not ASSERT to deliver status correctly. */ + char buf = ' '; + struct pollfd fds = { + .fd = pipefd[0], + .events = POLLIN, + }; + + /* Attach parent as tracer and stop. */ + EXPECT_EQ(0, ptrace(PTRACE_TRACEME)); + EXPECT_EQ(0, raise(SIGSTOP)); + + EXPECT_EQ(0, close(pipefd[1])); + + EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + EXPECT_EQ(0, ret) { + TH_LOG("Failed to install filter!"); + } + + EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { + TH_LOG("Failed to read() sync from parent"); + } + EXPECT_EQ('.', buf) { + TH_LOG("Failed to get sync data from read()"); + } + + /* Start poll to be interrupted. */ + errno = 0; + EXPECT_EQ(1, poll(&fds, 1, -1)) { + TH_LOG("Call to poll() failed (errno %d)", errno); + } + + /* Read final sync from parent. */ + EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { + TH_LOG("Failed final read() from parent"); + } + EXPECT_EQ('!', buf) { + TH_LOG("Failed to get final data from read()"); + } + + /* Directly report the status of our test harness results. */ + syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS + : EXIT_FAILURE); + } + EXPECT_EQ(0, close(pipefd[0])); + + /* Attach to child, setup options, and release. */ + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + ASSERT_EQ(true, WIFSTOPPED(status)); + ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL, + PTRACE_O_TRACESECCOMP)); + ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); + ASSERT_EQ(1, write(pipefd[1], ".", 1)); + + /* Wait for poll() to start. */ + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + ASSERT_EQ(true, WIFSTOPPED(status)); + ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); + ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); + ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); + ASSERT_EQ(0x100, msg); + EXPECT_EQ(__NR_poll, get_syscall(_metadata, child_pid)); + + /* Might as well check siginfo for sanity while we're here. */ + ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); + ASSERT_EQ(SIGTRAP, info.si_signo); + ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code); + EXPECT_EQ(0, info.si_errno); + EXPECT_EQ(getuid(), info.si_uid); + /* Verify signal delivery came from child (seccomp-triggered). */ + EXPECT_EQ(child_pid, info.si_pid); + + /* Interrupt poll with SIGSTOP (which we'll need to handle). */ + ASSERT_EQ(0, kill(child_pid, SIGSTOP)); + ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + ASSERT_EQ(true, WIFSTOPPED(status)); + ASSERT_EQ(SIGSTOP, WSTOPSIG(status)); + /* Verify signal delivery came from parent now. */ + ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); + EXPECT_EQ(getpid(), info.si_pid); + + /* Restart poll with SIGCONT, which triggers restart_syscall. */ + ASSERT_EQ(0, kill(child_pid, SIGCONT)); + ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + ASSERT_EQ(true, WIFSTOPPED(status)); + ASSERT_EQ(SIGCONT, WSTOPSIG(status)); + ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); + + /* Wait for restart_syscall() to start. */ + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + ASSERT_EQ(true, WIFSTOPPED(status)); + ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); + ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); + ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); + ASSERT_EQ(0x200, msg); + ret = get_syscall(_metadata, child_pid); +#if defined(__arm__) + /* FIXME: ARM does not expose true syscall in registers. */ + EXPECT_EQ(__NR_poll, ret); +#else + EXPECT_EQ(__NR_restart_syscall, ret); +#endif + + /* Write again to end poll. */ + ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); + ASSERT_EQ(1, write(pipefd[1], "!", 1)); + EXPECT_EQ(0, close(pipefd[1])); + + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + if (WIFSIGNALED(status) || WEXITSTATUS(status)) + _metadata->passed = 0; +} + +/* + * TODO: + * - add microbenchmarks + * - expand NNP testing + * - better arch-specific TRACE and TRAP handlers. + * - endianness checking when appropriate + * - 64-bit arg prodding + * - arch value testing (x86 modes especially) + * - ... + */ + +TEST_HARNESS_MAIN diff --git a/seccomp/test_harness.h b/seccomp/test_harness.h new file mode 100644 index 0000000..fb28416 --- /dev/null +++ b/seccomp/test_harness.h @@ -0,0 +1,534 @@ +/* + * Copyright (c) 2012 The Chromium OS Authors. All rights reserved. + * Use of this source code is governed by the GPLv2 license. + * + * test_harness.h: simple C unit test helper. + * + * Usage: + * #include "test_harness.h" + * TEST(standalone_test) { + * do_some_stuff; + * EXPECT_GT(10, stuff) { + * stuff_state_t state; + * enumerate_stuff_state(&state); + * TH_LOG("expectation failed with state: %s", state.msg); + * } + * more_stuff; + * ASSERT_NE(some_stuff, NULL) TH_LOG("how did it happen?!"); + * last_stuff; + * EXPECT_EQ(0, last_stuff); + * } + * + * FIXTURE(my_fixture) { + * mytype_t *data; + * int awesomeness_level; + * }; + * FIXTURE_SETUP(my_fixture) { + * self->data = mytype_new(); + * ASSERT_NE(NULL, self->data); + * } + * FIXTURE_TEARDOWN(my_fixture) { + * mytype_free(self->data); + * } + * TEST_F(my_fixture, data_is_good) { + * EXPECT_EQ(1, is_my_data_good(self->data)); + * } + * + * TEST_HARNESS_MAIN + * + * API inspired by code.google.com/p/googletest + */ +#ifndef TEST_HARNESS_H_ +#define TEST_HARNESS_H_ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include + +/* All exported functionality should be declared through this macro. */ +#define TEST_API(x) _##x + +/* + * Exported APIs + */ + +/* TEST(name) { implementation } + * Defines a test by name. + * Names must be unique and tests must not be run in parallel. The + * implementation containing block is a function and scoping should be treated + * as such. Returning early may be performed with a bare "return;" statement. + * + * EXPECT_* and ASSERT_* are valid in a TEST() { } context. + */ +#define TEST TEST_API(TEST) + +/* TEST_SIGNAL(name, signal) { implementation } + * Defines a test by name and the expected term signal. + * Names must be unique and tests must not be run in parallel. The + * implementation containing block is a function and scoping should be treated + * as such. Returning early may be performed with a bare "return;" statement. + * + * EXPECT_* and ASSERT_* are valid in a TEST() { } context. + */ +#define TEST_SIGNAL TEST_API(TEST_SIGNAL) + +/* FIXTURE(datatype name) { + * type property1; + * ... + * }; + * Defines the data provided to TEST_F()-defined tests as |self|. It should be + * populated and cleaned up using FIXTURE_SETUP and FIXTURE_TEARDOWN. + */ +#define FIXTURE TEST_API(FIXTURE) + +/* FIXTURE_DATA(datatype name) + * This call may be used when the type of the fixture data + * is needed. In general, this should not be needed unless + * the |self| is being passed to a helper directly. + */ +#define FIXTURE_DATA TEST_API(FIXTURE_DATA) + +/* FIXTURE_SETUP(fixture name) { implementation } + * Populates the required "setup" function for a fixture. An instance of the + * datatype defined with _FIXTURE_DATA will be exposed as |self| for the + * implementation. + * + * ASSERT_* are valid for use in this context and will prempt the execution + * of any dependent fixture tests. + * + * A bare "return;" statement may be used to return early. + */ +#define FIXTURE_SETUP TEST_API(FIXTURE_SETUP) + +/* FIXTURE_TEARDOWN(fixture name) { implementation } + * Populates the required "teardown" function for a fixture. An instance of the + * datatype defined with _FIXTURE_DATA will be exposed as |self| for the + * implementation to clean up. + * + * A bare "return;" statement may be used to return early. + */ +#define FIXTURE_TEARDOWN TEST_API(FIXTURE_TEARDOWN) + +/* TEST_F(fixture, name) { implementation } + * Defines a test that depends on a fixture (e.g., is part of a test case). + * Very similar to TEST() except that |self| is the setup instance of fixture's + * datatype exposed for use by the implementation. + */ +#define TEST_F TEST_API(TEST_F) + +#define TEST_F_SIGNAL TEST_API(TEST_F_SIGNAL) + +/* Use once to append a main() to the test file. E.g., + * TEST_HARNESS_MAIN + */ +#define TEST_HARNESS_MAIN TEST_API(TEST_HARNESS_MAIN) + +/* + * Operators for use in TEST and TEST_F. + * ASSERT_* calls will stop test execution immediately. + * EXPECT_* calls will emit a failure warning, note it, and continue. + */ + +/* ASSERT_EQ(expected, measured): expected == measured */ +#define ASSERT_EQ TEST_API(ASSERT_EQ) +/* ASSERT_NE(expected, measured): expected != measured */ +#define ASSERT_NE TEST_API(ASSERT_NE) +/* ASSERT_LT(expected, measured): expected < measured */ +#define ASSERT_LT TEST_API(ASSERT_LT) +/* ASSERT_LE(expected, measured): expected <= measured */ +#define ASSERT_LE TEST_API(ASSERT_LE) +/* ASSERT_GT(expected, measured): expected > measured */ +#define ASSERT_GT TEST_API(ASSERT_GT) +/* ASSERT_GE(expected, measured): expected >= measured */ +#define ASSERT_GE TEST_API(ASSERT_GE) +/* ASSERT_NULL(measured): NULL == measured */ +#define ASSERT_NULL TEST_API(ASSERT_NULL) +/* ASSERT_TRUE(measured): measured != 0 */ +#define ASSERT_TRUE TEST_API(ASSERT_TRUE) +/* ASSERT_FALSE(measured): measured == 0 */ +#define ASSERT_FALSE TEST_API(ASSERT_FALSE) +/* ASSERT_STREQ(expected, measured): !strcmp(expected, measured) */ +#define ASSERT_STREQ TEST_API(ASSERT_STREQ) +/* ASSERT_STRNE(expected, measured): strcmp(expected, measured) */ +#define ASSERT_STRNE TEST_API(ASSERT_STRNE) +/* EXPECT_EQ(expected, measured): expected == measured */ +#define EXPECT_EQ TEST_API(EXPECT_EQ) +/* EXPECT_NE(expected, measured): expected != measured */ +#define EXPECT_NE TEST_API(EXPECT_NE) +/* EXPECT_LT(expected, measured): expected < measured */ +#define EXPECT_LT TEST_API(EXPECT_LT) +/* EXPECT_LE(expected, measured): expected <= measured */ +#define EXPECT_LE TEST_API(EXPECT_LE) +/* EXPECT_GT(expected, measured): expected > measured */ +#define EXPECT_GT TEST_API(EXPECT_GT) +/* EXPECT_GE(expected, measured): expected >= measured */ +#define EXPECT_GE TEST_API(EXPECT_GE) +/* EXPECT_NULL(measured): NULL == measured */ +#define EXPECT_NULL TEST_API(EXPECT_NULL) +/* EXPECT_TRUE(measured): 0 != measured */ +#define EXPECT_TRUE TEST_API(EXPECT_TRUE) +/* EXPECT_FALSE(measured): 0 == measured */ +#define EXPECT_FALSE TEST_API(EXPECT_FALSE) +/* EXPECT_STREQ(expected, measured): !strcmp(expected, measured) */ +#define EXPECT_STREQ TEST_API(EXPECT_STREQ) +/* EXPECT_STRNE(expected, measured): strcmp(expected, measured) */ +#define EXPECT_STRNE TEST_API(EXPECT_STRNE) + +/* TH_LOG(format, ...) + * Optional debug logging function available for use in tests. + * Logging may be enabled or disabled by defining TH_LOG_ENABLED. + * E.g., #define TH_LOG_ENABLED 1 + * If no definition is provided, logging is enabled by default. + */ +#define TH_LOG TEST_API(TH_LOG) + +/* + * Internal implementation. + * + */ + +/* Utilities exposed to the test definitions */ +#ifndef TH_LOG_STREAM +# define TH_LOG_STREAM stderr +#endif + +#ifndef TH_LOG_ENABLED +# define TH_LOG_ENABLED 1 +#endif + +#define _TH_LOG(fmt, ...) do { \ + if (TH_LOG_ENABLED) \ + __TH_LOG(fmt, ##__VA_ARGS__); \ +} while (0) + +/* Unconditional logger for internal use. */ +#define __TH_LOG(fmt, ...) \ + fprintf(TH_LOG_STREAM, "%s:%d:%s:" fmt "\n", \ + __FILE__, __LINE__, _metadata->name, ##__VA_ARGS__) + +/* Defines the test function and creates the registration stub. */ +#define _TEST(test_name) __TEST_IMPL(test_name, -1) + +#define _TEST_SIGNAL(test_name, signal) __TEST_IMPL(test_name, signal) + +#define __TEST_IMPL(test_name, _signal) \ + static void test_name(struct __test_metadata *_metadata); \ + static struct __test_metadata _##test_name##_object = \ + { name: "global." #test_name, \ + fn: &test_name, termsig: _signal }; \ + static void __attribute__((constructor)) _register_##test_name(void) \ + { \ + __register_test(&_##test_name##_object); \ + } \ + static void test_name( \ + struct __test_metadata __attribute__((unused)) *_metadata) + +/* Wraps the struct name so we have one less argument to pass around. */ +#define _FIXTURE_DATA(fixture_name) struct _test_data_##fixture_name + +/* Called once per fixture to setup the data and register. */ +#define _FIXTURE(fixture_name) \ + static void __attribute__((constructor)) \ + _register_##fixture_name##_data(void) \ + { \ + __fixture_count++; \ + } \ + _FIXTURE_DATA(fixture_name) + +/* Prepares the setup function for the fixture. |_metadata| is included + * so that ASSERT_* work as a convenience. + */ +#define _FIXTURE_SETUP(fixture_name) \ + void fixture_name##_setup( \ + struct __test_metadata __attribute__((unused)) *_metadata, \ + _FIXTURE_DATA(fixture_name) __attribute__((unused)) *self) +#define _FIXTURE_TEARDOWN(fixture_name) \ + void fixture_name##_teardown( \ + struct __test_metadata __attribute__((unused)) *_metadata, \ + _FIXTURE_DATA(fixture_name) __attribute__((unused)) *self) + +/* Emits test registration and helpers for fixture-based test + * cases. + * TODO(wad) register fixtures on dedicated test lists. + */ +#define _TEST_F(fixture_name, test_name) \ + __TEST_F_IMPL(fixture_name, test_name, -1) + +#define _TEST_F_SIGNAL(fixture_name, test_name, signal) \ + __TEST_F_IMPL(fixture_name, test_name, signal) + +#define __TEST_F_IMPL(fixture_name, test_name, signal) \ + static void fixture_name##_##test_name( \ + struct __test_metadata *_metadata, \ + _FIXTURE_DATA(fixture_name) *self); \ + static inline void wrapper_##fixture_name##_##test_name( \ + struct __test_metadata *_metadata) \ + { \ + /* fixture data is alloced, setup, and torn down per call. */ \ + _FIXTURE_DATA(fixture_name) self; \ + memset(&self, 0, sizeof(_FIXTURE_DATA(fixture_name))); \ + fixture_name##_setup(_metadata, &self); \ + /* Let setup failure terminate early. */ \ + if (!_metadata->passed) \ + return; \ + fixture_name##_##test_name(_metadata, &self); \ + fixture_name##_teardown(_metadata, &self); \ + } \ + static struct __test_metadata \ + _##fixture_name##_##test_name##_object = { \ + name: #fixture_name "." #test_name, \ + fn: &wrapper_##fixture_name##_##test_name, \ + termsig: signal, \ + }; \ + static void __attribute__((constructor)) \ + _register_##fixture_name##_##test_name(void) \ + { \ + __register_test(&_##fixture_name##_##test_name##_object); \ + } \ + static void fixture_name##_##test_name( \ + struct __test_metadata __attribute__((unused)) *_metadata, \ + _FIXTURE_DATA(fixture_name) __attribute__((unused)) *self) + +/* Exports a simple wrapper to run the test harness. */ +#define _TEST_HARNESS_MAIN \ + static void __attribute__((constructor)) \ + __constructor_order_last(void) \ + { \ + if (!__constructor_order) \ + __constructor_order = _CONSTRUCTOR_ORDER_BACKWARD; \ + } \ + int main(int argc, char **argv) { \ + return test_harness_run(argc, argv); \ + } + +#define _ASSERT_EQ(_expected, _seen) \ + __EXPECT(_expected, _seen, ==, 1) +#define _ASSERT_NE(_expected, _seen) \ + __EXPECT(_expected, _seen, !=, 1) +#define _ASSERT_LT(_expected, _seen) \ + __EXPECT(_expected, _seen, <, 1) +#define _ASSERT_LE(_expected, _seen) \ + __EXPECT(_expected, _seen, <=, 1) +#define _ASSERT_GT(_expected, _seen) \ + __EXPECT(_expected, _seen, >, 1) +#define _ASSERT_GE(_expected, _seen) \ + __EXPECT(_expected, _seen, >=, 1) +#define _ASSERT_NULL(_seen) \ + __EXPECT(NULL, _seen, ==, 1) + +#define _ASSERT_TRUE(_seen) \ + _ASSERT_NE(0, _seen) +#define _ASSERT_FALSE(_seen) \ + _ASSERT_EQ(0, _seen) +#define _ASSERT_STREQ(_expected, _seen) \ + __EXPECT_STR(_expected, _seen, ==, 1) +#define _ASSERT_STRNE(_expected, _seen) \ + __EXPECT_STR(_expected, _seen, !=, 1) + +#define _EXPECT_EQ(_expected, _seen) \ + __EXPECT(_expected, _seen, ==, 0) +#define _EXPECT_NE(_expected, _seen) \ + __EXPECT(_expected, _seen, !=, 0) +#define _EXPECT_LT(_expected, _seen) \ + __EXPECT(_expected, _seen, <, 0) +#define _EXPECT_LE(_expected, _seen) \ + __EXPECT(_expected, _seen, <=, 0) +#define _EXPECT_GT(_expected, _seen) \ + __EXPECT(_expected, _seen, >, 0) +#define _EXPECT_GE(_expected, _seen) \ + __EXPECT(_expected, _seen, >=, 0) + +#define _EXPECT_NULL(_seen) \ + __EXPECT(NULL, _seen, ==, 0) +#define _EXPECT_TRUE(_seen) \ + _EXPECT_NE(0, _seen) +#define _EXPECT_FALSE(_seen) \ + _EXPECT_EQ(0, _seen) + +#define _EXPECT_STREQ(_expected, _seen) \ + __EXPECT_STR(_expected, _seen, ==, 0) +#define _EXPECT_STRNE(_expected, _seen) \ + __EXPECT_STR(_expected, _seen, !=, 0) + +#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) + +/* Support an optional handler after and ASSERT_* or EXPECT_*. The approach is + * not thread-safe, but it should be fine in most sane test scenarios. + * + * Using __bail(), which optionally abort()s, is the easiest way to early + * return while still providing an optional block to the API consumer. + */ +#define OPTIONAL_HANDLER(_assert) \ + for (; _metadata->trigger; _metadata->trigger = __bail(_assert)) + +#define __EXPECT(_expected, _seen, _t, _assert) do { \ + /* Avoid multiple evaluation of the cases */ \ + __typeof__(_expected) __exp = (_expected); \ + __typeof__(_seen) __seen = (_seen); \ + if (!(__exp _t __seen)) { \ + unsigned long long __exp_print = (unsigned long long)__exp; \ + unsigned long long __seen_print = (unsigned long long)__seen; \ + __TH_LOG("Expected %s (%llu) %s %s (%llu)", \ + #_expected, __exp_print, #_t, \ + #_seen, __seen_print); \ + _metadata->passed = 0; \ + /* Ensure the optional handler is triggered */ \ + _metadata->trigger = 1; \ + } \ +} while (0); OPTIONAL_HANDLER(_assert) + +#define __EXPECT_STR(_expected, _seen, _t, _assert) do { \ + const char *__exp = (_expected); \ + const char *__seen = (_seen); \ + if (!(strcmp(__exp, __seen) _t 0)) { \ + __TH_LOG("Expected '%s' %s '%s'.", __exp, #_t, __seen); \ + _metadata->passed = 0; \ + _metadata->trigger = 1; \ + } \ +} while (0); OPTIONAL_HANDLER(_assert) + +/* Contains all the information for test execution and status checking. */ +struct __test_metadata { + const char *name; + void (*fn)(struct __test_metadata *); + int termsig; + int passed; + int trigger; /* extra handler after the evaluation */ + struct __test_metadata *prev, *next; +}; + +/* Storage for the (global) tests to be run. */ +static struct __test_metadata *__test_list; +static unsigned int __test_count; +static unsigned int __fixture_count; +static int __constructor_order; + +#define _CONSTRUCTOR_ORDER_FORWARD 1 +#define _CONSTRUCTOR_ORDER_BACKWARD -1 + +/* + * Since constructors are called in reverse order, reverse the test + * list so tests are run in source declaration order. + * https://gcc.gnu.org/onlinedocs/gccint/Initialization.html + * However, it seems not all toolchains do this correctly, so use + * __constructor_order to detect which direction is called first + * and adjust list building logic to get things running in the right + * direction. + */ +static inline void __register_test(struct __test_metadata *t) +{ + __test_count++; + /* Circular linked list where only prev is circular. */ + if (__test_list == NULL) { + __test_list = t; + t->next = NULL; + t->prev = t; + return; + } + if (__constructor_order == _CONSTRUCTOR_ORDER_FORWARD) { + t->next = NULL; + t->prev = __test_list->prev; + t->prev->next = t; + __test_list->prev = t; + } else { + t->next = __test_list; + t->next->prev = t; + t->prev = t; + __test_list = t; + } +} + +static inline int __bail(int for_realz) +{ + if (for_realz) + abort(); + return 0; +} + +void __run_test(struct __test_metadata *t) +{ + pid_t child_pid; + int status; + + t->passed = 1; + t->trigger = 0; + printf("[ RUN ] %s\n", t->name); + child_pid = fork(); + if (child_pid < 0) { + printf("ERROR SPAWNING TEST CHILD\n"); + t->passed = 0; + } else if (child_pid == 0) { + t->fn(t); + _exit(t->passed); + } else { + /* TODO(wad) add timeout support. */ + waitpid(child_pid, &status, 0); + if (WIFEXITED(status)) { + t->passed = t->termsig == -1 ? WEXITSTATUS(status) : 0; + if (t->termsig != -1) { + fprintf(TH_LOG_STREAM, + "%s: Test exited normally " + "instead of by signal (code: %d)\n", + t->name, + WEXITSTATUS(status)); + } + } else if (WIFSIGNALED(status)) { + t->passed = 0; + if (WTERMSIG(status) == SIGABRT) { + fprintf(TH_LOG_STREAM, + "%s: Test terminated by assertion\n", + t->name); + } else if (WTERMSIG(status) == t->termsig) { + t->passed = 1; + } else { + fprintf(TH_LOG_STREAM, + "%s: Test terminated unexpectedly " + "by signal %d\n", + t->name, + WTERMSIG(status)); + } + } else { + fprintf(TH_LOG_STREAM, + "%s: Test ended in some other way [%u]\n", + t->name, + status); + } + } + printf("[ %4s ] %s\n", (t->passed ? "OK" : "FAIL"), t->name); +} + +static int test_harness_run(int __attribute__((unused)) argc, + char __attribute__((unused)) **argv) +{ + struct __test_metadata *t; + int ret = 0; + unsigned int count = 0; + unsigned int pass_count = 0; + + /* TODO(wad) add optional arguments similar to gtest. */ + printf("[==========] Running %u tests from %u test cases.\n", + __test_count, __fixture_count + 1); + for (t = __test_list; t; t = t->next) { + count++; + __run_test(t); + if (t->passed) + pass_count++; + else + ret = 1; + } + printf("[==========] %u / %u tests passed.\n", pass_count, count); + printf("[ %s ]\n", (ret ? "FAILED" : "PASSED")); + return ret; +} + +static void __attribute__((constructor)) __constructor_order_first(void) +{ + if (!__constructor_order) + __constructor_order = _CONSTRUCTOR_ORDER_FORWARD; +} + +#endif /* TEST_HARNESS_H_ */ diff --git a/size/Makefile b/size/Makefile index 04dc25e..bbd0b53 100644 --- a/size/Makefile +++ b/size/Makefile @@ -1,12 +1,11 @@ -CC = $(CROSS_COMPILE)gcc - all: get_size get_size: get_size.c $(CC) -static -ffreestanding -nostartfiles -s $< -o $@ -run_tests: all - ./get_size +TEST_PROGS := get_size + +include ../lib.mk clean: $(RM) get_size diff --git a/static_keys/Makefile b/static_keys/Makefile new file mode 100644 index 0000000..9cdadf3 --- /dev/null +++ b/static_keys/Makefile @@ -0,0 +1,8 @@ +# Makefile for static keys selftests + +# No binaries, but make sure arg-less "make" doesn't trigger "run_tests" +all: + +TEST_PROGS := test_static_keys.sh + +include ../lib.mk diff --git a/static_keys/test_static_keys.sh b/static_keys/test_static_keys.sh new file mode 100644 index 0000000..1261e3f --- /dev/null +++ b/static_keys/test_static_keys.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# Runs static keys kernel module tests + +if /sbin/modprobe -q test_static_key_base; then + if /sbin/modprobe -q test_static_keys; then + echo "static_key: ok" + /sbin/modprobe -q -r test_static_keys + /sbin/modprobe -q -r test_static_key_base + else + echo "static_keys: [FAIL]" + /sbin/modprobe -q -r test_static_key_base + fi +else + echo "static_key: [FAIL]" + exit 1 +fi diff --git a/sysctl/Makefile b/sysctl/Makefile index 0a92ada..b3c33e0 100644 --- a/sysctl/Makefile +++ b/sysctl/Makefile @@ -4,16 +4,10 @@ # No binaries, but make sure arg-less "make" doesn't trigger "run_tests". all: -# Allow specific tests to be selected. -test_num: - @/bin/sh ./run_numerictests +TEST_PROGS := run_numerictests run_stringtests +TEST_FILES := common_tests -test_string: - @/bin/sh ./run_stringtests - -run_tests: all test_num test_string +include ../lib.mk # Nothing to clean up. clean: - -.PHONY: all run_tests clean test_num test_string diff --git a/sysctl/run_numerictests b/sysctl/run_numerictests old mode 100644 new mode 100755 diff --git a/sysctl/run_stringtests b/sysctl/run_stringtests old mode 100644 new mode 100755 diff --git a/timers/.gitignore b/timers/.gitignore new file mode 100644 index 0000000..ced9981 --- /dev/null +++ b/timers/.gitignore @@ -0,0 +1,18 @@ +alarmtimer-suspend +change_skew +clocksource-switch +inconsistency-check +leap-a-day +leapcrash +mqueue-lat +nanosleep +nsleep-lat +posix_timers +raw_skew +rtctest +set-2038 +set-tai +set-timer-lat +skew_consistency +threadtest +valid-adjtimex diff --git a/timers/Makefile b/timers/Makefile index eb2859f..89a3f44 100644 --- a/timers/Makefile +++ b/timers/Makefile @@ -1,8 +1,36 @@ -all: - gcc posix_timers.c -o posix_timers -lrt +CC = $(CROSS_COMPILE)gcc +BUILD_FLAGS = -DKTEST +CFLAGS += -O3 -Wl,-no-as-needed -Wall $(BUILD_FLAGS) +LDFLAGS += -lrt -lpthread -run_tests: all - ./posix_timers +# these are all "safe" tests that don't modify +# system time or require escalated privledges +TEST_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \ + inconsistency-check raw_skew threadtest rtctest + +TEST_PROGS_EXTENDED = alarmtimer-suspend valid-adjtimex change_skew \ + skew_consistency clocksource-switch leap-a-day \ + leapcrash set-tai set-2038 + +bins = $(TEST_PROGS) $(TEST_PROGS_EXTENDED) + +all: ${bins} + +include ../lib.mk + +# these tests require escalated privledges +# and may modify the system time or trigger +# other behavior like suspend +run_destructive_tests: run_tests + ./alarmtimer-suspend + ./valid-adjtimex + ./change_skew + ./skew_consistency + ./clocksource-switch + ./leap-a-day -s -i 10 + ./leapcrash + ./set-tai + ./set-2038 clean: - rm -f ./posix_timers + rm -f ${bins} diff --git a/timers/alarmtimer-suspend.c b/timers/alarmtimer-suspend.c new file mode 100644 index 0000000..72cacf5 --- /dev/null +++ b/timers/alarmtimer-suspend.c @@ -0,0 +1,189 @@ +/* alarmtimer suspend test + * John Stultz (john.stultz@linaro.org) + * (C) Copyright Linaro 2013 + * Licensed under the GPLv2 + * + * This test makes sure the alarmtimer & RTC wakeup code is + * functioning. + * + * To build: + * $ gcc alarmtimer-suspend.c -o alarmtimer-suspend -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#include +#include +#include +#include +#include +#include +#include +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 +#define CLOCK_PROCESS_CPUTIME_ID 2 +#define CLOCK_THREAD_CPUTIME_ID 3 +#define CLOCK_MONOTONIC_RAW 4 +#define CLOCK_REALTIME_COARSE 5 +#define CLOCK_MONOTONIC_COARSE 6 +#define CLOCK_BOOTTIME 7 +#define CLOCK_REALTIME_ALARM 8 +#define CLOCK_BOOTTIME_ALARM 9 +#define CLOCK_HWSPECIFIC 10 +#define CLOCK_TAI 11 +#define NR_CLOCKIDS 12 + + +#define NSEC_PER_SEC 1000000000ULL +#define UNREASONABLE_LAT (NSEC_PER_SEC * 5) /* hopefully we resume in 5 secs */ + +#define SUSPEND_SECS 15 +int alarmcount; +int alarm_clock_id; +struct timespec start_time; + + +char *clockstring(int clockid) +{ + switch (clockid) { + case CLOCK_REALTIME: + return "CLOCK_REALTIME"; + case CLOCK_MONOTONIC: + return "CLOCK_MONOTONIC"; + case CLOCK_PROCESS_CPUTIME_ID: + return "CLOCK_PROCESS_CPUTIME_ID"; + case CLOCK_THREAD_CPUTIME_ID: + return "CLOCK_THREAD_CPUTIME_ID"; + case CLOCK_MONOTONIC_RAW: + return "CLOCK_MONOTONIC_RAW"; + case CLOCK_REALTIME_COARSE: + return "CLOCK_REALTIME_COARSE"; + case CLOCK_MONOTONIC_COARSE: + return "CLOCK_MONOTONIC_COARSE"; + case CLOCK_BOOTTIME: + return "CLOCK_BOOTTIME"; + case CLOCK_REALTIME_ALARM: + return "CLOCK_REALTIME_ALARM"; + case CLOCK_BOOTTIME_ALARM: + return "CLOCK_BOOTTIME_ALARM"; + case CLOCK_TAI: + return "CLOCK_TAI"; + }; + return "UNKNOWN_CLOCKID"; +} + + +long long timespec_sub(struct timespec a, struct timespec b) +{ + long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec; + + ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec; + return ret; +} + +int final_ret = 0; + +void sigalarm(int signo) +{ + long long delta_ns; + struct timespec ts; + + clock_gettime(alarm_clock_id, &ts); + alarmcount++; + + delta_ns = timespec_sub(start_time, ts); + delta_ns -= NSEC_PER_SEC * SUSPEND_SECS * alarmcount; + + printf("ALARM(%i): %ld:%ld latency: %lld ns ", alarmcount, ts.tv_sec, + ts.tv_nsec, delta_ns); + + if (delta_ns > UNREASONABLE_LAT) { + printf("[FAIL]\n"); + final_ret = -1; + } else + printf("[OK]\n"); + +} + +int main(void) +{ + timer_t tm1; + struct itimerspec its1, its2; + struct sigevent se; + struct sigaction act; + int signum = SIGRTMAX; + + /* Set up signal handler: */ + sigfillset(&act.sa_mask); + act.sa_flags = 0; + act.sa_handler = sigalarm; + sigaction(signum, &act, NULL); + + /* Set up timer: */ + memset(&se, 0, sizeof(se)); + se.sigev_notify = SIGEV_SIGNAL; + se.sigev_signo = signum; + se.sigev_value.sival_int = 0; + + for (alarm_clock_id = CLOCK_REALTIME_ALARM; + alarm_clock_id <= CLOCK_BOOTTIME_ALARM; + alarm_clock_id++) { + + alarmcount = 0; + if (timer_create(alarm_clock_id, &se, &tm1) == -1) { + printf("timer_create failled, %s unspported?\n", + clockstring(alarm_clock_id)); + break; + } + + clock_gettime(alarm_clock_id, &start_time); + printf("Start time (%s): %ld:%ld\n", clockstring(alarm_clock_id), + start_time.tv_sec, start_time.tv_nsec); + printf("Setting alarm for every %i seconds\n", SUSPEND_SECS); + its1.it_value = start_time; + its1.it_value.tv_sec += SUSPEND_SECS; + its1.it_interval.tv_sec = SUSPEND_SECS; + its1.it_interval.tv_nsec = 0; + + timer_settime(tm1, TIMER_ABSTIME, &its1, &its2); + + while (alarmcount < 5) + sleep(1); /* First 5 alarms, do nothing */ + + printf("Starting suspend loops\n"); + while (alarmcount < 10) { + int ret; + + sleep(3); + ret = system("echo mem > /sys/power/state"); + if (ret) + break; + } + timer_delete(tm1); + } + if (final_ret) + return ksft_exit_fail(); + return ksft_exit_pass(); +} diff --git a/timers/change_skew.c b/timers/change_skew.c new file mode 100644 index 0000000..cb19689 --- /dev/null +++ b/timers/change_skew.c @@ -0,0 +1,107 @@ +/* ADJ_FREQ Skew change test + * by: john stultz (johnstul@us.ibm.com) + * (C) Copyright IBM 2012 + * Licensed under the GPLv2 + * + * NOTE: This is a meta-test which cranks the ADJ_FREQ knob and + * then uses other tests to detect problems. Thus this test requires + * that the raw_skew, inconsistency-check and nanosleep tests be + * present in the same directory it is run from. + * + * To build: + * $ gcc change_skew.c -o change_skew -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#include +#include +#include +#include +#include +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000LL + + +int change_skew_test(int ppm) +{ + struct timex tx; + int ret; + + tx.modes = ADJ_FREQUENCY; + tx.freq = ppm << 16; + + ret = adjtimex(&tx); + if (ret < 0) { + printf("Error adjusting freq\n"); + return ret; + } + + ret = system("./raw_skew"); + ret |= system("./inconsistency-check"); + ret |= system("./nanosleep"); + + return ret; +} + + +int main(int argv, char **argc) +{ + struct timex tx; + int i, ret; + + int ppm[5] = {0, 250, 500, -250, -500}; + + /* Kill ntpd */ + ret = system("killall -9 ntpd"); + + /* Make sure there's no offset adjustment going on */ + tx.modes = ADJ_OFFSET; + tx.offset = 0; + ret = adjtimex(&tx); + + if (ret < 0) { + printf("Maybe you're not running as root?\n"); + return -1; + } + + for (i = 0; i < 5; i++) { + printf("Using %i ppm adjustment\n", ppm[i]); + ret = change_skew_test(ppm[i]); + if (ret) + break; + } + + /* Set things back */ + tx.modes = ADJ_FREQUENCY; + tx.offset = 0; + adjtimex(&tx); + + if (ret) { + printf("[FAIL]"); + return ksft_exit_fail(); + } + printf("[OK]"); + return ksft_exit_pass(); +} diff --git a/timers/clocksource-switch.c b/timers/clocksource-switch.c new file mode 100644 index 0000000..627ec74 --- /dev/null +++ b/timers/clocksource-switch.c @@ -0,0 +1,179 @@ +/* Clocksource change test + * by: john stultz (johnstul@us.ibm.com) + * (C) Copyright IBM 2012 + * Licensed under the GPLv2 + * + * NOTE: This is a meta-test which quickly changes the clocksourc and + * then uses other tests to detect problems. Thus this test requires + * that the inconsistency-check and nanosleep tests be present in the + * same directory it is run from. + * + * To build: + * $ gcc clocksource-switch.c -o clocksource-switch -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + + +int get_clocksources(char list[][30]) +{ + int fd, i; + size_t size; + char buf[512]; + char *head, *tmp; + + fd = open("/sys/devices/system/clocksource/clocksource0/available_clocksource", O_RDONLY); + + size = read(fd, buf, 512); + + close(fd); + + for (i = 0; i < 30; i++) + list[i][0] = '\0'; + + head = buf; + i = 0; + while (head - buf < size) { + /* Find the next space */ + for (tmp = head; *tmp != ' '; tmp++) { + if (*tmp == '\n') + break; + if (*tmp == '\0') + break; + } + *tmp = '\0'; + strcpy(list[i], head); + head = tmp + 1; + i++; + } + + return i-1; +} + +int get_cur_clocksource(char *buf, size_t size) +{ + int fd; + + fd = open("/sys/devices/system/clocksource/clocksource0/current_clocksource", O_RDONLY); + + size = read(fd, buf, size); + + return 0; +} + +int change_clocksource(char *clocksource) +{ + int fd; + size_t size; + + fd = open("/sys/devices/system/clocksource/clocksource0/current_clocksource", O_WRONLY); + + if (fd < 0) + return -1; + + size = write(fd, clocksource, strlen(clocksource)); + + if (size < 0) + return -1; + + close(fd); + return 0; +} + + +int run_tests(int secs) +{ + int ret; + char buf[255]; + + sprintf(buf, "./inconsistency-check -t %i", secs); + ret = system(buf); + if (ret) + return ret; + ret = system("./nanosleep"); + return ret; +} + + +char clocksource_list[10][30]; + +int main(int argv, char **argc) +{ + char orig_clk[512]; + int count, i, status; + pid_t pid; + + get_cur_clocksource(orig_clk, 512); + + count = get_clocksources(clocksource_list); + + if (change_clocksource(clocksource_list[0])) { + printf("Error: You probably need to run this as root\n"); + return -1; + } + + /* Check everything is sane before we start switching asyncrhonously */ + for (i = 0; i < count; i++) { + printf("Validating clocksource %s\n", clocksource_list[i]); + if (change_clocksource(clocksource_list[i])) { + status = -1; + goto out; + } + if (run_tests(5)) { + status = -1; + goto out; + } + } + + + printf("Running Asyncrhonous Switching Tests...\n"); + pid = fork(); + if (!pid) + return run_tests(60); + + while (pid != waitpid(pid, &status, WNOHANG)) + for (i = 0; i < count; i++) + if (change_clocksource(clocksource_list[i])) { + status = -1; + goto out; + } +out: + change_clocksource(orig_clk); + + if (status) + return ksft_exit_fail(); + return ksft_exit_pass(); +} diff --git a/timers/inconsistency-check.c b/timers/inconsistency-check.c new file mode 100644 index 0000000..caf1bc9 --- /dev/null +++ b/timers/inconsistency-check.c @@ -0,0 +1,204 @@ +/* Time inconsistency check test + * by: john stultz (johnstul@us.ibm.com) + * (C) Copyright IBM 2003, 2004, 2005, 2012 + * (C) Copyright Linaro Limited 2015 + * Licensed under the GPLv2 + * + * To build: + * $ gcc inconsistency-check.c -o inconsistency-check -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + + +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define CALLS_PER_LOOP 64 +#define NSEC_PER_SEC 1000000000ULL + +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 +#define CLOCK_PROCESS_CPUTIME_ID 2 +#define CLOCK_THREAD_CPUTIME_ID 3 +#define CLOCK_MONOTONIC_RAW 4 +#define CLOCK_REALTIME_COARSE 5 +#define CLOCK_MONOTONIC_COARSE 6 +#define CLOCK_BOOTTIME 7 +#define CLOCK_REALTIME_ALARM 8 +#define CLOCK_BOOTTIME_ALARM 9 +#define CLOCK_HWSPECIFIC 10 +#define CLOCK_TAI 11 +#define NR_CLOCKIDS 12 + +char *clockstring(int clockid) +{ + switch (clockid) { + case CLOCK_REALTIME: + return "CLOCK_REALTIME"; + case CLOCK_MONOTONIC: + return "CLOCK_MONOTONIC"; + case CLOCK_PROCESS_CPUTIME_ID: + return "CLOCK_PROCESS_CPUTIME_ID"; + case CLOCK_THREAD_CPUTIME_ID: + return "CLOCK_THREAD_CPUTIME_ID"; + case CLOCK_MONOTONIC_RAW: + return "CLOCK_MONOTONIC_RAW"; + case CLOCK_REALTIME_COARSE: + return "CLOCK_REALTIME_COARSE"; + case CLOCK_MONOTONIC_COARSE: + return "CLOCK_MONOTONIC_COARSE"; + case CLOCK_BOOTTIME: + return "CLOCK_BOOTTIME"; + case CLOCK_REALTIME_ALARM: + return "CLOCK_REALTIME_ALARM"; + case CLOCK_BOOTTIME_ALARM: + return "CLOCK_BOOTTIME_ALARM"; + case CLOCK_TAI: + return "CLOCK_TAI"; + }; + return "UNKNOWN_CLOCKID"; +} + +/* returns 1 if a <= b, 0 otherwise */ +static inline int in_order(struct timespec a, struct timespec b) +{ + /* use unsigned to avoid false positives on 2038 rollover */ + if ((unsigned long)a.tv_sec < (unsigned long)b.tv_sec) + return 1; + if ((unsigned long)a.tv_sec > (unsigned long)b.tv_sec) + return 0; + if (a.tv_nsec > b.tv_nsec) + return 0; + return 1; +} + + + +int consistency_test(int clock_type, unsigned long seconds) +{ + struct timespec list[CALLS_PER_LOOP]; + int i, inconsistent; + long now, then; + time_t t; + char *start_str; + + clock_gettime(clock_type, &list[0]); + now = then = list[0].tv_sec; + + /* timestamp start of test */ + t = time(0); + start_str = ctime(&t); + + while (seconds == -1 || now - then < seconds) { + inconsistent = 0; + + /* Fill list */ + for (i = 0; i < CALLS_PER_LOOP; i++) + clock_gettime(clock_type, &list[i]); + + /* Check for inconsistencies */ + for (i = 0; i < CALLS_PER_LOOP - 1; i++) + if (!in_order(list[i], list[i+1])) + inconsistent = i; + + /* display inconsistency */ + if (inconsistent) { + unsigned long long delta; + + printf("\%s\n", start_str); + for (i = 0; i < CALLS_PER_LOOP; i++) { + if (i == inconsistent) + printf("--------------------\n"); + printf("%lu:%lu\n", list[i].tv_sec, + list[i].tv_nsec); + if (i == inconsistent + 1) + printf("--------------------\n"); + } + delta = list[inconsistent].tv_sec * NSEC_PER_SEC; + delta += list[inconsistent].tv_nsec; + delta -= list[inconsistent+1].tv_sec * NSEC_PER_SEC; + delta -= list[inconsistent+1].tv_nsec; + printf("Delta: %llu ns\n", delta); + fflush(0); + /* timestamp inconsistency*/ + t = time(0); + printf("%s\n", ctime(&t)); + printf("[FAILED]\n"); + return -1; + } + now = list[0].tv_sec; + } + printf("[OK]\n"); + return 0; +} + + +int main(int argc, char *argv[]) +{ + int clockid, opt; + int userclock = CLOCK_REALTIME; + int maxclocks = NR_CLOCKIDS; + int runtime = 10; + struct timespec ts; + + /* Process arguments */ + while ((opt = getopt(argc, argv, "t:c:")) != -1) { + switch (opt) { + case 't': + runtime = atoi(optarg); + break; + case 'c': + userclock = atoi(optarg); + maxclocks = userclock + 1; + break; + default: + printf("Usage: %s [-t ] [-c ]\n", argv[0]); + printf(" -t: Number of seconds to run\n"); + printf(" -c: clockid to use (default, all clockids)\n"); + exit(-1); + } + } + + setbuf(stdout, NULL); + + for (clockid = userclock; clockid < maxclocks; clockid++) { + + if (clockid == CLOCK_HWSPECIFIC) + continue; + + if (!clock_gettime(clockid, &ts)) { + printf("Consistent %-30s ", clockstring(clockid)); + if (consistency_test(clockid, runtime)) + return ksft_exit_fail(); + } + } + return ksft_exit_pass(); +} diff --git a/timers/leap-a-day.c b/timers/leap-a-day.c new file mode 100644 index 0000000..fb46ad6 --- /dev/null +++ b/timers/leap-a-day.c @@ -0,0 +1,388 @@ +/* Leap second stress test + * by: John Stultz (john.stultz@linaro.org) + * (C) Copyright IBM 2012 + * (C) Copyright 2013, 2015 Linaro Limited + * Licensed under the GPLv2 + * + * This test signals the kernel to insert a leap second + * every day at midnight GMT. This allows for stessing the + * kernel's leap-second behavior, as well as how well applications + * handle the leap-second discontinuity. + * + * Usage: leap-a-day [-s] [-i ] + * + * Options: + * -s: Each iteration, set the date to 10 seconds before midnight GMT. + * This speeds up the number of leapsecond transitions tested, + * but because it calls settimeofday frequently, advancing the + * time by 24 hours every ~16 seconds, it may cause application + * disruption. + * + * -i: Number of iterations to run (default: infinite) + * + * Other notes: Disabling NTP prior to running this is advised, as the two + * may conflict in their commands to the kernel. + * + * To build: + * $ gcc leap-a-day.c -o leap-a-day -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000ULL +#define CLOCK_TAI 11 + +time_t next_leap; +int error_found; + +/* returns 1 if a <= b, 0 otherwise */ +static inline int in_order(struct timespec a, struct timespec b) +{ + if (a.tv_sec < b.tv_sec) + return 1; + if (a.tv_sec > b.tv_sec) + return 0; + if (a.tv_nsec > b.tv_nsec) + return 0; + return 1; +} + +struct timespec timespec_add(struct timespec ts, unsigned long long ns) +{ + ts.tv_nsec += ns; + while (ts.tv_nsec >= NSEC_PER_SEC) { + ts.tv_nsec -= NSEC_PER_SEC; + ts.tv_sec++; + } + return ts; +} + +char *time_state_str(int state) +{ + switch (state) { + case TIME_OK: return "TIME_OK"; + case TIME_INS: return "TIME_INS"; + case TIME_DEL: return "TIME_DEL"; + case TIME_OOP: return "TIME_OOP"; + case TIME_WAIT: return "TIME_WAIT"; + case TIME_BAD: return "TIME_BAD"; + } + return "ERROR"; +} + +/* clear NTP time_status & time_state */ +int clear_time_state(void) +{ + struct timex tx; + int ret; + + /* + * We have to call adjtime twice here, as kernels + * prior to 6b1859dba01c7 (included in 3.5 and + * -stable), had an issue with the state machine + * and wouldn't clear the STA_INS/DEL flag directly. + */ + tx.modes = ADJ_STATUS; + tx.status = STA_PLL; + ret = adjtimex(&tx); + + /* Clear maxerror, as it can cause UNSYNC to be set */ + tx.modes = ADJ_MAXERROR; + tx.maxerror = 0; + ret = adjtimex(&tx); + + /* Clear the status */ + tx.modes = ADJ_STATUS; + tx.status = 0; + ret = adjtimex(&tx); + + return ret; +} + +/* Make sure we cleanup on ctrl-c */ +void handler(int unused) +{ + clear_time_state(); + exit(0); +} + +void sigalarm(int signo) +{ + struct timex tx; + int ret; + + tx.modes = 0; + ret = adjtimex(&tx); + + if (tx.time.tv_sec < next_leap) { + printf("Error: Early timer expiration! (Should be %ld)\n", next_leap); + error_found = 1; + printf("adjtimex: %10ld sec + %6ld us (%i)\t%s\n", + tx.time.tv_sec, + tx.time.tv_usec, + tx.tai, + time_state_str(ret)); + } + if (ret != TIME_WAIT) { + printf("Error: Timer seeing incorrect NTP state? (Should be TIME_WAIT)\n"); + error_found = 1; + printf("adjtimex: %10ld sec + %6ld us (%i)\t%s\n", + tx.time.tv_sec, + tx.time.tv_usec, + tx.tai, + time_state_str(ret)); + } +} + + +/* Test for known hrtimer failure */ +void test_hrtimer_failure(void) +{ + struct timespec now, target; + + clock_gettime(CLOCK_REALTIME, &now); + target = timespec_add(now, NSEC_PER_SEC/2); + clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &target, NULL); + clock_gettime(CLOCK_REALTIME, &now); + + if (!in_order(target, now)) { + printf("ERROR: hrtimer early expiration failure observed.\n"); + error_found = 1; + } +} + +int main(int argc, char **argv) +{ + timer_t tm1; + struct itimerspec its1; + struct sigevent se; + struct sigaction act; + int signum = SIGRTMAX; + int settime = 0; + int tai_time = 0; + int insert = 1; + int iterations = -1; + int opt; + + /* Process arguments */ + while ((opt = getopt(argc, argv, "sti:")) != -1) { + switch (opt) { + case 's': + printf("Setting time to speed up testing\n"); + settime = 1; + break; + case 'i': + iterations = atoi(optarg); + break; + case 't': + tai_time = 1; + break; + default: + printf("Usage: %s [-s] [-i ]\n", argv[0]); + printf(" -s: Set time to right before leap second each iteration\n"); + printf(" -i: Number of iterations\n"); + printf(" -t: Print TAI time\n"); + exit(-1); + } + } + + /* Make sure TAI support is present if -t was used */ + if (tai_time) { + struct timespec ts; + + if (clock_gettime(CLOCK_TAI, &ts)) { + printf("System doesn't support CLOCK_TAI\n"); + ksft_exit_fail(); + } + } + + signal(SIGINT, handler); + signal(SIGKILL, handler); + + /* Set up timer signal handler: */ + sigfillset(&act.sa_mask); + act.sa_flags = 0; + act.sa_handler = sigalarm; + sigaction(signum, &act, NULL); + + if (iterations < 0) + printf("This runs continuously. Press ctrl-c to stop\n"); + else + printf("Running for %i iterations. Press ctrl-c to stop\n", iterations); + + printf("\n"); + while (1) { + int ret; + struct timespec ts; + struct timex tx; + time_t now; + + /* Get the current time */ + clock_gettime(CLOCK_REALTIME, &ts); + + /* Calculate the next possible leap second 23:59:60 GMT */ + next_leap = ts.tv_sec; + next_leap += 86400 - (next_leap % 86400); + + if (settime) { + struct timeval tv; + + tv.tv_sec = next_leap - 10; + tv.tv_usec = 0; + settimeofday(&tv, NULL); + printf("Setting time to %s", ctime(&tv.tv_sec)); + } + + /* Reset NTP time state */ + clear_time_state(); + + /* Set the leap second insert flag */ + tx.modes = ADJ_STATUS; + if (insert) + tx.status = STA_INS; + else + tx.status = STA_DEL; + ret = adjtimex(&tx); + if (ret < 0) { + printf("Error: Problem setting STA_INS/STA_DEL!: %s\n", + time_state_str(ret)); + return ksft_exit_fail(); + } + + /* Validate STA_INS was set */ + tx.modes = 0; + ret = adjtimex(&tx); + if (tx.status != STA_INS && tx.status != STA_DEL) { + printf("Error: STA_INS/STA_DEL not set!: %s\n", + time_state_str(ret)); + return ksft_exit_fail(); + } + + if (tai_time) { + printf("Using TAI time," + " no inconsistencies should be seen!\n"); + } + + printf("Scheduling leap second for %s", ctime(&next_leap)); + + /* Set up timer */ + printf("Setting timer for %ld - %s", next_leap, ctime(&next_leap)); + memset(&se, 0, sizeof(se)); + se.sigev_notify = SIGEV_SIGNAL; + se.sigev_signo = signum; + se.sigev_value.sival_int = 0; + if (timer_create(CLOCK_REALTIME, &se, &tm1) == -1) { + printf("Error: timer_create failed\n"); + return ksft_exit_fail(); + } + its1.it_value.tv_sec = next_leap; + its1.it_value.tv_nsec = 0; + its1.it_interval.tv_sec = 0; + its1.it_interval.tv_nsec = 0; + timer_settime(tm1, TIMER_ABSTIME, &its1, NULL); + + /* Wake up 3 seconds before leap */ + ts.tv_sec = next_leap - 3; + ts.tv_nsec = 0; + + + while (clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &ts, NULL)) + printf("Something woke us up, returning to sleep\n"); + + /* Validate STA_INS is still set */ + tx.modes = 0; + ret = adjtimex(&tx); + if (tx.status != STA_INS && tx.status != STA_DEL) { + printf("Something cleared STA_INS/STA_DEL, setting it again.\n"); + tx.modes = ADJ_STATUS; + if (insert) + tx.status = STA_INS; + else + tx.status = STA_DEL; + ret = adjtimex(&tx); + } + + /* Check adjtimex output every half second */ + now = tx.time.tv_sec; + while (now < next_leap + 2) { + char buf[26]; + struct timespec tai; + int ret; + + tx.modes = 0; + ret = adjtimex(&tx); + + if (tai_time) { + clock_gettime(CLOCK_TAI, &tai); + printf("%ld sec, %9ld ns\t%s\n", + tai.tv_sec, + tai.tv_nsec, + time_state_str(ret)); + } else { + ctime_r(&tx.time.tv_sec, buf); + buf[strlen(buf)-1] = 0; /*remove trailing\n */ + + printf("%s + %6ld us (%i)\t%s\n", + buf, + tx.time.tv_usec, + tx.tai, + time_state_str(ret)); + } + now = tx.time.tv_sec; + /* Sleep for another half second */ + ts.tv_sec = 0; + ts.tv_nsec = NSEC_PER_SEC / 2; + clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL); + } + /* Switch to using other mode */ + insert = !insert; + + /* Note if kernel has known hrtimer failure */ + test_hrtimer_failure(); + + printf("Leap complete\n"); + if (error_found) { + printf("Errors observed\n"); + clear_time_state(); + return ksft_exit_fail(); + } + printf("\n"); + if ((iterations != -1) && !(--iterations)) + break; + } + + clear_time_state(); + return ksft_exit_pass(); +} diff --git a/timers/leapcrash.c b/timers/leapcrash.c new file mode 100644 index 0000000..a1071bd --- /dev/null +++ b/timers/leapcrash.c @@ -0,0 +1,120 @@ +/* Demo leapsecond deadlock + * by: John Stultz (john.stultz@linaro.org) + * (C) Copyright IBM 2012 + * (C) Copyright 2013, 2015 Linaro Limited + * Licensed under the GPL + * + * This test demonstrates leapsecond deadlock that is possibe + * on kernels from 2.6.26 to 3.3. + * + * WARNING: THIS WILL LIKELY HARDHANG SYSTEMS AND MAY LOSE DATA + * RUN AT YOUR OWN RISK! + * To build: + * $ gcc leapcrash.c -o leapcrash -lrt + */ + + + +#include +#include +#include +#include +#include +#include +#include +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + + + +/* clear NTP time_status & time_state */ +int clear_time_state(void) +{ + struct timex tx; + int ret; + + /* + * We have to call adjtime twice here, as kernels + * prior to 6b1859dba01c7 (included in 3.5 and + * -stable), had an issue with the state machine + * and wouldn't clear the STA_INS/DEL flag directly. + */ + tx.modes = ADJ_STATUS; + tx.status = STA_PLL; + ret = adjtimex(&tx); + + tx.modes = ADJ_STATUS; + tx.status = 0; + ret = adjtimex(&tx); + + return ret; +} + +/* Make sure we cleanup on ctrl-c */ +void handler(int unused) +{ + clear_time_state(); + exit(0); +} + + +int main(void) +{ + struct timex tx; + struct timespec ts; + time_t next_leap; + int count = 0; + + setbuf(stdout, NULL); + + signal(SIGINT, handler); + signal(SIGKILL, handler); + printf("This runs for a few minutes. Press ctrl-c to stop\n"); + + clear_time_state(); + + + /* Get the current time */ + clock_gettime(CLOCK_REALTIME, &ts); + + /* Calculate the next possible leap second 23:59:60 GMT */ + next_leap = ts.tv_sec; + next_leap += 86400 - (next_leap % 86400); + + for (count = 0; count < 20; count++) { + struct timeval tv; + + + /* set the time to 2 seconds before the leap */ + tv.tv_sec = next_leap - 2; + tv.tv_usec = 0; + if (settimeofday(&tv, NULL)) { + printf("Error: You're likely not running with proper (ie: root) permissions\n"); + return ksft_exit_fail(); + } + tx.modes = 0; + adjtimex(&tx); + + /* hammer on adjtime w/ STA_INS */ + while (tx.time.tv_sec < next_leap + 1) { + /* Set the leap second insert flag */ + tx.modes = ADJ_STATUS; + tx.status = STA_INS; + adjtimex(&tx); + } + clear_time_state(); + printf("."); + } + printf("[OK]\n"); + return ksft_exit_pass(); +} diff --git a/timers/mqueue-lat.c b/timers/mqueue-lat.c new file mode 100644 index 0000000..a2a3924 --- /dev/null +++ b/timers/mqueue-lat.c @@ -0,0 +1,124 @@ +/* Measure mqueue timeout latency + * by: john stultz (john.stultz@linaro.org) + * (C) Copyright Linaro 2013 + * + * Inspired with permission from example test by: + * Romain Francoise + * Licensed under the GPLv2 + * + * To build: + * $ gcc mqueue-lat.c -o mqueue-lat -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000ULL + +#define TARGET_TIMEOUT 100000000 /* 100ms in nanoseconds */ +#define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */ + + +long long timespec_sub(struct timespec a, struct timespec b) +{ + long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec; + + ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec; + return ret; +} + +struct timespec timespec_add(struct timespec ts, unsigned long long ns) +{ + ts.tv_nsec += ns; + while (ts.tv_nsec >= NSEC_PER_SEC) { + ts.tv_nsec -= NSEC_PER_SEC; + ts.tv_sec++; + } + return ts; +} + +int mqueue_lat_test(void) +{ + + mqd_t q; + struct mq_attr attr; + struct timespec start, end, now, target; + int i, count, ret; + + q = mq_open("/foo", O_CREAT | O_RDONLY, 0666, NULL); + if (q < 0) { + perror("mq_open"); + return -1; + } + mq_getattr(q, &attr); + + + count = 100; + clock_gettime(CLOCK_MONOTONIC, &start); + + for (i = 0; i < count; i++) { + char buf[attr.mq_msgsize]; + + clock_gettime(CLOCK_REALTIME, &now); + target = now; + target = timespec_add(now, TARGET_TIMEOUT); /* 100ms */ + + ret = mq_timedreceive(q, buf, sizeof(buf), NULL, &target); + if (ret < 0 && errno != ETIMEDOUT) { + perror("mq_timedreceive"); + return -1; + } + } + clock_gettime(CLOCK_MONOTONIC, &end); + + mq_close(q); + + if ((timespec_sub(start, end)/count) > TARGET_TIMEOUT + UNRESONABLE_LATENCY) + return -1; + + return 0; +} + +int main(int argc, char **argv) +{ + int ret; + + printf("Mqueue latency : "); + + ret = mqueue_lat_test(); + if (ret < 0) { + printf("[FAILED]\n"); + return ksft_exit_fail(); + } + printf("[OK]\n"); + return ksft_exit_pass(); +} diff --git a/timers/nanosleep.c b/timers/nanosleep.c new file mode 100644 index 0000000..8a3c29d --- /dev/null +++ b/timers/nanosleep.c @@ -0,0 +1,174 @@ +/* Make sure timers don't return early + * by: john stultz (johnstul@us.ibm.com) + * John Stultz (john.stultz@linaro.org) + * (C) Copyright IBM 2012 + * (C) Copyright Linaro 2013 2015 + * Licensed under the GPLv2 + * + * To build: + * $ gcc nanosleep.c -o nanosleep -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000ULL + +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 +#define CLOCK_PROCESS_CPUTIME_ID 2 +#define CLOCK_THREAD_CPUTIME_ID 3 +#define CLOCK_MONOTONIC_RAW 4 +#define CLOCK_REALTIME_COARSE 5 +#define CLOCK_MONOTONIC_COARSE 6 +#define CLOCK_BOOTTIME 7 +#define CLOCK_REALTIME_ALARM 8 +#define CLOCK_BOOTTIME_ALARM 9 +#define CLOCK_HWSPECIFIC 10 +#define CLOCK_TAI 11 +#define NR_CLOCKIDS 12 + +#define UNSUPPORTED 0xf00f + +char *clockstring(int clockid) +{ + switch (clockid) { + case CLOCK_REALTIME: + return "CLOCK_REALTIME"; + case CLOCK_MONOTONIC: + return "CLOCK_MONOTONIC"; + case CLOCK_PROCESS_CPUTIME_ID: + return "CLOCK_PROCESS_CPUTIME_ID"; + case CLOCK_THREAD_CPUTIME_ID: + return "CLOCK_THREAD_CPUTIME_ID"; + case CLOCK_MONOTONIC_RAW: + return "CLOCK_MONOTONIC_RAW"; + case CLOCK_REALTIME_COARSE: + return "CLOCK_REALTIME_COARSE"; + case CLOCK_MONOTONIC_COARSE: + return "CLOCK_MONOTONIC_COARSE"; + case CLOCK_BOOTTIME: + return "CLOCK_BOOTTIME"; + case CLOCK_REALTIME_ALARM: + return "CLOCK_REALTIME_ALARM"; + case CLOCK_BOOTTIME_ALARM: + return "CLOCK_BOOTTIME_ALARM"; + case CLOCK_TAI: + return "CLOCK_TAI"; + }; + return "UNKNOWN_CLOCKID"; +} + +/* returns 1 if a <= b, 0 otherwise */ +static inline int in_order(struct timespec a, struct timespec b) +{ + if (a.tv_sec < b.tv_sec) + return 1; + if (a.tv_sec > b.tv_sec) + return 0; + if (a.tv_nsec > b.tv_nsec) + return 0; + return 1; +} + +struct timespec timespec_add(struct timespec ts, unsigned long long ns) +{ + ts.tv_nsec += ns; + while (ts.tv_nsec >= NSEC_PER_SEC) { + ts.tv_nsec -= NSEC_PER_SEC; + ts.tv_sec++; + } + return ts; +} + +int nanosleep_test(int clockid, long long ns) +{ + struct timespec now, target, rel; + + /* First check abs time */ + if (clock_gettime(clockid, &now)) + return UNSUPPORTED; + target = timespec_add(now, ns); + + if (clock_nanosleep(clockid, TIMER_ABSTIME, &target, NULL)) + return UNSUPPORTED; + clock_gettime(clockid, &now); + + if (!in_order(target, now)) + return -1; + + /* Second check reltime */ + clock_gettime(clockid, &now); + rel.tv_sec = 0; + rel.tv_nsec = 0; + rel = timespec_add(rel, ns); + target = timespec_add(now, ns); + clock_nanosleep(clockid, 0, &rel, NULL); + clock_gettime(clockid, &now); + + if (!in_order(target, now)) + return -1; + return 0; +} + +int main(int argc, char **argv) +{ + long long length; + int clockid, ret; + + for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) { + + /* Skip cputime clockids since nanosleep won't increment cputime */ + if (clockid == CLOCK_PROCESS_CPUTIME_ID || + clockid == CLOCK_THREAD_CPUTIME_ID || + clockid == CLOCK_HWSPECIFIC) + continue; + + printf("Nanosleep %-31s ", clockstring(clockid)); + + length = 10; + while (length <= (NSEC_PER_SEC * 10)) { + ret = nanosleep_test(clockid, length); + if (ret == UNSUPPORTED) { + printf("[UNSUPPORTED]\n"); + goto next; + } + if (ret < 0) { + printf("[FAILED]\n"); + return ksft_exit_fail(); + } + length *= 100; + } + printf("[OK]\n"); +next: + ret = 0; + } + return ksft_exit_pass(); +} diff --git a/timers/nsleep-lat.c b/timers/nsleep-lat.c new file mode 100644 index 0000000..2d7898f --- /dev/null +++ b/timers/nsleep-lat.c @@ -0,0 +1,190 @@ +/* Measure nanosleep timer latency + * by: john stultz (john.stultz@linaro.org) + * (C) Copyright Linaro 2013 + * Licensed under the GPLv2 + * + * To build: + * $ gcc nsleep-lat.c -o nsleep-lat -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000ULL + +#define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */ + + +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 +#define CLOCK_PROCESS_CPUTIME_ID 2 +#define CLOCK_THREAD_CPUTIME_ID 3 +#define CLOCK_MONOTONIC_RAW 4 +#define CLOCK_REALTIME_COARSE 5 +#define CLOCK_MONOTONIC_COARSE 6 +#define CLOCK_BOOTTIME 7 +#define CLOCK_REALTIME_ALARM 8 +#define CLOCK_BOOTTIME_ALARM 9 +#define CLOCK_HWSPECIFIC 10 +#define CLOCK_TAI 11 +#define NR_CLOCKIDS 12 + +#define UNSUPPORTED 0xf00f + +char *clockstring(int clockid) +{ + switch (clockid) { + case CLOCK_REALTIME: + return "CLOCK_REALTIME"; + case CLOCK_MONOTONIC: + return "CLOCK_MONOTONIC"; + case CLOCK_PROCESS_CPUTIME_ID: + return "CLOCK_PROCESS_CPUTIME_ID"; + case CLOCK_THREAD_CPUTIME_ID: + return "CLOCK_THREAD_CPUTIME_ID"; + case CLOCK_MONOTONIC_RAW: + return "CLOCK_MONOTONIC_RAW"; + case CLOCK_REALTIME_COARSE: + return "CLOCK_REALTIME_COARSE"; + case CLOCK_MONOTONIC_COARSE: + return "CLOCK_MONOTONIC_COARSE"; + case CLOCK_BOOTTIME: + return "CLOCK_BOOTTIME"; + case CLOCK_REALTIME_ALARM: + return "CLOCK_REALTIME_ALARM"; + case CLOCK_BOOTTIME_ALARM: + return "CLOCK_BOOTTIME_ALARM"; + case CLOCK_TAI: + return "CLOCK_TAI"; + }; + return "UNKNOWN_CLOCKID"; +} + +struct timespec timespec_add(struct timespec ts, unsigned long long ns) +{ + ts.tv_nsec += ns; + while (ts.tv_nsec >= NSEC_PER_SEC) { + ts.tv_nsec -= NSEC_PER_SEC; + ts.tv_sec++; + } + return ts; +} + + +long long timespec_sub(struct timespec a, struct timespec b) +{ + long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec; + + ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec; + return ret; +} + +int nanosleep_lat_test(int clockid, long long ns) +{ + struct timespec start, end, target; + long long latency = 0; + int i, count; + + target.tv_sec = ns/NSEC_PER_SEC; + target.tv_nsec = ns%NSEC_PER_SEC; + + if (clock_gettime(clockid, &start)) + return UNSUPPORTED; + if (clock_nanosleep(clockid, 0, &target, NULL)) + return UNSUPPORTED; + + count = 10; + + /* First check relative latency */ + clock_gettime(clockid, &start); + for (i = 0; i < count; i++) + clock_nanosleep(clockid, 0, &target, NULL); + clock_gettime(clockid, &end); + + if (((timespec_sub(start, end)/count)-ns) > UNRESONABLE_LATENCY) { + printf("Large rel latency: %lld ns :", (timespec_sub(start, end)/count)-ns); + return -1; + } + + /* Next check absolute latency */ + for (i = 0; i < count; i++) { + clock_gettime(clockid, &start); + target = timespec_add(start, ns); + clock_nanosleep(clockid, TIMER_ABSTIME, &target, NULL); + clock_gettime(clockid, &end); + latency += timespec_sub(target, end); + } + + if (latency/count > UNRESONABLE_LATENCY) { + printf("Large abs latency: %lld ns :", latency/count); + return -1; + } + + return 0; +} + + + +int main(int argc, char **argv) +{ + long long length; + int clockid, ret; + + for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) { + + /* Skip cputime clockids since nanosleep won't increment cputime */ + if (clockid == CLOCK_PROCESS_CPUTIME_ID || + clockid == CLOCK_THREAD_CPUTIME_ID || + clockid == CLOCK_HWSPECIFIC) + continue; + + printf("nsleep latency %-26s ", clockstring(clockid)); + + length = 10; + while (length <= (NSEC_PER_SEC * 10)) { + ret = nanosleep_lat_test(clockid, length); + if (ret) + break; + length *= 100; + + } + + if (ret == UNSUPPORTED) { + printf("[UNSUPPORTED]\n"); + continue; + } + if (ret < 0) { + printf("[FAILED]\n"); + return ksft_exit_fail(); + } + printf("[OK]\n"); + } + return ksft_exit_pass(); +} diff --git a/timers/posix_timers.c b/timers/posix_timers.c index f87d970..5a246a0 100644 --- a/timers/posix_timers.c +++ b/timers/posix_timers.c @@ -35,10 +35,11 @@ static void user_loop(void) static void kernel_loop(void) { void *addr = sbrk(0); + int err = 0; - while (!done) { - brk(addr + 4096); - brk(addr); + while (!done && !err) { + err = brk(addr + 4096); + err |= brk(addr); } } @@ -190,8 +191,6 @@ static int check_timer_create(int which) int main(int argc, char **argv) { - int err; - printf("Testing posix timers. False negative may happen on CPU execution \n"); printf("based timers if other threads run on the CPU...\n"); diff --git a/timers/raw_skew.c b/timers/raw_skew.c new file mode 100644 index 0000000..30906bf --- /dev/null +++ b/timers/raw_skew.c @@ -0,0 +1,154 @@ +/* CLOCK_MONOTONIC vs CLOCK_MONOTONIC_RAW skew test + * by: john stultz (johnstul@us.ibm.com) + * John Stultz + * (C) Copyright IBM 2012 + * (C) Copyright Linaro Limited 2015 + * Licensed under the GPLv2 + * + * To build: + * $ gcc raw_skew.c -o raw_skew -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + + +#define CLOCK_MONOTONIC_RAW 4 +#define NSEC_PER_SEC 1000000000LL + +#define shift_right(x, s) ({ \ + __typeof__(x) __x = (x); \ + __typeof__(s) __s = (s); \ + __x < 0 ? -(-__x >> __s) : __x >> __s; \ +}) + +long long llabs(long long val) +{ + if (val < 0) + val = -val; + return val; +} + +unsigned long long ts_to_nsec(struct timespec ts) +{ + return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; +} + +struct timespec nsec_to_ts(long long ns) +{ + struct timespec ts; + + ts.tv_sec = ns/NSEC_PER_SEC; + ts.tv_nsec = ns%NSEC_PER_SEC; + return ts; +} + +long long diff_timespec(struct timespec start, struct timespec end) +{ + long long start_ns, end_ns; + + start_ns = ts_to_nsec(start); + end_ns = ts_to_nsec(end); + return end_ns - start_ns; +} + +void get_monotonic_and_raw(struct timespec *mon, struct timespec *raw) +{ + struct timespec start, mid, end; + long long diff = 0, tmp; + int i; + + for (i = 0; i < 3; i++) { + long long newdiff; + + clock_gettime(CLOCK_MONOTONIC, &start); + clock_gettime(CLOCK_MONOTONIC_RAW, &mid); + clock_gettime(CLOCK_MONOTONIC, &end); + + newdiff = diff_timespec(start, end); + if (diff == 0 || newdiff < diff) { + diff = newdiff; + *raw = mid; + tmp = (ts_to_nsec(start) + ts_to_nsec(end))/2; + *mon = nsec_to_ts(tmp); + } + } +} + +int main(int argv, char **argc) +{ + struct timespec mon, raw, start, end; + long long delta1, delta2, interval, eppm, ppm; + struct timex tx1, tx2; + + setbuf(stdout, NULL); + + if (clock_gettime(CLOCK_MONOTONIC_RAW, &raw)) { + printf("ERR: NO CLOCK_MONOTONIC_RAW\n"); + return -1; + } + + tx1.modes = 0; + adjtimex(&tx1); + get_monotonic_and_raw(&mon, &raw); + start = mon; + delta1 = diff_timespec(mon, raw); + + if (tx1.offset) + printf("WARNING: ADJ_OFFSET in progress, this will cause inaccurate results\n"); + + printf("Estimating clock drift: "); + sleep(120); + + get_monotonic_and_raw(&mon, &raw); + end = mon; + tx2.modes = 0; + adjtimex(&tx2); + delta2 = diff_timespec(mon, raw); + + interval = diff_timespec(start, end); + + /* calculate measured ppm between MONOTONIC and MONOTONIC_RAW */ + eppm = ((delta2-delta1)*NSEC_PER_SEC)/interval; + eppm = -eppm; + printf("%lld.%i(est)", eppm/1000, abs((int)(eppm%1000))); + + /* Avg the two actual freq samples adjtimex gave us */ + ppm = (tx1.freq + tx2.freq) * 1000 / 2; + ppm = (long long)tx1.freq * 1000; + ppm = shift_right(ppm, 16); + printf(" %lld.%i(act)", ppm/1000, abs((int)(ppm%1000))); + + if (llabs(eppm - ppm) > 1000) { + printf(" [FAILED]\n"); + return ksft_exit_fail(); + } + printf(" [OK]\n"); + return ksft_exit_pass(); +} diff --git a/timers/rtctest.c b/timers/rtctest.c new file mode 100644 index 0000000..d80ae85 --- /dev/null +++ b/timers/rtctest.c @@ -0,0 +1,271 @@ +/* + * Real Time Clock Driver Test/Example Program + * + * Compile with: + * gcc -s -Wall -Wstrict-prototypes rtctest.c -o rtctest + * + * Copyright (C) 1996, Paul Gortmaker. + * + * Released under the GNU General Public License, version 2, + * included herein by reference. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* + * This expects the new RTC class driver framework, working with + * clocks that will often not be clones of what the PC-AT had. + * Use the command line to specify another RTC if you need one. + */ +static const char default_rtc[] = "/dev/rtc0"; + + +int main(int argc, char **argv) +{ + int i, fd, retval, irqcount = 0; + unsigned long tmp, data; + struct rtc_time rtc_tm; + const char *rtc = default_rtc; + struct timeval start, end, diff; + + switch (argc) { + case 2: + rtc = argv[1]; + /* FALLTHROUGH */ + case 1: + break; + default: + fprintf(stderr, "usage: rtctest [rtcdev]\n"); + return 1; + } + + fd = open(rtc, O_RDONLY); + + if (fd == -1) { + perror(rtc); + exit(errno); + } + + fprintf(stderr, "\n\t\t\tRTC Driver Test Example.\n\n"); + + /* Turn on update interrupts (one per second) */ + retval = ioctl(fd, RTC_UIE_ON, 0); + if (retval == -1) { + if (errno == ENOTTY) { + fprintf(stderr, + "\n...Update IRQs not supported.\n"); + goto test_READ; + } + perror("RTC_UIE_ON ioctl"); + exit(errno); + } + + fprintf(stderr, "Counting 5 update (1/sec) interrupts from reading %s:", + rtc); + fflush(stderr); + for (i=1; i<6; i++) { + /* This read will block */ + retval = read(fd, &data, sizeof(unsigned long)); + if (retval == -1) { + perror("read"); + exit(errno); + } + fprintf(stderr, " %d",i); + fflush(stderr); + irqcount++; + } + + fprintf(stderr, "\nAgain, from using select(2) on /dev/rtc:"); + fflush(stderr); + for (i=1; i<6; i++) { + struct timeval tv = {5, 0}; /* 5 second timeout on select */ + fd_set readfds; + + FD_ZERO(&readfds); + FD_SET(fd, &readfds); + /* The select will wait until an RTC interrupt happens. */ + retval = select(fd+1, &readfds, NULL, NULL, &tv); + if (retval == -1) { + perror("select"); + exit(errno); + } + /* This read won't block unlike the select-less case above. */ + retval = read(fd, &data, sizeof(unsigned long)); + if (retval == -1) { + perror("read"); + exit(errno); + } + fprintf(stderr, " %d",i); + fflush(stderr); + irqcount++; + } + + /* Turn off update interrupts */ + retval = ioctl(fd, RTC_UIE_OFF, 0); + if (retval == -1) { + perror("RTC_UIE_OFF ioctl"); + exit(errno); + } + +test_READ: + /* Read the RTC time/date */ + retval = ioctl(fd, RTC_RD_TIME, &rtc_tm); + if (retval == -1) { + perror("RTC_RD_TIME ioctl"); + exit(errno); + } + + fprintf(stderr, "\n\nCurrent RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n", + rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900, + rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec); + + /* Set the alarm to 5 sec in the future, and check for rollover */ + rtc_tm.tm_sec += 5; + if (rtc_tm.tm_sec >= 60) { + rtc_tm.tm_sec %= 60; + rtc_tm.tm_min++; + } + if (rtc_tm.tm_min == 60) { + rtc_tm.tm_min = 0; + rtc_tm.tm_hour++; + } + if (rtc_tm.tm_hour == 24) + rtc_tm.tm_hour = 0; + + retval = ioctl(fd, RTC_ALM_SET, &rtc_tm); + if (retval == -1) { + if (errno == ENOTTY) { + fprintf(stderr, + "\n...Alarm IRQs not supported.\n"); + goto test_PIE; + } + perror("RTC_ALM_SET ioctl"); + exit(errno); + } + + /* Read the current alarm settings */ + retval = ioctl(fd, RTC_ALM_READ, &rtc_tm); + if (retval == -1) { + perror("RTC_ALM_READ ioctl"); + exit(errno); + } + + fprintf(stderr, "Alarm time now set to %02d:%02d:%02d.\n", + rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec); + + /* Enable alarm interrupts */ + retval = ioctl(fd, RTC_AIE_ON, 0); + if (retval == -1) { + perror("RTC_AIE_ON ioctl"); + exit(errno); + } + + fprintf(stderr, "Waiting 5 seconds for alarm..."); + fflush(stderr); + /* This blocks until the alarm ring causes an interrupt */ + retval = read(fd, &data, sizeof(unsigned long)); + if (retval == -1) { + perror("read"); + exit(errno); + } + irqcount++; + fprintf(stderr, " okay. Alarm rang.\n"); + + /* Disable alarm interrupts */ + retval = ioctl(fd, RTC_AIE_OFF, 0); + if (retval == -1) { + perror("RTC_AIE_OFF ioctl"); + exit(errno); + } + +test_PIE: + /* Read periodic IRQ rate */ + retval = ioctl(fd, RTC_IRQP_READ, &tmp); + if (retval == -1) { + /* not all RTCs support periodic IRQs */ + if (errno == ENOTTY) { + fprintf(stderr, "\nNo periodic IRQ support\n"); + goto done; + } + perror("RTC_IRQP_READ ioctl"); + exit(errno); + } + fprintf(stderr, "\nPeriodic IRQ rate is %ldHz.\n", tmp); + + fprintf(stderr, "Counting 20 interrupts at:"); + fflush(stderr); + + /* The frequencies 128Hz, 256Hz, ... 8192Hz are only allowed for root. */ + for (tmp=2; tmp<=64; tmp*=2) { + + retval = ioctl(fd, RTC_IRQP_SET, tmp); + if (retval == -1) { + /* not all RTCs can change their periodic IRQ rate */ + if (errno == ENOTTY) { + fprintf(stderr, + "\n...Periodic IRQ rate is fixed\n"); + goto done; + } + perror("RTC_IRQP_SET ioctl"); + exit(errno); + } + + fprintf(stderr, "\n%ldHz:\t", tmp); + fflush(stderr); + + /* Enable periodic interrupts */ + retval = ioctl(fd, RTC_PIE_ON, 0); + if (retval == -1) { + perror("RTC_PIE_ON ioctl"); + exit(errno); + } + + for (i=1; i<21; i++) { + gettimeofday(&start, NULL); + /* This blocks */ + retval = read(fd, &data, sizeof(unsigned long)); + if (retval == -1) { + perror("read"); + exit(errno); + } + gettimeofday(&end, NULL); + timersub(&end, &start, &diff); + if (diff.tv_sec > 0 || + diff.tv_usec > ((1000000L / tmp) * 1.10)) { + fprintf(stderr, "\nPIE delta error: %ld.%06ld should be close to 0.%06ld\n", + diff.tv_sec, diff.tv_usec, + (1000000L / tmp)); + fflush(stdout); + exit(-1); + } + + fprintf(stderr, " %d",i); + fflush(stderr); + irqcount++; + } + + /* Disable periodic interrupts */ + retval = ioctl(fd, RTC_PIE_OFF, 0); + if (retval == -1) { + perror("RTC_PIE_OFF ioctl"); + exit(errno); + } + } + +done: + fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n"); + + close(fd); + + return 0; +} diff --git a/timers/set-2038.c b/timers/set-2038.c new file mode 100644 index 0000000..c8a7e14 --- /dev/null +++ b/timers/set-2038.c @@ -0,0 +1,144 @@ +/* Time bounds setting test + * by: john stultz (johnstul@us.ibm.com) + * (C) Copyright IBM 2012 + * Licensed under the GPLv2 + * + * NOTE: This is a meta-test which sets the time to edge cases then + * uses other tests to detect problems. Thus this test requires that + * the inconsistency-check and nanosleep tests be present in the same + * directory it is run from. + * + * To build: + * $ gcc set-2038.c -o set-2038 -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000LL + +#define KTIME_MAX ((long long)~((unsigned long long)1 << 63)) +#define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) + +#define YEAR_1901 (-0x7fffffffL) +#define YEAR_1970 1 +#define YEAR_2038 0x7fffffffL /*overflows 32bit time_t */ +#define YEAR_2262 KTIME_SEC_MAX /*overflows 64bit ktime_t */ +#define YEAR_MAX ((long long)((1ULL<<63)-1)) /*overflows 64bit time_t */ + +int is32bits(void) +{ + return (sizeof(long) == 4); +} + +int settime(long long time) +{ + struct timeval now; + int ret; + + now.tv_sec = (time_t)time; + now.tv_usec = 0; + + ret = settimeofday(&now, NULL); + + printf("Setting time to 0x%lx: %d\n", (long)time, ret); + return ret; +} + +int do_tests(void) +{ + int ret; + + ret = system("date"); + ret = system("./inconsistency-check -c 0 -t 20"); + ret |= system("./nanosleep"); + ret |= system("./nsleep-lat"); + return ret; + +} + +int main(int argc, char *argv[]) +{ + int ret = 0; + int opt, dangerous = 0; + time_t start; + + /* Process arguments */ + while ((opt = getopt(argc, argv, "d")) != -1) { + switch (opt) { + case 'd': + dangerous = 1; + } + } + + start = time(0); + + /* First test that crazy values don't work */ + if (!settime(YEAR_1901)) { + ret = -1; + goto out; + } + if (!settime(YEAR_MAX)) { + ret = -1; + goto out; + } + if (!is32bits() && !settime(YEAR_2262)) { + ret = -1; + goto out; + } + + /* Now test behavior near edges */ + settime(YEAR_1970); + ret = do_tests(); + if (ret) + goto out; + + settime(YEAR_2038 - 600); + ret = do_tests(); + if (ret) + goto out; + + /* The rest of the tests can blowup on 32bit systems */ + if (is32bits() && !dangerous) + goto out; + /* Test rollover behavior 32bit edge */ + settime(YEAR_2038 - 10); + ret = do_tests(); + if (ret) + goto out; + + settime(YEAR_2262 - 600); + ret = do_tests(); + +out: + /* restore clock */ + settime(start); + if (ret) + return ksft_exit_fail(); + return ksft_exit_pass(); +} diff --git a/timers/set-tai.c b/timers/set-tai.c new file mode 100644 index 0000000..dc88dbc --- /dev/null +++ b/timers/set-tai.c @@ -0,0 +1,79 @@ +/* Set tai offset + * by: John Stultz + * (C) Copyright Linaro 2013 + * Licensed under the GPLv2 + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +int set_tai(int offset) +{ + struct timex tx; + + memset(&tx, 0, sizeof(tx)); + + tx.modes = ADJ_TAI; + tx.constant = offset; + + return adjtimex(&tx); +} + +int get_tai(void) +{ + struct timex tx; + + memset(&tx, 0, sizeof(tx)); + + adjtimex(&tx); + return tx.tai; +} + +int main(int argc, char **argv) +{ + int i, ret; + + ret = get_tai(); + printf("tai offset started at %i\n", ret); + + printf("Checking tai offsets can be properly set: "); + for (i = 1; i <= 60; i++) { + ret = set_tai(i); + ret = get_tai(); + if (ret != i) { + printf("[FAILED] expected: %i got %i\n", i, ret); + return ksft_exit_fail(); + } + } + printf("[OK]\n"); + return ksft_exit_pass(); +} diff --git a/timers/set-timer-lat.c b/timers/set-timer-lat.c new file mode 100644 index 0000000..4fc98c5 --- /dev/null +++ b/timers/set-timer-lat.c @@ -0,0 +1,216 @@ +/* set_timer latency test + * John Stultz (john.stultz@linaro.org) + * (C) Copyright Linaro 2014 + * Licensed under the GPLv2 + * + * This test makes sure the set_timer api is correct + * + * To build: + * $ gcc set-timer-lat.c -o set-timer-lat -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#include +#include +#include +#include +#include +#include +#include +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 +#define CLOCK_PROCESS_CPUTIME_ID 2 +#define CLOCK_THREAD_CPUTIME_ID 3 +#define CLOCK_MONOTONIC_RAW 4 +#define CLOCK_REALTIME_COARSE 5 +#define CLOCK_MONOTONIC_COARSE 6 +#define CLOCK_BOOTTIME 7 +#define CLOCK_REALTIME_ALARM 8 +#define CLOCK_BOOTTIME_ALARM 9 +#define CLOCK_HWSPECIFIC 10 +#define CLOCK_TAI 11 +#define NR_CLOCKIDS 12 + + +#define NSEC_PER_SEC 1000000000ULL +#define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */ + +#define TIMER_SECS 1 +int alarmcount; +int clock_id; +struct timespec start_time; +long long max_latency_ns; + +char *clockstring(int clockid) +{ + switch (clockid) { + case CLOCK_REALTIME: + return "CLOCK_REALTIME"; + case CLOCK_MONOTONIC: + return "CLOCK_MONOTONIC"; + case CLOCK_PROCESS_CPUTIME_ID: + return "CLOCK_PROCESS_CPUTIME_ID"; + case CLOCK_THREAD_CPUTIME_ID: + return "CLOCK_THREAD_CPUTIME_ID"; + case CLOCK_MONOTONIC_RAW: + return "CLOCK_MONOTONIC_RAW"; + case CLOCK_REALTIME_COARSE: + return "CLOCK_REALTIME_COARSE"; + case CLOCK_MONOTONIC_COARSE: + return "CLOCK_MONOTONIC_COARSE"; + case CLOCK_BOOTTIME: + return "CLOCK_BOOTTIME"; + case CLOCK_REALTIME_ALARM: + return "CLOCK_REALTIME_ALARM"; + case CLOCK_BOOTTIME_ALARM: + return "CLOCK_BOOTTIME_ALARM"; + case CLOCK_TAI: + return "CLOCK_TAI"; + }; + return "UNKNOWN_CLOCKID"; +} + + +long long timespec_sub(struct timespec a, struct timespec b) +{ + long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec; + + ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec; + return ret; +} + + +void sigalarm(int signo) +{ + long long delta_ns; + struct timespec ts; + + clock_gettime(clock_id, &ts); + alarmcount++; + + delta_ns = timespec_sub(start_time, ts); + delta_ns -= NSEC_PER_SEC * TIMER_SECS * alarmcount; + + if (delta_ns < 0) + printf("%s timer fired early: FAIL\n", clockstring(clock_id)); + + if (delta_ns > max_latency_ns) + max_latency_ns = delta_ns; +} + +int do_timer(int clock_id, int flags) +{ + struct sigevent se; + timer_t tm1; + struct itimerspec its1, its2; + int err; + + /* Set up timer: */ + memset(&se, 0, sizeof(se)); + se.sigev_notify = SIGEV_SIGNAL; + se.sigev_signo = SIGRTMAX; + se.sigev_value.sival_int = 0; + + max_latency_ns = 0; + alarmcount = 0; + + err = timer_create(clock_id, &se, &tm1); + if (err) { + if ((clock_id == CLOCK_REALTIME_ALARM) || + (clock_id == CLOCK_BOOTTIME_ALARM)) { + printf("%-22s %s missing CAP_WAKE_ALARM? : [UNSUPPORTED]\n", + clockstring(clock_id), + flags ? "ABSTIME":"RELTIME"); + return 0; + } + printf("%s - timer_create() failed\n", clockstring(clock_id)); + return -1; + } + + clock_gettime(clock_id, &start_time); + if (flags) { + its1.it_value = start_time; + its1.it_value.tv_sec += TIMER_SECS; + } else { + its1.it_value.tv_sec = TIMER_SECS; + its1.it_value.tv_nsec = 0; + } + its1.it_interval.tv_sec = TIMER_SECS; + its1.it_interval.tv_nsec = 0; + + err = timer_settime(tm1, flags, &its1, &its2); + if (err) { + printf("%s - timer_settime() failed\n", clockstring(clock_id)); + return -1; + } + + while (alarmcount < 5) + sleep(1); + + printf("%-22s %s max latency: %10lld ns : ", + clockstring(clock_id), + flags ? "ABSTIME":"RELTIME", + max_latency_ns); + + timer_delete(tm1); + if (max_latency_ns < UNRESONABLE_LATENCY) { + printf("[OK]\n"); + return 0; + } + printf("[FAILED]\n"); + return -1; +} + +int main(void) +{ + struct sigaction act; + int signum = SIGRTMAX; + int ret = 0; + + /* Set up signal handler: */ + sigfillset(&act.sa_mask); + act.sa_flags = 0; + act.sa_handler = sigalarm; + sigaction(signum, &act, NULL); + + printf("Setting timers for every %i seconds\n", TIMER_SECS); + for (clock_id = 0; clock_id < NR_CLOCKIDS; clock_id++) { + + if ((clock_id == CLOCK_PROCESS_CPUTIME_ID) || + (clock_id == CLOCK_THREAD_CPUTIME_ID) || + (clock_id == CLOCK_MONOTONIC_RAW) || + (clock_id == CLOCK_REALTIME_COARSE) || + (clock_id == CLOCK_MONOTONIC_COARSE) || + (clock_id == CLOCK_HWSPECIFIC)) + continue; + + ret |= do_timer(clock_id, TIMER_ABSTIME); + ret |= do_timer(clock_id, 0); + } + if (ret) + return ksft_exit_fail(); + return ksft_exit_pass(); +} diff --git a/timers/skew_consistency.c b/timers/skew_consistency.c new file mode 100644 index 0000000..5562f84 --- /dev/null +++ b/timers/skew_consistency.c @@ -0,0 +1,89 @@ +/* ADJ_FREQ Skew consistency test + * by: john stultz (johnstul@us.ibm.com) + * (C) Copyright IBM 2012 + * Licensed under the GPLv2 + * + * NOTE: This is a meta-test which cranks the ADJ_FREQ knob back + * and forth and watches for consistency problems. Thus this test requires + * that the inconsistency-check tests be present in the same directory it + * is run from. + * + * To build: + * $ gcc skew_consistency.c -o skew_consistency -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000LL + +int main(int argv, char **argc) +{ + struct timex tx; + int ret, ppm; + pid_t pid; + + + printf("Running Asyncrhonous Frequency Changing Tests...\n"); + + pid = fork(); + if (!pid) + return system("./inconsistency-check -c 1 -t 600"); + + ppm = 500; + ret = 0; + + while (pid != waitpid(pid, &ret, WNOHANG)) { + ppm = -ppm; + tx.modes = ADJ_FREQUENCY; + tx.freq = ppm << 16; + adjtimex(&tx); + usleep(500000); + } + + /* Set things back */ + tx.modes = ADJ_FREQUENCY; + tx.offset = 0; + adjtimex(&tx); + + + if (ret) { + printf("[FAILED]\n"); + return ksft_exit_fail(); + } + printf("[OK]\n"); + return ksft_exit_pass(); +} diff --git a/timers/threadtest.c b/timers/threadtest.c new file mode 100644 index 0000000..e632e11 --- /dev/null +++ b/timers/threadtest.c @@ -0,0 +1,204 @@ +/* threadtest.c + * by: john stultz (johnstul@us.ibm.com) + * (C) Copyright IBM 2004, 2005, 2006, 2012 + * Licensed under the GPLv2 + * + * To build: + * $ gcc threadtest.c -o threadtest -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include +#include +#include +#include +#include +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + + +/* serializes shared list access */ +pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER; +/* serializes console output */ +pthread_mutex_t print_lock = PTHREAD_MUTEX_INITIALIZER; + + +#define MAX_THREADS 128 +#define LISTSIZE 128 + +int done = 0; + +struct timespec global_list[LISTSIZE]; +int listcount = 0; + + +void checklist(struct timespec *list, int size) +{ + int i, j; + struct timespec *a, *b; + + /* scan the list */ + for (i = 0; i < size-1; i++) { + a = &list[i]; + b = &list[i+1]; + + /* look for any time inconsistencies */ + if ((b->tv_sec <= a->tv_sec) && + (b->tv_nsec < a->tv_nsec)) { + + /* flag other threads */ + done = 1; + + /*serialize printing to avoid junky output*/ + pthread_mutex_lock(&print_lock); + + /* dump the list */ + printf("\n"); + for (j = 0; j < size; j++) { + if (j == i) + printf("---------------\n"); + printf("%lu:%lu\n", list[j].tv_sec, list[j].tv_nsec); + if (j == i+1) + printf("---------------\n"); + } + printf("[FAILED]\n"); + + pthread_mutex_unlock(&print_lock); + } + } +} + +/* The shared thread shares a global list + * that each thread fills while holding the lock. + * This stresses clock syncronization across cpus. + */ +void *shared_thread(void *arg) +{ + while (!done) { + /* protect the list */ + pthread_mutex_lock(&list_lock); + + /* see if we're ready to check the list */ + if (listcount >= LISTSIZE) { + checklist(global_list, LISTSIZE); + listcount = 0; + } + clock_gettime(CLOCK_MONOTONIC, &global_list[listcount++]); + + pthread_mutex_unlock(&list_lock); + } + return NULL; +} + + +/* Each independent thread fills in its own + * list. This stresses clock_gettime() lock contention. + */ +void *independent_thread(void *arg) +{ + struct timespec my_list[LISTSIZE]; + int count; + + while (!done) { + /* fill the list */ + for (count = 0; count < LISTSIZE; count++) + clock_gettime(CLOCK_MONOTONIC, &my_list[count]); + checklist(my_list, LISTSIZE); + } + return NULL; +} + +#define DEFAULT_THREAD_COUNT 8 +#define DEFAULT_RUNTIME 30 + +int main(int argc, char **argv) +{ + int thread_count, i; + time_t start, now, runtime; + char buf[255]; + pthread_t pth[MAX_THREADS]; + int opt; + void *tret; + int ret = 0; + void *(*thread)(void *) = shared_thread; + + thread_count = DEFAULT_THREAD_COUNT; + runtime = DEFAULT_RUNTIME; + + /* Process arguments */ + while ((opt = getopt(argc, argv, "t:n:i")) != -1) { + switch (opt) { + case 't': + runtime = atoi(optarg); + break; + case 'n': + thread_count = atoi(optarg); + break; + case 'i': + thread = independent_thread; + printf("using independent threads\n"); + break; + default: + printf("Usage: %s [-t ] [-n ] [-i]\n", argv[0]); + printf(" -t: time to run\n"); + printf(" -n: number of threads\n"); + printf(" -i: use independent threads\n"); + return -1; + } + } + + if (thread_count > MAX_THREADS) + thread_count = MAX_THREADS; + + + setbuf(stdout, NULL); + + start = time(0); + strftime(buf, 255, "%a, %d %b %Y %T %z", localtime(&start)); + printf("%s\n", buf); + printf("Testing consistency with %i threads for %ld seconds: ", thread_count, runtime); + + /* spawn */ + for (i = 0; i < thread_count; i++) + pthread_create(&pth[i], 0, thread, 0); + + while (time(&now) < start + runtime) { + sleep(1); + if (done) { + ret = 1; + strftime(buf, 255, "%a, %d %b %Y %T %z", localtime(&now)); + printf("%s\n", buf); + goto out; + } + } + printf("[OK]\n"); + done = 1; + +out: + /* wait */ + for (i = 0; i < thread_count; i++) + pthread_join(pth[i], &tret); + + /* die */ + if (ret) + ksft_exit_fail(); + return ksft_exit_pass(); +} diff --git a/timers/valid-adjtimex.c b/timers/valid-adjtimex.c new file mode 100644 index 0000000..e86d937 --- /dev/null +++ b/timers/valid-adjtimex.c @@ -0,0 +1,202 @@ +/* valid adjtimex test + * by: John Stultz + * (C) Copyright Linaro 2015 + * Licensed under the GPLv2 + * + * This test validates adjtimex interface with valid + * and invalid test data. + * + * Usage: valid-adjtimex + * + * To build: + * $ gcc valid-adjtimex.c -o valid-adjtimex -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + + +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000L + +/* clear NTP time_status & time_state */ +int clear_time_state(void) +{ + struct timex tx; + int ret; + + tx.modes = ADJ_STATUS; + tx.status = 0; + ret = adjtimex(&tx); + return ret; +} + +#define NUM_FREQ_VALID 32 +#define NUM_FREQ_OUTOFRANGE 4 +#define NUM_FREQ_INVALID 2 + +long valid_freq[NUM_FREQ_VALID] = { + -499<<16, + -450<<16, + -400<<16, + -350<<16, + -300<<16, + -250<<16, + -200<<16, + -150<<16, + -100<<16, + -75<<16, + -50<<16, + -25<<16, + -10<<16, + -5<<16, + -1<<16, + -1000, + 1<<16, + 5<<16, + 10<<16, + 25<<16, + 50<<16, + 75<<16, + 100<<16, + 150<<16, + 200<<16, + 250<<16, + 300<<16, + 350<<16, + 400<<16, + 450<<16, + 499<<16, +}; + +long outofrange_freq[NUM_FREQ_OUTOFRANGE] = { + -1000<<16, + -550<<16, + 550<<16, + 1000<<16, +}; + +#define LONG_MAX (~0UL>>1) +#define LONG_MIN (-LONG_MAX - 1) + +long invalid_freq[NUM_FREQ_INVALID] = { + LONG_MAX, + LONG_MIN, +}; + +int validate_freq(void) +{ + struct timex tx; + int ret, pass = 0; + int i; + + clear_time_state(); + + memset(&tx, 0, sizeof(struct timex)); + /* Set the leap second insert flag */ + + printf("Testing ADJ_FREQ... "); + for (i = 0; i < NUM_FREQ_VALID; i++) { + tx.modes = ADJ_FREQUENCY; + tx.freq = valid_freq[i]; + + ret = adjtimex(&tx); + if (ret < 0) { + printf("[FAIL]\n"); + printf("Error: adjtimex(ADJ_FREQ, %ld - %ld ppm\n", + valid_freq[i], valid_freq[i]>>16); + pass = -1; + goto out; + } + tx.modes = 0; + ret = adjtimex(&tx); + if (tx.freq != valid_freq[i]) { + printf("Warning: freq value %ld not what we set it (%ld)!\n", + tx.freq, valid_freq[i]); + } + } + for (i = 0; i < NUM_FREQ_OUTOFRANGE; i++) { + tx.modes = ADJ_FREQUENCY; + tx.freq = outofrange_freq[i]; + + ret = adjtimex(&tx); + if (ret < 0) { + printf("[FAIL]\n"); + printf("Error: adjtimex(ADJ_FREQ, %ld - %ld ppm\n", + outofrange_freq[i], outofrange_freq[i]>>16); + pass = -1; + goto out; + } + tx.modes = 0; + ret = adjtimex(&tx); + if (tx.freq == outofrange_freq[i]) { + printf("[FAIL]\n"); + printf("ERROR: out of range value %ld actually set!\n", + tx.freq); + pass = -1; + goto out; + } + } + + + if (sizeof(long) == 8) { /* this case only applies to 64bit systems */ + for (i = 0; i < NUM_FREQ_INVALID; i++) { + tx.modes = ADJ_FREQUENCY; + tx.freq = invalid_freq[i]; + ret = adjtimex(&tx); + if (ret >= 0) { + printf("[FAIL]\n"); + printf("Error: No failure on invalid ADJ_FREQUENCY %ld\n", + invalid_freq[i]); + pass = -1; + goto out; + } + } + } + + printf("[OK]\n"); +out: + /* reset freq to zero */ + tx.modes = ADJ_FREQUENCY; + tx.freq = 0; + ret = adjtimex(&tx); + + return pass; +} + + +int main(int argc, char **argv) +{ + if (validate_freq()) + return ksft_exit_fail(); + + return ksft_exit_pass(); +} diff --git a/user/Makefile b/user/Makefile index 12c9d15..d401b63 100644 --- a/user/Makefile +++ b/user/Makefile @@ -3,5 +3,6 @@ # No binaries, but make sure arg-less "make" doesn't trigger "run_tests" all: -run_tests: all - ./test_user_copy.sh +TEST_PROGS := test_user_copy.sh + +include ../lib.mk diff --git a/vm/Makefile b/vm/Makefile index 077828c..3c53cac 100644 --- a/vm/Makefile +++ b/vm/Makefile @@ -1,16 +1,27 @@ # Makefile for vm selftests -CC = $(CROSS_COMPILE)gcc -CFLAGS = -Wall -BINARIES = hugepage-mmap hugepage-shm map_hugetlb thuge-gen hugetlbfstest +CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS) +BINARIES = compaction_test +BINARIES += hugepage-mmap +BINARIES += hugepage-shm +BINARIES += map_hugetlb +BINARIES += thuge-gen BINARIES += transhuge-stress +BINARIES += userfaultfd all: $(BINARIES) %: %.c $(CC) $(CFLAGS) -o $@ $^ -lrt +userfaultfd: userfaultfd.c ../../../../usr/include/linux/kernel.h + $(CC) $(CFLAGS) -O2 -o $@ $< -lpthread -run_tests: all - @/bin/sh ./run_vmtests || (echo "vmtests: [FAIL]"; exit 1) +../../../../usr/include/linux/kernel.h: + make -C ../../../.. headers_install + +TEST_PROGS := run_vmtests +TEST_FILES := $(BINARIES) + +include ../lib.mk clean: $(RM) $(BINARIES) diff --git a/vm/compaction_test.c b/vm/compaction_test.c new file mode 100644 index 0000000..932ff57 --- /dev/null +++ b/vm/compaction_test.c @@ -0,0 +1,225 @@ +/* + * + * A test for the patch "Allow compaction of unevictable pages". + * With this patch we should be able to allocate at least 1/4 + * of RAM in huge pages. Without the patch much less is + * allocated. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAP_SIZE 1048576 + +struct map_list { + void *map; + struct map_list *next; +}; + +int read_memory_info(unsigned long *memfree, unsigned long *hugepagesize) +{ + char buffer[256] = {0}; + char *cmd = "cat /proc/meminfo | grep -i memfree | grep -o '[0-9]*'"; + FILE *cmdfile = popen(cmd, "r"); + + if (!(fgets(buffer, sizeof(buffer), cmdfile))) { + perror("Failed to read meminfo\n"); + return -1; + } + + pclose(cmdfile); + + *memfree = atoll(buffer); + cmd = "cat /proc/meminfo | grep -i hugepagesize | grep -o '[0-9]*'"; + cmdfile = popen(cmd, "r"); + + if (!(fgets(buffer, sizeof(buffer), cmdfile))) { + perror("Failed to read meminfo\n"); + return -1; + } + + pclose(cmdfile); + *hugepagesize = atoll(buffer); + + return 0; +} + +int prereq(void) +{ + char allowed; + int fd; + + fd = open("/proc/sys/vm/compact_unevictable_allowed", + O_RDONLY | O_NONBLOCK); + if (fd < 0) { + perror("Failed to open\n" + "/proc/sys/vm/compact_unevictable_allowed\n"); + return -1; + } + + if (read(fd, &allowed, sizeof(char)) != sizeof(char)) { + perror("Failed to read from\n" + "/proc/sys/vm/compact_unevictable_allowed\n"); + close(fd); + return -1; + } + + close(fd); + if (allowed == '1') + return 0; + + return -1; +} + +int check_compaction(unsigned long mem_free, unsigned int hugepage_size) +{ + int fd; + int compaction_index = 0; + char initial_nr_hugepages[10] = {0}; + char nr_hugepages[10] = {0}; + + /* We want to test with 80% of available memory. Else, OOM killer comes + in to play */ + mem_free = mem_free * 0.8; + + fd = open("/proc/sys/vm/nr_hugepages", O_RDWR | O_NONBLOCK); + if (fd < 0) { + perror("Failed to open /proc/sys/vm/nr_hugepages"); + return -1; + } + + if (read(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) <= 0) { + perror("Failed to read from /proc/sys/vm/nr_hugepages"); + goto close_fd; + } + + /* Start with the initial condition of 0 huge pages*/ + if (write(fd, "0", sizeof(char)) != sizeof(char)) { + perror("Failed to write to /proc/sys/vm/nr_hugepages\n"); + goto close_fd; + } + + lseek(fd, 0, SEEK_SET); + + /* Request a large number of huge pages. The Kernel will allocate + as much as it can */ + if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) { + perror("Failed to write to /proc/sys/vm/nr_hugepages\n"); + goto close_fd; + } + + lseek(fd, 0, SEEK_SET); + + if (read(fd, nr_hugepages, sizeof(nr_hugepages)) <= 0) { + perror("Failed to read from /proc/sys/vm/nr_hugepages\n"); + goto close_fd; + } + + /* We should have been able to request at least 1/3 rd of the memory in + huge pages */ + compaction_index = mem_free/(atoi(nr_hugepages) * hugepage_size); + + if (compaction_index > 3) { + printf("No of huge pages allocated = %d\n", + (atoi(nr_hugepages))); + fprintf(stderr, "ERROR: Less that 1/%d of memory is available\n" + "as huge pages\n", compaction_index); + goto close_fd; + } + + printf("No of huge pages allocated = %d\n", + (atoi(nr_hugepages))); + + if (write(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) + != strlen(initial_nr_hugepages)) { + perror("Failed to write to /proc/sys/vm/nr_hugepages\n"); + goto close_fd; + } + + close(fd); + return 0; + + close_fd: + close(fd); + printf("Not OK. Compaction test failed."); + return -1; +} + + +int main(int argc, char **argv) +{ + struct rlimit lim; + struct map_list *list, *entry; + size_t page_size, i; + void *map = NULL; + unsigned long mem_free = 0; + unsigned long hugepage_size = 0; + unsigned long mem_fragmentable = 0; + + if (prereq() != 0) { + printf("Either the sysctl compact_unevictable_allowed is not\n" + "set to 1 or couldn't read the proc file.\n" + "Skipping the test\n"); + return 0; + } + + lim.rlim_cur = RLIM_INFINITY; + lim.rlim_max = RLIM_INFINITY; + if (setrlimit(RLIMIT_MEMLOCK, &lim)) { + perror("Failed to set rlimit:\n"); + return -1; + } + + page_size = getpagesize(); + + list = NULL; + + if (read_memory_info(&mem_free, &hugepage_size) != 0) { + printf("ERROR: Cannot read meminfo\n"); + return -1; + } + + mem_fragmentable = mem_free * 0.8 / 1024; + + while (mem_fragmentable > 0) { + map = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_LOCKED, -1, 0); + if (map == MAP_FAILED) + break; + + entry = malloc(sizeof(struct map_list)); + if (!entry) { + munmap(map, MAP_SIZE); + break; + } + entry->map = map; + entry->next = list; + list = entry; + + /* Write something (in this case the address of the map) to + * ensure that KSM can't merge the mapped pages + */ + for (i = 0; i < MAP_SIZE; i += page_size) + *(unsigned long *)(map + i) = (unsigned long)map + i; + + mem_fragmentable--; + } + + for (entry = list; entry != NULL; entry = entry->next) { + munmap(entry->map, MAP_SIZE); + if (!entry->next) + break; + entry = entry->next; + } + + if (check_compaction(mem_free, hugepage_size) == 0) + return 0; + + return -1; +} diff --git a/vm/hugetlbfstest.c b/vm/hugetlbfstest.c deleted file mode 100644 index ea40ff8..0000000 --- a/vm/hugetlbfstest.c +++ /dev/null @@ -1,84 +0,0 @@ -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include -#include -#include - -typedef unsigned long long u64; - -static size_t length = 1 << 24; - -static u64 read_rss(void) -{ - char buf[4096], *s = buf; - int i, fd; - u64 rss; - - fd = open("/proc/self/statm", O_RDONLY); - assert(fd > 2); - memset(buf, 0, sizeof(buf)); - read(fd, buf, sizeof(buf) - 1); - for (i = 0; i < 1; i++) - s = strchr(s, ' ') + 1; - rss = strtoull(s, NULL, 10); - return rss << 12; /* assumes 4k pagesize */ -} - -static void do_mmap(int fd, int extra_flags, int unmap) -{ - int *p; - int flags = MAP_PRIVATE | MAP_POPULATE | extra_flags; - u64 before, after; - - before = read_rss(); - p = mmap(NULL, length, PROT_READ | PROT_WRITE, flags, fd, 0); - assert(p != MAP_FAILED || - !"mmap returned an unexpected error"); - after = read_rss(); - assert(llabs(after - before - length) < 0x40000 || - !"rss didn't grow as expected"); - if (!unmap) - return; - munmap(p, length); - after = read_rss(); - assert(llabs(after - before) < 0x40000 || - !"rss didn't shrink as expected"); -} - -static int open_file(const char *path) -{ - int fd, err; - - unlink(path); - fd = open(path, O_CREAT | O_RDWR | O_TRUNC | O_EXCL - | O_LARGEFILE | O_CLOEXEC, 0600); - assert(fd > 2); - unlink(path); - err = ftruncate(fd, length); - assert(!err); - return fd; -} - -int main(void) -{ - int hugefd, fd; - - fd = open_file("/dev/shm/hugetlbhog"); - hugefd = open_file("/hugepages/hugetlbhog"); - - system("echo 100 > /proc/sys/vm/nr_hugepages"); - do_mmap(-1, MAP_ANONYMOUS, 1); - do_mmap(fd, 0, 1); - do_mmap(-1, MAP_ANONYMOUS | MAP_HUGETLB, 1); - do_mmap(hugefd, 0, 1); - do_mmap(hugefd, MAP_HUGETLB, 1); - /* Leak the last one to test do_exit() */ - do_mmap(-1, MAP_ANONYMOUS | MAP_HUGETLB, 0); - printf("oll korrekt.\n"); - return 0; -} diff --git a/vm/map_hugetlb.c b/vm/map_hugetlb.c index ac56639..addcd6f 100644 --- a/vm/map_hugetlb.c +++ b/vm/map_hugetlb.c @@ -73,7 +73,11 @@ int main(void) write_bytes(addr); ret = read_bytes(addr); - munmap(addr, LENGTH); + /* munmap() length of MAP_HUGETLB memory must be hugepage aligned */ + if (munmap(addr, LENGTH)) { + perror("munmap"); + exit(1); + } return ret; } diff --git a/vm/run_vmtests b/vm/run_vmtests old mode 100644 new mode 100755 index c87b681..9179ce8 --- a/vm/run_vmtests +++ b/vm/run_vmtests @@ -75,10 +75,14 @@ else echo "[PASS]" fi +echo "NOTE: The above hugetlb tests provide minimal coverage. Use" +echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" +echo " hugetlb regression testing." + echo "--------------------" -echo "running hugetlbfstest" +echo "running userfaultfd" echo "--------------------" -./hugetlbfstest +./userfaultfd 128 32 if [ $? -ne 0 ]; then echo "[FAIL]" exitcode=1 @@ -90,4 +94,16 @@ fi umount $mnt rm -rf $mnt echo $nr_hugepgs > /proc/sys/vm/nr_hugepages + +echo "-----------------------" +echo "running compaction_test" +echo "-----------------------" +./compaction_test +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + exit $exitcode diff --git a/vm/userfaultfd.c b/vm/userfaultfd.c new file mode 100644 index 0000000..d77ed41 --- /dev/null +++ b/vm/userfaultfd.c @@ -0,0 +1,645 @@ +/* + * Stress userfaultfd syscall. + * + * Copyright (C) 2015 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * This test allocates two virtual areas and bounces the physical + * memory across the two virtual areas (from area_src to area_dst) + * using userfaultfd. + * + * There are three threads running per CPU: + * + * 1) one per-CPU thread takes a per-page pthread_mutex in a random + * page of the area_dst (while the physical page may still be in + * area_src), and increments a per-page counter in the same page, + * and checks its value against a verification region. + * + * 2) another per-CPU thread handles the userfaults generated by + * thread 1 above. userfaultfd blocking reads or poll() modes are + * exercised interleaved. + * + * 3) one last per-CPU thread transfers the memory in the background + * at maximum bandwidth (if not already transferred by thread + * 2). Each cpu thread takes cares of transferring a portion of the + * area. + * + * When all threads of type 3 completed the transfer, one bounce is + * complete. area_src and area_dst are then swapped. All threads are + * respawned and so the bounce is immediately restarted in the + * opposite direction. + * + * per-CPU threads 1 by triggering userfaults inside + * pthread_mutex_lock will also verify the atomicity of the memory + * transfer (UFFDIO_COPY). + * + * The program takes two parameters: the amounts of physical memory in + * megabytes (MiB) of the area and the number of bounces to execute. + * + * # 100MiB 99999 bounces + * ./userfaultfd 100 99999 + * + * # 1GiB 99 bounces + * ./userfaultfd 1000 99 + * + * # 10MiB-~6GiB 999 bounces, continue forever unless an error triggers + * while ./userfaultfd $[RANDOM % 6000 + 10] 999; do true; done + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __NR_userfaultfd + +static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; + +#define BOUNCE_RANDOM (1<<0) +#define BOUNCE_RACINGFAULTS (1<<1) +#define BOUNCE_VERIFY (1<<2) +#define BOUNCE_POLL (1<<3) +static int bounces; + +static unsigned long long *count_verify; +static int uffd, finished, *pipefd; +static char *area_src, *area_dst; +static char *zeropage; +pthread_attr_t attr; + +/* pthread_mutex_t starts at page offset 0 */ +#define area_mutex(___area, ___nr) \ + ((pthread_mutex_t *) ((___area) + (___nr)*page_size)) +/* + * count is placed in the page after pthread_mutex_t naturally aligned + * to avoid non alignment faults on non-x86 archs. + */ +#define area_count(___area, ___nr) \ + ((volatile unsigned long long *) ((unsigned long) \ + ((___area) + (___nr)*page_size + \ + sizeof(pthread_mutex_t) + \ + sizeof(unsigned long long) - 1) & \ + ~(unsigned long)(sizeof(unsigned long long) \ + - 1))) + +static int my_bcmp(char *str1, char *str2, size_t n) +{ + unsigned long i; + for (i = 0; i < n; i++) + if (str1[i] != str2[i]) + return 1; + return 0; +} + +static void *locking_thread(void *arg) +{ + unsigned long cpu = (unsigned long) arg; + struct random_data rand; + unsigned long page_nr = *(&(page_nr)); /* uninitialized warning */ + int32_t rand_nr; + unsigned long long count; + char randstate[64]; + unsigned int seed; + time_t start; + + if (bounces & BOUNCE_RANDOM) { + seed = (unsigned int) time(NULL) - bounces; + if (!(bounces & BOUNCE_RACINGFAULTS)) + seed += cpu; + bzero(&rand, sizeof(rand)); + bzero(&randstate, sizeof(randstate)); + if (initstate_r(seed, randstate, sizeof(randstate), &rand)) + fprintf(stderr, "srandom_r error\n"), exit(1); + } else { + page_nr = -bounces; + if (!(bounces & BOUNCE_RACINGFAULTS)) + page_nr += cpu * nr_pages_per_cpu; + } + + while (!finished) { + if (bounces & BOUNCE_RANDOM) { + if (random_r(&rand, &rand_nr)) + fprintf(stderr, "random_r 1 error\n"), exit(1); + page_nr = rand_nr; + if (sizeof(page_nr) > sizeof(rand_nr)) { + if (random_r(&rand, &rand_nr)) + fprintf(stderr, "random_r 2 error\n"), exit(1); + page_nr |= (((unsigned long) rand_nr) << 16) << + 16; + } + } else + page_nr += 1; + page_nr %= nr_pages; + + start = time(NULL); + if (bounces & BOUNCE_VERIFY) { + count = *area_count(area_dst, page_nr); + if (!count) + fprintf(stderr, + "page_nr %lu wrong count %Lu %Lu\n", + page_nr, count, + count_verify[page_nr]), exit(1); + + + /* + * We can't use bcmp (or memcmp) because that + * returns 0 erroneously if the memory is + * changing under it (even if the end of the + * page is never changing and always + * different). + */ +#if 1 + if (!my_bcmp(area_dst + page_nr * page_size, zeropage, + page_size)) + fprintf(stderr, + "my_bcmp page_nr %lu wrong count %Lu %Lu\n", + page_nr, count, + count_verify[page_nr]), exit(1); +#else + unsigned long loops; + + loops = 0; + /* uncomment the below line to test with mutex */ + /* pthread_mutex_lock(area_mutex(area_dst, page_nr)); */ + while (!bcmp(area_dst + page_nr * page_size, zeropage, + page_size)) { + loops += 1; + if (loops > 10) + break; + } + /* uncomment below line to test with mutex */ + /* pthread_mutex_unlock(area_mutex(area_dst, page_nr)); */ + if (loops) { + fprintf(stderr, + "page_nr %lu all zero thread %lu %p %lu\n", + page_nr, cpu, area_dst + page_nr * page_size, + loops); + if (loops > 10) + exit(1); + } +#endif + } + + pthread_mutex_lock(area_mutex(area_dst, page_nr)); + count = *area_count(area_dst, page_nr); + if (count != count_verify[page_nr]) { + fprintf(stderr, + "page_nr %lu memory corruption %Lu %Lu\n", + page_nr, count, + count_verify[page_nr]), exit(1); + } + count++; + *area_count(area_dst, page_nr) = count_verify[page_nr] = count; + pthread_mutex_unlock(area_mutex(area_dst, page_nr)); + + if (time(NULL) - start > 1) + fprintf(stderr, + "userfault too slow %ld " + "possible false positive with overcommit\n", + time(NULL) - start); + } + + return NULL; +} + +static int copy_page(unsigned long offset) +{ + struct uffdio_copy uffdio_copy; + + if (offset >= nr_pages * page_size) + fprintf(stderr, "unexpected offset %lu\n", + offset), exit(1); + uffdio_copy.dst = (unsigned long) area_dst + offset; + uffdio_copy.src = (unsigned long) area_src + offset; + uffdio_copy.len = page_size; + uffdio_copy.mode = 0; + uffdio_copy.copy = 0; + if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy)) { + /* real retval in ufdio_copy.copy */ + if (uffdio_copy.copy != -EEXIST) + fprintf(stderr, "UFFDIO_COPY error %Ld\n", + uffdio_copy.copy), exit(1); + } else if (uffdio_copy.copy != page_size) { + fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n", + uffdio_copy.copy), exit(1); + } else + return 1; + return 0; +} + +static void *uffd_poll_thread(void *arg) +{ + unsigned long cpu = (unsigned long) arg; + struct pollfd pollfd[2]; + struct uffd_msg msg; + int ret; + unsigned long offset; + char tmp_chr; + unsigned long userfaults = 0; + + pollfd[0].fd = uffd; + pollfd[0].events = POLLIN; + pollfd[1].fd = pipefd[cpu*2]; + pollfd[1].events = POLLIN; + + for (;;) { + ret = poll(pollfd, 2, -1); + if (!ret) + fprintf(stderr, "poll error %d\n", ret), exit(1); + if (ret < 0) + perror("poll"), exit(1); + if (pollfd[1].revents & POLLIN) { + if (read(pollfd[1].fd, &tmp_chr, 1) != 1) + fprintf(stderr, "read pipefd error\n"), + exit(1); + break; + } + if (!(pollfd[0].revents & POLLIN)) + fprintf(stderr, "pollfd[0].revents %d\n", + pollfd[0].revents), exit(1); + ret = read(uffd, &msg, sizeof(msg)); + if (ret < 0) { + if (errno == EAGAIN) + continue; + perror("nonblocking read error"), exit(1); + } + if (msg.event != UFFD_EVENT_PAGEFAULT) + fprintf(stderr, "unexpected msg event %u\n", + msg.event), exit(1); + if (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) + fprintf(stderr, "unexpected write fault\n"), exit(1); + offset = (char *)(unsigned long)msg.arg.pagefault.address - + area_dst; + offset &= ~(page_size-1); + if (copy_page(offset)) + userfaults++; + } + return (void *)userfaults; +} + +pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER; + +static void *uffd_read_thread(void *arg) +{ + unsigned long *this_cpu_userfaults; + struct uffd_msg msg; + unsigned long offset; + int ret; + + this_cpu_userfaults = (unsigned long *) arg; + *this_cpu_userfaults = 0; + + pthread_mutex_unlock(&uffd_read_mutex); + /* from here cancellation is ok */ + + for (;;) { + ret = read(uffd, &msg, sizeof(msg)); + if (ret != sizeof(msg)) { + if (ret < 0) + perror("blocking read error"), exit(1); + else + fprintf(stderr, "short read\n"), exit(1); + } + if (msg.event != UFFD_EVENT_PAGEFAULT) + fprintf(stderr, "unexpected msg event %u\n", + msg.event), exit(1); + if (bounces & BOUNCE_VERIFY && + msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) + fprintf(stderr, "unexpected write fault\n"), exit(1); + offset = (char *)(unsigned long)msg.arg.pagefault.address - + area_dst; + offset &= ~(page_size-1); + if (copy_page(offset)) + (*this_cpu_userfaults)++; + } + return (void *)NULL; +} + +static void *background_thread(void *arg) +{ + unsigned long cpu = (unsigned long) arg; + unsigned long page_nr; + + for (page_nr = cpu * nr_pages_per_cpu; + page_nr < (cpu+1) * nr_pages_per_cpu; + page_nr++) + copy_page(page_nr * page_size); + + return NULL; +} + +static int stress(unsigned long *userfaults) +{ + unsigned long cpu; + pthread_t locking_threads[nr_cpus]; + pthread_t uffd_threads[nr_cpus]; + pthread_t background_threads[nr_cpus]; + void **_userfaults = (void **) userfaults; + + finished = 0; + for (cpu = 0; cpu < nr_cpus; cpu++) { + if (pthread_create(&locking_threads[cpu], &attr, + locking_thread, (void *)cpu)) + return 1; + if (bounces & BOUNCE_POLL) { + if (pthread_create(&uffd_threads[cpu], &attr, + uffd_poll_thread, (void *)cpu)) + return 1; + } else { + if (pthread_create(&uffd_threads[cpu], &attr, + uffd_read_thread, + &_userfaults[cpu])) + return 1; + pthread_mutex_lock(&uffd_read_mutex); + } + if (pthread_create(&background_threads[cpu], &attr, + background_thread, (void *)cpu)) + return 1; + } + for (cpu = 0; cpu < nr_cpus; cpu++) + if (pthread_join(background_threads[cpu], NULL)) + return 1; + + /* + * Be strict and immediately zap area_src, the whole area has + * been transferred already by the background treads. The + * area_src could then be faulted in in a racy way by still + * running uffdio_threads reading zeropages after we zapped + * area_src (but they're guaranteed to get -EEXIST from + * UFFDIO_COPY without writing zero pages into area_dst + * because the background threads already completed). + */ + if (madvise(area_src, nr_pages * page_size, MADV_DONTNEED)) { + perror("madvise"); + return 1; + } + + for (cpu = 0; cpu < nr_cpus; cpu++) { + char c; + if (bounces & BOUNCE_POLL) { + if (write(pipefd[cpu*2+1], &c, 1) != 1) { + fprintf(stderr, "pipefd write error\n"); + return 1; + } + if (pthread_join(uffd_threads[cpu], &_userfaults[cpu])) + return 1; + } else { + if (pthread_cancel(uffd_threads[cpu])) + return 1; + if (pthread_join(uffd_threads[cpu], NULL)) + return 1; + } + } + + finished = 1; + for (cpu = 0; cpu < nr_cpus; cpu++) + if (pthread_join(locking_threads[cpu], NULL)) + return 1; + + return 0; +} + +static int userfaultfd_stress(void) +{ + void *area; + char *tmp_area; + unsigned long nr; + struct uffdio_register uffdio_register; + struct uffdio_api uffdio_api; + unsigned long cpu; + int uffd_flags, err; + unsigned long userfaults[nr_cpus]; + + if (posix_memalign(&area, page_size, nr_pages * page_size)) { + fprintf(stderr, "out of memory\n"); + return 1; + } + area_src = area; + if (posix_memalign(&area, page_size, nr_pages * page_size)) { + fprintf(stderr, "out of memory\n"); + return 1; + } + area_dst = area; + + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + if (uffd < 0) { + fprintf(stderr, + "userfaultfd syscall not available in this kernel\n"); + return 1; + } + uffd_flags = fcntl(uffd, F_GETFD, NULL); + + uffdio_api.api = UFFD_API; + uffdio_api.features = 0; + if (ioctl(uffd, UFFDIO_API, &uffdio_api)) { + fprintf(stderr, "UFFDIO_API\n"); + return 1; + } + if (uffdio_api.api != UFFD_API) { + fprintf(stderr, "UFFDIO_API error %Lu\n", uffdio_api.api); + return 1; + } + + count_verify = malloc(nr_pages * sizeof(unsigned long long)); + if (!count_verify) { + perror("count_verify"); + return 1; + } + + for (nr = 0; nr < nr_pages; nr++) { + *area_mutex(area_src, nr) = (pthread_mutex_t) + PTHREAD_MUTEX_INITIALIZER; + count_verify[nr] = *area_count(area_src, nr) = 1; + /* + * In the transition between 255 to 256, powerpc will + * read out of order in my_bcmp and see both bytes as + * zero, so leave a placeholder below always non-zero + * after the count, to avoid my_bcmp to trigger false + * positives. + */ + *(area_count(area_src, nr) + 1) = 1; + } + + pipefd = malloc(sizeof(int) * nr_cpus * 2); + if (!pipefd) { + perror("pipefd"); + return 1; + } + for (cpu = 0; cpu < nr_cpus; cpu++) { + if (pipe2(&pipefd[cpu*2], O_CLOEXEC | O_NONBLOCK)) { + perror("pipe"); + return 1; + } + } + + if (posix_memalign(&area, page_size, page_size)) { + fprintf(stderr, "out of memory\n"); + return 1; + } + zeropage = area; + bzero(zeropage, page_size); + + pthread_mutex_lock(&uffd_read_mutex); + + pthread_attr_init(&attr); + pthread_attr_setstacksize(&attr, 16*1024*1024); + + err = 0; + while (bounces--) { + unsigned long expected_ioctls; + + printf("bounces: %d, mode:", bounces); + if (bounces & BOUNCE_RANDOM) + printf(" rnd"); + if (bounces & BOUNCE_RACINGFAULTS) + printf(" racing"); + if (bounces & BOUNCE_VERIFY) + printf(" ver"); + if (bounces & BOUNCE_POLL) + printf(" poll"); + printf(", "); + fflush(stdout); + + if (bounces & BOUNCE_POLL) + fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + else + fcntl(uffd, F_SETFL, uffd_flags & ~O_NONBLOCK); + + /* register */ + uffdio_register.range.start = (unsigned long) area_dst; + uffdio_register.range.len = nr_pages * page_size; + uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) { + fprintf(stderr, "register failure\n"); + return 1; + } + expected_ioctls = (1 << _UFFDIO_WAKE) | + (1 << _UFFDIO_COPY) | + (1 << _UFFDIO_ZEROPAGE); + if ((uffdio_register.ioctls & expected_ioctls) != + expected_ioctls) { + fprintf(stderr, + "unexpected missing ioctl for anon memory\n"); + return 1; + } + + /* + * The madvise done previously isn't enough: some + * uffd_thread could have read userfaults (one of + * those already resolved by the background thread) + * and it may be in the process of calling + * UFFDIO_COPY. UFFDIO_COPY will read the zapped + * area_src and it would map a zero page in it (of + * course such a UFFDIO_COPY is perfectly safe as it'd + * return -EEXIST). The problem comes at the next + * bounce though: that racing UFFDIO_COPY would + * generate zeropages in the area_src, so invalidating + * the previous MADV_DONTNEED. Without this additional + * MADV_DONTNEED those zeropages leftovers in the + * area_src would lead to -EEXIST failure during the + * next bounce, effectively leaving a zeropage in the + * area_dst. + * + * Try to comment this out madvise to see the memory + * corruption being caught pretty quick. + * + * khugepaged is also inhibited to collapse THP after + * MADV_DONTNEED only after the UFFDIO_REGISTER, so it's + * required to MADV_DONTNEED here. + */ + if (madvise(area_dst, nr_pages * page_size, MADV_DONTNEED)) { + perror("madvise 2"); + return 1; + } + + /* bounce pass */ + if (stress(userfaults)) + return 1; + + /* unregister */ + if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) { + fprintf(stderr, "register failure\n"); + return 1; + } + + /* verification */ + if (bounces & BOUNCE_VERIFY) { + for (nr = 0; nr < nr_pages; nr++) { + if (*area_count(area_dst, nr) != count_verify[nr]) { + fprintf(stderr, + "error area_count %Lu %Lu %lu\n", + *area_count(area_src, nr), + count_verify[nr], + nr); + err = 1; + bounces = 0; + } + } + } + + /* prepare next bounce */ + tmp_area = area_src; + area_src = area_dst; + area_dst = tmp_area; + + printf("userfaults:"); + for (cpu = 0; cpu < nr_cpus; cpu++) + printf(" %lu", userfaults[cpu]); + printf("\n"); + } + + return err; +} + +int main(int argc, char **argv) +{ + if (argc < 3) + fprintf(stderr, "Usage: \n"), exit(1); + nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + page_size = sysconf(_SC_PAGE_SIZE); + if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2 + > page_size) + fprintf(stderr, "Impossible to run this test\n"), exit(2); + nr_pages_per_cpu = atol(argv[1]) * 1024*1024 / page_size / + nr_cpus; + if (!nr_pages_per_cpu) { + fprintf(stderr, "invalid MiB\n"); + fprintf(stderr, "Usage: \n"), exit(1); + } + bounces = atoi(argv[2]); + if (bounces <= 0) { + fprintf(stderr, "invalid bounces\n"); + fprintf(stderr, "Usage: \n"), exit(1); + } + nr_pages = nr_pages_per_cpu * nr_cpus; + printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n", + nr_pages, nr_pages_per_cpu); + return userfaultfd_stress(); +} + +#else /* __NR_userfaultfd */ + +#warning "missing __NR_userfaultfd definition" + +int main(void) +{ + printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n"); + return 0; +} + +#endif /* __NR_userfaultfd */ diff --git a/x86/.gitignore b/x86/.gitignore new file mode 100644 index 0000000..15034fe --- /dev/null +++ b/x86/.gitignore @@ -0,0 +1,2 @@ +*_32 +*_64 diff --git a/x86/Makefile b/x86/Makefile new file mode 100644 index 0000000..29089b2 --- /dev/null +++ b/x86/Makefile @@ -0,0 +1,62 @@ +all: + +include ../lib.mk + +.PHONY: all all_32 all_64 warn_32bit_failure clean + +TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs ldt_gdt syscall_nt +TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn + +TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) +BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32) +BINARIES_64 := $(TARGETS_C_BOTHBITS:%=%_64) + +CFLAGS := -O2 -g -std=gnu99 -pthread -Wall + +UNAME_M := $(shell uname -m) +CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32) +CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) + +ifeq ($(CAN_BUILD_I386),1) +all: all_32 +TEST_PROGS += $(BINARIES_32) +endif + +ifeq ($(CAN_BUILD_X86_64),1) +all: all_64 +TEST_PROGS += $(BINARIES_64) +endif + +all_32: $(BINARIES_32) + +all_64: $(BINARIES_64) + +clean: + $(RM) $(BINARIES_32) $(BINARIES_64) + +$(TARGETS_C_32BIT_ALL:%=%_32): %_32: %.c + $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl + +$(TARGETS_C_BOTHBITS:%=%_64): %_64: %.c + $(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl + +# x86_64 users should be encouraged to install 32-bit libraries +ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),01) +all: warn_32bit_failure + +warn_32bit_failure: + @echo "Warning: you seem to have a broken 32-bit build" 2>&1; \ + echo "environment. This will reduce test coverage of 64-bit" 2>&1; \ + echo "kernels. If you are using a Debian-like distribution," 2>&1; \ + echo "try:"; 2>&1; \ + echo ""; \ + echo " apt-get install gcc-multilib libc6-i386 libc6-dev-i386"; \ + echo ""; \ + echo "If you are using a Fedora-like distribution, try:"; \ + echo ""; \ + echo " yum install glibc-devel.*i686"; \ + exit 0; +endif + +# Some tests have additional dependencies. +sysret_ss_attrs_64: thunks.S diff --git a/x86/check_cc.sh b/x86/check_cc.sh new file mode 100755 index 0000000..172d329 --- /dev/null +++ b/x86/check_cc.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# check_cc.sh - Helper to test userspace compilation support +# Copyright (c) 2015 Andrew Lutomirski +# GPL v2 + +CC="$1" +TESTPROG="$2" +shift 2 + +if "$CC" -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then + echo 1 +else + echo 0 +fi + +exit 0 diff --git a/x86/entry_from_vm86.c b/x86/entry_from_vm86.c new file mode 100644 index 0000000..421c607 --- /dev/null +++ b/x86/entry_from_vm86.c @@ -0,0 +1,234 @@ +/* + * entry_from_vm86.c - tests kernel entries from vm86 mode + * Copyright (c) 2014-2015 Andrew Lutomirski + * + * This exercises a few paths that need to special-case vm86 mode. + * + * GPL v2. + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned long load_addr = 0x10000; +static int nerrs = 0; + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static void clearhandler(int sig) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static sig_atomic_t got_signal; + +static void sighandler(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t*)ctx_void; + + if (ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_VM || + (ctx->uc_mcontext.gregs[REG_CS] & 3) != 3) { + printf("[FAIL]\tSignal frame should not reflect vm86 mode\n"); + nerrs++; + } + + const char *signame; + if (sig == SIGSEGV) + signame = "SIGSEGV"; + else if (sig == SIGILL) + signame = "SIGILL"; + else + signame = "unexpected signal"; + + printf("[INFO]\t%s: FLAGS = 0x%lx, CS = 0x%hx\n", signame, + (unsigned long)ctx->uc_mcontext.gregs[REG_EFL], + (unsigned short)ctx->uc_mcontext.gregs[REG_CS]); + + got_signal = 1; +} + +asm ( + ".pushsection .rodata\n\t" + ".type vmcode_bound, @object\n\t" + "vmcode:\n\t" + "vmcode_bound:\n\t" + ".code16\n\t" + "bound %ax, (2048)\n\t" + "int3\n\t" + "vmcode_sysenter:\n\t" + "sysenter\n\t" + "vmcode_syscall:\n\t" + "syscall\n\t" + "vmcode_sti:\n\t" + "sti\n\t" + "vmcode_int3:\n\t" + "int3\n\t" + "vmcode_int80:\n\t" + "int $0x80\n\t" + ".size vmcode, . - vmcode\n\t" + "end_vmcode:\n\t" + ".code32\n\t" + ".popsection" + ); + +extern unsigned char vmcode[], end_vmcode[]; +extern unsigned char vmcode_bound[], vmcode_sysenter[], vmcode_syscall[], + vmcode_sti[], vmcode_int3[], vmcode_int80[]; + +/* Returns false if the test was skipped. */ +static bool do_test(struct vm86plus_struct *v86, unsigned long eip, + unsigned int rettype, unsigned int retarg, + const char *text) +{ + long ret; + + printf("[RUN]\t%s from vm86 mode\n", text); + v86->regs.eip = eip; + ret = vm86(VM86_ENTER, v86); + + if (ret == -1 && (errno == ENOSYS || errno == EPERM)) { + printf("[SKIP]\tvm86 %s\n", + errno == ENOSYS ? "not supported" : "not allowed"); + return false; + } + + if (VM86_TYPE(ret) == VM86_INTx) { + char trapname[32]; + int trapno = VM86_ARG(ret); + if (trapno == 13) + strcpy(trapname, "GP"); + else if (trapno == 5) + strcpy(trapname, "BR"); + else if (trapno == 14) + strcpy(trapname, "PF"); + else + sprintf(trapname, "%d", trapno); + + printf("[INFO]\tExited vm86 mode due to #%s\n", trapname); + } else if (VM86_TYPE(ret) == VM86_UNKNOWN) { + printf("[INFO]\tExited vm86 mode due to unhandled GP fault\n"); + } else if (VM86_TYPE(ret) == VM86_TRAP) { + printf("[INFO]\tExited vm86 mode due to a trap (arg=%ld)\n", + VM86_ARG(ret)); + } else if (VM86_TYPE(ret) == VM86_SIGNAL) { + printf("[INFO]\tExited vm86 mode due to a signal\n"); + } else if (VM86_TYPE(ret) == VM86_STI) { + printf("[INFO]\tExited vm86 mode due to STI\n"); + } else { + printf("[INFO]\tExited vm86 mode due to type %ld, arg %ld\n", + VM86_TYPE(ret), VM86_ARG(ret)); + } + + if (rettype == -1 || + (VM86_TYPE(ret) == rettype && VM86_ARG(ret) == retarg)) { + printf("[OK]\tReturned correctly\n"); + } else { + printf("[FAIL]\tIncorrect return reason\n"); + nerrs++; + } + + return true; +} + +int main(void) +{ + struct vm86plus_struct v86; + unsigned char *addr = mmap((void *)load_addr, 4096, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANONYMOUS | MAP_PRIVATE, -1,0); + if (addr != (unsigned char *)load_addr) + err(1, "mmap"); + + memcpy(addr, vmcode, end_vmcode - vmcode); + addr[2048] = 2; + addr[2050] = 3; + + memset(&v86, 0, sizeof(v86)); + + v86.regs.cs = load_addr / 16; + v86.regs.ss = load_addr / 16; + v86.regs.ds = load_addr / 16; + v86.regs.es = load_addr / 16; + + assert((v86.regs.cs & 3) == 0); /* Looks like RPL = 0 */ + + /* #BR -- should deliver SIG??? */ + do_test(&v86, vmcode_bound - vmcode, VM86_INTx, 5, "#BR"); + + /* + * SYSENTER -- should cause #GP or #UD depending on CPU. + * Expected return type -1 means that we shouldn't validate + * the vm86 return value. This will avoid problems on non-SEP + * CPUs. + */ + sethandler(SIGILL, sighandler, 0); + do_test(&v86, vmcode_sysenter - vmcode, -1, 0, "SYSENTER"); + clearhandler(SIGILL); + + /* + * SYSCALL would be a disaster in VM86 mode. Fortunately, + * there is no kernel that both enables SYSCALL and sets + * EFER.SCE, so it's #UD on all systems. But vm86 is + * buggy (or has a "feature"), so the SIGILL will actually + * be delivered. + */ + sethandler(SIGILL, sighandler, 0); + do_test(&v86, vmcode_syscall - vmcode, VM86_SIGNAL, 0, "SYSCALL"); + clearhandler(SIGILL); + + /* STI with VIP set */ + v86.regs.eflags |= X86_EFLAGS_VIP; + v86.regs.eflags &= ~X86_EFLAGS_IF; + do_test(&v86, vmcode_sti - vmcode, VM86_STI, 0, "STI with VIP set"); + + /* INT3 -- should cause #BP */ + do_test(&v86, vmcode_int3 - vmcode, VM86_TRAP, 3, "INT3"); + + /* INT80 -- should exit with "INTx 0x80" */ + v86.regs.eax = (unsigned int)-1; + do_test(&v86, vmcode_int80 - vmcode, VM86_INTx, 0x80, "int80"); + + /* Execute a null pointer */ + v86.regs.cs = 0; + v86.regs.ss = 0; + sethandler(SIGSEGV, sighandler, 0); + got_signal = 0; + if (do_test(&v86, 0, VM86_SIGNAL, 0, "Execute null pointer") && + !got_signal) { + printf("[FAIL]\tDid not receive SIGSEGV\n"); + nerrs++; + } + clearhandler(SIGSEGV); + + return (nerrs == 0 ? 0 : 1); +} diff --git a/x86/ldt_gdt.c b/x86/ldt_gdt.c new file mode 100644 index 0000000..31a3035 --- /dev/null +++ b/x86/ldt_gdt.c @@ -0,0 +1,576 @@ +/* + * ldt_gdt.c - Test cases for LDT and GDT access + * Copyright (c) 2015 Andrew Lutomirski + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define AR_ACCESSED (1<<8) + +#define AR_TYPE_RODATA (0 * (1<<9)) +#define AR_TYPE_RWDATA (1 * (1<<9)) +#define AR_TYPE_RODATA_EXPDOWN (2 * (1<<9)) +#define AR_TYPE_RWDATA_EXPDOWN (3 * (1<<9)) +#define AR_TYPE_XOCODE (4 * (1<<9)) +#define AR_TYPE_XRCODE (5 * (1<<9)) +#define AR_TYPE_XOCODE_CONF (6 * (1<<9)) +#define AR_TYPE_XRCODE_CONF (7 * (1<<9)) + +#define AR_DPL3 (3 * (1<<13)) + +#define AR_S (1 << 12) +#define AR_P (1 << 15) +#define AR_AVL (1 << 20) +#define AR_L (1 << 21) +#define AR_DB (1 << 22) +#define AR_G (1 << 23) + +static int nerrs; + +static void check_invalid_segment(uint16_t index, int ldt) +{ + uint32_t has_limit = 0, has_ar = 0, limit, ar; + uint32_t selector = (index << 3) | (ldt << 2) | 3; + + asm ("lsl %[selector], %[limit]\n\t" + "jnz 1f\n\t" + "movl $1, %[has_limit]\n\t" + "1:" + : [limit] "=r" (limit), [has_limit] "+rm" (has_limit) + : [selector] "r" (selector)); + asm ("larl %[selector], %[ar]\n\t" + "jnz 1f\n\t" + "movl $1, %[has_ar]\n\t" + "1:" + : [ar] "=r" (ar), [has_ar] "+rm" (has_ar) + : [selector] "r" (selector)); + + if (has_limit || has_ar) { + printf("[FAIL]\t%s entry %hu is valid but should be invalid\n", + (ldt ? "LDT" : "GDT"), index); + nerrs++; + } else { + printf("[OK]\t%s entry %hu is invalid\n", + (ldt ? "LDT" : "GDT"), index); + } +} + +static void check_valid_segment(uint16_t index, int ldt, + uint32_t expected_ar, uint32_t expected_limit, + bool verbose) +{ + uint32_t has_limit = 0, has_ar = 0, limit, ar; + uint32_t selector = (index << 3) | (ldt << 2) | 3; + + asm ("lsl %[selector], %[limit]\n\t" + "jnz 1f\n\t" + "movl $1, %[has_limit]\n\t" + "1:" + : [limit] "=r" (limit), [has_limit] "+rm" (has_limit) + : [selector] "r" (selector)); + asm ("larl %[selector], %[ar]\n\t" + "jnz 1f\n\t" + "movl $1, %[has_ar]\n\t" + "1:" + : [ar] "=r" (ar), [has_ar] "+rm" (has_ar) + : [selector] "r" (selector)); + + if (!has_limit || !has_ar) { + printf("[FAIL]\t%s entry %hu is invalid but should be valid\n", + (ldt ? "LDT" : "GDT"), index); + nerrs++; + return; + } + + if (ar != expected_ar) { + printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n", + (ldt ? "LDT" : "GDT"), index, ar, expected_ar); + nerrs++; + } else if (limit != expected_limit) { + printf("[FAIL]\t%s entry %hu has limit 0x%08X but expected 0x%08X\n", + (ldt ? "LDT" : "GDT"), index, limit, expected_limit); + nerrs++; + } else if (verbose) { + printf("[OK]\t%s entry %hu has AR 0x%08X and limit 0x%08X\n", + (ldt ? "LDT" : "GDT"), index, ar, limit); + } +} + +static bool install_valid_mode(const struct user_desc *desc, uint32_t ar, + bool oldmode) +{ + int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11, + desc, sizeof(*desc)); + if (ret < -1) + errno = -ret; + if (ret == 0) { + uint32_t limit = desc->limit; + if (desc->limit_in_pages) + limit = (limit << 12) + 4095; + check_valid_segment(desc->entry_number, 1, ar, limit, true); + return true; + } else if (errno == ENOSYS) { + printf("[OK]\tmodify_ldt returned -ENOSYS\n"); + return false; + } else { + if (desc->seg_32bit) { + printf("[FAIL]\tUnexpected modify_ldt failure %d\n", + errno); + nerrs++; + return false; + } else { + printf("[OK]\tmodify_ldt rejected 16 bit segment\n"); + return false; + } + } +} + +static bool install_valid(const struct user_desc *desc, uint32_t ar) +{ + return install_valid_mode(desc, ar, false); +} + +static void install_invalid(const struct user_desc *desc, bool oldmode) +{ + int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11, + desc, sizeof(*desc)); + if (ret < -1) + errno = -ret; + if (ret == 0) { + check_invalid_segment(desc->entry_number, 1); + } else if (errno == ENOSYS) { + printf("[OK]\tmodify_ldt returned -ENOSYS\n"); + } else { + if (desc->seg_32bit) { + printf("[FAIL]\tUnexpected modify_ldt failure %d\n", + errno); + nerrs++; + } else { + printf("[OK]\tmodify_ldt rejected 16 bit segment\n"); + } + } +} + +static int safe_modify_ldt(int func, struct user_desc *ptr, + unsigned long bytecount) +{ + int ret = syscall(SYS_modify_ldt, 0x11, ptr, bytecount); + if (ret < -1) + errno = -ret; + return ret; +} + +static void fail_install(struct user_desc *desc) +{ + if (safe_modify_ldt(0x11, desc, sizeof(*desc)) == 0) { + printf("[FAIL]\tmodify_ldt accepted a bad descriptor\n"); + nerrs++; + } else if (errno == ENOSYS) { + printf("[OK]\tmodify_ldt returned -ENOSYS\n"); + } else { + printf("[OK]\tmodify_ldt failure %d\n", errno); + } +} + +static void do_simple_tests(void) +{ + struct user_desc desc = { + .entry_number = 0, + .base_addr = 0, + .limit = 10, + .seg_32bit = 1, + .contents = 2, /* Code, not conforming */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 0, + .useable = 0 + }; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB); + + desc.limit_in_pages = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_P | AR_DB | AR_G); + + check_invalid_segment(1, 1); + + desc.entry_number = 2; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_P | AR_DB | AR_G); + + check_invalid_segment(1, 1); + + desc.base_addr = 0xf0000000; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_P | AR_DB | AR_G); + + desc.useable = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_P | AR_DB | AR_G | AR_AVL); + + desc.seg_not_present = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_DB | AR_G | AR_AVL); + + desc.seg_32bit = 0; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_G | AR_AVL); + + desc.seg_32bit = 1; + desc.contents = 0; + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | + AR_S | AR_DB | AR_G | AR_AVL); + + desc.read_exec_only = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | + AR_S | AR_DB | AR_G | AR_AVL); + + desc.contents = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA_EXPDOWN | + AR_S | AR_DB | AR_G | AR_AVL); + + desc.read_exec_only = 0; + desc.limit_in_pages = 0; + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA_EXPDOWN | + AR_S | AR_DB | AR_AVL); + + desc.contents = 3; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE_CONF | + AR_S | AR_DB | AR_AVL); + + desc.read_exec_only = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE_CONF | + AR_S | AR_DB | AR_AVL); + + desc.read_exec_only = 0; + desc.contents = 2; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_DB | AR_AVL); + + desc.read_exec_only = 1; + +#ifdef __x86_64__ + desc.lm = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE | + AR_S | AR_DB | AR_AVL); + desc.lm = 0; +#endif + + bool entry1_okay = install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE | + AR_S | AR_DB | AR_AVL); + + if (entry1_okay) { + printf("[RUN]\tTest fork\n"); + pid_t child = fork(); + if (child == 0) { + nerrs = 0; + check_valid_segment(desc.entry_number, 1, + AR_DPL3 | AR_TYPE_XOCODE | + AR_S | AR_DB | AR_AVL, desc.limit, + true); + check_invalid_segment(1, 1); + exit(nerrs ? 1 : 0); + } else { + int status; + if (waitpid(child, &status, 0) != child || + !WIFEXITED(status)) { + printf("[FAIL]\tChild died\n"); + nerrs++; + } else if (WEXITSTATUS(status) != 0) { + printf("[FAIL]\tChild failed\n"); + nerrs++; + } else { + printf("[OK]\tChild succeeded\n"); + } + } + + printf("[RUN]\tTest size\n"); + int i; + for (i = 0; i < 8192; i++) { + desc.entry_number = i; + desc.limit = i; + if (safe_modify_ldt(0x11, &desc, sizeof(desc)) != 0) { + printf("[FAIL]\tFailed to install entry %d\n", i); + nerrs++; + break; + } + } + for (int j = 0; j < i; j++) { + check_valid_segment(j, 1, AR_DPL3 | AR_TYPE_XOCODE | + AR_S | AR_DB | AR_AVL, j, false); + } + printf("[DONE]\tSize test\n"); + } else { + printf("[SKIP]\tSkipping fork and size tests because we have no LDT\n"); + } + + /* Test entry_number too high. */ + desc.entry_number = 8192; + fail_install(&desc); + + /* Test deletion and actions mistakeable for deletion. */ + memset(&desc, 0, sizeof(desc)); + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P); + + desc.seg_not_present = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S); + + desc.seg_not_present = 0; + desc.read_exec_only = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S | AR_P); + + desc.read_exec_only = 0; + desc.seg_not_present = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S); + + desc.read_exec_only = 1; + desc.limit = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S); + + desc.limit = 0; + desc.base_addr = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S); + + desc.base_addr = 0; + install_invalid(&desc, false); + + desc.seg_not_present = 0; + desc.read_exec_only = 0; + desc.seg_32bit = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB); + install_invalid(&desc, true); +} + +/* + * 0: thread is idle + * 1: thread armed + * 2: thread should clear LDT entry 0 + * 3: thread should exit + */ +static volatile unsigned int ftx; + +static void *threadproc(void *ctx) +{ + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(1, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) + err(1, "sched_setaffinity to CPU 1"); /* should never fail */ + + while (1) { + syscall(SYS_futex, &ftx, FUTEX_WAIT, 0, NULL, NULL, 0); + while (ftx != 2) { + if (ftx >= 3) + return NULL; + } + + /* clear LDT entry 0 */ + const struct user_desc desc = {}; + if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) != 0) + err(1, "modify_ldt"); + + /* If ftx == 2, set it to zero. If ftx == 100, quit. */ + unsigned int x = -2; + asm volatile ("lock xaddl %[x], %[ftx]" : + [x] "+r" (x), [ftx] "+m" (ftx)); + if (x != 2) + return NULL; + } +} + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); + +} + +static jmp_buf jmpbuf; + +static void sigsegv(int sig, siginfo_t *info, void *ctx_void) +{ + siglongjmp(jmpbuf, 1); +} + +static void do_multicpu_tests(void) +{ + cpu_set_t cpuset; + pthread_t thread; + int failures = 0, iters = 5, i; + unsigned short orig_ss; + + CPU_ZERO(&cpuset); + CPU_SET(1, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) { + printf("[SKIP]\tCannot set affinity to CPU 1\n"); + return; + } + + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) { + printf("[SKIP]\tCannot set affinity to CPU 0\n"); + return; + } + + sethandler(SIGSEGV, sigsegv, 0); +#ifdef __i386__ + /* True 32-bit kernels send SIGILL instead of SIGSEGV on IRET faults. */ + sethandler(SIGILL, sigsegv, 0); +#endif + + printf("[RUN]\tCross-CPU LDT invalidation\n"); + + if (pthread_create(&thread, 0, threadproc, 0) != 0) + err(1, "pthread_create"); + + asm volatile ("mov %%ss, %0" : "=rm" (orig_ss)); + + for (i = 0; i < 5; i++) { + if (sigsetjmp(jmpbuf, 1) != 0) + continue; + + /* Make sure the thread is ready after the last test. */ + while (ftx != 0) + ; + + struct user_desc desc = { + .entry_number = 0, + .base_addr = 0, + .limit = 0xfffff, + .seg_32bit = 1, + .contents = 0, /* Data */ + .read_exec_only = 0, + .limit_in_pages = 1, + .seg_not_present = 0, + .useable = 0 + }; + + if (safe_modify_ldt(0x11, &desc, sizeof(desc)) != 0) { + if (errno != ENOSYS) + err(1, "modify_ldt"); + printf("[SKIP]\tmodify_ldt unavailable\n"); + break; + } + + /* Arm the thread. */ + ftx = 1; + syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); + + asm volatile ("mov %0, %%ss" : : "r" (0x7)); + + /* Go! */ + ftx = 2; + + while (ftx != 0) + ; + + /* + * On success, modify_ldt will segfault us synchronously, + * and we'll escape via siglongjmp. + */ + + failures++; + asm volatile ("mov %0, %%ss" : : "rm" (orig_ss)); + }; + + ftx = 100; /* Kill the thread. */ + syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); + + if (pthread_join(thread, NULL) != 0) + err(1, "pthread_join"); + + if (failures) { + printf("[FAIL]\t%d of %d iterations failed\n", failures, iters); + nerrs++; + } else { + printf("[OK]\tAll %d iterations succeeded\n", iters); + } +} + +static int finish_exec_test(void) +{ + /* + * In a sensible world, this would be check_invalid_segment(0, 1); + * For better or for worse, though, the LDT is inherited across exec. + * We can probably change this safely, but for now we test it. + */ + check_valid_segment(0, 1, + AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB, + 42, true); + + return nerrs ? 1 : 0; +} + +static void do_exec_test(void) +{ + printf("[RUN]\tTest exec\n"); + + struct user_desc desc = { + .entry_number = 0, + .base_addr = 0, + .limit = 42, + .seg_32bit = 1, + .contents = 2, /* Code, not conforming */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 0, + .useable = 0 + }; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB); + + pid_t child = fork(); + if (child == 0) { + execl("/proc/self/exe", "ldt_gdt_test_exec", NULL); + printf("[FAIL]\tCould not exec self\n"); + exit(1); /* exec failed */ + } else { + int status; + if (waitpid(child, &status, 0) != child || + !WIFEXITED(status)) { + printf("[FAIL]\tChild died\n"); + nerrs++; + } else if (WEXITSTATUS(status) != 0) { + printf("[FAIL]\tChild failed\n"); + nerrs++; + } else { + printf("[OK]\tChild succeeded\n"); + } + } +} + +int main(int argc, char **argv) +{ + if (argc == 1 && !strcmp(argv[0], "ldt_gdt_test_exec")) + return finish_exec_test(); + + do_simple_tests(); + + do_multicpu_tests(); + + do_exec_test(); + + return nerrs ? 1 : 0; +} diff --git a/x86/sigreturn.c b/x86/sigreturn.c new file mode 100644 index 0000000..b5aa1ba --- /dev/null +++ b/x86/sigreturn.c @@ -0,0 +1,684 @@ +/* + * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace + * Copyright (c) 2014-2015 Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * This is a series of tests that exercises the sigreturn(2) syscall and + * the IRET / SYSRET paths in the kernel. + * + * For now, this focuses on the effects of unusual CS and SS values, + * and it has a bunch of tests to make sure that ESP/RSP is restored + * properly. + * + * The basic idea behind these tests is to raise(SIGUSR1) to create a + * sigcontext frame, plug in the values to be tested, and then return, + * which implicitly invokes sigreturn(2) and programs the user context + * as desired. + * + * For tests for which we expect sigreturn and the subsequent return to + * user mode to succeed, we return to a short trampoline that generates + * SIGTRAP so that the meat of the tests can be ordinary C code in a + * SIGTRAP handler. + * + * The inner workings of each test is documented below. + * + * Do not run on outdated, unpatched kernels at risk of nasty crashes. + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * In principle, this test can run on Linux emulation layers (e.g. + * Illumos "LX branded zones"). Solaris-based kernels reserve LDT + * entries 0-5 for their own internal purposes, so start our LDT + * allocations above that reservation. (The tests don't pass on LX + * branded zones, but at least this lets them run.) + */ +#define LDT_OFFSET 6 + +/* An aligned stack accessible through some of our segments. */ +static unsigned char stack16[65536] __attribute__((aligned(4096))); + +/* + * An aligned int3 instruction used as a trampoline. Some of the tests + * want to fish out their ss values, so this trampoline copies ss to eax + * before the int3. + */ +asm (".pushsection .text\n\t" + ".type int3, @function\n\t" + ".align 4096\n\t" + "int3:\n\t" + "mov %ss,%eax\n\t" + "int3\n\t" + ".size int3, . - int3\n\t" + ".align 4096, 0xcc\n\t" + ".popsection"); +extern char int3[4096]; + +/* + * At startup, we prepapre: + * + * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero + * descriptor or out of bounds). + * - code16_sel: A 16-bit LDT code segment pointing to int3. + * - data16_sel: A 16-bit LDT data segment pointing to stack16. + * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3. + * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16. + * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16. + * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to + * stack16. + * + * For no particularly good reason, xyz_sel is a selector value with the + * RPL and LDT bits filled in, whereas xyz_idx is just an index into the + * descriptor table. These variables will be zero if their respective + * segments could not be allocated. + */ +static unsigned short ldt_nonexistent_sel; +static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel; + +static unsigned short gdt_data16_idx, gdt_npdata32_idx; + +static unsigned short GDT3(int idx) +{ + return (idx << 3) | 3; +} + +static unsigned short LDT3(int idx) +{ + return (idx << 3) | 7; +} + +/* Our sigaltstack scratch space. */ +static char altstack_data[SIGSTKSZ]; + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static void clearhandler(int sig) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static void add_ldt(const struct user_desc *desc, unsigned short *var, + const char *name) +{ + if (syscall(SYS_modify_ldt, 1, desc, sizeof(*desc)) == 0) { + *var = LDT3(desc->entry_number); + } else { + printf("[NOTE]\tFailed to create %s segment\n", name); + *var = 0; + } +} + +static void setup_ldt(void) +{ + if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16)) + errx(1, "stack16 is too high\n"); + if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3)) + errx(1, "int3 is too high\n"); + + ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2); + + const struct user_desc code16_desc = { + .entry_number = LDT_OFFSET + 0, + .base_addr = (unsigned long)int3, + .limit = 4095, + .seg_32bit = 0, + .contents = 2, /* Code, not conforming */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 0, + .useable = 0 + }; + add_ldt(&code16_desc, &code16_sel, "code16"); + + const struct user_desc data16_desc = { + .entry_number = LDT_OFFSET + 1, + .base_addr = (unsigned long)stack16, + .limit = 0xffff, + .seg_32bit = 0, + .contents = 0, /* Data, grow-up */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 0, + .useable = 0 + }; + add_ldt(&data16_desc, &data16_sel, "data16"); + + const struct user_desc npcode32_desc = { + .entry_number = LDT_OFFSET + 3, + .base_addr = (unsigned long)int3, + .limit = 4095, + .seg_32bit = 1, + .contents = 2, /* Code, not conforming */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 1, + .useable = 0 + }; + add_ldt(&npcode32_desc, &npcode32_sel, "npcode32"); + + const struct user_desc npdata32_desc = { + .entry_number = LDT_OFFSET + 4, + .base_addr = (unsigned long)stack16, + .limit = 0xffff, + .seg_32bit = 1, + .contents = 0, /* Data, grow-up */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 1, + .useable = 0 + }; + add_ldt(&npdata32_desc, &npdata32_sel, "npdata32"); + + struct user_desc gdt_data16_desc = { + .entry_number = -1, + .base_addr = (unsigned long)stack16, + .limit = 0xffff, + .seg_32bit = 0, + .contents = 0, /* Data, grow-up */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 0, + .useable = 0 + }; + + if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) { + /* + * This probably indicates vulnerability to CVE-2014-8133. + * Merely getting here isn't definitive, though, and we'll + * diagnose the problem for real later on. + */ + printf("[WARN]\tset_thread_area allocated data16 at index %d\n", + gdt_data16_desc.entry_number); + gdt_data16_idx = gdt_data16_desc.entry_number; + } else { + printf("[OK]\tset_thread_area refused 16-bit data\n"); + } + + struct user_desc gdt_npdata32_desc = { + .entry_number = -1, + .base_addr = (unsigned long)stack16, + .limit = 0xffff, + .seg_32bit = 1, + .contents = 0, /* Data, grow-up */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 1, + .useable = 0 + }; + + if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) { + /* + * As a hardening measure, newer kernels don't allow this. + */ + printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n", + gdt_npdata32_desc.entry_number); + gdt_npdata32_idx = gdt_npdata32_desc.entry_number; + } else { + printf("[OK]\tset_thread_area refused 16-bit data\n"); + } +} + +/* State used by our signal handlers. */ +static gregset_t initial_regs, requested_regs, resulting_regs; + +/* Instructions for the SIGUSR1 handler. */ +static volatile unsigned short sig_cs, sig_ss; +static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno; + +/* Abstractions for some 32-bit vs 64-bit differences. */ +#ifdef __x86_64__ +# define REG_IP REG_RIP +# define REG_SP REG_RSP +# define REG_AX REG_RAX + +struct selectors { + unsigned short cs, gs, fs, ss; +}; + +static unsigned short *ssptr(ucontext_t *ctx) +{ + struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS]; + return &sels->ss; +} + +static unsigned short *csptr(ucontext_t *ctx) +{ + struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS]; + return &sels->cs; +} +#else +# define REG_IP REG_EIP +# define REG_SP REG_ESP +# define REG_AX REG_EAX + +static greg_t *ssptr(ucontext_t *ctx) +{ + return &ctx->uc_mcontext.gregs[REG_SS]; +} + +static greg_t *csptr(ucontext_t *ctx) +{ + return &ctx->uc_mcontext.gregs[REG_CS]; +} +#endif + +/* Number of errors in the current test case. */ +static volatile sig_atomic_t nerrs; + +/* + * SIGUSR1 handler. Sets CS and SS as requested and points IP to the + * int3 trampoline. Sets SP to a large known value so that we can see + * whether the value round-trips back to user mode correctly. + */ +static void sigusr1(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t*)ctx_void; + + memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); + + *csptr(ctx) = sig_cs; + *ssptr(ctx) = sig_ss; + + ctx->uc_mcontext.gregs[REG_IP] = + sig_cs == code16_sel ? 0 : (unsigned long)&int3; + ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL; + ctx->uc_mcontext.gregs[REG_AX] = 0; + + memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); + requested_regs[REG_AX] = *ssptr(ctx); /* The asm code does this. */ + + return; +} + +/* + * Called after a successful sigreturn. Restores our state so that + * the original raise(SIGUSR1) returns. + */ +static void sigtrap(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t*)ctx_void; + + sig_err = ctx->uc_mcontext.gregs[REG_ERR]; + sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO]; + + unsigned short ss; + asm ("mov %%ss,%0" : "=r" (ss)); + + greg_t asm_ss = ctx->uc_mcontext.gregs[REG_AX]; + if (asm_ss != sig_ss && sig == SIGTRAP) { + /* Sanity check failure. */ + printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n", + ss, *ssptr(ctx), (unsigned long long)asm_ss); + nerrs++; + } + + memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); + memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t)); + + sig_trapped = sig; +} + +/* + * Checks a given selector for its code bitness or returns -1 if it's not + * a usable code segment selector. + */ +int cs_bitness(unsigned short cs) +{ + uint32_t valid = 0, ar; + asm ("lar %[cs], %[ar]\n\t" + "jnz 1f\n\t" + "mov $1, %[valid]\n\t" + "1:" + : [ar] "=r" (ar), [valid] "+rm" (valid) + : [cs] "r" (cs)); + + if (!valid) + return -1; + + bool db = (ar & (1 << 22)); + bool l = (ar & (1 << 21)); + + if (!(ar & (1<<11))) + return -1; /* Not code. */ + + if (l && !db) + return 64; + else if (!l && db) + return 32; + else if (!l && !db) + return 16; + else + return -1; /* Unknown bitness. */ +} + +/* Finds a usable code segment of the requested bitness. */ +int find_cs(int bitness) +{ + unsigned short my_cs; + + asm ("mov %%cs,%0" : "=r" (my_cs)); + + if (cs_bitness(my_cs) == bitness) + return my_cs; + if (cs_bitness(my_cs + (2 << 3)) == bitness) + return my_cs + (2 << 3); + if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness) + return my_cs - (2 << 3); + if (cs_bitness(code16_sel) == bitness) + return code16_sel; + + printf("[WARN]\tCould not find %d-bit CS\n", bitness); + return -1; +} + +static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss) +{ + int cs = find_cs(cs_bits); + if (cs == -1) { + printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n", + cs_bits, use_16bit_ss ? 16 : 32); + return 0; + } + + if (force_ss != -1) { + sig_ss = force_ss; + } else { + if (use_16bit_ss) { + if (!data16_sel) { + printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n", + cs_bits); + return 0; + } + sig_ss = data16_sel; + } else { + asm volatile ("mov %%ss,%0" : "=r" (sig_ss)); + } + } + + sig_cs = cs; + + printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n", + cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss, + (sig_ss & 4) ? "" : ", GDT"); + + raise(SIGUSR1); + + nerrs = 0; + + /* + * Check that each register had an acceptable value when the + * int3 trampoline was invoked. + */ + for (int i = 0; i < NGREG; i++) { + greg_t req = requested_regs[i], res = resulting_regs[i]; + if (i == REG_TRAPNO || i == REG_IP) + continue; /* don't care */ + if (i == REG_SP) { + printf("\tSP: %llx -> %llx\n", (unsigned long long)req, + (unsigned long long)res); + + /* + * In many circumstances, the high 32 bits of rsp + * are zeroed. For example, we could be a real + * 32-bit program, or we could hit any of a number + * of poorly-documented IRET or segmented ESP + * oddities. If this happens, it's okay. + */ + if (res == (req & 0xFFFFFFFF)) + continue; /* OK; not expected to work */ + } + + bool ignore_reg = false; +#if __i386__ + if (i == REG_UESP) + ignore_reg = true; +#else + if (i == REG_CSGSFS) { + struct selectors *req_sels = + (void *)&requested_regs[REG_CSGSFS]; + struct selectors *res_sels = + (void *)&resulting_regs[REG_CSGSFS]; + if (req_sels->cs != res_sels->cs) { + printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n", + req_sels->cs, res_sels->cs); + nerrs++; + } + + if (req_sels->ss != res_sels->ss) { + printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n", + req_sels->ss, res_sels->ss); + nerrs++; + } + + continue; + } +#endif + + /* Sanity check on the kernel */ + if (i == REG_AX && requested_regs[i] != resulting_regs[i]) { + printf("[FAIL]\tAX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n", + (unsigned long long)requested_regs[i], + (unsigned long long)resulting_regs[i]); + nerrs++; + continue; + } + + if (requested_regs[i] != resulting_regs[i] && !ignore_reg) { + /* + * SP is particularly interesting here. The + * usual cause of failures is that we hit the + * nasty IRET case of returning to a 16-bit SS, + * in which case bits 16:31 of the *kernel* + * stack pointer persist in ESP. + */ + printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n", + i, (unsigned long long)requested_regs[i], + (unsigned long long)resulting_regs[i]); + nerrs++; + } + } + + if (nerrs == 0) + printf("[OK]\tall registers okay\n"); + + return nerrs; +} + +static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs) +{ + int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs; + if (cs == -1) + return 0; + + sig_cs = cs; + sig_ss = ss; + + printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n", + cs_bits, sig_cs, sig_ss); + + sig_trapped = 0; + raise(SIGUSR1); + if (sig_trapped) { + char errdesc[32] = ""; + if (sig_err) { + const char *src = (sig_err & 1) ? " EXT" : ""; + const char *table; + if ((sig_err & 0x6) == 0x0) + table = "GDT"; + else if ((sig_err & 0x6) == 0x4) + table = "LDT"; + else if ((sig_err & 0x6) == 0x2) + table = "IDT"; + else + table = "???"; + + sprintf(errdesc, "%s%s index %d, ", + table, src, sig_err >> 3); + } + + char trapname[32]; + if (sig_trapno == 13) + strcpy(trapname, "GP"); + else if (sig_trapno == 11) + strcpy(trapname, "NP"); + else if (sig_trapno == 12) + strcpy(trapname, "SS"); + else if (sig_trapno == 32) + strcpy(trapname, "IRET"); /* X86_TRAP_IRET */ + else + sprintf(trapname, "%d", sig_trapno); + + printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n", + trapname, (unsigned long)sig_err, + errdesc, strsignal(sig_trapped)); + return 0; + } else { + printf("[FAIL]\tDid not get SIGSEGV\n"); + return 1; + } +} + +int main() +{ + int total_nerrs = 0; + unsigned short my_cs, my_ss; + + asm volatile ("mov %%cs,%0" : "=r" (my_cs)); + asm volatile ("mov %%ss,%0" : "=r" (my_ss)); + setup_ldt(); + + stack_t stack = { + .ss_sp = altstack_data, + .ss_size = SIGSTKSZ, + }; + if (sigaltstack(&stack, NULL) != 0) + err(1, "sigaltstack"); + + sethandler(SIGUSR1, sigusr1, 0); + sethandler(SIGTRAP, sigtrap, SA_ONSTACK); + + /* Easy cases: return to a 32-bit SS in each possible CS bitness. */ + total_nerrs += test_valid_sigreturn(64, false, -1); + total_nerrs += test_valid_sigreturn(32, false, -1); + total_nerrs += test_valid_sigreturn(16, false, -1); + + /* + * Test easy espfix cases: return to a 16-bit LDT SS in each possible + * CS bitness. NB: with a long mode CS, the SS bitness is irrelevant. + * + * This catches the original missing-espfix-on-64-bit-kernels issue + * as well as CVE-2014-8134. + */ + total_nerrs += test_valid_sigreturn(64, true, -1); + total_nerrs += test_valid_sigreturn(32, true, -1); + total_nerrs += test_valid_sigreturn(16, true, -1); + + if (gdt_data16_idx) { + /* + * For performance reasons, Linux skips espfix if SS points + * to the GDT. If we were able to allocate a 16-bit SS in + * the GDT, see if it leaks parts of the kernel stack pointer. + * + * This tests for CVE-2014-8133. + */ + total_nerrs += test_valid_sigreturn(64, true, + GDT3(gdt_data16_idx)); + total_nerrs += test_valid_sigreturn(32, true, + GDT3(gdt_data16_idx)); + total_nerrs += test_valid_sigreturn(16, true, + GDT3(gdt_data16_idx)); + } + + /* + * We're done testing valid sigreturn cases. Now we test states + * for which sigreturn itself will succeed but the subsequent + * entry to user mode will fail. + * + * Depending on the failure mode and the kernel bitness, these + * entry failures can generate SIGSEGV, SIGBUS, or SIGILL. + */ + clearhandler(SIGTRAP); + sethandler(SIGSEGV, sigtrap, SA_ONSTACK); + sethandler(SIGBUS, sigtrap, SA_ONSTACK); + sethandler(SIGILL, sigtrap, SA_ONSTACK); /* 32-bit kernels do this */ + + /* Easy failures: invalid SS, resulting in #GP(0) */ + test_bad_iret(64, ldt_nonexistent_sel, -1); + test_bad_iret(32, ldt_nonexistent_sel, -1); + test_bad_iret(16, ldt_nonexistent_sel, -1); + + /* These fail because SS isn't a data segment, resulting in #GP(SS) */ + test_bad_iret(64, my_cs, -1); + test_bad_iret(32, my_cs, -1); + test_bad_iret(16, my_cs, -1); + + /* Try to return to a not-present code segment, triggering #NP(SS). */ + test_bad_iret(32, my_ss, npcode32_sel); + + /* + * Try to return to a not-present but otherwise valid data segment. + * This will cause IRET to fail with #SS on the espfix stack. This + * exercises CVE-2014-9322. + * + * Note that, if espfix is enabled, 64-bit Linux will lose track + * of the actual cause of failure and report #GP(0) instead. + * This would be very difficult for Linux to avoid, because + * espfix64 causes IRET failures to be promoted to #DF, so the + * original exception frame is never pushed onto the stack. + */ + test_bad_iret(32, npdata32_sel, -1); + + /* + * Try to return to a not-present but otherwise valid data + * segment without invoking espfix. Newer kernels don't allow + * this to happen in the first place. On older kernels, though, + * this can trigger CVE-2014-9322. + */ + if (gdt_npdata32_idx) + test_bad_iret(32, GDT3(gdt_npdata32_idx), -1); + + return total_nerrs ? 1 : 0; +} diff --git a/x86/single_step_syscall.c b/x86/single_step_syscall.c new file mode 100644 index 0000000..50c2635 --- /dev/null +++ b/x86/single_step_syscall.c @@ -0,0 +1,181 @@ +/* + * single_step_syscall.c - single-steps various x86 syscalls + * Copyright (c) 2014-2015 Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * This is a very simple series of tests that makes system calls with + * the TF flag set. This exercises some nasty kernel code in the + * SYSENTER case: SYSENTER does not clear TF, so SYSENTER with TF set + * immediately issues #DB from CPL 0. This requires special handling in + * the kernel. + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static volatile sig_atomic_t sig_traps; + +#ifdef __x86_64__ +# define REG_IP REG_RIP +# define WIDTH "q" +#else +# define REG_IP REG_EIP +# define WIDTH "l" +#endif + +static unsigned long get_eflags(void) +{ + unsigned long eflags; + asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags)); + return eflags; +} + +static void set_eflags(unsigned long eflags) +{ + asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH + : : "rm" (eflags) : "flags"); +} + +#define X86_EFLAGS_TF (1UL << 8) + +static void sigtrap(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t*)ctx_void; + + if (get_eflags() & X86_EFLAGS_TF) { + set_eflags(get_eflags() & ~X86_EFLAGS_TF); + printf("[WARN]\tSIGTRAP handler had TF set\n"); + _exit(1); + } + + sig_traps++; + + if (sig_traps == 10000 || sig_traps == 10001) { + printf("[WARN]\tHit %d SIGTRAPs with si_addr 0x%lx, ip 0x%lx\n", + (int)sig_traps, + (unsigned long)info->si_addr, + (unsigned long)ctx->uc_mcontext.gregs[REG_IP]); + } +} + +static void check_result(void) +{ + unsigned long new_eflags = get_eflags(); + set_eflags(new_eflags & ~X86_EFLAGS_TF); + + if (!sig_traps) { + printf("[FAIL]\tNo SIGTRAP\n"); + exit(1); + } + + if (!(new_eflags & X86_EFLAGS_TF)) { + printf("[FAIL]\tTF was cleared\n"); + exit(1); + } + + printf("[OK]\tSurvived with TF set and %d traps\n", (int)sig_traps); + sig_traps = 0; +} + +int main() +{ + int tmp; + + sethandler(SIGTRAP, sigtrap, 0); + + printf("[RUN]\tSet TF and check nop\n"); + set_eflags(get_eflags() | X86_EFLAGS_TF); + asm volatile ("nop"); + check_result(); + +#ifdef __x86_64__ + printf("[RUN]\tSet TF and check syscall-less opportunistic sysret\n"); + set_eflags(get_eflags() | X86_EFLAGS_TF); + extern unsigned char post_nop[]; + asm volatile ("pushf" WIDTH "\n\t" + "pop" WIDTH " %%r11\n\t" + "nop\n\t" + "post_nop:" + : : "c" (post_nop) : "r11"); + check_result(); +#endif + + printf("[RUN]\tSet TF and check int80\n"); + set_eflags(get_eflags() | X86_EFLAGS_TF); + asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)); + check_result(); + + /* + * This test is particularly interesting if fast syscalls use + * SYSENTER: it triggers a nasty design flaw in SYSENTER. + * Specifically, SYSENTER does not clear TF, so either SYSENTER + * or the next instruction traps at CPL0. (Of course, Intel + * mostly forgot to document exactly what happens here.) So we + * get a CPL0 fault with usergs (on 64-bit kernels) and possibly + * no stack. The only sane way the kernel can possibly handle + * it is to clear TF on return from the #DB handler, but this + * happens way too early to set TF in the saved pt_regs, so the + * kernel has to do something clever to avoid losing track of + * the TF bit. + * + * Needless to say, we've had bugs in this area. + */ + syscall(SYS_getpid); /* Force symbol binding without TF set. */ + printf("[RUN]\tSet TF and check a fast syscall\n"); + set_eflags(get_eflags() | X86_EFLAGS_TF); + syscall(SYS_getpid); + check_result(); + + /* Now make sure that another fast syscall doesn't set TF again. */ + printf("[RUN]\tFast syscall with TF cleared\n"); + fflush(stdout); /* Force a syscall */ + if (get_eflags() & X86_EFLAGS_TF) { + printf("[FAIL]\tTF is now set\n"); + exit(1); + } + if (sig_traps) { + printf("[FAIL]\tGot SIGTRAP\n"); + exit(1); + } + printf("[OK]\tNothing unexpected happened\n"); + + return 0; +} diff --git a/x86/syscall_arg_fault.c b/x86/syscall_arg_fault.c new file mode 100644 index 0000000..7db4fc9 --- /dev/null +++ b/x86/syscall_arg_fault.c @@ -0,0 +1,130 @@ +/* + * syscall_arg_fault.c - tests faults 32-bit fast syscall stack args + * Copyright (c) 2015 Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include + +/* Our sigaltstack scratch space. */ +static unsigned char altstack_data[SIGSTKSZ]; + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static volatile sig_atomic_t sig_traps; +static sigjmp_buf jmpbuf; + +static volatile sig_atomic_t n_errs; + +static void sigsegv(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t*)ctx_void; + + if (ctx->uc_mcontext.gregs[REG_EAX] != -EFAULT) { + printf("[FAIL]\tAX had the wrong value: 0x%x\n", + ctx->uc_mcontext.gregs[REG_EAX]); + n_errs++; + } else { + printf("[OK]\tSeems okay\n"); + } + + siglongjmp(jmpbuf, 1); +} + +static void sigill(int sig, siginfo_t *info, void *ctx_void) +{ + printf("[SKIP]\tIllegal instruction\n"); + siglongjmp(jmpbuf, 1); +} + +int main() +{ + stack_t stack = { + .ss_sp = altstack_data, + .ss_size = SIGSTKSZ, + }; + if (sigaltstack(&stack, NULL) != 0) + err(1, "sigaltstack"); + + sethandler(SIGSEGV, sigsegv, SA_ONSTACK); + sethandler(SIGILL, sigill, SA_ONSTACK); + + /* + * Exercise another nasty special case. The 32-bit SYSCALL + * and SYSENTER instructions (even in compat mode) each + * clobber one register. A Linux system call has a syscall + * number and six arguments, and the user stack pointer + * needs to live in some register on return. That means + * that we need eight registers, but SYSCALL and SYSENTER + * only preserve seven registers. As a result, one argument + * ends up on the stack. The stack is user memory, which + * means that the kernel can fail to read it. + * + * The 32-bit fast system calls don't have a defined ABI: + * we're supposed to invoke them through the vDSO. So we'll + * fudge it: we set all regs to invalid pointer values and + * invoke the entry instruction. The return will fail no + * matter what, and we completely lose our program state, + * but we can fix it up with a signal handler. + */ + + printf("[RUN]\tSYSENTER with invalid state\n"); + if (sigsetjmp(jmpbuf, 1) == 0) { + asm volatile ( + "movl $-1, %%eax\n\t" + "movl $-1, %%ebx\n\t" + "movl $-1, %%ecx\n\t" + "movl $-1, %%edx\n\t" + "movl $-1, %%esi\n\t" + "movl $-1, %%edi\n\t" + "movl $-1, %%ebp\n\t" + "movl $-1, %%esp\n\t" + "sysenter" + : : : "memory", "flags"); + } + + printf("[RUN]\tSYSCALL with invalid state\n"); + if (sigsetjmp(jmpbuf, 1) == 0) { + asm volatile ( + "movl $-1, %%eax\n\t" + "movl $-1, %%ebx\n\t" + "movl $-1, %%ecx\n\t" + "movl $-1, %%edx\n\t" + "movl $-1, %%esi\n\t" + "movl $-1, %%edi\n\t" + "movl $-1, %%ebp\n\t" + "movl $-1, %%esp\n\t" + "syscall\n\t" + "pushl $0" /* make sure we segfault cleanly */ + : : : "memory", "flags"); + } + + return 0; +} diff --git a/x86/syscall_nt.c b/x86/syscall_nt.c new file mode 100644 index 0000000..60c06af --- /dev/null +++ b/x86/syscall_nt.c @@ -0,0 +1,54 @@ +/* + * syscall_nt.c - checks syscalls with NT set + * Copyright (c) 2014-2015 Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Some obscure user-space code requires the ability to make system calls + * with FLAGS.NT set. Make sure it works. + */ + +#include +#include +#include +#include + +#ifdef __x86_64__ +# define WIDTH "q" +#else +# define WIDTH "l" +#endif + +static unsigned long get_eflags(void) +{ + unsigned long eflags; + asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags)); + return eflags; +} + +static void set_eflags(unsigned long eflags) +{ + asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH + : : "rm" (eflags) : "flags"); +} + +int main() +{ + printf("[RUN]\tSet NT and issue a syscall\n"); + set_eflags(get_eflags() | X86_EFLAGS_NT); + syscall(SYS_getpid); + if (get_eflags() & X86_EFLAGS_NT) { + printf("[OK]\tThe syscall worked and NT is still set\n"); + return 0; + } else { + printf("[FAIL]\tThe syscall worked but NT was cleared\n"); + return 1; + } +} diff --git a/x86/sysret_ss_attrs.c b/x86/sysret_ss_attrs.c new file mode 100644 index 0000000..ce42d5a --- /dev/null +++ b/x86/sysret_ss_attrs.c @@ -0,0 +1,112 @@ +/* + * sysret_ss_attrs.c - test that syscalls return valid hidden SS attributes + * Copyright (c) 2015 Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * On AMD CPUs, SYSRET can return with a valid SS descriptor with with + * the hidden attributes set to an unusable state. Make sure the kernel + * doesn't let this happen. + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void *threadproc(void *ctx) +{ + /* + * Do our best to cause sleeps on this CPU to exit the kernel and + * re-enter with SS = 0. + */ + while (true) + ; + + return NULL; +} + +#ifdef __x86_64__ +extern unsigned long call32_from_64(void *stack, void (*function)(void)); + +asm (".pushsection .text\n\t" + ".code32\n\t" + "test_ss:\n\t" + "pushl $0\n\t" + "popl %eax\n\t" + "ret\n\t" + ".code64"); +extern void test_ss(void); +#endif + +int main() +{ + /* + * Start a busy-looping thread on the same CPU we're on. + * For simplicity, just stick everything to CPU 0. This will + * fail in some containers, but that's probably okay. + */ + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) + printf("[WARN]\tsched_setaffinity failed\n"); + + pthread_t thread; + if (pthread_create(&thread, 0, threadproc, 0) != 0) + err(1, "pthread_create"); + +#ifdef __x86_64__ + unsigned char *stack32 = mmap(NULL, 4096, PROT_READ | PROT_WRITE, + MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, + -1, 0); + if (stack32 == MAP_FAILED) + err(1, "mmap"); +#endif + + printf("[RUN]\tSyscalls followed by SS validation\n"); + + for (int i = 0; i < 1000; i++) { + /* + * Go to sleep and return using sysret (if we're 64-bit + * or we're 32-bit on AMD on a 64-bit kernel). On AMD CPUs, + * SYSRET doesn't fix up the cached SS descriptor, so the + * kernel needs some kind of workaround to make sure that we + * end the system call with a valid stack segment. This + * can be a confusing failure because the SS *selector* + * is the same regardless. + */ + usleep(2); + +#ifdef __x86_64__ + /* + * On 32-bit, just doing a syscall through glibc is enough + * to cause a crash if our cached SS descriptor is invalid. + * On 64-bit, it's not, so try extra hard. + */ + call32_from_64(stack32 + 4088, test_ss); +#endif + } + + printf("[OK]\tWe survived\n"); + +#ifdef __x86_64__ + munmap(stack32, 4096); +#endif + + return 0; +} diff --git a/x86/thunks.S b/x86/thunks.S new file mode 100644 index 0000000..ce8a995 --- /dev/null +++ b/x86/thunks.S @@ -0,0 +1,67 @@ +/* + * thunks.S - assembly helpers for mixed-bitness code + * Copyright (c) 2015 Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * These are little helpers that make it easier to switch bitness on + * the fly. + */ + + .text + + .global call32_from_64 + .type call32_from_64, @function +call32_from_64: + // rdi: stack to use + // esi: function to call + + // Save registers + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushfq + + // Switch stacks + mov %rsp,(%rdi) + mov %rdi,%rsp + + // Switch to compatibility mode + pushq $0x23 /* USER32_CS */ + pushq $1f + lretq + +1: + .code32 + // Call the function + call *%esi + // Switch back to long mode + jmp $0x33,$1f + .code64 + +1: + // Restore the stack + mov (%rsp),%rsp + + // Restore registers + popfq + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + + ret + +.size call32_from_64, .-call32_from_64 diff --git a/x86/trivial_32bit_program.c b/x86/trivial_32bit_program.c new file mode 100644 index 0000000..fabdf0f --- /dev/null +++ b/x86/trivial_32bit_program.c @@ -0,0 +1,18 @@ +/* + * Trivial program to check that we have a valid 32-bit build environment. + * Copyright (c) 2015 Andy Lutomirski + * GPL v2 + */ + +#ifndef __i386__ +# error wrong architecture +#endif + +#include + +int main() +{ + printf("\n"); + + return 0; +} diff --git a/x86/trivial_64bit_program.c b/x86/trivial_64bit_program.c new file mode 100644 index 0000000..05c6a41 --- /dev/null +++ b/x86/trivial_64bit_program.c @@ -0,0 +1,18 @@ +/* + * Trivial program to check that we have a valid 64-bit build environment. + * Copyright (c) 2015 Andy Lutomirski + * GPL v2 + */ + +#ifndef __x86_64__ +# error wrong architecture +#endif + +#include + +int main() +{ + printf("\n"); + + return 0; +} diff --git a/zram/Makefile b/zram/Makefile new file mode 100644 index 0000000..29d8034 --- /dev/null +++ b/zram/Makefile @@ -0,0 +1,9 @@ +all: + +TEST_PROGS := zram.sh +TEST_FILES := zram01.sh zram02.sh zram_lib.sh + +include ../lib.mk + +clean: + $(RM) err.log diff --git a/zram/README b/zram/README new file mode 100644 index 0000000..eb17917 --- /dev/null +++ b/zram/README @@ -0,0 +1,40 @@ +zram: Compressed RAM based block devices +---------------------------------------- +* Introduction + +The zram module creates RAM based block devices named /dev/zram +( = 0, 1, ...). Pages written to these disks are compressed and stored +in memory itself. These disks allow very fast I/O and compression provides +good amounts of memory savings. Some of the usecases include /tmp storage, +use as swap disks, various caches under /var and maybe many more :) + +Statistics for individual zram devices are exported through sysfs nodes at +/sys/block/zram/ + +Kconfig required: +CONFIG_ZRAM=y +CONFIG_ZRAM_LZ4_COMPRESS=y +CONFIG_ZPOOL=y +CONFIG_ZSMALLOC=y + +ZRAM Testcases +-------------- +zram_lib.sh: create library with initialization/cleanup functions +zram.sh: For sanity check of CONFIG_ZRAM and to run zram01 and zram02 + +Two functional tests: zram01 and zram02: +zram01.sh: creates general purpose ram disks with ext4 filesystems +zram02.sh: creates block device for swap + +Commands required for testing: + - bc + - dd + - free + - awk + - mkswap + - swapon + - swapoff + - mkfs/ mkfs.ext4 + +For more information please refer: +kernel-source-tree/Documentation/blockdev/zram.txt diff --git a/zram/zram.sh b/zram/zram.sh new file mode 100755 index 0000000..683a292 --- /dev/null +++ b/zram/zram.sh @@ -0,0 +1,27 @@ +#!/bin/bash +TCID="zram.sh" + +. ./zram_lib.sh + +run_zram () { +echo "--------------------" +echo "running zram tests" +echo "--------------------" +./zram01.sh +echo "" +./zram02.sh +} + +check_prereqs + +# check zram module exists +MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko +if [ -f $MODULE_PATH ]; then + run_zram +elif [ -b /dev/zram0 ]; then + run_zram +else + echo "$TCID : No zram.ko module or /dev/zram0 device file not found" + echo "$TCID : CONFIG_ZRAM is not set" + exit 1 +fi diff --git a/zram/zram01.sh b/zram/zram01.sh new file mode 100755 index 0000000..b9566a6 --- /dev/null +++ b/zram/zram01.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# Copyright (c) 2015 Oracle and/or its affiliates. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it would be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# Test creates several zram devices with different filesystems on them. +# It fills each device with zeros and checks that compression works. +# +# Author: Alexey Kodanev +# Modified: Naresh Kamboju + +TCID="zram01" +ERR_CODE=0 + +. ./zram_lib.sh + +# Test will create the following number of zram devices: +dev_num=1 +# This is a list of parameters for zram devices. +# Number of items must be equal to 'dev_num' parameter. +zram_max_streams="2" + +# The zram sysfs node 'disksize' value can be either in bytes, +# or you can use mem suffixes. But in some old kernels, mem +# suffixes are not supported, for example, in RHEL6.6GA's kernel +# layer, it uses strict_strtoull() to parse disksize which does +# not support mem suffixes, in some newer kernels, they use +# memparse() which supports mem suffixes. So here we just use +# bytes to make sure everything works correctly. +zram_sizes="2097152" # 2MB +zram_mem_limits="2M" +zram_filesystems="ext4" +zram_algs="lzo" + +zram_fill_fs() +{ + local mem_free0=$(free -m | awk 'NR==2 {print $4}') + + for i in $(seq 0 $(($dev_num - 1))); do + echo "fill zram$i..." + local b=0 + while [ true ]; do + dd conv=notrunc if=/dev/zero of=zram${i}/file \ + oflag=append count=1 bs=1024 status=none \ + > /dev/null 2>&1 || break + b=$(($b + 1)) + done + echo "zram$i can be filled with '$b' KB" + done + + local mem_free1=$(free -m | awk 'NR==2 {print $4}') + local used_mem=$(($mem_free0 - $mem_free1)) + + local total_size=0 + for sm in $zram_sizes; do + local s=$(echo $sm | sed 's/M//') + total_size=$(($total_size + $s)) + done + + echo "zram used ${used_mem}M, zram disk sizes ${total_size}M" + + local v=$((100 * $total_size / $used_mem)) + + if [ "$v" -lt 100 ]; then + echo "FAIL compression ratio: 0.$v:1" + ERR_CODE=-1 + zram_cleanup + return + fi + + echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK" +} + +check_prereqs +zram_load +zram_max_streams +zram_compress_alg +zram_set_disksizes +zram_set_memlimit +zram_makefs +zram_mount + +zram_fill_fs +zram_cleanup +zram_unload + +if [ $ERR_CODE -ne 0 ]; then + echo "$TCID : [FAIL]" +else + echo "$TCID : [PASS]" +fi diff --git a/zram/zram02.sh b/zram/zram02.sh new file mode 100755 index 0000000..74569b8 --- /dev/null +++ b/zram/zram02.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# Copyright (c) 2015 Oracle and/or its affiliates. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it would be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# Test checks that we can create swap zram device. +# +# Author: Alexey Kodanev +# Modified: Naresh Kamboju + +TCID="zram02" +ERR_CODE=0 + +. ./zram_lib.sh + +# Test will create the following number of zram devices: +dev_num=1 +# This is a list of parameters for zram devices. +# Number of items must be equal to 'dev_num' parameter. +zram_max_streams="2" + +# The zram sysfs node 'disksize' value can be either in bytes, +# or you can use mem suffixes. But in some old kernels, mem +# suffixes are not supported, for example, in RHEL6.6GA's kernel +# layer, it uses strict_strtoull() to parse disksize which does +# not support mem suffixes, in some newer kernels, they use +# memparse() which supports mem suffixes. So here we just use +# bytes to make sure everything works correctly. +zram_sizes="1048576" # 1M +zram_mem_limits="1M" + +check_prereqs +zram_load +zram_max_streams +zram_set_disksizes +zram_set_memlimit +zram_makeswap +zram_swapoff +zram_cleanup +zram_unload + +if [ $ERR_CODE -ne 0 ]; then + echo "$TCID : [FAIL]" +else + echo "$TCID : [PASS]" +fi diff --git a/zram/zram_lib.sh b/zram/zram_lib.sh new file mode 100755 index 0000000..f6a9c73 --- /dev/null +++ b/zram/zram_lib.sh @@ -0,0 +1,233 @@ +#!/bin/sh +# Copyright (c) 2015 Oracle and/or its affiliates. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it would be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# Author: Alexey Kodanev +# Modified: Naresh Kamboju + +MODULE=0 +dev_makeswap=-1 +dev_mounted=-1 + +trap INT + +check_prereqs() +{ + local msg="skip all tests:" + local uid=$(id -u) + + if [ $uid -ne 0 ]; then + echo $msg must be run as root >&2 + exit 0 + fi +} + +zram_cleanup() +{ + echo "zram cleanup" + local i= + for i in $(seq 0 $dev_makeswap); do + swapoff /dev/zram$i + done + + for i in $(seq 0 $dev_mounted); do + umount /dev/zram$i + done + + for i in $(seq 0 $(($dev_num - 1))); do + echo 1 > /sys/block/zram${i}/reset + rm -rf zram$i + done + +} + +zram_unload() +{ + if [ $MODULE -ne 0 ] ; then + echo "zram rmmod zram" + rmmod zram > /dev/null 2>&1 + fi +} + +zram_load() +{ + # check zram module exists + MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko + if [ -f $MODULE_PATH ]; then + MODULE=1 + echo "create '$dev_num' zram device(s)" + modprobe zram num_devices=$dev_num + if [ $? -ne 0 ]; then + echo "failed to insert zram module" + exit 1 + fi + + dev_num_created=$(ls /dev/zram* | wc -w) + + if [ "$dev_num_created" -ne "$dev_num" ]; then + echo "unexpected num of devices: $dev_num_created" + ERR_CODE=-1 + else + echo "zram load module successful" + fi + elif [ -b /dev/zram0 ]; then + echo "/dev/zram0 device file found: OK" + else + echo "ERROR: No zram.ko module or no /dev/zram0 device found" + echo "$TCID : CONFIG_ZRAM is not set" + exit 1 + fi +} + +zram_max_streams() +{ + echo "set max_comp_streams to zram device(s)" + + local i=0 + for max_s in $zram_max_streams; do + local sys_path="/sys/block/zram${i}/max_comp_streams" + echo $max_s > $sys_path || \ + echo "FAIL failed to set '$max_s' to $sys_path" + sleep 1 + local max_streams=$(cat $sys_path) + + [ "$max_s" -ne "$max_streams" ] && \ + echo "FAIL can't set max_streams '$max_s', get $max_stream" + + i=$(($i + 1)) + echo "$sys_path = '$max_streams' ($i/$dev_num)" + done + + echo "zram max streams: OK" +} + +zram_compress_alg() +{ + echo "test that we can set compression algorithm" + + local algs=$(cat /sys/block/zram0/comp_algorithm) + echo "supported algs: $algs" + local i=0 + for alg in $zram_algs; do + local sys_path="/sys/block/zram${i}/comp_algorithm" + echo "$alg" > $sys_path || \ + echo "FAIL can't set '$alg' to $sys_path" + i=$(($i + 1)) + echo "$sys_path = '$alg' ($i/$dev_num)" + done + + echo "zram set compression algorithm: OK" +} + +zram_set_disksizes() +{ + echo "set disk size to zram device(s)" + local i=0 + for ds in $zram_sizes; do + local sys_path="/sys/block/zram${i}/disksize" + echo "$ds" > $sys_path || \ + echo "FAIL can't set '$ds' to $sys_path" + + i=$(($i + 1)) + echo "$sys_path = '$ds' ($i/$dev_num)" + done + + echo "zram set disksizes: OK" +} + +zram_set_memlimit() +{ + echo "set memory limit to zram device(s)" + + local i=0 + for ds in $zram_mem_limits; do + local sys_path="/sys/block/zram${i}/mem_limit" + echo "$ds" > $sys_path || \ + echo "FAIL can't set '$ds' to $sys_path" + + i=$(($i + 1)) + echo "$sys_path = '$ds' ($i/$dev_num)" + done + + echo "zram set memory limit: OK" +} + +zram_makeswap() +{ + echo "make swap with zram device(s)" + local i=0 + for i in $(seq 0 $(($dev_num - 1))); do + mkswap /dev/zram$i > err.log 2>&1 + if [ $? -ne 0 ]; then + cat err.log + echo "FAIL mkswap /dev/zram$1 failed" + fi + + swapon /dev/zram$i > err.log 2>&1 + if [ $? -ne 0 ]; then + cat err.log + echo "FAIL swapon /dev/zram$1 failed" + fi + + echo "done with /dev/zram$i" + dev_makeswap=$i + done + + echo "zram making zram mkswap and swapon: OK" +} + +zram_swapoff() +{ + local i= + for i in $(seq 0 $dev_makeswap); do + swapoff /dev/zram$i > err.log 2>&1 + if [ $? -ne 0 ]; then + cat err.log + echo "FAIL swapoff /dev/zram$i failed" + fi + done + dev_makeswap=-1 + + echo "zram swapoff: OK" +} + +zram_makefs() +{ + local i=0 + for fs in $zram_filesystems; do + # if requested fs not supported default it to ext2 + which mkfs.$fs > /dev/null 2>&1 || fs=ext2 + + echo "make $fs filesystem on /dev/zram$i" + mkfs.$fs /dev/zram$i > err.log 2>&1 + if [ $? -ne 0 ]; then + cat err.log + echo "FAIL failed to make $fs on /dev/zram$i" + fi + i=$(($i + 1)) + echo "zram mkfs.$fs: OK" + done +} + +zram_mount() +{ + local i=0 + for i in $(seq 0 $(($dev_num - 1))); do + echo "mount /dev/zram$i" + mkdir zram$i + mount /dev/zram$i zram$i > /dev/null || \ + echo "FAIL mount /dev/zram$i failed" + dev_mounted=$i + done + + echo "zram mount of zram device(s): OK" +} -- cgit v1.2.3