From 36126f8f2ed8168eb13aa0662b9b9585cba100a9 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 26 May 2012 10:43:17 -0700
Subject: word-at-a-time: make the interfaces truly generic

This changes the interfaces in <asm/word-at-a-time.h> to be a bit more
complicated, but a lot more generic.

In particular, it allows us to really do the operations efficiently on
both little-endian and big-endian machines, pretty much regardless of
machine details.  For example, if you can rely on a fast population
count instruction on your architecture, this will allow you to make your
optimized <asm/word-at-a-time.h> file with that.

NOTE! The "generic" version in include/asm-generic/word-at-a-time.h is
not truly generic, it actually only works on big-endian.  Why? Because
on little-endian the generic algorithms are wasteful, since you can
inevitably do better. The x86 implementation is an example of that.

(The only truly non-generic part of the asm-generic implementation is
the "find_zero()" function, and you could make a little-endian version
of it.  And if the Kbuild infrastructure allowed us to pick a particular
header file, that would be lovely)

The <asm/word-at-a-time.h> functions are as follows:

 - WORD_AT_A_TIME_CONSTANTS: specific constants that the algorithm
   uses.

 - has_zero(): take a word, and determine if it has a zero byte in it.
   It gets the word, the pointer to the constant pool, and a pointer to
   an intermediate "data" field it can set.

   This is the "quick-and-dirty" zero tester: it's what is run inside
   the hot loops.

 - "prep_zero_mask()": take the word, the data that has_zero() produced,
   and the constant pool, and generate an *exact* mask of which byte had
   the first zero.  This is run directly *outside* the loop, and allows
   the "has_zero()" function to answer the "is there a zero byte"
   question without necessarily getting exactly *which* byte is the
   first one to contain a zero.

   If you do multiple byte lookups concurrently (e.g. "hash_name()", which
   looks for both NUL and '/' bytes), after you've done the prep_zero_mask()
   phase, the result of those can be or'ed together to get the "either
   or" case.

 - The result from "prep_zero_mask()" can then be fed into "find_zero()"
   (to find the byte offset of the first byte that was zero) or into
   "zero_bytemask()" (to find the bytemask of the bytes preceding the
   zero byte).

   The existence of zero_bytemask() is optional, and is not necessary
   for the normal string routines.  But dentry name hashing needs it, so
   if you enable DENTRY_WORD_AT_A_TIME you need to expose it.

This changes the generic strncpy_from_user() function and the dentry
hashing functions to use these modified word-at-a-time interfaces.  This
gets us back to the optimized state of the x86 strncpy that we lost in
the previous commit when moving over to the generic version.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/strncpy_from_user.c | 47 +++++++----------------------------------------
 1 file changed, 7 insertions(+), 40 deletions(-)

(limited to 'lib')

diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c
index c4c09b0e96b..bb2b201d6ad 100644
--- a/lib/strncpy_from_user.c
+++ b/lib/strncpy_from_user.c
@@ -4,37 +4,7 @@
 #include <linux/errno.h>
 
 #include <asm/byteorder.h>
-
-static inline long find_zero(unsigned long mask)
-{
-	long byte = 0;
-
-#ifdef __BIG_ENDIAN
-#ifdef CONFIG_64BIT
-	if (mask >> 32)
-		mask >>= 32;
-	else
-		byte = 4;
-#endif
-	if (mask >> 16)
-		mask >>= 16;
-	else
-		byte += 2;
-	return (mask >> 8) ? byte : byte + 1;
-#else
-#ifdef CONFIG_64BIT
-	if (!((unsigned int) mask)) {
-		mask >>= 32;
-		byte = 4;
-	}
-#endif
-	if (!(mask & 0xffff)) {
-		mask >>= 16;
-		byte += 2;
-	}
-	return (mask & 0xff) ? byte : byte + 1;
-#endif
-}
+#include <asm/word-at-a-time.h>
 
 #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 #define IS_UNALIGNED(src, dst)	0
@@ -51,8 +21,7 @@ static inline long find_zero(unsigned long mask)
  */
 static inline long do_strncpy_from_user(char *dst, const char __user *src, long count, unsigned long max)
 {
-	const unsigned long high_bits = REPEAT_BYTE(0xfe) + 1;
-	const unsigned long low_bits = REPEAT_BYTE(0x7f);
+	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
 	long res = 0;
 
 	/*
@@ -66,18 +35,16 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src, long
 		goto byte_at_a_time;
 
 	while (max >= sizeof(unsigned long)) {
-		unsigned long c, v, rhs;
+		unsigned long c, data;
 
 		/* Fall back to byte-at-a-time if we get a page fault */
 		if (unlikely(__get_user(c,(unsigned long __user *)(src+res))))
 			break;
-		rhs = c | low_bits;
-		v = (c + high_bits) & ~rhs;
 		*(unsigned long *)(dst+res) = c;
-		if (v) {
-			v = (c & low_bits) + low_bits;
-			v = ~(v | rhs);
-			return res + find_zero(v);
+		if (has_zero(c, &data, &constants)) {
+			data = prep_zero_mask(c, data, &constants);
+			data = create_zero_mask(data);
+			return res + find_zero(data);
 		}
 		res += sizeof(unsigned long);
 		max -= sizeof(unsigned long);
-- 
cgit v1.2.3


From a08c5356a3aaf638c41897ae4169de18db89595e Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 26 May 2012 11:06:38 -0700
Subject: lib: add generic strnlen_user() function

This adds a new generic optimized strnlen_user() function that uses the
<asm/word-at-a-time.h> infrastructure to portably do efficient string
handling.

In many ways, strnlen is much simpler than strncpy, and in particular we
can always pre-align the words we load from memory.  That means that all
the worries about alignment etc are a non-issue, so this one can easily
be used on any architecture.  You obviously do have to provide the
appropriate word-at-a-time.h macros.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/Kconfig        |   3 ++
 lib/Makefile       |   1 +
 lib/strnlen_user.c | 138 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 142 insertions(+)
 create mode 100644 lib/strnlen_user.c

(limited to 'lib')

diff --git a/lib/Kconfig b/lib/Kconfig
index 98230ac3db2..64ddc44d0b8 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -19,6 +19,9 @@ config RATIONAL
 config GENERIC_STRNCPY_FROM_USER
 	bool
 
+config GENERIC_STRNLEN_USER
+	bool
+
 config GENERIC_FIND_FIRST_BIT
 	bool
 
diff --git a/lib/Makefile b/lib/Makefile
index b98df505f33..77937a7dd5c 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -126,6 +126,7 @@ obj-$(CONFIG_CLZ_TAB) += clz_tab.o
 obj-$(CONFIG_DDR) += jedec_ddr_data.o
 
 obj-$(CONFIG_GENERIC_STRNCPY_FROM_USER) += strncpy_from_user.o
+obj-$(CONFIG_GENERIC_STRNLEN_USER) += strnlen_user.o
 
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c
new file mode 100644
index 00000000000..90900ecfeb5
--- /dev/null
+++ b/lib/strnlen_user.c
@@ -0,0 +1,138 @@
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/uaccess.h>
+
+#include <asm/word-at-a-time.h>
+
+/* Set bits in the first 'n' bytes when loaded from memory (n < sizeof(long)) */
+#ifdef __LITTLE_ENDIAN
+#  define aligned_byte_mask(n) ((1ul << 8*(n))-1)
+#else
+#  define aligned_byte_mask(n) (~0xfful << 8*(sizeof(long)-1-(n)))
+#endif
+
+/*
+ * Do a strnlen, return length of string *with* final '\0'.
+ * 'count' is the user-supplied count, while 'max' is the
+ * address space maximum.
+ *
+ * Return 0 for exceptions (which includes hitting the address
+ * space maximum), or 'count+1' if hitting the user-supplied
+ * maximum count.
+ *
+ * NOTE! We can sometimes overshoot the user-supplied maximum
+ * if it fits in an aligned 'long'. The caller needs to check
+ * the return value against "> max".
+ */
+static inline long do_strnlen_user(const char __user *src, unsigned long count, unsigned long max)
+{
+	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
+	long align, res = 0;
+	unsigned long c;
+
+	/*
+	 * Truncate 'max' to the user-specified limit, so that
+	 * we only have one limit we need to check in the loop
+	 */
+	if (max > count)
+		max = count;
+
+	/*
+	 * Do everything aligned. But that means that we
+	 * need to also expand the maximum..
+	 */
+	align = (sizeof(long) - 1) & (unsigned long)src;
+	src -= align;
+	max += align;
+
+	if (unlikely(__get_user(c,(unsigned long __user *)src)))
+		return 0;
+	c |= aligned_byte_mask(align);
+
+	for (;;) {
+		unsigned long data;
+		if (has_zero(c, &data, &constants)) {
+			data = prep_zero_mask(c, data, &constants);
+			data = create_zero_mask(data);
+			return res + find_zero(data) + 1 - align;
+		}
+		res += sizeof(unsigned long);
+		if (unlikely(max <= sizeof(unsigned long)))
+			break;	/* the word just checked used up 'max' */
+		max -= sizeof(unsigned long);
+		if (unlikely(__get_user(c,(unsigned long __user *)(src+res))))
+			return 0;
+	}
+	res -= align;
+
+	/*
+	 * Uhhuh. We hit 'max'. But was that the user-specified maximum
+	 * too? If so, return the marker for "too long".
+	 */
+	if (res >= count)
+		return count+1;
+
+	/*
+	 * Nope: we hit the address space limit, and we still had more
+	 * characters the caller would have wanted. That's 0.
+	 */
+	return 0;
+}
+
+/**
+ * strnlen_user - Get the size of a user string INCLUDING final NUL.
+ * @str: The user-space string to measure.
+ * @count: Maximum count (including NUL character)
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * Get the size of a NUL-terminated string in user space.
+ *
+ * Returns the size of the string INCLUDING the terminating NUL.
+ * If the string is too long, returns a value greater than 'count'.
+ * On exception (fault, count <= 0, or bad @str address), returns 0.
+ */
+long strnlen_user(const char __user *str, long count)
+{
+	unsigned long max_addr, src_addr;
+
+	if (unlikely(count <= 0))
+		return 0;
+
+	max_addr = user_addr_max();
+	src_addr = (unsigned long)str;
+	if (likely(src_addr < max_addr)) {
+		unsigned long max = max_addr - src_addr;
+		return do_strnlen_user(str, count, max);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(strnlen_user);
+
+/**
+ * strlen_user - Get the size of a user string INCLUDING final NUL.
+ * @str: The user-space string to measure.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * Get the size of a NUL-terminated string in user space.
+ *
+ * Returns the size of the string INCLUDING the terminating NUL.
+ * On exception (fault, or @str not below user_addr_max()), returns 0.
+ *
+ * If there is a limit on the length of a valid string, you may wish to
+ * consider using strnlen_user() instead.
+ */
+long strlen_user(const char __user *str)
+{
+	unsigned long max_addr, src_addr;
+
+	max_addr = user_addr_max();
+	src_addr = (unsigned long)str;
+	if (likely(src_addr < max_addr)) {
+		unsigned long max = max_addr - src_addr;
+		return do_strnlen_user(str, ~0ul, max);	/* no count limit */
+	}
+	return 0;
+}
+EXPORT_SYMBOL(strlen_user);
-- 
cgit v1.2.3