aboutsummaryrefslogtreecommitdiff
path: root/drivers/staging/csr/csr_utf16.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/staging/csr/csr_utf16.c')
-rw-r--r--drivers/staging/csr/csr_utf16.c1136
1 files changed, 1136 insertions, 0 deletions
diff --git a/drivers/staging/csr/csr_utf16.c b/drivers/staging/csr/csr_utf16.c
new file mode 100644
index 00000000000..9259635851f
--- /dev/null
+++ b/drivers/staging/csr/csr_utf16.c
@@ -0,0 +1,1136 @@
+/*****************************************************************************
+
+ (c) Cambridge Silicon Radio Limited 2010
+ All rights reserved and confidential information of CSR
+
+ Refer to LICENSE.txt included with this source for details
+ on the license terms.
+
+*****************************************************************************/
+#include <linux/module.h>
+#include "csr_types.h"
+#include "csr_pmem.h"
+#include "csr_unicode.h"
+#include "csr_util.h"
+
+#define UNI_SUR_HIGH_START ((CsrUint32) 0xD800) /* first UTF-16 high (leading) surrogate */
+#define UNI_SUR_HIGH_END ((CsrUint32) 0xDBFF) /* last UTF-16 high (leading) surrogate */
+#define UNI_SUR_LOW_START ((CsrUint32) 0xDC00) /* first UTF-16 low (trailing) surrogate */
+#define UNI_SUR_LOW_END ((CsrUint32) 0xDFFF) /* last UTF-16 low (trailing) surrogate */
+#define UNI_REPLACEMENT_CHAR ((CsrUint32) 0xFFFD) /* U+FFFD, emitted in place of ill-formed input */
+#define UNI_HALF_SHIFT ((CsrUint8) 10) /* each surrogate of a pair carries 10 bits of the code point */
+#define UNI_HALF_BASE ((CsrUint32) 0x00010000) /* first code point that needs a surrogate pair */
+#define UNI_BYTEMASK ((CsrUint32) 0xBF) /* ANDed with (value | UNI_BYTEMARK) to form a 10xxxxxx UTF-8 trailing byte */
+#define UNI_BYTEMARK ((CsrUint32) 0x80) /* high bit that marks a UTF-8 trailing byte */
+
+/* Upper-case an ASCII letter ('a'..'z') by clearing bit 5; every other value is
+   returned unchanged. The argument is evaluated more than once, so it must not
+   have side effects. */
+#define CAPITAL(x) ((((x) >= 'a') && ((x) <= 'z')) ? ((x) & 0x00DF) : (x))
+
+/*
+* Lookup table indexed by the first byte of a UTF-8 sequence. The entry is the
+* number of trailing bytes that must follow that first byte (0 to 3), or -1 if
+* the byte can never start a sequence.
+*
+* Entries that are -1:
+*   80-BF   trailing bytes, never valid as a first byte
+*   C0-C1   not listed as a legal first byte below
+*   F5-FF   not listed as a legal first byte below
+*
+* The table only classifies the first byte; the ranges of the following bytes
+* are checked separately (see isLegalUtf8).
+*
+* Legal sequences are:
+*
+* byte 1st 2nd 3rd 4th
+*
+* 00-7F
+* C2-DF 80-BF
+* E0 A0-BF 80-BF
+* E1-EC 80-BF 80-BF
+* ED 80-9F 80-BF
+* EE-EF 80-BF 80-BF
+* F0 90-BF 80-BF 80-BF
+* F1-F3 80-BF 80-BF 80-BF
+* F4 80-8F 80-BF 80-BF
+*/
+static const CsrInt8 trailingBytesForUtf8[256] =
+{
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00 - 0x1F */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 - 0x3F */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40 - 0x5F */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60 - 0x7F */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x80 - 0x9F */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0xA0 - 0xBF */
+ -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xC0 - 0xDF */
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0xE0 - 0xFF */
+};
+
+/* Values to be subtracted from the CsrUint32 accumulated while converting from UTF-8 to UTF-16.
+   The decoder sums the raw bytes (shifting left by 6 between bytes), so the marker bits of the
+   first and trailing bytes are still included in the sum. Indexed by the number of trailing bytes:
+   [0] 0, [1] (0xC0 << 6) + 0x80, [2] (0xE0 << 12) + (0x80 << 6) + 0x80,
+   [3] (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80. */
+static const CsrUint32 offsetsFromUtf8[4] =
+{
+ 0x00000000, 0x00003080, 0x000E2080, 0x03C82080
+};
+
+/********************************************************************************
+*
+*   Name:           CsrUint32ToUtf16String
+*
+*   Description:    Formats a 32 bit unsigned number as decimal digits in a
+*                   newly allocated, 0-terminated UTF-16 string.
+*
+*   Input:          number - the value to format.
+*
+*   Output:         Buffer obtained from CsrPmemAlloc holding the digits followed
+*                   by a 0 terminator. The result of CsrPmemAlloc is used
+*                   without a NULL check.
+*
+*********************************************************************************/
+CsrUtf16String *CsrUint32ToUtf16String(CsrUint32 number)
+{
+    CsrUtf16String *output;
+    CsrUint32 remaining;
+    CsrUint16 digits = 1; /* even zero is printed with one digit */
+    CsrUint16 pos;
+
+    /* Count the decimal digits */
+    for (remaining = number; remaining >= 10; remaining /= 10)
+    {
+        digits++;
+    }
+
+    /* One extra element for the 0-termination */
+    output = (CsrUtf16String *) CsrPmemAlloc(sizeof(CsrUtf16String) * (digits + 1));
+
+    /* Fill in the digits from the least significant one backwards */
+    remaining = number;
+    pos = digits;
+    while (pos > 0)
+    {
+        pos--;
+        output[pos] = (CsrUtf16String) ((remaining % 10) + '0');
+        remaining /= 10;
+    }
+    output[digits] = '\0';
+
+    return output;
+}
+
+/********************************************************************************
+*
+*   Name:           CsrUtf16StringToUint32
+*
+*   Description:    Parses a 0-terminated UTF-16 string of decimal digits
+*                   ('0'..'9' only, no sign, no white space) into a 32 bit
+*                   number.
+*
+*   Input:          unicodeString - string containing the number; may be NULL.
+*
+*   Output:         The parsed value. 0 is returned when the string is NULL,
+*                   empty, longer than 10 characters, contains a non-digit or
+*                   does not fit in 32 bits (maximum is 4294967295).
+*
+*********************************************************************************/
+CsrUint32 CsrUtf16StringToUint32(const CsrUtf16String *unicodeString)
+{
+    /* Largest value that can still be multiplied by 10, and the largest digit
+       that may then be added, without exceeding 0xFFFFFFFF */
+    const CsrUint32 maxBeforeMultiply = 0xFFFFFFFF / 10;
+    const CsrUint32 maxLastDigit = 0xFFFFFFFF % 10;
+    CsrUint32 numLen, count;
+    CsrUint32 newNumber = 0;
+
+    /* CsrUtf16StrLen returns 0 for NULL. The length is kept in 32 bits: narrowing
+       it to 16 bits would let a string of 65536 + k characters pass the length
+       check below as if it had only k characters. */
+    numLen = CsrUtf16StrLen(unicodeString);
+
+    if ((numLen == 0) || (numLen > 10)) /* 4294967295 has 10 digits */
+    {
+        return 0;
+    }
+
+    for (count = 0; count < numLen; count++)
+    {
+        CsrUtf16String input = unicodeString[count];
+        CsrUint32 digit;
+
+        if ((input < 0x30) || (input > 0x39)) /* not a decimal digit */
+        {
+            return 0;
+        }
+        digit = (CsrUint32) (input - 0x30);
+
+        /* Reject before the multiply-and-add below could wrap around */
+        if ((newNumber > maxBeforeMultiply) ||
+            ((newNumber == maxBeforeMultiply) && (digit > maxLastDigit)))
+        {
+            return 0;
+        }
+
+        newNumber = (newNumber * 10) + digit;
+    }
+    return newNumber;
+}
+
+/********************************************************************************
+*
+*   Name:           CsrUtf16MemCpy
+*
+*   Description:    Copies count 16 bit elements from src to dest by calling
+*                   CsrMemCpy with the equivalent size in bytes.
+*
+*   Input:          dest  - destination buffer.
+*                   src   - source buffer.
+*                   count - number of 16 bit elements (not bytes) to copy.
+*
+*   Output:         The value returned by CsrMemCpy.
+*
+*********************************************************************************/
+CsrUtf16String *CsrUtf16MemCpy(CsrUtf16String *dest, const CsrUtf16String *src, CsrUint32 count)
+{
+    CsrUint8 *destBytes = (CsrUint8 *) dest;
+    CsrUint8 *srcBytes = (CsrUint8 *) src;
+
+    return CsrMemCpy(destBytes, srcBytes, count * sizeof(CsrUtf16String));
+}
+
+/********************************************************************************
+*
+*   Name:           CsrUtf16ConcatenateTexts
+*
+*   Description:    Joins up to four UTF-16 strings, in argument order, into
+*                   one newly allocated string. The inputs are not modified
+*                   or freed.
+*
+*   Input:          Four 0-terminated UTF-16 strings; any of them may be NULL,
+*                   which is treated as an empty string.
+*
+*   Output:         New 0-terminated UTF-16 string obtained from CsrPmemAlloc,
+*                   or NULL when the combined length is 0.
+*
+*********************************************************************************/
+CsrUtf16String *CsrUtf16ConcatenateTexts(const CsrUtf16String *inputText1, const CsrUtf16String *inputText2,
+    const CsrUtf16String *inputText3, const CsrUtf16String *inputText4)
+{
+    CsrUtf16String *outputText;
+    CsrUtf16String *cursor;
+    CsrUint32 len1 = CsrUtf16StrLen(inputText1);
+    CsrUint32 len2 = CsrUtf16StrLen(inputText2);
+    CsrUint32 len3 = CsrUtf16StrLen(inputText3);
+    CsrUint32 len4 = CsrUtf16StrLen(inputText4);
+    CsrUint32 total = len1 + len2 + len3 + len4;
+
+    if (total == 0) /* nothing to concatenate */
+    {
+        return NULL;
+    }
+
+    /* One extra element for the 0-termination */
+    outputText = (CsrUtf16String *) CsrPmemAlloc((total + 1) * sizeof(CsrUtf16String));
+    cursor = outputText;
+
+    /* A NULL input has length 0, so advancing the cursor by its length is a no-op */
+    if (inputText1 != NULL)
+    {
+        CsrUtf16MemCpy(cursor, inputText1, len1);
+    }
+    cursor += len1;
+
+    if (inputText2 != NULL)
+    {
+        CsrUtf16MemCpy(cursor, inputText2, len2);
+    }
+    cursor += len2;
+
+    if (inputText3 != NULL)
+    {
+        CsrUtf16MemCpy(cursor, inputText3, len3);
+    }
+    cursor += len3;
+
+    if (inputText4 != NULL)
+    {
+        CsrUtf16MemCpy(cursor, inputText4, len4);
+    }
+
+    outputText[total] = '\0';
+
+    return outputText;
+}
+
+/********************************************************************************
+*
+*   Name:           CsrUtf16StrLen
+*
+*   Description:    Counts the 16 bit elements that precede the 0 terminator.
+*                   A surrogate pair therefore counts as two elements.
+*
+*   Input:          unicodeString - 0-terminated UTF-16 string; may be NULL.
+*
+*   Output:         The number of 16 bit elements, or 0 for a NULL pointer.
+*
+*********************************************************************************/
+CsrUint32 CsrUtf16StrLen(const CsrUtf16String *unicodeString)
+{
+    const CsrUtf16String *end = unicodeString;
+
+    if (unicodeString == NULL)
+    {
+        return 0;
+    }
+
+    /* Walk to the terminator; the distance walked is the length */
+    while (*end)
+    {
+        end++;
+    }
+
+    return (CsrUint32) (end - unicodeString);
+}
+
+/********************************************************************************
+*
+*   Name:           csrUtf16NextCodePoint
+*
+*   Description:    Decodes the code point that starts at source[*index]. When a
+*                   well-formed surrogate pair is consumed, *index is advanced
+*                   by one so that it refers to the last element used.
+*
+*   Input:          source - 0-terminated UTF-16 string.
+*                   index  - position to decode; must not be past the terminator.
+*
+*   Output:         The code point, or U+FFFD for an unpaired surrogate.
+*
+*********************************************************************************/
+static CsrUint32 csrUtf16NextCodePoint(const CsrUtf16String *source, CsrUint32 *index)
+{
+    CsrUint32 ch = source[*index];
+
+    if ((ch >= UNI_SUR_HIGH_START) && (ch <= UNI_SUR_HIGH_END))
+    {
+        /* A high surrogate is non-zero and so is never the terminator: the next
+           element always exists (at worst it is the terminating 0). */
+        CsrUint32 ch2 = source[*index + 1];
+
+        if ((ch2 >= UNI_SUR_LOW_START) && (ch2 <= UNI_SUR_LOW_END))
+        {
+            (*index)++;
+            return ((ch - UNI_SUR_HIGH_START) << UNI_HALF_SHIFT) + (ch2 - UNI_SUR_LOW_START) + UNI_HALF_BASE;
+        }
+
+        /* Not followed by a low surrogate; the following element is left to be
+           converted on its own. */
+        return UNI_REPLACEMENT_CHAR;
+    }
+
+    if ((ch >= UNI_SUR_LOW_START) && (ch <= UNI_SUR_LOW_END))
+    {
+        /* A low surrogate without a preceding high surrogate */
+        return UNI_REPLACEMENT_CHAR;
+    }
+
+    return ch;
+}
+
+/********************************************************************************
+*
+*   Name:           csrUtf8EncodedSize
+*
+*   Description:    Returns the number of UTF-8 bytes (1 to 4) used to encode
+*                   a code point produced by csrUtf16NextCodePoint.
+*
+*********************************************************************************/
+static CsrUint8 csrUtf8EncodedSize(CsrUint32 ch)
+{
+    if (ch < (CsrUint32) 0x80)
+    {
+        return 1;
+    }
+    if (ch < (CsrUint32) 0x800)
+    {
+        return 2;
+    }
+    if (ch < (CsrUint32) 0x10000)
+    {
+        return 3;
+    }
+    return 4;
+}
+
+/********************************************************************************
+*
+*   Name:           CsrUtf16String2Utf8
+*
+*   Description:    Converts a 0-terminated UTF-16 string into a newly
+*                   allocated 0-terminated UTF-8 string. Every unpaired
+*                   surrogate is replaced by U+FFFD.
+*
+*   Input:          source - 0-terminated UTF-16 string; may be NULL.
+*
+*   Output:         Buffer obtained from CsrPmemAlloc holding the UTF-8 bytes
+*                   including the 0 terminator, or NULL if source is NULL.
+*
+*********************************************************************************/
+CsrUtf8String *CsrUtf16String2Utf8(const CsrUtf16String *source)
+{
+    /* Marker bits of the first byte, indexed by the total number of bytes */
+    static const CsrUint8 firstByteMark[5] = {0x00, 0x00, 0xC0, 0xE0, 0xF0};
+    CsrUtf8String *dest, *destStart;
+    CsrUint32 i;
+    CsrUint32 ch;
+    CsrUint32 length = 0;
+    CsrUint32 sourceLength;
+    CsrUint8 bytes;
+    CsrUint8 k;
+
+    if (!source)
+    {
+        return NULL;
+    }
+
+    /* The terminator is converted like any other element, which is what
+       produces the terminating 0 byte of the output. */
+    sourceLength = CsrUtf16StrLen(source) + 1;
+
+    /* Pass 1: size the output */
+    for (i = 0; i < sourceLength; i++)
+    {
+        length += csrUtf8EncodedSize(csrUtf16NextCodePoint(source, &i));
+    }
+
+    dest = CsrPmemAlloc(length);
+    destStart = dest;
+
+    /* Pass 2: encode */
+    for (i = 0; i < sourceLength; i++)
+    {
+        ch = csrUtf16NextCodePoint(source, &i);
+        bytes = csrUtf8EncodedSize(ch);
+
+        /* Trailing bytes carry 6 bits each and are written last to first */
+        for (k = bytes; k > 1; k--)
+        {
+            dest[k - 1] = (CsrUint8) ((ch | UNI_BYTEMARK) & UNI_BYTEMASK);
+            ch >>= 6;
+        }
+        dest[0] = (CsrUint8) (ch | firstByteMark[bytes]);
+
+        dest += bytes;
+    }
+
+    return destStart;
+}
+
+/*****************************************************************************
+
+    NAME
+        isLegalUtf8
+
+    DESCRIPTION
+        Checks that the 2nd, 3rd and 4th bytes of a UTF-8 sequence lie in the
+        ranges the Unicode standard allows for the given first byte (see
+        Chapter 3: Conformance, Section 3.9: Unicode Encoding Forms, UTF-8).
+
+        The first byte and the length are NOT checked here. The caller is
+        expected to have validated the first byte through the
+        trailingBytesForUtf8 table, which also yields the length.
+
+        Legal sequences are:
+
+            1st     2nd     3rd     4th
+            --------------------------------
+            00-7F
+            C2-DF   80-BF
+            E0      A0-BF   80-BF
+            E1-EC   80-BF   80-BF
+            ED      80-9F   80-BF
+            EE-EF   80-BF   80-BF
+            F0      90-BF   80-BF   80-BF
+            F1-F3   80-BF   80-BF   80-BF
+            F4      80-8F   80-BF   80-BF
+
+    PARAMETERS
+        codeUnit - pointer to the first byte of the sequence to test.
+        length - the number of bytes in the sequence. Valid range is 1 to 4;
+                 for any other value no byte is examined and TRUE is returned.
+
+    RETURNS
+        TRUE if all bytes after the first are within their legal ranges.
+
+*****************************************************************************/
+static CsrBool isLegalUtf8(const CsrUtf8String *codeUnit, CsrUint32 length)
+{
+    CsrUint32 idx;
+    CsrUint8 second;
+    CsrUint8 lowest = 0x80;  /* default legal range of the 2nd byte */
+    CsrUint8 highest = 0xBF;
+
+    if ((length < 2) || (length > 4))
+    {
+        /* Nothing besides the first byte to check */
+        return TRUE;
+    }
+
+    /* The 4th and 3rd bytes (when present) must be 80-BF; check last byte first */
+    for (idx = length - 1; idx >= 2; idx--)
+    {
+        if ((codeUnit[idx] < 0x80) || (codeUnit[idx] > 0xBF))
+        {
+            return FALSE;
+        }
+    }
+
+    /* The legal range of the 2nd byte is narrower for some first bytes */
+    switch (*codeUnit)
+    {
+        case 0xE0:
+            lowest = 0xA0;
+            break;
+        case 0xED:
+            highest = 0x9F;
+            break;
+        case 0xF0:
+            lowest = 0x90;
+            break;
+        case 0xF4:
+            highest = 0x8F;
+            break;
+        default:
+            break;
+    }
+
+    second = codeUnit[1];
+    if ((second < lowest) || (second > highest))
+    {
+        return FALSE;
+    }
+
+    return TRUE;
+}
+
+/********************************************************************************
+*
+*   Name:           CsrUtf82Utf16String
+*
+*   Description:    Converts a 0-terminated UTF-8 string into a newly allocated
+*                   0-terminated UTF-16 string. An illegal byte or an ill-formed
+*                   sequence is replaced by U+FFFD. If a first byte announces
+*                   more trailing bytes than can remain in the string, U+FFFD is
+*                   emitted and the conversion stops there.
+*
+*   Input:          utf8String - 0-terminated UTF-8 string; may be NULL.
+*
+*   Output:         Buffer obtained from CsrPmemAlloc holding the UTF-16
+*                   string, or NULL if utf8String is NULL.
+*
+*********************************************************************************/
+CsrUtf16String *CsrUtf82Utf16String(const CsrUtf8String *utf8String)
+{
+    CsrUtf16String *result;
+    CsrUtf16String *out;
+    CsrSize srcLen;
+    CsrSize pos;
+    CsrSize units = 0; /* 16 bit elements needed, excluding the final terminator */
+    CsrInt8 trailing;
+
+    if (utf8String == NULL)
+    {
+        return NULL;
+    }
+    srcLen = CsrStrLen((CsrCharString *) utf8String);
+
+    /* Pass 1: count the 16 bit elements the conversion will produce */
+    for (pos = 0; pos < srcLen; pos++)
+    {
+        trailing = trailingBytesForUtf8[utf8String[pos]];
+
+        if (trailing == -1) /* illegal first byte: one U+FFFD */
+        {
+            units += 1;
+            continue;
+        }
+        if (pos + trailing > srcLen) /* sequence cut short: U+FFFD plus a 0, then stop */
+        {
+            units += 2;
+            break;
+        }
+        if (isLegalUtf8(&utf8String[pos], trailing + 1) == FALSE) /* ill-formed: one U+FFFD */
+        {
+            units += 1;
+            continue;
+        }
+
+        /* First bytes above 0xEF start 4 byte sequences, which need a surrogate pair */
+        units += (utf8String[pos] > 0xEF) ? 2 : 1;
+        pos += trailing;
+    }
+
+    /* One extra element for the 0-termination */
+    result = (CsrUtf16String *) CsrPmemAlloc((1 + units) * sizeof(CsrUtf16String));
+    out = result;
+
+    /* Pass 2: convert, taking exactly the same decisions as pass 1 */
+    for (pos = 0; pos < srcLen; pos++)
+    {
+        CsrUint32 ch;
+        CsrInt8 k;
+
+        trailing = trailingBytesForUtf8[utf8String[pos]];
+
+        if (trailing == -1)
+        {
+            *out++ = UNI_REPLACEMENT_CHAR;
+            continue;
+        }
+        if (pos + trailing > srcLen)
+        {
+            *out++ = UNI_REPLACEMENT_CHAR;
+            *out++ = '\0';
+            break;
+        }
+        if (isLegalUtf8(&utf8String[pos], trailing + 1) == FALSE)
+        {
+            *out++ = UNI_REPLACEMENT_CHAR;
+            continue;
+        }
+
+        /* Sum the raw bytes, shifting by 6 bits between them; the marker bits
+           are removed afterwards in one go through offsetsFromUtf8 */
+        ch = 0;
+        for (k = 0; k < trailing; k++)
+        {
+            ch += utf8String[pos];
+            ch <<= 6;
+            pos++;
+        }
+        ch += utf8String[pos];
+        ch -= offsetsFromUtf8[trailing];
+
+        if (ch <= 0xFFFF) /* fits in a single 16 bit element */
+        {
+            *out++ = (CsrUint16) ch;
+        }
+        else /* needs a high and a low surrogate */
+        {
+            ch -= UNI_HALF_BASE;
+            *out++ = (CsrUint16) ((ch >> UNI_HALF_SHIFT) | UNI_SUR_HIGH_START);
+            *out++ = (CsrUint16) ((ch & 0x03FF) | UNI_SUR_LOW_START);
+        }
+    }
+
+    result[units] = 0x00;
+
+    return result;
+}
+
+/********************************************************************************
+*
+*   Name:           CsrUtf16StrCpy
+*
+*   Description:    Copies a UTF-16 string, including its 0 terminator, into
+*                   target. The caller must provide a large enough target.
+*
+*   Input:          target - destination buffer.
+*                   source - 0-terminated UTF-16 string; may be NULL.
+*
+*   Output:         target, or NULL (with target untouched) if source is NULL.
+*
+*********************************************************************************/
+CsrUtf16String *CsrUtf16StrCpy(CsrUtf16String *target, const CsrUtf16String *source)
+{
+    if (source == NULL)
+    {
+        return NULL;
+    }
+
+    /* The + 1 copies the terminator as well */
+    CsrMemCpy(target, source, (CsrUtf16StrLen(source) + 1) * sizeof(CsrUtf16String));
+
+    return target;
+}
+
+/********************************************************************************
+*
+*   Name:           CsrUtf16StringDuplicate
+*
+*   Description:    Allocates a buffer with CsrPmemAlloc and copies the input
+*                   string, including its 0 terminator, into it.
+*
+*   Input:          source - 0-terminated UTF-16 string; may be NULL.
+*
+*   Output:         The newly allocated copy, or NULL if source is NULL.
+*
+*********************************************************************************/
+CsrUtf16String *CsrUtf16StringDuplicate(const CsrUtf16String *source)
+{
+    CsrUtf16String *copy;
+    CsrUint32 sizeInBytes;
+
+    if (!source)
+    {
+        return NULL;
+    }
+
+    sizeInBytes = (CsrUtf16StrLen(source) + 1) * sizeof(CsrUtf16String);
+    copy = (CsrUtf16String *) CsrPmemAlloc(sizeInBytes);
+    CsrMemCpy(copy, source, sizeInBytes);
+
+    return copy;
+}
+
+/********************************************************************************
+*
+*   Name:           CsrUtf16StrICmp
+*
+*   Description:    Compares two UTF-16 strings, ignoring the case of the ASCII
+*                   letters 'a'..'z' (see CAPITAL). Neither pointer may be NULL.
+*
+*   Input:          Two 0-terminated UTF-16 strings.
+*
+*   Output:         0 if the strings are equal under that comparison. Otherwise
+*                   the difference between the first two differing elements
+*                   (as stored, not upper-cased), converted to CsrUint16.
+*
+*********************************************************************************/
+CsrUint16 CsrUtf16StrICmp(const CsrUtf16String *string1, const CsrUtf16String *string2)
+{
+    /* Continue until both strings have reached their terminator */
+    for ( ; *string1 || *string2; string1++, string2++)
+    {
+        if (CAPITAL(*string1) != CAPITAL(*string2))
+        {
+            return *string1 - *string2;
+        }
+    }
+
+    return 0;
+}
+
+/********************************************************************************
+*
+*   Name:           CsrUtf16StrNICmp
+*
+*   Description:    Compares at most count elements of two UTF-16 strings,
+*                   ignoring the case of the ASCII letters 'a'..'z' (see
+*                   CAPITAL). Neither pointer may be NULL.
+*
+*   Input:          Two 0-terminated UTF-16 strings and the maximum number of
+*                   elements to compare.
+*
+*   Output:         0 if the compared elements are equal. Otherwise the
+*                   difference between the first two differing elements (as
+*                   stored, not upper-cased), converted to CsrUint16.
+*
+*********************************************************************************/
+CsrUint16 CsrUtf16StrNICmp(const CsrUtf16String *string1, const CsrUtf16String *string2, CsrUint32 count)
+{
+    while (*string1 || *string2)
+    {
+        if (count == 0) /* compared as many elements as requested */
+        {
+            break;
+        }
+        count--;
+
+        if (CAPITAL(*string1) != CAPITAL(*string2))
+        {
+            return *string1 - *string2;
+        }
+        string1++;
+        string2++;
+    }
+
+    return 0;
+}
+
+/********************************************************************************
+*
+*   Name:           CsrUtf16String2XML
+*
+*   Description:    Escapes the XML special characters '&', '<' and '>' of a
+*                   UTF-16 string as "&amp;", "&lt;" and "&gt;". No other
+*                   character is changed.
+*
+*   Input:          str - 0-terminated UTF-16 string; may be NULL. When at
+*                   least one character is escaped, str is released with
+*                   CsrPmemFree and must not be used afterwards.
+*
+*   Output:         str itself when nothing had to be escaped (or str is NULL),
+*                   otherwise a new string obtained from CsrPmemAlloc.
+*
+*********************************************************************************/
+CsrUtf16String *CsrUtf16String2XML(CsrUtf16String *str)
+{
+    CsrUtf16String *in;
+    CsrUtf16String *out;
+    CsrUtf16String *result;
+    CsrUint32 needed = 1; /* starts at 1 for the 0-termination */
+    CsrBool mustEncode = FALSE;
+
+    if (!str)
+    {
+        return str;
+    }
+
+    /* Pass 1: work out the escaped length and whether anything needs escaping */
+    for (in = str; *in; in++)
+    {
+        switch (*in)
+        {
+            case '&': /* "&amp;" */
+                needed += 5;
+                mustEncode = TRUE;
+                break;
+            case '<': /* "&lt;" */
+            case '>': /* "&gt;" */
+                needed += 4;
+                mustEncode = TRUE;
+                break;
+            default:
+                needed++;
+                break;
+        }
+    }
+
+    if (!mustEncode)
+    {
+        return str; /* nothing to do: the input is handed back untouched */
+    }
+
+    result = out = CsrPmemAlloc(needed * sizeof(CsrUtf16String));
+
+    /* Pass 2: copy, expanding the special characters */
+    for (in = str; *in; in++)
+    {
+        switch (*in)
+        {
+            case '&':
+                *out++ = '&';
+                *out++ = 'a';
+                *out++ = 'm';
+                *out++ = 'p';
+                *out++ = ';';
+                break;
+            case '<':
+                *out++ = '&';
+                *out++ = 'l';
+                *out++ = 't';
+                *out++ = ';';
+                break;
+            case '>':
+                *out++ = '&';
+                *out++ = 'g';
+                *out++ = 't';
+                *out++ = ';';
+                break;
+            default:
+                *out++ = *in;
+                break;
+        }
+    }
+    *out = 0;
+
+    CsrPmemFree(str);
+
+    return result;
+}
+
+/********************************************************************************
+*
+*   Name:           csrXmlMatchEntity
+*
+*   Description:    Tests whether text (the elements following an '&') starts
+*                   with one of the entity names AMP; LT; GT; APOS; or QUOT;
+*                   (ASCII case is ignored by CsrUtf16StrNICmp).
+*
+*                   The names are spelled out as arrays of CsrUtf16String rather
+*                   than as L"..." literals: a wide literal has wchar_t
+*                   elements, whose width is implementation-defined, so casting
+*                   one to CsrUtf16String * only yields the intended 16 bit
+*                   elements when wchar_t happens to be 16 bits wide.
+*
+*   Input:          text     - 0-terminated UTF-16 text right after the '&'.
+*                   replaced - receives the decoded character on a match.
+*
+*   Output:         Number of elements in the matched name including the ';',
+*                   or 0 (with *replaced untouched) when nothing matches.
+*
+*********************************************************************************/
+static CsrUint32 csrXmlMatchEntity(const CsrUtf16String *text, CsrUtf16String *replaced)
+{
+    static const CsrUtf16String names[5][6] =
+    {
+        {'A', 'M', 'P', ';', 0, 0},
+        {'L', 'T', ';', 0, 0, 0},
+        {'G', 'T', ';', 0, 0, 0},
+        {'A', 'P', 'O', 'S', ';', 0},
+        {'Q', 'U', 'O', 'T', ';', 0}
+    };
+    static const CsrUint8 nameLengths[5] = {4, 3, 3, 5, 5};
+    static const CsrUtf16String characters[5] = {'&', '<', '>', '\'', '\"'};
+    CsrUint8 k;
+
+    for (k = 0; k < 5; k++)
+    {
+        if (!CsrUtf16StrNICmp(text, names[k], nameLengths[k]))
+        {
+            *replaced = characters[k];
+            return nameLengths[k];
+        }
+    }
+
+    return 0;
+}
+
+/********************************************************************************
+*
+*   Name:           CsrXML2Utf16String
+*
+*   Description:    Decodes the XML entities "&amp;", "&lt;", "&gt;", "&apos;"
+*                   and "&quot;" (in any ASCII case) of a UTF-16 string into the
+*                   characters they stand for. An '&' that does not start one
+*                   of these entities is copied unchanged.
+*
+*   Input:          str - 0-terminated UTF-16 string; may be NULL. When at
+*                   least one entity is decoded, str is released with
+*                   CsrPmemFree and must not be used afterwards.
+*
+*   Output:         str itself when no entity was found (or str is NULL),
+*                   otherwise a new string obtained from CsrPmemAlloc.
+*
+*********************************************************************************/
+CsrUtf16String *CsrXML2Utf16String(CsrUtf16String *str)
+{
+    CsrUtf16String *scanString;
+    CsrUtf16String *outputString;
+    CsrUtf16String *resultString;
+    CsrUtf16String decoded = 0;
+    CsrUint32 stringLength = 1; /* starts at 1 for the 0-termination */
+    CsrUint32 matched;
+    CsrBool decodeChars = FALSE;
+
+    if (!str)
+    {
+        return str;
+    }
+
+    /* Pass 1: every entity and every other element yields exactly one output
+       element. On a match the entity name is skipped, leaving scanString on
+       the ';' for the loop increment to step over. */
+    for (scanString = str; *scanString; scanString++)
+    {
+        if (*scanString == '&')
+        {
+            matched = csrXmlMatchEntity(scanString + 1, &decoded);
+            if (matched != 0)
+            {
+                scanString += matched;
+                decodeChars = TRUE;
+            }
+        }
+        stringLength++;
+    }
+
+    if (!decodeChars)
+    {
+        return str; /* nothing to decode: the input is handed back untouched */
+    }
+
+    resultString = outputString = CsrPmemAlloc(stringLength * sizeof(CsrUtf16String));
+
+    /* Pass 2: copy, replacing each recognised entity by its character */
+    for (scanString = str; *scanString; scanString++)
+    {
+        if (*scanString == '&')
+        {
+            matched = csrXmlMatchEntity(scanString + 1, &decoded);
+            if (matched != 0)
+            {
+                *outputString++ = decoded;
+                scanString += matched;
+            }
+            else
+            {
+                *outputString++ = '&';
+            }
+        }
+        else
+        {
+            *outputString++ = *scanString;
+        }
+    }
+    *outputString = 0;
+
+    CsrPmemFree(str);
+
+    return resultString;
+}
+
+/* Compares two 0-terminated UTF-8 strings byte by byte by delegating to CsrStrCmp
+   and returns CsrStrCmp's result. */
+CsrInt32 CsrUtf8StrCmp(const CsrUtf8String *string1, const CsrUtf8String *string2)
+{
+    const CsrCharString *lhs = (const CsrCharString *) string1;
+    const CsrCharString *rhs = (const CsrCharString *) string2;
+
+    return CsrStrCmp(lhs, rhs);
+}
+
+/* Compares two UTF-8 strings by delegating to CsrStrNCmp with the given count and
+   returns CsrStrNCmp's result. count is passed through unchanged, so it is a
+   number of bytes, not of characters. */
+CsrInt32 CsrUtf8StrNCmp(const CsrUtf8String *string1, const CsrUtf8String *string2, CsrSize count)
+{
+    const CsrCharString *lhs = (const CsrCharString *) string1;
+    const CsrCharString *rhs = (const CsrCharString *) string2;
+
+    return CsrStrNCmp(lhs, rhs, count);
+}
+
+/* Returns the length in bytes (not characters) of a 0-terminated UTF-8 string,
+   excluding the terminator, or 0 for a NULL pointer. */
+CsrUint32 CsrUtf8StringLengthInBytes(const CsrUtf8String *string)
+{
+    if (!string)
+    {
+        return (CsrUint32) 0;
+    }
+
+    return (CsrUint32) CsrStrLen((const CsrCharString *) string);
+}
+
+/* Copies a 0-terminated UTF-8 string into target by delegating to CsrStrCpy and
+   returns CsrStrCpy's result. */
+CsrUtf8String *CsrUtf8StrCpy(CsrUtf8String *target, const CsrUtf8String *source)
+{
+    CsrCharString *destination = (CsrCharString *) target;
+    const CsrCharString *origin = (const CsrCharString *) source;
+
+    return (CsrUtf8String *) CsrStrCpy(destination, origin);
+}
+
+/* Truncates a UTF-8 string to at most count bytes without leaving the beginning
+   of a multi-byte character at the end.
+
+   target[count] is always set to 0, so the buffer must hold at least count + 1
+   bytes. If the cut falls inside a multi-byte character, the string is ended at
+   the first byte of that character instead. Only the last three bytes before
+   the cut are inspected, and never a byte in front of target[0].
+
+   Returns target. */
+CsrUtf8String *CsrUtf8StrTruncate(CsrUtf8String *target, CsrSize count)
+{
+    CsrSize lastByte;
+
+    target[count] = '\0';
+
+    if (count == 0)
+    {
+        return target;
+    }
+    lastByte = count - 1;
+
+    if (target[lastByte] & 0x80)
+    {
+        /* the last byte contains non-ascii char */
+        if (target[lastByte] & 0x40)
+        {
+            /* multi-byte char starting just before truncation */
+            target[lastByte] = '\0';
+        }
+        else if ((lastByte >= 1) && ((target[lastByte - 1] & 0xE0) == 0xE0))
+        {
+            /* 3-byte (or longer) char starting 2 bytes before truncation */
+            target[lastByte - 1] = '\0';
+        }
+        else if ((lastByte >= 2) && ((target[lastByte - 2] & 0xF0) == 0xF0))
+        {
+            /* 4-byte char starting 3 bytes before truncation */
+            target[lastByte - 2] = '\0';
+        }
+    }
+
+    return target;
+}
+
+/* Copies up to count bytes of a UTF-8 string into target by delegating to
+   CsrStrNCpy and returns CsrStrNCpy's result. Nothing is done here to
+   0-terminate target or to keep multi-byte characters whole; see
+   CsrUtf8StrNCpyZero for that. */
+CsrUtf8String *CsrUtf8StrNCpy(CsrUtf8String *target, const CsrUtf8String *source, CsrSize count)
+{
+    CsrCharString *destination = (CsrCharString *) target;
+    const CsrCharString *origin = (const CsrCharString *) source;
+
+    return (CsrUtf8String *) CsrStrNCpy(destination, origin, count);
+}
+
+/* Copies source into a target buffer of count bytes and guarantees that the
+   result is 0-terminated. If the copy left target[count - 1] non-zero, the
+   string is cut with CsrUtf8StrTruncate to at most count - 1 bytes, ending on a
+   whole UTF-8 character.
+
+   A count of 0 describes a buffer with no room at all, so nothing is read or
+   written in that case.
+
+   Returns target. */
+CsrUtf8String *CsrUtf8StrNCpyZero(CsrUtf8String *target, const CsrUtf8String *source, CsrSize count)
+{
+    if (count == 0)
+    {
+        /* target[count - 1] would lie in front of the buffer */
+        return target;
+    }
+
+    CsrStrNCpy((CsrCharString *) target, (const CsrCharString *) source, count);
+    if (target[count - 1] != '\0')
+    {
+        CsrUtf8StrTruncate(target, count - 1);
+    }
+    return target;
+}
+
+/* Duplicates a UTF-8 string by delegating to CsrStrDup and returns its result. */
+CsrUtf8String *CsrUtf8StrDup(const CsrUtf8String *source)
+{
+    const CsrCharString *origin = (const CsrCharString *) source;
+
+    return (CsrUtf8String *) CsrStrDup(origin);
+}
+
+/* Joins up to four UTF-8 strings, in argument order, into one newly allocated
+   0-terminated string. Any input may be NULL, which is treated as an empty
+   string; the inputs are not modified or freed.
+
+   Returns a buffer obtained from CsrPmemAlloc, or NULL when the combined length
+   is 0. */
+CsrUtf8String *CsrUtf8StringConcatenateTexts(const CsrUtf8String *inputText1, const CsrUtf8String *inputText2, const CsrUtf8String *inputText3, const CsrUtf8String *inputText4)
+{
+    CsrUtf8String *outputText;
+    CsrUtf8String *cursor;
+    CsrUint32 len1 = CsrUtf8StringLengthInBytes(inputText1);
+    CsrUint32 len2 = CsrUtf8StringLengthInBytes(inputText2);
+    CsrUint32 len3 = CsrUtf8StringLengthInBytes(inputText3);
+    CsrUint32 len4 = CsrUtf8StringLengthInBytes(inputText4);
+    CsrUint32 total = len1 + len2 + len3 + len4;
+
+    if (total == 0) /* nothing to concatenate */
+    {
+        return NULL;
+    }
+
+    /* One extra byte for the 0-termination */
+    outputText = (CsrUtf8String *) CsrPmemAlloc((total + 1) * sizeof(CsrUtf8String));
+    cursor = outputText;
+
+    /* Each piece is copied with exactly its own length, i.e. without its
+       terminator. A NULL input has length 0, so advancing the cursor by its
+       length is a no-op. */
+    if (inputText1 != NULL)
+    {
+        CsrUtf8StrNCpy(cursor, inputText1, len1);
+    }
+    cursor += len1;
+
+    if (inputText2 != NULL)
+    {
+        CsrUtf8StrNCpy(cursor, inputText2, len2);
+    }
+    cursor += len2;
+
+    if (inputText3 != NULL)
+    {
+        CsrUtf8StrNCpy(cursor, inputText3, len3);
+    }
+    cursor += len3;
+
+    if (inputText4 != NULL)
+    {
+        CsrUtf8StrNCpy(cursor, inputText4, len4);
+    }
+
+    outputText[total] = '\0';
+
+    return outputText;
+}