1 files changed, 1136 insertions, 0 deletions
diff --git a/drivers/staging/csr/csr_utf16.c b/drivers/staging/csr/csr_utf16.c
new file mode 100644
index 00000000000..9259635851f
--- /dev/null
+++ b/drivers/staging/csr/csr_utf16.c
@@ -0,0 +1,1136 @@
+/*****************************************************************************
+
+            (c) Cambridge Silicon Radio Limited 2010
+            All rights reserved and confidential information of CSR
+
+            Refer to LICENSE.txt included with this source for details
+            on the license terms.
+
+*****************************************************************************/
+#include <linux/module.h>
+#include "csr_types.h"
+#include "csr_pmem.h"
+#include "csr_unicode.h"
+#include "csr_util.h"
+
+#define UNI_SUR_HIGH_START   ((CsrUint32) 0xD800) /* first UTF-16 high surrogate */
+#define UNI_SUR_HIGH_END     ((CsrUint32) 0xDBFF) /* last UTF-16 high surrogate */
+#define UNI_SUR_LOW_START    ((CsrUint32) 0xDC00) /* first UTF-16 low surrogate */
+#define UNI_SUR_LOW_END      ((CsrUint32) 0xDFFF) /* last UTF-16 low surrogate */
+#define UNI_REPLACEMENT_CHAR ((CsrUint32) 0xFFFD) /* U+FFFD, written in place of ill-formed input */
+#define UNI_HALF_SHIFT       ((CsrUint8) 10)  /* used for shifting by 10 bits: each surrogate carries 10 bits */
+#define UNI_HALF_BASE        ((CsrUint32) 0x00010000) /* added when joining, subtracted when splitting a surrogate pair */
+#define UNI_BYTEMASK         ((CsrUint32) 0xBF) /* ANDed into every UTF-8 trailing byte: clears bit 6 */
+#define UNI_BYTEMARK         ((CsrUint32) 0x80) /* ORed into every UTF-8 trailing byte: sets bit 7 */
+
+#define CAPITAL(x)    ((((x) >= 'a') && ((x) <= 'z')) ? ((x) & 0x00DF) : (x)) /* upper-cases a-z only; x is evaluated more than once */
+
+/*
+*  Indexed by the first byte of a UTF-8 sequence; gives the number of trailing bytes that follow it.
+*  -1 if the byte can never be the first byte of a sequence.
+*
+*  Legal sequences are:
+*
+*  byte  1st      2nd      3rd      4th
+*
+*       00-7F
+*       C2-DF    80-BF
+*       E0       A0-BF    80-BF
+*       E1-EC    80-BF    80-BF
+*       ED       80-9F    80-BF
+*       EE-EF    80-BF    80-BF
+*       F0       90-BF    80-BF    80-BF
+*       F1-F3    80-BF    80-BF    80-BF
+*       F4       80-8F    80-BF    80-BF
+*/
+static const CsrInt8 trailingBytesForUtf8[256] =
+{
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,                                 /* 0x00 - 0x1F */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,                                 /* 0x20 - 0x3F */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,                                 /* 0x40 - 0x5F */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,                                 /* 0x60 - 0x7F */
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x80 - 0x9F */
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0xA0 - 0xBF */
+    -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,                               /* 0xC0 - 0xDF */
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,                      /* 0xE0 - 0xFF */
+};
+
+/* Values to be subtracted from the accumulated CsrUint32 when converting from UTF8 to UTF16, indexed by the number of trailing bytes */
+static const CsrUint32 offsetsFromUtf8[4] =
+{
+    0x00000000, 0x00003080, 0x000E2080, 0x03C82080 /* e.g. 0x3080 == (0xC0 << 6) + 0x80: the marker bits of a 2 byte sequence */
+};
+
+/********************************************************************************
+*
+*   Name:           CsrUint32ToUtf16String
+*
+*   Description:    Renders a 32 bit unsigned number as decimal digits in a
+*                   newly allocated, 0-terminated UTF-16 string.
+*
+*   Input:          32 bit number.
+*
+*   Output:         Allocated UTF-16 string holding the decimal digits.
+*
+*********************************************************************************/
+CsrUtf16String *CsrUint32ToUtf16String(CsrUint32 number)
+{
+    CsrUtf16String *digits;
+    CsrUint32 remaining;
+    CsrUint16 digitCount = 1;
+    CsrUint16 pos;
+
+    /* Every division by ten that is still possible accounts for one more digit */
+    for (remaining = number; remaining >= 10; remaining /= 10)
+    {
+        digitCount++;
+    }
+
+    /* One extra element for the 0-termination */
+    digits = (CsrUtf16String *) CsrPmemAlloc(sizeof(CsrUtf16String) * (digitCount + 1));
+    digits[digitCount] = '\0';
+
+    /* Fill in the digits starting with the least significant one */
+    remaining = number;
+    for (pos = digitCount; pos > 0; pos--)
+    {
+        digits[pos - 1] = (CsrUtf16String) ((remaining % 10) + '0');
+        remaining /= 10;
+    }
+
+    return digits;
+}
+
+/********************************************************************************
+*
+*   Name:           CsrUtf16StringToUint32
+*
+*   Description:    The function converts a 0-terminated UTF-16 string of decimal
+*                   digits into a 32 bit number.
+*
+*   Input:          A string of UTF-16 characters containing a number.
+*
+*   Output:         32 bit number; 0 if the input is NULL, empty, not purely decimal or too large.
+*
+*********************************************************************************/
+CsrUint32 CsrUtf16StringToUint32(const CsrUtf16String *unicodeString)
+{
+    CsrUint32 numLen, count; /* full width: a 16 bit length would wrap for very long input and let it pass the check below */
+    CsrUint32 newNumber = 0;
+
+    numLen = CsrUtf16StrLen(unicodeString);
+
+    if ((unicodeString == NULL) || (numLen == 0) || (numLen > 10)) /* the largest value, 4294967295, has 10 digits */
+    {
+        return 0;
+    }
+
+    for (count = 0; count < numLen; count++)
+    {
+        CsrUtf16String input = unicodeString[count];
+        if ((input < 0x30) || (input > 0x39) || ((newNumber == 0x19999999) && (input > 0x35)) || (newNumber > 0x19999999)) /* not a digit, or appending the digit would exceed 4294967295 */
+        {
+            return 0;
+        }
+
+        newNumber = (newNumber * 10) + (input - 0x30);
+    }
+    return newNumber;
+}
+
+/********************************************************************************
+*
+*   Name:           CsrUtf16MemCpy
+*
+*   Description:    The function copies count number of 16 bit data elements
+*                   from src to dest by delegating to CsrMemCpy.
+*
+*   Input:          Destination, source and the number of elements to copy.
+*
+*   Output:         Whatever CsrMemCpy returns for the copy.
+*
+*********************************************************************************/
+CsrUtf16String *CsrUtf16MemCpy(CsrUtf16String *dest, const CsrUtf16String *src, CsrUint32 count)
+{
+    return CsrMemCpy((CsrUint8 *) dest, (CsrUint8 *) src, sizeof(CsrUtf16String) * count);
+}
+
+/********************************************************************************
+*
+*   Name:           CsrUtf16ConcatenateTexts
+*
+*   Description:    The function joins the contents of up to 4 UTF-16 input
+*                   strings, in argument order, into a newly allocated string.
+*
+*   Input:          4 unicoded input strings (UTF-16); each of them may be NULL.
+*
+*   Output:         A new 0-terminated UTF-16 string, or NULL if all inputs are empty.
+*
+*********************************************************************************/
+CsrUtf16String *CsrUtf16ConcatenateTexts(const CsrUtf16String *inputText1, const CsrUtf16String *inputText2,
+    const CsrUtf16String *inputText3, const CsrUtf16String *inputText4)
+{
+    const CsrUint32 len1 = CsrUtf16StrLen(inputText1);
+    const CsrUint32 len2 = CsrUtf16StrLen(inputText2);
+    const CsrUint32 len3 = CsrUtf16StrLen(inputText3);
+    const CsrUint32 len4 = CsrUtf16StrLen(inputText4);
+    const CsrUint32 total = len1 + len2 + len3 + len4;
+    CsrUtf16String *joined;
+    CsrUint32 offset = 0;
+
+    if (total == 0) /* every input is NULL or empty: nothing is allocated */
+    {
+        return NULL;
+    }
+
+    /* One extra element for the 0-termination */
+    joined = (CsrUtf16String *) CsrPmemAlloc((total + 1) * sizeof(CsrUtf16String));
+
+    if (inputText1 != NULL)
+    {
+        CsrUtf16MemCpy(&joined[offset], inputText1, len1);
+    }
+    offset += len1;
+
+    if (inputText2 != NULL)
+    {
+        CsrUtf16MemCpy(&joined[offset], inputText2, len2);
+    }
+    offset += len2;
+
+    if (inputText3 != NULL)
+    {
+        CsrUtf16MemCpy(&joined[offset], inputText3, len3);
+    }
+    offset += len3;
+
+    if (inputText4 != NULL)
+    {
+        CsrUtf16MemCpy(&joined[offset], inputText4, len4);
+    }
+    joined[total] = '\0';
+
+    return joined;
+}
+
+/********************************************************************************
+*
+*   Name:           CsrUtf16StrLen
+*
+*   Description:    The function counts the 16 bit elements that precede the
+*                   0-termination of the string.
+*
+*   Input:          0-terminated string of 16 bit unicoded characters, or NULL.
+*
+*   Output:         The number of 16 bit elements in the string; 0 for NULL.
+*
+*********************************************************************************/
+CsrUint32 CsrUtf16StrLen(const CsrUtf16String *unicodeString)
+{
+    const CsrUtf16String *cursor;
+    CsrUint32 elements = 0;
+
+    if (unicodeString == NULL) /* a NULL pointer counts as an empty string */
+    {
+        return 0;
+    }
+    for (cursor = unicodeString; *cursor != 0; cursor++)
+    {
+        elements++;
+    }
+    return elements;
+}
+
+/********************************************************************************
+*
+*   Name:           CsrUtf16String2Utf8
+*
+*   Description:    The function decodes an UTF-16 string into an UTF8 byte
+*                   oriented string.
+*
+*   Input:          0-terminated UTF-16 string characters.
+*
+*   Output:         0-terminated string of byte oriented UTF8 coded characters.
+*
+*********************************************************************************/
+CsrUtf8String *CsrUtf16String2Utf8(const CsrUtf16String *source)
+{
+    CsrUtf8String *dest, *destStart = NULL;
+    CsrUint32 i;
+    CsrUint32 ch;
+    CsrUint32 length;
+    CsrUint32 sourceLength;
+    CsrUint8 bytes;
+    CsrBool appendNull = FALSE;
+
+    CsrUint8 firstByteMark[5] = {0x00, 0x00, 0xC0, 0xE0, 0xF0}; /* marker bits of the first byte, indexed by sequence length in bytes */
+
+    if (!source)
+    {
+        return NULL;
+    }
+
+    length = 0;
+    sourceLength = CsrUtf16StrLen(source) + 1; /* + 1: the terminating 0 is converted like any other element */
+
+    for (i = 0; i < sourceLength; i++) /* Pass 1: compute the size of the UTF-8 output in bytes */
+    {
+        ch = source[i];
+        if ((ch >= UNI_SUR_HIGH_START) && (ch <= UNI_SUR_HIGH_END)) /* This is a high surrogate */
+        {
+            if (i + 1 < sourceLength) /* The low surrogate is in the source */
+            {
+                CsrUint32 ch2 = source[++i];
+                if ((ch2 >= UNI_SUR_LOW_START) && (ch2 <= UNI_SUR_LOW_END)) /* And it is a legal low surrogate */
+                {
+                    length += 4;
+                }
+                else /* It is not a low surrogate, instead put a Unicode
+                     'REPLACEMENT CHARACTER' (U+FFFD) */
+                {
+                    length += 3;
+                    i--; /* Subtract 1 again as the conversion must continue after the ill-formed code unit */
+                }
+            }
+            else /* The low surrogate does not exist, instead put a Unicode
+                 'REPLACEMENT CHARACTER' (U+FFFD), and the null terminated character */
+            {
+                length += 4; /* NOTE(review): looks unreachable, since sourceLength counts the terminating 0 that follows every element */
+            }
+        }
+        else if ((ch >= UNI_SUR_LOW_START) && (ch <= UNI_SUR_LOW_END)) /* The value of UTF-16 is not allowed to be in this range, instead put
+             a Unicode 'REPLACEMENT CHARACTER' (U+FFFD) */
+        {
+            length += 3;
+        }
+        else /* Figure out how many bytes that are required */
+        {
+            if (ch < 0x0080)
+            {
+                length++;
+            }
+            else if (ch < 0x0800)
+            {
+                length += 2;
+            }
+            else
+            {
+                length += 3;
+            }
+        }
+    }
+
+    dest = CsrPmemAlloc(length); /* length already contains the byte for the terminating 0 */
+    destStart = dest;
+
+    for (i = 0; i < sourceLength; i++) /* Pass 2: encode; must take exactly the same decisions as pass 1 */
+    {
+        ch = source[i];
+        if ((ch >= UNI_SUR_HIGH_START) && (ch <= UNI_SUR_HIGH_END)) /* This is a high surrogate */
+        {
+            if (i + 1 < sourceLength) /* The low surrogate is in the source */
+            {
+                CsrUint32 ch2 = source[++i];
+                if ((ch2 >= UNI_SUR_LOW_START) && (ch2 <= UNI_SUR_LOW_END)) /* And it is a legal low surrogate, convert to UTF-32 */
+                {
+                    ch = ((ch - UNI_SUR_HIGH_START) << UNI_HALF_SHIFT) + (ch2 - UNI_SUR_LOW_START) + UNI_HALF_BASE;
+                }
+                else /* It is not a low surrogate, instead put a Unicode
+                     'REPLACEMENT CHARACTER' (U+FFFD) */
+                {
+                    ch = UNI_REPLACEMENT_CHAR;
+                    i--; /* Subtract 1 again as the conversion must continue after the ill-formed code unit */
+                }
+            }
+            else /* The low surrogate does not exist, instead put a Unicode
+                 'REPLACEMENT CHARACTER' (U+FFFD), and the null terminated character */
+            {
+                ch = UNI_REPLACEMENT_CHAR;
+                appendNull = TRUE; /* NOTE(review): same apparently unreachable case as in pass 1 */
+            }
+        }
+        else if ((ch >= UNI_SUR_LOW_START) && (ch <= UNI_SUR_LOW_END)) /* The value of UTF-16 is not allowed to be in this range, instead put
+             a Unicode 'REPLACEMENT CHARACTER' (U+FFFD) */
+        {
+            ch = UNI_REPLACEMENT_CHAR;
+        }
+
+        /* Figure out how many bytes that are required */
+        if (ch < (CsrUint32) 0x80)
+        {
+            bytes = 1;
+        }
+        else if (ch < (CsrUint32) 0x800)
+        {
+            bytes = 2;
+        }
+        else if (ch < (CsrUint32) 0x10000)
+        {
+            bytes = 3;
+        }
+        else if (ch < (CsrUint32) 0x110000)
+        {
+            bytes = 4;
+        }
+        else
+        {
+            bytes = 3;
+            ch = UNI_REPLACEMENT_CHAR;
+        }
+
+        dest += bytes; /* the sequence is written back to front, starting one past its last byte */
+
+        switch (bytes) /* Convert character to UTF-8. Note: everything falls through. */
+        {
+            case 4:
+            {
+                *--dest = (CsrUint8) ((ch | UNI_BYTEMARK) & UNI_BYTEMASK);
+                ch >>= 6;
+            }
+            /* FALLTHROUGH */
+            case 3:
+            {
+                *--dest = (CsrUint8) ((ch | UNI_BYTEMARK) & UNI_BYTEMASK);
+                ch >>= 6;
+            }
+            /* FALLTHROUGH */
+            case 2:
+            {
+                *--dest = (CsrUint8) ((ch | UNI_BYTEMARK) & UNI_BYTEMASK);
+                ch >>= 6;
+            }
+            /* FALLTHROUGH */
+            case 1:
+            {
+                *--dest = (CsrUint8) (ch | firstByteMark[bytes]); /* first byte: remaining bits plus the length marker */
+            }
+            /* FALLTHROUGH */
+            default:
+            {
+                break;
+            }
+        }
+
+        dest += bytes; /* dest is back at the start of the sequence here; step past it */
+    }
+
+    if (appendNull) /* Append the \0 character */
+    {
+        *dest = '\0';
+    }
+
+    return destStart;
+}
+
+/*****************************************************************************
+
+    NAME
+        isLegalUtf8
+
+    DESCRIPTION
+        Returns TRUE if the given UTF-8 code unit is legal as defined by the
+        Unicode standard (see Chapter 3: Conformance, Section 3.9: Unicode
+        Encoding Forms, UTF-8).
+
+        This function assumes that the length parameter is unconditionally
+        correct and that the first byte is already validated by looking it up
+        in the trailingBytesForUtf8 array, which also reveals the number of
+        trailing bytes.
+
+        Legal code units are composed of one of the following byte sequences:
+
+        1st      2nd      3rd      4th
+        --------------------------------
+        00-7F
+        C2-DF    80-BF
+        E0       A0-BF    80-BF
+        E1-EC    80-BF    80-BF
+        ED       80-9F    80-BF
+        EE-EF    80-BF    80-BF
+        F0       90-BF    80-BF    80-BF
+        F1-F3    80-BF    80-BF    80-BF
+        F4       80-8F    80-BF    80-BF
+
+        Please note that this function only checks whether the 2nd, 3rd and
+        4th bytes fall into the valid ranges.
+
+    PARAMETERS
+        codeUnit - pointer to the first byte of the byte sequence composing
+            the code unit to test.
+        length - the number of bytes in the code unit. Valid range is 1 to 4.
+
+    RETURNS
+        TRUE if the given code unit is legal.
+
+*****************************************************************************/
+static CsrBool isLegalUtf8(const CsrUtf8String *codeUnit, CsrUint32 length)
+{
+    const CsrUtf8String *srcPtr = codeUnit + length; /* one past the last byte; the bytes are checked back to front */
+    CsrUint8 byte;
+
+    switch (length) /* Everything falls through except case 1 */
+    {
+        case 4:
+        {
+            byte = *--srcPtr; /* 4th byte: must be 80-BF */
+            if ((byte < 0x80) || (byte > 0xBF))
+            {
+                return FALSE;
+            }
+        }
+        /* Fallthrough */
+        case 3:
+        {
+            byte = *--srcPtr; /* 3rd byte: must be 80-BF */
+            if ((byte < 0x80) || (byte > 0xBF))
+            {
+                return FALSE;
+            }
+        }
+        /* Fallthrough */
+        case 2:
+        {
+            byte = *--srcPtr; /* 2nd byte: never above BF, the lower bound depends on the 1st byte */
+            if (byte > 0xBF)
+            {
+                return FALSE;
+            }
+
+            switch (*codeUnit) /* No fallthrough */
+            {
+                case 0xE0: /* 2nd byte must be A0-BF */
+                {
+                    if (byte < 0xA0)
+                    {
+                        return FALSE;
+                    }
+                    break;
+                }
+                case 0xED: /* 2nd byte must be 80-9F */
+                {
+                    if ((byte < 0x80) || (byte > 0x9F))
+                    {
+                        return FALSE;
+                    }
+                    break;
+                }
+                case 0xF0: /* 2nd byte must be 90-BF */
+                {
+                    if (byte < 0x90)
+                    {
+                        return FALSE;
+                    }
+                    break;
+                }
+                case 0xF4: /* 2nd byte must be 80-8F */
+                {
+                    if ((byte < 0x80) || (byte > 0x8F))
+                    {
+                        return FALSE;
+                    }
+                    break;
+                }
+                default: /* every other first byte: 2nd byte must be 80-BF */
+                {
+                    if (byte < 0x80)
+                    {
+                        return FALSE;
+                    }
+                    break;
+                }
+            }
+        }
+        /* Fallthrough */
+        case 1:
+        default:
+            /* The 1st byte and length are assumed correct */
+            break;
+    }
+
+    return TRUE;
+}
+
+/********************************************************************************
+*
+*   Name:           CsrUtf82Utf16String
+*
+*   Description:    The function decodes a UTF-8 byte oriented string into a
+*                   UTF-16 string.
+*
+*   Input:          0-terminated string of byte oriented UTF8 coded characters.
+*
+*   Output:         0-terminated string of UTF-16 characters.
+*
+*********************************************************************************/
+CsrUtf16String *CsrUtf82Utf16String(const CsrUtf8String *utf8String)
+{
+    CsrSize i, length = 0;
+    CsrSize sourceLength;
+    CsrUtf16String *dest = NULL;
+    CsrUtf16String *destStart = NULL;
+    CsrInt8 extraBytes2Read;
+
+    if (!utf8String)
+    {
+        return NULL;
+    }
+    sourceLength = CsrStrLen((CsrCharString *) utf8String);
+
+    for (i = 0; i < sourceLength; i++) /* Pass 1: count the UTF-16 elements that pass 2 will write */
+    {
+        extraBytes2Read = trailingBytesForUtf8[utf8String[i]];
+
+        if (extraBytes2Read == -1) /* Illegal byte value, instead put a Unicode 'REPLACEMENT CHARACTER' (U+FFFD) */
+        {
+            length += 1;
+        }
+        else if (i + extraBytes2Read > sourceLength) /* The extra bytes does not exist, instead put a Unicode 'REPLACEMENT
+             CHARACTER' (U+FFFD), and the null terminated character */
+        {
+            length += 2;
+            break;
+        }
+        else if (isLegalUtf8(&utf8String[i], extraBytes2Read + 1) == FALSE) /* It is not a legal utf-8 character, instead put a Unicode 'REPLACEMENT
+             CHARACTER' (U+FFFD) */
+        {
+            length += 1;
+        }
+        else
+        {
+            if (utf8String[i] > 0xEF) /* Needs a high and a low surrogate */
+            {
+                length += 2;
+            }
+            else
+            {
+                length += 1;
+            }
+            i += extraBytes2Read; /* skip the trailing bytes of the sequence just counted */
+        }
+    }
+
+    /* Create space for the null terminated character */
+    dest = (CsrUtf16String *) CsrPmemAlloc((1 + length) * sizeof(CsrUtf16String));
+    destStart = dest;
+
+    for (i = 0; i < sourceLength; i++) /* Pass 2: decode; must take exactly the same decisions as pass 1 */
+    {
+        extraBytes2Read = trailingBytesForUtf8[utf8String[i]];
+
+        if (extraBytes2Read == -1) /* Illegal byte value, instead put a Unicode 'REPLACEMENT CHARACTER' (U+FFFD) */
+        {
+            *dest++ = UNI_REPLACEMENT_CHAR;
+        }
+        else if (i + extraBytes2Read > sourceLength) /* The extra bytes does not exist, instead put a Unicode 'REPLACEMENT
+             CHARACTER' (U+FFFD), and the null terminated character */
+        {
+            *dest++ = UNI_REPLACEMENT_CHAR;
+            *dest++ = '\0';
+            break;
+        }
+        else if (isLegalUtf8(&utf8String[i], extraBytes2Read + 1) == FALSE) /* It is not a legal utf-8 character, instead put a Unicode 'REPLACEMENT
+             CHARACTER' (U+FFFD) */
+        {
+            *dest++ = UNI_REPLACEMENT_CHAR;
+        }
+        else /* It is legal, convert the character to an CsrUint32 */
+        {
+            CsrUint32 ch = 0;
+
+            switch (extraBytes2Read) /* Everything falls through */
+            {
+                case 3:
+                {
+                    ch += utf8String[i];
+                    ch <<= 6;
+                    i++;
+                }
+                /* FALLTHROUGH */
+                case 2:
+                {
+                    ch += utf8String[i];
+                    ch <<= 6;
+                    i++;
+                }
+                /* FALLTHROUGH */
+                case 1:
+                {
+                    ch += utf8String[i];
+                    ch <<= 6;
+                    i++;
+                }
+                /* FALLTHROUGH */
+                case 0:
+                {
+                    ch += utf8String[i]; /* last byte of the sequence; i is left on it for the loop increment */
+                }
+                /* FALLTHROUGH */
+                default:
+                {
+                    break;
+                }
+            }
+
+            ch -= offsetsFromUtf8[extraBytes2Read]; /* the bytes were added in whole: remove their accumulated marker bits */
+
+            if (ch <= 0xFFFF) /* Character can be encoded in one CsrUint16 */
+            {
+                *dest++ = (CsrUint16) ch;
+            }
+            else /* The character needs two CsrUint16 */
+            {
+                ch -= UNI_HALF_BASE;
+                *dest++ = (CsrUint16) ((ch >> UNI_HALF_SHIFT) | UNI_SUR_HIGH_START);
+                *dest++ = (CsrUint16) ((ch & 0x03FF) | UNI_SUR_LOW_START);
+            }
+        }
+    }
+
+    destStart[length] = 0x00; /* length is the element count from pass 1; the allocation holds length + 1 elements */
+
+    return destStart;
+}
+
+/********************************************************************************
+*
+*   Name:           CsrUtf16StrCpy
+*
+*   Description:    The function copies a UTF-16 string, including its
+*                   0-termination, into the target buffer.
+*
+*   Input:          Target buffer and a 0-terminated UTF-16 source string.
+*
+*   Output:         The target pointer, or NULL if the source is NULL.
+*
+*********************************************************************************/
+CsrUtf16String *CsrUtf16StrCpy(CsrUtf16String *target, const CsrUtf16String *source)
+{
+    /* Without a source nothing is copied and the target is left untouched */
+    if (source == NULL)
+    {
+        return NULL;
+    }
+
+    /* Copy every element together with the 0-termination */
+    CsrMemCpy(target, source, (CsrUtf16StrLen(source) + 1) * sizeof(CsrUtf16String));
+    return target;
+}
+
+/********************************************************************************
+*
+*   Name:           CsrUtf16StringDuplicate
+*
+*   Description:    The function returns a newly allocated copy of the input.
+*
+*   Input:          0-terminated UTF-16 string, or NULL.
+*
+*   Output:         Allocated 0-terminated UTF-16 string; NULL for a NULL input.
+*
+*********************************************************************************/
+CsrUtf16String *CsrUtf16StringDuplicate(const CsrUtf16String *source)
+{
+    CsrUtf16String *copy;
+    CsrUint32 bytes;
+
+    if (source == NULL) /* nothing to duplicate */
+    {
+        return NULL;
+    }
+    bytes = (CsrUtf16StrLen(source) + 1) * sizeof(CsrUtf16String); /* size including the 0-termination */
+    copy = (CsrUtf16String *) CsrPmemAlloc(bytes);
+    CsrMemCpy(copy, source, bytes);
+    return copy;
+}
+
+/********************************************************************************
+*
+*   Name:           CsrUtf16StrICmp
+*
+*   Description:    The function compares two UTF-16 strings, treating a-z and A-Z as equal.
+*
+*   Input:          Two 0-terminated UTF-16 strings.
+*
+*   Output:         0: if the strings are identical, otherwise the difference of the first differing elements.
+*
+*********************************************************************************/
+CsrUint16 CsrUtf16StrICmp(const CsrUtf16String *string1, const CsrUtf16String *string2)
+{
+    const CsrUtf16String *lhs = string1;
+    const CsrUtf16String *rhs = string2;
+
+    for (; (*lhs != 0) || (*rhs != 0); lhs++, rhs++) /* until both strings have reached their 0-termination */
+    {
+        if (CAPITAL(*lhs) != CAPITAL(*rhs))
+        {
+            return *lhs - *rhs;
+        }
+    }
+    return 0;
+}
+
+/********************************************************************************
+*
+*   Name:           CsrUtf16StrNICmp
+*
+*   Description:    The function compares up to count number of elements in the
+*                   two UTF-16 strings, treating a-z and A-Z as equal.
+*
+*   Input:          Two 0-terminated UTF-16 strings and a maximum
+*                   number of elements to check.
+*
+*   Output:         0: if the compared elements are identical, otherwise non-zero.
+*
+*********************************************************************************/
+CsrUint16 CsrUtf16StrNICmp(const CsrUtf16String *string1, const CsrUtf16String *string2, CsrUint32 count)
+{
+    /* count is tested before the elements are read, so that nothing beyond the
+       first count elements of either string is ever dereferenced */
+    for (; (count > 0) && (*string1 || *string2); count--, string1++, string2++)
+    {
+        if (CAPITAL(*string1) != CAPITAL(*string2))
+        {
+            return *string1 - *string2;
+        }
+    }
+
+    return 0;
+}
+
+/********************************************************************************
+*
+*   Name:           CsrUtf16String2XML
+*
+*   Description:    The function converts an unicoded string (UTF-16) into an unicoded XML
+*                   string where '&', '<' and '>' are replaced by &amp; &lt; and &gt;.
+*
+*   Input:          A unicoded string (UTF-16) which is freed if anything had to be encoded.
+*
+*   Output:         A new unicoded string (UTF-16) containing the converted output, or the input itself.
+*
+*********************************************************************************/
+CsrUtf16String *CsrUtf16String2XML(CsrUtf16String *str)
+{
+    const CsrUtf16String *in;
+    CsrUtf16String *encoded;
+    CsrUtf16String *out;
+    CsrUint32 elements = 1; /* start at one for the 0-termination */
+    CsrBool mustEncode = FALSE;
+
+    if (str == NULL)
+    {
+        return NULL;
+    }
+
+    /* Pass 1: count the elements of the encoded string */
+    for (in = str; *in != 0; in++)
+    {
+        switch (*in)
+        {
+            case '&': /* grows to "&amp;" */
+            {
+                elements += 5;
+                mustEncode = TRUE;
+                break;
+            }
+            case '<': /* grows to "&lt;" */
+            case '>': /* grows to "&gt;" */
+            {
+                elements += 4;
+                mustEncode = TRUE;
+                break;
+            }
+            default:
+            {
+                elements++;
+                break;
+            }
+        }
+    }
+
+    if (!mustEncode) /* nothing to escape: the input is handed back as is and not freed */
+    {
+        return str;
+    }
+
+    /* Pass 2: write the encoded string into a fresh allocation */
+    out = encoded = CsrPmemAlloc(elements * sizeof(CsrUtf16String));
+    for (in = str; *in != 0; in++)
+    {
+        switch (*in)
+        {
+            case '&':
+            {
+                *out++ = '&';
+                *out++ = 'a';
+                *out++ = 'm';
+                *out++ = 'p';
+                *out++ = ';';
+                break;
+            }
+            case '<':
+            case '>':
+            {
+                *out++ = '&';
+                *out++ = (CsrUtf16String) ((*in == '<') ? 'l' : 'g');
+                *out++ = 't';
+                *out++ = ';';
+                break;
+            }
+            default:
+            {
+                *out++ = *in;
+                break;
+            }
+        }
+    }
+    *out = 0;
+    CsrPmemFree(str); /* the input is consumed once it has been re-encoded */
+
+    return encoded;
+}
+
+/* Names of the predefined XML entities, without the leading '&', spelled out as
+   16 bit elements. They replace casts of wide literals such as L"AMP;", whose
+   layout only matches CsrUtf16String when wchar_t happens to be 16 bits wide. */
+static const CsrUtf16String csrXmlEntityAmp[] = {'A', 'M', 'P', ';', 0};
+static const CsrUtf16String csrXmlEntityLt[] = {'L', 'T', ';', 0};
+static const CsrUtf16String csrXmlEntityGt[] = {'G', 'T', ';', 0};
+static const CsrUtf16String csrXmlEntityApos[] = {'A', 'P', 'O', 'S', ';', 0};
+static const CsrUtf16String csrXmlEntityQuot[] = {'Q', 'U', 'O', 'T', ';', 0};
+
+/* Checks whether the text following an '&' starts with a predefined entity name
+   (compared case-insensitively). On a match the decoded character is stored in
+   *decoded and the number of elements occupied by the name, including the ';',
+   is returned. Otherwise 0 is returned and *decoded is left untouched. */
+static CsrUint32 csrXmlMatchEntity(const CsrUtf16String *afterAmp, CsrUtf16String *decoded)
+{
+    if (!CsrUtf16StrNICmp(afterAmp, csrXmlEntityAmp, 4))
+    {
+        *decoded = '&';
+        return 4;
+    }
+    if (!CsrUtf16StrNICmp(afterAmp, csrXmlEntityLt, 3))
+    {
+        *decoded = '<';
+        return 3;
+    }
+    if (!CsrUtf16StrNICmp(afterAmp, csrXmlEntityGt, 3))
+    {
+        *decoded = '>';
+        return 3;
+    }
+    if (!CsrUtf16StrNICmp(afterAmp, csrXmlEntityApos, 5))
+    {
+        *decoded = '\'';
+        return 5;
+    }
+    if (!CsrUtf16StrNICmp(afterAmp, csrXmlEntityQuot, 5))
+    {
+        *decoded = '\"';
+        return 5;
+    }
+    return 0;
+}
+
+/********************************************************************************
+*
+*   Name:           CsrXML2Utf16String
+*
+*   Description:    The function converts an unicoded XML string into an unicoded
+*                   string (UTF-16) where the entities &amp; &lt; &gt; &apos; and
+*                   &quot; (matched case-insensitively) are decoded. An '&' that
+*                   does not start one of these entities is copied unchanged.
+*
+*   Input:          A unicoded XML string. It is freed only when at least one
+*                   entity was decoded.
+*
+*   Output:         A new unicoded pointer containing the decoded output, or the
+*                   input pointer itself (possibly NULL) when nothing was decoded.
+*
+*********************************************************************************/
+CsrUtf16String *CsrXML2Utf16String(CsrUtf16String *str)
+{
+    const CsrUtf16String *scan;
+    CsrUtf16String *decodedString;
+    CsrUtf16String *out;
+    CsrUtf16String decodedChar = 0;
+    CsrUint32 entityLength;
+    CsrUint32 elements = 1; /* start at one for the 0-termination */
+    CsrBool entityFound = FALSE;
+
+    if (str == NULL)
+    {
+        return NULL;
+    }
+
+    /* Pass 1: count the elements of the decoded string. Every decoded entity
+       contributes exactly one element. */
+    for (scan = str; *scan; scan++)
+    {
+        if (*scan == '&')
+        {
+            entityLength = csrXmlMatchEntity(scan + 1, &decodedChar);
+            if (entityLength > 0)
+            {
+                /* leave scan on the ';' so that the loop increment steps past the entity */
+                scan += entityLength;
+                entityFound = TRUE;
+            }
+        }
+        elements++;
+    }
+
+    /* Nothing to decode: hand the caller's string back, not freed */
+    if (!entityFound)
+    {
+        return str;
+    }
+
+    /* Pass 2: write the decoded string. elements already includes the
+       0-termination. */
+    decodedString = CsrPmemAlloc(elements * sizeof(CsrUtf16String));
+    out = decodedString;
+    for (scan = str; *scan; scan++)
+    {
+        entityLength = 0;
+        if (*scan == '&')
+        {
+            entityLength = csrXmlMatchEntity(scan + 1, &decodedChar);
+        }
+
+        if (entityLength > 0)
+        {
+            *out++ = decodedChar;
+            scan += entityLength; /* as in pass 1: stop on the ';' of the entity */
+        }
+        else
+        {
+            *out++ = *scan; /* ordinary element, or an '&' that starts no known entity */
+        }
+    }
+    *out = 0;
+
+    /* The input has been replaced by the decoded copy */
+    CsrPmemFree(str);
+
+    return decodedString;
+}
+
+/*
+ * Compare two UTF-8 strings.
+ *
+ * Both arguments are reinterpreted as plain character strings and handed to
+ * CsrStrCmp; whatever CsrStrCmp returns is returned unchanged.
+ */
+CsrInt32 CsrUtf8StrCmp(const CsrUtf8String *string1, const CsrUtf8String *string2)
+{
+    const CsrCharString *lhs = (const CsrCharString *) string1;
+    const CsrCharString *rhs = (const CsrCharString *) string2;
+
+    return CsrStrCmp(lhs, rhs);
+}
+
+/*
+ * Compare two UTF-8 strings with a limit of "count".
+ *
+ * Both arguments are reinterpreted as plain character strings and handed,
+ * together with count, to CsrStrNCmp; its result is returned unchanged.
+ */
+CsrInt32 CsrUtf8StrNCmp(const CsrUtf8String *string1, const CsrUtf8String *string2, CsrSize count)
+{
+    const CsrCharString *lhs = (const CsrCharString *) string1;
+    const CsrCharString *rhs = (const CsrCharString *) string2;
+
+    return CsrStrNCmp(lhs, rhs, count);
+}
+
+/*
+ * Return the length of a UTF-8 string as reported by CsrStrLen, narrowed to
+ * CsrUint32.
+ *
+ * A NULL pointer is accepted and yields 0.
+ */
+CsrUint32 CsrUtf8StringLengthInBytes(const CsrUtf8String *string)
+{
+    CsrSize byteCount;
+
+    if (!string)
+    {
+        /* No string at all counts as an empty one. */
+        return (CsrUint32) 0;
+    }
+
+    byteCount = CsrStrLen((const CsrCharString *) string);
+
+    return (CsrUint32) byteCount;
+}
+
+/*
+ * Copy a UTF-8 string.
+ *
+ * Both pointers are reinterpreted as plain character strings and passed to
+ * CsrStrCpy; the pointer CsrStrCpy returns is cast back to CsrUtf8String *
+ * and returned.
+ */
+CsrUtf8String *CsrUtf8StrCpy(CsrUtf8String *target, const CsrUtf8String *source)
+{
+    CsrCharString *dst = (CsrCharString *) target;
+    const CsrCharString *src = (const CsrCharString *) source;
+
+    return (CsrUtf8String *) CsrStrCpy(dst, src);
+}
+
+/*
+ * Cut a UTF-8 string down to at most "count" bytes without leaving the start
+ * of a multi-byte character dangling at the end.
+ *
+ * target[count] is always overwritten with '\0', so the buffer must hold at
+ * least count + 1 bytes. If the last kept byte then turns out to belong to a
+ * multi-byte sequence that the cut left incomplete, the terminator is moved
+ * back onto that sequence's lead byte, shortening the string by up to three
+ * further bytes.
+ *
+ * The look-back at the one or two preceding bytes is only done when those
+ * bytes exist, so short values of count never index before the start of
+ * the buffer.
+ *
+ * Returns target.
+ */
+CsrUtf8String *CsrUtf8StrTruncate(CsrUtf8String *target, CsrSize count)
+{
+    CsrSize lastByte;
+
+    target[count] = '\0';
+
+    if (count == 0)
+    {
+        /* Nothing precedes the terminator, so there is nothing to repair. */
+        return target;
+    }
+
+    lastByte = count - 1;
+
+    if (target[lastByte] & 0x80)
+    {
+        /* the last byte contains non-ascii char */
+        if (target[lastByte] & 0x40)
+        {
+            /* lead byte of a multi-byte char sits right before the cut */
+            target[lastByte] = '\0';
+        }
+        else if ((lastByte >= 1) && ((target[lastByte - 1] & 0xE0) == 0xE0))
+        {
+            /* lead byte of a 3-byte (or longer) char one byte earlier:
+               only two of its bytes survived the cut */
+            target[lastByte - 1] = '\0';
+        }
+        else if ((lastByte >= 2) && ((target[lastByte - 2] & 0xF0) == 0xF0))
+        {
+            /* lead byte of a 4-byte char two bytes earlier:
+               only three of its bytes survived the cut */
+            target[lastByte - 2] = '\0';
+        }
+    }
+
+    return target;
+}
+
+/*
+ * Bounded copy of a UTF-8 string.
+ *
+ * Both pointers are reinterpreted as plain character strings and passed,
+ * together with count, to CsrStrNCpy; the pointer it returns is cast back to
+ * CsrUtf8String * and returned. No termination is added here beyond
+ * whatever CsrStrNCpy itself does.
+ */
+CsrUtf8String *CsrUtf8StrNCpy(CsrUtf8String *target, const CsrUtf8String *source, CsrSize count)
+{
+    CsrCharString *dst = (CsrCharString *) target;
+    const CsrCharString *src = (const CsrCharString *) source;
+
+    return (CsrUtf8String *) CsrStrNCpy(dst, src, count);
+}
+
+/*
+ * Bounded copy of a UTF-8 string that always leaves target terminated
+ * within its first "count" bytes.
+ *
+ * The copy itself is done by CsrStrNCpy (presumably strncpy-like: it may
+ * fill all count bytes without writing a terminator - confirm against
+ * csr_util). If the final byte of the window is not '\0' afterwards,
+ * CsrUtf8StrTruncate(target, count - 1) is used to place the terminator in
+ * target[count - 1] and to drop any multi-byte character the cut left
+ * incomplete.
+ *
+ * A count of 0 means there is no room for even a terminator; target is
+ * returned untouched in that case instead of indexing target[count - 1].
+ *
+ * Returns target.
+ */
+CsrUtf8String *CsrUtf8StrNCpyZero(CsrUtf8String *target, const CsrUtf8String *source, CsrSize count)
+{
+    if (count == 0)
+    {
+        /* count - 1 would wrap around and index far outside the buffer. */
+        return target;
+    }
+
+    CsrStrNCpy((CsrCharString *) target, (const CsrCharString *) source, count);
+    if (target[count - 1] != '\0')
+    {
+        /* Source did not fit: terminate and repair the tail. */
+        CsrUtf8StrTruncate(target, count - 1);
+    }
+    return target;
+}
+
+/*
+ * Duplicate a UTF-8 string.
+ *
+ * The source is reinterpreted as a plain character string and passed to
+ * CsrStrDup; the pointer it returns is cast to CsrUtf8String * and returned.
+ */
+CsrUtf8String *CsrUtf8StrDup(const CsrUtf8String *source)
+{
+    const CsrCharString *plain = (const CsrCharString *) source;
+
+    return (CsrUtf8String *) CsrStrDup(plain);
+}
+
+/*
+ * Build a newly allocated string holding inputText1..inputText4 joined in
+ * that order.
+ *
+ * Any of the inputs may be NULL; CsrUtf8StringLengthInBytes reports 0 for
+ * those and they contribute nothing. If the combined length is 0 no buffer
+ * is allocated and NULL is returned. Otherwise the result is obtained from
+ * CsrPmemAlloc, is '\0'-terminated, and is returned to the caller.
+ */
+CsrUtf8String *CsrUtf8StringConcatenateTexts(const CsrUtf8String *inputText1, const CsrUtf8String *inputText2, const CsrUtf8String *inputText3, const CsrUtf8String *inputText4)
+{
+    const CsrUtf8String *parts[4];
+    CsrUint32 partLen[4];
+    CsrUint32 totalLen = 0;
+    CsrUint32 offset = 0;
+    CsrUint32 i;
+    CsrUtf8String *joined;
+
+    parts[0] = inputText1;
+    parts[1] = inputText2;
+    parts[2] = inputText3;
+    parts[3] = inputText4;
+
+    /* Measure every piece first so the buffer can be sized in one go. */
+    for (i = 0; i < 4; i++)
+    {
+        partLen[i] = CsrUtf8StringLengthInBytes(parts[i]);
+        totalLen += partLen[i];
+    }
+
+    if (totalLen == 0)
+    {
+        /* Nothing to join: every input was NULL or empty. */
+        return NULL;
+    }
+
+    /* One extra element for the terminating 0. */
+    joined = (CsrUtf8String *) CsrPmemAlloc((totalLen + 1) * sizeof(CsrUtf8String));
+
+    /* Lay the pieces down back to back; NULL inputs are skipped entirely. */
+    for (i = 0; i < 4; i++)
+    {
+        if (parts[i] != NULL)
+        {
+            CsrUtf8StrNCpy(&(joined[offset]), parts[i], partLen[i]);
+        }
+        offset += partLen[i];
+    }
+
+    joined[totalLen] = '\0';
+
+    return joined;
+}