diff options
Diffstat (limited to 'drivers/staging/csr/csr_utf16.c')
-rw-r--r-- | drivers/staging/csr/csr_utf16.c | 1136 |
1 files changed, 1136 insertions, 0 deletions
diff --git a/drivers/staging/csr/csr_utf16.c b/drivers/staging/csr/csr_utf16.c new file mode 100644 index 00000000000..9259635851f --- /dev/null +++ b/drivers/staging/csr/csr_utf16.c @@ -0,0 +1,1136 @@ +/***************************************************************************** + + (c) Cambridge Silicon Radio Limited 2010 + All rights reserved and confidential information of CSR + + Refer to LICENSE.txt included with this source for details + on the license terms. + +*****************************************************************************/ +#include <linux/module.h> +#include "csr_types.h" +#include "csr_pmem.h" +#include "csr_unicode.h" +#include "csr_util.h" + +#define UNI_SUR_HIGH_START ((CsrUint32) 0xD800) +#define UNI_SUR_HIGH_END ((CsrUint32) 0xDBFF) +#define UNI_SUR_LOW_START ((CsrUint32) 0xDC00) +#define UNI_SUR_LOW_END ((CsrUint32) 0xDFFF) +#define UNI_REPLACEMENT_CHAR ((CsrUint32) 0xFFFD) +#define UNI_HALF_SHIFT ((CsrUint8) 10) /* used for shifting by 10 bits */ +#define UNI_HALF_BASE ((CsrUint32) 0x00010000) +#define UNI_BYTEMASK ((CsrUint32) 0xBF) +#define UNI_BYTEMARK ((CsrUint32) 0x80) + +#define CAPITAL(x) ((x >= 'a') && (x <= 'z') ? ((x) & 0x00DF) : (x)) + +/* +* Index into the table with the first byte to get the number of trailing bytes in a utf-8 character. +* -1 if the byte has an invalid value. +* +* Legal sequences are: +* +* byte 1st 2nd 3rd 4th +* +* 00-7F +* C2-DF 80-BF +* E0 A0-BF 80-BF +* E1-EC 80-BF 80-BF +* ED 80-9F 80-BF +* EE-EF 80-BF 80-BF +* F0 90-BF 80-BF 80-BF +* F1-F3 80-BF 80-BF 80-BF +* F4 80-8F 80-BF 80-BF +*/ +static const CsrInt8 trailingBytesForUtf8[256] = +{ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00 - 0x1F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 - 0x3F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40 - 0x5F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60 - 0x7F */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x80 - 0x9F */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0xA0 - 0xBF */ + -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xC0 - 0xDF */ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0xE0 - 0xFF */ +}; + +/* Values to be substracted from a CsrUint32 when converting from UTF8 to UTF16 */ +static const CsrUint32 offsetsFromUtf8[4] = +{ + 0x00000000, 0x00003080, 0x000E2080, 0x03C82080 +}; + +/******************************************************************************** +* +* Name: CsrUint32ToUtf16String +* +* Description: The function converts an 32 bit number to an UTF-16 string +* that is allocated and 0-terminated. +* +* Input: 32 bit number. +* +* Output: A string of UTF-16 characters. +* +*********************************************************************************/ +CsrUtf16String *CsrUint32ToUtf16String(CsrUint32 number) +{ + CsrUint16 count, noOfDigits; + CsrUtf16String *output; + CsrUint32 tempNumber; + + /* calculate the number of digits in the output */ + tempNumber = number; + noOfDigits = 1; + while (tempNumber >= 10) + { + tempNumber = tempNumber / 10; + noOfDigits++; + } + + output = (CsrUtf16String *) CsrPmemAlloc(sizeof(CsrUtf16String) * (noOfDigits + 1)); /*add space for 0-termination*/ + + tempNumber = number; + for (count = noOfDigits; count > 0; count--) + { + output[count - 1] = (CsrUtf16String) ((tempNumber % 10) + '0'); + tempNumber = tempNumber / 10; + } + output[noOfDigits] = '\0'; + + return output; +} + +/******************************************************************************** +* +* Name: CsrUtf16StringToUint32 +* +* Description: The function converts an UTF-16 string that is +* 0-terminated into a 32 bit number. +* +* Input: A string of UTF-16 characters containig a number. +* +* Output: 32 bit number. +* +*********************************************************************************/ +CsrUint32 CsrUtf16StringToUint32(const CsrUtf16String *unicodeString) +{ + CsrUint16 numLen, count; + CsrUint32 newNumber = 0; + + numLen = (CsrUint16) CsrUtf16StrLen(unicodeString); + + if ((numLen > 10) || (numLen == 0) || (unicodeString == NULL)) /*CSRMAX number is 4.294.967.295 */ + { + return 0; + } + + for (count = 0; count < numLen; count++) + { + CsrUtf16String input = unicodeString[count]; + if ((input < 0x30) || (input > 0x39) || ((newNumber == 0x19999999) && (input > 0x35)) || (newNumber > 0x19999999)) /* chars are present or number is too large now causing number to get to large when *10 */ + { + return 0; + } + + newNumber = (newNumber * 10) + (input - 0x30); + } + return newNumber; +} + +/******************************************************************************** +* +* Name: CsrUtf16MemCpy +* +* Description: The function copies count number of 16 bit data elements +* from srv to dest. +* +* Input: A pointer to an unicoded string. +* +* Output: A pointer to an unicoded string. +* +*********************************************************************************/ +CsrUtf16String *CsrUtf16MemCpy(CsrUtf16String *dest, const CsrUtf16String *src, CsrUint32 count) +{ + return CsrMemCpy((CsrUint8 *) dest, (CsrUint8 *) src, count * sizeof(CsrUtf16String)); +} + +/******************************************************************************** +* +* Name: CsrUtf16ConcatenateTexts +* +* Description: The function merge the contents of 4 unicoded input pointers +* into a new string. +* +* Input: 4 unicoded input strings (UTF-16). +* +* Output: A new unicoded string (UTF-16) containing the combined strings. +* +*********************************************************************************/ +CsrUtf16String *CsrUtf16ConcatenateTexts(const CsrUtf16String *inputText1, const CsrUtf16String *inputText2, + const CsrUtf16String *inputText3, const CsrUtf16String *inputText4) +{ + CsrUtf16String *outputText; + CsrUint32 textLen, textLen1, textLen2, textLen3, textLen4; + + textLen1 = CsrUtf16StrLen(inputText1); + textLen2 = CsrUtf16StrLen(inputText2); + textLen3 = CsrUtf16StrLen(inputText3); + textLen4 = CsrUtf16StrLen(inputText4); + + textLen = textLen1 + textLen2 + textLen3 + textLen4; + + if (textLen == 0) /*stop here is all lengths are 0*/ + { + return NULL; + } + + outputText = (CsrUtf16String *) CsrPmemAlloc((textLen + 1) * sizeof(CsrUtf16String)); /* add space for 0-termination*/ + + + if (inputText1 != NULL) + { + CsrUtf16MemCpy(outputText, inputText1, textLen1); + } + + if (inputText2 != NULL) + { + CsrUtf16MemCpy(&(outputText[textLen1]), inputText2, textLen2); + } + + if (inputText3 != NULL) + { + CsrUtf16MemCpy(&(outputText[textLen1 + textLen2]), inputText3, textLen3); + } + + if (inputText4 != NULL) + { + CsrUtf16MemCpy(&(outputText[textLen1 + textLen2 + textLen3]), inputText4, textLen4); + } + + outputText[textLen] = '\0'; + + return outputText; +} + +/******************************************************************************** +* +* Name: CsrUtf16StrLen +* +* Description: The function returns the number of 16 bit elements present +* in the 0-terminated string. +* +* Input: 0-terminated string of 16 bit unicoded characters. +* +* Output: The number of 16 bit elements in the string. +* +*********************************************************************************/ +CsrUint32 CsrUtf16StrLen(const CsrUtf16String *unicodeString) +{ + CsrUint32 length; + + length = 0; + if (unicodeString != NULL) + { + while (*unicodeString) + { + length++; + unicodeString++; + } + } + return length; +} + +/******************************************************************************** +* +* Name: CsrUtf16String2Utf8 +* +* Description: The function decodes an UTF-16 string into an UTF8 byte +* oriented string. +* +* Input: 0-terminated UTF-16 string characters. +* +* Output: 0-terminated string of byte oriented UTF8 coded characters. +* +*********************************************************************************/ +CsrUtf8String *CsrUtf16String2Utf8(const CsrUtf16String *source) +{ + CsrUtf8String *dest, *destStart = NULL; + CsrUint32 i; + CsrUint32 ch; + CsrUint32 length; + CsrUint32 sourceLength; + CsrUint8 bytes; + CsrBool appendNull = FALSE; + + CsrUint8 firstByteMark[5] = {0x00, 0x00, 0xC0, 0xE0, 0xF0}; + + if (!source) + { + return NULL; + } + + length = 0; + sourceLength = CsrUtf16StrLen(source) + 1; + + for (i = 0; i < sourceLength; i++) + { + ch = source[i]; + if ((ch >= UNI_SUR_HIGH_START) && (ch <= UNI_SUR_HIGH_END)) /* This is a high surrogate */ + { + if (i + 1 < sourceLength) /* The low surrogate is in the source */ + { + CsrUint32 ch2 = source[++i]; + if ((ch2 >= UNI_SUR_LOW_START) && (ch2 <= UNI_SUR_LOW_END)) /* And it is a legal low surrogate */ + { + length += 4; + } + else /* It is not a low surrogate, instead put a Unicode + 'REPLACEMENT CHARACTER' (U+FFFD) */ + { + length += 3; + i--; /* Substract 1 again as the conversion must continue after the ill-formed code unit */ + } + } + else /* The low surrogate does not exist, instead put a Unicode + 'REPLACEMENT CHARACTER' (U+FFFD), and the null terminated character */ + { + length += 4; + } + } + else if ((ch >= UNI_SUR_LOW_START) && (ch <= UNI_SUR_LOW_END)) /* The value of UTF-16 is not allowed to be in this range, instead put + a Unicode 'REPLACEMENT CHARACTER' (U+FFFD) */ + { + length += 3; + } + else /* Figure out how many bytes that are required */ + { + if (ch < 0x0080) + { + length++; + } + else if (ch < 0x0800) + { + length += 2; + } + else + { + length += 3; + } + } + } + + dest = CsrPmemAlloc(length); + destStart = dest; + + for (i = 0; i < sourceLength; i++) + { + ch = source[i]; + if ((ch >= UNI_SUR_HIGH_START) && (ch <= UNI_SUR_HIGH_END)) /* This is a high surrogate */ + { + if (i + 1 < sourceLength) /* The low surrogate is in the source */ + { + CsrUint32 ch2 = source[++i]; + if ((ch2 >= UNI_SUR_LOW_START) && (ch2 <= UNI_SUR_LOW_END)) /* And it is a legal low surrogate, convert to UTF-32 */ + { + ch = ((ch - UNI_SUR_HIGH_START) << UNI_HALF_SHIFT) + (ch2 - UNI_SUR_LOW_START) + UNI_HALF_BASE; + } + else /* It is not a low surrogate, instead put a Unicode + 'REPLACEMENT CHARACTER' (U+FFFD) */ + { + ch = UNI_REPLACEMENT_CHAR; + i--; /* Substract 1 again as the conversion must continue after the ill-formed code unit */ + } + } + else /* The low surrogate does not exist, instead put a Unicode + 'REPLACEMENT CHARACTER' (U+FFFD), and the null terminated character */ + { + ch = UNI_REPLACEMENT_CHAR; + appendNull = TRUE; + } + } + else if ((ch >= UNI_SUR_LOW_START) && (ch <= UNI_SUR_LOW_END)) /* The value of UTF-16 is not allowed to be in this range, instead put + a Unicode 'REPLACEMENT CHARACTER' (U+FFFD) */ + { + ch = UNI_REPLACEMENT_CHAR; + } + + /* Figure out how many bytes that are required */ + if (ch < (CsrUint32) 0x80) + { + bytes = 1; + } + else if (ch < (CsrUint32) 0x800) + { + bytes = 2; + } + else if (ch < (CsrUint32) 0x10000) + { + bytes = 3; + } + else if (ch < (CsrUint32) 0x110000) + { + bytes = 4; + } + else + { + bytes = 3; + ch = UNI_REPLACEMENT_CHAR; + } + + dest += bytes; + + switch (bytes) /* Convert character to UTF-8. Note: everything falls through. */ + { + case 4: + { + *--dest = (CsrUint8) ((ch | UNI_BYTEMARK) & UNI_BYTEMASK); + ch >>= 6; + } + /* FALLTHROUGH */ + case 3: + { + *--dest = (CsrUint8) ((ch | UNI_BYTEMARK) & UNI_BYTEMASK); + ch >>= 6; + } + /* FALLTHROUGH */ + case 2: + { + *--dest = (CsrUint8) ((ch | UNI_BYTEMARK) & UNI_BYTEMASK); + ch >>= 6; + } + /* FALLTHROUGH */ + case 1: + { + *--dest = (CsrUint8) (ch | firstByteMark[bytes]); + } + /* FALLTHROUGH */ + default: + { + break; + } + } + + dest += bytes; + } + + if (appendNull) /* Append the \0 character */ + { + *dest = '\0'; + } + + return destStart; +} + +/***************************************************************************** + + NAME + isLegalUtf8 + + DESCRIPTION + Returns TRUE if the given UFT-8 code unit is legal as defined by the + Unicode standard (see Chapter 3: Conformance, Section 3.9: Unicode + Encoding Forms, UTF-8). + + This function assumes that the length parameter is unconditionally + correct and that the first byte is already validated by looking it up + in the trailingBytesForUtf8 array, which also reveals the number of + trailing bytes. + + Legal code units are composed of one of the following byte sequences: + + 1st 2nd 3rd 4th + -------------------------------- + 00-7F + C2-DF 80-BF + E0 A0-BF 80-BF + E1-EC 80-BF 80-BF + ED 80-9F 80-BF + EE-EF 80-BF 80-BF + F0 90-BF 80-BF 80-BF + F1-F3 80-BF 80-BF 80-BF + F4 80-8F 80-BF 80-BF + + Please note that this function only checks whether the 2nd, 3rd and + 4th bytes fall into the valid ranges. + + PARAMETERS + codeUnit - pointer to the first byte of the byte sequence composing + the code unit to test. + length - the number of bytes in the code unit. Valid range is 1 to 4. + + RETURNS + TRUE if the given code unit is legal. + +*****************************************************************************/ +static CsrBool isLegalUtf8(const CsrUtf8String *codeUnit, CsrUint32 length) +{ + const CsrUtf8String *srcPtr = codeUnit + length; + CsrUint8 byte; + + switch (length) /* Everything falls through except case 1 */ + { + case 4: + { + byte = *--srcPtr; + if ((byte < 0x80) || (byte > 0xBF)) + { + return FALSE; + } + } + /* Fallthrough */ + case 3: + { + byte = *--srcPtr; + if ((byte < 0x80) || (byte > 0xBF)) + { + return FALSE; + } + } + /* Fallthrough */ + case 2: + { + byte = *--srcPtr; + if (byte > 0xBF) + { + return FALSE; + } + + switch (*codeUnit) /* No fallthrough */ + { + case 0xE0: + { + if (byte < 0xA0) + { + return FALSE; + } + break; + } + case 0xED: + { + if ((byte < 0x80) || (byte > 0x9F)) + { + return FALSE; + } + break; + } + case 0xF0: + { + if (byte < 0x90) + { + return FALSE; + } + break; + } + case 0xF4: + { + if ((byte < 0x80) || (byte > 0x8F)) + { + return FALSE; + } + break; + } + default: + { + if (byte < 0x80) + { + return FALSE; + } + break; + } + } + } + /* Fallthrough */ + case 1: + default: + /* The 1st byte and length are assumed correct */ + break; + } + + return TRUE; +} + +/******************************************************************************** +* +* Name: CsrUtf82Utf16String +* +* Description: The function decodes an UTF8 byte oriented string into a +* UTF-16string. +* +* Input: 0-terminated string of byte oriented UTF8 coded characters. +* +* Output: 0-terminated string of UTF-16 characters. +* +*********************************************************************************/ +CsrUtf16String *CsrUtf82Utf16String(const CsrUtf8String *utf8String) +{ + CsrSize i, length = 0; + CsrSize sourceLength; + CsrUtf16String *dest = NULL; + CsrUtf16String *destStart = NULL; + CsrInt8 extraBytes2Read; + + if (!utf8String) + { + return NULL; + } + sourceLength = CsrStrLen((CsrCharString *) utf8String); + + for (i = 0; i < sourceLength; i++) + { + extraBytes2Read = trailingBytesForUtf8[utf8String[i]]; + + if (extraBytes2Read == -1) /* Illegal byte value, instead put a Unicode 'REPLACEMENT CHARACTER' (U+FFFD) */ + { + length += 1; + } + else if (i + extraBytes2Read > sourceLength) /* The extra bytes does not exist, instead put a Unicode 'REPLACEMENT + CHARACTER' (U+FFFD), and the null terminated character */ + { + length += 2; + break; + } + else if (isLegalUtf8(&utf8String[i], extraBytes2Read + 1) == FALSE) /* It is not a legal utf-8 character, instead put a Unicode 'REPLACEMENT + CHARACTER' (U+FFFD) */ + { + length += 1; + } + else + { + if (utf8String[i] > 0xEF) /* Needs a high and a low surrogate */ + { + length += 2; + } + else + { + length += 1; + } + i += extraBytes2Read; + } + } + + /* Create space for the null terminated character */ + dest = (CsrUtf16String *) CsrPmemAlloc((1 + length) * sizeof(CsrUtf16String)); + destStart = dest; + + for (i = 0; i < sourceLength; i++) + { + extraBytes2Read = trailingBytesForUtf8[utf8String[i]]; + + if (extraBytes2Read == -1) /* Illegal byte value, instead put a Unicode 'REPLACEMENT CHARACTER' (U+FFFD) */ + { + *dest++ = UNI_REPLACEMENT_CHAR; + } + else if (i + extraBytes2Read > sourceLength) /* The extra bytes does not exist, instead put a Unicode 'REPLACEMENT + CHARACTER' (U+FFFD), and the null terminated character */ + { + *dest++ = UNI_REPLACEMENT_CHAR; + *dest++ = '\0'; + break; + } + else if (isLegalUtf8(&utf8String[i], extraBytes2Read + 1) == FALSE) /* It is not a legal utf-8 character, instead put a Unicode 'REPLACEMENT + CHARACTER' (U+FFFD) */ + { + *dest++ = UNI_REPLACEMENT_CHAR; + } + else /* It is legal, convert the character to an CsrUint32 */ + { + CsrUint32 ch = 0; + + switch (extraBytes2Read) /* Everything falls through */ + { + case 3: + { + ch += utf8String[i]; + ch <<= 6; + i++; + } + /* FALLTHROUGH */ + case 2: + { + ch += utf8String[i]; + ch <<= 6; + i++; + } + /* FALLTHROUGH */ + case 1: + { + ch += utf8String[i]; + ch <<= 6; + i++; + } + /* FALLTHROUGH */ + case 0: + { + ch += utf8String[i]; + } + /* FALLTHROUGH */ + default: + { + break; + } + } + + ch -= offsetsFromUtf8[extraBytes2Read]; + + if (ch <= 0xFFFF) /* Character can be encoded in one CsrUint16 */ + { + *dest++ = (CsrUint16) ch; + } + else /* The character needs two CsrUint16 */ + { + ch -= UNI_HALF_BASE; + *dest++ = (CsrUint16) ((ch >> UNI_HALF_SHIFT) | UNI_SUR_HIGH_START); + *dest++ = (CsrUint16) ((ch & 0x03FF) | UNI_SUR_LOW_START); + } + } + } + + destStart[length] = 0x00; + + return destStart; +} + +/******************************************************************************** +* +* Name: CsrUtf16StrCpy +* +* Description: The function copies the contents from one UTF-16 string +* to another UTF-16 string. +* +* Input: 0-terminated UTF-16 string. +* +* Output: 0-terminated UTF-16 string. +* +*********************************************************************************/ +CsrUtf16String *CsrUtf16StrCpy(CsrUtf16String *target, const CsrUtf16String *source) +{ + if (source) /* if source is not NULL*/ + { + CsrMemCpy(target, source, (CsrUtf16StrLen(source) + 1) * sizeof(CsrUtf16String)); + return target; + } + else + { + return NULL; + } +} + +/******************************************************************************** +* +* Name: CsrUtf16StringDuplicate +* +* Description: The function allocates a new pointer and copies the input to +* the new pointer. +* +* Input: 0-terminated UTF-16 string. +* +* Output: Allocated variable0-terminated UTF-16 string. +* +*********************************************************************************/ +CsrUtf16String *CsrUtf16StringDuplicate(const CsrUtf16String *source) +{ + CsrUtf16String *target = NULL; + CsrUint32 length; + + if (source) /* if source is not NULL*/ + { + length = (CsrUtf16StrLen(source) + 1) * sizeof(CsrUtf16String); + target = (CsrUtf16String *) CsrPmemAlloc(length); + CsrMemCpy(target, source, length); + } + return target; +} + +/******************************************************************************** +* +* Name: CsrUtf16StrICmp +* +* Description: The function compares two UTF-16 strings. +* +* Input: Two 0-terminated UTF-16 string. +* +* Output: 0: if the strings are identical. +* +*********************************************************************************/ +CsrUint16 CsrUtf16StrICmp(const CsrUtf16String *string1, const CsrUtf16String *string2) +{ + while (*string1 || *string2) + { + if (CAPITAL(*string1) != CAPITAL(*string2)) + { + return *string1 - *string2; + } + string1++; + string2++; + } + + return 0; +} + +/******************************************************************************** +* +* Name: CsrUtf16StrNICmp +* +* Description: The function compares upto count number of elements in the +* two UTF-16 string. +* +* Input: Two 0-terminated UTF-16 string and a maximum +* number of elements to check. +* +* Output: 0: if the strings are identical. +* +*********************************************************************************/ +CsrUint16 CsrUtf16StrNICmp(const CsrUtf16String *string1, const CsrUtf16String *string2, CsrUint32 count) +{ + while ((*string1 || *string2) && count--) + { + if (CAPITAL(*string1) != CAPITAL(*string2)) + { + return *string1 - *string2; + } + string1++; + string2++; + } + + return 0; +} + +/******************************************************************************** +* +* Name: CsrUtf16String2XML +* +* Description: The function converts an unicoded string (UTF-16) into an unicoded XML +* string where some special characters are encoded according to +* the XML spec. +* +* Input: A unicoded string (UTF-16) which is freed. +* +* Output: A new unicoded string (UTF-16) containing the converted output. +* +*********************************************************************************/ +CsrUtf16String *CsrUtf16String2XML(CsrUtf16String *str) +{ + CsrUtf16String *scanString; + CsrUtf16String *outputString = NULL; + CsrUtf16String *resultString = str; + CsrUint32 stringLength = 0; + CsrBool encodeChars = FALSE; + + scanString = str; + if (scanString) + { + while (*scanString) + { + if (*scanString == L'&') + { + stringLength += 5; + encodeChars = TRUE; + } + else if ((*scanString == L'<') || (*scanString == L'>')) + { + stringLength += 4; + encodeChars = TRUE; + } + else + { + stringLength++; + } + + scanString++; + } + + stringLength++; + + if (encodeChars) + { + resultString = outputString = CsrPmemAlloc(stringLength * sizeof(CsrUtf16String)); + + scanString = str; + + while (*scanString) + { + if (*scanString == L'&') + { + *outputString++ = '&'; + *outputString++ = 'a'; + *outputString++ = 'm'; + *outputString++ = 'p'; + *outputString++ = ';'; + } + else if (*scanString == L'<') + { + *outputString++ = '&'; + *outputString++ = 'l'; + *outputString++ = 't'; + *outputString++ = ';'; + } + else if (*scanString == L'>') + { + *outputString++ = '&'; + *outputString++ = 'g'; + *outputString++ = 't'; + *outputString++ = ';'; + } + else + { + *outputString++ = *scanString; + } + + scanString++; + } + + *outputString++ = 0; + + CsrPmemFree(str); + } + } + + return resultString; +} + +/******************************************************************************** +* +* Name: CsrXML2Utf16String +* +* Description: The function converts an unicoded XML string into an unicoded +* string (UTF-16) where some special XML characters are decoded according to +* the XML spec. +* +* Input: A unicoded XML string which is freed. +* +* Output: A new unicoded pointer containing the decoded output. +* +*********************************************************************************/ +CsrUtf16String *CsrXML2Utf16String(CsrUtf16String *str) +{ + CsrUtf16String *scanString; + CsrUtf16String *outputString = NULL; + CsrUtf16String *resultString = str; + CsrUint32 stringLength = 0; + CsrBool encodeChars = FALSE; + + scanString = str; + if (scanString) + { + while (*scanString) + { + if (*scanString == (CsrUtf16String) L'&') + { + scanString++; + + if (!CsrUtf16StrNICmp(scanString, (CsrUtf16String *) L"AMP;", 4)) + { + scanString += 3; + encodeChars = TRUE; + } + else if (!CsrUtf16StrNICmp(scanString, (CsrUtf16String *) L"LT;", 3)) + { + scanString += 2; + encodeChars = TRUE; + } + else if (!CsrUtf16StrNICmp(scanString, (CsrUtf16String *) L"GT;", 3)) + { + scanString += 2; + encodeChars = TRUE; + } + if (!CsrUtf16StrNICmp(scanString, (CsrUtf16String *) L"APOS;", 5)) + { + scanString += 4; + encodeChars = TRUE; + } + if (!CsrUtf16StrNICmp(scanString, (CsrUtf16String *) L"QUOT;", 5)) + { + scanString += 4; + encodeChars = TRUE; + } + else + { + scanString--; + } + } + + stringLength++; + scanString++; + } + + stringLength++; + + if (encodeChars) + { + resultString = outputString = CsrPmemAlloc(stringLength * sizeof(CsrUtf16String)); + + scanString = str; + + while (*scanString) + { + if (*scanString == L'&') + { + scanString++; + + if (!CsrUtf16StrNICmp(scanString, (CsrUtf16String *) L"AMP;", 4)) + { + *outputString++ = L'&'; + scanString += 3; + } + else if (!CsrUtf16StrNICmp(scanString, (CsrUtf16String *) L"LT;", 3)) + { + *outputString++ = L'<'; + scanString += 2; + } + else if (!CsrUtf16StrNICmp(scanString, (CsrUtf16String *) L"GT;", 3)) + { + *outputString++ = L'>'; + scanString += 2; + } + else if (!CsrUtf16StrNICmp(scanString, (CsrUtf16String *) L"APOS;", 5)) + { + *outputString++ = L'\''; + scanString += 4; + } + else if (!CsrUtf16StrNICmp(scanString, (CsrUtf16String *) L"QUOT;", 5)) + { + *outputString++ = L'\"'; + scanString += 4; + } + else + { + *outputString++ = L'&'; + scanString--; + } + } + else + { + *outputString++ = *scanString; + } + + scanString++; + } + + *outputString++ = 0; + + CsrPmemFree(str); + } + } + + return resultString; +} + +CsrInt32 CsrUtf8StrCmp(const CsrUtf8String *string1, const CsrUtf8String *string2) +{ + return CsrStrCmp((const CsrCharString *) string1, (const CsrCharString *) string2); +} + +CsrInt32 CsrUtf8StrNCmp(const CsrUtf8String *string1, const CsrUtf8String *string2, CsrSize count) +{ + return CsrStrNCmp((const CsrCharString *) string1, (const CsrCharString *) string2, count); +} + +CsrUint32 CsrUtf8StringLengthInBytes(const CsrUtf8String *string) +{ + CsrSize length = 0; + if (string) + { + length = CsrStrLen((const CsrCharString *) string); + } + return (CsrUint32) length; +} + +CsrUtf8String *CsrUtf8StrCpy(CsrUtf8String *target, const CsrUtf8String *source) +{ + return (CsrUtf8String *) CsrStrCpy((CsrCharString *) target, (const CsrCharString *) source); +} + +CsrUtf8String *CsrUtf8StrTruncate(CsrUtf8String *target, CsrSize count) +{ + CsrSize lastByte = count - 1; + + target[count] = '\0'; + + if (count && (target[lastByte] & 0x80)) + { + /* the last byte contains non-ascii char */ + if (target[lastByte] & 0x40) + { + /* multi-byte char starting just before truncation */ + target[lastByte] = '\0'; + } + else if ((target[lastByte - 1] & 0xE0) == 0xE0) + { + /* 3-byte char starting 2 bytes before truncation */ + target[lastByte - 1] = '\0'; + } + else if ((target[lastByte - 2] & 0xF0) == 0xF0) + { + /* 4-byte char starting 3 bytes before truncation */ + target[lastByte - 2] = '\0'; + } + } + + return target; +} + +CsrUtf8String *CsrUtf8StrNCpy(CsrUtf8String *target, const CsrUtf8String *source, CsrSize count) +{ + return (CsrUtf8String *) CsrStrNCpy((CsrCharString *) target, (const CsrCharString *) source, count); +} + +CsrUtf8String *CsrUtf8StrNCpyZero(CsrUtf8String *target, const CsrUtf8String *source, CsrSize count) +{ + CsrStrNCpy((CsrCharString *) target, (const CsrCharString *) source, count); + if (target[count - 1] != '\0') + { + CsrUtf8StrTruncate(target, count - 1); + } + return target; +} + +CsrUtf8String *CsrUtf8StrDup(const CsrUtf8String *source) +{ + return (CsrUtf8String *) CsrStrDup((const CsrCharString *) source); +} + +CsrUtf8String *CsrUtf8StringConcatenateTexts(const CsrUtf8String *inputText1, const CsrUtf8String *inputText2, const CsrUtf8String *inputText3, const CsrUtf8String *inputText4) +{ + CsrUtf8String *outputText; + CsrUint32 textLen, textLen1, textLen2, textLen3, textLen4; + + textLen1 = CsrUtf8StringLengthInBytes(inputText1); + textLen2 = CsrUtf8StringLengthInBytes(inputText2); + textLen3 = CsrUtf8StringLengthInBytes(inputText3); + textLen4 = CsrUtf8StringLengthInBytes(inputText4); + + textLen = textLen1 + textLen2 + textLen3 + textLen4; + + if (textLen == 0) /*stop here is all lengths are 0*/ + { + return NULL; + } + + outputText = (CsrUtf8String *) CsrPmemAlloc((textLen + 1) * sizeof(CsrUtf8String)); /* add space for 0-termination*/ + + + if (inputText1 != NULL) + { + CsrUtf8StrNCpy(outputText, inputText1, textLen1); + } + + if (inputText2 != NULL) + { + CsrUtf8StrNCpy(&(outputText[textLen1]), inputText2, textLen2); + } + + if (inputText3 != NULL) + { + CsrUtf8StrNCpy(&(outputText[textLen1 + textLen2]), inputText3, textLen3); + } + + if (inputText4 != NULL) + { + CsrUtf8StrNCpy(&(outputText[textLen1 + textLen2 + textLen3]), inputText4, textLen4); + } + + outputText[textLen] = '\0'; + + return outputText; +} |