diff options
Diffstat (limited to 'src/core/basetypes/MCString.cpp')
-rw-r--r-- | src/core/basetypes/MCString.cpp | 212 |
1 files changed, 193 insertions, 19 deletions
diff --git a/src/core/basetypes/MCString.cpp b/src/core/basetypes/MCString.cpp
index 213d5e5f..9c024067 100644
--- a/src/core/basetypes/MCString.cpp
+++ b/src/core/basetypes/MCString.cpp
@@ -823,7 +823,9 @@ String::String(const char * UTF8Characters)
 {
     mUnicodeChars = NULL;
     reset();
-    allocate((unsigned int) strlen(UTF8Characters), true);
+    if (UTF8Characters != NULL) { // strlen(NULL) is undefined behavior, so only size the buffer for a real C string
+        allocate((unsigned int) strlen(UTF8Characters), true);
+    }
     appendUTF8Characters(UTF8Characters);
 }
 
@@ -890,7 +892,7 @@ void String::allocate(unsigned int length, bool force)
 
 String * String::string()
 {
-    return stringWithCharacters(NULL);
+    return stringWithCharacters(NULL, 0); // explicit-length overload: the one-argument form returns NULL for NULL input
 }
 
 String * String::stringWithData(Data * data, const char * charset)
@@ -926,12 +928,18 @@ String * String::stringWithVUTF8Format(const char * format, va_list ap)
 
 String * String::stringWithUTF8Characters(const char * UTF8Characters)
 {
+    if (UTF8Characters == NULL) { // no source characters: report "no string" instead of building one
+        return NULL;
+    }
     String * result = new String(UTF8Characters);
     return (String *) result->autorelease();
 }
 
 String * String::stringWithCharacters(const UChar * characters)
 {
+    if (characters == NULL) { // same NULL-in, NULL-out contract as stringWithUTF8Characters()
+        return NULL;
+    }
     String * result = new String(characters);
     return (String *) result->autorelease();
 }
@@ -1194,6 +1202,9 @@ Data * String::encodedMIMEHeaderValueForSubject()
 
 int String::compareWithCaseSensitive(String * otherString, bool caseSensitive)
 {
+    if (otherString == NULL) { // checked first: every line below dereferences otherString
+        return 1; // the receiver orders after a missing string
+    }
     if ((length() == 0) && (otherString->length() == 0)) {
         return 0;
     }
@@ -1206,7 +1217,7 @@ int String::compareWithCaseSensitive(String * otherString, bool caseSensitive)
     }
 
     if (otherString->unicodeCharacters() == NULL) {
-        return -1;
+        return 1; // the other string has no characters, so the receiver orders after it
     }
 
 #if DISABLE_ICU
@@ -2034,25 +2045,69 @@ String * String::flattenHTML()
 
 String * String::stripWhitespace()
 {
-    String *str = (String *)copy();
-
-    str->replaceOccurrencesOfString(MCSTR("\t"), MCSTR(" "));
-    str->replaceOccurrencesOfString(MCSTR("\n"), MCSTR(" "));
-    str->replaceOccurrencesOfString(MCSTR("\v"), MCSTR(" "));
-    str->replaceOccurrencesOfString(MCSTR("\f"), MCSTR(" "));
-    str->replaceOccurrencesOfString(MCSTR("\r"), MCSTR(" "));
-    str->replaceOccurrencesOfString(s_unicode160, MCSTR(" "));
-    str->replaceOccurrencesOfString(s_unicode133, MCSTR(" "));
-    str->replaceOccurrencesOfString(s_unicode2028, MCSTR(" "));
+    String * str = (String *)copy(); // all edits below are made in the copy's buffer, not in the receiver
 
-    while (str->replaceOccurrencesOfString(MCSTR("  "), MCSTR(" ")) > 0) {
-        /* do nothing */
+    // Pass 1: rewrite every space-like character as a plain ' ', in place (source and dest advance together).
+    const UChar * source = str->unicodeCharacters();
+    UChar * dest = str->mUnicodeChars;
+    while (* source != 0) {
+        if (* source == '\t') {
+            * dest = ' ';
+        }
+        else if (* source == '\n') {
+            * dest = ' ';
+        }
+        else if (* source == '\v') { // vertical tab, as the replaceOccurrencesOfString("\v") pass being removed handled it
+            * dest = ' ';
+        }
+        else if (* source == '\f') {
+            * dest = ' ';
+        }
+        else if (* source == '\r') {
+            * dest = ' ';
+        }
+        else if (* source == 160) { // U+00A0, previously s_unicode160
+            * dest = ' ';
+        }
+        else if (* source == 133) { // U+0085, previously s_unicode133
+            * dest = ' ';
+        }
+        else if (* source == 0x2028) { // U+2028, previously s_unicode2028
+            * dest = ' ';
+        }
+        else {
+            * dest = * source;
+        }
+        dest ++;
+        source ++;
     }
-    while (str->hasPrefix(MCSTR(" "))) {
-        str->deleteCharactersInRange(RangeMake(0, 1));
+
+    // Pass 2: skip spaces at the beginning; dest restarts at the front so the kept text is compacted leftwards.
+    source = str->unicodeCharacters();
+    dest = str->mUnicodeChars;
+    while (* source == ' ') {
+        source ++;
     }
-    while (str->hasSuffix(MCSTR(" "))) {
-        str->deleteCharactersInRange(RangeMake(str->length() - 1, 1));
+
+    // Pass 3: copy content, keeping only the last space of each run so that any run collapses to a single ' '.
+    while (* source != 0) {
+        while ((* source == ' ') && (* (source + 1) == ' ')) { // a loop, not an if: a run may hold more than two spaces
+            source ++;
+        }
+        * dest = * source;
+        source ++;
+        dest ++;
+    }
+    * dest = 0;
+    str->mLength = (unsigned int) (dest - str->mUnicodeChars);
+
+    // Pass 4: drop the trailing space, if any. The first kept character is never a space, so this stays in the buffer.
+    if (str->mLength > 0) {
+        while (* (dest - 1) == ' ') {
+            dest --;
+        }
+        * dest = 0;
+        str->mLength = (unsigned int) (dest - str->mUnicodeChars);
     }
 
     str->autorelease();
@@ -2241,10 +2296,29 @@ Array * String::componentsSeparatedByString(String * separator)
     p = mUnicodeChars;
     while (1) {
         UChar * location;
+#if 0
         location = u_strstr(p, separator->unicodeCharacters());
         if (location == NULL) {
             break;
         }
+#else
+        const char * cursor = (const char *) p; // byte-wise search position; p must keep marking the start of the component
+        location = NULL;
+        while (location == NULL) {
+            location = (UChar *) memmem(cursor, (size_t) ((const char *) (mUnicodeChars + length()) - cursor), separator->unicodeCharacters(), separator->length() * sizeof(UChar));
+            if (location == NULL) {
+                break;
+            }
+            // A hit at an odd byte offset from p straddles two UChars: it is not a real match, so resume one byte further.
+            if (((char *) location - (char *) p) % sizeof(UChar) != 0) {
+                cursor = ((const char *) location) + 1; // the haystack length above is recomputed from cursor on every pass
+                location = NULL;
+            }
+        }
+        if (location == NULL) {
+            break;
+        }
+#endif
 
         unsigned int length = (unsigned int) (location - p);
         String * value = new String(p, length);
@@ -2357,6 +2431,10 @@ String * String::uniquedStringWithUTF8Characters(const char * UTF8Characters)
     static pthread_once_t once = PTHREAD_ONCE_INIT;
     int r;
 
+    if (UTF8Characters == NULL) { // strlen() below must not see NULL
+        return NULL;
+    }
+
     pthread_once(&once, initUniquedStringHash);
     key.data = (void *) UTF8Characters;
     key.len = (unsigned int) strlen(UTF8Characters);
@@ -2512,6 +2590,102 @@ Data * String::decodedBase64Data()
     return result;
 }
 
+static int hexValue(const char * code) { // value 0..255 of the two hex digits code[0] and code[1], or -1 if either is not a hex digit
+    int value = 0;
+    const char * pch = code;
+    for (;;) {
+        int digit = *pch++;
+        if (digit >= '0' && digit <= '9') {
+            value += digit - '0';
+        }
+        else if (digit >= 'A' && digit <= 'F') {
+            value += digit - 'A' + 10;
+        }
+        else if (digit >= 'a' && digit <= 'f') {
+            value += digit - 'a' + 10;
+        }
+        else {
+            return -1;
+        }
+        if (pch == code + 2) { // both digits consumed
+            return value;
+        }
+        value <<= 4; // make room for the low nibble
+    }
+}
+
+String * String::urlDecodedString() // percent-decoded copy; the '%' of a malformed or truncated escape becomes '?'
+{
+    Data * sourceData = dataUsingEncoding();
+    const char * source = sourceData->bytes();
+    char * start = (char *) malloc(sourceData->length() + 1); // decoding never grows the text; +1 for the terminator
+    char * dest = start;
+    unsigned int i = 0;
+    while (i < sourceData->length()) {
+        switch (source[i]) {
+            case '%':
+            {
+                if (i + 2 < sourceData->length()) { // both hex digits must lie inside the buffer
+                    int value = hexValue(&source[i + 1]);
+                    if (value >= 0) {
+                        *(dest++) = (char) value;
+                        i += 3;
+                    }
+                    else {
+                        *dest++ = '?';
+                        i ++;
+                    }
+                }
+                else {
+                    *dest++ = '?';
+                    i ++;
+                }
+                break;
+            }
+            default:
+            {
+                *dest++ = source[i];
+                i ++;
+                break;
+            }
+        }
+    }
+    * dest = 0;
+    String * result = String::stringWithUTF8Characters(start);
+    free(start);
+    return result;
+}
+
+static inline bool isValidUrlChar(char ch) { // true only for RFC 3986 "unreserved" characters; '%', non-ASCII bytes and the rest get escaped
+    return (ch != '\0') && (strchr("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~", ch) != NULL);
+}
+
+String * String::urlEncodedString() // percent-encoded copy: each byte of dataUsingEncoding() that is not unreserved becomes %XX
+{
+    const char * digits = "0123456789ABCDEF";
+    Data * sourceData = dataUsingEncoding();
+    const char * source = sourceData->bytes();
+    char * start = (char *) malloc(sourceData->length() * 3 + 1); // worst case: every byte expands to "%XX"
+    char * dest = start;
+    unsigned int i = 0;
+    while (i < sourceData->length()) {
+        unsigned char ch = (unsigned char) source[i];
+        if (isValidUrlChar(ch)) {
+            *dest++ = ch;
+        } else {
+            *dest++ = '%';
+            *dest++ = digits[(ch >> 4) & 0x0F];
+            *dest++ = digits[ch & 0x0F];
+        }
+        i ++;
+    }
+    *dest = 0;
+    String * result = String::stringWithUTF8Characters(start); // start, not dest: dest now points at the terminator
+    free(start);
+
+    return result;
+}
+
 HashMap * String::serializable()
 {
     HashMap * result = Object::serializable();