diff options
Diffstat (limited to 'src/core/basetypes/MCString.cpp')
-rw-r--r-- | src/core/basetypes/MCString.cpp | 2460 |
1 files changed, 2460 insertions, 0 deletions
diff --git a/src/core/basetypes/MCString.cpp b/src/core/basetypes/MCString.cpp new file mode 100644 index 00000000..c17c7074 --- /dev/null +++ b/src/core/basetypes/MCString.cpp @@ -0,0 +1,2460 @@ +#include "MCWin32.h" // should be included first. + +#include "MCString.h" + +#if __APPLE__ +#define DISABLE_ICU 1 +#endif + +#include <string.h> +#include <stdlib.h> +#if !DISABLE_ICU +#include <unicode/ustring.h> +#include <unicode/ucnv.h> +#include <unicode/utypes.h> +#endif +#ifndef _MSC_VER +#include <uuid/uuid.h> +#endif +#include <pthread.h> +#include <libetpan/libetpan.h> +#include <libxml/xmlmemory.h> +#include <libxml/HTMLparser.h> +#if __APPLE__ +#include <CoreFoundation/CoreFoundation.h> +#endif + +#include "MCDefines.h" +#include "MCData.h" +#include "MCHash.h" +#include "MCLog.h" +#include "MCUtils.h" +#include "MCRange.h" +#include "MCArray.h" +#include "MCSet.h" +#include "MCHashMap.h" +#include "MCAutoreleasePool.h" +#include "MCValue.h" +#include "MCHTMLCleaner.h" +#include "MCBase64.h" +#include "MCIterator.h" +#include "ConvertUTF.h" + +using namespace mailcore; + +static String * s_unicode160 = NULL; +static String * s_unicode133 = NULL; +static String * s_unicode2028 = NULL; + +#if DISABLE_ICU +static int32_t u_strlen(const UChar *s) { + if (s == NULL) { + return 0; + } + const UChar * p = s; + while (* p != 0) { + p ++; + } + return (int32_t) (p - s); +} + +static UChar * u_memcpy(UChar * dest, const UChar * src, int32_t count) { + memcpy(dest, src, count * sizeof(* src)); + return dest; +} + +static UChar * u_strstr(const UChar * s, const UChar * substring) +{ + if (s == NULL) { + return NULL; + } + CFStringRef cfS = CFStringCreateWithCharactersNoCopy(NULL, (const UniChar *) s, u_strlen(s), kCFAllocatorNull); + CFStringRef cfSubstring = CFStringCreateWithCharactersNoCopy(NULL, (const UniChar *) substring, u_strlen(substring), kCFAllocatorNull); + + CFRange range = CFStringFind(cfS, cfSubstring, 0); + CFRelease(cfSubstring); + CFRelease(cfS); + if (range.length == 0) { + return NULL; + } + return (UChar *) (s + range.location); +} + +static UChar * u_strrstr(const UChar * s, const UChar * substring) +{ + if (s == NULL) { + return NULL; + } + CFStringRef cfS = CFStringCreateWithCharactersNoCopy(NULL, (const UniChar *) s, u_strlen(s), kCFAllocatorNull); + CFStringRef cfSubstring = CFStringCreateWithCharactersNoCopy(NULL, (const UniChar *) substring, u_strlen(substring), kCFAllocatorNull); + + CFRange range = CFStringFind(cfS, cfSubstring, kCFCompareBackwards); + CFRelease(cfSubstring); + CFRelease(cfS); + if (range.length == 0) { + return NULL; + } + return (UChar *) (s + range.location); +} + +static int32_t u_memcmp(const UChar * buf1, const UChar * buf2, int32_t count) +{ + return memcmp(buf1, buf2, count * sizeof(* buf1)); +} + +static UChar * u_strchr(const UChar *s, UChar c) +{ + if (s == NULL) { + return NULL; + } + const UChar * p = s; + while (* p != 0) { + if (* p == c) { + return (UChar *) p; + } + p ++; + } + return NULL; +} + +static UChar * u_strrchr(const UChar *s, UChar c) +{ + if (s == NULL) { + return NULL; + } + const UChar * lastOccurrence = NULL; + const UChar * p = s; + while (* p != 0) { + if (* p == c) { + lastOccurrence = p; + } + p ++; + } + return (UChar *) lastOccurrence; +} +#endif + +void mailcore::setICUDataDirectory(String * directory) +{ +#if !DISABLE_ICU + u_setDataDirectory(directory->fileSystemRepresentation()); +#endif +} + +#pragma mark quote headers string + +static inline int to_be_quoted(const char * word, size_t size, int subject) +{ + int do_quote; + const char * cur; + size_t i; + + do_quote = 0; + cur = word; + for(i = 0 ; i < size ; i ++) { + if (* cur == '=') + do_quote = 1; + + if (!subject) { + switch (* cur) { + case ',': + case ':': + case '!': + case '"': + case '#': + case '$': + case '@': + case '[': + case '\\': + case ']': + case '^': + case '`': + case '{': + case '|': + case '}': + case '~': + case '=': + case '?': + case '_': + do_quote = 1; + break; + } + } + if (((unsigned char) * cur) >= 128) + do_quote = 1; + + cur ++; + } + + return do_quote; +} + +#define MAX_IMF_LINE 72 + +static inline void quote_word(const char * display_charset, + MMAPString * mmapstr, const char * word, size_t size) +{ + const char * cur; + size_t i; + char hex[4]; + int col; + + mmap_string_append(mmapstr, "=?"); + mmap_string_append(mmapstr, display_charset); + mmap_string_append(mmapstr, "?Q?"); + + col = (int) mmapstr->len; + + cur = word; + for(i = 0 ; i < size ; i ++) { + int do_quote_char; + + do_quote_char = 0; + switch (* cur) { + case ',': + case ':': + case '!': + case '"': + case '#': + case '$': + case '@': + case '[': + case '\\': + case ']': + case '^': + case '`': + case '{': + case '|': + case '}': + case '~': + case '=': + case '?': + case '_': + do_quote_char = 1; + break; + + default: + if (((unsigned char) * cur) >= 128) + do_quote_char = 1; + break; + } + + if (do_quote_char) { + snprintf(hex, 4, "=%2.2X", (unsigned char) * cur); + mmap_string_append(mmapstr, hex); + col += 3; + } + else { + if (* cur == ' ') { + mmap_string_append_c(mmapstr, '_'); + } + else { + mmap_string_append_c(mmapstr, * cur); + } + col += 3; + } + cur ++; + } + + mmap_string_append(mmapstr, "?="); +} + +static inline void get_word(const char * begin, const char ** pend, int subject, int * pto_be_quoted) +{ + const char * cur; + + cur = begin; + + while ((* cur != ' ') && (* cur != '\t') && (* cur != '\0')) { + cur ++; + } + while (((* cur == ' ') || (* cur == '\t')) && (* cur != '\0')) { + cur ++; + } + + if (cur - begin + + 1 /* minimum column of string in a + folded header */ > MAX_IMF_LINE) + * pto_be_quoted = 1; + else + * pto_be_quoted = to_be_quoted(begin, cur - begin, subject); + + * pend = cur; +} + +static char * etpan_make_full_quoted_printable(const char * display_charset, + const char * phrase) +{ + int needs_quote; + char * str; + + needs_quote = to_be_quoted(phrase, strlen(phrase), 0); + if (needs_quote) { + MMAPString * mmapstr; + + mmapstr = mmap_string_new(""); + quote_word(display_charset, mmapstr, phrase, strlen(phrase)); + str = strdup(mmapstr->str); + mmap_string_free(mmapstr); + } + else { + str = strdup(phrase); + } + + return str; +} + +static char * etpan_make_quoted_printable(const char * display_charset, + const char * phrase, int subject) +{ + char * str; + const char * cur; + MMAPString * mmapstr; + + mmapstr = mmap_string_new(""); + + cur = phrase; + while (* cur != '\0') { + const char * begin; + const char * end; + int do_quote; + int quote_words; + + begin = cur; + end = begin; + quote_words = 0; + do_quote = 1; + + while (* cur != '\0') { + get_word(cur, &cur, subject, &do_quote); + if (do_quote) { + quote_words = 1; + end = cur; + } + else + break; + if (* cur != '\0') + cur ++; + } + + if (quote_words) { + quote_word(display_charset, mmapstr, begin, end - begin); + + if ((* end == ' ') || (* end == '\t')) { + mmap_string_append_c(mmapstr, * end); + end ++; + } + + if (* end != '\0') { + mmap_string_append_len(mmapstr, end, cur - end); + } + } + else { + mmap_string_append_len(mmapstr, begin, cur - begin); + } + + if ((* cur == ' ') || (* cur == '\t')) { + mmap_string_append_c(mmapstr, * cur); + cur ++; + } + } + + str = strdup(mmapstr->str); + mmap_string_free(mmapstr); + + return str; +} + +#pragma mark extract subject + +static inline int skip_subj_blob(char * subj, size_t * begin, + size_t length, int keep_bracket) +{ + if (keep_bracket) + return 0; + + /* subj-blob = "[" *BLOBCHAR "]" *WSP */ + size_t cur_token; + + cur_token = * begin; + + if (subj[cur_token] != '[') + return 0; + + cur_token ++; + + while (1) { + if (cur_token >= length) + return 0; + + if (subj[cur_token] == '[') + return 0; + + if (subj[cur_token] == ']') + break; + + cur_token ++; + } + + cur_token ++; + + while (1) { + if (cur_token >= length) + break; + + if (subj[cur_token] != ' ') + break; + + cur_token ++; + } + + * begin = cur_token; + + return 1; +} + +static inline int skip_subj_refwd(char * subj, size_t * begin, + size_t length, int keep_bracket) +{ + /* subj-refwd = ("re" / ("fw" ["d"])) *WSP [subj-blob] ":" */ + size_t cur_token; + int prefix; + int has_suffix; + + cur_token = * begin; + prefix = 0; + if (!prefix) { + if (length - cur_token >= 18) { + if (strncasecmp(subj + cur_token, "Переслать", 18) == 0) { + cur_token += 18; + prefix = 1; + } + } + } + if (!prefix) { + if (length - cur_token >= 10) { + if (strncasecmp(subj + cur_token, "Ответ", 10) == 0) { + cur_token += 10; + prefix = 1; + } + } + } + if (!prefix) { + if (length - cur_token >= 7) { + if (strncasecmp(subj + cur_token, "Antwort", 7) == 0) { + cur_token += 7; + prefix = 1; + } + } + } + if (!prefix) { + if (length - cur_token >= 6) { + if (strncasecmp(subj + cur_token, "回复", 6) == 0) { + cur_token += 6; + prefix = 1; + } + else if (strncasecmp(subj + cur_token, "转发", 6) == 0) { + cur_token += 6; + prefix = 1; + } + } + } + if (!prefix) { + if (length - cur_token >= 5) { + // é is 2 chars in utf-8 + if (strncasecmp(subj + cur_token, "réf.", 5) == 0) { + cur_token += 5; + prefix = 1; + } + else if (strncasecmp(subj + cur_token, "rép.", 5) == 0) { + cur_token += 5; + prefix = 1; + } + else if (strncasecmp(subj + cur_token, "trans", 5) == 0) { + cur_token += 5; + prefix = 1; + } + } + } + if (!prefix) { + if (length - cur_token >= 4) { + if (strncasecmp(subj + cur_token, "antw", 4) == 0) { + cur_token += 4; + prefix = 1; + } + } + } + if (!prefix) { + if (length - cur_token >= 3) { + if (strncasecmp(subj + cur_token, "fwd", 3) == 0) { + cur_token += 3; + prefix = 1; + } + else if (strncasecmp(subj + cur_token, "ogg", 3) == 0) { + cur_token += 3; + prefix = 1; + } + else if (strncasecmp(subj + cur_token, "odp", 3) == 0) { + cur_token += 3; + prefix = 1; + } + else if (strncasecmp(subj + cur_token, "res", 3) == 0) { + cur_token += 3; + prefix = 1; + } + else if (strncasecmp(subj + cur_token, "end", 3) == 0) { + cur_token += 3; + prefix = 1; + } + } + } + if (!prefix) { + if (length - cur_token >= 2) { + if (strncasecmp(subj + cur_token, "fw", 2) == 0) { + cur_token += 2; + prefix = 1; + } + else if (strncasecmp(subj + cur_token, "re", 2) == 0) { + cur_token += 2; + prefix = 1; + } + else if (strncasecmp(subj + cur_token, "tr", 2) == 0) { + cur_token += 2; + prefix = 1; + } + else if (strncasecmp(subj + cur_token, "aw", 2) == 0) { + cur_token += 2; + prefix = 1; + } + else if (strncasecmp(subj + cur_token, "sv", 2) == 0) { + cur_token += 2; + prefix = 1; + } + else if (strncasecmp(subj + cur_token, "rv", 2) == 0) { + cur_token += 2; + prefix = 1; + } + } + } + if (!prefix) { + if (length - cur_token >= 1) { + if (strncasecmp(subj + cur_token, "r", 1) == 0) { + cur_token += 1; + prefix = 1; + } + } + } + + if (!prefix) + return 0; + + while (1) { + if (cur_token >= length) + break; + + if (subj[cur_token] != ' ') + break; + + cur_token ++; + } + + skip_subj_blob(subj, &cur_token, length, keep_bracket); + + has_suffix = 0; + + if (!has_suffix) { + if (length - cur_token >= 3) { + if (strncasecmp(subj + cur_token, ":", 3) == 0) { + cur_token += 3; + has_suffix = 1; + } + } + } + + if (!has_suffix) { + if (cur_token < length) { + if (subj[cur_token] == ':') { + cur_token ++; + has_suffix = 1; + } + } + } + + if (!has_suffix) { + return 0; + } + + * begin = cur_token; + + return 1; +} + +static inline int skip_subj_leader(char * subj, size_t * begin, + size_t length, int keep_bracket) +{ + size_t cur_token; + + cur_token = * begin; + + /* subj-leader = (*subj-blob subj-refwd) / WSP */ + + if (subj[cur_token] == ' ') { + cur_token ++; + } + else { + while (cur_token < length) { + if (!skip_subj_blob(subj, &cur_token, length, keep_bracket)) + break; + } + if (!skip_subj_refwd(subj, &cur_token, length, keep_bracket)) + return 0; + } + + * begin = cur_token; + + return 1; +} + +static char * extract_subject(char * str, int keep_bracket) +{ + char * subj; + char * cur; + char * write_pos; + size_t len; + size_t begin; + int do_repeat_5; + int do_repeat_6; + + /* + (1) Convert any RFC 2047 encoded-words in the subject to + UTF-8. + We work on UTF-8 string -- DVH + */ + + subj = strdup(str); + if (subj == NULL) + return NULL; + + len = strlen(subj); + + /* + Convert all tabs and continuations to space. + Convert all multiple spaces to a single space. + */ + + cur = subj; + write_pos = subj; + while (* cur != '\0') { + int cont; + + switch (* cur) { + case '\t': + case '\r': + case '\n': + cont = 1; + + cur ++; + while (* cur && cont) { + switch (* cur) { + case '\t': + case '\r': + case '\n': + cont = 1; + break; + default: + cont = 0; + break; + } + cur ++; + } + + * write_pos = ' '; + write_pos ++; + + break; + + default: + * write_pos = * cur; + write_pos ++; + + cur ++; + + break; + } + } + * write_pos = '\0'; + + begin = 0; + + do { + do_repeat_6 = 0; + + /* + (2) Remove all trailing text of the subject that matches + the subj-trailer ABNF, repeat until no more matches are + possible. + */ + + while (len > 0) { + /* subj-trailer = "(fwd)" / WSP */ + if (subj[len - 1] == ' ') { + subj[len - 1] = '\0'; + len --; + } + else { + if (len < 5) + break; + + if (strncasecmp(subj + len - 5, "(fwd)", 5) != 0) + break; + + subj[len - 5] = '\0'; + len -= 5; + } + } + + do { + size_t saved_begin; + + do_repeat_5 = 0; + + /* + (3) Remove all prefix text of the subject that matches the + subj-leader ABNF. + */ + + if (skip_subj_leader(subj, &begin, len, keep_bracket)) + do_repeat_5 = 1; + + /* + (4) If there is prefix text of the subject that matches the + subj-blob ABNF, and removing that prefix leaves a non-empty + subj-base, then remove the prefix text. + */ + + saved_begin = begin; + if (skip_subj_blob(subj, &begin, len, keep_bracket)) { + if (begin == len) { + /* this will leave a empty subject base */ + begin = saved_begin; + } + else + do_repeat_5 = 1; + } + + /* + (5) Repeat (3) and (4) until no matches remain. + Note: it is possible to defer step (2) until step (6), + but this requires checking for subj-trailer in step (4). + */ + + } + while (do_repeat_5); + + /* + (6) If the resulting text begins with the subj-fwd-hdr ABNF + and ends with the subj-fwd-trl ABNF, remove the + subj-fwd-hdr and subj-fwd-trl and repeat from step (2). + */ + + if (len >= 5) { + if (strncasecmp(subj + begin, "[fwd:", 5) == 0) { + begin += 5; + + if (subj[len - 1] == ']') { + subj[len - 1] = '\0'; + len --; + do_repeat_6 = 1; + } + } + } + + } + while (do_repeat_6); + + /* + (7) The resulting text is the "base subject" used in + threading. + */ + + /* convert to upper case */ + + cur = subj + begin; + write_pos = subj; + + while (* cur != '\0') { + * write_pos = * cur; + cur ++; + write_pos ++; + } + * write_pos = '\0'; + + return subj; +} + +String::String(const UChar * unicodeChars) +{ + mUnicodeChars = NULL; + reset(); + if (unicodeChars != NULL) { + allocate(u_strlen(unicodeChars), true); + } + appendCharacters(unicodeChars); +} + +String::String(const UChar * unicodeChars, unsigned int length) +{ + mUnicodeChars = NULL; + reset(); + allocate(length, true); + appendCharactersLength(unicodeChars, length); +} + +String::String(const char * UTF8Characters) +{ + mUnicodeChars = NULL; + reset(); + allocate((unsigned int) strlen(UTF8Characters), true); + appendUTF8Characters(UTF8Characters); +} + +String::String(String * otherString) +{ + mUnicodeChars = NULL; + reset(); + appendString(otherString); +} + +String::String(Data * data, const char * charset) +{ + mUnicodeChars = NULL; + reset(); + appendBytes(data->bytes(), data->length(), charset); +} + +String::String(const char * bytes, unsigned int length, const char * charset) +{ + mUnicodeChars = NULL; + reset(); + allocate(length, true); + if (charset == NULL) { + appendUTF8CharactersLength(bytes, length); + } + else { + appendBytes(bytes, length, charset); + } +} + +String::~String() +{ + reset(); +} + +static int isPowerOfTwo (unsigned int x) +{ + return ((x != 0) && !(x & (x - 1))); +} + +void String::allocate(unsigned int length, bool force) +{ + length ++; + if (length <= mAllocated) + return; + + if (force) { + mAllocated = length; + } + else { + if (!isPowerOfTwo(mAllocated)) { + mAllocated = 0; + } + if (mAllocated == 0) { + mAllocated = 4; + } + while (length > mAllocated) { + mAllocated *= 2; + } + } + + mUnicodeChars = (UChar *) realloc(mUnicodeChars, mAllocated * sizeof(* mUnicodeChars)); +} + +String * String::string() +{ + return stringWithCharacters(NULL); +} + +String * String::stringWithData(Data * data, const char * charset) +{ + if (data == NULL) { + return String::string(); + } + String * result = NULL; + result = new String(data->bytes(), data->length(), charset); + result->autorelease(); + return result; +} + +String * String::stringWithUTF8Format(const char * format, ...) +{ + va_list argp; + + va_start(argp, format); + String * result = stringWithVUTF8Format(format, argp); + va_end(argp); + + return result; +} + +String * String::stringWithVUTF8Format(const char * format, va_list ap) +{ + char * result; + vasprintf(&result, format, ap); + String * str = stringWithUTF8Characters(result); + free(result); + return str; +} + +String * String::stringWithUTF8Characters(const char * UTF8Characters) +{ + String * result = new String(UTF8Characters); + return (String *) result->autorelease(); +} + +String * String::stringWithCharacters(const UChar * characters) +{ + String * result = new String(characters); + return (String *) result->autorelease(); +} + +String * String::stringWithCharacters(const UChar * characters, unsigned int length) +{ + String * result = new String(characters, length); + return (String *) result->autorelease(); +} + +void String::appendCharactersLength(const UChar * unicodeCharacters, unsigned int length) +{ + if (unicodeCharacters == NULL) { + return; + } + allocate(mLength + length); + memcpy(&mUnicodeChars[mLength], unicodeCharacters, length * sizeof(* mUnicodeChars)); + mLength += length; + mUnicodeChars[mLength] = 0; +} + +void String::appendString(String * otherString) +{ + if (otherString == NULL) { + return; + } + appendCharactersLength(otherString->unicodeCharacters(), otherString->length()); +} + +void String::appendUTF8Format(const char * format, ...) +{ + va_list argp; + + va_start(argp, format); + String * otherString = stringWithVUTF8Format(format, argp); + va_end(argp); + + this->appendString(otherString); +} + +void String::appendUTF8CharactersLength(const char * UTF8Characters, unsigned int length) +{ + if (UTF8Characters == NULL) { + return; + } + + const UTF8 * source = (const UTF8 *) UTF8Characters; + UTF16 * target = (UTF16 *) malloc(length * sizeof(* target)); + UTF16 * targetStart = target; + ConvertUTF8toUTF16(&source, source + length, + &targetStart, targetStart + length, lenientConversion); + appendCharactersLength((UChar *) target, (unsigned int ) (targetStart - target)); + free(target); +} + +void String::appendUTF8Characters(const char * UTF8Characters) +{ + appendUTF8CharactersLength(UTF8Characters, (unsigned int) strlen(UTF8Characters)); +} + +void String::appendCharacters(const UChar * unicodeCharacters) +{ + if (unicodeCharacters == NULL) + return; + + appendCharactersLength(unicodeCharacters, u_strlen(unicodeCharacters)); +} + +const UChar * String::unicodeCharacters() +{ + return mUnicodeChars; +} + +const char * String::UTF8Characters() +{ + const UTF16 * source = (const UTF16 *) mUnicodeChars; + UTF8 * target = (UTF8 *) malloc(mLength * 6 + 1); + UTF8 * targetStart = target; + ConvertUTF16toUTF8(&source, source + mLength, + &targetStart, targetStart + mLength * 6 + 1, lenientConversion); + unsigned int utf8length = (unsigned int) (targetStart - target); + target[utf8length] = 0; + Data * data = Data::dataWithBytes((const char *) target, utf8length + 1); + free(target); + + return data->bytes(); +} + +unsigned int String::length() +{ + return mLength; +} + +String * String::stringByAppendingString(String * otherString) +{ + String * result = new String(this); + result->appendString(otherString); + return (String *) result->autorelease(); +} + +String * String::stringByAppendingUTF8Format(const char * format, ...) +{ + va_list argp; + + va_start(argp, format); + String * otherString = stringWithVUTF8Format(format, argp); + va_end(argp); + + return this->stringByAppendingString(otherString); +} + +String * String::stringByAppendingUTF8Characters(const char * UTF8Characters) +{ + String * otherString = stringWithUTF8Characters(UTF8Characters); + return this->stringByAppendingString(otherString); +} + +String * String::stringByAppendingCharacters(const UChar * unicodeCharacters) +{ + String * otherString = stringWithCharacters(unicodeCharacters); + return this->stringByAppendingString(otherString); +} + +void String::reset() +{ + free(mUnicodeChars); + mUnicodeChars = NULL; + mLength = 0; + mAllocated = 0; +} + +void String::setString(String * otherString) +{ + reset(); + appendString(otherString); +} + +void String::setUTF8Characters(const char * UTF8Characters) +{ + reset(); + appendUTF8Characters(UTF8Characters); +} + +void String::setCharacters(const UChar * unicodeCharacters) +{ + reset(); + appendCharacters(unicodeCharacters); +} + +String * String::description() +{ + return this; +} + +Object * String::copy() +{ + return new String(this); +} + +bool String::isEqual(Object * otherObject) +{ + if (otherObject == NULL) + return false; + + String * otherString = (String *) otherObject; + if (length() != otherString->length()) + return false; + return compare(otherString) == 0; +} + +unsigned int String::hash() +{ + return hashCompute((const char *) mUnicodeChars, mLength * sizeof(* mUnicodeChars)); +} + +#define DEFAULT_INCOMING_CHARSET "iso-8859-1" +#define DEFAULT_DISPLAY_CHARSET "utf-8" + +String * String::stringByDecodingMIMEHeaderValue(const char * phrase) +{ + size_t cur_token; + char * decoded; + String * result; + bool hasEncoding; + + if (phrase == NULL) + return string(); + + if (* phrase == '\0') { + return string(); + } + + hasEncoding = false; + if (strstr(phrase, "=?") != NULL) { + if ((strcasestr(phrase, "?Q?") != NULL) || (strcasestr(phrase, "?B?") != NULL)) { + hasEncoding = true; + } + } + + if (!hasEncoding) { + return Data::dataWithBytes(phrase, (unsigned int) strlen(phrase))->stringWithDetectedCharset(); + } + + cur_token = 0; + decoded = NULL; + mailmime_encoded_phrase_parse(DEFAULT_INCOMING_CHARSET, + phrase, strlen(phrase), + &cur_token, DEFAULT_DISPLAY_CHARSET, + &decoded); + + result = NULL; + if (decoded != NULL) { + result = stringWithUTF8Characters(decoded); + } + else { + MCLog("could not decode: %s\n", phrase); + } + + free(decoded); + + return result; +} + +Data * String::encodedAddressDisplayNameValue() +{ + char * str; + Data * result; + + str = etpan_make_full_quoted_printable(DEFAULT_DISPLAY_CHARSET, UTF8Characters()); + result = Data::dataWithBytes(str, (unsigned int) strlen(str) + 1); + free(str); + + return result; +} + +Data * String::encodedMIMEHeaderValue() +{ + char * str; + Data * result; + + str = etpan_make_quoted_printable(DEFAULT_DISPLAY_CHARSET, UTF8Characters(), 0); + result = Data::dataWithBytes(str, (unsigned int) strlen(str) + 1); + free(str); + + return result; +} + +Data * String::encodedMIMEHeaderValueForSubject() +{ + char * str; + Data * result; + + str = etpan_make_quoted_printable(DEFAULT_DISPLAY_CHARSET, UTF8Characters(), 1); + result = Data::dataWithBytes(str, (unsigned int) strlen(str) + 1); + free(str); + + return result; +} + +int String::compareWithCaseSensitive(String * otherString, bool caseSensitive) +{ + if ((unicodeCharacters() == NULL) && (otherString->unicodeCharacters() != NULL)) { + return 0; + } + + if (unicodeCharacters() == NULL) { + return -1; + } + + if (otherString->unicodeCharacters() == NULL) { + return -1; + } + +#if DISABLE_ICU + CFStringRef cfThis = CFStringCreateWithCharactersNoCopy(NULL, mUnicodeChars, mLength, kCFAllocatorNull); + CFStringRef cfOther = CFStringCreateWithCharactersNoCopy(NULL, otherString->mUnicodeChars, otherString->mLength, kCFAllocatorNull); + CFComparisonResult result = CFStringCompare(cfThis, cfOther, caseSensitive ? 0 : kCFCompareCaseInsensitive); + CFRelease(cfThis); + CFRelease(cfOther); + return result; +#else + if (caseSensitive) { + return u_strcmp(unicodeCharacters(), otherString->unicodeCharacters()); + } + else { + return u_strcasecmp(unicodeCharacters(), otherString->unicodeCharacters(), 0); + } +#endif +} + +int String::compare(String * otherString) +{ + return compareWithCaseSensitive(otherString, true); +} + +int String::caseInsensitiveCompare(String * otherString) +{ + return compareWithCaseSensitive(otherString, false); +} + +//Any-Lower, Any-Upper +String * String::lowercaseString() +{ +#if DISABLE_ICU + CFMutableStringRef cfStr = CFStringCreateMutable(NULL, 0); + CFStringAppendCharacters(cfStr, (const UniChar *) mUnicodeChars, mLength); + CFStringLowercase(cfStr, NULL); + UniChar * characters = (UniChar *) malloc(sizeof(* characters) * mLength); + CFStringGetCharacters(cfStr, CFRangeMake(0, mLength), characters); + String * result = String::stringWithCharacters(characters, mLength); + free(characters); + CFRelease(cfStr); + return result; +#else + UErrorCode err; + String * result = (String *) copy()->autorelease(); + err = U_ZERO_ERROR; + u_strToLower(result->mUnicodeChars, result->mLength, + result->mUnicodeChars, result->mLength, + NULL, &err); + return result; +#endif +} + +String * String::uppercaseString() +{ +#if DISABLE_ICU + CFMutableStringRef cfStr = CFStringCreateMutable(NULL, 0); + CFStringAppendCharacters(cfStr, (const UniChar *) mUnicodeChars, mLength); + CFStringUppercase(cfStr, NULL); + UniChar * characters = (UniChar *) malloc(sizeof(* characters) * mLength); + CFStringGetCharacters(cfStr, CFRangeMake(0, mLength), characters); + String * result = String::stringWithCharacters(characters, mLength); + free(characters); + CFRelease(cfStr); + return result; +#else + UErrorCode err; + String * result = (String *) copy()->autorelease(); + err = U_ZERO_ERROR; + u_strToUpper(result->mUnicodeChars, result->mLength, + result->mUnicodeChars, result->mLength, + NULL, &err); + return result; +#endif +} + +void String::appendBytes(const char * bytes, unsigned int length, const char * charset) +{ + if (bytes == NULL) { + return; + } + +#if __APPLE__ + CFStringEncoding encoding; + if (strcasecmp(charset, "mutf-7") == 0) { + encoding = kCFStringEncodingUTF7_IMAP; + } + else { + CFStringRef encodingName = CFStringCreateWithCString(NULL, charset, kCFStringEncodingUTF8); + encoding = CFStringConvertIANACharSetNameToEncoding(encodingName); + if (encoding == kCFStringEncodingBig5) { + encoding = kCFStringEncodingBig5_HKSCS_1999; + } + if (encoding == kCFStringEncodingGBK_95) { + encoding = kCFStringEncodingGB_18030_2000; + } + if (encoding == kCFStringEncodingGB_2312_80) { + encoding = kCFStringEncodingGB_18030_2000; + } + CFRelease(encodingName); + } + if (encoding == kCFStringEncodingUTF8) { + appendUTF8CharactersLength(bytes, length); + return; + } + + bool converted = false; + int conversionCount = 0; + while (!converted) { + CFStringRef cfStr = CFStringCreateWithBytes(NULL, (const UInt8 *) bytes, (CFIndex) length, encoding, false); + if (cfStr != NULL) { + converted = true; + CFDataRef data = CFStringCreateExternalRepresentation(NULL, cfStr, kCFStringEncodingUTF16LE, '_'); + if (data != NULL) { + UChar * fixedData = (UChar *) malloc(CFDataGetLength(data)); + memcpy(fixedData, CFDataGetBytePtr(data), CFDataGetLength(data)); + unsigned int length = (unsigned int) CFDataGetLength(data) / 2; + for(int32_t i = 0 ; i < length ; i ++) { + if (fixedData[i] == 0) { + fixedData[i] = ' '; + } + } + appendCharactersLength(fixedData, length); + free(fixedData); + CFRelease(data); + } + CFRelease(cfStr); + } + else { + length --; + conversionCount ++; + if (conversionCount > 10) { + // failed. + break; + } + } + } +#else + UErrorCode err; + + err = U_ZERO_ERROR; + UConverter * converter = ucnv_open(charset, &err); + if (converter == NULL) { + MCLog("invalid charset %s %i", charset, err); + return; + } + + err = U_ZERO_ERROR; + int32_t destLength = ucnv_toUChars(converter, NULL, 0, + bytes, length, &err); + int32_t destCapacity = destLength + 1; + UChar * dest = (UChar *) malloc(destCapacity * sizeof(* dest)); + err = U_ZERO_ERROR; + destLength = ucnv_toUChars(converter, dest, destCapacity, bytes, length, &err); + dest[destLength] = 0; + + // Fix in case of bad conversion. + for(int32_t i = 0 ; i < destLength ; i ++) { + if (dest[i] == 0) { + dest[i] = ' '; + } + } + + appendCharactersLength(dest, destLength); + free(dest); + + ucnv_close(converter); +#endif +} + +String * String::extractedSubject() +{ + return extractedSubjectAndKeepBracket(false); +} + +String * String::extractedSubjectAndKeepBracket(bool keepBracket) +{ + char * result; + String * str; + + result = extract_subject((char *) UTF8Characters(), keepBracket); + str = String::stringWithUTF8Characters(result); + free(result); + + return str; +} + +#ifndef _MSC_VER +String * String::uuidString() +{ + uuid_t uuid; +#ifdef _UUID_STRING_T + uuid_string_t uuidString; +#else + char uuidString[37]; +#endif + + uuid_generate(uuid); + uuid_unparse_lower(uuid, uuidString); + return String::stringWithUTF8Characters(uuidString); +} +#endif + +unsigned int String::replaceOccurrencesOfString(String * occurrence, String * replacement) +{ + unsigned int count; + + if (occurrence->length() == 0) + return 0; + + count = 0; + UChar * p = mUnicodeChars; + while (1) { + UChar * location; + location = u_strstr(p, occurrence->unicodeCharacters()); + if (location == NULL) + break; + p = location + occurrence->length(); + count ++; + } + + UChar * unicodeChars; + int delta = replacement->length() - occurrence->length(); + int modifiedLength = mLength + delta * count + 1; + unicodeChars = (UChar *) malloc(modifiedLength * sizeof(* unicodeChars)); + unicodeChars[modifiedLength - 1] = 0; + UChar * dest_p = unicodeChars; + p = mUnicodeChars; + while (1) { + UChar * location; + unsigned int count; + + location = u_strstr(p, occurrence->unicodeCharacters()); + if (location == NULL) + break; + count = (unsigned int) (location - p); + u_memcpy(dest_p, p, count); + dest_p += count; + p += count; + u_memcpy(dest_p, replacement->unicodeCharacters(), replacement->length()); + p += occurrence->length(); + dest_p += replacement->length(); + } + // copy remaining + if(p) { + unsigned int remainingLength = mLength - (unsigned int) (p - mUnicodeChars); + memcpy(dest_p, p, remainingLength * sizeof(* p)); + dest_p += remainingLength; + * dest_p = 0; + } + + free(mUnicodeChars); + mUnicodeChars = unicodeChars; + mLength = modifiedLength - 1; + + return count; +} + +UChar String::characterAtIndex(unsigned int index) +{ + return mUnicodeChars[index]; +} + +void String::deleteCharactersInRange(Range range) +{ + if (range.location > mLength) + return; + + if (range.length > mLength) { + range.length = mLength - range.location; + } + else if (range.location + range.length > mLength) { + range.length = mLength - range.location; + } + + int32_t count = mLength - (int32_t) (range.location + range.length); + memmove(&mUnicodeChars[range.location], &mUnicodeChars[range.location + range.length], count * sizeof(* mUnicodeChars)); + mLength -= range.length; + mUnicodeChars[mLength] = 0; +} + +int String::locationOfString(String * occurrence) +{ + UChar * location; + location = u_strstr(mUnicodeChars, occurrence->unicodeCharacters()); + if (location == NULL) { + return -1; + } + + return (int) (location - mUnicodeChars); +} + +#pragma mark strip HTML + +struct parserState { + int level; + int enabled; + int disabledLevel; + String * result; + int logEnabled; + int hasQuote; + int quoteLevel; + bool hasText; + bool lastCharIsWhitespace; + bool showBlockQuote; + bool showLink; + bool hasReturnToLine; + Array * linkStack; + Array * paragraphSpacingStack; +}; + +static void appendQuote(struct parserState * state); + +static void charactersParsed(void * context, + const xmlChar * ch, int len) +/*" Callback function for stringByStrippingHTML. "*/ +{ + struct parserState * state; + + state = (struct parserState *) context; + String * result = state->result; + + if (!state->enabled) { + return; + } + + if (state->logEnabled) { + MCLog("text %s", ch); + } + String * modifiedString; + modifiedString = new String((const char *) ch, len); + modifiedString->autorelease(); + modifiedString = modifiedString->stripWhitespace(); + + if (modifiedString->length() > 0) { + if (state->lastCharIsWhitespace) { + if (modifiedString->characterAtIndex(0) == ' ') { + modifiedString->deleteCharactersInRange(RangeMake(0, 1)); + } + } + } + + if (modifiedString->length() > 0) { + bool lastIsWhiteSpace; + bool isWhiteSpace; + + isWhiteSpace = false; + lastIsWhiteSpace = false; + if (modifiedString->length() > 0) { + if (modifiedString->characterAtIndex(modifiedString->length() - 1) == ' ') { + lastIsWhiteSpace = true; + } + } + if (lastIsWhiteSpace && (modifiedString->length() == 1)) { + isWhiteSpace = true; + } + + if (isWhiteSpace) { + if (state->lastCharIsWhitespace) { + // do nothing + } + else if (!state->hasText) { + // do nothing + } + else { + result->appendString(MCSTR(" ")); + state->lastCharIsWhitespace = true; + state->hasText = true; + } + } + else { + if (!state->hasQuote) { + appendQuote(state); + state->hasQuote = true; + } + result->appendString(modifiedString); + state->lastCharIsWhitespace = lastIsWhiteSpace; + state->hasText = true; + } + } +} + +/* GCS: custom error function to ignore errors */ +static void structuredError(void * userData, + xmlErrorPtr error) +{ + /* ignore all errors */ + (void)userData; + (void)error; +} + +static void appendQuote(struct parserState * state) +{ + if (state->quoteLevel < 0) { + MCLog("error consistency in quote level"); + state->lastCharIsWhitespace = true; + return; + } + for(int i = 0 ; i < state->quoteLevel ; i ++) { + state->result->appendString(MCSTR("> ")); + } + state->lastCharIsWhitespace = true; +} + +static void returnToLine(struct parserState * state) +{ + if (!state->hasQuote) { + appendQuote(state); + state->hasQuote = true; + } + state->result->appendString(MCSTR("\n")); + state->hasText = false; + state->lastCharIsWhitespace = true; + state->hasQuote = false; + state->hasReturnToLine = false; +} + +static void returnToLineAtBeginningOfBlock(struct parserState * state) +{ + if (state->hasText) { + returnToLine(state); + } + state->hasQuote = false; +} + +static Set * blockElements(void) +{ + static Set * elements = NULL; + pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; + + pthread_mutex_lock(&lock); + if (elements == NULL) { + elements = new Set(); + elements->addObject(MCSTR("address")); + elements->addObject(MCSTR("div")); + elements->addObject(MCSTR("p")); + elements->addObject(MCSTR("h1")); + elements->addObject(MCSTR("h2")); + elements->addObject(MCSTR("h3")); + elements->addObject(MCSTR("h4")); + elements->addObject(MCSTR("h5")); + elements->addObject(MCSTR("h6")); + elements->addObject(MCSTR("pre")); + elements->addObject(MCSTR("ul")); + elements->addObject(MCSTR("ol")); + elements->addObject(MCSTR("li")); + elements->addObject(MCSTR("dl")); + elements->addObject(MCSTR("dt")); + elements->addObject(MCSTR("dd")); + elements->addObject(MCSTR("form")); + // tables + elements->addObject(MCSTR("col")); + elements->addObject(MCSTR("colgroup")); + elements->addObject(MCSTR("th")); + elements->addObject(MCSTR("tbody")); + elements->addObject(MCSTR("thead")); + elements->addObject(MCSTR("tfoot")); + elements->addObject(MCSTR("table")); + elements->addObject(MCSTR("tr")); + elements->addObject(MCSTR("td")); + } + pthread_mutex_unlock(&lock); + + return elements; +} + +static HashMap * dictionaryFromAttributes(const xmlChar ** atts) +{ + HashMap * result; + + if (atts == NULL) + return NULL; + + result = HashMap::hashMap(); + for(const xmlChar ** curAtt = atts ; * curAtt != NULL ; curAtt += 2) { + const xmlChar * attrName; + const xmlChar * attrValue; + String * name; + + attrName = * curAtt; + attrValue = * (curAtt + 1); + if ((attrName == NULL) || (attrValue == NULL)) + continue; + + name = String::stringWithUTF8Characters((const char *) attrName); + name = name->lowercaseString(); + result->setObjectForKey(name, String::stringWithUTF8Characters((const char *) attrValue)); + } + + return result; +} + +static void elementStarted(void * ctx, const xmlChar * name, const xmlChar ** atts) +{ + struct parserState * state; + + state = (struct parserState *) ctx; + + if (state->logEnabled) { + MCLog("parsed element %s", name); + } + + if (strcasecmp((const char *) name, "blockquote") == 0) { + state->quoteLevel ++; + } + else if (strcasecmp((const char *) name, "a") == 0) { + AutoreleasePool * pool; + String * link = NULL; + HashMap * attributes; + + pool = new AutoreleasePool(); + attributes = dictionaryFromAttributes(atts); + if (attributes != NULL) { + link = (String *) attributes->objectForKey(MCSTR("href")); + } + if (link == NULL) + link = MCSTR(""); + + Array * item; + item = new Array(); + item->addObject(link); + item->addObject(Value::valueWithUnsignedIntValue(state->result->length())); + state->linkStack->addObject(item); + item->release(); + pool->release(); + } + else if (strcasecmp((const char *) name, "p") == 0) { + bool hasSpacing; + String * style; + AutoreleasePool * pool; + HashMap * attributes; + + hasSpacing = true; + + pool = new AutoreleasePool(); + attributes = dictionaryFromAttributes(atts); + if (attributes != NULL) { + style = (String *) attributes->objectForKey(MCSTR("style")); + if (style != NULL) { + if (style->locationOfString(MCSTR("margin: 0.0px 0.0px 0.0px 0.0px;")) != -1) { + hasSpacing = false; + } + else if (style->locationOfString(MCSTR("margin: 0px 0px 0px 0px;")) != -1) { + hasSpacing = false; + } + else if (style->locationOfString(MCSTR("margin: 0.0px;")) != -1) { + hasSpacing = false; + } + else if (style->locationOfString(MCSTR("margin: 0px;")) != -1) { + hasSpacing = false; + } + } + } + pool->release(); + + state->paragraphSpacingStack->addObject(Value::valueWithBoolValue(hasSpacing)); + } + + if (state->enabled) { + if (state->level == 1) { + if (strcasecmp((const char *) name, "head") == 0) { + state->enabled = 0; + state->disabledLevel = state->level; + } + } + if (strcasecmp((const char *) name, "style") == 0) { + state->enabled = 0; + state->disabledLevel = state->level; + } + else if (strcasecmp((const char *) name, "script") == 0) { + state->enabled = 0; + state->disabledLevel = state->level; + } + else if (strcasecmp((const char *) name, "p") == 0) { + returnToLineAtBeginningOfBlock(state); + if (((Value *) state->paragraphSpacingStack->lastObject())->boolValue()) { + returnToLine(state); + } + } + else if (blockElements()->containsObject(String::stringWithUTF8Characters((const char *) name)->lowercaseString())) { + returnToLineAtBeginningOfBlock(state); + } + else if (strcasecmp((const char *) name, "blockquote") == 0) { + if (!state->showBlockQuote) { + AutoreleasePool * pool; + String * type; + bool cite; + HashMap * attributes; + + cite = false; + pool = new AutoreleasePool(); + attributes = dictionaryFromAttributes(atts); + if (attributes != NULL) { + type = (String *) attributes->objectForKey(MCSTR("type")); + if (type != NULL) { + if (type->caseInsensitiveCompare(MCSTR("cite")) == 0) { + cite = true; + } + } + } + pool->release(); + + if (cite) { + state->enabled = 0; + state->disabledLevel = state->level; + } + else { + returnToLineAtBeginningOfBlock(state); + } + } + else { + returnToLineAtBeginningOfBlock(state); + } + } + else if (strcasecmp((const char *) name, "br") == 0) { + returnToLine(state); + state->hasReturnToLine = true; + } + } + + state->level ++; +} + +static void elementEnded(void * ctx, const xmlChar * name) +{ + struct parserState * state; + + state = (struct parserState *) ctx; + + if (state->logEnabled) { + MCLog("ended element %s", name); + } + + if (strcasecmp((const char *) name, "blockquote") == 0) { + state->quoteLevel --; + } + + state->level --; + if (!state->enabled) { + if (state->level == state->disabledLevel) { + state->enabled = 1; + } + } + + bool hasReturnToLine; + + hasReturnToLine = false; + if (strcasecmp((const char *) name, "a") == 0) { + if (state->enabled) { + Array * item; + String * link; + unsigned int offset; + + item = (Array *) state->linkStack->lastObject(); + link = (String *) item->objectAtIndex(0); + offset = ((Value *) item->objectAtIndex(1))->unsignedIntValue(); + if (state->showLink) { + if (offset != state->result->length()) { + if (link->length() > 0) { + if (!state->result->hasSuffix(link)) { + state->result->appendUTF8Characters("("); + state->result->appendString(link); + state->result->appendUTF8Characters(")"); + state->hasText = true; + state->lastCharIsWhitespace = false; + } + } + } + } + } + + state->linkStack->removeObjectAtIndex(state->linkStack->count() - 1); + } + else if (strcasecmp((const char *) name, "p") == 0) { + if (state->enabled) { + if (((Value *) state->paragraphSpacingStack->lastObject())->boolValue()) { + returnToLine(state); + } + } + state->paragraphSpacingStack->removeObjectAtIndex(state->paragraphSpacingStack->count() - 1); + hasReturnToLine = true; + } + else if (blockElements()->containsObject(String::stringWithUTF8Characters((const char *) name)->lowercaseString())) { + hasReturnToLine = true; + } + else if (strcasecmp((const char *) name, "blockquote") == 0) { + hasReturnToLine = true; + } + + if (hasReturnToLine) { + if (state->enabled) { + if (!state->hasReturnToLine) { + returnToLine(state); + } + } + } +} + +static void commentParsed(void * ctx, const xmlChar * value) +{ + struct parserState * state; + + state = (struct parserState *) ctx; + + if (state->logEnabled) { + MCLog("comments %s", value); + } +} + +void initializeLibXML() +{ + static bool initDone = false; + static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; + + pthread_mutex_lock(&lock); + if (!initDone) { + initDone = true; + xmlInitParser(); + + /* GCS: override structuredErrorFunc to mine so + I can ignore errors */ + xmlSetStructuredErrorFunc(xmlGenericErrorContext, + &structuredError); + } + pthread_mutex_unlock(&lock); +} + +String * String::flattenHTMLAndShowBlockquoteAndLink(bool showBlockquote, bool showLink) +/*" Interpretes the receiver als HTML, removes all tags + and returns the plain text. "*/ +{ + initializeLibXML(); + + int mem_base = xmlMemBlocks(); + String * result = String::string(); + xmlSAXHandler handler; + memset(&handler, 0, sizeof(xmlSAXHandler)); + handler.characters = charactersParsed; + handler.startElement = elementStarted; + handler.endElement = elementEnded; + handler.comment = commentParsed; + struct parserState state; + state.result = result; + state.level = 0; + state.enabled = 1; + state.logEnabled = 0; + state.disabledLevel = 0; + state.quoteLevel = 0; + state.hasText = false; + state.hasQuote = false; + state.hasReturnToLine = false; + state.showBlockQuote = showBlockquote; + state.showLink = showLink; + state.lastCharIsWhitespace = true; + state.linkStack = new Array(); + state.paragraphSpacingStack = new Array(); + + const char * characters = cleanedHTMLString()->UTF8Characters(); + + htmlSAXParseDoc((xmlChar*) characters, "utf-8", &handler, &state); + + if (mem_base != xmlMemBlocks()) { + MCLog("Leak of %d blocks found in htmlSAXParseDoc", + xmlMemBlocks() - mem_base); + } + + state.paragraphSpacingStack->release(); + state.linkStack->release(); + + UChar ch[2]; + ch[0] = 160; + ch[1] = 0; + result->replaceOccurrencesOfString(String::stringWithCharacters(ch), MCSTR(" ")); + + return result; +} + +String * String::flattenHTMLAndShowBlockquote(bool showBlockquote) +{ + return flattenHTMLAndShowBlockquoteAndLink(showBlockquote, true); +} + +String * String::flattenHTML() +{ + return flattenHTMLAndShowBlockquote(true); +} + +String * String::stripWhitespace() +{ + String *str = (String *)copy(); + + str->replaceOccurrencesOfString(MCSTR("\t"), MCSTR(" ")); + str->replaceOccurrencesOfString(MCSTR("\n"), MCSTR(" ")); + str->replaceOccurrencesOfString(MCSTR("\v"), MCSTR(" ")); + str->replaceOccurrencesOfString(MCSTR("\f"), MCSTR(" ")); + str->replaceOccurrencesOfString(MCSTR("\r"), MCSTR(" ")); + str->replaceOccurrencesOfString(s_unicode160, MCSTR(" ")); + str->replaceOccurrencesOfString(s_unicode133, MCSTR(" ")); + str->replaceOccurrencesOfString(s_unicode2028, MCSTR(" ")); + + while (str->replaceOccurrencesOfString(MCSTR(" "), MCSTR(" ")) > 0) { + /* do nothing */ + } + while (str->hasPrefix(MCSTR(" "))) { + str->deleteCharactersInRange(RangeMake(0, 1)); + } + while (str->hasSuffix(MCSTR(" "))) { + str->deleteCharactersInRange(RangeMake(str->length() - 1, 1)); + } + + str->autorelease(); + return str; +} + + +bool String::hasSuffix(String * suffix) +{ + if (mLength >= suffix->mLength) { + if (u_memcmp(mUnicodeChars + (mLength - suffix->mLength), + suffix->mUnicodeChars, suffix->mLength) == 0) { + return true; + } + } + return false; +} + +bool String::hasPrefix(String * prefix) +{ + if (mLength >= prefix->mLength) { + if (u_memcmp(prefix->mUnicodeChars, mUnicodeChars, prefix->mLength) == 0) { + return true; + } + } + return false; +} + +String * String::lastPathComponent() +{ + // TODO: Improve Windows compatibility. + if (mUnicodeChars == NULL) + return MCSTR(""); + UChar * component = u_strrchr(mUnicodeChars, '/'); + if (component == NULL) + return (String *) this->copy()->autorelease(); + return String::stringWithCharacters(component + 1); +} + +String * String::pathExtension() +{ + UChar * component = u_strrchr(mUnicodeChars, '.'); + if (component == NULL) + return MCSTR(""); + return String::stringWithCharacters(component + 1); +} + +Data * String::dataUsingEncoding(const char * charset) +{ + if (charset == NULL) { + charset = "utf-8"; + } + +#if __APPLE__ + Data * data; + + data = NULL; + CFStringEncoding encoding; + if (strcasecmp(charset, "mutf-7") == 0) { + encoding = kCFStringEncodingUTF7_IMAP; + } + else { + CFStringRef encodingName = CFStringCreateWithCString(NULL, charset, kCFStringEncodingUTF8); + encoding = CFStringConvertIANACharSetNameToEncoding(encodingName); + CFRelease(encodingName); + } + CFStringRef cfStr = CFStringCreateWithBytes(NULL, (const UInt8 *) mUnicodeChars, + (CFIndex) mLength * sizeof(* mUnicodeChars), kCFStringEncodingUTF16LE, false); + if (cfStr != NULL) { + CFDataRef cfData = CFStringCreateExternalRepresentation(NULL, cfStr, encoding, '_'); + if (cfData != NULL) { + data = Data::dataWithBytes((const char *) CFDataGetBytePtr(cfData), + (unsigned int) CFDataGetLength(cfData)); + CFRelease(cfData); + } + CFRelease(cfStr); + } + + return data; +#else + UErrorCode err; + Data * data; + + err = U_ZERO_ERROR; + UConverter * converter = ucnv_open(charset, &err); + if (converter == NULL) { + MCLog("invalid charset %s %i", charset, err); + return NULL; + } + + err = U_ZERO_ERROR; + int32_t destLength = ucnv_fromUChars(converter, NULL, 0, mUnicodeChars, mLength, &err); + int32_t destCapacity = destLength + 1; + char * dest = (char *) malloc(destCapacity * sizeof(* dest)); + err = U_ZERO_ERROR; + destLength = ucnv_fromUChars(converter, dest, destCapacity, mUnicodeChars, mLength, &err); + dest[destLength] = 0; + + // Fix in case of bad conversion. + for(int32_t i = 0 ; i < destLength ; i ++) { + if (dest[i] == 0) { + dest[i] = ' '; + } + } + + data = Data::dataWithBytes(dest, destLength); + + free(dest); + + ucnv_close(converter); + + return data; +#endif +} + +const char * String::fileSystemRepresentation() +{ + return UTF8Characters(); +} + +String * String::stringWithFileSystemRepresentation(const char * filename) +{ + return stringWithUTF8Characters(filename); +} + +String * String::stringByAppendingPathComponent(String * component) +{ + // TODO: Improve Windows compatibility. + String * result = (String *) this->copy()->autorelease(); + if (result->length() > 0) { + UChar lastChar = result->unicodeCharacters()[result->length() - 1]; + if (lastChar != '/') { + result->appendUTF8Characters("/"); + } + } + result->appendString(component); + return result; +} + +String * String::stringByDeletingLastPathComponent() +{ + String * component = lastPathComponent(); + String * result = (String *) this->copy()->autorelease(); + result->deleteCharactersInRange(RangeMake(result->length() - component->length(), component->length())); + return result; +} + +Array * String::componentsSeparatedByString(String * separator) +{ + UChar * p; + Array * result; + + result = Array::array(); + p = mUnicodeChars; + while (1) { + UChar * location; + location = u_strstr(p, separator->unicodeCharacters()); + if (location == NULL) { + break; + } + + unsigned int length = (unsigned int) (location - p); + String * value = new String(p, length); + result->addObject(value); + value->release(); + + p = location + separator->length(); + } + unsigned int length = (unsigned int) (mLength - (p - mUnicodeChars)); + String * value = new String(p, length); + result->addObject(value); + value->release(); + + return result; +} + +int String::intValue() +{ + return (int) strtol(UTF8Characters(), NULL, 10); +} + +unsigned int String::unsignedIntValue() +{ + return (unsigned int) strtoul(UTF8Characters(), NULL, 10); +} + +long String::longValue() +{ + return strtol(UTF8Characters(), NULL, 10); +} + +unsigned long String::unsignedLongValue() +{ + return strtoul(UTF8Characters(), NULL, 10); +} + +long long String::longLongValue() +{ + return strtoll(UTF8Characters(), NULL, 10); +} + +unsigned long long String::unsignedLongLongValue() +{ + return strtoull(UTF8Characters(), NULL, 10); +} + +double String::doubleValue() +{ + return strtod(UTF8Characters(), NULL); + +} + +Data * String::mUTF7EncodedData() +{ + return dataUsingEncoding("mutf-7"); +} + +String * String::stringWithMUTF7Data(Data * data) +{ + return data->stringWithCharset("mutf-7"); +} + +String * String::mUTF7EncodedString() +{ + Data * data = mUTF7EncodedData(); + return data->stringWithCharset("ascii"); +} + +String * String::mUTF7DecodedString() +{ + Data * data = dataUsingEncoding("utf-8"); + return stringWithMUTF7Data(data); +} + +String * String::substringFromIndex(unsigned int idx) +{ + return substringWithRange(RangeMake(idx, length() - idx)); +} + +String * String::substringToIndex(unsigned int idx) +{ + return substringWithRange(RangeMake(0, idx)); +} + +String * String::substringWithRange(Range range) +{ + if (range.location > length()) { + return MCSTR(""); + } + + if (range.location + range.length > length()) { + range.length = length() - range.location; + } + + return stringWithCharacters(unicodeCharacters() + range.location, (unsigned int) range.length); +} + +static chash * uniquedStringHash = NULL; +static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; + +static void initUniquedStringHash() +{ + uniquedStringHash = chash_new(CHASH_DEFAULTSIZE, CHASH_COPYKEY); +} + +String * String::uniquedStringWithUTF8Characters(const char * UTF8Characters) +{ + chashdatum key; + chashdatum value; + static pthread_once_t once = PTHREAD_ONCE_INIT; + int r; + + pthread_once(&once, initUniquedStringHash); + key.data = (void *) UTF8Characters; + key.len = (unsigned int) strlen(UTF8Characters); + pthread_mutex_lock(&lock); + r = chash_get(uniquedStringHash, &key, &value); + if (r == 0) { + pthread_mutex_unlock(&lock); + return (String *) value.data; + } + else { + value.data = new String(UTF8Characters); + value.len = 0; + chash_set(uniquedStringHash, &key, &value, NULL); + pthread_mutex_unlock(&lock); + return (String *) value.data; + } +} + +String * String::htmlEncodedString() +{ + String * htmlStr = String::string(); +#define kBufSz 2000 + + const char * inStr = UTF8Characters(); + const uint32_t kInStrSz = (const uint32_t) strlen(inStr); + int nInStrConsumed = 0; + + static char buf[kBufSz]; + + int outVal = -1; + int nBufConsumed; + int inStrSz; + do { + nBufConsumed = kBufSz-1; + inStrSz = kInStrSz - nInStrConsumed; + outVal = htmlEncodeEntities( (unsigned char*)buf, + &nBufConsumed, + (const unsigned char*)inStr+nInStrConsumed, + &inStrSz, + 0 ); + if (-2 == outVal || -1 == outVal) { + MCLog("Unable to encode html entities of %s", MCUTF8DESC(this)); + break; + } + buf[nBufConsumed] = '\0'; + htmlStr->appendUTF8Characters(buf); + nInStrConsumed += inStrSz; + } while (nInStrConsumed != kInStrSz); + + htmlStr->replaceOccurrencesOfString(MCSTR("\n"), MCSTR("<br/>")); + + return htmlStr; +} + +String * String::cleanedHTMLString() +{ + return HTMLCleaner::cleanHTML(this); +} + +String * String::htmlMessageContent() +{ + String * str = this; + + Array * lines = str->componentsSeparatedByString(MCSTR("\n")); + + while (1) { + if (lines->count() == 0) { + break; + } + + if (((String *) lines->lastObject())->length() > 0) { + break; + } + + lines->removeLastObject(); + } + + String * localString; + int state; + localString = String::string(); + + String * quoted = NULL; + state = 0; + mc_foreacharray(String, line, lines) { + if (state == 0) { + if (line->hasPrefix(MCSTR(">"))) { + state = 1; + quoted = new String(); + int i = 1; + while (i < line->length()) { + if (line->characterAtIndex(i) != ' ') { + break; + } + i ++; + } + quoted->appendString(line->substringFromIndex(i)); + quoted->appendString(MCSTR("\n")); + } + else { + localString->appendString(line->htmlEncodedString()); + localString->appendString(MCSTR("<br/>")); + } + } + else if (state == 1) { + if (line->hasPrefix(MCSTR(">"))) { + int i = 1; + while (i < line->length()) { + if (line->characterAtIndex(i) != ' ') { + break; + } + i ++; + } + quoted->appendString(line->substringFromIndex(i)); + quoted->appendString(MCSTR("\n")); + } + else { + if (quoted != NULL) { + localString->appendString(MCSTR("<blockquote type=\"cite\">")); + localString->appendString(quoted->htmlMessageContent()); + localString->appendString(MCSTR("</blockquote>")); + MC_SAFE_RELEASE(quoted); + state = 0; + } + localString->appendString(line->htmlEncodedString()); + localString->appendString(MCSTR("<br/>")); + } + } + } + + if (quoted != NULL) { + localString->appendString(MCSTR("<blockquote type=\"cite\">")); + localString->appendString(quoted->htmlMessageContent()); + localString->appendString(MCSTR("</blockquote>")); + MC_SAFE_RELEASE(quoted); + } + + return localString; +} + +bool String::isEqualCaseInsensitive(String * otherString) +{ + return caseInsensitiveCompare(otherString) == 0; +} + +Data * String::decodedBase64Data() +{ + const char * utf8 = UTF8Characters(); + unsigned int encoded_len = (unsigned int) strlen(utf8); + int decoded_len = 0; + char * decoded = MCDecodeBase64(utf8, encoded_len, &decoded_len); + Data * result = Data::dataWithBytes(decoded, decoded_len); + free(decoded); + return result; +} + +HashMap * String::serializable() +{ + HashMap * result = Object::serializable(); + result->setObjectForKey(MCSTR("value"), this); + return result; +} + +void String::importSerializable(HashMap * serializable) +{ + String * value = (String *) serializable->objectForKey(MCSTR("value")); + setString(value); +} + +static void * createObject() +{ + return new String(); +} + +INITIALIZE(String) +{ + Object::registerObjectConstructor("mailcore::String", &createObject); + + UChar chars_160[1] = {160}; + s_unicode160 = new String(chars_160, 1); + UChar chars_133[1] = {133}; + s_unicode133 = new String(chars_133, 1); + UChar chars_2028[1] = {0x2028}; + s_unicode2028 = new String(chars_2028, 1); +} |