From eff1dfc76029b2ad8ebb834b6deff43675f690c6 Mon Sep 17 00:00:00 2001 From: Viet Hoa Dinh Date: Tue, 8 Mar 2016 23:44:48 -0800 Subject: fixed conversion to plain/text --- src/core/basetypes/MCString.cpp | 69 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 65 insertions(+), 4 deletions(-) diff --git a/src/core/basetypes/MCString.cpp b/src/core/basetypes/MCString.cpp index 3a5bfc48..46aebd44 100644 --- a/src/core/basetypes/MCString.cpp +++ b/src/core/basetypes/MCString.cpp @@ -1576,6 +1576,23 @@ struct parserState { static void appendQuote(struct parserState * state); +static inline int isWhitespace(UChar ch) +{ + switch (ch) { + case ' ': + case '\t': + case '\n': + case '\f': + case '\r': + case 160: + case 133: + case 0x2028: + case 0x2029: + return true; + } + return false; +} + static void charactersParsed(void * context, const xmlChar * ch, int len) /*" Callback function for stringByStrippingHTML. "*/ @@ -1595,7 +1612,21 @@ static void charactersParsed(void * context, String * modifiedString; modifiedString = new String((const char *) ch, len); modifiedString->autorelease(); + bool hasTerminalWhitespace = false; + bool hasInitialWhitespace = false; + if (modifiedString->length() > 0) { + hasInitialWhitespace = isWhitespace(modifiedString->characterAtIndex(0)); + hasTerminalWhitespace = isWhitespace(modifiedString->characterAtIndex(modifiedString->length() - 1)); + } + modifiedString = modifiedString->stripWhitespace(); + if (hasTerminalWhitespace) { + if (modifiedString->length() == 0) { + hasInitialWhitespace = false; + } + modifiedString->appendString(MCSTR(" ")); + } + /* if (modifiedString->length() > 0) { if (state->lastCharIsWhitespace) { if (modifiedString->characterAtIndex(0) == ' ') { @@ -1603,6 +1634,7 @@ static void charactersParsed(void * context, } } } + */ if (modifiedString->length() > 0) { bool lastIsWhiteSpace; @@ -1637,6 +1669,11 @@ static void charactersParsed(void * context, appendQuote(state); state->hasQuote = true; } + if (hasInitialWhitespace) { + if (!state->lastCharIsWhitespace) { + result->appendString(MCSTR(" ")); + } + } result->appendString(modifiedString); state->lastCharIsWhitespace = lastIsWhiteSpace; state->hasText = true; @@ -1666,13 +1703,35 @@ static void appendQuote(struct parserState * state) state->lastCharIsWhitespace = true; } +static void cleanTerminalSpace(String * result) +{ + if (result->length() > 0) { + if (result->characterAtIndex(result->length() - 1) == ' ') { + result->deleteCharactersInRange(RangeMake(result->length() - 1, 1)); + } + } +} + +static bool isPreviousLineBlankLine(String * result) +{ + if (result->length() < 2) { + return false; + } + return (result->characterAtIndex(result->length() - 1) == '\n') && (result->characterAtIndex(result->length() - 2) == '\n'); +} + static void returnToLine(struct parserState * state) { if (!state->hasQuote) { appendQuote(state); state->hasQuote = true; } - state->result->appendString(MCSTR("\n")); + + cleanTerminalSpace(state->result); + + if (!isPreviousLineBlankLine(state->result)) { + state->result->appendString(MCSTR("\n")); + } state->hasText = false; state->lastCharIsWhitespace = true; state->hasQuote = false; @@ -1922,6 +1981,9 @@ static void elementEnded(void * ctx, const xmlChar * name) if (offset != state->result->length()) { if (link->length() > 0) { if (!state->result->hasSuffix(link)) { + if (!state->lastCharIsWhitespace) { + state->result->appendUTF8Characters(" "); + } state->result->appendUTF8Characters("("); state->result->appendString(link); state->result->appendUTF8Characters(")"); @@ -2027,11 +2089,10 @@ String * String::flattenHTMLAndShowBlockquoteAndLink(bool showBlockquote, bool s MCLog("Leak of %d blocks found in htmlSAXParseDoc", xmlMemBlocks() - mem_base); } - + + cleanTerminalSpace(state.result); state.paragraphSpacingStack->release(); state.linkStack->release(); - - result = result->stripWhitespace(); return result; } -- cgit v1.2.3