aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Viet Hoa Dinh <dinh.viet.hoa@gmail.com>2016-03-08 23:44:48 -0800
committerGravatar Viet Hoa Dinh <dinh.viet.hoa@gmail.com>2016-03-08 23:46:03 -0800
commiteff1dfc76029b2ad8ebb834b6deff43675f690c6 (patch)
tree7a06071afdc84f9d74dcdf2f8881eb38560a868b
parent42dfa76e33af5d2e7561d9c291ba4060604b5181 (diff)
fixed conversion to plain/text
-rw-r--r--src/core/basetypes/MCString.cpp69
1 files changed, 65 insertions, 4 deletions
diff --git a/src/core/basetypes/MCString.cpp b/src/core/basetypes/MCString.cpp
index 3a5bfc48..46aebd44 100644
--- a/src/core/basetypes/MCString.cpp
+++ b/src/core/basetypes/MCString.cpp
@@ -1576,6 +1576,23 @@ struct parserState {
static void appendQuote(struct parserState * state);
+static inline int isWhitespace(UChar ch)
+{
+ switch (ch) {
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\f':
+ case '\r':
+ case 160:
+ case 133:
+ case 0x2028:
+ case 0x2029:
+ return true;
+ }
+ return false;
+}
+
static void charactersParsed(void * context,
const xmlChar * ch, int len)
/*" Callback function for stringByStrippingHTML. "*/
@@ -1595,7 +1612,21 @@ static void charactersParsed(void * context,
String * modifiedString;
modifiedString = new String((const char *) ch, len);
modifiedString->autorelease();
+ bool hasTerminalWhitespace = false;
+ bool hasInitialWhitespace = false;
+ if (modifiedString->length() > 0) {
+ hasInitialWhitespace = isWhitespace(modifiedString->characterAtIndex(0));
+ hasTerminalWhitespace = isWhitespace(modifiedString->characterAtIndex(modifiedString->length() - 1));
+ }
+ modifiedString = modifiedString->stripWhitespace();
+ if (hasTerminalWhitespace) {
+ if (modifiedString->length() == 0) {
+ hasInitialWhitespace = false;
+ }
+ modifiedString->appendString(MCSTR(" "));
+ }
+ /*
if (modifiedString->length() > 0) {
if (state->lastCharIsWhitespace) {
if (modifiedString->characterAtIndex(0) == ' ') {
@@ -1603,6 +1634,7 @@ static void charactersParsed(void * context,
}
}
}
+ */
if (modifiedString->length() > 0) {
bool lastIsWhiteSpace;
@@ -1637,6 +1669,11 @@ static void charactersParsed(void * context,
appendQuote(state);
state->hasQuote = true;
}
+ if (hasInitialWhitespace) {
+ if (!state->lastCharIsWhitespace) {
+ result->appendString(MCSTR(" "));
+ }
+ }
result->appendString(modifiedString);
state->lastCharIsWhitespace = lastIsWhiteSpace;
state->hasText = true;
@@ -1666,13 +1703,35 @@ static void appendQuote(struct parserState * state)
state->lastCharIsWhitespace = true;
}
+static void cleanTerminalSpace(String * result)
+{
+ if (result->length() > 0) {
+ if (result->characterAtIndex(result->length() - 1) == ' ') {
+ result->deleteCharactersInRange(RangeMake(result->length() - 1, 1));
+ }
+ }
+}
+
+static bool isPreviousLineBlankLine(String * result)
+{
+ if (result->length() < 2) {
+ return false;
+ }
+ return (result->characterAtIndex(result->length() - 1) == '\n') && (result->characterAtIndex(result->length() - 2) == '\n');
+}
+
static void returnToLine(struct parserState * state)
{
if (!state->hasQuote) {
appendQuote(state);
state->hasQuote = true;
}
- state->result->appendString(MCSTR("\n"));
+
+ cleanTerminalSpace(state->result);
+
+ if (!isPreviousLineBlankLine(state->result)) {
+ state->result->appendString(MCSTR("\n"));
+ }
state->hasText = false;
state->lastCharIsWhitespace = true;
state->hasQuote = false;
@@ -1922,6 +1981,9 @@ static void elementEnded(void * ctx, const xmlChar * name)
if (offset != state->result->length()) {
if (link->length() > 0) {
if (!state->result->hasSuffix(link)) {
+ if (!state->lastCharIsWhitespace) {
+ state->result->appendUTF8Characters(" ");
+ }
state->result->appendUTF8Characters("(");
state->result->appendString(link);
state->result->appendUTF8Characters(")");
@@ -2027,11 +2089,10 @@ String * String::flattenHTMLAndShowBlockquoteAndLink(bool showBlockquote, bool s
MCLog("Leak of %d blocks found in htmlSAXParseDoc",
xmlMemBlocks() - mem_base);
}
-
+
+ cleanTerminalSpace(state.result);
state.paragraphSpacingStack->release();
state.linkStack->release();
-
- result = result->stripWhitespace();
return result;
}