aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/core/basetypes/MCHTMLCleaner.cpp
blob: f3ff9c212b7bf7f4a21d8e530a44bc28f0e0110a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
//
//  MCHTMLCleaner.cpp
//  mailcore2
//
//  Created by DINH Viêt Hoà on 2/3/13.
//  Copyright (c) 2013 MailCore. All rights reserved.
//

#include "MCHTMLCleaner.h"

#include "MCString.h"
#include "MCData.h"

// NOTE(review): presumably the Tidy headers included just below rely on a
// `ulong` type that the Android toolchain does not declare — confirm before
// removing or moving this typedef.
#if defined(ANDROID) || defined(__ANDROID__)
typedef unsigned long ulong;
#endif

#include <tidy.h>
#include <buffio.h>

#include "MCUtils.h"
#include "MCLog.h"

#if __APPLE__
#include <TargetConditionals.h>
#endif

using namespace mailcore;

// Repairs the HTML markup in `input` with HTML Tidy and returns the result
// serialized as XHTML.
//
// The text is handed to Tidy as UTF-8 bytes, parsed, cleaned and written back
// into a buffer. Tidy is configured to always produce output
// (TidyForceOutput), so the status codes of the individual steps are
// deliberately not acted upon: this is a best-effort cleaner, not a validator.
//
// @param input  HTML text to clean; may be NULL.
// @return the string built by String::stringWithUTF8Characters() from Tidy's
//         output buffer, or NULL when `input` is NULL.
String * HTMLCleaner::cleanHTML(String * input)
{
    // Bail out before any Tidy resource is allocated: there is nothing to
    // clean, and dereferencing a NULL input below would crash.
    if (input == NULL) {
        return NULL;
    }

    TidyBuffer output;
    TidyBuffer errbuf;
    TidyBuffer docbuf;

    TidyDoc tdoc = tidyCreate();
    tidyBufInit(&output);
    tidyBufInit(&errbuf);
    tidyBufInit(&docbuf);

    // Copy the UTF-8 encoded input into the buffer Tidy will parse.
    Data * data = input->dataUsingEncoding("utf-8");
    tidyBufAppend(&docbuf, data->bytes(), data->length());

#if TARGET_IPHONE_SIMULATOR || TARGET_OS_IPHONE
    // This option is not available on the Mac.
    tidyOptSetBool(tdoc, TidyDropEmptyElems, no);
#endif
    tidyOptSetBool(tdoc, TidyXhtmlOut, yes);
    tidyOptSetInt(tdoc, TidyDoctypeMode, TidyDoctypeUser);

    tidyOptSetBool(tdoc, TidyMark, no);
    tidySetCharEncoding(tdoc, "utf8");
    // Ask Tidy to write output even when it reports errors in the document.
    tidyOptSetBool(tdoc, TidyForceOutput, yes);
    tidyOptSetBool(tdoc, TidyShowWarnings, no);
    tidyOptSetInt(tdoc, TidyShowErrors, 0);

    // Each step returns a Tidy status code (the previous code distinguished
    // values > 1 and < 0 as failures but took no action on them). The codes
    // are discarded explicitly: diagnostics are collected in errbuf and thrown
    // away, and whatever Tidy managed to produce is returned to the caller.
    (void) tidySetErrorBuffer(tdoc, &errbuf);
    (void) tidyParseBuffer(tdoc, &docbuf);
    (void) tidyCleanAndRepair(tdoc);
    (void) tidySaveBuffer(tdoc, &output);

    // NOTE(review): output.bp is passed through as-is; if Tidy wrote nothing
    // it may still be NULL — confirm how stringWithUTF8Characters() treats a
    // NULL argument.
    String * result = String::stringWithUTF8Characters(reinterpret_cast<const char *>(output.bp));

    // The result has been built from the buffer, so all Tidy resources can be
    // released now.
    tidyBufFree(&docbuf);
    tidyBufFree(&output);
    tidyBufFree(&errbuf);
    tidyRelease(tdoc);

    return result;
}