1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
//
// HTMLCleaner.cpp
// mailcore2
//
// Created by DINH Viêt Hoà on 2/3/13.
// Copyright (c) 2013 MailCore. All rights reserved.
//
#include "MCHTMLCleaner.h"
#include "MCString.h"
#include "MCData.h"
#include <tidy.h>
#include <buffio.h>
#include "MCUtils.h"
#include "MCLog.h"
using namespace mailcore;
String * HTMLCleaner::cleanHTML(String * input)
{
TidyBuffer output;
TidyBuffer errbuf;
TidyBuffer docbuf;
int rc;
TidyDoc tdoc = tidyCreate();
tidyBufInit(&output);
tidyBufInit(&errbuf);
tidyBufInit(&docbuf);
Data * data = input->dataUsingEncoding("utf-8");
tidyBufAppend(&docbuf, data->bytes(), data->length());
tidyOptSetBool(tdoc, TidyXhtmlOut, yes);
tidySetCharEncoding(tdoc, "utf8");
tidyOptSetBool(tdoc, TidyForceOutput, yes);
rc = tidySetErrorBuffer(tdoc, &errbuf);
if ((rc > 1) || (rc < 0)) {
fprintf(stderr, "error tidySetErrorBuffer: %i\n", rc);
fprintf(stderr, "1:%s", errbuf.bp);
//return NULL;
}
rc = tidyParseBuffer(tdoc, &docbuf);
//MCLog("%s", MCUTF8(input));
if ((rc > 1) || (rc < 0)) {
fprintf(stderr, "error tidyParseBuffer: %i\n", rc);
fprintf(stderr, "1:%s", errbuf.bp);
//return NULL;
}
rc = tidyCleanAndRepair(tdoc);
if ((rc > 1) || (rc < 0)) {
fprintf(stderr, "error tidyCleanAndRepair: %i\n", rc);
fprintf(stderr, "1:%s", errbuf.bp);
//return NULL;
}
rc = tidySaveBuffer(tdoc, &output);
if ((rc > 1) || (rc < 0)) {
fprintf(stderr, "error tidySaveBuffer: %i\n", rc);
fprintf(stderr, "1:%s", errbuf.bp);
}
String * result = String::stringWithUTF8Characters((const char *) output.bp);
tidyBufFree(&output);
tidyBufFree(&errbuf);
tidyRelease(tdoc);
return result;
/*
if ( ok ) {
rc = tidySetErrorBuffer( tdoc, &errbuf ); // Capture diagnostics
}
if ( rc >= 0 ) {
rc = tidyParseString( tdoc, input ); // Parse the input
}
if ( rc >= 0 ) {
rc = tidyCleanAndRepair( tdoc ); // Tidy it up!
}
if ( rc >= 0 ) {
rc = tidyRunDiagnostics( tdoc ); // Kvetch
}
if ( rc > 1 ) { // If error, force output.
rc = ( tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1 );
}
if ( rc >= 0 ) {
rc = tidySaveBuffer( tdoc, &output ); // Pretty Print
}
*/
/*
if ( rc >= 0 )
{
if ( rc > 0 )
printf( "\\nDiagnostics:\\n\\n\%s", errbuf.bp );
printf( "\\nAnd here is the result:\\n\\n\%s", output.bp );
}
else
printf( "A severe error (\%d) occurred.\\n", rc );
tidyBufFree( &output );
tidyBufFree( &errbuf );
tidyRelease( tdoc );
return rc;
*/
}
|