about summary refs log tree commit diff
path: root/exampleData/ruleSets/language-processing/natural/nattest1.js
diff options
context:
space:
mode:
Diffstat (limited to 'exampleData/ruleSets/language-processing/natural/nattest1.js')
-rw-r--r-- exampleData/ruleSets/language-processing/natural/nattest1.js 66
1 files changed, 66 insertions, 0 deletions
diff --git a/exampleData/ruleSets/language-processing/natural/nattest1.js b/exampleData/ruleSets/language-processing/natural/nattest1.js
new file mode 100644
index 0000000..bcb8712
--- /dev/null
+++ b/exampleData/ruleSets/language-processing/natural/nattest1.js
@@ -0,0 +1,66 @@
+// Rule metadata: a short display name and a longer description of what the
+// rule reports. NOTE(review): presumably read by the rule runner that loads
+// this module - confirm against the host.
+exports.name = "Common words";
+exports.description = "Identifies rare word use (words not in the 1000 most common English word list).";
+
+// Third-party natural-language toolkit; supplies the stemmer and tokenizer below.
+var natural = require('natural');
+
+// NOTE(review): `stemmer` is not referenced anywhere else in the visible file.
+var stemmer = natural.PorterStemmer;
+// Shared tokenizer instance used by markWords() to split text into tokens.
+var tokenizer = new natural.TreebankWordTokenizer();
+
+/**
+ * Collects the text nodes found under `node`, depth first, in child order.
+ *
+ * Besides a single DOM node, an array-like collection of nodes (anything
+ * without a `nodeType` but with a numeric `length`, such as the `$(this)`
+ * wrapper the rule entry point passes in) is accepted; each member is
+ * searched in turn. Missing values and nodes without `childNodes` are
+ * skipped instead of throwing.
+ *
+ * @param {Object} node - DOM node, or array-like collection of DOM nodes.
+ * @param {boolean} [includeWhitespaceNodes] - when truthy, text nodes that
+ *   are empty or whitespace-only are returned as well.
+ * @returns {Array} the matching text nodes (empty when nothing was found).
+ */
+var getTextNodesIn = function (node, includeWhitespaceNodes) {
+  var TEXT_NODE = 3; // value of nodeType for DOM text nodes
+  var whitespace = /^\s*$/;
+  var textNodes = [];
+
+  function collect(current) {
+    var children, i, len;
+
+    // Primitives are rejected too: a string is array-like and would
+    // otherwise be walked character by character.
+    if (current === null || typeof current !== 'object') {
+      return;
+    }
+
+    if (current.nodeType === TEXT_NODE) {
+      if (includeWhitespaceNodes || !whitespace.test(current.nodeValue)) {
+        textNodes.push(current);
+      }
+      return;
+    }
+
+    // A real node is walked through its children; a wrapper/collection
+    // (no nodeType) is walked through its own members.
+    children = current.nodeType === undefined ? current : current.childNodes;
+    if (!children || typeof children.length !== 'number') {
+      return;
+    }
+
+    for (i = 0, len = children.length; i < len; ++i) {
+      collect(children[i]);
+    }
+  }
+
+  collect(node);
+  return textNodes;
+};
+
+/**
+ * Tells whether `word` appears in the common-word list.
+ *
+ * The exact spelling is tried first; if that fails, the lower-cased form is
+ * tried as well so that a capitalised sentence-initial word ("The") is not
+ * reported as uncommon.
+ *
+ * NOTE(review): neither `words` nor `_` is defined in the visible part of
+ * this file - presumably both are supplied by the environment that runs the
+ * rule, and `words` presumably holds lower-case entries; confirm.
+ *
+ * @param {string} word - token to look up.
+ * @returns {boolean} true when the token (or its lower-cased form) is listed.
+ */
+var commonWord = function(word) {
+  if (_.contains(words, word)) {
+    return true;
+  }
+  return typeof word === 'string' && _.contains(words, word.toLowerCase());
+};
+
+/**
+ * Tells whether `str` is a punctuation token, i.e. a non-empty string made
+ * up solely of ASCII punctuation characters.
+ *
+ * This accepts every single-character mark the previous hard-coded list
+ * contained (& % ( ) ; : . , " ' ` ! ?) and additionally multi-character or
+ * unlisted marks such as "...", "--", "''", "[" or "$", which would otherwise
+ * be reported as uncommon words.
+ *
+ * @param {string} str - token to classify.
+ * @returns {boolean} true for punctuation-only tokens, false otherwise
+ *   (including the empty string and non-string values).
+ */
+var isPunctuation = function(str) {
+  // The four ranges cover ASCII 0x21-0x2F, 0x3A-0x40, 0x5B-0x60 and 0x7B-0x7E.
+  return typeof str === 'string' && /^[!-\/:-@\[-`{-~]+$/.test(str);
+};
+
+
+/**
+ * Rebuilds the text of `obj` token by token inside a new <p id="text">
+ * element, wraps every token that is neither a common word, punctuation nor
+ * a number in a <span>, reports each wrapped token through `report.error`,
+ * and finally replaces `obj` with the new paragraph.
+ *
+ * @param {Object} obj - the node to process; it is passed through `$()`
+ *   first, so either a raw node or an already wrapped one works.
+ * @param {Object} report - collector exposing `error(message, element)`.
+ *
+ * NOTE(review): every generated paragraph gets the same id ('text'); kept
+ * unchanged here in case styling elsewhere relies on it.
+ */
+var markWords = function(obj, report) {
+  // Tokens are strings, so a type check such as _.isNumber() never matches;
+  // recognise digits with optional sign and ./, separated groups instead.
+  var numeric = /^[+-]?\d+(?:[.,]\d+)*$/;
+  var $node = $(obj);
+
+  // Builds plain text content without handing the string to the HTML
+  // parser, so characters such as < or & in a token stay literal.
+  var plainText = function(str) {
+    return $('<span></span>').text(str).contents();
+  };
+
+  var mergeFn = function(container, tok) {
+    if (commonWord(tok) || isPunctuation(tok) || numeric.test(tok)) {
+      container.append(plainText(tok + ' '));
+    } else {
+      var marked = $('<span></span>').text(tok);
+      container.append(marked);
+      // The separator is appended on its own so the spacing after a flagged
+      // word does not depend on how an HTML string would be parsed.
+      container.append(plainText(' '));
+      report.error("The word '"+tok+"' is uncommon", marked);
+    }
+    return container;
+  };
+
+  var toks = tokenizer.tokenize($node.text());
+  var rawObj = $('<p></p>', {id: 'text'});
+  var newObj = _.reduce(toks, mergeFn, rawObj);
+
+  $node.replaceWith(newObj);
+};
+
+/**
+ * Rule entry point: finds every non-whitespace text node under <body> and
+ * runs markWords() on it, so uncommon words are wrapped and reported.
+ *
+ * @param {Object} report - collector handed through to markWords(); it must
+ *   expose `error(message, element)`.
+ */
+exports.rule = function(report) {
+  // `$` is the selector function used everywhere else in this file; the
+  // previous `$5` is not defined anywhere in the visible code.
+  $('body').each(function() {
+    // Pass the element itself: the tree walk reads nodeType/childNodes,
+    // which a `$()` wrapper does not provide.
+    var nodes = getTextNodesIn(this);
+    _.each(nodes, function(n) {
+      // markWords() calls wrapper methods (text, replaceWith) on its argument.
+      markWords($(n), report);
+    });
+  });
+};
\ No newline at end of file