aboutsummaryrefslogtreecommitdiff
path: root/exampleData/ruleSets/language-processing/natural/upGoerFive.js
blob: fb17a54fffad3f0b60b3b62681a06a82a4fbaff4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
var natural = require('natural');

// Rule descriptor: a human-readable name and description. The checking
// function is attached separately as `rule.rule`.
// Assigned without a declaration on purpose, exactly as before, so `rule`
// stays a global binding.
rule = {
  name: "Common words",
  description: "Identifies rare word use (words not in the 1000 most common English word list)."
};

// var words = require('./1-1000');

// The list of "common" words that tokens are checked against (see
// isCommonWord). Entries are lowercase except the pronoun 'I'; several
// entries are base forms ('differ', 'clothe', 'populate', ...).
//
// Declared with `var` so the list no longer leaks an implicit global, and
// the previously corrupted entries 'true .' and ']total' are restored to
// 'true' and 'total' so those words can actually match.
var words = ['the', 'of', 'to', 'and', 'a', 'in', 'is', 'it',
'you', 'that', 'he', 'was', 'for', 'on', 'are', 'with', 'as', 'I',
'his', 'they', 'be', 'at', 'one', 'have', 'this', 'from', 'or', 'had',
'by', 'hot', 'word', 'but', 'what', 'some', 'we', 'can', 'out',
'other', 'were', 'all', 'there', 'when', 'up', 'use', 'your', 'how',
'said', 'an', 'each', 'she', 'which', 'do', 'their', 'time', 'if',
'will', 'way', 'about', 'many', 'then', 'them', 'write', 'would',
'like', 'so', 'these', 'her', 'long', 'make', 'thing', 'see', 'him',
'two', 'has', 'look', 'more', 'day', 'could', 'go', 'come', 'did',
'number', 'sound', 'no', 'most', 'people', 'my', 'over', 'know',
'water', 'than', 'call', 'first', 'who', 'may', 'down', 'side',
'been', 'now', 'find', 'any', 'new', 'work', 'part', 'take', 'get',
'place', 'made', 'live', 'where', 'after', 'back', 'little', 'only',
'round', 'man', 'year', 'came', 'show', 'every', 'good', 'me', 'give',
'our', 'under', 'name', 'very', 'through', 'just', 'form', 'sentence',
'great', 'think', 'say', 'help', 'low', 'line', 'differ', 'turn',
'cause', 'much', 'mean', 'before', 'move', 'right', 'boy', 'old',
'too', 'same', 'tell', 'does', 'set', 'three', 'want', 'air', 'well',
'also', 'play', 'small', 'end', 'put', 'home', 'read', 'hand', 'port',
'large', 'spell', 'add', 'even', 'land', 'here', 'must', 'big',
'high', 'such', 'follow', 'act', 'why', 'ask', 'men', 'change',
'went', 'light', 'kind', 'off', 'need', 'house', 'picture', 'try',
'us', 'again', 'animal', 'point', 'mother', 'world', 'near', 'build',
'self', 'earth', 'father', 'head', 'stand', 'own', 'page', 'should',
'country', 'found', 'answer', 'school', 'grow', 'study', 'still',
'learn', 'plant', 'cover', 'food', 'sun', 'four', 'between', 'state',
'keep', 'eye', 'never', 'last', 'let', 'thought', 'city', 'tree',
'cross', 'farm', 'hard', 'start', 'might', 'story', 'saw', 'far',
'sea', 'draw', 'left', 'late', 'run', 'don\'t', 'while', 'press',
'close', 'night', 'real', 'life', 'few', 'north', 'open', 'seem',
'together', 'next', 'white', 'children', 'begin', 'got', 'walk',
'example', 'ease', 'paper', 'group', 'always', 'music', 'those',
'both', 'mark', 'often', 'letter', 'until', 'mile', 'river', 'car',
'feet', 'care', 'second', 'book', 'carry', 'took', 'science', 'eat',
'room', 'friend', 'began', 'idea', 'fish', 'mountain', 'stop', 'once',
'base', 'hear', 'horse', 'cut', 'sure', 'watch', 'color', 'face',
'wood', 'main', 'enough', 'plain', 'girl', 'usual', 'young', 'ready',
'above', 'ever', 'red', 'list', 'though', 'feel', 'talk', 'bird',
'soon', 'body', 'dog', 'family', 'direct', 'pose', 'leave', 'song',
'measure', 'door', 'product', 'black', 'short', 'numeral', 'class',
'wind', 'question', 'happen', 'complete', 'ship', 'area', 'half',
'rock', 'order', 'fire', 'south', 'problem', 'piece', 'told', 'knew',
'pass', 'since', 'top', 'whole', 'king', 'space', 'heard', 'best',
'hour', 'better', 'true', 'during', 'hundred', 'five', 'remember',
'step', 'early', 'hold', 'west', 'ground', 'interest', 'reach',
'fast', 'verb', 'sing', 'listen', 'six', 'table', 'travel', 'less',
'morning', 'ten', 'simple', 'several', 'vowel', 'toward', 'war',
'lay', 'against', 'pattern', 'slow', 'center', 'love', 'person',
'money', 'serve', 'appear', 'road', 'map', 'rain', 'rule', 'govern',
'pull', 'cold', 'notice', 'voice', 'unit', 'power', 'town', 'fine',
'certain', 'fly', 'fall', 'lead', 'cry', 'dark', 'machine', 'note',
'wait', 'plan', 'figure', 'star', 'box', 'noun', 'field', 'rest',
'correct', 'able', 'pound', 'done', 'beauty', 'drive', 'stood',
'contain', 'front', 'teach', 'week', 'final', 'gave', 'green', 'oh',
'quick', 'develop', 'ocean', 'warm', 'free', 'minute', 'strong',
'special', 'mind', 'behind', 'clear', 'tail', 'produce', 'fact',
'street', 'inch', 'multiply', 'nothing', 'course', 'stay', 'wheel',
'full', 'force', 'blue', 'object', 'decide', 'surface', 'deep',
'moon', 'island', 'foot', 'system', 'busy', 'test', 'record', 'boat',
'common', 'gold', 'possible', 'plane', 'stead', 'dry', 'wonder',
'laugh', 'thousand', 'ago', 'ran', 'check', 'game', 'shape', 'equate',
'miss', 'brought', 'heat', 'snow', 'tire', 'bring', 'yes', 'distant',
'fill', 'east', 'paint', 'language', 'among', 'grand', 'ball', 'yet',
'wave', 'drop', 'heart', 'am', 'present', 'heavy', 'dance', 'engine',
'position', 'arm', 'wide', 'sail', 'material', 'size', 'vary',
'settle', 'speak', 'weight', 'general', 'ice', 'matter', 'circle',
'pair', 'include', 'divide', 'syllable', 'felt', 'perhaps', 'pick',
'sudden', 'count', 'square', 'reason', 'length', 'represent', 'art',
'subject', 'region', 'energy', 'hunt', 'probable', 'bed', 'brother',
'egg', 'ride', 'cell', 'believe', 'fraction', 'forest', 'sit', 'race',
'window', 'store', 'summer', 'train', 'sleep', 'prove', 'lone', 'leg',
'exercise', 'wall', 'catch', 'mount', 'wish', 'sky', 'board', 'joy',
'winter', 'sat', 'written', 'wild', 'instrument', 'kept', 'glass',
'grass', 'cow', 'job', 'edge', 'sign', 'visit', 'past', 'soft', 'fun',
'bright', 'gas', 'weather', 'month', 'million', 'bear', 'finish',
'happy', 'hope', 'flower', 'clothe', 'strange', 'gone', 'jump',
'baby', 'eight', 'village', 'meet', 'root', 'buy', 'raise', 'solve',
'metal', 'whether', 'push', 'seven', 'paragraph', 'third', 'shall',
'held', 'hair', 'describe', 'cook', 'floor', 'either', 'result',
'burn', 'hill', 'safe', 'cat', 'century', 'consider', 'type', 'law',
'bit', 'coast', 'copy', 'phrase', 'silent', 'tall', 'sand', 'soil',
'roll', 'temperature', 'finger', 'industry', 'value', 'fight', 'lie',
'beat', 'excite', 'natural', 'view', 'sense', 'ear', 'else', 'quite',
'broke', 'case', 'middle', 'kill', 'son', 'lake', 'moment', 'scale',
'loud', 'spring', 'observe', 'child', 'straight', 'consonant',
'nation', 'dictionary', 'milk', 'speed', 'method', 'organ', 'pay',
'age', 'section', 'dress', 'cloud', 'surprise', 'quiet', 'stone',
'tiny', 'climb', 'cool', 'design', 'poor', 'lot', 'experiment',
'bottom', 'key', 'iron', 'single', 'stick', 'flat', 'twenty', 'skin',
'smile', 'crease', 'hole', 'trade', 'melody', 'trip', 'office',
'receive', 'row', 'mouth', 'exact', 'symbol', 'die', 'least',
'trouble', 'shout', 'except', 'wrote', 'seed', 'tone', 'join',
'suggest', 'clean', 'break', 'lady', 'yard', 'rise', 'bad', 'blow',
'oil', 'blood', 'touch', 'grew', 'cent', 'mix', 'team', 'wire',
'cost', 'lost', 'brown', 'wear', 'garden', 'equal', 'sent', 'choose',
'fell', 'fit', 'flow', 'fair', 'bank', 'collect', 'save', 'control',
'decimal', 'gentle', 'woman', 'captain', 'practice', 'separate',
'difficult', 'doctor', 'please', 'protect', 'noon', 'whose', 'locate',
'ring', 'character', 'insect', 'caught', 'period', 'indicate',
'radio', 'spoke', 'atom', 'human', 'history', 'effect', 'electric',
'expect', 'crop', 'modern', 'element', 'hit', 'student', 'corner',
'party', 'supply', 'bone', 'rail', 'imagine', 'provide', 'agree',
'thus', 'capital', 'won\'t', 'chair', 'danger', 'fruit', 'rich',
'thick', 'soldier', 'process', 'operate', 'guess', 'necessary',
'sharp', 'wing', 'create', 'neighbor', 'wash', 'bat', 'rather',
'crowd', 'corn', 'compare', 'poem', 'string', 'bell', 'depend',
'meat', 'rub', 'tube', 'famous', 'dollar', 'stream', 'fear', 'sight',
'thin', 'triangle', 'planet', 'hurry', 'chief', 'colony', 'clock',
'mine', 'tie', 'enter', 'major', 'fresh', 'search', 'send', 'yellow',
'gun', 'allow', 'print', 'dead', 'spot', 'desert', 'suit', 'current',
'lift', 'rose', 'continue', 'block', 'chart', 'hat', 'sell',
'success', 'company', 'subtract', 'event', 'particular', 'deal',
'swim', 'term', 'opposite', 'wife', 'shoe', 'shoulder', 'spread',
'arrange', 'camp', 'invent', 'cotton', 'born', 'determine', 'quart',
'nine', 'truck', 'noise', 'level', 'chance', 'gather', 'shop',
'stretch', 'throw', 'shine', 'property', 'column', 'molecule',
'select', 'wrong', 'gray', 'repeat', 'require', 'broad', 'prepare',
'salt', 'nose', 'plural', 'anger', 'claim', 'continent', 'oxygen',
'sugar', 'death', 'pretty', 'skill', 'women', 'season', 'solution',
'magnet', 'silver', 'thank', 'branch', 'match', 'suffix',
'especially', 'fig', 'afraid', 'huge', 'sister', 'steel', 'discuss',
'forward', 'similar', 'guide', 'experience', 'score', 'apple',
'bought', 'led', 'pitch', 'coat', 'mass', 'card', 'band', 'rope',
'slip', 'win', 'dream', 'evening', 'condition', 'feed', 'tool',
'total', 'basic', 'smell', 'valley', 'nor', 'double', 'seat',
'arrive', 'master', 'track', 'parent', 'shore', 'division', 'sheet',
'substance', 'favor', 'connect', 'post', 'spend', 'chord', 'fat',
'glad', 'original', 'share', 'station', 'dad', 'bread', 'charge',
'proper', 'bar', 'offer', 'segment', 'slave', 'duck', 'instant',
'market', 'degree', 'populate', 'chick', 'dear', 'enemy', 'reply',
'drink', 'occur', 'support', 'speech', 'nature', 'range', 'steam',
'motion', 'path', 'liquid', 'log', 'meant', 'quotient', 'teeth',
'shell', 'neck' ];

// Language-processing helpers taken from the `natural` package.
// `tokenizer` splits node text into tokens for markWords.
// NOTE(review): `stemmer` is not referenced anywhere else in the code shown
// in this file; kept because other code may rely on it.
var stemmer = natural.PorterStemmer,
    tokenizer = new natural.TreebankWordTokenizer();

/**
 * Collect the text nodes found beneath `node`, in document order.
 *
 * Subtrees are skipped entirely when:
 *  - the node's id attribute is 'fiveui-top', or
 *  - the node is a <script>, <style> or <noscript> element, whose text is
 *    source code / raw markup rather than prose and must not be tokenized
 *    or rewritten by the rule.
 *
 * @param {Node} node root of the subtree to walk
 * @param {boolean} [includeWhitespaceNodes] when truthy, text nodes that
 *        are empty or contain only whitespace are returned as well
 * @returns {Node[]} the matching text nodes
 */
var getTextNodesIn = function (node, includeWhitespaceNodes) {
    var textNodes = [], whitespace = /^\s*$/;
    // Element names whose text content is not human-readable prose.
    var nonProse = { SCRIPT: true, STYLE: true, NOSCRIPT: true };

    function getTextNodes(node) {
        if ($(node).attr('id') === 'fiveui-top') {
          return;
        }
        if (node.nodeType === 3) {
            // nodeType 3 is a text node.
            if (includeWhitespaceNodes || !whitespace.test(node.nodeValue)) {
                textNodes.push(node);
            }
            return;
        }
        if (nonProse.hasOwnProperty(String(node.nodeName).toUpperCase())) {
            return;
        }
        for (var i = 0, len = node.childNodes.length; i < len; ++i) {
            getTextNodes(node.childNodes[i]);
        }
    }

    getTextNodes(node);
    return textNodes;
};

/**
 * Tell whether `word` appears in the common-word list `words`.
 *
 * The list is lowercase (apart from the pronoun 'I'), so the word is tried
 * both as given and lowercased; otherwise capitalized words such as a
 * sentence-initial "The" would be reported as uncommon.
 *
 * @param {string} word a single token
 * @returns {boolean} true when the word is in the list
 */
var isCommonWord = function(word) {
  if (_.contains(words, word)) {
    return true;
  }
  return _.contains(words, String(word).toLowerCase());
};

/**
 * Tell whether `str` is exactly one of the recognised punctuation tokens.
 *
 * @param {string} str a single token
 * @returns {boolean} true only for an exact match against the list below
 */
var isPunctuation = function(str) {
  var marks = ['&', '%', '(', ')', ';', ':', '.', ',', '"', "'", '`', '!', '?' ];
  for (var k = 0; k < marks.length; k++) {
    if (marks[k] === str) {
      return true;
    }
  }
  return false;
};


/**
 * Replace `obj` with a paragraph that contains its text re-assembled token
 * by token, and report every token that is neither a common word,
 * punctuation, nor a number.
 *
 * Common words, punctuation and numbers are appended as plain text padded
 * with spaces. Every other token is wrapped in its own <span>, and that
 * span is passed to `report.error` together with the message.
 *
 * @param {Node} obj node whose text is checked; it is replaced in the DOM
 * @param {Object} report reporter; `report.error(message, node)` is called
 *        once per uncommon token
 */
var markWords = function(obj, report) {
  // Tokens are always strings, so a type check such as _.isNumber can never
  // recognise them; match digit sequences (optionally signed, with '.' or
  // ',' separators) instead.
  var numericToken = /^[+-]?\d+(?:[.,]\d+)*$/;

  var toks = tokenizer.tokenize($(obj).text());
  // NOTE(review): every replacement paragraph is given the same id 'text',
  // so a page with several text nodes ends up with duplicate ids. Left
  // unchanged here; confirm whether anything relies on that id.
  var rawObj = $('<p></p>', {id: 'text'});
  $(obj).replaceWith(rawObj);

  _.each(toks, function(tok) {
    if (isCommonWord(tok) || isPunctuation(tok) || numericToken.test(tok)) {
      rawObj.append(' ' + tok + ' ');
    } else {
      // Set the token through .text() so page text containing '<' or '&'
      // is inserted literally instead of being parsed as markup.
      var newObj = $('<span></span>').text(tok);
      rawObj.append(newObj);
      // Keep a separating space after the highlighted word.
      rawObj.append(' ');
      report.error("The word '"+tok+"' is uncommon", newObj.get(0));
    }
  });
};

/**
 * Rule entry point: for each element matched by fiveui.query('body'),
 * gather its text nodes and run markWords over every one of them.
 *
 * @param {Object} report reporter object handed on to markWords
 */
rule.rule = function(report) {
  console.log("checking for rare words");

  var bodies = fiveui.query('body');
  bodies.each(function() {
    // Collect the full list first; markWords replaces nodes as it goes.
    var textNodes = getTextNodesIn(this);
    for (var k = 0; k < textNodes.length; k++) {
      console.log(textNodes[k]);
      markWords(textNodes[k], report);
    }
  });

  console.log("done checking for rare words");
};