From 29bb5e6e86d615e49b0c58413e4dc14e73230d97 Mon Sep 17 00:00:00 2001 From: Rogan Creswick Date: Fri, 14 Jun 2013 17:44:10 -0700 Subject: note: report needs a DOM node, not a jquery object --- .../language-processing/natural/upGoerFive-gen.js | 2113 ++++++++++---------- .../language-processing/natural/upGoerFive.js | 3 +- 2 files changed, 1059 insertions(+), 1057 deletions(-) diff --git a/exampleData/ruleSets/language-processing/natural/upGoerFive-gen.js b/exampleData/ruleSets/language-processing/natural/upGoerFive-gen.js index 2162fbe..17f3fda 100644 --- a/exampleData/ruleSets/language-processing/natural/upGoerFive-gen.js +++ b/exampleData/ruleSets/language-processing/natural/upGoerFive-gen.js @@ -178,6 +178,7 @@ var isPunctuation = function(str) { var markWords = function(obj, report) { var toks = tokenizer.tokenize($(obj).text()); var rawObj = $('

', {id: 'text'}); +// $(obj).empty(); $(obj).replaceWith(rawObj); _.each(toks, function(tok) { @@ -186,7 +187,7 @@ var markWords = function(obj, report) { } else { var newObj = $(""+tok+" "); rawObj.append(newObj); - report.error("The word '"+tok+"' is uncommon", newObj); + report.error("The word '"+tok+"' is uncommon", newObj.get(0)); } }); }; @@ -267,7 +268,7 @@ exports.normalize_ja = require('./normalizers/normalizer_ja').normalize_ja; exports.removeDiacritics = require('./normalizers/remove_diacritics'); exports.transliterate_ja = require('./transliterators/ja'); -},{"./phonetics/soundex":3,"./phonetics/metaphone":4,"./phonetics/double_metaphone":5,"./phonetics/dm_soundex":6,"./stemmers/porter_stemmer":7,"./stemmers/porter_stemmer_fa":8,"./stemmers/porter_stemmer_ru":9,"./stemmers/porter_stemmer_es":10,"./stemmers/porter_stemmer_it":11,"./stemmers/lancaster_stemmer":12,"./stemmers/stemmer_ja":13,"./tokenizers/aggressive_tokenizer_fa":14,"./tokenizers/aggressive_tokenizer_ru":15,"./tokenizers/aggressive_tokenizer_es":16,"./tokenizers/aggressive_tokenizer_it":17,"./tokenizers/aggressive_tokenizer":18,"./tokenizers/regexp_tokenizer":19,"./tokenizers/treebank_word_tokenizer":20,"./tokenizers/tokenizer_ja":21,"./classifiers/bayes_classifier":22,"./classifiers/logistic_regression_classifier":23,"./inflectors/noun_inflector":24,"./inflectors/fr/noun_inflector":25,"./inflectors/ja/noun_inflector":26,"./inflectors/present_verb_inflector":27,"./inflectors/count_inflector":28,"./wordnet/wordnet":29,"./tfidf/tfidf":30,"./analyzers/sentence_analyzer":31,"./util/stopwords":32,"./ngrams/ngrams":33,"./distance/jaro-winkler_distance":34,"./distance/levenshtein_distance":35,"./distance/dice_coefficient":36,"./normalizers/normalizer_ja":37,"./normalizers/remove_diacritics":38,"./transliterators/ja":39}],28:[function(require,module,exports){ +},{"./phonetics/soundex":3,"./phonetics/metaphone":4,"./phonetics/double_metaphone":5,"./phonetics/dm_soundex":6,"./stemmers/porter_stemmer":7,"./stemmers/porter_stemmer_fa":8,"./stemmers/porter_stemmer_ru":9,"./stemmers/porter_stemmer_es":10,"./stemmers/porter_stemmer_it":11,"./stemmers/lancaster_stemmer":12,"./stemmers/stemmer_ja":13,"./tokenizers/aggressive_tokenizer_fa":14,"./tokenizers/aggressive_tokenizer_ru":15,"./tokenizers/aggressive_tokenizer_es":16,"./tokenizers/aggressive_tokenizer_it":17,"./tokenizers/regexp_tokenizer":18,"./tokenizers/aggressive_tokenizer":19,"./tokenizers/treebank_word_tokenizer":20,"./classifiers/bayes_classifier":21,"./tokenizers/tokenizer_ja":22,"./classifiers/logistic_regression_classifier":23,"./inflectors/noun_inflector":24,"./inflectors/fr/noun_inflector":25,"./inflectors/ja/noun_inflector":26,"./inflectors/present_verb_inflector":27,"./inflectors/count_inflector":28,"./wordnet/wordnet":29,"./tfidf/tfidf":30,"./analyzers/sentence_analyzer":31,"./util/stopwords":32,"./ngrams/ngrams":33,"./distance/jaro-winkler_distance":34,"./distance/levenshtein_distance":35,"./distance/dice_coefficient":36,"./normalizers/normalizer_ja":37,"./normalizers/remove_diacritics":38,"./transliterators/ja":39}],28:[function(require,module,exports){ /* Copyright (c) 2011, Chris Umbel @@ -1421,6 +1422,196 @@ SoundEx.transformR = transformR; SoundEx.condense = condense; SoundEx.padRight0 = padRight0; +})() +},{"./phonetic":44}],4:[function(require,module,exports){ +(function(){/* +Copyright (c) 2011, Chris Umbel + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +var Phonetic = require('./phonetic'); + +function dedup(token) { + return token.replace(/([^c])\1/g, '$1'); +} + +function dropInitialLetters(token) { + if(token.match(/^(kn|gn|pn|ae|wr)/)) + return token.substr(1, token.length - 1); + + return token; +} + +function dropBafterMAtEnd(token) { + return token.replace(/mb$/, 'm'); +} + +function cTransform(token) { + token = token.replace(/([^s]|^)(c)(h)/g, '$1x$3').trim(); + token = token.replace(/cia/g, 'xia'); + token = token.replace(/c(i|e|y)/g, 's$1'); + token = token.replace(/c/g, 'k'); + + return token; +} + +function dTransform(token) { + token = token.replace(/d(ge|gy|gi)/g, 'j$1'); + token = token.replace(/d/g, 't'); + + return token; +} + +function dropG(token) { + token = token.replace(/gh(^$|[^aeiou])/g, 'h$1'); + token = token.replace(/g(n|ned)$/g, '$1'); + + return token; +} + +function transformG(token) { + token = token.replace(/([^g]|^)(g)(i|e|y)/g, '$1j$3'); + token = token.replace(/gg/g, 'g'); + token = token.replace(/g/g, 'k'); + + return token; +} + +function dropH(token) { + return token.replace(/([aeiou])h([^aeiou])/g, '$1$2'); +} + +function transformCK(token) { + return token.replace(/ck/g, 'k'); +} +function transformPH(token) { + return token.replace(/ph/g, 'f'); +} + +function transformQ(token) { + return token.replace(/q/g, 'k'); +} + +function transformS(token) { + return token.replace(/s(h|io|ia)/g, 'x$1'); +} + +function transformT(token) { + token = token.replace(/t(ia|io)/g, 'x$1'); + token = token.replace(/th/, '0'); + + return token; +} + +function dropT(token) { + return token.replace(/tch/g, 'ch'); +} + +function transformV(token) { + return token.replace(/v/g, 'f'); +} + +function transformWH(token) { + return token.replace(/^wh/, 'w'); +} + +function dropW(token) { + return token.replace(/w([^aeiou]|$)/g, '$1'); +} + +function transformX(token) { + token = token.replace(/^x/, 's'); + token = token.replace(/x/g, 'ks'); + return token; +} + +function dropY(token) { + return token.replace(/y([^aeiou]|$)/g, '$1'); +} + +function transformZ(token) { + return token.replace(/z/, 's'); +} + +function dropVowels(token) { + return token.charAt(0) + token.substr(1, token.length).replace(/[aeiou]/g, ''); +} + +var Metaphone = new Phonetic(); +module.exports = Metaphone; + +Metaphone.process = function(token, maxLength) { + maxLength == maxLength || 32; + token = token.toLowerCase(); + token = dedup(token); + token = dropInitialLetters(token); + token = dropBafterMAtEnd(token); + token = transformCK(token); + token = cTransform(token); + token = dTransform(token); + token = dropG(token); + token = transformG(token); + token = dropH(token); + token = transformPH(token); + token = transformQ(token); + token = transformS(token); + token = transformX(token); + token = transformT(token); + token = dropT(token); + token = transformV(token); + token = transformWH(token); + token = dropW(token); + token = dropY(token); + token = transformZ(token); + token = dropVowels(token); + + token.toUpperCase(); + if(token.length >= maxLength) + token = token.substring(0, maxLength); + + return token.toUpperCase(); +}; + +// expose functions for testing +Metaphone.dedup = dedup; +Metaphone.dropInitialLetters = dropInitialLetters; +Metaphone.dropBafterMAtEnd = dropBafterMAtEnd; +Metaphone.cTransform = cTransform; +Metaphone.dTransform = dTransform; +Metaphone.dropG = dropG; +Metaphone.transformG = transformG; +Metaphone.dropH = dropH; +Metaphone.transformCK = transformCK; +Metaphone.transformPH = transformPH; +Metaphone.transformQ = transformQ; +Metaphone.transformS = transformS; +Metaphone.transformT = transformT; +Metaphone.dropT = dropT; +Metaphone.transformV = transformV; +Metaphone.transformWH = transformWH; +Metaphone.dropW = dropW; +Metaphone.transformX = transformX; +Metaphone.dropY = dropY; +Metaphone.transformZ = transformZ; +Metaphone.dropVowels = dropVowels; + })() },{"./phonetic":44}],5:[function(require,module,exports){ (function(){/* @@ -1931,9 +2122,9 @@ DoubleMetaphone.process = process; DoubleMetaphone.isVowel = isVowel; })() -},{"./phonetic":44}],4:[function(require,module,exports){ +},{"./phonetic":44}],6:[function(require,module,exports){ (function(){/* -Copyright (c) 2011, Chris Umbel +Copyright (c) 2012, Alexy Maslenninkov Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -1954,242 +2145,52 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -var Phonetic = require('./phonetic'); - -function dedup(token) { - return token.replace(/([^c])\1/g, '$1'); -} - -function dropInitialLetters(token) { - if(token.match(/^(kn|gn|pn|ae|wr)/)) - return token.substr(1, token.length - 1); - - return token; -} - -function dropBafterMAtEnd(token) { - return token.replace(/mb$/, 'm'); -} - -function cTransform(token) { - token = token.replace(/([^s]|^)(c)(h)/g, '$1x$3').trim(); - token = token.replace(/cia/g, 'xia'); - token = token.replace(/c(i|e|y)/g, 's$1'); - token = token.replace(/c/g, 'k'); - - return token; -} - -function dTransform(token) { - token = token.replace(/d(ge|gy|gi)/g, 'j$1'); - token = token.replace(/d/g, 't'); - - return token; -} - -function dropG(token) { - token = token.replace(/gh(^$|[^aeiou])/g, 'h$1'); - token = token.replace(/g(n|ned)$/g, '$1'); - - return token; -} - -function transformG(token) { - token = token.replace(/([^g]|^)(g)(i|e|y)/g, '$1j$3'); - token = token.replace(/gg/g, 'g'); - token = token.replace(/g/g, 'k'); - - return token; -} - -function dropH(token) { - return token.replace(/([aeiou])h([^aeiou])/g, '$1$2'); -} - -function transformCK(token) { - return token.replace(/ck/g, 'k'); -} -function transformPH(token) { - return token.replace(/ph/g, 'f'); -} - -function transformQ(token) { - return token.replace(/q/g, 'k'); -} - -function transformS(token) { - return token.replace(/s(h|io|ia)/g, 'x$1'); -} - -function transformT(token) { - token = token.replace(/t(ia|io)/g, 'x$1'); - token = token.replace(/th/, '0'); - - return token; -} - -function dropT(token) { - return token.replace(/tch/g, 'ch'); -} - -function transformV(token) { - return token.replace(/v/g, 'f'); -} - -function transformWH(token) { - return token.replace(/^wh/, 'w'); -} - -function dropW(token) { - return token.replace(/w([^aeiou]|$)/g, '$1'); -} - -function transformX(token) { - token = token.replace(/^x/, 's'); - token = token.replace(/x/g, 'ks'); - return token; -} - -function dropY(token) { - return token.replace(/y([^aeiou]|$)/g, '$1'); -} - -function transformZ(token) { - return token.replace(/z/, 's'); -} - -function dropVowels(token) { - return token.charAt(0) + token.substr(1, token.length).replace(/[aeiou]/g, ''); -} - -var Metaphone = new Phonetic(); -module.exports = Metaphone; - -Metaphone.process = function(token, maxLength) { - maxLength == maxLength || 32; - token = token.toLowerCase(); - token = dedup(token); - token = dropInitialLetters(token); - token = dropBafterMAtEnd(token); - token = transformCK(token); - token = cTransform(token); - token = dTransform(token); - token = dropG(token); - token = transformG(token); - token = dropH(token); - token = transformPH(token); - token = transformQ(token); - token = transformS(token); - token = transformX(token); - token = transformT(token); - token = dropT(token); - token = transformV(token); - token = transformWH(token); - token = dropW(token); - token = dropY(token); - token = transformZ(token); - token = dropVowels(token); - - token.toUpperCase(); - if(token.length >= maxLength) - token = token.substring(0, maxLength); - - return token.toUpperCase(); -}; - -// expose functions for testing -Metaphone.dedup = dedup; -Metaphone.dropInitialLetters = dropInitialLetters; -Metaphone.dropBafterMAtEnd = dropBafterMAtEnd; -Metaphone.cTransform = cTransform; -Metaphone.dTransform = dTransform; -Metaphone.dropG = dropG; -Metaphone.transformG = transformG; -Metaphone.dropH = dropH; -Metaphone.transformCK = transformCK; -Metaphone.transformPH = transformPH; -Metaphone.transformQ = transformQ; -Metaphone.transformS = transformS; -Metaphone.transformT = transformT; -Metaphone.dropT = dropT; -Metaphone.transformV = transformV; -Metaphone.transformWH = transformWH; -Metaphone.dropW = dropW; -Metaphone.transformX = transformX; -Metaphone.dropY = dropY; -Metaphone.transformZ = transformZ; -Metaphone.dropVowels = dropVowels; - -})() -},{"./phonetic":44}],6:[function(require,module,exports){ -(function(){/* -Copyright (c) 2012, Alexy Maslenninkov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* - * Daitch-Mokotoff Soundex Coding - * - * The Daitch-Mokotoff Soundex System was created by Randy Daitch and Gary - * Mokotoff of the Jewish Genealogical Society because they concluded the system - * developed by Robert Russell in 1918, and in use today by the U.S. National - * Archives and Records Administration (NARA) does not apply well to many Slavic - * and Yiddish surnames. It also includes refinements that are independent of - * ethnic considerations. - * - * The rules for converting surnames into D-M Code numbers are listed below. - * They are followed by the coding chart. - * - * 1. Names are coded to six digits, each digit representing a sound listed in - * the coding chart (below). - * - * 2. When a name lacks enough coded sounds for six digits, use zeros to fill to - * six digits. GOLDEN which has only four coded sounds [G-L-D-N] is coded as - * 583600. - * - * 3. The letters A, E, I, O, U, J, and Y are always coded at the beginning of a - * name as in Alpert 087930. In any other situation, they are ignored except - * when two of them form a pair and the pair comes before a vowel, as in Breuer - * 791900 but not Freud. - * - * 4. The letter H is coded at the beginning of a name, as in Haber 579000, or - * preceding a vowel, as in Manheim 665600, otherwise it is not coded. - * - * 5. When adjacent sounds can combine to form a larger sound, they are given - * the code number of the larger sound. Mintz which is not coded MIN-T-Z but - * MIN-TZ 664000. - * - * 6. When adjacent letters have the same code number, they are coded as one - * sound, as in TOPF, which is not coded TO-P-F 377000 but TO-PF 370000. - * Exceptions to this rule are the letter combinations MN and NM, whose letters - * are coded separately, as in Kleinman, which is coded 586660 not 586600. - * - * 7. When a surname consists or more than one word, it is coded as if one word, - * such as Ben Aron which is treated as Benaron. - * - * 8. Several letter and letter combinations pose the problem that they may - * sound in one of two ways. The letter and letter combinations CH, CK, C, J, - * and RS are assigned two possible code numbers. - * - * For more info, see http://www.jewishgen.org/InfoFiles/soundex.html - */ +/* + * Daitch-Mokotoff Soundex Coding + * + * The Daitch-Mokotoff Soundex System was created by Randy Daitch and Gary + * Mokotoff of the Jewish Genealogical Society because they concluded the system + * developed by Robert Russell in 1918, and in use today by the U.S. National + * Archives and Records Administration (NARA) does not apply well to many Slavic + * and Yiddish surnames. It also includes refinements that are independent of + * ethnic considerations. + * + * The rules for converting surnames into D-M Code numbers are listed below. + * They are followed by the coding chart. + * + * 1. Names are coded to six digits, each digit representing a sound listed in + * the coding chart (below). + * + * 2. When a name lacks enough coded sounds for six digits, use zeros to fill to + * six digits. GOLDEN which has only four coded sounds [G-L-D-N] is coded as + * 583600. + * + * 3. The letters A, E, I, O, U, J, and Y are always coded at the beginning of a + * name as in Alpert 087930. In any other situation, they are ignored except + * when two of them form a pair and the pair comes before a vowel, as in Breuer + * 791900 but not Freud. + * + * 4. The letter H is coded at the beginning of a name, as in Haber 579000, or + * preceding a vowel, as in Manheim 665600, otherwise it is not coded. + * + * 5. When adjacent sounds can combine to form a larger sound, they are given + * the code number of the larger sound. Mintz which is not coded MIN-T-Z but + * MIN-TZ 664000. + * + * 6. When adjacent letters have the same code number, they are coded as one + * sound, as in TOPF, which is not coded TO-P-F 377000 but TO-PF 370000. + * Exceptions to this rule are the letter combinations MN and NM, whose letters + * are coded separately, as in Kleinman, which is coded 586660 not 586600. + * + * 7. When a surname consists or more than one word, it is coded as if one word, + * such as Ben Aron which is treated as Benaron. + * + * 8. Several letter and letter combinations pose the problem that they may + * sound in one of two ways. The letter and letter combinations CH, CK, C, J, + * and RS are assigned two possible code numbers. + * + * For more info, see http://www.jewishgen.org/InfoFiles/soundex.html + */ /** * D-M transformation table in the form of finite-state machine. @@ -2991,83 +2992,7 @@ PorterStemmer.stem = function(token) { }; -},{"./stemmer_es":48}],12:[function(require,module,exports){ -/* -Copyright (c) 2011, Chris Umbel - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -var Stemmer = require('./stemmer'); -var ruleTable = require('./lancaster_rules').rules; - -function acceptable(candidate) { - if (candidate.match(/^[aeiou]/)) - return (candidate.length > 1); - else - return (candidate.length > 2 && candidate.match(/[aeiouy]/)); -} - -// take a token, look up the applicatble rule section and attempt some stemming! -function applyRuleSection(token, intact) { - var section = token.substr( - 1); - var rules = ruleTable[section]; - - if (rules) { - for (var i = 0; i < rules.length; i++) { - if ((intact || !rules[i].intact) - // only apply intact rules to intact tokens - && token.substr(0 - rules[i].pattern.length) == rules[i].pattern) { - // hack off only as much as the rule indicates - var result = token.substr(0, token.length - rules[i].size); - - // if the rules wants us to apply an appendage do so - if (rules[i].appendage) - result += rules[i].appendage; - - if (acceptable(result)) { - token = result; - - // see what the rules wants to do next - if (rules[i].continuation) { - // this rule thinks there still might be stem left. keep at it. - // since we've applied a change we'll pass false in for intact - return applyRuleSection(result, false); - } else { - // the rule thinks we're done stemming. drop out. - return result; - } - } - } - } - } - - return token; -} - -var LancasterStemmer = new Stemmer(); -module.exports = LancasterStemmer; - -LancasterStemmer.stem = function(token) { - return applyRuleSection(token.toLowerCase(), true); -} -},{"./stemmer":45,"./lancaster_rules":49}],11:[function(require,module,exports){ +},{"./stemmer_es":48}],11:[function(require,module,exports){ /* Copyright (c) 2012, Leonardo Fenu, Chris Umbel @@ -3301,7 +3226,83 @@ PorterStemmer.stem = function(token) { return token.toLowerCase(); }; -},{"./stemmer_it":50}],13:[function(require,module,exports){ +},{"./stemmer_it":49}],12:[function(require,module,exports){ +/* +Copyright (c) 2011, Chris Umbel + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +var Stemmer = require('./stemmer'); +var ruleTable = require('./lancaster_rules').rules; + +function acceptable(candidate) { + if (candidate.match(/^[aeiou]/)) + return (candidate.length > 1); + else + return (candidate.length > 2 && candidate.match(/[aeiouy]/)); +} + +// take a token, look up the applicatble rule section and attempt some stemming! +function applyRuleSection(token, intact) { + var section = token.substr( - 1); + var rules = ruleTable[section]; + + if (rules) { + for (var i = 0; i < rules.length; i++) { + if ((intact || !rules[i].intact) + // only apply intact rules to intact tokens + && token.substr(0 - rules[i].pattern.length) == rules[i].pattern) { + // hack off only as much as the rule indicates + var result = token.substr(0, token.length - rules[i].size); + + // if the rules wants us to apply an appendage do so + if (rules[i].appendage) + result += rules[i].appendage; + + if (acceptable(result)) { + token = result; + + // see what the rules wants to do next + if (rules[i].continuation) { + // this rule thinks there still might be stem left. keep at it. + // since we've applied a change we'll pass false in for intact + return applyRuleSection(result, false); + } else { + // the rule thinks we're done stemming. drop out. + return result; + } + } + } + } + } + + return token; +} + +var LancasterStemmer = new Stemmer(); +module.exports = LancasterStemmer; + +LancasterStemmer.stem = function(token) { + return applyRuleSection(token.toLowerCase(), true); +} +},{"./stemmer":45,"./lancaster_rules":50}],13:[function(require,module,exports){ /* Copyright (c) 2012, Guillaume Marty @@ -3441,7 +3442,7 @@ StemmerJa.prototype.attach = function() { module.exports = StemmerJa; -},{"../tokenizers/tokenizer_ja":21,"../util/stopwords_ja":51}],14:[function(require,module,exports){ +},{"../tokenizers/tokenizer_ja":22,"../util/stopwords_ja":51}],14:[function(require,module,exports){ /* Copyright (c) 2011, Chris Umbel Farsi Aggressive Tokenizer by Fardin Koochaki @@ -3491,9 +3492,9 @@ AggressiveTokenizer.prototype.tokenize = function(text) { return this.clearEmptyString(text.split(/\s+/)); }; -},{"util":40,"./tokenizer":52}],16:[function(require,module,exports){ +},{"util":40,"./tokenizer":52}],15:[function(require,module,exports){ /* -Copyright (c) 2011, Chris Umbel,David Przybilla +Copyright (c) 2011, Chris Umbel Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -3520,18 +3521,27 @@ var Tokenizer = require('./tokenizer'), var AggressiveTokenizer = function() { Tokenizer.call(this); }; + util.inherits(AggressiveTokenizer, Tokenizer); module.exports = AggressiveTokenizer; +AggressiveTokenizer.prototype.withoutEmpty = function(array) { + return array.filter(function(a) {return a;}); +}; + +AggressiveTokenizer.prototype.clearText = function(text) { + return text.replace(/[^a-zа-яё0-9]/gi, ' ').replace(/[\s\n]+/g, ' ').trim(); +}; + AggressiveTokenizer.prototype.tokenize = function(text) { // break a string up into an array of tokens by anything non-word - return this.trim(text.split(/\W+/)); + return this.withoutEmpty(this.clearText(text).split(' ')); }; -},{"util":40,"./tokenizer":52}],15:[function(require,module,exports){ +},{"util":40,"./tokenizer":52}],16:[function(require,module,exports){ /* -Copyright (c) 2011, Chris Umbel +Copyright (c) 2011, Chris Umbel,David Przybilla Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -3558,22 +3568,13 @@ var Tokenizer = require('./tokenizer'), var AggressiveTokenizer = function() { Tokenizer.call(this); }; - util.inherits(AggressiveTokenizer, Tokenizer); module.exports = AggressiveTokenizer; -AggressiveTokenizer.prototype.withoutEmpty = function(array) { - return array.filter(function(a) {return a;}); -}; - -AggressiveTokenizer.prototype.clearText = function(text) { - return text.replace(/[^a-zа-яё0-9]/gi, ' ').replace(/[\s\n]+/g, ' ').trim(); -}; - AggressiveTokenizer.prototype.tokenize = function(text) { // break a string up into an array of tokens by anything non-word - return this.withoutEmpty(this.clearText(text).split(' ')); + return this.trim(text.split(/\W+/)); }; },{"util":40,"./tokenizer":52}],17:[function(require,module,exports){ @@ -3614,7 +3615,7 @@ AggressiveTokenizer.prototype.tokenize = function(text) { return this.trim(text.split(/\W+/)); }; -},{"util":40,"./tokenizer":52}],18:[function(require,module,exports){ +},{"util":40,"./tokenizer":52}],19:[function(require,module,exports){ /* Copyright (c) 2011, Chris Umbel @@ -3652,7 +3653,7 @@ AggressiveTokenizer.prototype.tokenize = function(text) { return this.trim(text.split(/\W+/)); }; -},{"util":40,"./tokenizer":52}],21:[function(require,module,exports){ +},{"util":40,"./tokenizer":52}],22:[function(require,module,exports){ // Original copyright: /* Copyright (c) 2008, Taku Kudo @@ -5210,7 +5211,7 @@ exports.replacer = replacer; exports.flip = flip; exports.merge = merge; -},{}],49:[function(require,module,exports){ +},{}],50:[function(require,module,exports){ /* Copyright (c) 2011, Chris Umbel @@ -6899,7 +6900,7 @@ module.exports = function() { }; })() -},{"../util/stopwords":32,"../tokenizers/aggressive_tokenizer":18}],45:[function(require,module,exports){ +},{"../util/stopwords":32,"../tokenizers/aggressive_tokenizer":19}],45:[function(require,module,exports){ /* Copyright (c) 2011, Chris Umbel @@ -6962,7 +6963,7 @@ module.exports = function() { }; } -},{"../util/stopwords":32,"../tokenizers/aggressive_tokenizer":18}],46:[function(require,module,exports){ +},{"../util/stopwords":32,"../tokenizers/aggressive_tokenizer":19}],46:[function(require,module,exports){ /* Copyright (c) 2011, Chris Umbel Farsi Stemmer by Fardin Koochaki @@ -7138,7 +7139,7 @@ module.exports = function() { }; } -},{"../util/stopwords_es":58,"../tokenizers/aggressive_tokenizer_es":16}],50:[function(require,module,exports){ +},{"../util/stopwords_es":58,"../tokenizers/aggressive_tokenizer_es":16}],49:[function(require,module,exports){ var stopwords = require('../util/stopwords_it'); var Tokenizer = require('../tokenizers/aggressive_tokenizer_it'); @@ -11192,84 +11193,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -var WordNetFile = require('./wordnet_file'), - fs = require('fs'), - util = require('util'); - -function get(location, callback) { - var buff = new Buffer(4096); - - this.open(function(err, fd, done) { - WordNetFile.appendLineChar(fd, location, 0, buff, function(line) { - done(); - var data = line.split('| '); - var tokens = data[0].split(/\s+/); - var ptrs = []; - var wCnt = parseInt(tokens[3], 10); - var synonyms = []; - - for(var i = 0; i < wCnt; i++) { - synonyms.push(tokens[4 + i * 2]); - } - - var ptrOffset = (wCnt - 1) * 2 + 6; - for(var i = 0; i < parseInt(tokens[ptrOffset], 10); i++) { - ptrs.push({ - pointerSymbol: tokens[ptrOffset + 1 + i * 4], - synsetOffset: parseInt(tokens[ptrOffset + 2 + i * 4], 10), - pos: tokens[ptrOffset + 3 + i * 4], - sourceTarget: tokens[ptrOffset + 4 + i * 4] - }); - } - - callback({ - synsetOffset: parseInt(tokens[0], 10), - lexFilenum: parseInt(tokens[1], 10), - pos: tokens[2], - wCnt: wCnt, - lemma: tokens[4], - synonyms: synonyms, - lexId: tokens[5], - ptrs: ptrs, - gloss: data[1] - }); - }); - }); -} - -var DataFile = function(dataDir, name) { - WordNetFile.call(this, dataDir, 'data.' + name); -}; - -util.inherits(DataFile, WordNetFile); -DataFile.prototype.get = get; - -module.exports = DataFile; - -})(require("__browserify_buffer").Buffer) -},{"fs":42,"util":40,"./wordnet_file":63,"__browserify_buffer":61}],64:[function(require,module,exports){ -(function(Buffer){/* -Copyright (c) 2011, Chris Umbel - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - var WordNetFile = require('./wordnet_file'), fs = require('fs'), util = require('util'); @@ -11390,47 +11313,85 @@ IndexFile.prototype._findAt = findAt; module.exports = IndexFile; })(require("__browserify_buffer").Buffer) -},{"fs":42,"util":40,"./wordnet_file":63,"__browserify_buffer":61}],56:[function(require,module,exports){ -/* -Copyright (c) 2011, Chris Umbel -Farsi Stop Words by Fardin Koochaki - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -// a list of commonly used words that have little meaning and can be excluded -// from analysis. -var words = [ - // Words - 'از', 'با', 'یه', 'برای', 'و', 'باید', 'شاید', - - // Symbols - '؟', '!', '٪', '.', '،', '؛', ':', ';', ',', - - // Numbers - '۱', '۲', '۳', '۴', '۵', '۶', '۷', '۸', '۹', '۰' -]; - -// tell the world about the noise words. -exports.words = words; +},{"fs":42,"util":40,"./wordnet_file":63,"__browserify_buffer":61}],64:[function(require,module,exports){ +(function(Buffer){/* +Copyright (c) 2011, Chris Umbel + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +var WordNetFile = require('./wordnet_file'), + fs = require('fs'), + util = require('util'); + +function get(location, callback) { + var buff = new Buffer(4096); + + this.open(function(err, fd, done) { + WordNetFile.appendLineChar(fd, location, 0, buff, function(line) { + done(); + var data = line.split('| '); + var tokens = data[0].split(/\s+/); + var ptrs = []; + var wCnt = parseInt(tokens[3], 10); + var synonyms = []; + + for(var i = 0; i < wCnt; i++) { + synonyms.push(tokens[4 + i * 2]); + } + + var ptrOffset = (wCnt - 1) * 2 + 6; + for(var i = 0; i < parseInt(tokens[ptrOffset], 10); i++) { + ptrs.push({ + pointerSymbol: tokens[ptrOffset + 1 + i * 4], + synsetOffset: parseInt(tokens[ptrOffset + 2 + i * 4], 10), + pos: tokens[ptrOffset + 3 + i * 4], + sourceTarget: tokens[ptrOffset + 4 + i * 4] + }); + } + + callback({ + synsetOffset: parseInt(tokens[0], 10), + lexFilenum: parseInt(tokens[1], 10), + pos: tokens[2], + wCnt: wCnt, + lemma: tokens[4], + synonyms: synonyms, + lexId: tokens[5], + ptrs: ptrs, + gloss: data[1] + }); + }); + }); +} + +var DataFile = function(dataDir, name) { + WordNetFile.call(this, dataDir, 'data.' + name); +}; + +util.inherits(DataFile, WordNetFile); +DataFile.prototype.get = get; + +module.exports = DataFile; -},{}],57:[function(require,module,exports){ +})(require("__browserify_buffer").Buffer) +},{"fs":42,"util":40,"./wordnet_file":63,"__browserify_buffer":61}],57:[function(require,module,exports){ /* Copyright (c) 2011, Polyakov Vladimir, Chris Umbel @@ -11565,6 +11526,46 @@ var words = [ // tell the world about the noise words. exports.words = words; +},{}],56:[function(require,module,exports){ +/* +Copyright (c) 2011, Chris Umbel +Farsi Stop Words by Fardin Koochaki + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +// a list of commonly used words that have little meaning and can be excluded +// from analysis. +var words = [ + // Words + 'از', 'با', 'یه', 'برای', 'و', 'باید', 'شاید', + + // Symbols + '؟', '!', '٪', '.', '،', '؛', ':', ';', ',', + + // Numbers + '۱', '۲', '۳', '۴', '۵', '۶', '۷', '۸', '۹', '۰' +]; + +// tell the world about the noise words. +exports.words = words; + },{}],63:[function(require,module,exports){ (function(Buffer){/* Copyright (c) 2011, Chris Umbel @@ -11637,60 +11638,7 @@ WordNetFile.appendLineChar = appendLineChar; module.exports = WordNetFile; })(require("__browserify_buffer").Buffer) -},{"fs":42,"path":65,"util":40,"__browserify_buffer":61}],22:[function(require,module,exports){ -/* -Copyright (c) 2011, Chris Umbel - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -var PorterStemmer = require('../stemmers/porter_stemmer'), -util = require('util'), -Classifier = require('./classifier'), -ApparatusBayesClassifier = require('apparatus').BayesClassifier; - -var BayesClassifier = function(stemmer) { - Classifier.call(this, new ApparatusBayesClassifier(), stemmer); -}; - -util.inherits(BayesClassifier, Classifier); - -function restore(classifier, stemmer) { - classifier = Classifier.restore(classifier, stemmer); - classifier.__proto__ = BayesClassifier.prototype; - classifier.classifier = ApparatusBayesClassifier.restore(classifier.classifier); - - return classifier; -} - -function load(filename, stemmer, callback) { - Classifier.load(filename, function(err, classifier) { - callback(err, restore(classifier, stemmer)); - }); -} - -BayesClassifier.restore = restore; -BayesClassifier.load = load; - -module.exports = BayesClassifier; - -},{"util":40,"../stemmers/porter_stemmer":7,"./classifier":60,"apparatus":66}],19:[function(require,module,exports){ +},{"fs":42,"path":65,"util":40,"__browserify_buffer":61}],18:[function(require,module,exports){ /* Copyright (c) 2011, Rob Ellis, Chris Umbel @@ -11778,7 +11726,7 @@ var WordPunctTokenizer = function(options) { util.inherits(WordPunctTokenizer, RegexpTokenizer); exports.WordPunctTokenizer = WordPunctTokenizer; -},{"util":40,"./tokenizer":52,"underscore":67}],20:[function(require,module,exports){ +},{"util":40,"./tokenizer":52,"underscore":66}],20:[function(require,module,exports){ /* Copyright (c) 2011, Rob Ellis, Chris Umbel @@ -11854,7 +11802,7 @@ TreebankWordTokenizer.prototype.tokenize = function(text) { module.exports = TreebankWordTokenizer; -},{"util":40,"./tokenizer":52,"underscore":67}],23:[function(require,module,exports){ +},{"util":40,"./tokenizer":52,"underscore":66}],21:[function(require,module,exports){ /* Copyright (c) 2011, Chris Umbel @@ -11880,18 +11828,18 @@ THE SOFTWARE. var PorterStemmer = require('../stemmers/porter_stemmer'), util = require('util'), Classifier = require('./classifier'), -ApparatusLogisticRegressionClassifier = require('apparatus').LogisticRegressionClassifier; +ApparatusBayesClassifier = require('apparatus').BayesClassifier; -var LogisticRegressionClassifier = function(stemmer) { - Classifier.call(this, new ApparatusLogisticRegressionClassifier(), stemmer); +var BayesClassifier = function(stemmer) { + Classifier.call(this, new ApparatusBayesClassifier(), stemmer); }; -util.inherits(LogisticRegressionClassifier, Classifier); +util.inherits(BayesClassifier, Classifier); function restore(classifier, stemmer) { classifier = Classifier.restore(classifier, stemmer); - classifier.__proto__ = LogisticRegressionClassifier.prototype; - classifier.classifier = ApparatusLogisticRegressionClassifier.restore(classifier.classifier); + classifier.__proto__ = BayesClassifier.prototype; + classifier.classifier = ApparatusBayesClassifier.restore(classifier.classifier); return classifier; } @@ -11902,21 +11850,12 @@ function load(filename, stemmer, callback) { }); } -function train() { - // we need to reset the traning state because logistic regression - // needs its matricies to have their widths synced, etc. - this.lastAdded = 0; - this.classifier = new ApparatusLogisticRegressionClassifier(); - Classifier.prototype.train.call(this); -} - -LogisticRegressionClassifier.prototype.train = train; -LogisticRegressionClassifier.restore = restore; -LogisticRegressionClassifier.load = load; +BayesClassifier.restore = restore; +BayesClassifier.load = load; -module.exports = LogisticRegressionClassifier; +module.exports = BayesClassifier; -},{"util":40,"../stemmers/porter_stemmer":7,"./classifier":60,"apparatus":66}],65:[function(require,module,exports){ +},{"util":40,"../stemmers/porter_stemmer":7,"./classifier":60,"apparatus":67}],65:[function(require,module,exports){ (function(process){function filter (xs, fn) { var res = []; for (var i = 0; i < xs.length; i++) { @@ -12094,9 +12033,9 @@ exports.relative = function(from, to) { }; })(require("__browserify_process")) -},{"__browserify_process":43}],33:[function(require,module,exports){ +},{"__browserify_process":43}],23:[function(require,module,exports){ /* -Copyright (c) 2011, Rob Ellis, Chris Umbel +Copyright (c) 2011, Chris Umbel Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -12117,40 +12056,46 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -var _ = require("underscore")._, - Tokenizer = require('../tokenizers/regexp_tokenizer').WordTokenizer, - tokenizer = new Tokenizer(); +var PorterStemmer = require('../stemmers/porter_stemmer'), +util = require('util'), +Classifier = require('./classifier'), +ApparatusLogisticRegressionClassifier = require('apparatus').LogisticRegressionClassifier; -exports.ngrams = function(sequence, n) { - return ngrams(sequence, n); -} +var LogisticRegressionClassifier = function(stemmer) { + Classifier.call(this, new ApparatusLogisticRegressionClassifier(), stemmer); +}; -exports.bigrams = function(sequence) { - return ngrams(sequence, 2); -} +util.inherits(LogisticRegressionClassifier, Classifier); -exports.trigrams = function(sequence) { - return ngrams(sequence, 3); +function restore(classifier, stemmer) { + classifier = Classifier.restore(classifier, stemmer); + classifier.__proto__ = LogisticRegressionClassifier.prototype; + classifier.classifier = ApparatusLogisticRegressionClassifier.restore(classifier.classifier); + + return classifier; } -var ngrams = function(sequence, n) { - var result = []; - - if (!_(sequence).isArray()) { - sequence = tokenizer.tokenize(sequence); - } +function load(filename, stemmer, callback) { + Classifier.load(filename, function(err, classifier) { + callback(err, restore(classifier, stemmer)); + }); +} - var count = _.max([0, sequence.length - n + 1]); - - for (var i = 0; i < count; i++) { - result.push(sequence.slice(i, i + n)); - } - - return result; +function train() { + // we need to reset the traning state because logistic regression + // needs its matricies to have their widths synced, etc. + this.lastAdded = 0; + this.classifier = new ApparatusLogisticRegressionClassifier(); + Classifier.prototype.train.call(this); } +LogisticRegressionClassifier.prototype.train = train; +LogisticRegressionClassifier.restore = restore; +LogisticRegressionClassifier.load = load; + +module.exports = LogisticRegressionClassifier; -},{"../tokenizers/regexp_tokenizer":19,"underscore":67}],31:[function(require,module,exports){ +},{"util":40,"../stemmers/porter_stemmer":7,"./classifier":60,"apparatus":67}],30:[function(require,module,exports){ /* Copyright (c) 2011, Rob Ellis, Chris Umbel @@ -12173,7 +12118,128 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -var _ = require("underscore")._; +var _ = require("underscore")._, + Tokenizer = require('../tokenizers/regexp_tokenizer').WordTokenizer, + tokenizer = new Tokenizer(), + stopwords = require('../util/stopwords').words, + fs = require('fs'); + +function buildDocument(text, key) { + var stopOut; + + if(typeof text === 'string') { + text = tokenizer.tokenize(text.toLowerCase()); + stopOut = true; + } else if(!_.isArray(text)) { + return text; + stopOut = false; + } + + return text.reduce(function(document, term) { + if(!stopOut || stopwords.indexOf(term) < 0) + document[term] = (document[term] ? document[term] + 1 : 1); + + return document; + }, {__key: key}); +} + +function tf(term, document) { + return document[term] ? document[term]: 0; +} + +function documentHasTerm(term, document) { + return document[term] && document[term] > 0; +} + +function TfIdf(deserialized) { + if(deserialized) + this.documents = deserialized.documents; + else + this.documents = []; +} + +module.exports = TfIdf; +TfIdf.tf = tf; + +TfIdf.prototype.idf = function(term) { + var docsWithTerm = this.documents.reduce(function(count, document) { + return count + (documentHasTerm(term, document) ? 1 : 0); + }, 1); + + return Math.log(this.documents.length + 1 / docsWithTerm /* inited to 1 so + no addition needed */); +}; + +TfIdf.prototype.addDocument = function(document, key) { + this.documents.push(buildDocument(document, key)); +}; + +TfIdf.prototype.addFileSync = function(path, encoding, key) { + if(encoding) + encoding = 'UTF-8'; + + var document = fs.readFileSync(path, 'UTF-8'); + this.documents.push(buildDocument(document, key)); +}; + +TfIdf.prototype.tfidf = function(terms, d) { + var _this = this; + + if(!_.isArray(terms)) + terms = tokenizer.tokenize(terms.toString().toLowerCase()); + + return terms.reduce(function(value, term) { + return value + (tf(term, _this.documents[d]) * _this.idf(term)); + }, 0.0); +}; + +TfIdf.prototype.listTerms = function(d) { + var terms = []; + + for(var term in this.documents[d]) { + terms.push({term: term, tfidf: this.tfidf(term, d)}) + } + + return terms.sort(function(x, y) { return y.tfidf - x.tfidf }); +} + +TfIdf.prototype.tfidfs = function(terms, callback) { + var tfidfs = new Array(this.documents.length); + + for(var i = 0; i < this.documents.length; i++) { + tfidfs[i] = this.tfidf(terms, i); + + if(callback) + callback(i, tfidfs[i], this.documents[i].__key); + } + + return tfidfs; +}; + +},{"fs":42,"../tokenizers/regexp_tokenizer":18,"../util/stopwords":32,"underscore":66}],31:[function(require,module,exports){ +/* +Copyright (c) 2011, Rob Ellis, Chris Umbel + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +var _ = require("underscore")._; /* Sentences Analizer Class @@ -12325,7 +12391,7 @@ Sentences.prototype.type = function(callback) { module.exports = Sentences; -},{"underscore":67}],30:[function(require,module,exports){ +},{"underscore":66}],33:[function(require,module,exports){ /* Copyright (c) 2011, Rob Ellis, Chris Umbel @@ -12350,103 +12416,38 @@ THE SOFTWARE. var _ = require("underscore")._, Tokenizer = require('../tokenizers/regexp_tokenizer').WordTokenizer, - tokenizer = new Tokenizer(), - stopwords = require('../util/stopwords').words, - fs = require('fs'); - -function buildDocument(text, key) { - var stopOut; - - if(typeof text === 'string') { - text = tokenizer.tokenize(text.toLowerCase()); - stopOut = true; - } else if(!_.isArray(text)) { - return text; - stopOut = false; - } - - return text.reduce(function(document, term) { - if(!stopOut || stopwords.indexOf(term) < 0) - document[term] = (document[term] ? document[term] + 1 : 1); - - return document; - }, {__key: key}); -} + tokenizer = new Tokenizer(); -function tf(term, document) { - return document[term] ? document[term]: 0; +exports.ngrams = function(sequence, n) { + return ngrams(sequence, n); } -function documentHasTerm(term, document) { - return document[term] && document[term] > 0; +exports.bigrams = function(sequence) { + return ngrams(sequence, 2); } -function TfIdf(deserialized) { - if(deserialized) - this.documents = deserialized.documents; - else - this.documents = []; +exports.trigrams = function(sequence) { + return ngrams(sequence, 3); } -module.exports = TfIdf; -TfIdf.tf = tf; - -TfIdf.prototype.idf = function(term) { - var docsWithTerm = this.documents.reduce(function(count, document) { - return count + (documentHasTerm(term, document) ? 1 : 0); - }, 1); - - return Math.log(this.documents.length + 1 / docsWithTerm /* inited to 1 so - no addition needed */); -}; - -TfIdf.prototype.addDocument = function(document, key) { - this.documents.push(buildDocument(document, key)); -}; - -TfIdf.prototype.addFileSync = function(path, encoding, key) { - if(encoding) - encoding = 'UTF-8'; - - var document = fs.readFileSync(path, 'UTF-8'); - this.documents.push(buildDocument(document, key)); -}; - -TfIdf.prototype.tfidf = function(terms, d) { - var _this = this; - - if(!_.isArray(terms)) - terms = tokenizer.tokenize(terms.toString().toLowerCase()); +var ngrams = function(sequence, n) { + var result = []; - return terms.reduce(function(value, term) { - return value + (tf(term, _this.documents[d]) * _this.idf(term)); - }, 0.0); -}; - -TfIdf.prototype.listTerms = function(d) { - var terms = []; - - for(var term in this.documents[d]) { - terms.push({term: term, tfidf: this.tfidf(term, d)}) + if (!_(sequence).isArray()) { + sequence = tokenizer.tokenize(sequence); } - return terms.sort(function(x, y) { return y.tfidf - x.tfidf }); -} - -TfIdf.prototype.tfidfs = function(terms, callback) { - var tfidfs = new Array(this.documents.length); + var count = _.max([0, sequence.length - n + 1]); - for(var i = 0; i < this.documents.length; i++) { - tfidfs[i] = this.tfidf(terms, i); - - if(callback) - callback(i, tfidfs[i], this.documents[i].__key); + for (var i = 0; i < count; i++) { + result.push(sequence.slice(i, i + n)); } + + return result; +} - return tfidfs; -}; -},{"fs":42,"../tokenizers/regexp_tokenizer":19,"../util/stopwords":32,"underscore":67}],67:[function(require,module,exports){ +},{"../tokenizers/regexp_tokenizer":18,"underscore":66}],66:[function(require,module,exports){ (function(){// Underscore.js 1.4.4 // http://underscorejs.org // (c) 2009-2013 Jeremy Ashkenas, DocumentCloud Inc. @@ -13846,14 +13847,14 @@ function WordNet(dataDir) { module.exports = WordNet; -},{"./index_file":64,"./data_file":62,"WNdb":68}],68:[function(require,module,exports){ +},{"./index_file":62,"./data_file":64,"WNdb":68}],68:[function(require,module,exports){ (function(__dirname){ exports.version = "3.0"; // this is the WordNet DB version exports.path = require('path').join(__dirname, "dict"); exports.files = require('fs').readdirSync(exports.path); })("/node_modules/WNdb") -},{"path":65,"fs":42}],66:[function(require,module,exports){ +},{"path":65,"fs":42}],67:[function(require,module,exports){ exports.BayesClassifier = require('./classifier/bayes_classifier'); exports.LogisticRegressionClassifier = require('./classifier/logistic_regression_classifier'); @@ -14370,7 +14371,7 @@ exports.Line.Segment = require('./line.segment'); exports.Sylvester = require('./sylvester'); })(window) -},{"./vector":74,"./matrix":75,"./plane":76,"./line":77,"./line.segment":78,"./sylvester":79}],79:[function(require,module,exports){ +},{"./vector":74,"./line":75,"./matrix":76,"./plane":77,"./line.segment":78,"./sylvester":79}],79:[function(require,module,exports){ // Copyright (c) 2011, Chris Umbel, James Coglan // This file is required in order for any other classes to work. Some Vector methods work with the // other Sylvester classes and are useless unless they are included. Other classes such as Line and @@ -14827,45 +14828,278 @@ Vector.log = function(v) { module.exports = Vector; -},{"./sylvester":79,"./matrix":75}],76:[function(require,module,exports){ +},{"./sylvester":79,"./matrix":76}],75:[function(require,module,exports){ // Copyright (c) 2011, Chris Umbel, James Coglan -// Plane class - depends on Vector. Some methods require Matrix and Line. var Vector = require('./vector'); var Matrix = require('./matrix'); -var Line = require('./line'); - +var Plane = require('./plane'); var Sylvester = require('./sylvester'); -function Plane() {} -Plane.prototype = { +// Line class - depends on Vector, and some methods require Matrix and Plane. - // Returns true iff the plane occupies the same space as the argument - eql: function(plane) { - return (this.contains(plane.anchor) && this.isParallelTo(plane)); +function Line() {} +Line.prototype = { + + // Returns true if the argument occupies the same space as the line + eql: function(line) { + return (this.isParallelTo(line) && this.contains(line.anchor)); }, - // Returns a copy of the plane + // Returns a copy of the line dup: function() { - return Plane.create(this.anchor, this.normal); + return Line.create(this.anchor, this.direction); }, - // Returns the result of translating the plane by the given vector + // Returns the result of translating the line by the given vector/array translate: function(vector) { var V = vector.elements || vector; - return Plane.create([ + return Line.create([ this.anchor.elements[0] + V[0], this.anchor.elements[1] + V[1], this.anchor.elements[2] + (V[2] || 0) - ], this.normal); + ], this.direction); }, - // Returns true iff the plane is parallel to the argument. Will return true - // if the planes are equal, or if you give a line and it lies in the plane. + // Returns true if the line is parallel to the argument. Here, 'parallel to' + // means that the argument's direction is either parallel or antiparallel to + // the line's own direction. A line is parallel to a plane if the two do not + // have a unique intersection. isParallelTo: function(obj) { - var theta; - if (obj.normal) { - // obj is a plane - theta = this.normal.angleFrom(obj.normal); + if (obj.normal || (obj.start && obj.end)) { return obj.isParallelTo(this); } + var theta = this.direction.angleFrom(obj.direction); + return (Math.abs(theta) <= Sylvester.precision || Math.abs(theta - Math.PI) <= Sylvester.precision); + }, + + // Returns the line's perpendicular distance from the argument, + // which can be a point, a line or a plane + distanceFrom: function(obj) { + if (obj.normal || (obj.start && obj.end)) { return obj.distanceFrom(this); } + if (obj.direction) { + // obj is a line + if (this.isParallelTo(obj)) { return this.distanceFrom(obj.anchor); } + var N = this.direction.cross(obj.direction).toUnitVector().elements; + var A = this.anchor.elements, B = obj.anchor.elements; + return Math.abs((A[0] - B[0]) * N[0] + (A[1] - B[1]) * N[1] + (A[2] - B[2]) * N[2]); + } else { + // obj is a point + var P = obj.elements || obj; + var A = this.anchor.elements, D = this.direction.elements; + var PA1 = P[0] - A[0], PA2 = P[1] - A[1], PA3 = (P[2] || 0) - A[2]; + var modPA = Math.sqrt(PA1*PA1 + PA2*PA2 + PA3*PA3); + if (modPA === 0) return 0; + // Assumes direction vector is normalized + var cosTheta = (PA1 * D[0] + PA2 * D[1] + PA3 * D[2]) / modPA; + var sin2 = 1 - cosTheta*cosTheta; + return Math.abs(modPA * Math.sqrt(sin2 < 0 ? 0 : sin2)); + } + }, + + // Returns true iff the argument is a point on the line, or if the argument + // is a line segment lying within the receiver + contains: function(obj) { + if (obj.start && obj.end) { return this.contains(obj.start) && this.contains(obj.end); } + var dist = this.distanceFrom(obj); + return (dist !== null && dist <= Sylvester.precision); + }, + + // Returns the distance from the anchor of the given point. Negative values are + // returned for points that are in the opposite direction to the line's direction from + // the line's anchor point. + positionOf: function(point) { + if (!this.contains(point)) { return null; } + var P = point.elements || point; + var A = this.anchor.elements, D = this.direction.elements; + return (P[0] - A[0]) * D[0] + (P[1] - A[1]) * D[1] + ((P[2] || 0) - A[2]) * D[2]; + }, + + // Returns true iff the line lies in the given plane + liesIn: function(plane) { + return plane.contains(this); + }, + + // Returns true iff the line has a unique point of intersection with the argument + intersects: function(obj) { + if (obj.normal) { return obj.intersects(this); } + return (!this.isParallelTo(obj) && this.distanceFrom(obj) <= Sylvester.precision); + }, + + // Returns the unique intersection point with the argument, if one exists + intersectionWith: function(obj) { + if (obj.normal || (obj.start && obj.end)) { return obj.intersectionWith(this); } + if (!this.intersects(obj)) { return null; } + var P = this.anchor.elements, X = this.direction.elements, + Q = obj.anchor.elements, Y = obj.direction.elements; + var X1 = X[0], X2 = X[1], X3 = X[2], Y1 = Y[0], Y2 = Y[1], Y3 = Y[2]; + var PsubQ1 = P[0] - Q[0], PsubQ2 = P[1] - Q[1], PsubQ3 = P[2] - Q[2]; + var XdotQsubP = - X1*PsubQ1 - X2*PsubQ2 - X3*PsubQ3; + var YdotPsubQ = Y1*PsubQ1 + Y2*PsubQ2 + Y3*PsubQ3; + var XdotX = X1*X1 + X2*X2 + X3*X3; + var YdotY = Y1*Y1 + Y2*Y2 + Y3*Y3; + var XdotY = X1*Y1 + X2*Y2 + X3*Y3; + var k = (XdotQsubP * YdotY / XdotX + XdotY * YdotPsubQ) / (YdotY - XdotY * XdotY); + return Vector.create([P[0] + k*X1, P[1] + k*X2, P[2] + k*X3]); + }, + + // Returns the point on the line that is closest to the given point or line/line segment + pointClosestTo: function(obj) { + if (obj.start && obj.end) { + // obj is a line segment + var P = obj.pointClosestTo(this); + return (P === null) ? null : this.pointClosestTo(P); + } else if (obj.direction) { + // obj is a line + if (this.intersects(obj)) { return this.intersectionWith(obj); } + if (this.isParallelTo(obj)) { return null; } + var D = this.direction.elements, E = obj.direction.elements; + var D1 = D[0], D2 = D[1], D3 = D[2], E1 = E[0], E2 = E[1], E3 = E[2]; + // Create plane containing obj and the shared normal and intersect this with it + // Thank you: http://www.cgafaq.info/wiki/Line-line_distance + var x = (D3 * E1 - D1 * E3), y = (D1 * E2 - D2 * E1), z = (D2 * E3 - D3 * E2); + var N = [x * E3 - y * E2, y * E1 - z * E3, z * E2 - x * E1]; + var P = Plane.create(obj.anchor, N); + return P.intersectionWith(this); + } else { + // obj is a point + var P = obj.elements || obj; + if (this.contains(P)) { return Vector.create(P); } + var A = this.anchor.elements, D = this.direction.elements; + var D1 = D[0], D2 = D[1], D3 = D[2], A1 = A[0], A2 = A[1], A3 = A[2]; + var x = D1 * (P[1]-A2) - D2 * (P[0]-A1), y = D2 * ((P[2] || 0) - A3) - D3 * (P[1]-A2), + z = D3 * (P[0]-A1) - D1 * ((P[2] || 0) - A3); + var V = Vector.create([D2 * x - D3 * z, D3 * y - D1 * x, D1 * z - D2 * y]); + var k = this.distanceFrom(P) / V.modulus(); + return Vector.create([ + P[0] + V.elements[0] * k, + P[1] + V.elements[1] * k, + (P[2] || 0) + V.elements[2] * k + ]); + } + }, + + // Returns a copy of the line rotated by t radians about the given line. Works by + // finding the argument's closest point to this line's anchor point (call this C) and + // rotating the anchor about C. Also rotates the line's direction about the argument's. + // Be careful with this - the rotation axis' direction affects the outcome! + rotate: function(t, line) { + // If we're working in 2D + if (typeof(line.direction) == 'undefined') { line = Line.create(line.to3D(), Vector.k); } + var R = Matrix.Rotation(t, line.direction).elements; + var C = line.pointClosestTo(this.anchor).elements; + var A = this.anchor.elements, D = this.direction.elements; + var C1 = C[0], C2 = C[1], C3 = C[2], A1 = A[0], A2 = A[1], A3 = A[2]; + var x = A1 - C1, y = A2 - C2, z = A3 - C3; + return Line.create([ + C1 + R[0][0] * x + R[0][1] * y + R[0][2] * z, + C2 + R[1][0] * x + R[1][1] * y + R[1][2] * z, + C3 + R[2][0] * x + R[2][1] * y + R[2][2] * z + ], [ + R[0][0] * D[0] + R[0][1] * D[1] + R[0][2] * D[2], + R[1][0] * D[0] + R[1][1] * D[1] + R[1][2] * D[2], + R[2][0] * D[0] + R[2][1] * D[1] + R[2][2] * D[2] + ]); + }, + + // Returns a copy of the line with its direction vector reversed. + // Useful when using lines for rotations. + reverse: function() { + return Line.create(this.anchor, this.direction.x(-1)); + }, + + // Returns the line's reflection in the given point or line + reflectionIn: function(obj) { + if (obj.normal) { + // obj is a plane + var A = this.anchor.elements, D = this.direction.elements; + var A1 = A[0], A2 = A[1], A3 = A[2], D1 = D[0], D2 = D[1], D3 = D[2]; + var newA = this.anchor.reflectionIn(obj).elements; + // Add the line's direction vector to its anchor, then mirror that in the plane + var AD1 = A1 + D1, AD2 = A2 + D2, AD3 = A3 + D3; + var Q = obj.pointClosestTo([AD1, AD2, AD3]).elements; + var newD = [Q[0] + (Q[0] - AD1) - newA[0], Q[1] + (Q[1] - AD2) - newA[1], Q[2] + (Q[2] - AD3) - newA[2]]; + return Line.create(newA, newD); + } else if (obj.direction) { + // obj is a line - reflection obtained by rotating PI radians about obj + return this.rotate(Math.PI, obj); + } else { + // obj is a point - just reflect the line's anchor in it + var P = obj.elements || obj; + return Line.create(this.anchor.reflectionIn([P[0], P[1], (P[2] || 0)]), this.direction); + } + }, + + // Set the line's anchor point and direction. + setVectors: function(anchor, direction) { + // Need to do this so that line's properties are not + // references to the arguments passed in + anchor = Vector.create(anchor); + direction = Vector.create(direction); + if (anchor.elements.length == 2) {anchor.elements.push(0); } + if (direction.elements.length == 2) { direction.elements.push(0); } + if (anchor.elements.length > 3 || direction.elements.length > 3) { return null; } + var mod = direction.modulus(); + if (mod === 0) { return null; } + this.anchor = anchor; + this.direction = Vector.create([ + direction.elements[0] / mod, + direction.elements[1] / mod, + direction.elements[2] / mod + ]); + return this; + } +}; + +// Constructor function +Line.create = function(anchor, direction) { + var L = new Line(); + return L.setVectors(anchor, direction); +}; + +// Axes +Line.X = Line.create(Vector.Zero(3), Vector.i); +Line.Y = Line.create(Vector.Zero(3), Vector.j); +Line.Z = Line.create(Vector.Zero(3), Vector.k); + +module.exports = Line; + +},{"./vector":74,"./matrix":76,"./plane":77,"./sylvester":79}],77:[function(require,module,exports){ +// Copyright (c) 2011, Chris Umbel, James Coglan +// Plane class - depends on Vector. Some methods require Matrix and Line. +var Vector = require('./vector'); +var Matrix = require('./matrix'); +var Line = require('./line'); + +var Sylvester = require('./sylvester'); + +function Plane() {} +Plane.prototype = { + + // Returns true iff the plane occupies the same space as the argument + eql: function(plane) { + return (this.contains(plane.anchor) && this.isParallelTo(plane)); + }, + + // Returns a copy of the plane + dup: function() { + return Plane.create(this.anchor, this.normal); + }, + + // Returns the result of translating the plane by the given vector + translate: function(vector) { + var V = vector.elements || vector; + return Plane.create([ + this.anchor.elements[0] + V[0], + this.anchor.elements[1] + V[1], + this.anchor.elements[2] + (V[2] || 0) + ], this.normal); + }, + + // Returns true iff the plane is parallel to the argument. Will return true + // if the planes are equal, or if you give a line and it lies in the plane. + isParallelTo: function(obj) { + var theta; + if (obj.normal) { + // obj is a plane + theta = this.normal.angleFrom(obj.normal); return (Math.abs(theta) <= Sylvester.precision || Math.abs(Math.PI - theta) <= Sylvester.precision); } else if (obj.direction) { // obj is a line @@ -15024,319 +15258,86 @@ Plane.prototype = { var A1 = anchor.elements[0], A2 = anchor.elements[1], A3 = anchor.elements[2]; var v11 = v1.elements[0], v12 = v1.elements[1], v13 = v1.elements[2]; var normal, mod; - if (v2 !== null) { - var v21 = v2.elements[0], v22 = v2.elements[1], v23 = v2.elements[2]; - normal = Vector.create([ - (v12 - A2) * (v23 - A3) - (v13 - A3) * (v22 - A2), - (v13 - A3) * (v21 - A1) - (v11 - A1) * (v23 - A3), - (v11 - A1) * (v22 - A2) - (v12 - A2) * (v21 - A1) - ]); - mod = normal.modulus(); - if (mod === 0) { return null; } - normal = Vector.create([normal.elements[0] / mod, normal.elements[1] / mod, normal.elements[2] / mod]); - } else { - mod = Math.sqrt(v11*v11 + v12*v12 + v13*v13); - if (mod === 0) { return null; } - normal = Vector.create([v1.elements[0] / mod, v1.elements[1] / mod, v1.elements[2] / mod]); - } - this.anchor = anchor; - this.normal = normal; - return this; - } -}; - -// Constructor function -Plane.create = function(anchor, v1, v2) { - var P = new Plane(); - return P.setVectors(anchor, v1, v2); -}; - -// X-Y-Z planes -Plane.XY = Plane.create(Vector.Zero(3), Vector.k); -Plane.YZ = Plane.create(Vector.Zero(3), Vector.i); -Plane.ZX = Plane.create(Vector.Zero(3), Vector.j); -Plane.YX = Plane.XY; Plane.ZY = Plane.YZ; Plane.XZ = Plane.ZX; - -// Returns the plane containing the given points (can be arrays as -// well as vectors). If the points are not coplanar, returns null. -Plane.fromPoints = function(points) { - var np = points.length, list = [], i, P, n, N, A, B, C, D, theta, prevN, totalN = Vector.Zero(3); - for (i = 0; i < np; i++) { - P = Vector.create(points[i]).to3D(); - if (P === null) { return null; } - list.push(P); - n = list.length; - if (n > 2) { - // Compute plane normal for the latest three points - A = list[n-1].elements; B = list[n-2].elements; C = list[n-3].elements; - N = Vector.create([ - (A[1] - B[1]) * (C[2] - B[2]) - (A[2] - B[2]) * (C[1] - B[1]), - (A[2] - B[2]) * (C[0] - B[0]) - (A[0] - B[0]) * (C[2] - B[2]), - (A[0] - B[0]) * (C[1] - B[1]) - (A[1] - B[1]) * (C[0] - B[0]) - ]).toUnitVector(); - if (n > 3) { - // If the latest normal is not (anti)parallel to the previous one, we've strayed off the plane. - // This might be a slightly long-winded way of doing things, but we need the sum of all the normals - // to find which way the plane normal should point so that the points form an anticlockwise list. - theta = N.angleFrom(prevN); - if (theta !== null) { - if (!(Math.abs(theta) <= Sylvester.precision || Math.abs(theta - Math.PI) <= Sylvester.precision)) { return null; } - } - } - totalN = totalN.add(N); - prevN = N; - } - } - // We need to add in the normals at the start and end points, which the above misses out - A = list[1].elements; B = list[0].elements; C = list[n-1].elements; D = list[n-2].elements; - totalN = totalN.add(Vector.create([ - (A[1] - B[1]) * (C[2] - B[2]) - (A[2] - B[2]) * (C[1] - B[1]), - (A[2] - B[2]) * (C[0] - B[0]) - (A[0] - B[0]) * (C[2] - B[2]), - (A[0] - B[0]) * (C[1] - B[1]) - (A[1] - B[1]) * (C[0] - B[0]) - ]).toUnitVector()).add(Vector.create([ - (B[1] - C[1]) * (D[2] - C[2]) - (B[2] - C[2]) * (D[1] - C[1]), - (B[2] - C[2]) * (D[0] - C[0]) - (B[0] - C[0]) * (D[2] - C[2]), - (B[0] - C[0]) * (D[1] - C[1]) - (B[1] - C[1]) * (D[0] - C[0]) - ]).toUnitVector()); - return Plane.create(list[0], totalN); -}; - -module.exports = Plane; - -},{"./vector":74,"./matrix":75,"./line":77,"./sylvester":79}],77:[function(require,module,exports){ -// Copyright (c) 2011, Chris Umbel, James Coglan -var Vector = require('./vector'); -var Matrix = require('./matrix'); -var Plane = require('./plane'); -var Sylvester = require('./sylvester'); - -// Line class - depends on Vector, and some methods require Matrix and Plane. - -function Line() {} -Line.prototype = { - - // Returns true if the argument occupies the same space as the line - eql: function(line) { - return (this.isParallelTo(line) && this.contains(line.anchor)); - }, - - // Returns a copy of the line - dup: function() { - return Line.create(this.anchor, this.direction); - }, - - // Returns the result of translating the line by the given vector/array - translate: function(vector) { - var V = vector.elements || vector; - return Line.create([ - this.anchor.elements[0] + V[0], - this.anchor.elements[1] + V[1], - this.anchor.elements[2] + (V[2] || 0) - ], this.direction); - }, - - // Returns true if the line is parallel to the argument. Here, 'parallel to' - // means that the argument's direction is either parallel or antiparallel to - // the line's own direction. A line is parallel to a plane if the two do not - // have a unique intersection. - isParallelTo: function(obj) { - if (obj.normal || (obj.start && obj.end)) { return obj.isParallelTo(this); } - var theta = this.direction.angleFrom(obj.direction); - return (Math.abs(theta) <= Sylvester.precision || Math.abs(theta - Math.PI) <= Sylvester.precision); - }, - - // Returns the line's perpendicular distance from the argument, - // which can be a point, a line or a plane - distanceFrom: function(obj) { - if (obj.normal || (obj.start && obj.end)) { return obj.distanceFrom(this); } - if (obj.direction) { - // obj is a line - if (this.isParallelTo(obj)) { return this.distanceFrom(obj.anchor); } - var N = this.direction.cross(obj.direction).toUnitVector().elements; - var A = this.anchor.elements, B = obj.anchor.elements; - return Math.abs((A[0] - B[0]) * N[0] + (A[1] - B[1]) * N[1] + (A[2] - B[2]) * N[2]); - } else { - // obj is a point - var P = obj.elements || obj; - var A = this.anchor.elements, D = this.direction.elements; - var PA1 = P[0] - A[0], PA2 = P[1] - A[1], PA3 = (P[2] || 0) - A[2]; - var modPA = Math.sqrt(PA1*PA1 + PA2*PA2 + PA3*PA3); - if (modPA === 0) return 0; - // Assumes direction vector is normalized - var cosTheta = (PA1 * D[0] + PA2 * D[1] + PA3 * D[2]) / modPA; - var sin2 = 1 - cosTheta*cosTheta; - return Math.abs(modPA * Math.sqrt(sin2 < 0 ? 0 : sin2)); - } - }, - - // Returns true iff the argument is a point on the line, or if the argument - // is a line segment lying within the receiver - contains: function(obj) { - if (obj.start && obj.end) { return this.contains(obj.start) && this.contains(obj.end); } - var dist = this.distanceFrom(obj); - return (dist !== null && dist <= Sylvester.precision); - }, - - // Returns the distance from the anchor of the given point. Negative values are - // returned for points that are in the opposite direction to the line's direction from - // the line's anchor point. - positionOf: function(point) { - if (!this.contains(point)) { return null; } - var P = point.elements || point; - var A = this.anchor.elements, D = this.direction.elements; - return (P[0] - A[0]) * D[0] + (P[1] - A[1]) * D[1] + ((P[2] || 0) - A[2]) * D[2]; - }, - - // Returns true iff the line lies in the given plane - liesIn: function(plane) { - return plane.contains(this); - }, - - // Returns true iff the line has a unique point of intersection with the argument - intersects: function(obj) { - if (obj.normal) { return obj.intersects(this); } - return (!this.isParallelTo(obj) && this.distanceFrom(obj) <= Sylvester.precision); - }, - - // Returns the unique intersection point with the argument, if one exists - intersectionWith: function(obj) { - if (obj.normal || (obj.start && obj.end)) { return obj.intersectionWith(this); } - if (!this.intersects(obj)) { return null; } - var P = this.anchor.elements, X = this.direction.elements, - Q = obj.anchor.elements, Y = obj.direction.elements; - var X1 = X[0], X2 = X[1], X3 = X[2], Y1 = Y[0], Y2 = Y[1], Y3 = Y[2]; - var PsubQ1 = P[0] - Q[0], PsubQ2 = P[1] - Q[1], PsubQ3 = P[2] - Q[2]; - var XdotQsubP = - X1*PsubQ1 - X2*PsubQ2 - X3*PsubQ3; - var YdotPsubQ = Y1*PsubQ1 + Y2*PsubQ2 + Y3*PsubQ3; - var XdotX = X1*X1 + X2*X2 + X3*X3; - var YdotY = Y1*Y1 + Y2*Y2 + Y3*Y3; - var XdotY = X1*Y1 + X2*Y2 + X3*Y3; - var k = (XdotQsubP * YdotY / XdotX + XdotY * YdotPsubQ) / (YdotY - XdotY * XdotY); - return Vector.create([P[0] + k*X1, P[1] + k*X2, P[2] + k*X3]); - }, - - // Returns the point on the line that is closest to the given point or line/line segment - pointClosestTo: function(obj) { - if (obj.start && obj.end) { - // obj is a line segment - var P = obj.pointClosestTo(this); - return (P === null) ? null : this.pointClosestTo(P); - } else if (obj.direction) { - // obj is a line - if (this.intersects(obj)) { return this.intersectionWith(obj); } - if (this.isParallelTo(obj)) { return null; } - var D = this.direction.elements, E = obj.direction.elements; - var D1 = D[0], D2 = D[1], D3 = D[2], E1 = E[0], E2 = E[1], E3 = E[2]; - // Create plane containing obj and the shared normal and intersect this with it - // Thank you: http://www.cgafaq.info/wiki/Line-line_distance - var x = (D3 * E1 - D1 * E3), y = (D1 * E2 - D2 * E1), z = (D2 * E3 - D3 * E2); - var N = [x * E3 - y * E2, y * E1 - z * E3, z * E2 - x * E1]; - var P = Plane.create(obj.anchor, N); - return P.intersectionWith(this); - } else { - // obj is a point - var P = obj.elements || obj; - if (this.contains(P)) { return Vector.create(P); } - var A = this.anchor.elements, D = this.direction.elements; - var D1 = D[0], D2 = D[1], D3 = D[2], A1 = A[0], A2 = A[1], A3 = A[2]; - var x = D1 * (P[1]-A2) - D2 * (P[0]-A1), y = D2 * ((P[2] || 0) - A3) - D3 * (P[1]-A2), - z = D3 * (P[0]-A1) - D1 * ((P[2] || 0) - A3); - var V = Vector.create([D2 * x - D3 * z, D3 * y - D1 * x, D1 * z - D2 * y]); - var k = this.distanceFrom(P) / V.modulus(); - return Vector.create([ - P[0] + V.elements[0] * k, - P[1] + V.elements[1] * k, - (P[2] || 0) + V.elements[2] * k - ]); - } - }, - - // Returns a copy of the line rotated by t radians about the given line. Works by - // finding the argument's closest point to this line's anchor point (call this C) and - // rotating the anchor about C. Also rotates the line's direction about the argument's. - // Be careful with this - the rotation axis' direction affects the outcome! - rotate: function(t, line) { - // If we're working in 2D - if (typeof(line.direction) == 'undefined') { line = Line.create(line.to3D(), Vector.k); } - var R = Matrix.Rotation(t, line.direction).elements; - var C = line.pointClosestTo(this.anchor).elements; - var A = this.anchor.elements, D = this.direction.elements; - var C1 = C[0], C2 = C[1], C3 = C[2], A1 = A[0], A2 = A[1], A3 = A[2]; - var x = A1 - C1, y = A2 - C2, z = A3 - C3; - return Line.create([ - C1 + R[0][0] * x + R[0][1] * y + R[0][2] * z, - C2 + R[1][0] * x + R[1][1] * y + R[1][2] * z, - C3 + R[2][0] * x + R[2][1] * y + R[2][2] * z - ], [ - R[0][0] * D[0] + R[0][1] * D[1] + R[0][2] * D[2], - R[1][0] * D[0] + R[1][1] * D[1] + R[1][2] * D[2], - R[2][0] * D[0] + R[2][1] * D[1] + R[2][2] * D[2] - ]); - }, - - // Returns a copy of the line with its direction vector reversed. - // Useful when using lines for rotations. - reverse: function() { - return Line.create(this.anchor, this.direction.x(-1)); - }, - - // Returns the line's reflection in the given point or line - reflectionIn: function(obj) { - if (obj.normal) { - // obj is a plane - var A = this.anchor.elements, D = this.direction.elements; - var A1 = A[0], A2 = A[1], A3 = A[2], D1 = D[0], D2 = D[1], D3 = D[2]; - var newA = this.anchor.reflectionIn(obj).elements; - // Add the line's direction vector to its anchor, then mirror that in the plane - var AD1 = A1 + D1, AD2 = A2 + D2, AD3 = A3 + D3; - var Q = obj.pointClosestTo([AD1, AD2, AD3]).elements; - var newD = [Q[0] + (Q[0] - AD1) - newA[0], Q[1] + (Q[1] - AD2) - newA[1], Q[2] + (Q[2] - AD3) - newA[2]]; - return Line.create(newA, newD); - } else if (obj.direction) { - // obj is a line - reflection obtained by rotating PI radians about obj - return this.rotate(Math.PI, obj); + if (v2 !== null) { + var v21 = v2.elements[0], v22 = v2.elements[1], v23 = v2.elements[2]; + normal = Vector.create([ + (v12 - A2) * (v23 - A3) - (v13 - A3) * (v22 - A2), + (v13 - A3) * (v21 - A1) - (v11 - A1) * (v23 - A3), + (v11 - A1) * (v22 - A2) - (v12 - A2) * (v21 - A1) + ]); + mod = normal.modulus(); + if (mod === 0) { return null; } + normal = Vector.create([normal.elements[0] / mod, normal.elements[1] / mod, normal.elements[2] / mod]); } else { - // obj is a point - just reflect the line's anchor in it - var P = obj.elements || obj; - return Line.create(this.anchor.reflectionIn([P[0], P[1], (P[2] || 0)]), this.direction); + mod = Math.sqrt(v11*v11 + v12*v12 + v13*v13); + if (mod === 0) { return null; } + normal = Vector.create([v1.elements[0] / mod, v1.elements[1] / mod, v1.elements[2] / mod]); } - }, - - // Set the line's anchor point and direction. - setVectors: function(anchor, direction) { - // Need to do this so that line's properties are not - // references to the arguments passed in - anchor = Vector.create(anchor); - direction = Vector.create(direction); - if (anchor.elements.length == 2) {anchor.elements.push(0); } - if (direction.elements.length == 2) { direction.elements.push(0); } - if (anchor.elements.length > 3 || direction.elements.length > 3) { return null; } - var mod = direction.modulus(); - if (mod === 0) { return null; } this.anchor = anchor; - this.direction = Vector.create([ - direction.elements[0] / mod, - direction.elements[1] / mod, - direction.elements[2] / mod - ]); + this.normal = normal; return this; } }; // Constructor function -Line.create = function(anchor, direction) { - var L = new Line(); - return L.setVectors(anchor, direction); +Plane.create = function(anchor, v1, v2) { + var P = new Plane(); + return P.setVectors(anchor, v1, v2); }; -// Axes -Line.X = Line.create(Vector.Zero(3), Vector.i); -Line.Y = Line.create(Vector.Zero(3), Vector.j); -Line.Z = Line.create(Vector.Zero(3), Vector.k); +// X-Y-Z planes +Plane.XY = Plane.create(Vector.Zero(3), Vector.k); +Plane.YZ = Plane.create(Vector.Zero(3), Vector.i); +Plane.ZX = Plane.create(Vector.Zero(3), Vector.j); +Plane.YX = Plane.XY; Plane.ZY = Plane.YZ; Plane.XZ = Plane.ZX; -module.exports = Line; +// Returns the plane containing the given points (can be arrays as +// well as vectors). If the points are not coplanar, returns null. +Plane.fromPoints = function(points) { + var np = points.length, list = [], i, P, n, N, A, B, C, D, theta, prevN, totalN = Vector.Zero(3); + for (i = 0; i < np; i++) { + P = Vector.create(points[i]).to3D(); + if (P === null) { return null; } + list.push(P); + n = list.length; + if (n > 2) { + // Compute plane normal for the latest three points + A = list[n-1].elements; B = list[n-2].elements; C = list[n-3].elements; + N = Vector.create([ + (A[1] - B[1]) * (C[2] - B[2]) - (A[2] - B[2]) * (C[1] - B[1]), + (A[2] - B[2]) * (C[0] - B[0]) - (A[0] - B[0]) * (C[2] - B[2]), + (A[0] - B[0]) * (C[1] - B[1]) - (A[1] - B[1]) * (C[0] - B[0]) + ]).toUnitVector(); + if (n > 3) { + // If the latest normal is not (anti)parallel to the previous one, we've strayed off the plane. + // This might be a slightly long-winded way of doing things, but we need the sum of all the normals + // to find which way the plane normal should point so that the points form an anticlockwise list. + theta = N.angleFrom(prevN); + if (theta !== null) { + if (!(Math.abs(theta) <= Sylvester.precision || Math.abs(theta - Math.PI) <= Sylvester.precision)) { return null; } + } + } + totalN = totalN.add(N); + prevN = N; + } + } + // We need to add in the normals at the start and end points, which the above misses out + A = list[1].elements; B = list[0].elements; C = list[n-1].elements; D = list[n-2].elements; + totalN = totalN.add(Vector.create([ + (A[1] - B[1]) * (C[2] - B[2]) - (A[2] - B[2]) * (C[1] - B[1]), + (A[2] - B[2]) * (C[0] - B[0]) - (A[0] - B[0]) * (C[2] - B[2]), + (A[0] - B[0]) * (C[1] - B[1]) - (A[1] - B[1]) * (C[0] - B[0]) + ]).toUnitVector()).add(Vector.create([ + (B[1] - C[1]) * (D[2] - C[2]) - (B[2] - C[2]) * (D[1] - C[1]), + (B[2] - C[2]) * (D[0] - C[0]) - (B[0] - C[0]) * (D[2] - C[2]), + (B[0] - C[0]) * (D[1] - C[1]) - (B[1] - C[1]) * (D[0] - C[0]) + ]).toUnitVector()); + return Plane.create(list[0], totalN); +}; + +module.exports = Plane; -},{"./vector":74,"./matrix":75,"./plane":76,"./sylvester":79}],78:[function(require,module,exports){ +},{"./vector":74,"./matrix":76,"./line":75,"./sylvester":79}],78:[function(require,module,exports){ // Copyright (c) 2011, Chris Umbel, James Coglan // Line.Segment class - depends on Line and its dependencies. @@ -15464,7 +15465,7 @@ Line.Segment.create = function(v1, v2) { module.exports = Line.Segment; -},{"./line":77,"./vector":74}],75:[function(require,module,exports){ +},{"./line":75,"./vector":74}],76:[function(require,module,exports){ // Copyright (c) 2011, Chris Umbel, James Coglan // Matrix class - depends on Vector. @@ -17218,46 +17219,7 @@ Object.keys(ffi.NON_SPECIFIC_TYPES).forEach(function (type) { Pointer.NULL = new Pointer(0) })(require("__browserify_buffer").Buffer) -},{"util":40,"./ffi":83,"__browserify_buffer":61}],85:[function(require,module,exports){ -var ffi = require('./ffi') - -/** - * CIF provides a JS interface for the libffi "callback info" (CIF) structure. - * TODO: Deprecate this class. Turn this into a simple function that returns the - * CIF pointer. - */ - -function CIF (rtype, types) { - - if (!ffi.isValidReturnType(rtype)) { - throw new Error('Invalid Return Type: ' + rtype) - } - - var numArgs = types.length - - this._argtypesptr = new ffi.Pointer(types.length * ffi.Bindings.FFI_TYPE_SIZE) - this._rtypeptr = ffi.ffiTypeFor(rtype) - - var tptr = this._argtypesptr.clone() - - for (var i=0; i

', {id: 'text'}); +// $(obj).empty(); $(obj).replaceWith(rawObj); _.each(toks, function(tok) { @@ -185,7 +186,7 @@ var markWords = function(obj, report) { } else { var newObj = $(""+tok+" "); rawObj.append(newObj); - report.error("The word '"+tok+"' is uncommon", newObj); + report.error("The word '"+tok+"' is uncommon", newObj.get(0)); } }); }; -- cgit v1.2.3