diff options
Diffstat (limited to 'exampleData/ruleSets/language-processing/jspos/lexer.js')
-rw-r--r-- | exampleData/ruleSets/language-processing/jspos/lexer.js | 66 |
1 files changed, 66 insertions, 0 deletions
diff --git a/exampleData/ruleSets/language-processing/jspos/lexer.js b/exampleData/ruleSets/language-processing/jspos/lexer.js new file mode 100644 index 0000000..a010701 --- /dev/null +++ b/exampleData/ruleSets/language-processing/jspos/lexer.js @@ -0,0 +1,66 @@ +/*! + * jsPOS + * + * Copyright 2010, Percy Wegmann + * Licensed under the GNU LGPLv3 license + * http://www.opensource.org/licenses/lgpl-3.0.html + */ + +function LexerNode(string, regex, regexs){ + this.string = string; + this.children = []; + if (string) { + this.matches = string.match(regex); + var childElements = string.split(regex); + } + if (!this.matches) { + this.matches = []; + var childElements = [string]; + } + if (regexs.length > 0) { + var nextRegex = regexs[0]; + var nextRegexes = regexs.slice(1); + for (var i in childElements) { + this.children.push(new LexerNode(childElements[i], nextRegex, nextRegexes)); + } + } + else { + this.children = childElements; + } +} + +LexerNode.prototype.fillArray = function(array){ + for (var i in this.children) { + var child = this.children[i]; + if (child.fillArray) + child.fillArray(array); + else if (/[^ \t\n\r]+/i.test(child)) + array.push(child); + if (i < this.matches.length) { + var match = this.matches[i]; + if (/[^ \t\n\r]+/i.test(match)) + array.push(match); + } + } +} + +LexerNode.prototype.toString = function(){ + var array = []; + this.fillArray(array); + return array.toString(); +} + +function Lexer(){ + // Split by numbers, then whitespace, then punctuation + this.regexs = [/[0-9]*\.[0-9]+|[0-9]+/ig, /[ \t\n\r]+/ig, /[\.\,\?\!]/ig]; +} + +Lexer.prototype.lex = function(string){ + var array = []; + var node = new LexerNode(string, this.regexs[0], this.regexs.slice(1)); + node.fillArray(array); + return array; +} + +//var lexer = new Lexer(); +//print(lexer.lex("I made $5.60 today in 1 hour of work. The E.M.T.'s were on time, but only barely.").toString()); |