diff options
Diffstat (limited to 'inputs/HTMLInput.py')
-rwxr-xr-x | inputs/HTMLInput.py | 29 |
1 files changed, 29 insertions, 0 deletions
# inputs/HTMLInput.py -- new file (mode 100755), reflowed from a collapsed diff view.
from operationscore.Input import *
import re
import urllib


class HTMLInput(Input):
    """Input that pulls data out of a web page with a regular expression.

    Takes 2 arguments, read from ``self.argDict``:
    - 'Src': a URL to a web page, and
    - 'Regex': a regex to parse data out of the web page.

    The input parses the source code of the web page according to the regex
    and hands the captured regex groups to ``self.respond``.
    """

    def inputInit(self):
        """Copy the 'Src' URL and 'Regex' pattern out of ``self.argDict``."""
        self.src = self.argDict['Src']
        self.regex = self.argDict['Regex']

    def getHTML(self):
        """Fetch ``self.src`` and store the page source in ``self.html``.

        The open connection is kept on ``self.sock`` while reading and is
        always closed afterwards. Any error raised by ``urllib.urlopen`` or
        ``read()`` propagates to the caller.

        NOTE(review): ``urllib.urlopen`` is the Python 2 spelling; this
        module will need porting before it can run on Python 3.
        """
        self.sock = urllib.urlopen(self.src)
        try:
            self.html = self.sock.read()
        finally:
            # Close even when read() raises so the connection is not leaked.
            self.sock.close()

    def sensingLoop(self):
        """Fetch the page, apply the regex, and respond with its groups.

        ``self.dataList`` is reset to an empty list on every call. When the
        regex matches, it is replaced by the tuple of captured groups from
        the first match and passed to ``self.respond``. When the page does
        not match, ``self.dataList`` stays empty and ``self.respond`` is not
        called.
        """
        self.getHTML()
        self.dataList = []

        matchObj = re.search(self.regex, self.html)
        if matchObj is None:
            # search() returns None on no match; calling .groups() on it
            # would raise AttributeError, so there is nothing to report.
            return

        self.dataList = matchObj.groups()
        self.respond(self.dataList)