diff options
author | Daniel <dmt@daniel-desktop.(none)> | 2011-01-29 18:54:50 -0800 |
---|---|---|
committer | Daniel <dmt@daniel-desktop.(none)> | 2011-01-29 18:54:50 -0800 |
commit | ff07b18748c64243c1c6bc62f489bfd03205d13a (patch) | |
tree | db95373a3511be0dd1e700a78e9f1ea7320769a4 /inputs/HTMLInput.py | |
parent | 83931a3c8e65b4018e98b4986458d1df7172ab91 (diff) | |
parent | 277a5143165d2553ce5e97f151cc6b3cea426468 (diff) |
Merge branch 'master' of github.com:rcoh/SmootLight
Diffstat (limited to 'inputs/HTMLInput.py')
-rwxr-xr-x | inputs/HTMLInput.py | 29 |
1 files changed, 29 insertions, 0 deletions
# inputs/HTMLInput.py -- new file (mode 100755) added by this commit.
from operationscore.Input import *
import urllib, re


class HTMLInput(Input):
    """
    HTML Input, which takes 2 arguments:
    - 'Src': a URL to a web page, and
    - 'Regex': a Regex to parse data out of the web page.
    The input parses the source code of the web page according to the regex,
    and processes the parsed regex groups.
    """

    def inputInit(self):
        """Copy the 'Src' and 'Regex' arguments out of argDict."""
        self.src = self.argDict['Src']
        self.regex = self.argDict['Regex']

    def getHTML(self):
        """Download the page at self.src and store its body in self.html."""
        self.sock = urllib.urlopen(self.src)
        try:
            self.html = self.sock.read()
        finally:
            # Release the connection even when read() raises, so a failed
            # download does not leave the socket open.
            self.sock.close()

    def sensingLoop(self):
        """
        Fetch the page, apply the regex, and respond with the matched groups.

        Only the first match in the page is used. When the regex does not
        match at all, dataList is left empty and respond() is not called.
        """
        self.getHTML()
        self.dataList = []

        matchObj = re.compile(self.regex).search(self.html)
        if matchObj is None:
            # search() returns None when nothing matches; calling .groups()
            # on it would raise AttributeError, so there is nothing to report.
            return

        self.dataList = matchObj.groups()
        self.respond(self.dataList)