diff options
author | dan <dmtaub@gmail.com> | 2011-01-28 16:11:11 -0500 |
---|---|---|
committer | dan <dmtaub@gmail.com> | 2011-01-28 16:11:11 -0500 |
commit | f017051f9b93c3a8ac435537c8c9c1f360d0702f (patch) | |
tree | 3f0a9e01ddcb87af2e1b016dd8489be572641ca0 /inputs/HTMLInput.py | |
parent | fac2f8373cd0e63a34a39cb77c0c7276d1d88b65 (diff) | |
parent | a7d6577e55ebd665ad9e4f45183836f11b3c6fd4 (diff) |
Merge branch 'fridaydemo' of github.com:rcoh/SmootLight into osc
Diffstat (limited to 'inputs/HTMLInput.py')
-rwxr-xr-x | inputs/HTMLInput.py | 29 |
1 files changed, 29 insertions, 0 deletions
# Source: inputs/HTMLInput.py (new file, mode 100755, blob 9697a32)
from operationscore.Input import *
# Imported after the star import so these module names cannot be shadowed by it.
import urllib, re


class HTMLInput(Input):
    """HTML Input, which takes 2 arguments:

    - 'Src': a URL to a web page, and
    - 'Regex': a Regex to parse data out of the web page.

    The input parses the source code of the web page according to the regex,
    and processes the parsed regex groups.
    """

    def inputInit(self):
        """Read the 'Src' URL and the 'Regex' pattern from ``self.argDict``."""
        self.src = self.argDict['Src']
        self.regex = self.argDict['Regex']

    def getHTML(self):
        """Download ``self.src`` and store the response body in ``self.html``."""
        # NOTE: urllib.urlopen is the Python 2 spelling; left unchanged on purpose.
        self.sock = urllib.urlopen(self.src)
        try:
            self.html = self.sock.read()
        finally:
            # Close even when read() raises so the connection is not leaked.
            self.sock.close()

    def sensingLoop(self):
        """Fetch the page, apply the regex, and respond with the captured groups.

        ``self.dataList`` is reset to an empty list on every call. When the
        regex matches, it becomes the tuple of captured groups and is passed
        to ``self.respond``. When the regex does not match, ``self.dataList``
        stays empty and ``self.respond`` is not called.
        """
        self.getHTML()
        self.dataList = []

        matchObj = re.compile(self.regex).search(self.html)
        if matchObj is None:
            # search() returns None on no match; calling .groups() on it
            # would raise AttributeError.
            return

        self.dataList = matchObj.groups()
        self.respond(self.dataList)