aboutsummaryrefslogtreecommitdiff
path: root/inputs/HTMLInput.py
diff options
context:
space:
mode:
authorGravatar Daniel <dmt@daniel-desktop.(none)>2011-01-29 18:54:50 -0800
committerGravatar Daniel <dmt@daniel-desktop.(none)>2011-01-29 18:54:50 -0800
commitff07b18748c64243c1c6bc62f489bfd03205d13a (patch)
treedb95373a3511be0dd1e700a78e9f1ea7320769a4 /inputs/HTMLInput.py
parent83931a3c8e65b4018e98b4986458d1df7172ab91 (diff)
parent277a5143165d2553ce5e97f151cc6b3cea426468 (diff)
Merge branch 'master' of github.com:rcoh/SmootLight
Diffstat (limited to 'inputs/HTMLInput.py')
-rwxr-xr-xinputs/HTMLInput.py29
1 files changed, 29 insertions, 0 deletions
diff --git a/inputs/HTMLInput.py b/inputs/HTMLInput.py
new file mode 100755
index 0000000..9697a32
--- /dev/null
+++ b/inputs/HTMLInput.py
@@ -0,0 +1,29 @@
+from operationscore.Input import *
+import urllib, re
+
+"""
+HTML Input, which takes 2 arguments:
+- 'Src': a URL to a web page, and
+- 'Regex': a Regex to parse data out of the web page.
+The input parses the source code of the web page according to the regex, and processes the parsed regex groups.
+"""
+class HTMLInput(Input):
+ def inputInit(self):
+ self.src = self.argDict['Src']
+ self.regex = self.argDict['Regex']
+
+ def getHTML(self):
+ self.sock = urllib.urlopen(self.src);
+ self.html = self.sock.read()
+ self.sock.close()
+
+ def sensingLoop(self):
+ self.getHTML()
+ self.dataList = []
+
+ pattern = re.compile(self.regex)
+ matchObj = pattern.search(self.html)
+ self.dataList = matchObj.groups()
+
+ self.respond(self.dataList)
+