Merge branch 'master' of github.com:rcoh/SmootLight

author: Daniel <dmt@daniel-desktop.(none)> 2011-01-29 18:54:50 -0800
committer: Daniel <dmt@daniel-desktop.(none)> 2011-01-29 18:54:50 -0800
commit: ff07b18748c64243c1c6bc62f489bfd03205d13a (patch)
tree: db95373a3511be0dd1e700a78e9f1ea7320769a4 /inputs/HTMLInput.py
parent: 83931a3c8e65b4018e98b4986458d1df7172ab91 (diff)
parent: 277a5143165d2553ce5e97f151cc6b3cea426468 (diff)
1 files changed, 29 insertions, 0 deletions
diff --git a/inputs/HTMLInput.py b/inputs/HTMLInput.py
new file mode 100755
index 0000000..9697a32
--- /dev/null
+++ b/inputs/HTMLInput.py
@@ -0,0 +1,29 @@
+from operationscore.Input import *
+import urllib, re
+
+"""
+HTML Input, which takes 2 arguments:
+- 'Src': a URL to a web page, and
+- 'Regex': a Regex to parse data out of the web page.
+The input searches the page source for the first match of the regex and responds with that match's captured groups.
+"""
+class HTMLInput(Input):
+    """Input that fetches 'Src' and responds with the groups of the first 'Regex' match."""
+    def inputInit(self):  # copy the configured URL and pattern out of argDict
+        self.src = self.argDict['Src']
+        self.regex = self.argDict['Regex']
+
+    def getHTML(self):  # download the page at self.src into self.html
+        self.sock = urllib.urlopen(self.src)
+        try:
+            self.html = self.sock.read()
+        finally:
+            self.sock.close()  # release the connection even when read() raises
+
+    def sensingLoop(self):  # fetch the page, then respond with the captured groups
+        self.getHTML()
+        matchObj = re.search(self.regex, self.html)
+        self.dataList = matchObj.groups() if matchObj else []
+        if matchObj:  # search() returns None on no match; skip respond() instead of crashing
+            self.respond(self.dataList)
+
author	Daniel <dmt@daniel-desktop.(none)>	2011-01-29 18:54:50 -0800
committer	Daniel <dmt@daniel-desktop.(none)>	2011-01-29 18:54:50 -0800
commit	ff07b18748c64243c1c6bc62f489bfd03205d13a (patch)
tree	db95373a3511be0dd1e700a78e9f1ea7320769a4 /inputs/HTMLInput.py
parent	83931a3c8e65b4018e98b4986458d1df7172ab91 (diff)
parent	277a5143165d2553ce5e97f151cc6b3cea426468 (diff)