Package SmootLight :: Package inputs :: Module HTMLInput
[hide private]
[frames | no frames]

Source Code for Module SmootLight.inputs.HTMLInput

 1  from operationscore.Input import * 
 2  import urllib, re 
 3   
 4  """ 
 5  HTML Input, which takes 2 arguments: 
 6  - 'Src': a URL to a web page, and 
 7  - 'Regex': a Regex to parse data out of the web page. 
 8  The input parses the source code of the web page according to the regex, and processes the parsed regex groups. 
 9  """ 
class HTMLInput(Input):
    """Input that extracts data from a web page with a regular expression.

    Two entries are read from ``self.argDict``:

    - 'Src': URL of the web page to fetch.
    - 'Regex': regular expression searched against the page source.

    Each call to ``sensingLoop`` fetches the page, searches it with the
    regex and passes the captured groups to ``self.respond``.
    ``argDict`` and ``respond`` are not defined in this class; they are
    presumably supplied by the ``Input`` base class.
    """

    def inputInit(self):
        """Copy the 'Src' and 'Regex' arguments onto the instance.

        Raises:
            KeyError: if either argument is missing from ``self.argDict``.
        """
        self.src = self.argDict['Src']
        self.regex = self.argDict['Regex']

    def getHTML(self):
        """Fetch ``self.src`` and store the response body in ``self.html``."""
        self.sock = urllib.urlopen(self.src)
        try:
            self.html = self.sock.read()
        finally:
            # Close even when read() raises so the connection is not leaked.
            self.sock.close()

    def sensingLoop(self):
        """Fetch the page, apply the regex and respond with its groups.

        ``self.dataList`` is reset to an empty list on every call. When
        the regex matches, it is replaced by the tuple of captured groups
        and handed to ``self.respond``. When the regex does not match,
        ``self.dataList`` stays empty and ``self.respond`` is not called.
        """
        self.getHTML()
        self.dataList = []

        pattern = re.compile(self.regex)
        matchObj = pattern.search(self.html)
        if matchObj is None:
            # search() returns None when nothing matches; calling
            # .groups() on it would raise AttributeError.
            return

        self.dataList = matchObj.groups()
        self.respond(self.dataList)
29