fetching works

This commit is contained in:
chat
2014-07-21 00:53:26 +02:00
parent 4f1ebdeebf
commit 30c1964669

View File

@@ -1,6 +1,6 @@
#!/usr/bin/python #!/usr/bin/python
import sys, os, re, time import sys, os, re, time, urllib
BUFSIZ = 8192 BUFSIZ = 8192
delay = 0.100 # seconds delay = 0.100 # seconds
@@ -17,20 +17,37 @@ def debug_enabled():
return False return False
def e(data): def e(data):
return data.encode('string-escape') if data:
return data.encode('string-escape')
else:
return "''"
def logger(severity, message): def logger(severity, message):
if \ # sev = ( 'err', 'warn', 'info' )
'err' == severity or \ # if severity in sev:
'warn' == severity or \ sys.stderr.write(e('%s: %s: %s' %(sys.argv[0], severity, message)) + '\n')
'info' == severity:
sys.stderr.write(e(sys.argv[0] + ': ' + message) + '\n') def fetch_page(url):
logger('info', 'fetching page ' + url)
response = urllib.urlopen(url)
html = response.read(BUFSIZ)
response.close()
return html
def extract_title(url):
    """Return the text of the <title> element of the page at url, or None.

    The page is retrieved with fetch_page(). The title tag is matched
    case-insensitively and its content may span several lines; runs of
    whitespace (including newlines) are collapsed to single spaces, so the
    returned title is always a single line.

    Returns None when nothing was fetched or no <title> element is found.
    """
    logger('info', 'extracting title from ' + url)
    html = fetch_page(url)
    if not html:
        # Nothing was fetched, so there is nothing to search.
        return None
    # search() scans the whole document, so no leading '.*?' is needed.
    # '\b[^>]*' accepts attributes on the tag but rejects other tags whose
    # name merely starts with "title".
    result = re.search(r'<title\b[^>]*>(.*?)</title>', html, re.S | re.I)
    if result is None:
        return None
    # Collapse whitespace so a multi-line title cannot break a one-line
    # message built from it.
    return ' '.join(result.group(1).split())
def extract_url(data): def extract_url(data):
result = re.findall("(https?://[^\s]+)", data) result = re.findall("(https?://[^\s]+)", data)
if result: if result:
for r in result: for r in result:
message = '/say yeah, URL found: %s' % e(r) title = extract_title(r)
message = '/say Title: %s: %s' % (title, e(r))
logger('info', 'printing ' + message) logger('info', 'printing ' + message)
if debug_enabled(): if debug_enabled():