levenshtein: strip domain; write to persistent struct
This commit is contained in:
13
urlbot.py
13
urlbot.py
@@ -133,9 +133,16 @@ def extract_url(data):
|
|||||||
(status, title) = extract_title(r)
|
(status, title) = extract_title(r)
|
||||||
|
|
||||||
if 0 == status:
|
if 0 == status:
|
||||||
message = 'lev=%d/%d:%d Title: %s: %s' %(
|
title = title.strip()
|
||||||
levenshtein(r, title.strip()), len(title.strip()), len(r), title.strip(), r
|
lev_url = re.sub(r'https?://[^/]*/', '', r)
|
||||||
)
|
lev_res = levenshtein(lev_url, title)
|
||||||
|
|
||||||
|
obj = conf_load()
|
||||||
|
obj['lev'].append((lev_res, title, lev_url))
|
||||||
|
conf_save(obj)
|
||||||
|
|
||||||
|
lev_str = 'lev=%d/%d:%d ' %(lev_res, len(title), len(lev_url))
|
||||||
|
message = lev_str + 'Title: %s: %s' %(title, r)
|
||||||
elif 1 == status:
|
elif 1 == status:
|
||||||
logger('info', 'no message sent for non-text %s (%s)' %(r, title))
|
logger('info', 'no message sent for non-text %s (%s)' %(r, title))
|
||||||
continue
|
continue
|
||||||
|
|||||||
Reference in New Issue
Block a user