1
0
mirror of http://aero2k.de/t/repos/urlbot-native.git synced 2017-09-06 15:25:38 +02:00

testing Levenshtein distance for (url, title)

This commit is contained in:
urlbot
2014-09-29 00:11:40 +02:00
parent eba70a5ed0
commit 98dd94fc63
2 changed files with 24 additions and 1 deletions

View File

@@ -133,7 +133,9 @@ def extract_url(data):
(status, title) = extract_title(r)
if 0 == status:
message = 'Title: %s: %s' % (title.strip(), r)
message = 'lev=%d/%d:%d Title: %s: %s' %(
levenshtein(r, title.strip()), len(title.strip()), len(r), title.strip(), r
)
elif 1 == status:
logger('info', 'no message sent for non-text %s (%s)' %(r, title))
continue