simple parsing of forum postings with example action inlined
This commit is contained in:
35
urlbot.py
35
urlbot.py
@@ -3,8 +3,13 @@
|
|||||||
"""
|
"""
|
||||||
The URLBot - ready to strive for desaster in YOUR jabber MUC
|
The URLBot - ready to strive for desaster in YOUR jabber MUC
|
||||||
"""
|
"""
|
||||||
|
import re
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
from common import (
|
from common import (
|
||||||
rate_limit_classes,
|
rate_limit_classes,
|
||||||
RATE_GLOBAL,
|
RATE_GLOBAL,
|
||||||
@@ -218,6 +223,8 @@ class UrlBot(IdleBot):
|
|||||||
try:
|
try:
|
||||||
reacted_on_command = self.data_parse_commands(msg_obj)
|
reacted_on_command = self.data_parse_commands(msg_obj)
|
||||||
reacted_on_parse = self.data_parse_other(msg_obj)
|
reacted_on_parse = self.data_parse_other(msg_obj)
|
||||||
|
self.data_parse_forum_thread(msg_obj)
|
||||||
|
self.data_parse_forum_post(msg_obj)
|
||||||
|
|
||||||
if (msg_obj['body'].startswith(config.conf_get('bot_nickname')) and not any(
|
if (msg_obj['body'].startswith(config.conf_get('bot_nickname')) and not any(
|
||||||
[reacted_on_command, reacted_on_parse]) and rate_limit(RATE_GLOBAL)):
|
[reacted_on_command, reacted_on_parse]) and rate_limit(RATE_GLOBAL)):
|
||||||
@@ -296,6 +303,34 @@ class UrlBot(IdleBot):
|
|||||||
reacted = True
|
reacted = True
|
||||||
return reacted
|
return reacted
|
||||||
|
|
||||||
|
def data_parse_forum_thread(self, msg_obj):
    """
    Placeholder for forum thread parsing.

    Currently does nothing with ``msg_obj`` and always returns None.
    """
    return None
|
||||||
|
|
||||||
|
def data_parse_forum_post(self, msg_obj):
    """
    Summarise debianforum.de postings that are linked in a message.

    Every debianforum.de forum link found in the message body is inspected.
    If the link names a post id (e.g. ``p=1234`` or ``#p1234``), the page is
    fetched, the post's author and direct text are extracted, and a
    "<user> posted <n> words" message is handed to ``_run_action`` using the
    first registered command plugin.

    Links that cannot be processed (no post id, download failure, post or
    author not found in the page) are skipped so the remaining links in the
    same message are still handled.

    :param msg_obj: message mapping; only ``msg_obj['body']`` is read here,
                    the object itself is passed on to ``_run_action``
    :return: None
    """
    links = re.findall(r'(https?://(?:www\.)?debianforum\.de/forum/[^\s>]+)', msg_obj['body'])
    for link in links:
        # Check for a post id first: without one there is nothing to look up,
        # so the download can be skipped entirely.
        postid = re.findall(r'p=?([0-9]{4,})', link)
        if not postid:
            continue
        postid = 'p{}'.format(postid[0])

        try:
            # A timeout keeps a stalled forum server from blocking the bot.
            html = requests.get(link, timeout=10).text
        except requests.RequestException as exc:
            self.logger.warning("Could not fetch {}: {}".format(link, exc))
            continue
        tree = etree.XML(html, etree.HTMLParser())

        post_path = '//div[@id="{}"]'.format(postid)
        postelement = tree.xpath(post_path)
        if not postelement:
            self.logger.warning("No post with id {} found!".format(postid))
            continue
        postelement = postelement[0]

        # The author name is the text of the memberlist link in the post profile.
        users = postelement.xpath('.//dl[@class="postprofile"]//*[contains(@href, "memberlist")]/text()')
        if not users:
            self.logger.warning("No author found for post {}!".format(postid))
            continue
        user = users[0]

        # excludes any [code] and [quote] elements by only looking at direct text child nodes
        posttext = postelement.xpath('.//div[@class="content"]/text()')

        summary_action = {'msg': '{} posted {} words'.format(user, len('\n'.join(posttext).split()))}
        self._run_action(summary_action, plugin=plugin_storage[ptypes_COMMAND][0], msg_obj=msg_obj)
|
||||||
|
|
||||||
def _run_action(self, action, plugin, msg_obj):
|
def _run_action(self, action, plugin, msg_obj):
|
||||||
"""
|
"""
|
||||||
Execute the plugin's execution plan
|
Execute the plugin's execution plan
|
||||||
|
|||||||
Reference in New Issue
Block a user