initial commit
This commit is contained in:
103
feeds.py
Normal file
103
feeds.py
Normal file
@@ -0,0 +1,103 @@
|
||||
from lib.parser import dom_node
|
||||
from lib import utils
|
||||
|
||||
|
||||
class feed_manager:
    """Build per-subscriber daily e-mail digests from today's paper feed.

    The manager pulls the feed from ``arxivbot``, picks for every subscriber
    the papers whose title or abstract mention one of the subscriber's
    keywords, and renders the matches into an e-mail body that is prefixed
    with the loaded style text.
    """

    def __init__(self, submgr, arxivbot, style='./config/style.css'):
        """Store the collaborators and load the style file.

        Args:
            submgr: object exposing a ``subscribers`` mapping (name -> record).
            arxivbot: object exposing ``get_today_feed()``.
            style: path of the style file read by ``update_style``.
        """
        self.style_path = style
        self.style = ''
        self.bot = arxivbot
        self.submgr = submgr
        # Start with an empty feed so that filtering before the first
        # fetch_today_feed() only warns about missing topics instead of
        # raising AttributeError.
        self.today_feed = {}
        self.update_style()

    def update_style(self, path=None):
        """(Re)load the style text from ``path``.

        Falls back to the path given at construction time when ``path`` is
        None. A trailing newline is appended so the feed markup starts on
        its own line.
        """
        if path is None:
            path = self.style_path
        print('loading style from:', path)
        with open(path, 'r') as f:
            self.style = f.read()
        self.style += '\n'

    def fetch_today_feed(self):
        """Refresh ``self.today_feed`` from the bot."""
        self.today_feed = self.bot.get_today_feed()

    def filter_papers_for_user(self, subscriber):
        """Split today's papers into strong and weak matches for a subscriber.

        Args:
            subscriber: mapping with ``'keywords'`` and ``'topics'`` entries.

        Returns:
            ``(strong_papers, weak_papers)``: papers with a keyword in the
            title, and papers with a keyword only in the abstract. Matching
            is case-insensitive. Each paper appears at most once, in feed
            order.
        """
        # Titles and abstracts are lower-cased before comparison, so the
        # keywords must be lower-cased too; otherwise a keyword such as
        # "GAN" could never match anything.
        keywords = [keyword.lower() for keyword in subscriber['keywords']]

        papers = []
        for topic in subscriber['topics']:
            if topic in self.today_feed:
                papers += self.today_feed[topic]
            else:
                print('Warning: topic {0} is subscribed but not downloaded!'.format(topic))

        # The same paper can be listed under several topics; keep only the
        # first occurrence. NOTE(review): assumes arxiv_id is hashable
        # (presumably a string) - confirm against the paper class.
        seen_ids = set()
        unique_papers = []
        for paper in papers:
            paper_id = paper.arxiv_id
            if paper_id not in seen_ids:
                seen_ids.add(paper_id)
                unique_papers.append(paper)
        print('removing {0} repeated papers.'.format(len(papers) - len(unique_papers)))

        strong_papers = []
        weak_papers = []
        if not keywords:
            # Nothing can match; skip touching the paper contents at all.
            return strong_papers, weak_papers

        for paper in unique_papers:
            title = paper.info['title'].lower()
            if any(keyword in title for keyword in keywords):
                strong_papers.append(paper)
                continue
            # The abstract is only consulted when no keyword hit the title.
            abstract = paper.info['abstract'].lower()
            if any(keyword in abstract for keyword in keywords):
                weak_papers.append(paper)
        return strong_papers, weak_papers

    def generate_group_feed(self, paper_groups):
        """Render groups of papers into one markup string.

        Args:
            paper_groups: mapping of group title -> list of papers; every
                paper must provide ``to_html()``.

        Returns:
            For each group, a ``dom_node('paper-group')`` header carrying the
            group title followed by the papers' HTML, one item per line.
            An empty mapping yields ``''``.
        """
        pieces = []
        for key, group in paper_groups.items():
            header = dom_node('paper-group')
            header.data = key
            pieces.append(header.to_string())
            pieces.extend(paper.to_html() for paper in group)
        # Every piece is terminated by a newline, matching the old
        # concatenation-based output exactly.
        return ''.join(piece + '\n' for piece in pieces)

    def generate_daily_feed_by_matched_paper(self, strong_interested, weak_interested):
        """Render strong and weak matches as titled groups.

        Empty groups are omitted; when both lists are empty the result is
        the empty string.
        """
        feeds = {}
        if strong_interested:
            feeds['Strong Interested Paper'] = strong_interested
        if weak_interested:
            feeds['Weak Interested Paper'] = weak_interested
        return self.generate_group_feed(feeds)

    def generate_daily_email_by_matched_paper(self, strong_interested, weak_interested):
        """Return the e-mail body (style + feed), or ``''`` if nothing matched."""
        xml_feed = self.generate_daily_feed_by_matched_paper(strong_interested, weak_interested)
        if xml_feed == '':
            return ''
        return self.style + xml_feed

    def generate_daily_emails(self):
        """Fetch today's feed and build one e-mail per subscriber with matches.

        Returns:
            Mapping of subscriber name -> dict with the keys ``'reciver'``,
            ``'title'`` and ``'content'``. Subscribers without any matching
            paper are skipped. The ``'reciver'`` spelling is the existing key
            and is kept so consumers of the dict keep working.
        """
        self.fetch_today_feed()
        emails = {}
        today = utils.str_day()
        # The subject line is identical for every subscriber.
        title = "Your Interested Paper On Arxiv Today ({0})".format(today)
        for name in self.submgr.subscribers:
            subscriber = self.submgr.subscribers[name]
            strong, weak = self.filter_papers_for_user(subscriber)
            content = self.generate_daily_email_by_matched_paper(strong, weak)
            reciver = subscriber['email']
            if content == '':
                print('Skipping user {0} [{1}] since no paper matched.'.format(name, reciver))
                continue
            emails[name] = {
                'reciver': reciver,
                'title': title,
                'content': content,
            }
        return emails
|
||||
Reference in New Issue
Block a user