From 0e69a6f3d3b095e89eff414cd951b9702fe48452 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Luiz=20de=20Oliveira=20Ramos?=
Date: Sat, 17 Sep 2016 19:20:12 -0300
Subject: [PATCH] Extracting topics from external blogs

---
Review notes (this section is not part of the commit message):
  * The line structure and the blank context lines were rebuilt from the
    hunk headers; the `index` hashes still describe the original blobs.
  * admin.py: roles_accepted = ('admin') was a plain string, not a tuple,
    so role checks iterated over single characters; now ('admin',).
  * admin.py: the body template received a `link` argument but had no
    {link} placeholder; the anchor around "Continue reading" is restored.
    NOTE(review): confirm the exact markup against the original commit.
  * admin.py: one-line docstrings added to get_external_posts and
    on_model_delete.
  * models.py: default=datetime.now() was evaluated once at import time,
    giving every document the same timestamp; the callable is passed now.
  * utils.py: order_by('date') referenced a field that this patch renames
    to date_added.

 .gitignore |  1 +
 admin.py   | 68 ++++++++++++++++++++++++++++++++++++++++++++++++------
 models.py  | 11 ++++++-----
 utils.py   |  6 +++---
 4 files changed, 71 insertions(+), 15 deletions(-)

diff --git a/.gitignore b/.gitignore
index 0d20b64..3179668 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 *.pyc
+.idea/
\ No newline at end of file
diff --git a/admin.py b/admin.py
index dc23b56..7a0f41e 100644
--- a/admin.py
+++ b/admin.py
@@ -1,24 +1,78 @@
 # coding : utf -8
+import feedparser
+from flask_admin.actions import action
 from quokka import admin
 from quokka.utils.translation import _, _l
 from quokka.core.admin.models import ModelAdmin
 from .models import ExternalBlogs, AggregatedTopic
+from quokka.modules.posts.models import Post
 
 
 class ExternalBlogsAdmin(ModelAdmin):
-    roles_accepted = ('admin', 'editor')
+    roles_accepted = ('admin',)
     column_filters = ('name', 'root_url')
     column_searchable_list = ('name', 'root_url', 'feeds_url')
-    column_list = ('name', 'root_url', 'feeds_url')
-    form_columns = ('name', 'root_url', 'feeds_url')
+    column_list = ('name', 'root_url', 'feeds_url', 'channel')
+    form_columns = ('name', 'root_url', 'feeds_url', 'channel')
+
+    __content_format_dict = {
+        'text/plain': 'plaintext',
+        'plaintext': 'plaintext',
+        'text/html': 'html',
+        'html': 'html',
+        'text/markdown': 'markdown',
+        'markdown': 'markdown',
+    }
+
+    @action('get_external_posts', _l('Get external topics'))
+    def get_external_posts(self, ids):
+        """Import feed entries of the selected blogs as posts, skipping known URLs."""
+        blogs = ExternalBlogs.objects(id__in=ids)
+
+        for blog in blogs:
+            feed = feedparser.parse(blog.feeds_url)
+
+            for entry in feed['entries']:
+
+                # If already exist a topic with this url
+                if AggregatedTopic.objects(original_url=entry['feedburner_origlink']).first():
+                    continue  # Go to next iteration
+
+                body = '{sumary}... <a href="{link}">Continue reading</a>'.format(
+                    sumary=entry['summary'].encode('ascii', 'ignore'),
+                    link=entry['feedburner_origlink'].encode('ascii', 'ignore')
+                )
+                content_format = self.__content_format_dict[entry['content'][0]['type']]
+
+                post = {
+                    "title": entry['title'].encode('ascii', 'ignore'),
+                    "slug": entry['title'].replace(' ', '-').encode('ascii', 'ignore'),
+                    "summary": entry['summary'],
+                    "content_format": content_format,
+                    "body": body,
+                    "channel": blog.channel,
+                    "tags": [tag['term'].encode('ascii', 'ignore') for tag in entry['tags']],
+                    "published": True
+                }
+                post, _ = Post.objects.get_or_create(**post)
+
+                AggregatedTopic(
+                    original_url=entry['feedburner_origlink'].encode('ascii', 'ignore'),
+                    blog=blog,
+                    post=post
+                ).save()
 
 
 class AggregatedTopicAdmin(ModelAdmin):
     roles_accepted = ('admin', 'editor')
-    column_filters = ('title', 'date', 'blog')
-    column_searchable_list = ('title', 'topic_url')
-    column_list = ('title', 'date', 'blog', 'topic_url', 'description')
-    form_columns = ('title', 'date', 'blog', 'topic_url', 'description')
+    column_filters = ('date_added', 'original_url', 'blog', 'post')
+    column_searchable_list = ['original_url']
+    column_list = ('date_added', 'blog', 'original_url', 'post')
+    form_columns = ('date_added', 'blog', 'original_url', 'post')
+
+    def on_model_delete(self, topic):
+        """Delete the post referenced by the topic that is being deleted."""
+        topic.post.delete()
 
 # Register RSSaggregator models to quokka admin page
 admin.register(ExternalBlogs, ExternalBlogsAdmin, category=_("RSSaggregator"), name=_l("External Blogs"))
diff --git a/models.py b/models.py
index 8cdf304..ccc2a1c 100644
--- a/models.py
+++ b/models.py
@@ -1,3 +1,4 @@
+from datetime import datetime
 from quokka.core.db import db
 
 
@@ -8,6 +9,7 @@ class ExternalBlogs(db.Document):
     name = db.StringField(max_length=255, required=True)
     root_url = db.StringField(default='')
     feeds_url = db.StringField(required=True)
+    channel = db.ReferenceField('Channel', required=True, reverse_delete_rule=db.CASCADE)
 
     def __str__(self):
         return self.name
@@ -18,11 +20,10 @@ class AggregatedTopic(db.Document):
     Store topics from external blogs
     """
 
-    title = db.StringField(max_length=255, required=True)
-    description = db.StringField(required=False)
-    topic_url = db.StringField(required=True)
-    date = db.DateTimeField(required=False)
+    original_url = db.StringField(required=True)
+    date_added = db.DateTimeField(default=datetime.now)
     blog = db.ReferenceField('ExternalBlogs', required=True, reverse_delete_rule=db.CASCADE)
+    post = db.ReferenceField('Post', required=True, reverse_delete_rule=db.CASCADE)
 
     def __str__(self):
-        return self.title
+        return self.post.title
diff --git a/utils.py b/utils.py
index 6d93bad..b17cedb 100644
--- a/utils.py
+++ b/utils.py
@@ -3,10 +3,10 @@
 
 
 def get_external_topic(**kwargs):
-    '''
+    """
     :param kwargs: This function allows kwargs to give filter power for user
     :return: AggregatedTopic List
-    '''
+    """
 
     topics = AggregatedTopic.objects(**kwargs)
-    return topics.order_by('date')
+    return topics.order_by('date_added')