Skip to content

Commit

Permalink
Deprecating FeedLoadtime and moving to MongoDB-backed aggregate data for feed load time graphs.
Browse files Browse the repository at this point in the history
  • Loading branch information
samuelclay committed Sep 27, 2012
1 parent 2461b2f commit f9ed7fd
Show file tree
Hide file tree
Showing 8 changed files with 103 additions and 57 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ logs/*.pid
*.pyc
static/*
local_settings.py
celerybeat-schedule
celerybeat.pid
media/iphone/NewsBlur/build
media/iphone/build
build/
Expand Down
1 change: 0 additions & 1 deletion apps/reader/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ class CollectStats(Task):
def run(self, **kwargs):
logging.debug(" ---> Collecting stats...")
MStatistics.collect_statistics()
MStatistics.delete_old_stats()


class CollectFeedback(Task):
Expand Down
3 changes: 1 addition & 2 deletions apps/reader/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from apps.rss_feeds.models import MFeedIcon
from apps.statistics.models import MStatistics
try:
from apps.rss_feeds.models import Feed, MFeedPage, DuplicateFeed, MStory, MStarredStory, FeedLoadtime
from apps.rss_feeds.models import Feed, MFeedPage, DuplicateFeed, MStory, MStarredStory
except:
pass
from apps.social.models import MSharedStory, MSocialProfile, MSocialServices
Expand Down Expand Up @@ -542,7 +542,6 @@ def load_single_feed(request, feed_id):
if timediff > 0.50 else "")
logging.user(request, "~FYLoading feed: ~SB%s%s (%s/%s) %s" % (
feed.feed_title[:22], ('~SN/p%s' % page) if page > 1 else '', order, read_filter, time_breakdown))
FeedLoadtime.objects.create(feed=feed, loadtime=timediff)

data = dict(stories=stories,
user_profiles=user_profiles,
Expand Down
3 changes: 1 addition & 2 deletions apps/statistics/management/commands/collect_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,4 @@ class Command(BaseCommand):

def handle(self, *args, **options):
MStatistics.collect_statistics()

MStatistics.delete_old_stats()

83 changes: 47 additions & 36 deletions apps/statistics/models.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import datetime
import mongoengine as mongo
import urllib2
from django.db.models import Avg, Count
from django.conf import settings
from apps.rss_feeds.models import MFeedFetchHistory, MPageFetchHistory, MFeedPushHistory
from apps.rss_feeds.models import FeedLoadtime
from apps.social.models import MSharedStory
from apps.profile.models import Profile
from utils import json_functions as json
Expand Down Expand Up @@ -57,24 +55,22 @@ def all(cls):
@classmethod
def collect_statistics(cls):
now = datetime.datetime.now()
last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
cls.collect_statistics_feeds_fetched(last_day)
cls.collect_statistics_feeds_fetched()
print "Feeds Fetched: %s" % (datetime.datetime.now() - now)
cls.collect_statistics_premium_users(last_day)
cls.collect_statistics_premium_users()
print "Premiums: %s" % (datetime.datetime.now() - now)
cls.collect_statistics_standard_users(last_day)
cls.collect_statistics_standard_users()
print "Standard users: %s" % (datetime.datetime.now() - now)
cls.collect_statistics_sites_loaded(last_day)
cls.collect_statistics_sites_loaded()
print "Sites loaded: %s" % (datetime.datetime.now() - now)
cls.collect_statistics_stories_shared(last_day)
cls.collect_statistics_stories_shared()
print "Stories shared: %s" % (datetime.datetime.now() - now)
cls.collect_statistics_for_db()
print "DB Stats: %s" % (datetime.datetime.now() - now)

@classmethod
def collect_statistics_feeds_fetched(cls, last_day=None):
if not last_day:
last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
def collect_statistics_feeds_fetched(cls):
last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
last_month = datetime.datetime.now() - datetime.timedelta(days=30)

feeds_fetched = MFeedFetchHistory.objects.filter(fetch_date__gte=last_day).count()
Expand All @@ -100,43 +96,65 @@ def delete_old_history():
return feeds_fetched

@classmethod
def collect_statistics_premium_users(cls, last_day=None):
if not last_day:
last_day = datetime.datetime.now() - datetime.timedelta(hours=24)

def collect_statistics_premium_users(cls):
last_day = datetime.datetime.now() - datetime.timedelta(hours=24)

premium_users = Profile.objects.filter(last_seen_on__gte=last_day, is_premium=True).count()
cls.objects(key='premium_users').update_one(upsert=True, set__key='premium_users', set__value=premium_users)

return premium_users

@classmethod
def collect_statistics_standard_users(cls, last_day=None):
if not last_day:
last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
def collect_statistics_standard_users(cls):
last_day = datetime.datetime.now() - datetime.timedelta(hours=24)

standard_users = Profile.objects.filter(last_seen_on__gte=last_day, is_premium=False).count()
cls.objects(key='standard_users').update_one(upsert=True, set__key='standard_users', set__value=standard_users)

return standard_users

@classmethod
def collect_statistics_sites_loaded(cls, last_day=None):
if not last_day:
last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
def collect_statistics_sites_loaded(cls):
now = datetime.datetime.now()
sites_loaded = []
avg_time_taken = []

for hour in range(24):
start_hours_ago = now - datetime.timedelta(hours=hour)
end_hours_ago = now - datetime.timedelta(hours=hour+1)
aggregates = dict(count=Count('loadtime'), avg=Avg('loadtime'))
load_times = FeedLoadtime.objects.filter(
date_accessed__lte=start_hours_ago,
date_accessed__gte=end_hours_ago
).aggregate(**aggregates)
sites_loaded.append(load_times['count'] or 0)
avg_time_taken.append(load_times['avg'] or 0)

load_times = settings.MONGOANALYTICSDB.nbanalytics.page_loads.aggregate([{
"$match": {
"date": {
"$gte": end_hours_ago,
"$lte": start_hours_ago,
},
"path": {
"$in": [
"/reader/feed/",
"/social/stories/",
"/reader/river_stories/",
"/social/river_stories/",
]
}
},
}, {
"$group": {
"_id" : 1,
"count" : {"$sum": 1},
"avg" : {"$avg": "$duration"},
},
}])

count = 0
avg = 0
if load_times['result']:
count = load_times['result'][0]['count']
avg = load_times['result'][0]['avg']

sites_loaded.append(count)
avg_time_taken.append(avg)

sites_loaded.reverse()
avg_time_taken.reverse()

Expand All @@ -152,9 +170,7 @@ def collect_statistics_sites_loaded(cls, last_day=None):
cls.objects(key=key).update_one(upsert=True, set__key=key, set__value=value)

@classmethod
def collect_statistics_stories_shared(cls, last_day=None):
if not last_day:
last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
def collect_statistics_stories_shared(cls):
now = datetime.datetime.now()
stories_shared = []

Expand Down Expand Up @@ -182,11 +198,6 @@ def collect_statistics_for_db(cls):
lag = db_functions.mongo_max_replication_lag(settings.MONGODB)
cls.set('mongodb_replication_lag', lag)

@classmethod
def delete_old_stats(cls):
now = datetime.datetime.now()
old_age = now - datetime.timedelta(days=7)
FeedLoadtime.objects.filter(date_accessed__lte=old_age).delete()

class MFeedback(mongo.Document):
date = mongo.StringField()
Expand Down
9 changes: 5 additions & 4 deletions config/hosts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# 199.15.253.226 db03 db03.newsblur.com
199.15.249.98 db04 db04.newsblur.com
199.15.249.99 db05 db05.newsblur.com
# 199.15.249.100 db06 db06.newsblur.com

199.15.249.101 db07 db07.newsblur.com
199.15.250.231 task01 task01.newsblur.com
199.15.250.250 task02 task02.newsblur.com
Expand All @@ -18,6 +18,7 @@
199.15.252.106 task05 task05.newsblur.com
199.15.252.107 task06 task06.newsblur.com
199.15.252.108 task07 task07.newsblur.com

# EC2
23.20.165.187 db10 db10.newsblur.com
199.15.251.144 task08 task08.newsblur.com
199.15.251.154 task09 task09.newsblur.com
199.15.251.137 task10 task10.newsblur.com
199.15.251.155 task11 task11.newsblur.com
8 changes: 7 additions & 1 deletion utils/db_functions.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
import pymongo

PRIMARY_STATE = 1
SECONDARY_STATE = 2

def mongo_max_replication_lag(connection):
status = connection.admin.command('replSetGetStatus')
try:
status = connection.admin.command('replSetGetStatus')
except pymongo.errors.OperationFailure:
return 0

members = status['members']
primary_optime = None
oldest_secondary_optime = None
Expand Down
51 changes: 40 additions & 11 deletions utils/munin/newsblur_loadtimes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python
from utils.munin.base import MuninGraph

from django.conf import settings
import datetime

class NBMuninGraph(MuninGraph):

Expand All @@ -17,18 +18,46 @@ def graph_config(self):
}

def calculate_metrics(self):
from django.db.models import Avg, Min, Max, Count
import datetime
from apps.rss_feeds.models import FeedLoadtime
hour_ago = datetime.datetime.utcnow() - datetime.timedelta(minutes=60)

averages = dict(avg=Avg('loadtime'), max=Max('loadtime'), min=Min('loadtime'), count=Count('loadtime'))
hour = FeedLoadtime.objects.filter(date_accessed__gte=hour_ago).aggregate(**averages)
times = settings.MONGOANALYTICSDB.nbanalytics.page_loads.aggregate([{
"$match": {
"date": {
"$gte": hour_ago,
},
"path": {
"$in": [
"/reader/feed/",
"/social/stories/",
"/reader/river_stories/",
"/social/river_stories/",
]
}
},
}, {
"$group": {
"_id" : 1,
"count" : {"$sum": 1},
"avg" : {"$avg": "$duration"},
"min" : {"$min": "$duration"},
"max" : {"$max": "$duration"},
},
}])

load_avg = 0
load_min = 0
load_max = 0
load_count = 0
if times['result']:
load_avg = times['result'][0]['avg']
load_min = times['result'][0]['min']
load_max = times['result'][0]['max']
load_count = times['result'][0]['count']

return {
'feed_loadtimes_avg_hour': hour['avg'],
'feed_loadtimes_min_hour': hour['min'],
'feed_loadtimes_max_hour': hour['max'],
'feeds_loaded_hour': hour['count'],
'feed_loadtimes_avg_hour': load_avg,
'feed_loadtimes_min_hour': load_min,
'feed_loadtimes_max_hour': load_max,
'feeds_loaded_hour': load_count,
}

if __name__ == '__main__':
Expand Down

0 comments on commit f9ed7fd

Please sign in to comment.