# These functions come from Reddit # https://github.com/reddit/reddit/blob/master/r2/r2/lib/db/_sorts.pyx # Additional resources # http://www.redditblog.com/2009/10/reddits-new-comment-sorting-system.html # http://www.evanmiller.org/how-not-to-sort-by-average-rating.html # http://amix.dk/blog/post/19588 from datetime import datetime, timezone from math import log from math import sqrt epoch = datetime(1970, 1, 1, 0, 0, 0, 0, timezone.utc) def epoch_seconds(date): """Returns the number of seconds from the epoch to date.""" td = date - epoch return td.days * 86400 + td.seconds + (float(td.microseconds) / 1000000) def score(ups, downs): return ups - downs def hot(ups, downs, date): """The hot formula. Reddit's hot ranking uses the logarithm function to weight the first votes higher than the rest. The first 10 upvotes have the same weight as the next 100 upvotes which have the same weight as the next 1000, etc. Dillo authors: we modified the formula to give more weight to negative votes when an entry is controversial. TODO: make this function more dynamic so that different defaults can be specified depending on the item that is being rated. """ s = score(ups, downs) order = log(max(abs(s), 1), 10) sign = 1 if s > 0 else -1 if s < 0 else 0 seconds = epoch_seconds(date) - 1134028003 base_hot = round(sign * order + seconds / 45000, 7) if downs > 1: rating_delta = 100 * (downs - ups) / downs if rating_delta < 25: # The post is controversial return base_hot base_hot = base_hot - (downs * 6) return base_hot def _confidence(ups, downs): n = ups + downs if n == 0: return 0 z = 1.0 #1.0 = 85%, 1.6 = 95% phat = float(ups) / n return sqrt(phat+z*z/(2*n)-z*((phat*(1-phat)+z*z/(4*n))/n))/(1+z*z/n) def confidence(ups, downs): if ups + downs == 0: return 0 else: return _confidence(ups, downs) def update_hot(document): """Update the hotness of a document given its current ratings. We expect the document to implement the ratings_embedded_schema in a 'ratings' property. """ dt = document['_created'] dt = dt.replace(tzinfo=timezone.utc) document['properties']['ratings']['hot'] = hot( document['properties']['ratings']['positive'], document['properties']['ratings']['negative'], dt, )