Introduce rating functions
These hotness and confidence calculation algorithms come from Reddit and have been tweaked based on our experience on the Dillo project.
This commit is contained in:
parent
ce6df542cc
commit
0e14bdd09f
87
pillar/api/utils/rating.py
Normal file
87
pillar/api/utils/rating.py
Normal file
@ -0,0 +1,87 @@
|
||||
# These functions come from Reddit
|
||||
# https://github.com/reddit/reddit/blob/master/r2/r2/lib/db/_sorts.pyx
|
||||
|
||||
# Additional resources
|
||||
# http://www.redditblog.com/2009/10/reddits-new-comment-sorting-system.html
|
||||
# http://www.evanmiller.org/how-not-to-sort-by-average-rating.html
|
||||
# http://amix.dk/blog/post/19588
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from math import log
|
||||
from math import sqrt
|
||||
|
||||
# Timezone-aware reference point: midnight, 1 January 1970, UTC.
epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)


def epoch_seconds(date):
    """Return the seconds elapsed between the Unix epoch and ``date``.

    ``date`` is subtracted from the timezone-aware ``epoch`` constant, so it
    has to be a timezone-aware datetime as well. The result is a float that
    includes the microsecond fraction; it is negative for dates before 1970.
    """
    elapsed = date - epoch
    whole_seconds = elapsed.days * 86400 + elapsed.seconds
    return whole_seconds + (float(elapsed.microseconds) / 1000000)
|
||||
|
||||
|
||||
def score(ups, downs):
    """Return the net vote balance: upvotes minus downvotes."""
    net_votes = ups - downs
    return net_votes
|
||||
|
||||
|
||||
def hot(ups, downs, date):
    """Compute the 'hot' ranking value of an entry.

    Based on Reddit's hot ranking: the net score is put on a base-10
    logarithmic scale, so the first 10 net votes weigh as much as the next
    100, which weigh as much as the next 1000, and so on. A time term is
    added so that newer entries rank higher: every 45000 seconds (12.5 hours)
    of age difference is worth as much as a tenfold change in net score.

    Dillo authors' modification: an entry with more than one downvote is
    penalised by 6 points per downvote, unless the surplus of downvotes over
    upvotes is below 25% of the downvotes (the entry is then treated as
    controversial and keeps its unpenalised value).

    TODO: make this function more dynamic so that different defaults can be
    specified depending on the item that is being rated.

    :param ups: number of positive votes.
    :param downs: number of negative votes.
    :param date: datetime of the entry; it is passed to epoch_seconds().
    :returns: the hotness as a float; higher means hotter.
    """
    net_votes = score(ups, downs)

    # Logarithmic magnitude of the score; clamped to 1 so log() never sees 0.
    magnitude = log(max(abs(net_votes), 1), 10)

    if net_votes > 0:
        direction = 1
    elif net_votes < 0:
        direction = -1
    else:
        direction = 0

    # Age is measured relative to a fixed reference timestamp
    # (seconds since the Unix epoch).
    age_offset = epoch_seconds(date) - 1134028003
    hotness = round(direction * magnitude + age_offset / 45000, 7)

    if downs > 1:
        # Surplus of downvotes over upvotes, as a percentage of the downvotes.
        downvote_surplus = 100 * (downs - ups) / downs
        if downvote_surplus >= 25:
            # Not controversial, just disliked: apply the per-downvote penalty.
            hotness -= downs * 6

    return hotness
|
||||
|
||||
|
||||
def _confidence(ups, downs):
    """Return the lower bound of the Wilson score interval for the votes.

    This is the formula described in "How Not To Sort By Average Rating"
    (http://www.evanmiller.org/how-not-to-sort-by-average-rating.html):

        (phat + z²/2n - z * sqrt((phat * (1 - phat) + z²/4n) / n)) / (1 + z²/n)

    The square root applies to the variance term only. An earlier version
    wrapped the entire numerator in sqrt(), which over-rated entries (for
    example 0 ups / 1 down scored 0.25 instead of 0).

    :param ups: number of positive votes.
    :param downs: number of negative votes.
    :returns: 0 when there are no votes at all, otherwise a float.
    """
    n = ups + downs

    if n == 0:
        return 0

    z = 1.0  # 1.0 = 85%, 1.6 = 95%
    phat = float(ups) / n  # observed fraction of positive votes

    centre = phat + z * z / (2 * n)
    margin = z * sqrt((phat * (1 - phat) + z * z / (4 * n)) / n)
    return (centre - margin) / (1 + z * z / n)


def confidence(ups, downs):
    """Return the confidence rating for the given vote counts.

    Public entry point around _confidence(); returns 0 when there are no
    votes at all.

    :param ups: number of positive votes.
    :param downs: number of negative votes.
    """
    if ups + downs == 0:
        return 0
    return _confidence(ups, downs)
|
||||
|
||||
|
||||
def update_hot(document):
    """Recompute and store the hotness of a document from its ratings.

    We expect the document to implement the ratings_embedded_schema in
    a 'ratings' property: the 'positive' and 'negative' counts are read from
    document['properties']['ratings'] and the result is written back to its
    'hot' key. The document is modified in place; nothing is returned.
    """
    # replace() only swaps the tzinfo, it does not convert the time.
    # NOTE(review): this assumes '_created' already holds a UTC time - confirm.
    created_utc = document['_created'].replace(tzinfo=timezone.utc)

    ratings = document['properties']['ratings']
    ratings['hot'] = hot(ratings['positive'], ratings['negative'], created_utc)
|
@ -184,3 +184,31 @@ class NodeSetattrTest(unittest.TestCase):
|
||||
|
||||
node_setattr(node, 'b.complex', {None: 5})
|
||||
self.assertEqual({'b': {'complex': {None: 5}}}, node)
|
||||
|
||||
|
||||
class TestRating(unittest.TestCase):
    def test_hotness(self):
        """Sorting the cases by hotness must reproduce the order in which
        they are listed, from coldest to hottest.
        """
        from datetime import datetime, timezone
        from pillar.api.utils.rating import hot

        today = datetime(2017, 2, 11, 0, 0, 0, 0, timezone.utc)
        yesterday = datetime(2017, 2, 10, 0, 0, 0, 0, timezone.utc)
        last_week = datetime(2017, 2, 5, 0, 0, 0, 0, timezone.utc)

        # Listed in the expected ascending order of hotness.
        cases = [
            (hot(1, 8, today), 'today super bad'),
            (hot(0, 3, today), 'today slightly worse'),
            (hot(0, 2, yesterday), 'yesterday bad'),
            (hot(0, 2, today), 'today bad'),
            (hot(4, 4, last_week), 'last week controversial'),
            (hot(7, 1, last_week), 'last week very good'),
            (hot(5, 1, yesterday), 'yesterday medium'),
            (hot(5, 0, yesterday), 'yesterday good'),
            (hot(7, 1, yesterday), 'yesterday very good'),
            (hot(4, 4, today), 'today controversial'),
            (hot(7, 1, today), 'today very good'),
        ]

        ranked = sorted(cases, key=lambda case: case[0])
        for expected, actual in zip(cases, ranked):
            self.assertEqual(expected[0], actual[0])
|
||||
|
Loading…
x
Reference in New Issue
Block a user