From 0e14bdd09f7f3cfe83b02097e2a4b792d38c2363 Mon Sep 17 00:00:00 2001
From: Francesco Siddi <francesco.siddi@gmail.com>
Date: Sun, 3 Jun 2018 02:09:20 +0200
Subject: [PATCH] Introduce rating functions

These hotness and confidence calculation algorithms come from Reddit
and have been tweaked based on our experience on the Dillo project.
---
 pillar/api/utils/rating.py   | 87 ++++++++++++++++++++++++++++++++++++
 tests/test_api/test_utils.py | 28 ++++++++++++
 2 files changed, 115 insertions(+)
 create mode 100644 pillar/api/utils/rating.py

diff --git a/pillar/api/utils/rating.py b/pillar/api/utils/rating.py
new file mode 100644
index 00000000..65e8140f
--- /dev/null
+++ b/pillar/api/utils/rating.py
@@ -0,0 +1,87 @@
+# These functions come from Reddit
+# https://github.com/reddit/reddit/blob/master/r2/r2/lib/db/_sorts.pyx
+
+# Additional resources
+# http://www.redditblog.com/2009/10/reddits-new-comment-sorting-system.html
+# http://www.evanmiller.org/how-not-to-sort-by-average-rating.html
+# http://amix.dk/blog/post/19588
+
+from datetime import datetime, timezone
+from math import log
+from math import sqrt
+
+epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)  # Unix epoch, timezone-aware
+
+
+def epoch_seconds(date):
+    """Return the seconds from the epoch to the aware `date`, as a float."""
+    delta = date - epoch
+    return delta.days * 86400 + delta.seconds + delta.microseconds / 1000000
+
+
+def score(ups, downs):
+    return ups - downs  # net votes; negative when downvotes outnumber upvotes
+
+
+def hot(ups, downs, date):
+    """Rank an entry by its net votes and its date, after Reddit's hot formula.
+
+    The net score counts logarithmically (base 10): the first 10 votes weigh
+    as much as the next 100, which weigh as much as the next 1000, etc.
+    On top of that, every 45000 seconds of a later `date` adds one point.
+
+    Dillo authors: we modified the formula so that an entry with more than
+    one downvote, whose downvotes exceed its upvotes by at least 25% of the
+    downvote count, loses 6 points per downvote.
+
+    TODO: make this function more dynamic so that different defaults can be
+    specified depending on the item that is being rated.
+    """
+    net = score(ups, downs)
+    magnitude = log(max(abs(net), 1), 10)
+    direction = (net > 0) - (net < 0)  # 1, -1 or 0, following the net score
+    # Seconds between `date` and the fixed reference timestamp 1134028003.
+    offset = epoch_seconds(date) - 1134028003
+    hotness = round(direction * magnitude + offset / 45000, 7)
+
+    if downs > 1:
+        # Downvote surplus as a percentage of all downvotes; below 25 the
+        # entry is controversial or well received and keeps its hotness.
+        surplus = 100 * (downs - ups) / downs
+        return hotness if surplus < 25 else hotness - downs * 6
+    return hotness
+
+
+def _confidence(ups, downs):
+    """Lower bound of the Wilson score interval for the upvote ratio."""
+    n = ups + downs
+    if n == 0:
+        return 0  # no votes, no confidence
+    z = 1.0  # normal quantile: 1.0 = 85%, 1.6 = 95%
+    phat = float(ups) / n  # observed share of upvotes
+    spread = z * sqrt((phat * (1 - phat) + z * z / (4 * n)) / n)
+    return (phat + z * z / (2 * n) - spread) / (1 + z * z / n)
+
+
+def confidence(ups, downs):
+    """Rate the votes with `_confidence`; 0 when there are no votes at all."""
+    if not ups + downs:
+        return 0
+    return _confidence(ups, downs)
+
+
+def update_hot(document):
+    """Recompute the hotness of a document and store it in its ratings.
+
+    We expect the document to implement the ratings_embedded_schema in
+    a 'ratings' property: the 'positive' and 'negative' counts are read from
+    it and the result is written back, in place, under 'hot'.
+    """
+    # The creation time is labelled as UTC without being converted.
+    created = document['_created'].replace(tzinfo=timezone.utc)
+    ratings = document['properties']['ratings']
+    ratings['hot'] = hot(
+        ups=ratings['positive'],
+        downs=ratings['negative'],
+        date=created,
+    )
diff --git a/tests/test_api/test_utils.py b/tests/test_api/test_utils.py
index 9d929782..46340830 100644
--- a/tests/test_api/test_utils.py
+++ b/tests/test_api/test_utils.py
@@ -184,3 +184,31 @@ class NodeSetattrTest(unittest.TestCase):
 
         node_setattr(node, 'b.complex', {None: 5})
         self.assertEqual({'b': {'complex': {None: 5}}}, node)
+
+
+class TestRating(unittest.TestCase):
+    def test_hotness(self):
+        """The cases are listed from coldest to hottest, so sorting them by
+        hotness must leave their order unchanged.
+        """
+        from datetime import datetime, timezone
+        from pillar.api.utils.rating import hot
+        today = datetime(2017, 2, 11, tzinfo=timezone.utc)
+        yesterday = datetime(2017, 2, 10, tzinfo=timezone.utc)
+        last_week = datetime(2017, 2, 5, tzinfo=timezone.utc)
+        cases = [
+            (hot(1, 8, today), 'today super bad'),
+            (hot(0, 3, today), 'today slightly worse'),
+            (hot(0, 2, yesterday), 'yesterday bad'),
+            (hot(0, 2, today), 'today bad'),
+            (hot(4, 4, last_week), 'last week controversial'),
+            (hot(7, 1, last_week), 'last week very good'),
+            (hot(5, 1, yesterday), 'yesterday medium'),
+            (hot(5, 0, yesterday), 'yesterday good'),
+            (hot(7, 1, yesterday), 'yesterday very good'),
+            (hot(4, 4, today), 'today controversial'),
+            (hot(7, 1, today), 'today very good'),
+        ]
+        ranked = sorted(cases, key=lambda case: case[0])
+        for (expected, _), (actual, _) in zip(cases, ranked):
+            self.assertEqual(expected, actual)