wdm0006
diff --git a/‎CHANGELOG.md
Lines changed: 7 additions & 1 deletion b/‎CHANGELOG.md
Lines changed: 7 additions & 1 deletion
diff --git a/‎docs/source/conf.py
Lines changed: 1 addition & 1 deletion b/‎docs/source/conf.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎elote/arenas/base.py
Lines changed: 23 additions & 18 deletions b/‎elote/arenas/base.py
Lines changed: 23 additions & 18 deletions
diff --git a/‎elote/arenas/lambda_arena.py
Lines changed: 17 additions & 4 deletions b/‎elote/arenas/lambda_arena.py
Lines changed: 17 additions & 4 deletions
diff --git a/‎elote/competitors/glicko.py
Lines changed: 77 additions & 11 deletions b/‎elote/competitors/glicko.py
Lines changed: 77 additions & 11 deletions
@@ -1,4 +1,10 @@
-v1.0.0 (Unreleased)
+v1.1.0
+======
+
+ * Glicko and Glicko-2 now properly handle the time elapsed since a competitor's last match
+ * Bugfix in evaluation of draws in benchmarking
+
+v1.0.0
 ======
 
  * [] Added end to end examples using the chess and cfb datasets
 
@@ -45,7 +45,7 @@
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
-    "sphinx.ext.autodoc", 
+    "sphinx.ext.autodoc",
     "sphinx.ext.viewcode",
     "sphinx_rtd_dark_mode",
     "sphinxcontrib.googleanalytics",
 
@@ -145,14 +145,13 @@ def confusion_matrix(self, lower_threshold: float = 0.45, upper_threshold: float
             elif predicted_prob is None:
                 continue
 
-            # Determine the predicted winner based on thresholds
+            # Determine the predicted outcome
             if predicted_prob > upper_threshold:
                 predicted_winner = "a"
             elif predicted_prob < lower_threshold:
                 predicted_winner = "b"
             else:
-                # This is an uncertain prediction - skip it for confusion matrix calculation
-                continue
+                predicted_winner = "draw"
 
             # Normalize actual winner to 'a', 'b', or 'draw'
             if isinstance(actual_winner, str):
@@ -162,7 +161,6 @@ def confusion_matrix(self, lower_threshold: float = 0.45, upper_threshold: float
                 elif actual_winner in ["b", "loss", "false", "0"]:
                     actual_winner = "b"
                 else:
-                    # Treat other values as draw
                     actual_winner = "draw"
             elif isinstance(actual_winner, (int, float)):
                 if actual_winner == 1:
@@ -179,24 +177,31 @@ def confusion_matrix(self, lower_threshold: float = 0.45, upper_threshold: float
                 continue
 
             # Update confusion matrix
-            if actual_winner == "a":
-                if predicted_winner == "a":
+            if predicted_winner == "draw":
+                if actual_winner == "draw":
+                    true_positives += 1  # Correctly predicted draw
+                else:
+                    false_positives += 1  # Incorrectly predicted draw
+            elif actual_winner == "draw":
+                false_negatives += 1  # Failed to predict draw
+            elif predicted_winner == "a":
+                if actual_winner == "a":
                     true_positives += 1
-                elif predicted_winner == "b":
-                    false_negatives += 1
-            elif actual_winner == "b":
-                if predicted_winner == "b":
-                    true_negatives += 1
-                elif predicted_winner == "a":
+                else:
                     false_positives += 1
-            else:  # actual_winner == "draw"
-                if predicted_winner == "a":
-                    false_positives += 1  # Predicted a win but was a draw
-                else:  # predicted_winner == "b"
-                    false_negatives += 1  # Predicted b win but was a draw
+            elif predicted_winner == "b":
+                if actual_winner == "b":
+                    true_negatives += 1
+                else:
+                    false_negatives += 1
 
         # Return results as a dictionary
-        return {"tp": true_positives, "fp": false_positives, "tn": true_negatives, "fn": false_negatives}
+        return {
+            "tp": true_positives,
+            "fp": false_positives,
+            "tn": true_negatives,
+            "fn": false_negatives
+        }
 
     def random_search(self, trials=1000):
         """Search for optimal prediction thresholds using random sampling.
 
@@ -73,7 +73,7 @@ def tournament(self, matchups):
         for data in tqdm(matchups):
             self.matchup(*data)
 
-    def matchup(self, a, b, attributes=None):
+    def matchup(self, a, b, attributes=None, match_time=None):
         """Process a single matchup between two competitors.
 
         This method handles a matchup between two competitors, creating them
@@ -84,6 +84,7 @@ def matchup(self, a, b, attributes=None):
             a: The first competitor or competitor identifier.
             b: The second competitor or competitor identifier.
             attributes (dict, optional): Additional attributes to record with this bout.
+            match_time (datetime, optional): The time when the match occurred.
 
         Returns:
             The result of the matchup.
@@ -100,14 +101,26 @@ def matchup(self, a, b, attributes=None):
         else:
             res = self.func(a, b)
 
+        # Check if the competitor supports time-based ratings
+        supports_time = hasattr(self.competitors[a], "_last_activity")
+
         if res is None:
-            self.competitors[a].tied(self.competitors[b])
+            if supports_time:
+                self.competitors[a].tied(self.competitors[b], match_time=match_time)
+            else:
+                self.competitors[a].tied(self.competitors[b])
             self.history.add_bout(Bout(a, b, predicted_outcome, outcome="tie", attributes=attributes))
         elif res is True:
-            self.competitors[a].beat(self.competitors[b])
+            if supports_time:
+                self.competitors[a].beat(self.competitors[b], match_time=match_time)
+            else:
+                self.competitors[a].beat(self.competitors[b])
             self.history.add_bout(Bout(a, b, predicted_outcome, outcome="win", attributes=attributes))
         else:
-            self.competitors[b].beat(self.competitors[a])
+            if supports_time:
+                self.competitors[b].beat(self.competitors[a], match_time=match_time)
+            else:
+                self.competitors[b].beat(self.competitors[a])
             self.history.add_bout(Bout(a, b, predicted_outcome, outcome="loss", attributes=attributes))
 
     def expected_score(self, a, b):
 
@@ -1,5 +1,6 @@
 import math
-from typing import Dict, Any, ClassVar, Tuple, Type, TypeVar
+from typing import Dict, Any, ClassVar, Tuple, Type, TypeVar, Optional
+from datetime import datetime
 
 from elote.competitors.base import BaseCompetitor, InvalidRatingValueException, InvalidParameterException
 
@@ -17,19 +18,25 @@ class GlickoCompetitor(BaseCompetitor):
     the reliability of the rating. A higher RD indicates a less reliable rating.
 
     Class Attributes:
-        _c (float): Constant that determines how quickly the RD increases over time. Default: 1.
+        _c (float): Rating volatility constant that determines how quickly the RD increases over time.
+                   Default: 34.6, which is calibrated so that it takes about 100 rating periods
+                   for a player's RD to grow from 50 to 350 (maximum uncertainty).
         _q (float): Scaling factor used in the rating calculation. Default: 0.0057565.
+        _rating_period_days (float): Number of days that constitute one rating period.
+                                   Default: 1.0 (one day per rating period).
     """
 
-    _c: ClassVar[float] = 1
+    _c: ClassVar[float] = 34.6  # sqrt((350^2 - 50^2)/100) as per Glickman's paper
     _q: ClassVar[float] = 0.0057565
+    _rating_period_days: ClassVar[float] = 1.0
 
-    def __init__(self, initial_rating: float = 1500, initial_rd: float = 350):
+    def __init__(self, initial_rating: float = 1500, initial_rd: float = 350, initial_time: Optional[datetime] = None):
         """Initialize a Glicko competitor.
 
         Args:
             initial_rating (float, optional): The initial rating of this competitor. Default: 1500.
             initial_rd (float, optional): The initial rating deviation of this competitor. Default: 350.
+            initial_time (datetime, optional): The initial timestamp for this competitor. Default: current time.
 
         Raises:
             InvalidRatingValueException: If the initial rating is below the minimum rating.
@@ -47,6 +54,7 @@ def __init__(self, initial_rating: float = 1500, initial_rd: float = 350):
         self._initial_rd = initial_rd
         self._rating = initial_rating
         self.rd = initial_rd
+        self._last_activity = initial_time if initial_time is not None else datetime.now()
 
     def __repr__(self) -> str:
         """Return a string representation of this competitor.
@@ -84,6 +92,7 @@ def _export_current_state(self) -> Dict[str, Any]:
         return {
             "rating": self._rating,
             "rd": self.rd,
+            "last_activity": self._last_activity.isoformat(),
         }
 
     def _import_parameters(self, parameters: Dict[str, Any]) -> None:
@@ -130,6 +139,12 @@ def _import_current_state(self, state: Dict[str, Any]) -> None:
             raise InvalidParameterException("RD must be positive")
         self.rd = rd
 
+        # Set last activity time
+        if "last_activity" in state:
+            self._last_activity = datetime.fromisoformat(state["last_activity"])
+        else:
+            self._last_activity = datetime.now()
+
     @classmethod
     def _create_from_parameters(cls: Type[T], parameters: Dict[str, Any]) -> T:
         """Create a new competitor instance from parameters.
@@ -268,46 +283,65 @@ def expected_score(self, competitor: BaseCompetitor) -> float:
         E = 1 / (1 + 10 ** ((-1 * g_term * (self._rating - competitor.rating)) / 400))
         return E
 
-    def beat(self, competitor: "GlickoCompetitor") -> None:
+    def beat(self, competitor: "GlickoCompetitor", match_time: Optional[datetime] = None) -> None:
         """Update ratings after this competitor has won against the given competitor.
 
         This method updates the ratings of both this competitor and the opponent
         based on the match outcome where this competitor won.
 
         Args:
             competitor (GlickoCompetitor): The opponent competitor that lost.
+            match_time (datetime, optional): The time when the match occurred. Default: current time.
 
         Raises:
             MissMatchedCompetitorTypesException: If the competitor types don't match.
+            InvalidParameterException: If the match time is before either competitor's
+                last activity time.
         """
         self.verify_competitor_types(competitor)
-        self._compute_match_result(competitor, s=1)
+        self._compute_match_result(competitor, s=1, match_time=match_time)
 
-    def tied(self, competitor: "GlickoCompetitor") -> None:
+    def tied(self, competitor: "GlickoCompetitor", match_time: Optional[datetime] = None) -> None:
         """Update ratings after this competitor has tied with the given competitor.
 
         This method updates the ratings of both this competitor and the opponent
         based on a drawn match outcome.
 
         Args:
             competitor (GlickoCompetitor): The opponent competitor that tied.
+            match_time (datetime, optional): The time when the match occurred. Default: current time.
 
         Raises:
             MissMatchedCompetitorTypesException: If the competitor types don't match.
+            InvalidParameterException: If the match time is before either competitor's
+                last activity time.
         """
         self.verify_competitor_types(competitor)
-        self._compute_match_result(competitor, s=0.5)
+        self._compute_match_result(competitor, s=0.5, match_time=match_time)
 
-    def _compute_match_result(self, competitor: "GlickoCompetitor", s: float) -> None:
+    def _compute_match_result(
+        self, competitor: "GlickoCompetitor", s: float, match_time: Optional[datetime] = None
+    ) -> None:
         """Compute the result of a match and update ratings.
 
         Args:
             competitor (GlickoCompetitor): The opponent competitor.
             s (float): The score of this competitor (1 for win, 0.5 for draw, 0 for loss).
+            match_time (datetime, optional): The time when the match occurred. Default: current time.
 
         Raises:
             MissMatchedCompetitorTypesException: If the competitor types don't match.
+            InvalidParameterException: If the match time is before either competitor's last activity.
         """
+        # Get the match time
+        current_time = match_time if match_time is not None else datetime.now()
+
+        # Validate match time is not before last activity
+        if current_time < self._last_activity:
+            raise InvalidParameterException("Match time cannot be before competitor's last activity time")
+        if current_time < competitor._last_activity:
+            raise InvalidParameterException("Match time cannot be before opponent's last activity time")
+
+        # Update RDs for both competitors based on inactivity
+        self.update_rd_for_inactivity(current_time)
+        competitor.update_rd_for_inactivity(current_time)
+
         self.verify_competitor_types(competitor)
         # first we update ourselves
         s_new_r, s_new_rd = self.update_competitor_rating(competitor, s)
@@ -322,6 +356,10 @@ def _compute_match_result(self, competitor: "GlickoCompetitor", s: float) -> Non
         competitor.rating = c_new_r
         competitor.rd = c_new_rd
 
+        # Update last activity time for both competitors
+        self._last_activity = current_time
+        competitor._last_activity = current_time
+
     def update_competitor_rating(self, competitor: "GlickoCompetitor", s: float) -> Tuple[float, float]:
         """Update the rating and RD of this competitor based on a match result.
 
@@ -333,11 +371,39 @@ def update_competitor_rating(self, competitor: "GlickoCompetitor", s: float) ->
             tuple: A tuple containing the new rating and RD.
         """
         E_term = self.expected_score(competitor)
-        d_squared = (self._q**2 * (self._g(competitor.rd) ** 2 * E_term * (1 - E_term))) ** -1
-        s_new_r = self._rating + (self._q / (1 / self.rd**2 + 1 / d_squared)) * self._g(competitor.rd) * (s - E_term)
+        g = self._g(competitor.rd**2)
+        d_squared = (self._q**2 * (g**2 * E_term * (1 - E_term))) ** -1
+
+        # The rating change is proportional to 1/RD^2, so a higher RD means a larger change
+        rating_change = (self._q / (1 / self.rd**2 + 1 / d_squared)) * g * (s - E_term)
+        s_new_r = self._rating + rating_change
 
         # Ensure the new rating doesn't go below the minimum rating
         s_new_r = max(self._minimum_rating, s_new_r)
 
+        # The new RD is smaller (more certain) after a match
         s_new_rd = math.sqrt((1 / self.rd**2 + 1 / d_squared) ** -1)
         return s_new_r, s_new_rd
+
+    def update_rd_for_inactivity(self, current_time: Optional[datetime] = None) -> None:
+        """Increase the rating deviation to reflect time elapsed since last activity.
+
+        This implements Glickman's formula for growing uncertainty while a player is
+        inactive: ``RD' = min(sqrt(RD^2 + c^2 * t), 350)``, where ``c`` is the class
+        constant ``_c`` and ``t`` is the (possibly fractional) number of rating
+        periods between ``_last_activity`` and ``current_time``. One rating period
+        lasts ``_rating_period_days`` days.
+
+        Only ``self.rd`` is modified; ``_last_activity`` is left untouched. Nothing
+        happens when ``current_time`` is not later than the last activity.
+
+        Args:
+            current_time (datetime, optional): The time to calculate inactivity against.
+                If None, uses the current system time.
+
+        Raises:
+            TypeError: If ``current_time`` and the stored last-activity time mix naive
+                and timezone-aware datetimes (they cannot be subtracted).
+        """
+        if current_time is None:
+            current_time = datetime.now()
+
+        # Number of rating periods elapsed (can be fractional)
+        days_inactive = (current_time - self._last_activity).total_seconds() / (24 * 3600)
+        rating_periods = days_inactive / self._rating_period_days
+        if rating_periods <= 0:
+            return
+
+        inflated_rd = math.sqrt(self.rd**2 + (self._c**2 * rating_periods))
+        # Cap growth at 350 (the RD of an unrated player), but never let the passage
+        # of time *lower* an RD that already sits above the cap.
+        self.rd = max(self.rd, min(350, inflated_rd))