diff --git a/pymc_extras/distributions/__init__.py b/pymc_extras/distributions/__init__.py
index 783154ba3..abdbc721e 100644
--- a/pymc_extras/distributions/__init__.py
+++ b/pymc_extras/distributions/__init__.py
@@ -22,6 +22,7 @@
     BetaNegativeBinomial,
     GeneralizedPoisson,
     Skellam,
+    GrassiaIIGeometric,
 )
 from pymc_extras.distributions.histogram_utils import histogram_approximation
 from pymc_extras.distributions.multivariate import R2D2M2CP
@@ -38,5 +39,6 @@
     "R2D2M2CP",
     "Skellam",
     "histogram_approximation",
+    "GrassiaIIGeometric",
     "PartialOrder",
 ]
diff --git a/pymc_extras/distributions/discrete.py b/pymc_extras/distributions/discrete.py
index fc810c415..09c6467e2 100644
--- a/pymc_extras/distributions/discrete.py
+++ b/pymc_extras/distributions/discrete.py
@@ -16,6 +16,7 @@
 import pymc as pm

 from pymc.distributions.dist_math import betaln, check_parameters, factln, logpow
+from pymc.distributions.distribution import Discrete
 from pymc.distributions.shape_utils import rv_size_is_none
 from pytensor import tensor as pt
 from pytensor.tensor.random.op import RandomVariable
@@ -399,3 +400,219 @@


class GrassiaIIGeometricRV(RandomVariable):
    name = "g2g"
    signature = "(),(),()->()"

    dtype = "int64"
    _print_name = ("GrassiaIIGeometric", "\\operatorname{GrassiaIIGeometric}")

    def __call__(self, r, alpha, time_covariate_vector=None, size=None, **kwargs):
        return super().__call__(r, alpha, time_covariate_vector, size=size, **kwargs)

    @classmethod
    def rng_fn(cls, rng, r, alpha, time_covariate_vector, size):
        # Handle None case for time_covariate_vector
        if time_covariate_vector is None:
            time_covariate_vector = 0.0

        # Convert inputs to numpy arrays - these should be concrete values
        r = np.asarray(r, dtype=np.float64)
        alpha = np.asarray(alpha, dtype=np.float64)
        time_covariate_vector = np.asarray(time_covariate_vector, dtype=np.float64)

        # Determine output size
        if size is None:
            size = np.broadcast_shapes(r.shape, alpha.shape, time_covariate_vector.shape)

        # Broadcast parameters to the output size
        r = np.broadcast_to(r, size)
        alpha = np.broadcast_to(alpha, size)
        time_covariate_vector = np.broadcast_to(time_covariate_vector, size)

        # Exponentiate the covariate effects once for all samples
        exp_time_covar = np.exp(time_covariate_vector)

        # Sample from the generative process: draw each unit's rate from the
        # gamma mixing distribution, scale it by the covariate effect, and
        # draw a geometric variate conditional on that rate
        lam = rng.gamma(shape=r, scale=1 / alpha, size=size)
        lam_covar = lam * exp_time_covar

        # Numerical stability for very small lambda values:
        # 1 - exp(-lam) ~= lam to first order
        p = np.where(
            lam_covar < 0.0001,
            lam_covar,
            1 - np.exp(-lam_covar),
        )

        # Ensure p is in the valid range for the geometric distribution
        p = np.clip(p, np.finfo(float).tiny, 1.0)

        # Generate geometric samples
        return rng.geometric(p)


g2g = GrassiaIIGeometricRV()


class GrassiaIIGeometric(Discrete):
    r"""Grassia(II)-Geometric distribution.

    This distribution is a flexible alternative to the Geometric distribution for the number of
    trials until a discrete event, and supports both static and time-varying covariates.

    Fader and Hardie describe this distribution with the following PMF and survival function in [1]_:

    .. math::
        \begin{align}
        \mathbb{P}(T=t|r,\alpha,\beta;Z(t)) &= \left(\frac{\alpha}{\alpha+C(t-1)}\right)^{r} - \left(\frac{\alpha}{\alpha+C(t)}\right)^{r} \\
        \mathbb{S}(t|r,\alpha,\beta;Z(t)) &= \left(\frac{\alpha}{\alpha+C(t)}\right)^{r}
        \end{align}

    where :math:`C(t) = \sum_{s=1}^{t} \exp(\beta \cdot Z(s))` accumulates the exponentiated
    covariate effects up to period :math:`t` and reduces to :math:`C(t) = t` with no covariates.

    .. plot::
        :context: close-figs

        import matplotlib.pyplot as plt
        import numpy as np
        import scipy.stats as st
        import arviz as az
        plt.style.use('arviz-darkgrid')
        t = np.arange(1, 11)
        alpha_vals = [1., 1., 2., 2.]
        r_vals = [.1, .25, .5, 1.]
        for alpha, r in zip(alpha_vals, r_vals):
            pmf = (alpha/(alpha + t - 1))**r - (alpha/(alpha+t))**r
            plt.plot(t, pmf, '-o', label=r'$\alpha$ = {}, $r$ = {}'.format(alpha, r))
        plt.xlabel('t', fontsize=12)
        plt.ylabel('p(t)', fontsize=12)
        plt.legend(loc=1)
        plt.show()

    ========  ===============================================
    Support   :math:`t \in \mathbb{N}_{>0}`
    ========  ===============================================

    Parameters
    ----------
    r : tensor_like of float
        Shape parameter (r > 0).
    alpha : tensor_like of float
        Scale parameter (alpha > 0).
    time_covariate_vector : tensor_like of float, optional
        Optional vector of dot products of time-varying covariates and their
        coefficients, one entry per time period.

    References
    ----------
    .. [1] Fader, Peter S. & Hardie, Bruce G. S. (2020).
        "Incorporating Time-Varying Covariates in a Simple Mixture Model for Discrete-Time Duration Data."
        https://www.brucehardie.com/notes/037/time-varying_covariates_in_BG.pdf
    """

    rv_op = g2g

    @classmethod
    def dist(cls, r, alpha, time_covariate_vector=None, *args, **kwargs):
        r = pt.as_tensor_variable(r)
        alpha = pt.as_tensor_variable(alpha)
        if time_covariate_vector is None:
            time_covariate_vector = pt.constant(0.0)
        time_covariate_vector = pt.as_tensor_variable(time_covariate_vector)
        return super().dist([r, alpha, time_covariate_vector], *args, **kwargs)

    def logp(value, r, alpha, time_covariate_vector=None):
        if time_covariate_vector is None:
            time_covariate_vector = pt.constant(0.0)
        time_covariate_vector = pt.as_tensor_variable(time_covariate_vector)

        def C_t(t):
            # C(t) = sum_{s=1}^{t} exp(beta . Z(s)); the empty sum gives C(0) = 0
            if time_covariate_vector.ndim == 0:
                # Scalar covariate effect: the sum has the closed form
                # t * exp(x), which is already 0 at t = 0
                return t * pt.exp(time_covariate_vector)
            # Vector of per-period effects: index into the cumulative sum of
            # exponentiated effects, guarding the t = 0 case (a symbolic t
            # cannot be tested with a Python `if`)
            cumulative = pt.cumsum(pt.exp(time_covariate_vector))
            return pt.switch(pt.gt(t, 0), cumulative[pt.maximum(t - 1, 0)], 0.0)

        # PMF on the log scale: log[S(t-1) - S(t)]
        logp = pt.log(
            pt.pow(alpha / (alpha + C_t(value - 1)), r) - pt.pow(alpha / (alpha + C_t(value)), r)
        )

        # Handle invalid values
        logp = pt.switch(
            pt.or_(
                value < 1,  # Support starts at 1
                pt.isnan(logp),  # Handle NaN cases
            ),
            -np.inf,
            logp,
        )

        return check_parameters(
            logp,
            r > 0,
            alpha > 0,
            msg="r > 0, alpha > 0",
        )

    def logcdf(value, r, alpha, time_covariate_vector=None):
        if time_covariate_vector is None:
            time_covariate_vector = pt.constant(0.0)
        time_covariate_vector = pt.as_tensor_variable(time_covariate_vector)

        # CDF on the log scale via the survival function:
        # S(t) = (alpha/(alpha + C(t)))^r and CDF(t) = 1 - S(t)

        def C_t(t):
            # Same accumulator as in logp
            if time_covariate_vector.ndim == 0:
                return t * pt.exp(time_covariate_vector)
            cumulative = pt.cumsum(pt.exp(time_covariate_vector))
            return pt.switch(pt.gt(t, 0), cumulative[pt.maximum(t - 1, 0)], 0.0)
        survival = pt.pow(alpha / (alpha + C_t(value)), r)
        logcdf = pt.log1p(-survival)

        return check_parameters(
            logcdf,
            r > 0,
            alpha > 0,
            msg="r > 0, alpha > 0",
        )

    def support_point(rv, size, r, alpha, time_covariate_vector=None):
        """Calculate a reasonable starting point for sampling.

        Exact moments of the GrassiaIIGeometric are not available in closed
        form, so a heuristic is used: the gamma mixing distribution has mean
        r/alpha, and smaller mixing rates imply longer waiting times, so
        exp(alpha/r) gives a positive, finite starting value that grows as
        r/alpha shrinks. The result is rounded up to an integer.

        When time_covariate_vector is provided, it scales the starting value
        through the exponential link function: exp(time_covariate_vector).
        """
        # Heuristic base value without covariates (see docstring)
        mean = pt.exp(alpha / r)

        # Apply time-varying covariates if provided
        if time_covariate_vector is None:
            time_covariate_vector = pt.constant(0.0)
        mean = mean * pt.exp(time_covariate_vector)

        # Round up to the nearest integer to stay inside the support
        mean = pt.ceil(mean)

        if not rv_size_is_none(size):
            mean = pt.full(size, mean)

        return mean
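For reviewers, a minimal usage sketch of the new distribution (not part of the diff). It assumes this branch is installed; the priors, observed durations, and covariate effects below are made up for illustration:

import numpy as np
import pymc as pm

from pymc_extras.distributions import GrassiaIIGeometric

# Hypothetical discrete-time durations (e.g., periods until churn)
observed_t = np.array([1, 2, 2, 3, 5, 8])

# Hypothetical per-period covariate effects beta . Z(s); the vector must
# cover the largest observed duration
covar_effect = np.log(np.linspace(1.0, 1.5, 8))

with pm.Model():
    r = pm.HalfNormal("r", sigma=2.0)
    alpha = pm.HalfNormal("alpha", sigma=2.0)
    y = GrassiaIIGeometric(
        "y", r=r, alpha=alpha, time_covariate_vector=covar_effect, observed=observed_t
    )
    idata = pm.sample(random_seed=42)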
diff --git a/tests/distributions/test_discrete.py b/tests/distributions/test_discrete.py
index 8e803a31b..befc7594e 100644
--- a/tests/distributions/test_discrete.py
+++ b/tests/distributions/test_discrete.py
@@ -33,6 +33,7 @@
 from pymc_extras.distributions import (
     BetaNegativeBinomial,
     GeneralizedPoisson,
+    GrassiaIIGeometric,
     Skellam,
 )
@@ -208,3 +209,247 @@ def test_logp(self):
             {"mu1": Rplus_small, "mu2": Rplus_small},
             lambda value, mu1, mu2: scipy.stats.skellam.logpmf(value, mu1, mu2),
         )


class TestGrassiaIIGeometric:
    class TestRandomVariable(BaseTestDistributionRandom):
        pymc_dist = GrassiaIIGeometric
        pymc_dist_params = {"r": 0.5, "alpha": 2.0, "time_covariate_vector": 1.0}
        expected_rv_op_params = {"r": 0.5, "alpha": 2.0, "time_covariate_vector": 1.0}
        tests_to_run = [
            "check_pymc_params_match_rv_op",
            "check_rv_size",
        ]

        def test_random_basic_properties(self):
            # Test standard parameter values with time covariates
            discrete_random_tester(
                dist=self.pymc_dist,
                paramdomains={
                    "r": Domain([0.5, 1.0, 2.0], edges=(None, None)),
                    "alpha": Domain([0.5, 1.0, 2.0], edges=(None, None)),
                    "time_covariate_vector": Domain([-1.0, 1.0, 2.0], edges=(None, None)),
                },
                ref_rand=lambda r, alpha, time_covariate_vector, size: np.random.geometric(
                    1
                    - np.exp(
                        -np.random.gamma(r, 1 / alpha, size=size) * np.exp(time_covariate_vector)
                    ),
                    size=size,
                ),
            )

            # Test small r / large alpha values, which produce small lambda values
            # and exercise the small-probability branch of the sampler
            discrete_random_tester(
                dist=self.pymc_dist,
                paramdomains={
                    "r": Domain([0.01, 0.1], edges=(None, None)),
                    "alpha": Domain([10.0, 100.0], edges=(None, None)),
                    "time_covariate_vector": Domain([0.0, 1.0], edges=(None, None)),
                },
                ref_rand=lambda r, alpha, time_covariate_vector, size: np.random.geometric(
                    np.clip(
                        np.random.gamma(r, 1 / alpha, size=size) * np.exp(time_covariate_vector),
                        1e-5,
                        1.0,
                    ),
                    size=size,
                ),
            )

        @pytest.mark.parametrize(
            "r,alpha,time_covariate_vector",
            [
                (0.5, 1.0, 0.0),
                (1.0, 2.0, 1.0),
                (2.0, 0.5, -1.0),
                (5.0, 1.0, None),
            ],
        )
        def test_random_moments(self, r, alpha, time_covariate_vector):
            dist = self.pymc_dist.dist(
                r=r, alpha=alpha, time_covariate_vector=time_covariate_vector, size=10_000
            )
            draws = dist.eval()

            # Check that all values are positive integers
            assert np.all(draws > 0)
            assert np.all(draws.astype(int) == draws)

            # Check that values are reasonably distributed.
            # Note: exact moments of this distribution are complex,
            # so we only check basic properties
            assert np.mean(draws) > 0
            assert np.var(draws) > 0

    def test_logp_basic(self):
        r = pt.scalar("r")
        alpha = pt.scalar("alpha")
        time_covariate_vector = pt.vector("time_covariate_vector")
        value = pt.vector("value", dtype="int64")

        logp = pm.logp(GrassiaIIGeometric.dist(r, alpha, time_covariate_vector), value)
        logp_fn = pytensor.function([value, r, alpha, time_covariate_vector], logp)

        # Test basic properties of logp
        test_value = np.array([1, 1, 2, 3, 4, 5])
        test_r = 1.0
        test_alpha = 1.0
        # One covariate effect per time period, covering the largest test value
        test_time_covariate_vector = np.array([1.0, 2.0, 3.0, 4.0, 5.0])

        logp_vals = logp_fn(test_value, test_r, test_alpha, test_time_covariate_vector)
        assert not np.any(np.isnan(logp_vals))
        assert np.all(np.isfinite(logp_vals))

        # Test invalid values
        assert (
            logp_fn(np.array([0]), test_r, test_alpha, test_time_covariate_vector) == -np.inf
        )  # Value must be >= 1

        with pytest.raises(TypeError):
            logp_fn(
                np.array([1.5]), test_r, test_alpha, test_time_covariate_vector
            )  # Value must be integer

        # Test parameter restrictions
        with pytest.raises(ParameterValueError):
            logp_fn(np.array([1]), -1.0, test_alpha, test_time_covariate_vector)  # r must be > 0

        with pytest.raises(ParameterValueError):
            logp_fn(np.array([1]), test_r, -1.0, test_time_covariate_vector)  # alpha must be > 0

    def test_sampling_consistency(self):
        """Test that sampling from the distribution produces reasonable results"""
        r = 2.0
        alpha = 1.0
        time_covariate_vector = None  # Start with just the None case

        # First test direct sampling from the distribution
        dist = GrassiaIIGeometric.dist(
            r=r, alpha=alpha, time_covariate_vector=time_covariate_vector
        )

        direct_samples = dist.eval()

        # Convert to a numpy array if it is not one already
        if not isinstance(direct_samples, np.ndarray):
            direct_samples = np.array([direct_samples])

        # Ensure we have a 1D array
        if direct_samples.ndim == 0:
            direct_samples = direct_samples.reshape(1)

        assert (
            direct_samples.size > 0
        ), f"Direct sampling produced no samples for {time_covariate_vector}"
        assert np.all(
            direct_samples > 0
        ), f"Direct sampling produced non-positive values for {time_covariate_vector}"
        assert np.all(
            direct_samples.astype(int) == direct_samples
        ), f"Direct sampling produced non-integer values for {time_covariate_vector}"

        # Then test MCMC sampling
        with pm.Model():
            GrassiaIIGeometric(
                "x", r=r, alpha=alpha, time_covariate_vector=time_covariate_vector
            )

            trace = pm.sample(
                chains=1, draws=50, tune=0, random_seed=42, progressbar=False
            ).posterior

        # Extract samples and ensure they are in the correct shape
        samples = trace["x"].values

        assert samples is not None, f"No samples were returned from MCMC for {time_covariate_vector}"
        assert samples.size > 0, f"MCMC sampling produced empty array for {time_covariate_vector}"

        if samples.ndim > 1:
            samples = samples.reshape(-1)  # Flatten if needed

        # Check basic properties of samples
        assert samples.size > 0, f"No samples after reshaping for {time_covariate_vector}"
        assert np.all(
            samples > 0
        ), f"Found non-positive values in samples for {time_covariate_vector}"
        assert np.all(
            samples.astype(int) == samples
        ), f"Found non-integer values in samples for {time_covariate_vector}"

        # Check that the mean and variance are positive and finite; for these
        # parameters the mean should also exceed 1
        mean = np.mean(samples)
        var = np.var(samples)
        assert 0 < mean < np.inf, f"Mean {mean} is not in valid range for {time_covariate_vector}"
        assert 0 < var < np.inf, f"Variance {var} is not in valid range for {time_covariate_vector}"
        assert mean > 1, f"Mean {mean} is not greater than 1 for {time_covariate_vector}"

    @pytest.mark.parametrize(
        "r, alpha, time_covariate_vector, size, expected_shape",
        [
            (1.0, 1.0, None, None, ()),  # Scalar output with no covariates
            ([1.0, 2.0], 1.0, [1.0], None, (2,)),  # Vector output from r
            (1.0, [1.0, 2.0], [1.0], None, (2,)),  # Vector output from alpha
            (1.0, 1.0, [1.0, 2.0], None, (2,)),  # Vector output from time covariates
            (1.0, 1.0, [1.0], (3, 2), (3, 2)),  # Explicit size
        ],
    )
    def test_support_point(self, r, alpha, time_covariate_vector, size, expected_shape):
        """Test that support_point returns reasonable values with correct shapes"""
        with pm.Model() as model:
            GrassiaIIGeometric(
                "x", r=r, alpha=alpha, time_covariate_vector=time_covariate_vector, size=size
            )

            init_point = model.initial_point()["x"]

        # Check shape
        assert init_point.shape == expected_shape

        # Check values are positive integers
        assert np.all(init_point > 0)
        assert np.all(init_point.astype(int) == init_point)

        # Check values are finite and reasonable
        assert np.all(np.isfinite(init_point))
        assert np.all(init_point < 1e6)  # Should not be extremely large
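As a standalone sanity check on the sampler logic (gamma-mixed geometric): with no covariates, C(t) = t and the gamma-geometric mixture reproduces the closed-form PMF exactly, since E[exp(-s*lambda)] = (alpha/(alpha+s))^r for lambda ~ Gamma(r, rate=alpha). A quick NumPy comparison, with illustrative parameter values (not part of the test suite):

import numpy as np

rng = np.random.default_rng(42)
r, alpha, n = 1.0, 2.0, 200_000

# Generative process with no covariates: lambda ~ Gamma(r, scale=1/alpha),
# then T | lambda ~ Geometric(1 - exp(-lambda))
lam = rng.gamma(shape=r, scale=1 / alpha, size=n)
draws = rng.geometric(np.clip(1 - np.exp(-lam), np.finfo(float).tiny, 1.0))

# Closed-form PMF with C(t) = t:
# P(T = t) = (alpha/(alpha + t - 1))^r - (alpha/(alpha + t))^r
t = np.arange(1, 11)
pmf = (alpha / (alpha + t - 1)) ** r - (alpha / (alpha + t)) ** r
emp = np.array([(draws == k).mean() for k in t])
print(np.round(pmf, 4))
print(np.round(emp, 4))  # should agree to roughly two decimal places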