Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
abdullahau committed Mar 9, 2025
1 parent e4c213d commit cba34a4
Show file tree
Hide file tree
Showing 6 changed files with 351 additions and 181 deletions.
285 changes: 153 additions & 132 deletions 03b - Geocentric Models.qmd

Large diffs are not rendered by default.

142 changes: 116 additions & 26 deletions R_code.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -59,20 +59,20 @@ class BridgeStan(bs.StanModel):
super().__init__(stan_so, data, make_args=make_args, warn=False)
class StanQuap(object):
'''
Description:
Find mode of posterior distribution for arbitrary fixed effect models and
then produce an approximation of the full posterior using the quadratic
curvature at the mode.
"""
Description:
Find mode of posterior distribution for arbitrary fixed effect models and
then produce an approximation of the full posterior using the quadratic
curvature at the mode.
This command provides a convenient interface for finding quadratic approximations
of posterior distributions for models defined in Stan. This procedure is equivalent
to penalized maximum likelihood estimation and the use of a Hessian for profiling,
and therefore can be used to define many common regularization procedures. The point
estimates returned correspond to a maximum a posterior, or MAP, estimate. However the
intention is that users will use `draws` and `laplace_sample` and other methods to work
with the full posterior.
'''
This command provides a convenient interface for finding quadratic approximations
of posterior distributions for models defined in Stan. This procedure is equivalent
to penalized maximum likelihood estimation and the use of a Hessian for profiling,
and therefore can be used to define many common regularization procedures. The point
estimates returned correspond to a maximum a posterior, or MAP, estimate. However the
intention is that users will use `extract_samples` and `laplace_sample` and other methods to work
with the full posterior.
"""
def __init__(self,
stan_file: str,
stan_code: str,
Expand All @@ -90,9 +90,10 @@ class StanQuap(object):
jacobian=jacobian,
**kwargs
)
self.params = self.opt_model.stan_variables()
self.param_names = self.bs_model.param_names()
self.opt_params = {param: self.opt_model.stan_variable(param) for param in self.param_names}
self.params_unc = self.bs_model.param_unconstrain(
np.array(list(self.params.values()))
np.array(list(self.opt_params.values()))
)
self.jacobian = jacobian
Expand All @@ -109,16 +110,28 @@ class StanQuap(object):
cov_matrix = self.transform_vcov(vcov_unc, param_types, eps)
return cov_matrix
def laplace_sample(self, draws: int = 100_000):
return self.stan_model.laplace_sample(data=self.train_data,
def laplace_sample(self, data: dict = None, draws: int = 100_000):
if data is None:
data = self.train_data
return self.stan_model.laplace_sample(data=data,
mode=self.opt_model,
draws=draws,
jacobian=self.jacobian)
def extract_samples(self, n: int = 100_000, dict_out: bool = True):
laplace_obj = self.laplace_sample(n)
laplace_obj = self.laplace_sample(draws=n)
if dict_out:
return laplace_obj.stan_variables()
stan_var_dict = laplace_obj.stan_variables()
return {param: stan_var_dict[param] for param in self.param_names}
return laplace_obj.draws()
def sim(self, data: dict = None, n = 1000, dict_out: bool = True):
if data is None:
data = self.train_data
laplace_obj = self.laplace_sample(data=data, draws=n)
if dict_out:
stan_var_dict = laplace_obj.stan_variables()
return {param: stan_var_dict[param] for param in stan_var_dict if param not in self.param_names}
return laplace_obj.draws()
def compute_jacobian_analytical(self, param_types):
Expand Down Expand Up @@ -183,21 +196,76 @@ class StanQuap(object):
def precis(self, param_types=None, prob=0.89, eps=1e-6):
vcov_mat = self.vcov_matrix(param_types, eps)
pos_mu = np.array(list(self.params.values()))
pos_mu = np.array(list(self.opt_params.values()))
pos_sigma = np.sqrt(np.diag(vcov_mat))
plo = (1-prob)/2
phi = 1 - plo
lo = pos_mu + pos_sigma * stats.norm.ppf(plo)
hi = pos_mu + pos_sigma * stats.norm.ppf(phi)
res = pd.DataFrame({
'Parameter': list(self.params.keys()),
'Parameter': list(self.opt_params.keys()),
'Mean': pos_mu,
'StDev': pos_sigma,
f'{plo:.1%}': lo,
f'{phi:.1%}': hi})
return res.set_index('Parameter')
```


```{python}
d = pd.read_csv("data/Howell1.csv", sep=';')
d2 = d[d['age'] >= 18]
m4_3_pred = '''
data {
int<lower=1> N;
vector[N] height;
vector[N] weight;
real xbar;
int<lower=1> N_tilde;
vector[N_tilde] weight_tilde;
}
parameters {
real a;
real<lower=0> b;
real<lower=0, upper=50> sigma;
}
model {
vector[N] mu;
mu = a + b * (weight - xbar);
// Likelihood Function
height ~ normal(mu, sigma);
// Priors
a ~ normal(178, 20);
b ~ lognormal(0, 1);
sigma ~ uniform(0, 50);
}
generated quantities {
vector[N_tilde] y_tilde;
for (i in 1:N_tilde) {
y_tilde[i] = normal_rng(a + b * (weight_tilde[i] - xbar), sigma);
}
}
'''
weight_seq = np.arange(25, 71)
data = {'N': len(d2),
'xbar': d2['weight'].mean(),
'height': d2['height'].tolist(),
'weight': d2['weight'].tolist(),
'N_tilde': len(weight_seq),
'weight_tilde': weight_seq.tolist()}
m4_3_pred_model = StanQuap('stan_models/m4_3_pred', m4_3_pred, data, algorithm = 'Newton')
m4_3_pred_model.precis().round(2)
m4_3_pred_model.vcov_matrix().round(3)
m4_3_pred_model.sim(data=data, n=100)
```

```{python}
test_stan = '''
data {
Expand All @@ -223,7 +291,7 @@ model.precis(param_types=['prob'])
res_df = model.precis().round(7)
res_df
laplace_draws = model.draws()['p']
laplace_draws = model.extract_samples()['p']
sigma_draw = laplace_draws.std()
sigma_draw
Expand All @@ -235,7 +303,7 @@ import scipy.stats as stats
def dens_plot():
x = np.linspace(0,1,100)
y = stats.norm.pdf(x, model.params['p'], sigma_draw)
y = stats.norm.pdf(x, model.opt_params['p'], sigma_draw)
plt.plot(x, y, label='Laplace Draws')
y = stats.norm.pdf(x, res_df['Mean'][0], res_df['StDev'][0])
plt.plot(x, y, label='MAP')
Expand Down Expand Up @@ -464,13 +532,35 @@ pairs(m4.3)
```

```{r}
mu <- link( m4.3 )
?link
# define sequence of weights to compute predictions for
# these values will be on the horizontal axis
weight.seq <- seq( from=25 , to=70 , by=1 )
# use link to compute mu
# for each sample from posterior
# and for each weight in weight.seq
mu <- link( m4.3 , data=data.frame(weight=weight.seq) )
str(mu)
```

```{r}
# use type="n" to hide raw data
plot( height ~ weight , d2 , type="n" )
# loop over samples and plot each mu value
for ( i in 1:100 )
points( weight.seq , mu[i,] , pch=16 , col=col.alpha(rangi2,0.1) )
```

```{r}
m4.3@links
sim.height <- sim( m4.3 , data=list(weight=weight.seq) )
sim.mean <- apply( sim.height , 2 , mean )
sim.PI <- apply( sim.height , 2 , PI , prob=0.89 )
sim.mean
sim.PI
```






Binary file added stan_models/m4_3_pred
Binary file not shown.
33 changes: 33 additions & 0 deletions stan_models/m4_3_pred.stan
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@

data {
int<lower=1> N;
vector[N] height;
vector[N] weight;
real xbar;

int<lower=1> N_tilde;
vector[N_tilde] weight_tilde;
}
parameters {
real a;
real<lower=0> b;
real<lower=0, upper=50> sigma;
}
model {
vector[N] mu;
mu = a + b * (weight - xbar);

// Likelihood Function
height ~ normal(mu, sigma);

// Priors
a ~ normal(178, 20);
b ~ lognormal(0, 1);
sigma ~ uniform(0, 50);
}
generated quantities {
vector[N_tilde] y_tilde;
for (i in 1:N_tilde) {
y_tilde[i] = normal_rng(a + b * (weight_tilde[i] - xbar), sigma);
}
}
Binary file added stan_models/m4_3_pred_model.so
Binary file not shown.
72 changes: 49 additions & 23 deletions utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,20 +56,20 @@ def __init__(self, stan_file: str, data: dict, force_compile=False):
super().__init__(stan_so, data, make_args=make_args, warn=False)

class StanQuap(object):
'''
Description:
Find mode of posterior distribution for arbitrary fixed effect models and
then produce an approximation of the full posterior using the quadratic
curvature at the mode.
"""
Description:
Find mode of posterior distribution for arbitrary fixed effect models and
then produce an approximation of the full posterior using the quadratic
curvature at the mode.
This command provides a convenient interface for finding quadratic approximations
of posterior distributions for models defined in Stan. This procedure is equivalent
to penalized maximum likelihood estimation and the use of a Hessian for profiling,
and therefore can be used to define many common regularization procedures. The point
estimates returned correspond to a maximum a posterior, or MAP, estimate. However the
intention is that users will use `draws` and `laplace_sample` and other methods to work
with the full posterior.
'''
This command provides a convenient interface for finding quadratic approximations
of posterior distributions for models defined in Stan. This procedure is equivalent
to penalized maximum likelihood estimation and the use of a Hessian for profiling,
and therefore can be used to define many common regularization procedures. The point
estimates returned correspond to a maximum a posterior, or MAP, estimate. However the
intention is that users will use `extract_samples` and `laplace_sample` and other methods to work
with the full posterior.
"""
def __init__(self,
stan_file: str,
stan_code: str,
Expand All @@ -87,9 +87,10 @@ def __init__(self,
jacobian=jacobian,
**kwargs
)
self.params = self.opt_model.stan_variables()
self.param_names = self.bs_model.param_names()
self.opt_params = {param: self.opt_model.stan_variable(param) for param in self.param_names}
self.params_unc = self.bs_model.param_unconstrain(
np.array(list(self.params.values()))
np.array(list(self.opt_params.values()))
)
self.jacobian = jacobian

Expand All @@ -106,16 +107,33 @@ def vcov_matrix(self, param_types=None, eps=1e-6):
cov_matrix = self.transform_vcov(vcov_unc, param_types, eps)
return cov_matrix

def laplace_sample(self, draws: int = 100_000):
return self.stan_model.laplace_sample(data=self.train_data,
def laplace_sample(self, data: dict = None, draws: int = 100_000):
if data is None:
data = self.train_data
return self.stan_model.laplace_sample(data=data,
mode=self.opt_model,
draws=draws,
jacobian=self.jacobian)

def extract_samples(self, n: int = 100_000, dict_out: bool = True):
laplace_obj = self.laplace_sample(draws=n)
if dict_out:
return laplace_obj.stan_variables()
stan_var_dict = laplace_obj.stan_variables()
return {param: stan_var_dict[param] for param in self.param_names}
return laplace_obj.draws()

def sim(self, data: dict = None, n = 1000, dict_out: bool = True):
"""
Simulate posterior observations - Posterior Predictive Sampling
https://mc-stan.org/docs/stan-users-guide/posterior-prediction.html
https://mc-stan.org/docs/stan-users-guide/posterior-predictive-checks.html
"""
if data is None:
data = self.train_data
laplace_obj = self.laplace_sample(data=data, draws=n)
if dict_out:
stan_var_dict = laplace_obj.stan_variables()
return {param: stan_var_dict[param] for param in stan_var_dict if param not in self.param_names}
return laplace_obj.draws()

def compute_jacobian_analytical(self, param_types):
Expand Down Expand Up @@ -180,20 +198,28 @@ def transform_vcov(self, vcov_unc, param_types=None, eps=1e-6):

def precis(self, param_types=None, prob=0.89, eps=1e-6):
vcov_mat = self.vcov_matrix(param_types, eps)
pos_mu = np.array(list(self.params.values()))
pos_mu = np.array(list(self.opt_params.values()))
pos_sigma = np.sqrt(np.diag(vcov_mat))
plo = (1-prob)/2
phi = 1 - plo
lo = pos_mu + pos_sigma * stats.norm.ppf(plo)
hi = pos_mu + pos_sigma * stats.norm.ppf(phi)
res = pd.DataFrame({
'Parameter': list(self.params.keys()),
'Parameter': list(self.opt_params.keys()),
'Mean': pos_mu,
'StDev': pos_sigma,
f'{plo:.1%}': lo,
f'{phi:.1%}': hi})
return res.set_index('Parameter')


def link(fit, lm_func, data, n=1000, post=None):
# Extract Posterior Samples
if post is None:
post = fit.extract_samples(n=n, dict_out=True)
return lm_func(post, data)


# ----------------------- Stat Functions -----------------------
def center(vals: np.ndarray) -> np.ndarray:
return vals - np.nanmean(vals)
Expand All @@ -216,19 +242,19 @@ def invlogit(x: float) -> float:
return 1 / (1 + np.exp(-x))


def precis(samples, prob=0.89):
def precis(samples, prob=0.89, index_name='Parameter'):
if isinstance(samples, dict) or isinstance(samples, pd.Series):
samples = pd.DataFrame(samples)
plo = (1-prob)/2
phi = 1 - plo
res = pd.DataFrame({
'Parameter':samples.columns.to_numpy(),
f'{index_name}':samples.columns.to_numpy(),
'Mean': samples.mean().to_numpy(),
'StDev': samples.std().to_numpy(),
f'{plo:.1%}': samples.quantile(q=plo).to_numpy(),
f'{phi:.1%}': samples.quantile(q=phi).to_numpy()
})
return res.set_index('Parameter')
return res.set_index(f'{index_name}')


def precis_az(samples, var_names=None):
Expand Down

0 comments on commit cba34a4

Please sign in to comment.