Skip to content

Commit

Permalink
Merge pull request #34 from igmhub/update_forecast
Browse files Browse the repository at this point in the history
Update mock_data and sampler book-keeping
  • Loading branch information
andreufont authored Jul 4, 2023
2 parents e1149b3 + be9f7ef commit 4068adb
Show file tree
Hide file tree
Showing 16 changed files with 1,939 additions and 1,040 deletions.
5 changes: 5 additions & 0 deletions chains/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Ignore everything in this directory
*
# Except this file
!.gitignore
!README
3 changes: 3 additions & 0 deletions chains/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Some scripts will point to this folder to store emcee chains


43 changes: 34 additions & 9 deletions cup1d/data/mock_data.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,48 @@
from lace.emulator import gp_emulator
from cup1d.data import base_p1d_data
from cup1d.data import data_Chabanier2019
from cup1d.data import data_Karacayli2022
from cup1d.data import data_QMLE_Ohio
from cup1d.likelihood import lya_theory


class Mock_P1D(base_p1d_data.BaseDataP1D):
    """Mock P1D measurement built from a real dataset and a theory model.

    Takes the (z, k, covariance) layout of an existing P1D dataset and
    replaces the measured power values with a theory prediction evaluated
    through an emulator.

    NOTE(review): this span was a unified diff with old and new lines
    interleaved; reconstructed here as the post-merge version (deletions
    in a unified diff precede the additions that replace them).
    """

    def __init__(self,emulator=None,data_label="Chabanier2019",
            zmin=2.0,zmax=4.5):
        """Copy data layout and replace the P1D signal using theory.

        Parameters
        ----------
        emulator : optional
            P1D emulator used by the theory. If None, a default
            gp_emulator.GPEmulator() is created; if the emulator is not
            trained, the mock-making step is skipped and the original
            power values are kept.
        data_label : str
            Dataset providing z bins, k bins and covariances; one of
            "Chabanier2019", "QMLE_Ohio" or "Karacayli2022".
        zmin, zmax : float
            Redshift range to keep from the original dataset.

        Raises
        ------
        ValueError
            If data_label is not one of the supported datasets.
        """

        # load original data (provides z/k binning and covariances)
        self.data_label=data_label
        if data_label=="Chabanier2019":
            data=data_Chabanier2019.P1D_Chabanier2019(zmin=zmin,zmax=zmax)
        elif data_label=="QMLE_Ohio":
            data=data_QMLE_Ohio.P1D_QMLE_Ohio(zmin=zmin,zmax=zmax)
        elif data_label=="Karacayli2022":
            data=data_Karacayli2022.P1D_Karacayli2022(zmin=zmin,zmax=zmax)
        else:
            raise ValueError("Unknown data_label",data_label)

        # check if emulator was provided
        if emulator is None:
            emulator=gp_emulator.GPEmulator()

        # setup and store theory (we will need it later)
        self.theory=lya_theory.Theory(zs=data.z,emulator=emulator)

        # at each z will update value of p1d; start from a copy so the
        # original dataset is not mutated
        Pk_kms=data.Pk_kms.copy()

        # if emulator is not trained, skip mock making
        if emulator.trained:
            # evaluate theory at k_kms, for all redshifts
            emu_p1d_kms=self.theory.get_p1d_kms(data.k_kms)
            for iz,z in enumerate(data.z):
                Pk_kms[iz]=emu_p1d_kms[iz]
        else:
            print('emulator not trained, will not make mock')

        base_p1d_data.BaseDataP1D.__init__(self,z=data.z,k_kms=data.k_kms,
                Pk_kms=Pk_kms,cov_Pk_kms=data.cov_Pk_kms)

218 changes: 103 additions & 115 deletions cup1d/likelihood/emcee_sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from lace.emulator import p1d_archive
from lace.emulator import gp_emulator
from cup1d.data import data_MPGADGET
from cup1d.data import mock_data
from cup1d.likelihood import likelihood


Expand All @@ -24,14 +25,12 @@ def __init__(self,like=None,
nwalkers=None,read_chain_file=None,verbose=False,
subfolder=None,rootdir=None,
save_chain=True,progress=False,
train_when_reading=True,
ignore_grid_when_reading=False):
train_when_reading=True):
"""Setup sampler from likelihood, or use default.
If read_chain_file is provided, read pre-computed chain.
rootdir allows user to search for saved chains in a different
location to the code itself.
If not train_when_reading, emulator can not be used when reading.
Use ignore_grid_when_reading for plotting marginalised chains."""
If not train_when_reading, emulator can not be used when reading."""

self.verbose=verbose
self.progress=progress
Expand All @@ -40,7 +39,7 @@ def __init__(self,like=None,
if self.verbose: print('will read chain from file',read_chain_file)
assert not like, "likelihood specified but reading chain from file"
self.read_chain_from_file(read_chain_file,rootdir,subfolder,
train_when_reading,ignore_grid_when_reading)
train_when_reading)
self.burnin_pos=None
else:
self.like=like
Expand Down Expand Up @@ -247,12 +246,10 @@ def resume_sampler(self,max_steps,log_func=None,timeout=None,force_timeout=False
return


def get_initial_walkers(self,initial=0.1):
def get_initial_walkers(self,initial=0.05):
"""Setup initial states of walkers in sensible points
-- initial will set a range within unit volume around the
fiducial values to initialise walkers (set to 0.5 to
distribute across full prior volume) """

fiducial values to initialise walkers (if no prior is used)"""

ndim=self.ndim
nwalkers=self.nwalkers
Expand All @@ -262,7 +259,7 @@ def get_initial_walkers(self,initial=0.1):

if self.like.prior_Gauss_rms is None:
p0=np.random.rand(ndim*nwalkers).reshape((nwalkers,ndim))
p0=p0*initial+0.5
p0=p0*2*initial+0.5-initial
else:
rms=self.like.prior_Gauss_rms
p0=np.ndarray([nwalkers,ndim])
Expand Down Expand Up @@ -375,14 +372,14 @@ def get_all_params(self,delta_lnprob_cut=None):


def read_chain_from_file(self,chain_number,rootdir,subfolder,
train_when_reading,ignore_grid_when_reading):
train_when_reading):
"""Read chain from file, check parameters and setup likelihood"""

if rootdir:
chain_location=rootdir
else:
assert ('CUP1D_PATH' in os.environ),'export CUP1D_PATH'
chain_location=os.environ['CUP1D_PATH']+"/emcee_chains/"
chain_location=os.environ['CUP1D_PATH']+"/chains/"
if subfolder:
self.save_directory=chain_location+"/"+subfolder+"/chain_"+str(chain_number)
else:
Expand All @@ -391,49 +388,69 @@ def read_chain_from_file(self,chain_number,rootdir,subfolder,
with open(self.save_directory+"/config.json") as json_file:
config = json.load(json_file)

if self.verbose: print("Building archive")
try:
kp=config["kp_Mpc"]
except:
kp=None

archive=p1d_archive.archiveP1D(basedir=config["basedir"],
drop_sim_number=config["data_sim_number"],
drop_tau_rescalings=True,
drop_temp_rescalings=True,
z_max=config["z_max"],kp_Mpc=kp)

# Setup the emulators
if self.verbose: print("Setting up emulator")
emulator=gp_emulator.GPEmulator(paramList=config["paramList"],
train=train_when_reading,
emu_type=config["emu_type"],
kmax_Mpc=config["kmax_Mpc"],
asymmetric_kernel=config["asym_kernel"],
rbf_only=config["asym_kernel"],
passarchive=archive,
verbose=self.verbose)
if self.verbose: print("Setup emulator")
emu_type=config["emu_type"]
emulator=gp_emulator.GPEmulator(train=train_when_reading,
emu_type=emu_type,
kmax_Mpc=config["kmax_Mpc"])

# Figure out redshift range in data
if "z_list" in config:
z_list=config["z_list"]
zmin=min(z_list)
zmax=max(z_list)
else:
zmin=config["data_zmin"]
zmax=config["data_zmax"]

# Setup mock data
data_cov=config["data_cov_factor"]
data_year=config["data_year"]
data=data_MPGADGET.P1D_MPGADGET(sim_label=config["data_sim_number"],
basedir=config["basedir"],
z_list=np.asarray(config["z_list"]),
data_cov_factor=data_cov,
data_cov_label=data_year,
polyfit=(config["emu_type"]=="polyfit"))

# (optionally) setup extra P1D data (from HIRES)
if "extra_p1d_label" in config:
extra_p1d_data=data_MPGADGET.P1D_MPGADGET(basedir=config["basedir"],
sim_label=config["data_sim_number"],
zmax=config["extra_p1d_zmax"],
data_cov_factor=1.0,
data_cov_label=config["extra_p1d_label"],
polyfit=(config["emu_type"]=="polyfit"))
if "data_type" in config:
data_type=config["data_type"]
else:
extra_p1d_data=None
data_type="gadget"
if self.verbose: print("Setup data of type =",data_type)
if data_type=="mock":
# using a mock_data P1D (computed from theory)
data=mock_data.Mock_P1D(emulator=emulator,
data_label=config["data_mock_label"],
zmin=zmin,zmax=zmax)
# (optionally) setup extra P1D from high-resolution
if "extra_p1d_label" in config:
extra_data=mock_data.Mock_P1D(emulator=emulator,
data_label=config["extra_p1d_label"],
zmin=config["extra_p1d_zmin"],
zmax=config["extra_p1d_zmax"])
else:
extra_data=None
elif data_type=="gadget":
# using a data_MPGADGET P1D (from Gadget sim)
if "data_sim_number" in config:
sim_label=config["data_sim_number"]
else:
sim_label=config["data_sim_label"]
# check that sim is not from emulator suite
assert sim_label not in range(30)
# figure out p1d covariance used
if "data_year" in config:
data_cov_label=config["data_year"]
else:
data_cov_label=config["data_cov_label"]
data=data_MPGADGET.P1D_MPGADGET(sim_label=sim_label,
zmin=zmin,zmax=zmax,
data_cov_factor=config["data_cov_factor"],
data_cov_label=data_cov_label,
polyfit=(emu_type=="polyfit"))
# (optionally) setup extra P1D from high-resolution
if "extra_p1d_label" in config:
extra_data=data_MPGADGET.P1D_MPGADGET(sim_label=sim_label,
zmin=config["extra_p1d_zmin"],
zmax=config["extra_p1d_zmax"],
data_cov_label=config["extra_p1d_label"],
polyfit=(emu_type=="polyfit"))
else:
extra_data=None
else:
raise ValueError("unknown data type")

# Setup free parameters
if self.verbose: print("Setting up likelihood")
Expand All @@ -447,11 +464,10 @@ def read_chain_from_file(self,chain_number,rootdir,subfolder,
self.like=likelihood.Likelihood(data=data,emulator=emulator,
free_param_names=free_param_names,
free_param_limits=free_param_limits,
verbose=False,
prior_Gauss_rms=config["prior_Gauss_rms"],
emu_cov_factor=config["emu_cov_factor"],
cosmo_fid_label=cosmo_fid_label,
extra_p1d_data=extra_p1d_data)
extra_p1d_data=extra_data)

# Verify we have a backend, and load it
assert os.path.isfile(self.save_directory+"/backend.h5"), "Backend not found, can't load chains"
Expand Down Expand Up @@ -482,7 +498,7 @@ def _setup_chain_folder(self,rootdir=None,subfolder=None):
chain_location=rootdir
else:
assert ('CUP1D_PATH' in os.environ),'export CUP1D_PATH'
chain_location=os.environ['CUP1D_PATH']+"/emcee_chains/"
chain_location=os.environ['CUP1D_PATH']+"/chains/"
if subfolder:
# If there is one, check if it exists, if not make it
if not os.path.isdir(chain_location+"/"+subfolder):
Expand Down Expand Up @@ -548,86 +564,58 @@ def write_chain_to_file(self,residuals=False,plot_nersc=False,

saveDict={}

# identify Nyx archives
if hasattr(self.like.theory.emulator.archive,"fname"):
saveDict["nyx_fname"]=self.like.theory.emulator.archive.fname
else:
saveDict["basedir"]=self.like.theory.emulator.archive.basedir
saveDict["skewers_label"]=self.like.theory.emulator.archive.skewers_label
saveDict["p1d_label"]=self.like.theory.emulator.archive.p1d_label
saveDict["drop_tau_rescalings"]=self.like.theory.emulator.archive.drop_tau_rescalings
saveDict["drop_temp_rescalings"]=self.like.theory.emulator.archive.drop_temp_rescalings
saveDict["nearest_tau"]=self.like.theory.emulator.archive.nearest_tau
saveDict["z_max"]=self.like.theory.emulator.archive.z_max
saveDict["undersample_cube"]=self.like.theory.emulator.archive.undersample_cube

# Emulator settings
saveDict["kp_Mpc"]=self.like.theory.emulator.archive.kp_Mpc
saveDict["paramList"]=self.like.theory.emulator.paramList
assert self.like.theory.emulator.asymmetric_kernel
assert self.like.theory.emulator.rbf_only
saveDict["kmax_Mpc"]=self.like.theory.emulator.kmax_Mpc

## Do we train a GP on each z?
if self.like.theory.emulator.emulators:
z_emulator=True
emu_hyperparams=[]
for emu in self.like.theory.emulator.emulators:
emu_hyperparams.append(emu.gp.param_array.tolist())
else:
z_emulator=False
emu_hyperparams=self.like.theory.emulator.gp.param_array.tolist()
saveDict["z_emulator"]=z_emulator

## Is this an asymmetric, rbf-only emulator?
if self.like.theory.emulator.asymmetric_kernel and self.like.theory.emulator.rbf_only:
saveDict["asym_kernel"]=True
else:
saveDict["asym_kernel"]=False

saveDict["emu_hyperparameters"]=emu_hyperparams
saveDict["emu_type"]=self.like.theory.emulator.emu_type
saveDict["reduce_var"]=self.like.theory.emulator.reduce_var_mf

## Likelihood & data settings
saveDict["prior_Gauss_rms"]=self.like.prior_Gauss_rms
saveDict["z_list"]=self.like.theory.zs.tolist()
saveDict["emu_cov_factor"]=self.like.emu_cov_factor
saveDict["data_basedir"]=self.like.data.basedir
saveDict["data_sim_number"]=self.like.data.sim_label
saveDict["data_cov_factor"]=self.like.data.data_cov_factor
saveDict["data_year"]=self.like.data.data_cov_label
saveDict["cosmo_fid_label"]=self.like.cosmo_fid_label
# Data settings
if hasattr(self.like.data,"mock_sim"):
# using a data_MPGADGET P1D (from Gadget sim)
saveDict["data_type"]="gadget"
saveDict["data_sim_label"]=self.like.data.sim_label
saveDict["data_cov_label"]=self.like.data.data_cov_label
saveDict["data_cov_factor"]=self.like.data.data_cov_factor
elif hasattr(self.like.data,"theory"):
# using a mock_data P1D (computed from theory)
saveDict["data_type"]="mock"
saveDict["data_mock_label"]=self.like.data.data_label
else:
raise ValueError("unknown data type")
saveDict["data_zmin"]=min(self.like.theory.zs)
saveDict["data_zmax"]=max(self.like.theory.zs)

# Add information about the extra-p1d data (high-resolution P1D)
if self.like.extra_p1d_like:
extra_p1d_data=self.like.extra_p1d_like.data
saveDict["extra_p1d_label"]=extra_p1d_data.data_cov_label
saveDict["extra_p1d_zmax"]=max(extra_p1d_data.z)
else:
print("did not have extra P1D likelihood")

# Make sure (As,ns,nrun) were defined in standard pivot_scalar
if hasattr(self.like.theory,"cosmo_model_fid"):
cosmo_fid=self.like.theory.cosmo_model_fid.cosmo
pivot_scalar=cosmo_fid.InitPower.pivot_scalar
assert pivot_scalar==0.05,"non-standard pivot_scalar"
extra_data=self.like.extra_p1d_like.data
if hasattr(extra_data,"mock_sim"):
saveDict["extra_p1d_label"]=extra_data.data_cov_label
elif hasattr(extra_data,"theory"):
saveDict["extra_p1d_label"]=extra_data.data_label
else:
raise ValueError("unknown data type")
saveDict["extra_p1d_zmin"]=min(extra_data.z)
saveDict["extra_p1d_zmax"]=max(extra_data.z)

# Other likelihood settings
saveDict["prior_Gauss_rms"]=self.like.prior_Gauss_rms
saveDict["cosmo_fid_label"]=self.like.cosmo_fid_label
saveDict["emu_cov_factor"]=self.like.emu_cov_factor
free_params_save=[]
free_param_limits=[]
for par in self.like.free_params:
## The parameter limits are saved twice but for the sake
## of backwards compatibility I'm going to leave this
free_params_save.append([par.name,par.min_value,par.max_value])
free_param_limits.append([par.min_value,par.max_value])
saveDict["free_params"]=free_params_save
saveDict["free_param_limits"]=free_param_limits

## Sampler stuff
# Sampler stuff
saveDict["burn_in"]=self.burnin_nsteps
saveDict["nwalkers"]=self.nwalkers
saveDict["autocorr"]=self.autocorr.tolist()

## Save dictionary to json file in the
## appropriate directory
# Save dictionary to json file in the appropriate directory
if self.save_directory is None:
self._setup_chain_folder()
with open(self.save_directory+"/config.json", "w") as json_file:
Expand Down
Loading

0 comments on commit 4068adb

Please sign in to comment.