Skip to content

Commit 4068adb

Browse files
authored
Merge pull request #34 from igmhub/update_forecast
Update mock_data and sampler book-keeping
2 parents e1149b3 + be9f7ef commit 4068adb

File tree

16 files changed

+1939
-1040
lines changed

16 files changed

+1939
-1040
lines changed

chains/.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Ignore everything in this directory
2+
*
3+
# Except this file
4+
!.gitignore
5+
!README

chains/README

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Some scripts store their emcee chains in this folder by default
2+
3+

cup1d/data/mock_data.py

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,48 @@
1+
from lace.emulator import gp_emulator
12
from cup1d.data import base_p1d_data
3+
from cup1d.data import data_Chabanier2019
4+
from cup1d.data import data_Karacayli2022
5+
from cup1d.data import data_QMLE_Ohio
6+
from cup1d.likelihood import lya_theory
7+
28

39
class Mock_P1D(base_p1d_data.BaseDataP1D):
410
""" Class to generate a mock P1D from another P1D object and a theory"""
511

6-
def __init__(self,data,theory):
12+
def __init__(self,emulator=None,data_label="Chabanier2019",
13+
zmin=2.0,zmax=4.5):
714
""" Copy data and replace P1D signal using theory"""
815

9-
# keep theory in case you need it later
10-
self.theory=theory
16+
# load original data
17+
self.data_label=data_label
18+
if data_label=="Chabanier2019":
19+
data=data_Chabanier2019.P1D_Chabanier2019(zmin=zmin,zmax=zmax)
20+
elif data_label=="QMLE_Ohio":
21+
data=data_QMLE_Ohio.P1D_QMLE_Ohio(zmin=zmin,zmax=zmax)
22+
elif data_label=="Karacayli2022":
23+
data=data_Karacayli2022.P1D_Karacayli2022(zmin=zmin,zmax=zmax)
24+
else:
25+
raise ValueError("Unknown data_label",data_label)
26+
27+
# check if emulator was provided
28+
if emulator is None:
29+
emulator=gp_emulator.GPEmulator()
1130

12-
# evaluate theory at k_kms, for all redshifts
13-
emu_p1d_kms=theory.get_p1d_kms(data.k_kms)
31+
# setup and store theory (we will need it later)
32+
self.theory=lya_theory.Theory(zs=data.z,emulator=emulator)
1433

15-
# at each z, update value of p1d
34+
# at each z will update value of p1d
1635
Pk_kms=data.Pk_kms.copy()
17-
for iz,z in enumerate(data.z):
18-
Pk_kms[iz]=emu_p1d_kms[iz]
1936

20-
# copy data
37+
# if emulator is not trained, skip mock making
38+
if emulator.trained:
39+
# evaluate theory at k_kms, for all redshifts
40+
emu_p1d_kms=self.theory.get_p1d_kms(data.k_kms)
41+
for iz,z in enumerate(data.z):
42+
Pk_kms[iz]=emu_p1d_kms[iz]
43+
else:
44+
print('emulator not trained, will not make mock')
45+
2146
base_p1d_data.BaseDataP1D.__init__(self,z=data.z,k_kms=data.k_kms,
2247
Pk_kms=Pk_kms,cov_Pk_kms=data.cov_Pk_kms)
2348

cup1d/likelihood/emcee_sampler.py

Lines changed: 103 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from lace.emulator import p1d_archive
1515
from lace.emulator import gp_emulator
1616
from cup1d.data import data_MPGADGET
17+
from cup1d.data import mock_data
1718
from cup1d.likelihood import likelihood
1819

1920

@@ -24,14 +25,12 @@ def __init__(self,like=None,
2425
nwalkers=None,read_chain_file=None,verbose=False,
2526
subfolder=None,rootdir=None,
2627
save_chain=True,progress=False,
27-
train_when_reading=True,
28-
ignore_grid_when_reading=False):
28+
train_when_reading=True):
2929
"""Setup sampler from likelihood, or use default.
3030
If read_chain_file is provided, read pre-computed chain.
3131
rootdir allows user to search for saved chains in a different
3232
location to the code itself.
33-
If not train_when_reading, emulator can not be used when reading.
34-
Use ignore_grid_when_reading for plotting marginalised chains."""
33+
If not train_when_reading, emulator can not be used when reading."""
3534

3635
self.verbose=verbose
3736
self.progress=progress
@@ -40,7 +39,7 @@ def __init__(self,like=None,
4039
if self.verbose: print('will read chain from file',read_chain_file)
4140
assert not like, "likelihood specified but reading chain from file"
4241
self.read_chain_from_file(read_chain_file,rootdir,subfolder,
43-
train_when_reading,ignore_grid_when_reading)
42+
train_when_reading)
4443
self.burnin_pos=None
4544
else:
4645
self.like=like
@@ -247,12 +246,10 @@ def resume_sampler(self,max_steps,log_func=None,timeout=None,force_timeout=False
247246
return
248247

249248

250-
def get_initial_walkers(self,initial=0.1):
249+
def get_initial_walkers(self,initial=0.05):
251250
"""Setup initial states of walkers in sensible points
252251
-- initial will set a range within unit volume around the
253-
fiducial values to initialise walkers (set to 0.5 to
254-
distribute across full prior volume) """
255-
252+
fiducial values to initialise walkers (if no prior is used)"""
256253

257254
ndim=self.ndim
258255
nwalkers=self.nwalkers
@@ -262,7 +259,7 @@ def get_initial_walkers(self,initial=0.1):
262259

263260
if self.like.prior_Gauss_rms is None:
264261
p0=np.random.rand(ndim*nwalkers).reshape((nwalkers,ndim))
265-
p0=p0*initial+0.5
262+
p0=p0*2*initial+0.5-initial
266263
else:
267264
rms=self.like.prior_Gauss_rms
268265
p0=np.ndarray([nwalkers,ndim])
@@ -375,14 +372,14 @@ def get_all_params(self,delta_lnprob_cut=None):
375372

376373

377374
def read_chain_from_file(self,chain_number,rootdir,subfolder,
378-
train_when_reading,ignore_grid_when_reading):
375+
train_when_reading):
379376
"""Read chain from file, check parameters and setup likelihood"""
380377

381378
if rootdir:
382379
chain_location=rootdir
383380
else:
384381
assert ('CUP1D_PATH' in os.environ),'export CUP1D_PATH'
385-
chain_location=os.environ['CUP1D_PATH']+"/emcee_chains/"
382+
chain_location=os.environ['CUP1D_PATH']+"/chains/"
386383
if subfolder:
387384
self.save_directory=chain_location+"/"+subfolder+"/chain_"+str(chain_number)
388385
else:
@@ -391,49 +388,69 @@ def read_chain_from_file(self,chain_number,rootdir,subfolder,
391388
with open(self.save_directory+"/config.json") as json_file:
392389
config = json.load(json_file)
393390

394-
if self.verbose: print("Building archive")
395-
try:
396-
kp=config["kp_Mpc"]
397-
except:
398-
kp=None
399-
400-
archive=p1d_archive.archiveP1D(basedir=config["basedir"],
401-
drop_sim_number=config["data_sim_number"],
402-
drop_tau_rescalings=True,
403-
drop_temp_rescalings=True,
404-
z_max=config["z_max"],kp_Mpc=kp)
405-
406-
# Setup the emulators
407-
if self.verbose: print("Setting up emulator")
408-
emulator=gp_emulator.GPEmulator(paramList=config["paramList"],
409-
train=train_when_reading,
410-
emu_type=config["emu_type"],
411-
kmax_Mpc=config["kmax_Mpc"],
412-
asymmetric_kernel=config["asym_kernel"],
413-
rbf_only=config["asym_kernel"],
414-
passarchive=archive,
415-
verbose=self.verbose)
391+
if self.verbose: print("Setup emulator")
392+
emu_type=config["emu_type"]
393+
emulator=gp_emulator.GPEmulator(train=train_when_reading,
394+
emu_type=emu_type,
395+
kmax_Mpc=config["kmax_Mpc"])
396+
397+
# Figure out redshift range in data
398+
if "z_list" in config:
399+
z_list=config["z_list"]
400+
zmin=min(z_list)
401+
zmax=max(z_list)
402+
else:
403+
zmin=config["data_zmin"]
404+
zmax=config["data_zmax"]
416405

417406
# Setup mock data
418-
data_cov=config["data_cov_factor"]
419-
data_year=config["data_year"]
420-
data=data_MPGADGET.P1D_MPGADGET(sim_label=config["data_sim_number"],
421-
basedir=config["basedir"],
422-
z_list=np.asarray(config["z_list"]),
423-
data_cov_factor=data_cov,
424-
data_cov_label=data_year,
425-
polyfit=(config["emu_type"]=="polyfit"))
426-
427-
# (optionally) setup extra P1D data (from HIRES)
428-
if "extra_p1d_label" in config:
429-
extra_p1d_data=data_MPGADGET.P1D_MPGADGET(basedir=config["basedir"],
430-
sim_label=config["data_sim_number"],
431-
zmax=config["extra_p1d_zmax"],
432-
data_cov_factor=1.0,
433-
data_cov_label=config["extra_p1d_label"],
434-
polyfit=(config["emu_type"]=="polyfit"))
407+
if "data_type" in config:
408+
data_type=config["data_type"]
435409
else:
436-
extra_p1d_data=None
410+
data_type="gadget"
411+
if self.verbose: print("Setup data of type =",data_type)
412+
if data_type=="mock":
413+
# using a mock_data P1D (computed from theory)
414+
data=mock_data.Mock_P1D(emulator=emulator,
415+
data_label=config["data_mock_label"],
416+
zmin=zmin,zmax=zmax)
417+
# (optionally) setup extra P1D from high-resolution
418+
if "extra_p1d_label" in config:
419+
extra_data=mock_data.Mock_P1D(emulator=emulator,
420+
data_label=config["extra_p1d_label"],
421+
zmin=config["extra_p1d_zmin"],
422+
zmax=config["extra_p1d_zmax"])
423+
else:
424+
extra_data=None
425+
elif data_type=="gadget":
426+
# using a data_MPGADGET P1D (from Gadget sim)
427+
if "data_sim_number" in config:
428+
sim_label=config["data_sim_number"]
429+
else:
430+
sim_label=config["data_sim_label"]
431+
# check that sim is not from emulator suite
432+
assert sim_label not in range(30)
433+
# figure out p1d covariance used
434+
if "data_year" in config:
435+
data_cov_label=config["data_year"]
436+
else:
437+
data_cov_label=config["data_cov_label"]
438+
data=data_MPGADGET.P1D_MPGADGET(sim_label=sim_label,
439+
zmin=zmin,zmax=zmax,
440+
data_cov_factor=config["data_cov_factor"],
441+
data_cov_label=data_cov_label,
442+
polyfit=(emu_type=="polyfit"))
443+
# (optionally) setup extra P1D from high-resolution
444+
if "extra_p1d_label" in config:
445+
extra_data=data_MPGADGET.P1D_MPGADGET(sim_label=sim_label,
446+
zmin=config["extra_p1d_zmin"],
447+
zmax=config["extra_p1d_zmax"],
448+
data_cov_label=config["extra_p1d_label"],
449+
polyfit=(emu_type=="polyfit"))
450+
else:
451+
extra_data=None
452+
else:
453+
raise ValueError("unknown data type")
437454

438455
# Setup free parameters
439456
if self.verbose: print("Setting up likelihood")
@@ -447,11 +464,10 @@ def read_chain_from_file(self,chain_number,rootdir,subfolder,
447464
self.like=likelihood.Likelihood(data=data,emulator=emulator,
448465
free_param_names=free_param_names,
449466
free_param_limits=free_param_limits,
450-
verbose=False,
451467
prior_Gauss_rms=config["prior_Gauss_rms"],
452468
emu_cov_factor=config["emu_cov_factor"],
453469
cosmo_fid_label=cosmo_fid_label,
454-
extra_p1d_data=extra_p1d_data)
470+
extra_p1d_data=extra_data)
455471

456472
# Verify we have a backend, and load it
457473
assert os.path.isfile(self.save_directory+"/backend.h5"), "Backend not found, can't load chains"
@@ -482,7 +498,7 @@ def _setup_chain_folder(self,rootdir=None,subfolder=None):
482498
chain_location=rootdir
483499
else:
484500
assert ('CUP1D_PATH' in os.environ),'export CUP1D_PATH'
485-
chain_location=os.environ['CUP1D_PATH']+"/emcee_chains/"
501+
chain_location=os.environ['CUP1D_PATH']+"/chains/"
486502
if subfolder:
487503
# If there is one, check if it exists, if not make it
488504
if not os.path.isdir(chain_location+"/"+subfolder):
@@ -548,86 +564,58 @@ def write_chain_to_file(self,residuals=False,plot_nersc=False,
548564

549565
saveDict={}
550566

551-
# identify Nyx archives
552-
if hasattr(self.like.theory.emulator.archive,"fname"):
553-
saveDict["nyx_fname"]=self.like.theory.emulator.archive.fname
554-
else:
555-
saveDict["basedir"]=self.like.theory.emulator.archive.basedir
556-
saveDict["skewers_label"]=self.like.theory.emulator.archive.skewers_label
557-
saveDict["p1d_label"]=self.like.theory.emulator.archive.p1d_label
558-
saveDict["drop_tau_rescalings"]=self.like.theory.emulator.archive.drop_tau_rescalings
559-
saveDict["drop_temp_rescalings"]=self.like.theory.emulator.archive.drop_temp_rescalings
560-
saveDict["nearest_tau"]=self.like.theory.emulator.archive.nearest_tau
561-
saveDict["z_max"]=self.like.theory.emulator.archive.z_max
562-
saveDict["undersample_cube"]=self.like.theory.emulator.archive.undersample_cube
563-
564567
# Emulator settings
565-
saveDict["kp_Mpc"]=self.like.theory.emulator.archive.kp_Mpc
566-
saveDict["paramList"]=self.like.theory.emulator.paramList
568+
assert self.like.theory.emulator.asymmetric_kernel
569+
assert self.like.theory.emulator.rbf_only
567570
saveDict["kmax_Mpc"]=self.like.theory.emulator.kmax_Mpc
568-
569-
## Do we train a GP on each z?
570-
if self.like.theory.emulator.emulators:
571-
z_emulator=True
572-
emu_hyperparams=[]
573-
for emu in self.like.theory.emulator.emulators:
574-
emu_hyperparams.append(emu.gp.param_array.tolist())
575-
else:
576-
z_emulator=False
577-
emu_hyperparams=self.like.theory.emulator.gp.param_array.tolist()
578-
saveDict["z_emulator"]=z_emulator
579-
580-
## Is this an asymmetric, rbf-only emulator?
581-
if self.like.theory.emulator.asymmetric_kernel and self.like.theory.emulator.rbf_only:
582-
saveDict["asym_kernel"]=True
583-
else:
584-
saveDict["asym_kernel"]=False
585-
586-
saveDict["emu_hyperparameters"]=emu_hyperparams
587571
saveDict["emu_type"]=self.like.theory.emulator.emu_type
588-
saveDict["reduce_var"]=self.like.theory.emulator.reduce_var_mf
589572

590-
## Likelihood & data settings
591-
saveDict["prior_Gauss_rms"]=self.like.prior_Gauss_rms
592-
saveDict["z_list"]=self.like.theory.zs.tolist()
593-
saveDict["emu_cov_factor"]=self.like.emu_cov_factor
594-
saveDict["data_basedir"]=self.like.data.basedir
595-
saveDict["data_sim_number"]=self.like.data.sim_label
596-
saveDict["data_cov_factor"]=self.like.data.data_cov_factor
597-
saveDict["data_year"]=self.like.data.data_cov_label
598-
saveDict["cosmo_fid_label"]=self.like.cosmo_fid_label
573+
# Data settings
574+
if hasattr(self.like.data,"mock_sim"):
575+
# using a data_MPGADGET P1D (from Gadget sim)
576+
saveDict["data_type"]="gadget"
577+
saveDict["data_sim_label"]=self.like.data.sim_label
578+
saveDict["data_cov_label"]=self.like.data.data_cov_label
579+
saveDict["data_cov_factor"]=self.like.data.data_cov_factor
580+
elif hasattr(self.like.data,"theory"):
581+
# using a mock_data P1D (computed from theory)
582+
saveDict["data_type"]="mock"
583+
saveDict["data_mock_label"]=self.like.data.data_label
584+
else:
585+
raise ValueError("unknown data type")
586+
saveDict["data_zmin"]=min(self.like.theory.zs)
587+
saveDict["data_zmax"]=max(self.like.theory.zs)
599588

600589
# Add information about the extra-p1d data (high-resolution P1D)
601590
if self.like.extra_p1d_like:
602-
extra_p1d_data=self.like.extra_p1d_like.data
603-
saveDict["extra_p1d_label"]=extra_p1d_data.data_cov_label
604-
saveDict["extra_p1d_zmax"]=max(extra_p1d_data.z)
605-
else:
606-
print("did not have extra P1D likelihood")
607-
608-
# Make sure (As,ns,nrun) were defined in standard pivot_scalar
609-
if hasattr(self.like.theory,"cosmo_model_fid"):
610-
cosmo_fid=self.like.theory.cosmo_model_fid.cosmo
611-
pivot_scalar=cosmo_fid.InitPower.pivot_scalar
612-
assert pivot_scalar==0.05,"non-standard pivot_scalar"
591+
extra_data=self.like.extra_p1d_like.data
592+
if hasattr(extra_data,"mock_sim"):
593+
saveDict["extra_p1d_label"]=extra_data.data_cov_label
594+
elif hasattr(extra_data,"theory"):
595+
saveDict["extra_p1d_label"]=extra_data.data_label
596+
else:
597+
raise ValueError("unknown data type")
598+
saveDict["extra_p1d_zmin"]=min(extra_data.z)
599+
saveDict["extra_p1d_zmax"]=max(extra_data.z)
613600

601+
# Other likelihood settings
602+
saveDict["prior_Gauss_rms"]=self.like.prior_Gauss_rms
603+
saveDict["cosmo_fid_label"]=self.like.cosmo_fid_label
604+
saveDict["emu_cov_factor"]=self.like.emu_cov_factor
614605
free_params_save=[]
615606
free_param_limits=[]
616607
for par in self.like.free_params:
617-
## The parameter limits are saved twice but for the sake
618-
## of backwards compatibility I'm going to leave this
619608
free_params_save.append([par.name,par.min_value,par.max_value])
620609
free_param_limits.append([par.min_value,par.max_value])
621610
saveDict["free_params"]=free_params_save
622611
saveDict["free_param_limits"]=free_param_limits
623612

624-
## Sampler stuff
613+
# Sampler stuff
625614
saveDict["burn_in"]=self.burnin_nsteps
626615
saveDict["nwalkers"]=self.nwalkers
627616
saveDict["autocorr"]=self.autocorr.tolist()
628617

629-
## Save dictionary to json file in the
630-
## appropriate directory
618+
# Save dictionary to json file in the appropriate directory
631619
if self.save_directory is None:
632620
self._setup_chain_folder()
633621
with open(self.save_directory+"/config.json", "w") as json_file:

0 commit comments

Comments
 (0)