Skip to content

Commit 132d7bc

Browse files
WIP free retrieval templates
1 parent ea6d440 commit 132d7bc

File tree

1 file changed

+394
-0
lines changed

1 file changed

+394
-0
lines changed

reference/scripts/free_retreival.py

Lines changed: 394 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,394 @@
1+
##!/usr/bin/env python
2+
# coding: utf-8
3+
4+
# Author: Natasha E Batalha
5+
6+
# PICASO Retrieval Script Template for fitting a free a spectrum
7+
8+
# CTRL-F "CHANGEME" for areas you should consider changing
9+
# IF CHANGEME is commented out then it is just a suggestion
10+
# IF CHANGEME is not commented you must change before proceeding
11+
12+
# Use this as a starting point for create a retrieval script
13+
14+
# HOW to run with OpenMPI
15+
16+
# Running with mpiexec command line
17+
18+
# mpiexec -n numprocs python -m mpi4py pyfile
19+
# for example: (-n specifies number of jobs)
20+
# mpiexec -n 5 python -m mpi4py run.py
21+
22+
23+
24+
import numpy as np
import os
# FIX: xarray (as xr) and astropy.units (as u) are used in get_data() but were
# never imported in the original template
import xarray as xr
import astropy.units as u
import picaso.justdoit as jdi
pd = jdi.pd
import picaso.analyze as lyz
import picaso.retrieval as prt
# CHANGEME: the sampler to use. The __main__ block below calls
# ultranest.ReactiveNestedSampler, so the original `import <sampler>`
# placeholder is resolved to ultranest here; swap for your preferred sampler.
import ultranest
31+
32+
33+
34+
###DATA###
35+
def get_data():
36+
"""
37+
Create a function to process your data in any way you see fit.
38+
Here we are using the ExoTiC-MIRI data
39+
https://zenodo.org/records/8360121/files/ExoTiC-MIRI.zip?download=1
40+
But no need to download it.
41+
42+
Checklist
43+
---------
44+
- your function returns a spectrum that will be in the same units as your picaso model (e.g. rp/rs^2, erg/s/cm/cm or other)
45+
- your function retuns a spectrum that is in ascending order of wavenumber
46+
- your function returns a dictionary where the key specifies the instrument name (in the event there are multiple)
47+
48+
Returns
49+
-------
50+
dict:
51+
dictionary key: wavenumber (ascneding), flux or transit depth, and error.
52+
e.g. {'MIRI LRS':[wavenumber, transit depth, transit depth error], 'NIRSpec G395H':[wavenumber, transit depth, transit depth error]}
53+
"""
54+
datadict = <datadict>
55+
for idata in datadict.keys():
56+
dat = xr.load_dataset(datadict[idata])
57+
58+
#add check for valid astropy unit
59+
CHECKUNITS for data_vars
60+
if unit is valid
61+
unity = u.Unit()
62+
else:
63+
raise Exception('Not a valid astropy unit for data_vars')
64+
65+
CHECKUNITS for coords
66+
if unit is valid
67+
unitx = u.Unit()
68+
else:
69+
raise Exception("Not a valid unit for coords")
70+
71+
#build nice dataframe so we can easily
72+
final = jdi.pd.DataFrame(dict(x=dat.coords['wavelength'].values,
73+
y=dat.data_vars[<to_fit>].values,
74+
e=dat.data_vars[<to_fit>'_error'].values))
75+
76+
#create a wavenumber grid
77+
final['micron'] = (dat.coords['wavelength'].values*u.Unit(unitx)).to(u.um).values
78+
final['wavenumber'] = 1e4/final['micron']
79+
80+
#always ensure we are ordered correctly
81+
final = final.sort_values(by='wavenumber').reset_index(drop=True)
82+
83+
#return a nice dictionary with the info we need
84+
returns = {idata: [final['wavenumber'].values,
85+
final[<to_fit>].values ,final[<to_fit>'_error'].values] }
86+
return returns
87+
88+
89+
###Opacity Data Selection###
# CHANGEME: point to the opacity database on your own machine
opacity_filename_db= '/data2/picaso_dbs/R15000/all_opacities_0.3_15_R15000.db'
opacity_method = 'resampled' #['preweighted','resortrebin']

###Initialize MIE files for cloud species requested###
# CHANGEME: point to your local virga Mie coefficient (.mieff) directory
virga_mieff_files = '/data/virga_dbs/virga_0,3_15_R300/'
cloud_species = ['SiO2']
# load the cloud optical data once up front so the model function can build
# parameterized (flex) clouds without re-reading files on every sampler call
param_tools = prt.Parameterize(load_cld_optical=cloud_species,
                    mieff_dir=virga_mieff_files)

###PLANET & STAR###
# NOTE(review): planet_params and stellar_params are never defined in this WIP
# script -- presumably they come from a planet parameter file (CHANGEME);
# define them before this point or the script will raise a NameError.
#planet properties
mass = planet_params['mp']['value']
mass_unit = planet_params['mp']['unit'] #always double check what units are stored in your source file
radius = planet_params['rp']['value']
radius_unit = planet_params['rp']['unit']
gravity = planet_params['gravity']['value']
gravity_unit = planet_params['gravity']['unit']

#stellar properties
database = stellar_params['database'] #ck04models, phoenix, etc
t_eff = stellar_params['steff']
metallicity = stellar_params['feh']
log_g = stellar_params['logg']
r_star = stellar_params['rs']['value']
r_star_unit = stellar_params['rs']['unit']

# one opacity connection per database entry; add more entries to combine
# multiple opacity files in a single retrieval
opacity = {'db1':jdi.opannection(filename_db= opacity_filename_db)}
118+
def setup_planet():
    """
    Configure everything that stays FIXED during the retrieval (any property we
    are not fitting for is set once here).

    Returns
    -------
    dict
        One fully-configured picaso inputs class per opacity connection.
    """
    pl = {}
    for key, opa in opacity.items():
        case = jdi.inputs()
        # stellar inputs
        case.star(opa, t_eff, metallicity, log_g,
                  radius=r_star,
                  database=database,
                  radius_unit=jdi.u.Unit(r_star_unit))
        # reference pressure for transmission
        case.approx(p_reference=1)
        # planet gravity from mass + radius
        case.gravity(mass=mass, mass_unit=jdi.u.Unit(mass_unit),
                     radius=radius, radius_unit=jdi.u.Unit(radius_unit))
        pl[key] = case
    return pl
planet = setup_planet()
140+
141+
142+
# ## Step 4) Param Set

#we can get the grid parameters directly from the module load, this will help with setting up our free parameters
# NOTE(review): `fitter` and `grid_name` are used here (and throughout) but are
# never defined in this WIP script -- in the companion grid-retrieval template
# they come from picaso.analyze.GridFitter; define them before this line.
grid_parameters_unique = fitter.interp_params[grid_name]['grid_parameters_unique']
146+
147+
class param_set:
    """
    Free-parameter names for each model, in the exact order they are unpacked
    from the sampler cube: every grid parameter first, followed by a radius
    scaling and the flex-cloud parameters (unknown reference pressure etc.).
    """

    grid_flexcloud = [*grid_parameters_unique,
                      'xrp', 'logcldbar', 'logfsed', 'logndz', 'sigma', 'lograd']
154+
155+
# ## Step 5) Guesses Set
class guesses_set:
    """
    Starting guesses used only to verify the pipeline runs end to end: the
    first instance of each grid parameter plus arbitrary cloud values.
    """
    #completely random guesses just to make sure it runs

    grid_flexcloud = ([vals[0] for vals in grid_parameters_unique.values()]
                      + [1, 1, 1, 1, 1, -5])
165+
166+
# ## Step 6) Model Set
class model_set:
    """
    Model functions, one per template. Here we interpolate temperature and
    chemistry (rather than spectra) using picaso's fast custom interpolator,
    which uses a flex linear interpolator and works on any type of matrix grid,
    then add a parameterized flex cloud on top.
    """

    def grid_flexcloud(cube, return_ptchem=False):
        """Interpolate temperature/chemistry on the grid and add a flex cloud.

        Parameters
        ----------
        cube : list
            Parameters sampled from the Bayesian sampler, ordered as in
            param_set.grid_flexcloud.
        return_ptchem : bool
            True/False; Default=False. Returns the planet class from the
            function *without* running a spectrum. This enables the kwarg
            `pressure_bands` in `picaso.retrieval.get_evaluations`. The
            formalism is to return the entire planet class in either dictionary
            or class format, e.g. {'db1':picaso.inputs class} or just
            picaso.inputs class.

        Returns
        -------
        tuple
            (wavenumber array, spectrum array, offset dict, error-inflation
            dict), unless return_ptchem=True in which case the planet dict is
            returned directly.
        """
        # 1. Grab parameters from your cube.
        final_goal = cube[0:len(grid_parameters_unique.keys())]
        ## using index() is more fool-proof than hard-coding cube positions
        xrp = cube[param_set.grid_flexcloud.index('xrp')]
        ## undo the log10 on the parameters sampled in log space
        base_pressure = 10**cube[param_set.grid_flexcloud.index('logcldbar')]
        fsed = 10**cube[param_set.grid_flexcloud.index('logfsed')]
        ndz = 10**cube[param_set.grid_flexcloud.index('logndz')]
        sigma = cube[param_set.grid_flexcloud.index('sigma')]
        eff_radius = 10**cube[param_set.grid_flexcloud.index('lograd')]

        # 2. Reset the mass and radius based on the radius scaling factor.
        for i in opacity.keys():
            planet[i].gravity(mass=mass, mass_unit=jdi.u.Unit(mass_unit),
                              radius=xrp*radius, radius_unit=jdi.u.Unit(radius_unit))

        # 3. Grab the pressure grid from the GridFitter class.
        pressure = fitter.pressure[grid_name][0]

        # 4. Interpolate to get temperature.
        temp = lyz.custom_interp(final_goal,
                fitter, grid_name, to_interp='custom',
                array_to_interp=fitter.interp_params[grid_name]['square_temp_grid'])

        # 5. Interpolate to get chemistry for each molecule.
        ## be intentional about what molecules are included in the retrieval
        ## (FIX: the original listed 'H2O' twice, re-interpolating it for no gain)
        mols = ['H2', 'He', 'H2O', 'CO', 'CO2', 'CH4']

        ## dataframe that collects the PT profile plus the chemistry
        df_chem = pd.DataFrame(dict(pressure=pressure, temperature=temp))
        for imol in mols:
            # same custom interp; just swap square_temp_grid for square_chem_grid
            df_chem[imol] = lyz.custom_interp(final_goal,
                    fitter, grid_name, to_interp='custom',
                    array_to_interp=fitter.interp_params[grid_name]['square_chem_grid'][imol])

        # 6. Add chemistry and PT profile to the PICASO atmosphere class.
        for i in opacity.keys():
            planet[i].atmosphere(df=pd.DataFrame(df_chem), verbose=False)

        # 7. Parameterized flex cloud. add_class sets up the pressure grids,
        # which only needs to happen once.
        # FIX: the original used the leaked loop variable `i` here; pick the
        # last opacity key explicitly, which matches the original behavior.
        param_tools.add_class(planet[list(opacity.keys())[-1]])
        # NOTE(review): the kwarg key is 'lograd[cm]' but a LINEAR radius
        # (10**lograd) is passed, exactly as in the original -- confirm the
        # expected units against picaso.retrieval's flex_cloud.
        lognorm_kwargs = {'sigma': sigma, 'lograd[cm]': eff_radius}
        df_cld = param_tools.flex_cloud(cloud_species, base_pressure, ndz, fsed,
                                        'lognorm', lognorm_kwargs=lognorm_kwargs)
        for i in opacity.keys():
            planet[i].clouds(df=df_cld.astype(float))

        # KEEP THIS! Shortcut to test the atmosphere and cloud setup without
        # running the spectrum.
        if return_ptchem:
            return planet

        # 8. Create a spectrum -- looping through all opacity dictionaries for
        # cases where we have more than one opacity file in the dictionary.
        x = []
        y = []
        for i in opacity.keys():
            out = planet[i].spectrum(opacity[i], calculation='transmission', full_output=True)
            x += list(out['wavenumber'])
            y += list(out['transit_depth'])

        # 9. Sort by wavenumber so we always pass the correct thing to the
        # likelihood function.
        combined = sorted(zip(x, y), key=lambda pair: pair[0])
        wno = np.array([pair[0] for pair in combined])
        spectra = np.array([pair[1] for pair in combined])

        offset = {}     # no offset needed for this calculation
        error_inf = {}  # let's not add error inflation
        return wno, spectra, offset, error_inf
263+
264+
# ## Step 6) Prior Set

class prior_set:
    """
    Store all your priors. Function names here must mirror those in model_set
    and param_set, and the unpacking order must follow the parameter set.

    Relative to the plain grid template we add priors for the new parameters:
    xrp plus the flex-cloud terms (logcldbar, logfsed, logndz, sigma, lograd).

    #pymultinest: http://johannesbuchner.github.io/pymultinest-tutorial/example1.html
    """

    def grid_flexcloud(cube):
        params = cube.copy()

        # uniform priors spanning the available grid for every grid parameter
        for i, key in enumerate(grid_parameters_unique):
            lo = np.min(grid_parameters_unique[key])
            hi = np.max(grid_parameters_unique[key])
            params[i] = lo + (hi - lo) * params[i]

        # uniform priors for the non-grid parameters; each (a, b) pair is mapped
        # as a + (b - a)*u, so a reversed pair (logcldbar) still covers the same
        # range, just traversed in the opposite direction
        uniform_bounds = {
            'xrp':       (0.7, 1.3),
            'logcldbar': (1, -4),
            'logfsed':   (-1, 1),
            'logndz':    (1, 10),
            'sigma':     (0.5, 2.5),
            'lograd':    (-7, -3),
        }
        for name, (lo, hi) in uniform_bounds.items():
            idx = param_set.grid_flexcloud.index(name)
            params[idx] = lo + (hi - lo) * params[idx]
        return params
322+
323+
# ## Step 7) Loglikelihood

def loglikelihood(cube):
    """
    Log-likelihood function that ultimately is given to the sampler.
    Note if you keep to our same formats you will not have to change this code
    much.

    Tips
    ----
    - Remember how we put our data dict, error inflation, and offsets all in
      dictionary format? Now we can utilize that functionality if we properly
      named them all with the right keys!

    Checklist
    ---------
    - ensure that error inflation and offsets are incorporated in the way that suits your problem
    - note there are many different ways to incorporate error inflation! this is just one example
    """
    #compute model spectra
    resultx,resulty,offset_all,err_inf_all = MODEL(cube) # we will define MODEL below

    #initiate the four terms we will need for the likelihood
    ydat_all=[];ymod_all=[];sigma_all=[];extra_term_all=[];

    #loop through data (if multiple instruments, add offsets if present, add err inflation if present)
    for ikey in DATA_DICT.keys(): #we will also define DATA_DICT below
        xdata,ydata,edata = DATA_DICT[ikey]
        #regrid the model onto the data wavenumber grid
        xbin_model , y_model = jdi.mean_regrid(resultx, resulty, newx=xdata)#remember we put everything already sorted on wavenumber

        #add offsets if they exist to the data
        # NOTE(review): the offset is ADDED to the data (not subtracted from the
        # model) -- make sure your offset sign convention matches this
        offset = offset_all.get(ikey,0) #if offset for that instrument doesnt exist, return 0
        ydata = ydata+offset

        #add error inflation if they exist
        err_inf = err_inf_all.get(ikey,0) #if err inf term for that instrument doesnt exist, return 0
        #sigma here is a VARIANCE: data error squared plus an extra noise term
        sigma = edata**2 + (err_inf)**2 #there are multiple ways to do this, here just adding in an extra noise term
        if err_inf !=0:
            #normalization term required once the variance is a free parameter;
            #see formalism here for example https://emcee.readthedocs.io/en/stable/tutorials/line/#maximum-likelihood-estimation
            extra_term = np.log(2*np.pi*sigma)
        else:
            extra_term=sigma*0

        ydat_all.append(ydata);ymod_all.append(y_model);sigma_all.append(sigma);extra_term_all.append(extra_term);

    #stack all instruments into flat arrays
    ymod_all = np.concatenate(ymod_all)
    ydat_all = np.concatenate(ydat_all)
    sigma_all = np.concatenate(sigma_all)
    extra_term_all = np.concatenate(extra_term_all)

    #compute likelihood (Gaussian chi-square plus the optional normalization term)
    loglike = -0.5*np.sum((ydat_all-ymod_all)**2/sigma_all + extra_term_all)
    return loglike
374+
375+
376+
if __name__ == "__main__":

    # CHANGEME based on what model you want to run
    MODEL_TO_RUN = 'grid_flexcloud'#options for templates: grid_virga, grid_addchem, grid_flexcloud

    ultranest_output= '/data/test/ultranest/grid_flexcloud' # point to your own directory

    #we can easily grab all the important pieces now that they are neatly stored in a class structure
    DATA_DICT = get_data()
    PARAMS = getattr(param_set,MODEL_TO_RUN)
    MODEL = getattr(model_set,MODEL_TO_RUN)
    PRIOR = getattr(prior_set,MODEL_TO_RUN)
    # NOTE(review): GUESS is fetched but never used below -- presumably intended
    # for a quick sanity evaluation of MODEL(GUESS) before sampling; confirm or
    # remove.
    GUESS = getattr(guesses_set,MODEL_TO_RUN)

    # NOTE(review): `ultranest` is referenced here but the top of this WIP script
    # only has the `import <sampler>` placeholder -- you must `import ultranest`
    # (or adapt this call to your chosen sampler) before running.
    sampler = ultranest.ReactiveNestedSampler(PARAMS, loglikelihood, PRIOR,
                        log_dir = ultranest_output,resume=True)
    result = sampler.run()
394+

0 commit comments

Comments
 (0)