#!/usr/bin/env python
# coding: utf-8

# Author: Natasha E Batalha

# PICASO Retrieval Script Template for fitting a spectrum

# CTRL-F "CHANGEME" for areas you should consider changing
# IF CHANGEME is commented out then it is just a suggestion
# IF CHANGEME is not commented out you must change it before proceeding

# Use this as a starting point for creating a retrieval script

# HOW to run with OpenMPI

# Running with mpiexec on the command line:

# mpiexec -n numprocs python -m mpi4py pyfile
# for example (-n specifies the number of processes):
# mpiexec -n 5 python -m mpi4py run.py
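# Optional sanity check (a suggestion, not part of the original template):
# mpi4py's standard API can print which rank each process is running as,
# to confirm the sampler really is running in parallel.
# from mpi4py import MPI
# print('rank', MPI.COMM_WORLD.Get_rank(), 'of', MPI.COMM_WORLD.Get_size())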


import numpy as np
import os
import xarray as xr
import astropy.units as u
import picaso.justdoit as jdi
pd = jdi.pd
import picaso.analyze as lyz
import picaso.retrieval as prt
import ultranest #CHANGEME if you prefer a different sampler; the __main__ block below assumes ultranest


###DATA###
def get_data():
    """
    Create a function to process your data in any way you see fit.
    Here we are using the ExoTiC-MIRI data
    https://zenodo.org/records/8360121/files/ExoTiC-MIRI.zip?download=1
    But no need to download it.

    Checklist
    ---------
    - your function returns a spectrum that is in the same units as your picaso model (e.g. rp/rs^2, erg/s/cm/cm or other)
    - your function returns a spectrum that is in ascending order of wavenumber
    - your function returns a dictionary where the key specifies the instrument name (in the event there are multiple)

    Returns
    -------
    dict:
        dictionary key: wavenumber (ascending), flux or transit depth, and error.
        e.g. {'MIRI LRS':[wavenumber, transit depth, transit depth error], 'NIRSpec G395H':[wavenumber, transit depth, transit depth error]}
    """
    datadict = <datadict> #CHANGEME: dict of {instrument name : path to an xarray file}
    to_fit = <to_fit> #CHANGEME: name of the data_var to fit; its error is assumed to be stored as to_fit+'_error'
    returns = {}
    for idata in datadict.keys():
        dat = xr.load_dataset(datadict[idata])

        #check for a valid astropy unit on the data_vars (assumes units are stored in the xarray attrs)
        try:
            unity = u.Unit(dat.data_vars[to_fit].attrs['units'])
        except ValueError:
            raise Exception('Not a valid astropy unit for data_vars')

        #check for a valid astropy unit on the coords
        try:
            unitx = u.Unit(dat.coords['wavelength'].attrs['units'])
        except ValueError:
            raise Exception('Not a valid astropy unit for coords')

        #build a nice dataframe so we can easily sort
        final = jdi.pd.DataFrame(dict(x=dat.coords['wavelength'].values,
                          y=dat.data_vars[to_fit].values,
                          e=dat.data_vars[to_fit+'_error'].values))

        #create a wavenumber grid
        final['micron'] = (final['x'].values*unitx).to(u.um).value
        final['wavenumber'] = 1e4/final['micron']

        #always ensure we are ordered correctly
        final = final.sort_values(by='wavenumber').reset_index(drop=True)

        #store every instrument, not just the last one in the loop
        returns[idata] = [final['wavenumber'].values,
                          final['y'].values, final['e'].values]
    return returns

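# A minimal sketch (commented out; run it once the placeholders above are filled
# in) to verify get_data returns what the likelihood below expects:
# for name, (wno, depth, err) in get_data().items():
#     assert np.all(np.diff(wno) > 0), f'{name} is not in ascending wavenumber'
#     assert len(wno) == len(depth) == len(err)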

###Opacity Data Selection###
opacity_filename_db= '/data2/picaso_dbs/R15000/all_opacities_0.3_15_R15000.db' #CHANGEME: point to your own opacity database
opacity_method = 'resampled' #other options: ['preweighted','resortrebin']

###Initialize MIE files for cloud species requested###
virga_mieff_files = '/data/virga_dbs/virga_0,3_15_R300/' #CHANGEME: point to your own virga mieff files
cloud_species = ['SiO2']
param_tools = prt.Parameterize(load_cld_optical=cloud_species,
                    mieff_dir=virga_mieff_files)

###PLANET & STAR###
planet_params = <planet_params> #CHANGEME: dict of planet properties, e.g. {'mp':{'value':..,'unit':..}, 'rp':{..}, 'gravity':{..}}
stellar_params = <stellar_params> #CHANGEME: dict of stellar properties, e.g. {'database':.., 'steff':.., 'feh':.., 'logg':.., 'rs':{'value':..,'unit':..}}

#planet properties
mass = planet_params['mp']['value']
mass_unit = planet_params['mp']['unit'] #you should always double check what units are stored
radius = planet_params['rp']['value']
radius_unit = planet_params['rp']['unit']
gravity = planet_params['gravity']['value']
gravity_unit = planet_params['gravity']['unit']

#stellar properties
database = stellar_params['database'] #ck04models, phoenix, etc
t_eff = stellar_params['steff']
metallicity = stellar_params['feh']
log_g = stellar_params['logg']
r_star = stellar_params['rs']['value']
r_star_unit = stellar_params['rs']['unit']


opacity = {'db1':jdi.opannection(filename_db= opacity_filename_db)}
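#(optional) opannection can also restrict the wavelength range loaded into
#memory, which helps with high-resolution databases. This is a suggestion --
#double check the kwarg against your picaso version:
#opacity = {'db1':jdi.opannection(filename_db=opacity_filename_db, wave_range=[0.3,15])}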
def setup_planet():
    """
    First we need to set up initial parameters. Usually these are fixed (anything we won't be including
    in the retrieval goes here).

    Returns
    -------
    Must return the full picaso class
    """
    pl = {i:jdi.inputs() for i in opacity.keys()}
    #define stellar inputs
    for i in opacity.keys(): pl[i].star(opacity[i], t_eff,metallicity,log_g,
                    radius=r_star,
                    database = database,
                    radius_unit = jdi.u.Unit(r_star_unit) )
    #define reference pressure for transmission
    for i in opacity.keys(): pl[i].approx(p_reference=1)
    #define planet gravity
    for i in opacity.keys(): pl[i].gravity(mass=mass, mass_unit=jdi.u.Unit(mass_unit),
                 radius=radius, radius_unit=jdi.u.Unit(radius_unit))
    return pl
planet = setup_planet()


# ## Step 4) Param Set

#the grid fitter must be set up before we can pull out the grid parameters
grid_name = <grid_name> #CHANGEME: name you want to give your grid
fitter = lyz.GridFitter(grid_name, <grid_location>) #CHANGEME: point to your own grid of models

#we can get the grid parameters directly from the fitter, which will help with setting up our free parameters
grid_parameters_unique = fitter.interp_params[grid_name]['grid_parameters_unique']

class param_set:
    """
    This is much easier now since it is just the grid parameters plus a radius scaling factor
    (to account for the unknown reference pressure) and the flex cloud parameters.
    """


    grid_flexcloud = list(grid_parameters_unique.keys())+['xrp','logcldbar','logfsed','logndz','sigma','lograd']

# ## Step 5) Guesses Set
class guesses_set:
    """
    For our guesses, we can verify things are working by just taking the first instance of each grid point
    """
    #completely arbitrary guesses for the remaining parameters just to make sure it runs


    grid_flexcloud=[grid_parameters_unique[i][0]
            for i in grid_parameters_unique.keys()] + [1,1,1,1,1,-5]

# ## Step 6) Model Set
class model_set:
    """
    There are several different ways to interpolate onto a grid. Here, we use picaso's fast custom
    interpolator, which is a flexible linear interpolator that can be used on any type of matrix grid.
    """


    def grid_flexcloud(cube, return_ptchem=False):
        """Here we want to interpolate on temperature and chemistry instead of the spectra as we did last time.
        We can still use custom interp to do so!

        Parameters
        ----------
        cube : list
            List of parameters sampled from the Bayesian sampler
        return_ptchem : bool
            True/False; Default=False. This is new!
            Returns the planet class from the function *without* running a spectrum.
            This will enable you to use the kwarg `pressure_bands` in the function `picaso.retrieval.get_evaluations`
            The formalism is to return the entire planet class in either dictionary or class format, e.g.
            {'db1':picaso.inputs class} or just picaso.inputs class
        """
        # 1. Grab parameters from your cube

        final_goal = cube[0:len(grid_parameters_unique.keys())]
        ## using this formalism with index is a bit more "fool-proof" than relying on yourself to get the index number correct
        xrp = cube[param_set.grid_flexcloud.index('xrp')]
        ## note here I am removing the log in front of the cloud base pressure, fsed, ndz, and particle radius
        base_pressure = 10**cube[param_set.grid_flexcloud.index('logcldbar')]
        fsed = 10**cube[param_set.grid_flexcloud.index('logfsed')]
        ndz = 10**cube[param_set.grid_flexcloud.index('logndz')]
        sigma= cube[param_set.grid_flexcloud.index('sigma')]
        eff_radius = 10**cube[param_set.grid_flexcloud.index('lograd')]

        # 2. Reset the mass and radius based on the radius scaling factor

        for i in opacity.keys(): planet[i].gravity(mass=mass, mass_unit=jdi.u.Unit(mass_unit),
                 radius=xrp*radius, radius_unit=jdi.u.Unit(radius_unit))

        #3. Grab the pressure grid from the GridFitter class

        pressure = fitter.pressure[grid_name][0]
        #layer midpoints taken as the geometric mean of the bounding level pressures
        pressure_layer = np.sqrt(pressure[0:-1]*pressure[1:])
        nlayer = len(pressure_layer)

        #4. Interpolate to get temperature

        temp = lyz.custom_interp(final_goal,
                fitter,grid_name, to_interp='custom',
                array_to_interp=fitter.interp_params[grid_name]['square_temp_grid'])

        #5. Interpolate to get chemistry for each molecule
        ## let's be intentional about what molecules we include in the retrieval

        mols = ['H2', 'He', 'H2O', 'CO', 'CO2', 'CH4']

        ## setup a dataframe that we will use to add our chemistry to
        df_chem = pd.DataFrame(dict(pressure=pressure,temperature=temp))
        for imol in mols:
            #still using the same custom interp function, we just need to replace square_temp with square_chem
            df_chem[imol] = lyz.custom_interp(final_goal,
                fitter,grid_name, to_interp='custom',
                array_to_interp=fitter.interp_params[grid_name]['square_chem_grid'][imol])

        #6. Add chemistry and PT profile to the PICASO atmosphere class

        for i in opacity.keys(): planet[i].atmosphere(df=df_chem,verbose=False)

        #7. Now we can introduce the parameterized flex cloud
        # add_class sets up the pressure and pressure layers, which we only need to do once

        param_tools.add_class(planet[i])
        lognorm_kwargs = {'sigma':sigma, 'lograd[cm]':eff_radius}
        df_cld = param_tools.flex_cloud(cloud_species,base_pressure, ndz, fsed, 'lognorm',
                                    lognorm_kwargs=lognorm_kwargs )
        for i in opacity.keys(): planet[i].clouds(df=df_cld.astype(float))

        if return_ptchem: return planet #KEEP THIS! It gives us a shortcut to test our atmosphere and cloud setup without running the spectrum

        #8. Create a spectrum -- note here I am just looping through all opacity dictionaries for cases where we have
        #more than one opacity file in the dictionary.
        x = []
        y = []
        for i in opacity.keys():
            out = planet[i].spectrum(opacity[i], calculation='transmission',full_output=True)
            x += list(out['wavenumber'])
            y += list(out['transit_depth'])

        #9. Sort by wavenumber so we know we always pass the correct thing to the likelihood function
        combined = sorted(zip(x, y), key=lambda pair: pair[0])
        wno = np.array([pair[0] for pair in combined])
        spectra = np.array([pair[1] for pair in combined])

        offset={} #no offset needed for this calculation
        error_inf={} #let's not add error inflation
        return wno, spectra,offset,error_inf

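# A quick (commented) smoke test, a suggestion rather than part of the original
# template: evaluate the model once at the guess vector before handing anything
# to the sampler, to catch setup errors early.
# wno, spec, offset, err_inf = model_set.grid_flexcloud(guesses_set.grid_flexcloud)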
# ## Step 7) Prior Set

class prior_set:
    """
    Store all your priors. You should have the exact same function names in here as
    you do in model_set and param_set.

    Now we need to add priors for our new parameters: xrp, the log cloud base pressure,
    log fsed, log ndz, sigma, and the log particle radius.

    Make sure the order of the unpacked cube follows the unpacking in your model
    set and in your parameter set.
    #pymultinest: http://johannesbuchner.github.io/pymultinest-tutorial/example1.html
    """


    def grid_flexcloud(cube):
        params = cube.copy()
        for i,key in enumerate(grid_parameters_unique):
            minn = np.min(grid_parameters_unique[key])
            maxx = np.max(grid_parameters_unique[key])
            params[i] = minn + (maxx-minn)*params[i]

        #xrp
        minn=0.7;maxx=1.3
        i = param_set.grid_flexcloud.index('xrp')
        params[i] = minn + (maxx-minn)*params[i]


        #log base_pressure (note minn > maxx simply flips the mapping; the prior still spans [-4,1])
        minn = 1
        maxx = -4
        i = param_set.grid_flexcloud.index('logcldbar')
        params[i] = minn + (maxx-minn)*params[i]

        #log fsed
        minn = -1
        maxx = 1
        i = param_set.grid_flexcloud.index('logfsed')
        params[i] = minn + (maxx-minn)*params[i]

        #log ndz
        minn = 1
        maxx = 10
        i = param_set.grid_flexcloud.index('logndz')
        params[i] = minn + (maxx-minn)*params[i]

        #sigma
        minn = 0.5
        maxx = 2.5
        i = param_set.grid_flexcloud.index('sigma')
        params[i] = minn + (maxx-minn)*params[i]

        #log radius
        minn = -7
        maxx = -3
        i = param_set.grid_flexcloud.index('lograd')
        params[i] = minn + (maxx-minn)*params[i]
        return params

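# All priors above are uniform, implemented as a transform from the unit cube.
# If you instead want a Gaussian prior on some parameter, the same transform
# works through the inverse CDF -- a sketch assuming scipy is available, where
# mu and sig are your chosen (hypothetical) prior mean and width:
# from scipy import stats
# params[i] = stats.norm.ppf(params[i], loc=mu, scale=sig)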
# ## Step 8) Loglikelihood

def loglikelihood(cube):
    """
    Log-likelihood function that is ultimately given to the sampler.
    Note if you keep to our same formats you will not have to change this code.

    Tips
    ----
    - Remember how we put our data dict, error inflation, and offsets all in dictionary format? Now we can utilize that
      functionality if we properly named them all with the right keys!

    Checklist
    ---------
    - ensure that error inflation and offsets are incorporated in the way that suits your problem
    - note there are many different ways to incorporate error inflation! this is just one example
    """
    #compute model spectra
    resultx,resulty,offset_all,err_inf_all = MODEL(cube) # we will define MODEL below

    #initiate the four terms we will need for the likelihood
    ydat_all=[];ymod_all=[];sigma_all=[];extra_term_all=[];

    #loop through data (if multiple instruments, add offsets if present, add err inflation if present)
    for ikey in DATA_DICT.keys(): #we will also define DATA_DICT below
        xdata,ydata,edata = DATA_DICT[ikey]
        xbin_model , y_model = jdi.mean_regrid(resultx, resulty, newx=xdata)#remember we already put everything sorted on wavenumber

        #add offsets to the data if they exist
        offset = offset_all.get(ikey,0) #if an offset for that instrument doesn't exist, return 0
        ydata = ydata+offset

        #add error inflation if it exists
        err_inf = err_inf_all.get(ikey,0) #if an err inf term for that instrument doesn't exist, return 0
        sigma = edata**2 + (err_inf)**2 #note sigma here is the variance; there are multiple ways to do this, here we just add an extra noise term in quadrature
        if err_inf != 0:
            #see the formalism here for example https://emcee.readthedocs.io/en/stable/tutorials/line/#maximum-likelihood-estimation
            extra_term = np.log(2*np.pi*sigma)
        else:
            extra_term=sigma*0

        ydat_all.append(ydata);ymod_all.append(y_model);sigma_all.append(sigma);extra_term_all.append(extra_term);

    ymod_all = np.concatenate(ymod_all)
    ydat_all = np.concatenate(ydat_all)
    sigma_all = np.concatenate(sigma_all)
    extra_term_all = np.concatenate(extra_term_all)

    #compute likelihood
    loglike = -0.5*np.sum((ydat_all-ymod_all)**2/sigma_all + extra_term_all)
    return loglike

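# Written out, the expression above is the standard Gaussian log-likelihood
# (up to a constant when err_inf = 0; see the emcee link in the loop above):
# ln L = -0.5 * sum_i [ (y_i - m_i)^2 / s_i^2 + ln(2*pi*s_i^2) ],
# with s_i^2 = edata_i^2 + err_inf^2 as constructed in the loop.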

if __name__ == "__main__":

    # CHANGEME based on what model you want to run
    MODEL_TO_RUN = 'grid_flexcloud'#options for templates: grid_virga, grid_addchem, grid_flexcloud (only grid_flexcloud is defined in this script)

    ultranest_output= '/data/test/ultranest/grid_flexcloud' # point to your own directory

    #we can easily grab all the important pieces now that they are neatly stored in a class structure
    DATA_DICT = get_data()
    PARAMS = getattr(param_set,MODEL_TO_RUN)
    MODEL = getattr(model_set,MODEL_TO_RUN)
    PRIOR = getattr(prior_set,MODEL_TO_RUN)
    GUESS = getattr(guesses_set,MODEL_TO_RUN)


    sampler = ultranest.ReactiveNestedSampler(PARAMS, loglikelihood, PRIOR,
                                              log_dir = ultranest_output,resume=True)
    result = sampler.run()