
Commit 4ce552d

Getting norm_total working
Got norm_total working and handling multiple wavelength ranges, but it is a convoluted mess and could probably be done a lot more cleanly. Minor tweaks to a few other scripts.
1 parent 3aad4d7 commit 4ce552d
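For context, "total" normalization over multiple wavelength ranges amounts to scaling each spectrum so that its intensities sum to one within each range separately. A minimal sketch of the idea (hypothetical; norm_by_ranges is not the repo's norm_total/norm_spect implementation, and it assumes spectra stored as rows of a plain DataFrame whose columns are wavelengths in nm, whereas the script below keeps them under a 'wvl' column level):

import pandas as pd

def norm_by_ranges(spectra, ranges):
    """Normalize each spectrum so its intensities sum to 1 within each wavelength range."""
    out = spectra.copy()
    wvls = out.columns.astype(float)
    for lo, hi in ranges:
        # Columns falling in this wavelength range
        cols = out.columns[(wvls >= lo) & (wvls < hi)]
        # Divide every row's values in this range by that row's total over the range
        out[cols] = out[cols].div(out[cols].sum(axis=1), axis=0)
    return out

Ti_spect_compare.py below calls the repo's norm_spect with ranges=[(0,350),(350,460),(460,1000)], i.e. one normalization per spectrometer range.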

10 files changed: +445 additions, -79 deletions

.gitignore (5 additions, 0 deletions)

@@ -67,3 +67,8 @@ target/
 
 # Vim
 *.swp
+
+#Data and output files
+.csv
+.png
+.SAV

Ti_spect_compare.py (new file, 303 additions, 0 deletions)

# -*- coding: utf-8 -*-
"""
Created on Thu Feb 25 08:25:47 2016

@author: rbanderson
"""
#import sys
#sys.path.append(r"C:\Users\rbanderson\Documents\Projects\LIBS PDART")
from autocnet.fileio.io_ccs import ccs_batch
from autocnet.fileio.io_jsc import JSC,jsc_batch,read_refdata
from autocnet.fileio.lookup import lookup
from autocnet.spectral.interp import interp_spect
from autocnet.spectral.mask import mask
from autocnet.spectral.spectra import Spectra
from autocnet.spectral.spectral_data import spectral_data
from autocnet.spectral.norm_total import norm_total,norm_spect
import pandas as pd
import numpy as np
import matplotlib.pyplot as plot
import time
from sklearn.decomposition import PCA


##Read CCAM data
#data_dir=r"E:\ChemCam\ops_ccam_team\sav\0-250"
#
#masterlists=[r"E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0010_0801.csv","E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0805_0980.csv",r"E:\ChemCam\ops_ccam_misc\MASTERLIST.csv"]
#t1=time.time()
#ccs=ccs_batch(data_dir,searchstring='*CCS*.SAV')
#dt1=time.time()-t1
#
##work only with average spectra
#ccs=ccs.loc[ccs['shotnum'].isin(['ave'])]
#ccs=ccs.reset_index(drop=True) #This is important! without it, the lookup is screwed up
#ccs=lookup(ccs,masterlists)
#
##save ccs data
#ccs.to_csv('CCAM_data_aves_0-250.csv')
#
#data_dir=r"E:\ChemCam\ops_ccam_team\sav\251-500"
#
#masterlists=[r"E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0010_0801.csv","E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0805_0980.csv",r"E:\ChemCam\ops_ccam_misc\MASTERLIST.csv"]
#t1=time.time()
#ccs=ccs_batch(data_dir,searchstring='*CCS*.SAV')
#dt2=time.time()-t1
#
##work only with average spectra
#ccs=ccs.loc[ccs['shotnum'].isin(['ave'])]
#ccs=ccs.reset_index(drop=True) #This is important! without it, the lookup is screwed up
#ccs=lookup(ccs,masterlists)
#
##save ccs data
#ccs.to_csv('CCAM_data_aves_251-500.csv')
#
#data_dir=r"E:\ChemCam\ops_ccam_team\sav\501-750"
#
#masterlists=[r"E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0010_0801.csv","E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0805_0980.csv",r"E:\ChemCam\ops_ccam_misc\MASTERLIST.csv"]
#t1=time.time()
#ccs=ccs_batch(data_dir,searchstring='*CCS*.SAV')
#dt3=time.time()-t1
#
##work only with average spectra
#ccs=ccs.loc[ccs['shotnum'].isin(['ave'])]
#ccs=ccs.reset_index(drop=True) #This is important! without it, the lookup is screwed up
#ccs=lookup(ccs,masterlists)
#
##save ccs data
#ccs.to_csv('CCAM_data_aves_501-750.csv')
#
#data_dir=r"E:\ChemCam\ops_ccam_team\sav\751-1000"
#
#masterlists=[r"E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0010_0801.csv","E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0805_0980.csv",r"E:\ChemCam\ops_ccam_misc\MASTERLIST.csv"]
#t1=time.time()
#ccs=ccs_batch(data_dir,searchstring='*CCS*.SAV')
#dt4=time.time()-t1
#
##work only with average spectra
#ccs=ccs.loc[ccs['shotnum'].isin(['ave'])]
#ccs=ccs.reset_index(drop=True) #This is important! without it, the lookup is screwed up
#ccs=lookup(ccs,masterlists)
#
##save ccs data
#ccs.to_csv('CCAM_data_aves_751-1000.csv')
#
#data_dir=r"E:\ChemCam\ops_ccam_team\sav\1001-1250"
#
#masterlists=[r"E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0010_0801.csv","E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0805_0980.csv",r"E:\ChemCam\ops_ccam_misc\MASTERLIST.csv"]
#t1=time.time()
#ccs=ccs_batch(data_dir,searchstring='*CCS*.SAV')
#dt5=time.time()-t1
#
##work only with average spectra
#ccs=ccs.loc[ccs['shotnum'].isin(['ave'])]
#ccs=ccs.reset_index(drop=True) #This is important! without it, the lookup is screwed up
#ccs=lookup(ccs,masterlists)
#
##save ccs data
#ccs.to_csv('CCAM_data_aves_1001_1250.csv')

#f1=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\autocnet\CCAM_data_aves_0-250.csv"
#f2=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\autocnet\CCAM_data_aves_251-500.csv"
#f3=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\autocnet\CCAM_data_aves_501-750.csv"
#f4=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\autocnet\CCAM_data_aves_751-1000.csv"
#f5=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\autocnet\CCAM_data_aves_1001_1250.csv"
#
#ccs1=pd.read_csv(f1,header=[0,1])
#ccs2=pd.read_csv(f2,header=[0,1])
#ccs3=pd.read_csv(f3,header=[0,1])
#ccs4=pd.read_csv(f4,header=[0,1])
#ccs5=pd.read_csv(f5,header=[0,1])
#
#ccs=pd.concat([ccs1,ccs2,ccs3,ccs4,ccs5])
####
#ccs.to_csv('CCAM_data_aves.csv')
ccs=pd.read_csv(r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\autocnet\CCAM_data_aves.csv",header=[0,1])
pca=PCA(n_components=2)
ccs_geo=ccs.loc[ccs['meta']['Distance (mm)']>1.7]

##Filter out just Ti targets
#ccs_Ti=ccs.loc[np.squeeze(ccs['meta']['Target'].isin(['Cal Target 10']))]
#ccs_Ti.to_csv('CCAM_data_aves_Ti.csv')

ccs_Ti=pd.read_csv(r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\autocnet\CCAM_data_aves_Ti.csv",header=[0,1])


xnew=np.array(ccs_Ti['wvl'].columns)
ccs_Ti=interp_spect(ccs_Ti,xnew)
ccs_geo=interp_spect(ccs_geo,xnew)

plot.figure(figsize=(10,8))
plot.subplot(311)
plot.xlim([200,900])
rocknest3=ccs_geo.loc[ccs['meta']['Target'].isin(['Rocknest3'])]
plot.plot(rocknest3['wvl'].columns.values,rocknest3['wvl'].iloc[0,:],label='Raw',c='b')
plot.legend()

#Mask spectra
maskfile=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Input\mask_minors_noise.csv"
ccs_Ti=mask(ccs_Ti,maskfile)
ccs_geo=mask(ccs_geo,maskfile)
plot.subplot(312)
plot.xlim([200,900])
rocknest3=ccs_geo.loc[ccs['meta']['Target'].isin(['Rocknest3'])]
plot.plot(rocknest3['wvl'].columns.values,rocknest3['wvl'].iloc[0,:],label='Masked',c='r')
plot.legend()
#Normalize Spectra
ranges=[(0,350),(350,460),(460,1000)]
ccs_Ti=norm_spect(ccs_Ti,ranges)
ccs_geo=norm_spect(ccs_geo,ranges)
plot.subplot(313)
plot.xlim([200,900])
rocknest3=ccs_geo.loc[ccs['meta']['Target'].isin(['Rocknest3'])]
plot.plot(rocknest3['wvl'].columns.values,rocknest3['wvl'].iloc[0,:],label='Normalized',c='g')

plot.legend()
plot.savefig('Rocknest_example.png',dpi=600)
plot.show()

do_pca=pca.fit(ccs_geo['wvl'])
seqs=ccs_geo['meta']['Sequence']
seqs_uniq=np.unique(seqs)
plot.figure(figsize=(8,8))
plot.title('PCA of Mars Targets')
plot.xlabel('PC1 ('+str(round(do_pca.explained_variance_ratio_[0],2))+'%)')
plot.ylabel('PC2 ('+str(round(do_pca.explained_variance_ratio_[1],2))+'%)')

colors=plot.cm.jet(np.linspace(0,1,len(seqs_uniq)))
for t,i in enumerate(seqs_uniq):

    scores=do_pca.transform(ccs_geo['wvl'].loc[ccs_geo['meta']['Sequence'].isin([i])])
    plot.scatter(scores[:,0],scores[:,1],c=colors[t,:],label=i)
plot.savefig('Full_CCS_PCA.png',dpi=600)
plot.show()

pca=PCA(n_components=2)
do_pca=pca.fit(ccs_Ti['wvl'])
scores_ccs_Ti=do_pca.transform(ccs_Ti['wvl'])


plot.figure()
plot.scatter(scores_ccs_Ti[:,0],scores_ccs_Ti[:,1],c='r')
plot.show()

ccs_Ti=ccs_Ti.iloc[scores_ccs_Ti[:,0]<0.06,:]
do_pca=pca.fit(ccs_Ti['wvl'])
scores_ccs_Ti=do_pca.transform(ccs_Ti['wvl'])


plot.figure()
plot.scatter(scores_ccs_Ti[:,0],scores_ccs_Ti[:,1],c='r')
plot.show()


#get average mars spectra
ccs_Ti_ave=ccs_Ti['wvl'].sum(axis=0)/len(ccs_Ti.index)


#Read JSC data
#spect_table=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Input\Spectrometer_Table.csv"
#experiment_table=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Input\Experiment_Setup_Table.csv"
#laser_table=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Input\Laser_Setup_Table.csv"
#sample_table=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Input\Sample_Table.csv"
#LUT_files={'spect':spect_table,'exp':experiment_table,'laser':laser_table,'sample':sample_table}
#data_dir=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\LIBS USGS\DATA"
#JSC_data=jsc_batch(data_dir,LUT_files)
#JSC_data.to_csv('JSC_data.csv')
##Filter out just the Ti targets
#JSC_Ti=JSC_data.loc[np.squeeze(JSC_data['Sample ID'].isin(['TISDT01']))]

JSC_Ti=pd.read_csv(r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\autocnet\JSC_Ti_data.csv",header=[0,1])

#Interpolate JSC data to CCAM data
JSC_Ti=interp_spect(JSC_Ti,xnew)

##Combine JSC and CCAM Ti data
#data=pd.concat([JSC_Ti_interp,ccs_Ti])
#data.to_csv('JSC_CCS_Ti_data.csv')
#Mask spectra
JSC_Ti=mask(JSC_Ti,maskfile)
#Normalize Spectra

JSC_Ti=norm_spect(JSC_Ti,ranges)
#data_masked['wvl']=norm_total(data_masked['wvl'])
#
#data_masked['wvl']=data_masked['wvl'].div(data_masked['wvl'].sum(axis=1),axis=0)
#
#data_mask_norm=data_masked['wvl'].copy()
#for row in data_mask_norm.index.values:
#    data_mask_norm.iloc[row]/=sum(data_mask_norm.iloc[row])
#data_masked['wvl']=data_mask_norm
#data_masked_norm.to_csv('JSC_CCS_Ti_data_masked_norm.csv')
#data_mask_norm=norm_total(data_masked)
#data_mask_norm.to_csv('JSC_CCS_Ti_data_mask_norm.csv')
#print('foo')

#get average of JSC spectra
JSC_ave=JSC_Ti['wvl'].sum(axis=0)/len(JSC_Ti.index)

ratio=ccs_Ti_ave/JSC_ave
ratio[abs(ratio)>100]=1.0
plot.plot(ratio)
plot.show()

JSC_Ti_r=JSC_Ti['wvl'].mul(ratio,axis=1)
JSC_Ti_1248=JSC_Ti.loc[JSC_Ti['meta']['laser_power'].isin([12.48])]
JSC_Ti_1196=JSC_Ti.loc[JSC_Ti['meta']['laser_power'].isin([11.98])]
JSC_Ti_1498=JSC_Ti.loc[JSC_Ti['meta']['laser_power'].isin([14.98])]
JSC_Ti_1723=JSC_Ti.loc[JSC_Ti['meta']['laser_power'].isin([17.23])]

JSC_Ti_1248_ave=JSC_Ti_1248['wvl'].sum(axis=0)/len(JSC_Ti_1248.index)
JSC_Ti_1196_ave=JSC_Ti_1196['wvl'].sum(axis=0)/len(JSC_Ti_1196.index)
JSC_Ti_1498_ave=JSC_Ti_1498['wvl'].sum(axis=0)/len(JSC_Ti_1498.index)
JSC_Ti_1723_ave=JSC_Ti_1723['wvl'].sum(axis=0)/len(JSC_Ti_1723.index)

dist_1248=np.linalg.norm(JSC_Ti_1248_ave-ccs_Ti_ave)
dist_1196=np.linalg.norm(JSC_Ti_1196_ave-ccs_Ti_ave)
dist_1498=np.linalg.norm(JSC_Ti_1498_ave-ccs_Ti_ave)
dist_1723=np.linalg.norm(JSC_Ti_1723_ave-ccs_Ti_ave)

#combine mars and JSC data
data=pd.concat([JSC_Ti_r,ccs_Ti['wvl']])


#Run PCA on spectra
pca=PCA(n_components=2)
do_pca=pca.fit(data)
scores_all=do_pca.transform(data)



#Extract different laser energies
mars_40A=ccs_Ti.loc[ccs_Ti['meta']['Laser Energy'].isin(['100A/40A/40A'])]['wvl']
mars_60A=ccs_Ti.loc[ccs_Ti['meta']['Laser Energy'].isin(['100A/60A/60A'])]['wvl']
mars_95A=ccs_Ti.loc[ccs_Ti['meta']['Laser Energy'].isin(['100A/95A/95A'])]['wvl']


JSC_1248=JSC_Ti.loc[JSC_Ti['meta']['laser_power'].isin([12.48])]['wvl'].mul(ratio,axis=1)
JSC_1196=JSC_Ti.loc[JSC_Ti['meta']['laser_power'].isin([11.96])]['wvl'].mul(ratio,axis=1)
JSC_1498=JSC_Ti.loc[JSC_Ti['meta']['laser_power'].isin([14.98])]['wvl'].mul(ratio,axis=1)


scores_40A=do_pca.transform(mars_40A)
scores_60A=do_pca.transform(mars_60A)
scores_95A=do_pca.transform(mars_95A)

scores_1248=do_pca.transform(JSC_1248)
scores_1196=do_pca.transform(JSC_1196)
scores_1498=do_pca.transform(JSC_1498)

plot.figure(figsize=(5,5))
plot.scatter(scores_40A[:,0],scores_40A[:,1],label='Mars (40A)',c='r')
plot.scatter(scores_60A[:,0],scores_60A[:,1],label='Mars (60A)',c='g')
plot.scatter(scores_95A[:,0],scores_95A[:,1],label='Mars (95A)',c='b')

plot.scatter(scores_1248[:,0],scores_1248[:,1],label='JSC (12.48 mJ)',c='c')
plot.scatter(scores_1196[:,0],scores_1196[:,1],label='JSC (11.96 mJ)',c='m')
plot.scatter(scores_1498[:,0],scores_1498[:,1],label='JSC (14.98 mJ)',c='y')
plot.legend()

plot.savefig('PCA_Ti_JSC_CCS.png',dpi=600)
plot.show()
print('foo')

autocnet/fileio/io_ccs.py (30 additions, 7 deletions)

@@ -7,6 +7,7 @@
 import scipy
 from autocnet.fileio.header_parser import header_parser
 from autocnet.fileio.utils import file_search
+import copy
 
 def CCS(input_data):
     df = pd.DataFrame.from_csv(input_data, header=14)
@@ -25,7 +26,8 @@ def CCS(input_data):
     df['Pversion']=fname[34:36]
     #transpose the data frame
 
-    #read the file header and put information into the dataframe as new columns (inneficient to store this data many times, but much easier to concatenate data from multiple files)
+    #read the file header and put information into the dataframe as new columns
+    #(inefficient to store this data many times, but much easier to concatenate data from multiple files)
     with open(input_data,'r') as f:
         header={}
         for i,row in enumerate(f.readlines()):
@@ -77,7 +79,7 @@ def CCS_SAV(input_data):
     d['seqid']=fname[25:34].upper()
     d['Pversion']=fname[34:36]
 
-    #Add metadata to the data frame by stepping through the d dict
+    #Add metadata to the data frame by stepping through the dict
     for label,data in d.items():
         if type(data) is bytes: data=data.decode()
         df[label]=data
@@ -89,26 +91,47 @@
     return df
 
 def ccs_batch(directory,searchstring='*CCS*.csv',is_sav=False):
+
     if 'SAV' in searchstring:
         is_sav=True
     else:
         is_sav=False
     filelist=file_search(directory,searchstring)
-    for i in filelist:
+    basenames=np.zeros_like(filelist)
+    sclocks=np.zeros_like(filelist)
+    P_version=np.zeros_like(filelist,dtype='int')
+
+    #Extract the sclock and version for each file and ensure that only one
+    #file per sclock is being read, and that it is the one with the highest version number
+    for i,name in enumerate(filelist):
+        basenames[i]=os.path.basename(name)
+        sclocks[i]=basenames[i][4:13]
+        P_version[i]=basenames[i][-5:-4]
+    sclocks_unique=np.unique(sclocks)
+    filelist_new=np.array([],dtype='str')
+    for i in sclocks_unique:
+        match=(sclocks==i)
+        maxP=P_version[match]==max(P_version[match])
+        filelist_new=np.append(filelist_new,filelist[match][maxP])
 
+    filelist=filelist_new
+    #any way to speed this up for large numbers of files?
+    #Should add a progress bar for importing large numbers of files
+    for i in filelist:
         if is_sav:
             tmp=CCS_SAV(i)
+
         else:
             tmp=CCS(i)
-
+
         try:
-            cols1=list(combined.columns[combined.dtypes=='float'])
-            cols2=list(tmp.columns[tmp.dtypes=='float'])
+            #This ensures that rounding errors are not causing mismatches in columns
+            cols1=list(combined['wvl'].columns)
+            cols2=list(tmp['wvl'].columns)
             if set(cols1)==set(cols2):
                 combined=pd.concat([combined,tmp])
             else:
                 print("Wavelengths don't match!")
-            print('foo')
         except:
             combined=tmp
     return combined
