Skip to content

Commit aa7cb35

Browse files
committed
Tested version of EGV Matlab data parser
1 parent 1e40c2d commit aa7cb35

File tree

1 file changed

+47
-52
lines changed

1 file changed

+47
-52
lines changed

egvparser/parser/egienvcparser.py

Lines changed: 47 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -3,54 +3,41 @@
33
__author__ = 'Jaganadh Gopinadhan'
44
__contact__ = "https://www.linkedin.com/in/jaganadhg/"
55

6-
import scipy.io as sio
7-
import numpy as np
86
import pandas as pd
7+
import numpy as np
8+
import scipy.io as sio
9+
import logging
10+
logging.basicConfig(level=logging.INFO,
11+
format='%(asctime)s :: %(levelname)s :: %(message)s')
912

1013

11-
def ndarry_todf(sensor_data : np.ndarray, colnames : list) -> pd.DataFrame:
12-
""" Convert a nympy nd array to a Pandas DataFrame
13-
:params sensor_data: numpy ndarray data with sensor data
14-
:params colnames: column names
15-
:returns sensor_frame: converted pandas dataframe
16-
"""
17-
sensor_frame = None
18-
19-
try:
20-
sensor_frame = pd.DataFrame(sensor_data,
21-
columns=colnames)
22-
except:
23-
print("The data is not in numpy.ndarray format")
24-
25-
return sensor_frame
26-
27-
28-
def raw_data_to_df(calibration : np.ndarray,
29-
calib_names : np.ndarray,
30-
colnames : list,
31-
fault_names : np.ndarray = None) -> pd.DataFrame:
14+
def raw_data_to_df(calibration: np.ndarray,
15+
calib_names: np.ndarray,
16+
colnames: list,
17+
fault_names: np.ndarray = None) -> pd.DataFrame:
3218
""" Create a pandas DataFrame from the calibration data
3319
:params calibration: All calib_names sensor data as numpy.ndarray
3420
:params calib_names: calibration wafer names as np.array
3521
:params colnames: column names
3622
:returns calib_frame: a pandas DataFrame with calibration data
3723
"""
3824

39-
calib_frame = None
25+
calib_frame = None
4026
calib_frame_list = list()
4127
colnames = [cname.strip() for cname in colnames]
4228

43-
calib_data_range = list(range(0,calibration.shape[0]))
29+
calib_data_range = list(range(0, calibration.shape[0]))
4430

4531
for idx in calib_data_range:
46-
byte_adjusted_data = np.array(calibration[idx,:][0]).byteswap().newbyteorder()
32+
byte_adjusted_data = np.array(
33+
calibration[idx, :][0]).byteswap().newbyteorder()
4734
"""
48-
To overcome
49-
ValueError: Big-endian buffer not supported on little-endian compiler
35+
To overcome
36+
ValueError: Big-endian buffer not supported on little-endian compiler
5037
pandas error
5138
"""
5239
curr_df = pd.DataFrame(byte_adjusted_data,
53-
columns = colnames)
40+
columns=colnames)
5441
curr_df['wafer_names'] = calib_names[idx]
5542

5643
if fault_names is not None:
@@ -62,64 +49,72 @@ def raw_data_to_df(calibration : np.ndarray,
6249

6350
try:
6451
calib_frame = pd.concat(calib_frame_list)
65-
except:
66-
print("Error in stacking the DataFrames")
52+
except BaseException:
53+
logging.error("Error in stacking the DataFrames")
6754

6855
return calib_frame
6956

7057

71-
def egienvec_parser(data_path: str, dkey: str = "LAMDATA") -> pd.DataFrame:
    """ Parse the Eigenvector LAM etch data and return it as one DataFrame

    Source of Data https://www.eigenvector.com/data/Etch/
    The data is in a Matlab struct file. Variables in the file (with the
    positional index used to read them) are
    INFORMATION: [ 29x63 char]                                    - 0
    calibration: {108x1 cell} The normal or calibration wafers    - 1
    calib_names: [108x9 char] Names of the calibration wafers     - 2
    test:        { 21x1 cell} The test or faulty wafers           - 3
    test_names:  [ 21x9 char] Names of the test wafers            - 4
    fault_names: [ 21x9 char] Names of the specific faults        - 5
    variables:   [ 21x14 char] Names of the variables             - 6

    :param data_path: Path to individual .mat file
    :param dkey: Key for the data LAMDATA for MACHINE_Data.mat,
        OESDATA for OES_DATA.mat and RFMDATA for RFM_DATA.mat
    :returns data_set: a pandas DataFrame containing both calibration and
        test data, or None when ``dkey`` is not present in the file or the
        calibration and test frames could not be stacked
    """
    data_set = None
    data_dict = dict()
    var_names = ['information', 'calibration', 'calib_names', 'test',
                 'test_names', 'fault_names', 'variables']

    base_data = sio.loadmat(data_path)

    try:
        lam_data = base_data[dkey]
    except KeyError:
        # Without the struct there is nothing to parse. Return the same
        # None sentinel used for a failed concat instead of falling through
        # to a second, unlogged KeyError on data_dict['variables'].
        logging.error("The specified key %s not found in the data!", dkey)
        return data_set

    # The struct fields are read by position, in the order of var_names.
    struct_record = lam_data[0, 0]
    for idx, var_name in enumerate(var_names):
        data_dict[var_name] = struct_record[idx]

    sensor_names = list(data_dict['variables'])

    logging.info("Processing calibration data for %s", dkey)
    calibration_data = raw_data_to_df(data_dict['calibration'],
                                      data_dict['calib_names'],
                                      sensor_names)
    logging.info("Processed calibration data for %s", dkey)

    logging.info("Processing test data for %s", dkey)
    test_data = raw_data_to_df(data_dict['test'],
                               data_dict['test_names'],
                               sensor_names,
                               data_dict['fault_names'])
    logging.info("Processed test data for %s", dkey)

    try:
        data_set = pd.concat([calibration_data,
                              test_data])
    except (ValueError, TypeError):
        # raw_data_to_df returns None when its own stacking fails; pd.concat
        # rejects an all-None / non-frame input with these exceptions.
        # KeyboardInterrupt and SystemExit are deliberately not caught.
        logging.exception("May be empty data in the DataFrames!")
    else:
        logging.info("Total sensor values in the data %s is %s",
                     dkey, data_set.shape[0])

    return data_set
120114

121115

122-
123116
if __name__ == "__main__":
    import sys

    # Optional command-line overrides: argv[1] is the .mat file path and
    # argv[2] is the struct key. With no arguments the original developer
    # defaults are used, so existing invocations behave as before.
    matlab_data = "/home/jaganadhg/AI_RND/Semiconductor/eigenvector/MACHINE_Data.mat"
    data_key = "LAMDATA"
    if len(sys.argv) > 1:
        matlab_data = sys.argv[1]
    if len(sys.argv) > 2:
        data_key = sys.argv[2]

    machine = egienvec_parser(matlab_data,
                              dkey=data_key)

    # The parser returns None when the frames could not be stacked, so guard
    # before calling DataFrame methods on the result.
    if machine is None:
        logging.error("No data could be parsed from %s", matlab_data)
        sys.exit(1)

    print(machine.head())

0 commit comments

Comments
 (0)