From 44958d7b2552c2970a1f67a7e6a053908f23f8a2 Mon Sep 17 00:00:00 2001
From: Charles Clark
Date: Thu, 23 Feb 2023 17:31:12 -0500
Subject: [PATCH] some testing stuff

---
Review notes (this section is ignored by `git am`):
- This patch was recovered from a copy whose line breaks had been flattened.
  Blank context lines and the indentation of context lines are a best-effort
  reconstruction, and the `index` hashes are those of the original commit.
  Regenerate the patch from the repository before applying it.
- check_refs()/big_test() were revised: docstrings added, dead commented-out
  code removed, the scan-group frame is copied before it is written to, and
  the hard-coded [1:2] selection became the default of a new parameter.

 .gitignore      |  1 +
 xas/analysis.py |  2 +-
 xas/outliers.py | 57 ++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index b608e88..fbfb23a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -78,3 +78,4 @@ target/
 
 #Ipython Notebook
 .ipynb_checkpoints
+df_uid.ipynb
diff --git a/xas/analysis.py b/xas/analysis.py
index a4b1516..fcd3313 100644
--- a/xas/analysis.py
+++ b/xas/analysis.py
@@ -1,6 +1,6 @@
 import matplotlib
 
-# matplotlib.use("TkAgg")
+matplotlib.use("TkAgg")
 
 import numpy as np
 import pandas as pd
diff --git a/xas/outliers.py b/xas/outliers.py
index 03a05ec..eaaedcb 100644
--- a/xas/outliers.py
+++ b/xas/outliers.py
@@ -14,7 +14,8 @@ from sklearn.ensemble import IsolationForest
 from sklearn.svm import OneClassSVM
 
 
-from xas.analysis import standardize_energy_grid, prenormalize_data
+from xas.analysis import standardize_energy_grid, prenormalize_data, check_scan
+from xas.energy_calibration import compute_shift_between_spectra
 
 TEST_PATH = "/home/charles/Desktop/test_data/outlier_muf.pkl"
 
@@ -316,3 +317,57 @@ def plot_scangroup(sg_df, channels=("mut", "muf", "mur")):
         for ch in channels:
             plt.plot(scan["data"]["energy"], scan["data"][ch])
     plt.show()
+
+
+def check_refs(sg_df_uid: pd.DataFrame) -> None:
+    """Substitute bad reference spectra with the nearest-in-time good one.
+
+    Works in place on ``sg_df_uid``: a "mur_source" column is added
+    (initialised to None) and, for every row whose "mur_good" flag is False,
+    the "mur" entry of that row's "data" is overwritten with the "mur" of the
+    good row closest in "time"; "mur_source" then holds that row's "uid".
+
+    If every reference is good nothing is substituted. If none is good a
+    message is printed and the data is left untouched.
+    """
+    sg_df_uid["mur_source"] = None
+    mur_good = sg_df_uid["mur_good"]
+    if mur_good.all():
+        return
+    is_bad = mur_good.eq(False)
+    if is_bad.all():
+        print("Failed. \nNo good refs in scan group.")
+        return
+    bad_ref_df = sg_df_uid[is_bad]
+    good_ref_df = sg_df_uid[mur_good.eq(True)]
+
+    for i, scan_time in bad_ref_df["time"].items():
+        closest_time_idx = np.abs(scan_time - good_ref_df["time"]).idxmin()
+        source_mur = good_ref_df.loc[closest_time_idx, "data"]["mur"]
+        # Relies on .loc handing back the container stored in the "data" cell
+        # (not a copy), so this assignment updates the row's data in place.
+        sg_df_uid.loc[i, "data"]["mur"] = source_mur
+        sg_df_uid.loc[i, "mur_source"] = good_ref_df.loc[closest_time_idx, "uid"]
+
+
+def big_test(df_uid, group_slice=slice(1, 2)):
+    """Run outlier rejection and reference substitution per scan group.
+
+    ``group_slice`` selects which entries of ``df_uid.scan_group.unique()``
+    are processed; the default keeps the original behaviour of handling only
+    the second scan group. Returns ``(outlier_results, sg_dfs_out)``: one
+    outlier_rejection() result and one checked frame per processed group.
+    """
+    outlier_results = []
+    sg_dfs_out = []
+    for sg in df_uid.scan_group.unique()[group_slice]:
+        # check_refs() writes to the frame it is given; copy the filtered rows
+        # so the write does not land on a selection of df_uid
+        # (SettingWithCopyWarning).
+        sg_df_uid = df_uid[df_uid.scan_group == sg].copy()
+        outlier_results.append(
+            outlier_rejection(sg_df_uid.data.tolist(), sg_df_uid.uid.tolist())
+        )
+        check_refs(sg_df_uid)
+        sg_dfs_out.append(sg_df_uid)
+    return outlier_results, sg_dfs_out