def profile_files(
+ df: pd.DataFrame,
+ output_path: str | Path = ".",
+ profile_tool: str = "ydata-profiling",
+):
+ """Profile the structured data.
+
+ Args:
+ df (pd.DataFrame): DataFrame with the files to be profiled.
+ output_path (str | Path, optional): Folder to save the HTML reports.
+ Defaults to ".".
+ profile_tool (str, optional): Select which profiling too to use.
+ Defaults to "ydata-profiling".
+ """
+ output_path = Path(output_path)
+ output_path.mkdir(parents=True, exist_ok=True)
+ df.sort_values(by="size", inplace=True)
+ print(
+ f"Profiling files with {profile_tool} and generating reports in folder {output_path}"
+ )
+ pbar = tqdm(df.iterrows(), total=len(df))
+ for _, r in pbar:
+ pbar.set_description(f"Profiling {r['name']} ({r['rows']:,} records)")
+ if r.rows > 0:
+ df_to_profile = load_file_with_pandas(
+ file_path=r["path"],
+ file_name=r["name"],
+ extension=r["extension"],
+ sep=r["separator"],
+ )
+ if profile_tool == "ydata-profiling":
+ profile_with_ydata_profiling(
+ output_path=output_path,
+ df_to_profile=df_to_profile,
+ file_name=r["name"],
+ file_size=r["size"],
+ )
+ elif profile_tool == "sweetviz":
+ profile_with_sweetviz(
+ df_to_profile=df_to_profile,
+ output_path=output_path,
+ file_name=r["name"],
+ )
+
+ print(f'\nCheck out all the reports in "{output_path.resolve()}"\n')
+ return
+