diff --git a/README.md b/README.md index 29041a7..549dce6 100644 --- a/README.md +++ b/README.md @@ -22,8 +22,17 @@ Roadmap: There may be more than one table that hold the version information, e.g A merge operation will attempt to provide a unified table. ### Nested tables, merged columns/rows -Roadmap: Tables may have nested tables (e.g. KB52520 - VCF). -A decomposition is needed to provide the information in a usable format. +Since v0.2.0: For vCenter build information (KB2143838), this release (based on PR #5) offers merged tables. +The KB article contains three tables: +- Release information for VCSA 7 +- Release information for VCSA/Windows VC 6.7 +- Release information for VCSA/Windows VC 6.5 and earlier + +The following merged output is now available: +- one table for all VCSA releases +- one table for all Windows releases +- one table for all releases +Unicode issues are addressed as well. ## Output format and folder structures The way the output is currently structured is: diff --git a/data_handling.py b/data_handling.py index 59acd1d..fb86e4b 100644 --- a/data_handling.py +++ b/data_handling.py @@ -22,7 +22,7 @@ __contact__ = "dominik@why-did-it.fail" __license__ = "GPLv3" __status__ = "beta" -__version__ = "0.2.0" +__version__ = "0.3.0" import os @@ -49,8 +49,8 @@ def create_json_output(kb_dataobject, output_base_dir: str, record_type: str): table_id += 1 if kb_dataobject.list_of_merged_frames: table_id = 0 - for dataframe in kb_dataobject.list_of_merged_frames: - filename = f"kb{kb_dataobject.id}_{kb_dataobject.fmt_product}_merged{table_id}_release_as-{record_type}.json" + for table_name, dataframe in kb_dataobject.list_of_merged_frames.items(): + filename = f"kb{kb_dataobject.id}_{kb_dataobject.fmt_product}_{table_name}_as-{record_type}.json" if "Build Number" in dataframe.columns and record_type == "index": dataframe = transform_index(dataframe) try: @@ -60,8 +60,6 @@ def create_json_output(kb_dataobject, output_base_dir: str, record_type: str): ) except ValueError as
err: print(f"{kb_dataobject.id}: Error for json {record_type} in merged table {table_id}: {err}") - finally: - table_id += 1 def transform_index(dataframe): diff --git a/kb_data.py b/kb_data.py index ceab296..2063aca 100644 --- a/kb_data.py +++ b/kb_data.py @@ -22,7 +22,7 @@ __contact__ = "dominik@why-did-it.fail" __license__ = "GPLv3" __status__ = "beta" -__version__ = "0.2.0" +__version__ = "0.3.0" import pandas as pd @@ -112,6 +112,7 @@ def parse_releasedata(self): # Contains a list of all tables converted to dataframes in the resolution section list_of_release_df = [] for table_id in range(len(df)): + # VCSA 7 if table_id == 0: vcenter7_table = df[table_id] reformatted_df = self.transform_kb2143838(vcenter7_table) @@ -120,6 +121,7 @@ def parse_releasedata(self): infer_datetime_format=True, errors='coerce') list_of_release_df.append(reformatted_df) + # VCSA/Windows 6.7 elif table_id == 1: vcenter67_table = df[table_id] product_editions = ["VCSA", "Windows"] @@ -131,18 +133,22 @@ def parse_releasedata(self): errors='coerce') list_of_release_df.append(reformatted_df) del split_df + # VCSA/Windows less equal 6.5 elif table_id == 2: # The HTML table have no header, we need to reassign the first row as heading df_header = df[table_id][:1] - current_df = df[table_id][1:] - current_df.columns = df_header.values.tolist()[0] - # Moving the del up here - del df_header - current_df["Edition"] = "Windows" + vcenter_le65_table = df[table_id][1:] + vcenter_le65_table.columns = df_header.values.tolist()[0] # Get the data types right, especially the date format='%m/%d/%Y' - current_df["Release Date"] = pd.to_datetime(current_df["Release Date"], infer_datetime_format=True, + vcenter_le65_table["Release Date"] = pd.to_datetime(vcenter_le65_table["Release Date"], infer_datetime_format=True, errors='coerce') - list_of_release_df.append(current_df) + #Filter VCSA releases by keyword "Appliance", for Windows negate the search + vcsa_le65 = 
vcenter_le65_table[vcenter_le65_table["Version"].str.contains("appliance", case=False)] + vcsa_le65["Edition"] = "VCSA" + winvc_le65 = vcenter_le65_table[~vcenter_le65_table["Version"].str.contains("appliance", case=False)] + winvc_le65["Edition"] = "Windows" + list_of_release_df.append(vcsa_le65) + list_of_release_df.append(winvc_le65) else: print("Unknown table added, please add handling") return list_of_release_df @@ -175,25 +181,27 @@ def transform_kb2143838(self, dataframe): def merge_tables_kb2143838(self): """Accepts a list of dataframes, merge them and return a list of the merged df""" # Return this list when ready - merged_vcenter_tables = [] + merged_vcenter_tables = {} # Prepare the tables vc7x_vcsa = self.list_of_dframes[0] vc67_vcsa = self.list_of_dframes[1] vc67_win = self.list_of_dframes[2] - vc_win_only = self.list_of_dframes[3] + vc65le_vcsa = self.list_of_dframes[3] + vc65le_win = self.list_of_dframes[4] # Solved by WET # Merge VCSA tables merged_vcsa_builds = vc7x_vcsa.append(vc67_vcsa) + merged_vcsa_builds = merged_vcsa_builds.append(vc65le_vcsa) merged_vcsa_builds.reset_index(drop=True, inplace=True) - merged_vcenter_tables.append(merged_vcsa_builds) + merged_vcenter_tables["vcsa_builds"] = merged_vcsa_builds # Merge vCenter for Windows tables - merged_windows_builds = vc67_win.append(vc_win_only) + merged_windows_builds = vc67_win.append(vc65le_win) merged_windows_builds.reset_index(drop=True, inplace=True) - merged_vcenter_tables.append(merged_windows_builds) + merged_vcenter_tables["windows_vc_builds"] = merged_windows_builds # Merge both tables merged_vc_all_builds = merged_vcsa_builds.append(merged_windows_builds) merged_vc_all_builds.reset_index(drop=True, inplace=True) - merged_vcenter_tables.append(merged_vc_all_builds) + merged_vcenter_tables["all_vcenter_builds"] = merged_vc_all_builds # Return the list return merged_vcenter_tables @@ -229,5 +237,6 @@ def parse_releasedata(self): def transform_kb2143850(self, dataframe): """Special 
handling of KB2143850 (vRA)""" if r"Build Number - Version" in dataframe: + dataframe[r"Build Number - Version"] = dataframe[r"Build Number - Version"].str.normalize("NFKD") dataframe[["Build Number", "Version"]] = dataframe[r"Build Number - Version"].str.split(r" - ", expand=True) return dataframe diff --git a/main.py b/main.py index 0e42f70..82ccb75 100644 --- a/main.py +++ b/main.py @@ -22,7 +22,7 @@ __contact__ = "dominik@why-did-it.fail" __license__ = "GPLv3" __status__ = "beta" -__version__ = "0.2.0" +__version__ = "0.3.0" # Imports from data_handling import create_json_output diff --git a/templates/README.md b/templates/README.md index 6bc0218..53bdf71 100644 --- a/templates/README.md +++ b/templates/README.md @@ -16,8 +16,18 @@ Columns may have more than one value, e.g. "Build Number - Version" in KB2143850 In this case, two additional columns (Version, Build Number) will be added to the table each containing just a single Value. ### Merged tables -Roadmap: There may be more than one table that hold the version information, e.g. in KB2143838 (vCenter Server). -A merge operation will attempt to provide a unified table. +Since v0.2.0: For vCenter build information (KB2143838), this release (based on PR #5) offers merged tables. +The KB article contains three tables: +- Release information for VCSA 7 +- Release information for VCSA/Windows VC 6.7 +- Release information for VCSA/Windows VC 6.5 and earlier + +The following merged output is now available: +- one table for all VCSA releases +- one table for all Windows releases +- one table for all releases +Unicode issues are addressed as well. + ### Nested tables, merged columns/rows Roadmap: Tables may have nested tables (e.g. KB52520 - VCF).