Skip to content

Commit

Permalink
Merge pull request #7 from dominikzorgnotti/split-historic-vcenter-re…
Browse files Browse the repository at this point in the history
…leases

Split historic vcenter releases
  • Loading branch information
dominikzorgnotti authored Mar 14, 2021
2 parents b2365eb + 35c1d41 commit 9994448
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 24 deletions.
13 changes: 11 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,17 @@ Roadmap: There may be more than one table that hold the version information, e.g
A merge operation will attempt to provide a unified table.

### Nested tables, merged columns/rows
Roadmap: Tables may have nested tables (e.g. KB52520 - VCF).
A decomposition is needed to provide the information in a usable format.
Since v0.2.0: For vCenter build information (KB2143838), this release based on PR #5 offers merged tables:
The KB article contains three tables:
- Release information for VCSA 7
- Release information for VCSA/Windows VC 6.7
- Release information for VCSA/Windows VC 6.5 and earlier

The merged output available is now:
- one table for all VCSA releases
- one table for all Windows releases
- one table for all releases

Unicode issues are addressed as well.

## Output format and folder structures
The way the output is currently structured is:
Expand Down
8 changes: 3 additions & 5 deletions data_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
__contact__ = "[email protected]"
__license__ = "GPLv3"
__status__ = "beta"
__version__ = "0.2.0"
__version__ = "0.3.0"

import os

Expand All @@ -49,8 +49,8 @@ def create_json_output(kb_dataobject, output_base_dir: str, record_type: str):
table_id += 1
if kb_dataobject.list_of_merged_frames:
table_id = 0
for dataframe in kb_dataobject.list_of_merged_frames:
filename = f"kb{kb_dataobject.id}_{kb_dataobject.fmt_product}_merged{table_id}_release_as-{record_type}.json"
for table_name, dataframe in kb_dataobject.list_of_merged_frames.items():
filename = f"kb{kb_dataobject.id}_{kb_dataobject.fmt_product}_{table_name}_as-{record_type}.json"
if "Build Number" in dataframe.columns and record_type == "index":
dataframe = transform_index(dataframe)
try:
Expand All @@ -60,8 +60,6 @@ def create_json_output(kb_dataobject, output_base_dir: str, record_type: str):
)
except ValueError as err:
print(f"{kb_dataobject.id}: Error for json {record_type} in merged table {table_id}: {err}")
finally:
table_id += 1


def transform_index(dataframe):
Expand Down
37 changes: 23 additions & 14 deletions kb_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
__contact__ = "[email protected]"
__license__ = "GPLv3"
__status__ = "beta"
__version__ = "0.2.0"
__version__ = "0.3.0"

import pandas as pd

Expand Down Expand Up @@ -112,6 +112,7 @@ def parse_releasedata(self):
# Contains a list of all tables converted to dataframes in the resolution section
list_of_release_df = []
for table_id in range(len(df)):
# VCSA 7
if table_id == 0:
vcenter7_table = df[table_id]
reformatted_df = self.transform_kb2143838(vcenter7_table)
Expand All @@ -120,6 +121,7 @@ def parse_releasedata(self):
infer_datetime_format=True,
errors='coerce')
list_of_release_df.append(reformatted_df)
# VCSA/Windows 6.7
elif table_id == 1:
vcenter67_table = df[table_id]
product_editions = ["VCSA", "Windows"]
Expand All @@ -131,18 +133,22 @@ def parse_releasedata(self):
errors='coerce')
list_of_release_df.append(reformatted_df)
del split_df
# VCSA/Windows less equal 6.5
elif table_id == 2:
# The HTML table have no header, we need to reassign the first row as heading
df_header = df[table_id][:1]
current_df = df[table_id][1:]
current_df.columns = df_header.values.tolist()[0]
# Moving the del up here
del df_header
current_df["Edition"] = "Windows"
vcenter_le65_table = df[table_id][1:]
vcenter_le65_table.columns = df_header.values.tolist()[0]
# Get the data types right, especially the date format='%m/%d/%Y'
current_df["Release Date"] = pd.to_datetime(current_df["Release Date"], infer_datetime_format=True,
vcenter_le65_table["Release Date"] = pd.to_datetime(vcenter_le65_table["Release Date"], infer_datetime_format=True,
errors='coerce')
list_of_release_df.append(current_df)
#Filter VCSA releases by keyword "Appliance", for Windows negate the search
vcsa_le65 = vcenter_le65_table[vcenter_le65_table["Version"].str.contains("appliance", case=False)]
vcsa_le65["Edition"] = "VCSA"
winvc_le65 = vcenter_le65_table[~vcenter_le65_table["Version"].str.contains("appliance", case=False)]
winvc_le65["Edition"] = "Windows"
list_of_release_df.append(vcsa_le65)
list_of_release_df.append(winvc_le65)
else:
print("Unknown table added, please add handling")
return list_of_release_df
Expand Down Expand Up @@ -175,25 +181,27 @@ def transform_kb2143838(self, dataframe):
def merge_tables_kb2143838(self):
"""Accepts a list of dataframes, merge them and return a list of the merged df"""
# Return this list when ready
merged_vcenter_tables = []
merged_vcenter_tables = {}
# Prepare the tables
vc7x_vcsa = self.list_of_dframes[0]
vc67_vcsa = self.list_of_dframes[1]
vc67_win = self.list_of_dframes[2]
vc_win_only = self.list_of_dframes[3]
vc65le_vcsa = self.list_of_dframes[3]
vc65le_win = self.list_of_dframes[4]
# Solved by WET
# Merge VCSA tables
merged_vcsa_builds = vc7x_vcsa.append(vc67_vcsa)
merged_vcsa_builds = merged_vcsa_builds.append(vc65le_vcsa)
merged_vcsa_builds.reset_index(drop=True, inplace=True)
merged_vcenter_tables.append(merged_vcsa_builds)
merged_vcenter_tables["vcsa_builds"] = merged_vcsa_builds
# Merge vCenter for Windows tables
merged_windows_builds = vc67_win.append(vc_win_only)
merged_windows_builds = vc67_win.append(vc65le_win)
merged_windows_builds.reset_index(drop=True, inplace=True)
merged_vcenter_tables.append(merged_windows_builds)
merged_vcenter_tables["windows_vc_builds"] = merged_windows_builds
# Merge both tables
merged_vc_all_builds = merged_vcsa_builds.append(merged_windows_builds)
merged_vc_all_builds.reset_index(drop=True, inplace=True)
merged_vcenter_tables.append(merged_vc_all_builds)
merged_vcenter_tables["all_vcenter_builds"] = merged_vc_all_builds
# Return the list
return merged_vcenter_tables

Expand Down Expand Up @@ -229,5 +237,6 @@ def parse_releasedata(self):
def transform_kb2143850(self, dataframe):
    """Special handling of KB2143850 (vRA).

    If the frame has a combined "Build Number - Version" column, normalize
    its text to Unicode NFKD form and split it into two additional columns,
    "Build Number" and "Version". Frames without that column are returned
    untouched.

    Args:
        dataframe: pandas DataFrame parsed from the KB article. It is
            modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    combined = r"Build Number - Version"
    if combined not in dataframe:
        return dataframe

    # NFKD folds compatibility characters (e.g. non-breaking spaces) into
    # their plain equivalents so the " - " separator below actually matches.
    dataframe[combined] = dataframe[combined].str.normalize("NFKD")

    # Split on the first separator only: a version text that itself contains
    # " - " must not produce a third column. reindex() guarantees exactly two
    # result columns even when no row contains the separator (or the frame is
    # empty); the missing part is then left as NaN instead of raising.
    parts = (
        dataframe[combined]
        .str.split(r" - ", n=1, expand=True)
        .reindex(columns=[0, 1])
    )
    dataframe["Build Number"] = parts[0]
    dataframe["Version"] = parts[1]
    return dataframe
2 changes: 1 addition & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
__contact__ = "[email protected]"
__license__ = "GPLv3"
__status__ = "beta"
__version__ = "0.2.0"
__version__ = "0.3.0"

# Imports
from data_handling import create_json_output
Expand Down
14 changes: 12 additions & 2 deletions templates/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,18 @@ Columns may have more than one value, e.g. "Build Number - Version" in KB2143850
In this case, two additional columns (Version, Build Number) will be added to the table each containing just a single Value.

### Merged tables
Roadmap: There may be more than one table that hold the version information, e.g. in KB2143838 (vCenter Server).
A merge operation will attempt to provide a unified table.
Since v0.2.0: For vCenter build information (KB2143838), this release based on PR #5 offers merged tables:
The KB article contains three tables:
- Release information for VCSA 7
- Release information for VCSA/Windows VC 6.7
- Release information for VCSA/Windows VC 6.5 and earlier

The merged output available is now:
- one table for all VCSA releases
- one table for all Windows releases
- one table for all releases

Unicode issues are addressed as well.


### Nested tables, merged columns/rows
Roadmap: Tables may have nested tables (e.g. KB52520 - VCF).
Expand Down

0 comments on commit 9994448

Please sign in to comment.