feat: ENH Semi-Automatic Parser (#134)

* docs: semi-automatic parser
* feat(jsonParser): semi-automatic from csv
* feat(data): Informatica
* feat(data): Dipartimento di Mat e Inf
* Update jsonParser/semi-automatic/parse_election_results.py
  Co-authored-by: Stefano Borzì <[email protected]>
* Update jsonParser/semi-automatic/parse_election_results.py
  Co-authored-by: Stefano Borzì <[email protected]>
---------
Co-authored-by: Stefano Borzì <[email protected]>
UNICT-DMI · Nov 3, 2023 · 83ef26f · 83ef26f
1 parent 37d021d
commit 83ef26f
Show file tree

Hide file tree

Showing 4 changed files with 896 additions and 0 deletions.
diff --git a/jsonParser/semi-automatic/README.md b/jsonParser/semi-automatic/README.md
@@ -0,0 +1 @@
+## Semi-Automatic Parser
diff --git a/jsonParser/semi-automatic/parse_election_results.py b/jsonParser/semi-automatic/parse_election_results.py
@@ -0,0 +1,189 @@
+import csv
+import json
+import click
+import os
+
@click.command()
@click.option('--input', '-i', help='Input CSV file', required=True)
@click.option('--output', '-o', help='Output JSON file', required=True)
def main(input: str, output: str):
    """Convert a semicolon-delimited election results CSV into a JSON file."""
    # NOTE: the parameter has to stay named ``input`` (shadowing the builtin)
    # because click derives the keyword argument name from ``--input``.
    check_file_exists(input)

    # Read the CSV file.
    # - 'utf-8-sig' transparently drops a leading BOM if one is present.
    # - newline='' is what the csv module requires so that line breaks inside
    #   quoted fields are parsed by the reader instead of being translated
    #   by the text layer.
    with open(input, 'r', encoding='utf-8-sig', newline='') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=';')
        rows_list = create_list(csv_reader)

    # Skeleton of the output document; the parsing steps below fill it in.
    data = {
        "schede": {
            "bianche": {
                "totali": 0,
                "seggio_n_telematico": 0
            },
            "nulle": {
                "totali": 0
            },
            "contestate": {
                "totali": 0
            }
        },
        "liste": [],
        "eletti": [],
        "non_eletti": [],
    }

    # Each step consumes the rows it understands and returns the remainder,
    # so the call order must follow the section order of the CSV.
    rows_list = get_name_and_seats(rows_list, data)
    rows_list = get_list_information(rows_list, data)
    rows_list = get_votation_information(rows_list, data)
    get_candidates_information(rows_list, data)

    # Serialize straight into the output file; ensure_ascii=False keeps
    # accented characters readable instead of \u-escaping them.
    with open(output, 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, indent=4, ensure_ascii=False)
+
def check_file_exists(file_path: str) -> None:
    """Ensure that ``file_path`` points to an existing regular file.

    Args:
        file_path (str): Path to check.

    Raises:
        FileNotFoundError: If ``os.path.isfile`` reports that the path is
            not an existing regular file.
    """
    if os.path.isfile(file_path):
        return
    raise FileNotFoundError(f"File '{file_path}' not found")
+
def create_list(csv_reader) -> list:
    """Materialize the CSV rows, replacing non-breaking spaces with spaces.

    Args:
        csv_reader: Any iterable of rows, each row being a list of string
            cells (for example the object returned by ``csv.reader``).

    Returns:
        list: One list of cells per input row, in the same order, with every
            ``\\xa0`` character replaced by a regular space.
    """
    # Only builtin names are used in the signature: ``List`` from ``typing``
    # is not imported by this module, so annotating with it raises NameError
    # as soon as the function is defined.
    return [[cell.replace('\xa0', ' ') for cell in row] for row in csv_reader]
+
def get_name_and_seats(rows_list: list, data: dict) -> list:
    """Store the department name and the number of seats, then skip the header.

    The first cell of the first row is written to ``data["dipartimento"]``
    and the second cell of the second row to ``data["seggi_da_assegnare"]``
    (kept exactly as read, without conversion).

    Args:
        rows_list (list): The list of rows of the CSV file
        data (dict): The dictionary that will contain the data

    Returns:
        list: The rows that follow the first four rows of ``rows_list``
    """
    data["dipartimento"] = str(rows_list[0][0])
    data["seggi_da_assegnare"] = rows_list[1][1]
    # The first four rows form the header section; hand back what follows.
    return rows_list[4:]
+
def get_list_information(rows_list: list, data: dict) -> list:
    """Collect one entry per electoral list until the ``TOTALE`` row.

    Every row before ``TOTALE`` is appended to ``data["liste"]`` as a
    dictionary of stripped strings. The ``TOTALE`` row itself is appended as
    ``{"totale": <int>}`` and ends the section.

    Args:
        rows_list (list): The list of rows of the CSV file
        data (dict): The dictionary that will contain the data

    Returns:
        list: The rows that follow the ``TOTALE`` row (empty when no
            ``TOTALE`` row is found)
    """
    for consumed, row in enumerate(rows_list, start=1):
        label = row[0].strip()

        if label == "TOTALE":
            data["liste"].append({"totale": int(row[1].strip())})
            return rows_list[consumed:]

        seats = {
            "seggi_pieni": str(row[1].strip()),
            "resti": str(row[2].strip()),
            "seggi_ai_resti": str(row[3].strip()),
            "seggi_totali": str(row[4].strip()),
        }
        votes = {
            "totali": str(row[5].strip()),
            # The last column of the row is used for this value.
            "seggio_telematico": str(row[-1].strip()),
        }
        data["liste"].append({"nome": str(label), "seggi": seats, "voti": votes})

    # No TOTALE row: every row was consumed, nothing is left over.
    return rows_list[len(rows_list):]
+
def get_votation_information(rows_list: list, data: dict) -> list:
    """Read ballot, quotient, voter and elector figures into ``data``.

    Rows are scanned in order until the ``PREFERENZE CANDIDATI`` row, and
    rows whose first cell matches none of the known labels are skipped.
    Decimal values use a comma in the CSV, which is converted to a dot
    before parsing.

    Args:
        rows_list (list): The list of rows of the CSV file
        data (dict): The dictionary that will contain the data

    Returns:
        list: The rows that follow the ``PREFERENZE CANDIDATI`` row (empty
            when that row is not found)
    """
    ballots = "schede"
    for consumed, row in enumerate(rows_list, start=1):
        raw_label = row[0]

        # Ballot rows are matched by substring on the unstripped label.
        if "Schede Bianche" in raw_label:
            data[ballots]["bianche"]["totali"] = int(row[1])
            data[ballots]["bianche"]["seggio_n_telematico"] = int(row[-1])
            continue
        if "Schede Nulle" in raw_label:
            data[ballots]["nulle"]["totali"] = int(row[-1])
            continue
        if "Schede Contestate" in raw_label:
            data[ballots]["contestate"]["totali"] = int(row[-1])
            continue

        # The remaining rows are matched exactly on the stripped label.
        label = raw_label.strip()
        if label == "QUOZIENTE":
            data["quoziente"] = float(row[1].strip().replace(",", "."))
        elif label == "VOTANTI":
            data["votanti"] = {
                "totali": int(row[1].strip()),
                "percentuale": float(row[4].strip().replace(",", ".")),
                "seggio_n_telematico": int(row[-1].strip()),
            }
        elif label == "TOTALE ELETTORI AVENTI DIRITTO":
            # NOTE(review): both values come from column 1, unlike VOTANTI
            # which reads the last column for the seat figure. Confirm
            # against the CSV layout whether this is intended.
            data["elettori"] = {
                "totali": int(row[1].strip()),
                "seggio_n_telematico": int(row[1].strip()),
            }
        elif label == "PREFERENZE CANDIDATI":
            return rows_list[consumed:]

    # Marker row never found: every row was consumed.
    return rows_list[len(rows_list):]
+
def get_candidates_information(rows_list: list, data: dict) -> None:
    """Split the candidates of every list into elected and not elected.

    The remaining rows are read as repeated sections. The first cell of a
    section's first row is the list name and the row after it is skipped.
    Each following row is a candidate, up to and including a row whose first
    cell contains ``SEGGI``, which closes the section.

    Args:
        rows_list (list): The list of rows of the CSV file
        data (dict): The dictionary that will contain the data
    """
    remaining = rows_list
    while remaining:
        list_name = remaining[0][0].strip()
        # Drop the list-name row and the row right after it.
        remaining = remaining[2:]

        consumed = 0
        for entry in remaining:
            consumed += 1
            name_cell = entry[0].strip()
            if "SEGGI" in name_cell:
                break

            candidate = {
                "nominativo": str(name_cell),
                "lista": list_name,
                "voti": {
                    "totali": int(entry[1].strip()),
                    # Seat votes sit four columns from the end of the row.
                    "seggio_telematico": int(entry[-4].strip()),
                },
            }
            # NOTE(review): substring match, so a status such as
            # "NON ELETTO" would also count as elected. Confirm the values
            # used in the CSV.
            bucket = "eletti" if "ELETTO" in entry[3].strip() else "non_eletti"
            data[bucket].append(candidate)

        # Continue with whatever follows the section just processed.
        remaining = remaining[consumed:]
+
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()