Clean up code formatting (PEP 8 whitespace, indentation, and line wrapping)

ypriverol · ypriverol · commit bcc955639315 · 2023-05-22T09:24:14.000+01:00
diff --git a/bin/compute_ibaq.py b/bin/compute_ibaq.py
@@ -12,7 +12,6 @@
     CONDITION
 from ibaq.ibaqpy_commons import plot_distributions, plot_box_plot
 
-
 def print_help_msg(command):
     """
     Print the help of the command
@@ -133,10 +132,10 @@ def get_average_nr_peptides_unique_bygroup(pdrow: Series) -> Series:
         # Remove IBAQ NAN values
         res = res.dropna(subset=[IBAQ])
         plot_column = IBAQ
-        
+
     plot_distributions(res, plot_column, SAMPLE_ID, log2=True)
     plot_box_plot(res, plot_column, SAMPLE_ID, log2=True,
-                title="IBAQ Distribution", violin=False)
+                  title="IBAQ Distribution", violin=False)
 
     # # For absolute expression the relation is one sample + one condition
     # condition = data[CONDITION].unique()[0]
diff --git a/bin/compute_tpa.py b/bin/compute_tpa.py
@@ -20,7 +20,8 @@
 @click.option("-n", "--ploidy", help="Ploidy number", default=2)
 @click.option("-c", "--cpc", help="Cellular protein concentration(g/L)", default=200)
 @click.option("-o", "--output", help="Output file with the proteins and other values")
-def tpa_compute(fasta: str, contaminants: str, peptides: str, ruler: bool, ploidy: int, cpc: float, output: str) -> None:
+def tpa_compute(fasta: str, contaminants: str, peptides: str, ruler: bool, ploidy: int, cpc: float,
+                output: str) -> None:
     """
     This command computes the protein copies and concentrations according to a file output of peptides with the
     format described in peptide_contaminants_file_generation.py.
@@ -37,11 +38,11 @@ def tpa_compute(fasta: str, contaminants: str, peptides: str, ruler: bool, ploid
         print_help_msg(tpa_compute)
         exit(1)
 
-    data = pd.read_csv(peptides, sep=",", usecols = [PROTEIN_NAME, INTENSITY, SAMPLE_ID, CONDITION])
+    data = pd.read_csv(peptides, sep=",", usecols=[PROTEIN_NAME, INTENSITY, SAMPLE_ID, CONDITION])
     print("Remove contaminants...")
     data = remove_contaminants_decoys(data, contaminants)
     data[INTENSITY] = data[INTENSITY].astype("float")
-    data = data.dropna(subset = [INTENSITY])
+    data = data.dropna(subset=[INTENSITY])
     data = data[data[INTENSITY] > 0]
     print(data.head())
 
diff --git a/bin/peptide_file_generation.py b/bin/peptide_file_generation.py
@@ -45,6 +45,7 @@ def remove_extension_file(filename: str) -> str:
   """
     return filename.replace('.raw', '').replace('.RAW', '').replace('.mzML', '').replace('.wiff', '')
 
+
 def get_study_accession(sample_id: str) -> str:
     """
   Get the project accession from the Sample accession. The function expected a sample accession in the following
@@ -140,13 +141,14 @@ def peptide_file_generation(msstats: str, sdrf: str, compress: bool, min_aa: int
     msstats_df = msstats_df[msstats_df[INTENSITY] > 0]
     msstats_df[PEPTIDE_CANONICAL] = msstats_df.apply(lambda x: get_canonical_peptide(x[PEPTIDE_SEQUENCE]), axis=1)
     # Only peptides with more than min_aa (default: 7) amino acids are retained
-    msstats_df = msstats_df[msstats_df.apply(lambda x: len(x[PEPTIDE_CANONICAL]) >= min_aa, axis = 1)]
+    msstats_df = msstats_df[msstats_df.apply(lambda x: len(x[PEPTIDE_CANONICAL]) >= min_aa, axis=1)]
     # Only proteins with unique peptides number greater than min_unique (default: 2) are retained
-    unique_peptides = set(msstats_df.groupby(PEPTIDE_CANONICAL).filter(lambda x: len(set(x[PROTEIN_NAME])) == 1)[PEPTIDE_CANONICAL].tolist())
-    strong_proteins = set(msstats_df[msstats_df[PEPTIDE_CANONICAL].isin(unique_peptides)].groupby(PROTEIN_NAME).filter(lambda x: len(set(x[PEPTIDE_CANONICAL])) >= min_unique)[PROTEIN_NAME].tolist())
+    unique_peptides = set(msstats_df.groupby(PEPTIDE_CANONICAL).filter(lambda x: len(set(x[PROTEIN_NAME])) == 1)[
+                              PEPTIDE_CANONICAL].tolist())
+    strong_proteins = set(msstats_df[msstats_df[PEPTIDE_CANONICAL].isin(unique_peptides)].groupby(PROTEIN_NAME).filter(
+        lambda x: len(set(x[PEPTIDE_CANONICAL])) >= min_unique)[PROTEIN_NAME].tolist())
     msstats_df = msstats_df[msstats_df[PROTEIN_NAME].isin(strong_proteins)]
 
-
     msstats_df.rename(
         columns={'ProteinName': PROTEIN_NAME, 'PeptideSequence': PEPTIDE_SEQUENCE, 'PrecursorCharge': PEPTIDE_CHARGE,
                  'Run': RUN,
@@ -197,7 +199,7 @@ def peptide_file_generation(msstats: str, sdrf: str, compress: bool, min_aa: int
         else:
             choice = ITRAQ4plex
         choice = pd.DataFrame.from_dict(choice, orient='index', columns=[CHANNEL]).reset_index().rename(
-        columns={'index': 'comment[label]'})
+            columns={'index': 'comment[label]'})
         sdrf_df = sdrf_df.merge(choice, on='comment[label]', how='left')
         msstats_df[REFERENCE] = msstats_df[REFERENCE].apply(get_reference_name)
         result_df = pd.merge(msstats_df, sdrf_df[['source name', REFERENCE, CHANNEL]], how='left',