Skip to content

Commit bcc9556

Browse files
committed
clean code
1 parent 3a82838 commit bcc9556

File tree

3 files changed

+13
-11
lines changed

3 files changed

+13
-11
lines changed

bin/compute_ibaq.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
CONDITION
1313
from ibaq.ibaqpy_commons import plot_distributions, plot_box_plot
1414

15-
1615
def print_help_msg(command):
1716
"""
1817
Print the help of the command
@@ -133,10 +132,10 @@ def get_average_nr_peptides_unique_bygroup(pdrow: Series) -> Series:
133132
# Remove IBAQ NAN values
134133
res = res.dropna(subset=[IBAQ])
135134
plot_column = IBAQ
136-
135+
137136
plot_distributions(res, plot_column, SAMPLE_ID, log2=True)
138137
plot_box_plot(res, plot_column, SAMPLE_ID, log2=True,
139-
title="IBAQ Distribution", violin=False)
138+
title="IBAQ Distribution", violin=False)
140139

141140
# # For absolute expression the relation is one sample + one condition
142141
# condition = data[CONDITION].unique()[0]

bin/compute_tpa.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@
2020
@click.option("-n", "--ploidy", help="Ploidy number", default=2)
2121
@click.option("-c", "--cpc", help="Cellular protein concentration(g/L)", default=200)
2222
@click.option("-o", "--output", help="Output file with the proteins and other values")
23-
def tpa_compute(fasta: str, contaminants: str, peptides: str, ruler: bool, ploidy: int, cpc: float, output: str) -> None:
23+
def tpa_compute(fasta: str, contaminants: str, peptides: str, ruler: bool, ploidy: int, cpc: float,
24+
output: str) -> None:
2425
"""
2526
This command computes the protein copies and concentrations according to a file output of peptides with the
2627
format described in peptide_contaminants_file_generation.py.
@@ -37,11 +38,11 @@ def tpa_compute(fasta: str, contaminants: str, peptides: str, ruler: bool, ploid
3738
print_help_msg(tpa_compute)
3839
exit(1)
3940

40-
data = pd.read_csv(peptides, sep=",", usecols = [PROTEIN_NAME, INTENSITY, SAMPLE_ID, CONDITION])
41+
data = pd.read_csv(peptides, sep=",", usecols=[PROTEIN_NAME, INTENSITY, SAMPLE_ID, CONDITION])
4142
print("Remove contaminants...")
4243
data = remove_contaminants_decoys(data, contaminants)
4344
data[INTENSITY] = data[INTENSITY].astype("float")
44-
data = data.dropna(subset = [INTENSITY])
45+
data = data.dropna(subset=[INTENSITY])
4546
data = data[data[INTENSITY] > 0]
4647
print(data.head())
4748

bin/peptide_file_generation.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def remove_extension_file(filename: str) -> str:
4545
"""
4646
return filename.replace('.raw', '').replace('.RAW', '').replace('.mzML', '').replace('.wiff', '')
4747

48+
4849
def get_study_accession(sample_id: str) -> str:
4950
"""
5051
Get the project accession from the Sample accession. The function expects a sample accession in the following
@@ -140,13 +141,14 @@ def peptide_file_generation(msstats: str, sdrf: str, compress: bool, min_aa: int
140141
msstats_df = msstats_df[msstats_df[INTENSITY] > 0]
141142
msstats_df[PEPTIDE_CANONICAL] = msstats_df.apply(lambda x: get_canonical_peptide(x[PEPTIDE_SEQUENCE]), axis=1)
142143
# Only peptides with at least min_aa (default: 7) amino acids are retained
143-
msstats_df = msstats_df[msstats_df.apply(lambda x: len(x[PEPTIDE_CANONICAL]) >= min_aa, axis = 1)]
144+
msstats_df = msstats_df[msstats_df.apply(lambda x: len(x[PEPTIDE_CANONICAL]) >= min_aa, axis=1)]
144145
# Only proteins with at least min_unique (default: 2) unique peptides are retained
145-
unique_peptides = set(msstats_df.groupby(PEPTIDE_CANONICAL).filter(lambda x: len(set(x[PROTEIN_NAME])) == 1)[PEPTIDE_CANONICAL].tolist())
146-
strong_proteins = set(msstats_df[msstats_df[PEPTIDE_CANONICAL].isin(unique_peptides)].groupby(PROTEIN_NAME).filter(lambda x: len(set(x[PEPTIDE_CANONICAL])) >= min_unique)[PROTEIN_NAME].tolist())
146+
unique_peptides = set(msstats_df.groupby(PEPTIDE_CANONICAL).filter(lambda x: len(set(x[PROTEIN_NAME])) == 1)[
147+
PEPTIDE_CANONICAL].tolist())
148+
strong_proteins = set(msstats_df[msstats_df[PEPTIDE_CANONICAL].isin(unique_peptides)].groupby(PROTEIN_NAME).filter(
149+
lambda x: len(set(x[PEPTIDE_CANONICAL])) >= min_unique)[PROTEIN_NAME].tolist())
147150
msstats_df = msstats_df[msstats_df[PROTEIN_NAME].isin(strong_proteins)]
148151

149-
150152
msstats_df.rename(
151153
columns={'ProteinName': PROTEIN_NAME, 'PeptideSequence': PEPTIDE_SEQUENCE, 'PrecursorCharge': PEPTIDE_CHARGE,
152154
'Run': RUN,
@@ -197,7 +199,7 @@ def peptide_file_generation(msstats: str, sdrf: str, compress: bool, min_aa: int
197199
else:
198200
choice = ITRAQ4plex
199201
choice = pd.DataFrame.from_dict(choice, orient='index', columns=[CHANNEL]).reset_index().rename(
200-
columns={'index': 'comment[label]'})
202+
columns={'index': 'comment[label]'})
201203
sdrf_df = sdrf_df.merge(choice, on='comment[label]', how='left')
202204
msstats_df[REFERENCE] = msstats_df[REFERENCE].apply(get_reference_name)
203205
result_df = pd.merge(msstats_df, sdrf_df[['source name', REFERENCE, CHANNEL]], how='left',

0 commit comments

Comments
 (0)