@@ -45,6 +45,7 @@ def remove_extension_file(filename: str) -> str:
45
45
"""
46
46
return filename .replace ('.raw' , '' ).replace ('.RAW' , '' ).replace ('.mzML' , '' ).replace ('.wiff' , '' )
47
47
48
+
48
49
def get_study_accession (sample_id : str ) -> str :
49
50
"""
50
51
Get the project accession from the sample accession. The function expects a sample accession in the following
@@ -140,13 +141,14 @@ def peptide_file_generation(msstats: str, sdrf: str, compress: bool, min_aa: int
140
141
msstats_df = msstats_df [msstats_df [INTENSITY ] > 0 ]
141
142
msstats_df [PEPTIDE_CANONICAL ] = msstats_df .apply (lambda x : get_canonical_peptide (x [PEPTIDE_SEQUENCE ]), axis = 1 )
142
143
# Only peptides with at least min_aa (default: 7) amino acids are retained
143
- msstats_df = msstats_df [msstats_df .apply (lambda x : len (x [PEPTIDE_CANONICAL ]) >= min_aa , axis = 1 )]
144
+ msstats_df = msstats_df [msstats_df .apply (lambda x : len (x [PEPTIDE_CANONICAL ]) >= min_aa , axis = 1 )]
144
145
# Only proteins with at least min_unique (default: 2) unique peptides are retained
145
- unique_peptides = set (msstats_df .groupby (PEPTIDE_CANONICAL ).filter (lambda x : len (set (x [PROTEIN_NAME ])) == 1 )[PEPTIDE_CANONICAL ].tolist ())
146
- strong_proteins = set (msstats_df [msstats_df [PEPTIDE_CANONICAL ].isin (unique_peptides )].groupby (PROTEIN_NAME ).filter (lambda x : len (set (x [PEPTIDE_CANONICAL ])) >= min_unique )[PROTEIN_NAME ].tolist ())
146
+ unique_peptides = set (msstats_df .groupby (PEPTIDE_CANONICAL ).filter (lambda x : len (set (x [PROTEIN_NAME ])) == 1 )[
147
+ PEPTIDE_CANONICAL ].tolist ())
148
+ strong_proteins = set (msstats_df [msstats_df [PEPTIDE_CANONICAL ].isin (unique_peptides )].groupby (PROTEIN_NAME ).filter (
149
+ lambda x : len (set (x [PEPTIDE_CANONICAL ])) >= min_unique )[PROTEIN_NAME ].tolist ())
147
150
msstats_df = msstats_df [msstats_df [PROTEIN_NAME ].isin (strong_proteins )]
148
151
149
-
150
152
msstats_df .rename (
151
153
columns = {'ProteinName' : PROTEIN_NAME , 'PeptideSequence' : PEPTIDE_SEQUENCE , 'PrecursorCharge' : PEPTIDE_CHARGE ,
152
154
'Run' : RUN ,
@@ -197,7 +199,7 @@ def peptide_file_generation(msstats: str, sdrf: str, compress: bool, min_aa: int
197
199
else :
198
200
choice = ITRAQ4plex
199
201
choice = pd .DataFrame .from_dict (choice , orient = 'index' , columns = [CHANNEL ]).reset_index ().rename (
200
- columns = {'index' : 'comment[label]' })
202
+ columns = {'index' : 'comment[label]' })
201
203
sdrf_df = sdrf_df .merge (choice , on = 'comment[label]' , how = 'left' )
202
204
msstats_df [REFERENCE ] = msstats_df [REFERENCE ].apply (get_reference_name )
203
205
result_df = pd .merge (msstats_df , sdrf_df [['source name' , REFERENCE , CHANNEL ]], how = 'left' ,
0 commit comments