Merge pull request #130 from MannLabs/development
Development
jalew188 committed Jan 7, 2024
2 parents 8cc6541 + 22711d7 commit bc76528
Showing 16 changed files with 155 additions and 117 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 1.1.3
current_version = 1.1.4
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
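The `parse` pattern in this config also accepts pre-release suffixes. A quick illustrative check of that pattern in Python (the version strings below are examples, not values taken from the repository):

# Illustrative check of the bumpversion parse pattern shown above.
import re

PARSE = r"(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?"

print(re.fullmatch(PARSE, "1.1.4").groupdict())
# {'major': '1', 'minor': '1', 'patch': '4', 'release': None, 'build': None}
print(re.fullmatch(PARSE, "1.1.4-dev1").groupdict())
# {'major': '1', 'minor': '1', 'patch': '4', 'release': 'dev', 'build': '1'}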
2 changes: 1 addition & 1 deletion docs/conf.py
@@ -23,7 +23,7 @@
copyright = '2022, Mann Labs, MPIB'
author = 'Mann Labs, MPIB'

release = "1.1.3"
release = "1.1.4"

# -- General configuration ---------------------------------------------------

7 changes: 5 additions & 2 deletions nbdev_nbs/settings.ipynb
@@ -40,7 +40,9 @@
{
"data": {
"text/plain": [
"{'Lumos': 'Lumos',\n",
"{'ThermoTOF': 'ThermoTOF',\n",
" 'Astral': 'ThermoTOF',\n",
" 'Lumos': 'Lumos',\n",
" 'QE': 'QE',\n",
" 'timsTOF': 'timsTOF',\n",
" 'SciexTOF': 'SciexTOF',\n",
@@ -55,6 +57,8 @@
" 'QEHFX': 'QE',\n",
" 'Exploris': 'QE',\n",
" 'Exploris480': 'QE',\n",
" 'THERMOTOF': 'ThermoTOF',\n",
" 'ASTRAL': 'ThermoTOF',\n",
" 'LUMOS': 'Lumos',\n",
" 'TIMSTOF': 'timsTOF',\n",
" 'SCIEXTOF': 'SciexTOF',\n",
@@ -86,7 +90,6 @@
"metadata": {},
"outputs": [],
"source": [
"from peptdeep.model.featurize import MOD_TO_FEATURE, MOD_DF\n",
"add_user_defined_modifications({\n",
" \"Hi@S\":{'composition':\"H(2)\"},\n",
" \"AlphaX@S\":{'composition':\"O(10)\",'modloss_composition':\"O(3)\"}\n",
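With this notebook change, the instrument mapping routes the new "ThermoTOF" and "Astral" names (and their upper-case variants) to the "ThermoTOF" model group, and the modification-registration cell no longer imports MOD_TO_FEATURE from peptdeep.model.featurize. A minimal sketch of that registration call, assuming MOD_TO_FEATURE is now re-exported from peptdeep.settings as the peptdeep/model/featurize.py hunk below suggests; "Hi@S" and "AlphaX@S" are the notebook's example modifications, not built-in ones:

# Minimal sketch, assuming peptdeep.settings exposes both helpers.
from peptdeep.settings import add_user_defined_modifications, MOD_TO_FEATURE

add_user_defined_modifications({
    "Hi@S": {"composition": "H(2)"},
    "AlphaX@S": {"composition": "O(10)", "modloss_composition": "O(3)"},
})

print("AlphaX@S" in MOD_TO_FEATURE)  # expected True once the feature table is rebuilt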
2 changes: 1 addition & 1 deletion peptdeep/__init__.py
@@ -11,7 +11,7 @@
# pass

__project__ = "peptdeep"
__version__ = "1.1.3"
__version__ = "1.1.4"
__license__ = "Apache 2.0"
__description__ = "The AlphaX deep learning framework for Proteomics"
__author__ = "Mann Labs"
83 changes: 50 additions & 33 deletions peptdeep/cli_argparse.py
@@ -1,11 +1,13 @@
#!python

import os

from peptdeep.settings import global_settings, load_global_settings

import argparse

from peptdeep.settings import (
global_settings, load_global_settings,
_refine_global_settings
)

__argparse_dict_level_sep="--" # do not change

def convert_dict_to_argparse(
@@ -18,40 +20,48 @@ def convert_dict_to_argparse(
return [(prefix_key, settings)]
ret = []
for key, val in settings.items():
ret += convert_dict_to_argparse(
val, prefix_key=(prefix_key+dict_level_sep+key) if prefix_key else key
)
if key in [
"labeling_channels",
"psm_modification_mapping",
"user_defined_modifications",
"instrument_group",
]:
ret += [(prefix_key+dict_level_sep+key, val)]
else:
ret += convert_dict_to_argparse(
val, prefix_key=(prefix_key+dict_level_sep+key) if prefix_key else key
)
return ret
else:
return [(prefix_key, settings)]

def _set_dict_val(_dict, keys, val):
if len(keys) < 1: return
elif keys[0] == "labeling_channels":
def _get(x:str):
i = x.find(":")
k,v = x[:i], x[i+1:]
k = int(k) if k.isdigit() else k
v = v.split(";")
return k,v
_dict[keys[0]].update(dict([_get(s) for s in val]))
elif keys[0] == "psm_modification_mapping":
def _get(x):
i = x.find(":", x.find("@"))
k,v = x[:i], x[i+1:]
return k, v.split(";")
_dict[keys[0]].update(dict([_get(s) for s in val]))
elif keys[0] == "user_defined_modifications":
def _get(x):
i = x.find(":", x.find("@"))
k,v = x[:i], x[i+1:]
items = v.split(";")
if len(items) == 1:
return k, {"composition":items[0]}
else:
return k, {"composition": items[0], "modloss_composition": items[1]}
_dict[keys[0]].update(dict([_get(s) for s in val]))
elif len(keys) == 1:
if keys[0] == "labeling_channels":
def _get(x:str):
i = x.find(":")
k,v = x[:i], x[i+1:]
k = int(k) if k.isdigit() else k
v = v.split(";")
return k,v
val = dict([_get(s) for s in val])
elif keys[0] == "psm_modification_mapping":
def _get(x):
i = x.find(":", x.find("@"))
k,v = x[:i], x[i+1:]
return k, v.split(";")
val = dict([_get(s) for s in val])
elif keys[0] == "user_defined_modifications":
def _get(x):
i = x.find(":", x.find("@"))
k,v = x[:i], x[i+1:]
items = v.split(";")
if len(items) == 1:
return k, {"composition":items[0]}
else:
return k, {"composition": items[0], "modloss_composition": items[1]}
val = dict([_get(s) for s in val])
_dict[keys[0]] = val
else: _set_dict_val(_dict[keys[0]], keys[1:], val)
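The branches above parse the compact string formats that the special list-valued CLI options accept: labeling_channels entries look like "channel:mod1;mod2", and user_defined_modifications entries look like "ModName@Site:composition[;modloss_composition]". A standalone re-implementation of that parsing, for illustration only (the example strings are hypothetical, not taken from the peptdeep docs):

# Standalone sketch of the parsing branches above.
def parse_labeling_channel(x: str):
    # "0:Dimethyl@Any_N-term;Dimethyl@K" -> (0, ['Dimethyl@Any_N-term', 'Dimethyl@K'])
    i = x.find(":")
    k, v = x[:i], x[i + 1:]
    return (int(k) if k.isdigit() else k), v.split(";")

def parse_user_defined_mod(x: str):
    # "AlphaX@S:O(10);O(3)" -> ('AlphaX@S',
    #     {'composition': 'O(10)', 'modloss_composition': 'O(3)'})
    i = x.find(":", x.find("@"))  # look for the ":" only after the "@" in the mod name
    k, v = x[:i], x[i + 1:]
    items = v.split(";")
    mod = {"composition": items[0]}
    if len(items) > 1:
        mod["modloss_composition"] = items[1]
    return k, mod

print(parse_labeling_channel("0:Dimethyl@Any_N-term;Dimethyl@K"))
print(parse_user_defined_mod("AlphaX@S:O(10);O(3)"))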

@@ -83,8 +93,7 @@ def get_parser():
return parser

def parse_args_to_global_settings(parser, args):
args, extras = parser.parse_known_args(args)
args_dict = vars(args)
args_dict = vars(parser.parse_known_args(args)[0])
if "settings_yaml" in args_dict:
if os.path.isfile(
args_dict["settings_yaml"]
@@ -95,7 +104,15 @@ def parse_args_to_global_settings(parser, args):
else:
print(f"Settings.yaml `{args_dict['settings_yaml']}` does not exist.")
args_dict.pop("settings_yaml")
for key, val in vars(args).items():
used_args = {}
for arg in args:
if arg.startswith("--"):
arg = arg[2:].replace("--","__")
if arg in args_dict:
used_args[arg] = args_dict[arg]

for key, val in used_args.items():
keys = key.split("__")
_set_dict_val(global_settings, keys, val)
_refine_global_settings()
return global_settings
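The `used_args` loop is the behavioral change here: argparse assigns a default to every known option, so the code now scans the raw argv and applies only the options the user actually passed, leaving everything else (including values loaded from settings_yaml) untouched. A standalone sketch of that pattern; the `--task_workflow--threads` flag is hypothetical, since real peptdeep flags are generated from the keys of global_settings:

# Standalone sketch of the "only apply explicitly passed options" pattern.
import argparse

def explicitly_passed(argv, parsed_dict):
    used = {}
    for token in argv:
        if token.startswith("--"):
            # argparse turns "--a--b" into the dest "a__b", so mirror that here
            name = token[2:].replace("--", "__")
            if name in parsed_dict:
                used[name] = parsed_dict[name]
    return used

parser = argparse.ArgumentParser()
parser.add_argument("--task_workflow--threads", type=int, default=8)  # hypothetical flag

argv = ["--task_workflow--threads", "4"]
args_dict = vars(parser.parse_known_args(argv)[0])
print(explicitly_passed(argv, args_dict))                       # {'task_workflow__threads': 4}
print(explicitly_passed([], vars(parser.parse_known_args([])[0])))  # {} -> defaults are not applied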
33 changes: 5 additions & 28 deletions peptdeep/model/featurize.py
@@ -1,35 +1,12 @@
import numpy as np
import pandas as pd
from typing import List, Union
from alphabase.constants.modification import MOD_DF

from peptdeep.settings import model_const

mod_elements = model_const['mod_elements']
mod_feature_size = len(mod_elements)

mod_elem_to_idx = dict(zip(mod_elements, range(mod_feature_size)))

def _parse_mod_formula(formula):
'''
Parse a modification formula to a feature vector
'''
feature = np.zeros(mod_feature_size)
elems = formula.strip(')').split(')')
for elem in elems:
chem, num = elem.split('(')
num = int(num)
if chem in mod_elem_to_idx:
feature[mod_elem_to_idx[chem]] = num
else:
feature[-1] += num
return feature

MOD_TO_FEATURE = {}
def update_all_mod_features():
for modname, formula in MOD_DF[['mod_name','composition']].values:
MOD_TO_FEATURE[modname] = _parse_mod_formula(formula)
update_all_mod_features()
from peptdeep.settings import (
model_const, mod_feature_size, MOD_TO_FEATURE,
mod_elements, mod_elem_to_idx,
_parse_mod_formula, update_all_mod_features,
)

def parse_mod_feature(
nAA:int,
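featurize.py now re-imports the modification featurization helpers from peptdeep.settings instead of defining them locally; the parsing logic itself is unchanged. For reference, a standalone sketch of what the removed `_parse_mod_formula` computes. The element list here is a made-up subset; peptdeep reads the real one from `model_const['mod_elements']`:

import numpy as np

# Made-up element list for illustration. As in the removed code, the last
# slot also collects counts for any element not in the list.
mod_elements = ["C", "H", "N", "O", "S", "P"]
mod_elem_to_idx = {elem: i for i, elem in enumerate(mod_elements)}

def parse_mod_formula(formula: str) -> np.ndarray:
    # "H(4)C(2)O(1)" -> per-element counts as a fixed-size feature vector
    feature = np.zeros(len(mod_elements))
    for elem in formula.strip(")").split(")"):
        chem, num = elem.split("(")
        if chem in mod_elem_to_idx:
            feature[mod_elem_to_idx[chem]] = int(num)
        else:
            feature[-1] += int(num)
    return feature

print(parse_mod_formula("H(4)C(2)O(1)"))  # [2. 4. 0. 1. 0. 0.]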
15 changes: 11 additions & 4 deletions peptdeep/pretrained_models.py
@@ -29,7 +29,7 @@
ccs_to_mobility_for_df
)

from peptdeep.settings import global_settings
from peptdeep.settings import global_settings, add_user_defined_modifications
from peptdeep.utils import logging, process_bar
from peptdeep.settings import global_settings

@@ -43,7 +43,7 @@
uniform_sampling, evaluate_linear_regression
)

from peptdeep.settings import global_settings
from peptdeep.settings import global_settings, update_global_settings

pretrain_dir = os.path.join(
os.path.join(
@@ -909,8 +909,9 @@ def predict_mobility(self, precursor_df:pd.DataFrame,
precursor_df
)

def _predict_func_for_mp(self, arg_dict):
def _predict_func_for_mp(self, arg_dict:dict):
"""Internal function, for multiprocessing"""
update_global_settings(arg_dict.pop("mp_global_settings"))
return self.predict_all(
multiprocessing=False, **arg_dict
)
@@ -930,6 +931,10 @@ def predict_all_mp(self, precursor_df:pd.DataFrame,

df_groupby = precursor_df.groupby('nAA')

mgr = mp.Manager()
mp_global_settings = mgr.dict()
mp_global_settings.update(global_settings)

def get_batch_num_mp(df_groupby):
batch_num = 0
for group_len in df_groupby.size().values:
@@ -944,6 +949,7 @@ def mp_param_generator(df_groupby):
'precursor_df': df.iloc[i:i+mp_batch_size,:],
'predict_items': predict_items,
'frag_types': frag_types,
'mp_global_settings': mp_global_settings
}

precursor_df_list = []
@@ -1076,7 +1082,8 @@ def refine_df(df):
update_precursor_mz(precursor_df)

if (
self.ms2_model.device_type!='cpu' or not multiprocessing
self.ms2_model.device_type!='cpu'
or not multiprocessing or process_num <= 1
or len(precursor_df) < min_required_precursor_num_for_mp
):
refine_df(precursor_df)
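The pretrained_models.py hunks make worker processes see the parent's runtime configuration: predict_all_mp publishes global_settings through a multiprocessing Manager dict, each worker restores it via update_global_settings before predicting, and the single-process path is now also taken when process_num <= 1. A standalone sketch of that hand-off pattern; `settings` and `update_settings` stand in for peptdeep's global_settings and update_global_settings, and the keys are placeholders:

# Standalone sketch of the Manager-dict hand-off added in predict_all_mp.
import multiprocessing as mp

settings = {"instrument": "Lumos", "nce": 30}

def update_settings(shared):
    settings.update(shared)

def worker(arg_dict):
    # mirror _predict_func_for_mp: restore the parent's settings, then work
    update_settings(arg_dict.pop("mp_global_settings"))
    return arg_dict["batch"], settings["instrument"]

if __name__ == "__main__":
    settings["instrument"] = "Astral"      # parent changes a setting at runtime
    mgr = mp.Manager()
    mp_settings = mgr.dict()
    mp_settings.update(settings)
    jobs = [{"batch": i, "mp_global_settings": mp_settings} for i in range(3)]
    with mp.Pool(2) as pool:
        print(pool.map(worker, jobs))      # each worker reports 'Astral', not 'Lumos'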
(Diffs for the remaining changed files were not loaded in this view.)
