Merge pull request #130 from MannLabs/development
Development
jalew188 committed Jan 7, 2024
2 parents 8cc6541 + 22711d7 commit bc76528
Showing 16 changed files with 155 additions and 117 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 1.1.3
current_version = 1.1.4
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
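The `parse` pattern in this config also accepts pre-release suffixes. A quick illustrative check of that pattern in Python (the version strings below are examples, not values taken from the repository):

# Illustrative check of the bumpversion parse pattern shown above.
import re

PARSE = r"(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?"

print(re.fullmatch(PARSE, "1.1.4").groupdict())
# {'major': '1', 'minor': '1', 'patch': '4', 'release': None, 'build': None}
print(re.fullmatch(PARSE, "1.1.4-dev1").groupdict())
# {'major': '1', 'minor': '1', 'patch': '4', 'release': 'dev', 'build': '1'}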
2 changes: 1 addition & 1 deletion docs/conf.py
@@ -23,7 +23,7 @@
copyright = '2022, Mann Labs, MPIB'
author = 'Mann Labs, MPIB'

release = "1.1.3"
release = "1.1.4"

# -- General configuration ---------------------------------------------------

7 changes: 5 additions & 2 deletions nbdev_nbs/settings.ipynb
@@ -40,7 +40,9 @@
{
"data": {
"text/plain": [
"{'Lumos': 'Lumos',\n",
"{'ThermoTOF': 'ThermoTOF',\n",
" 'Astral': 'ThermoTOF',\n",
" 'Lumos': 'Lumos',\n",
" 'QE': 'QE',\n",
" 'timsTOF': 'timsTOF',\n",
" 'SciexTOF': 'SciexTOF',\n",
@@ -55,6 +57,8 @@
" 'QEHFX': 'QE',\n",
" 'Exploris': 'QE',\n",
" 'Exploris480': 'QE',\n",
" 'THERMOTOF': 'ThermoTOF',\n",
" 'ASTRAL': 'ThermoTOF',\n",
" 'LUMOS': 'Lumos',\n",
" 'TIMSTOF': 'timsTOF',\n",
" 'SCIEXTOF': 'SciexTOF',\n",
@@ -86,7 +90,6 @@
"metadata": {},
"outputs": [],
"source": [
"from peptdeep.model.featurize import MOD_TO_FEATURE, MOD_DF\n",
"add_user_defined_modifications({\n",
" \"Hi@S\":{'composition':\"H(2)\"},\n",
" \"AlphaX@S\":{'composition':\"O(10)\",'modloss_composition':\"O(3)\"}\n",
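With this notebook change, the instrument mapping routes the new "ThermoTOF" and "Astral" names (and their upper-case variants) to the "ThermoTOF" model group, and the modification-registration cell no longer imports MOD_TO_FEATURE from peptdeep.model.featurize. A minimal sketch of that registration call, assuming MOD_TO_FEATURE is now re-exported from peptdeep.settings as the peptdeep/model/featurize.py hunk below suggests; "Hi@S" and "AlphaX@S" are the notebook's example modifications, not built-in ones:

# Minimal sketch, assuming peptdeep.settings exposes both helpers.
from peptdeep.settings import add_user_defined_modifications, MOD_TO_FEATURE

add_user_defined_modifications({
    "Hi@S": {"composition": "H(2)"},
    "AlphaX@S": {"composition": "O(10)", "modloss_composition": "O(3)"},
})

print("AlphaX@S" in MOD_TO_FEATURE)  # expected True once the feature table is rebuilt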
2 changes: 1 addition & 1 deletion peptdeep/__init__.py
@@ -11,7 +11,7 @@
# pass

__project__ = "peptdeep"
__version__ = "1.1.3"
__version__ = "1.1.4"
__license__ = "Apache 2.0"
__description__ = "The AlphaX deep learning framework for Proteomics"
__author__ = "Mann Labs"
83 changes: 50 additions & 33 deletions peptdeep/cli_argparse.py
@@ -1,11 +1,13 @@
#!python

import os

from peptdeep.settings import global_settings, load_global_settings

import argparse

from peptdeep.settings import (
global_settings, load_global_settings,
_refine_global_settings
)

__argparse_dict_level_sep="--" # do not change

def convert_dict_to_argparse(
@@ -18,40 +20,48 @@ def convert_dict_to_argparse(
return [(prefix_key, settings)]
ret = []
for key, val in settings.items():
ret += convert_dict_to_argparse(
val, prefix_key=(prefix_key+dict_level_sep+key) if prefix_key else key
)
if key in [
"labeling_channels",
"psm_modification_mapping",
"user_defined_modifications",
"instrument_group",
]:
ret += [(prefix_key+dict_level_sep+key, val)]
else:
ret += convert_dict_to_argparse(
val, prefix_key=(prefix_key+dict_level_sep+key) if prefix_key else key
)
return ret
else:
return [(prefix_key, settings)]

def _set_dict_val(_dict, keys, val):
if len(keys) < 1: return
elif keys[0] == "labeling_channels":
def _get(x:str):
i = x.find(":")
k,v = x[:i], x[i+1:]
k = int(k) if k.isdigit() else k
v = v.split(";")
return k,v
_dict[keys[0]].update(dict([_get(s) for s in val]))
elif keys[0] == "psm_modification_mapping":
def _get(x):
i = x.find(":", x.find("@"))
k,v = x[:i], x[i+1:]
return k, v.split(";")
_dict[keys[0]].update(dict([_get(s) for s in val]))
elif keys[0] == "user_defined_modifications":
def _get(x):
i = x.find(":", x.find("@"))
k,v = x[:i], x[i+1:]
items = v.split(";")
if len(items) == 1:
return k, {"composition":items[0]}
else:
return k, {"composition": items[0], "modloss_composition": items[1]}
_dict[keys[0]].update(dict([_get(s) for s in val]))
elif len(keys) == 1:
if keys[0] == "labeling_channels":
def _get(x:str):
i = x.find(":")
k,v = x[:i], x[i+1:]
k = int(k) if k.isdigit() else k
v = v.split(";")
return k,v
val = dict([_get(s) for s in val])
elif keys[0] == "psm_modification_mapping":
def _get(x):
i = x.find(":", x.find("@"))
k,v = x[:i], x[i+1:]
return k, v.split(";")
val = dict([_get(s) for s in val])
elif keys[0] == "user_defined_modifications":
def _get(x):
i = x.find(":", x.find("@"))
k,v = x[:i], x[i+1:]
items = v.split(";")
if len(items) == 1:
return k, {"composition":items[0]}
else:
return k, {"composition": items[0], "modloss_composition": items[1]}
val = dict([_get(s) for s in val])
_dict[keys[0]] = val
else: _set_dict_val(_dict[keys[0]], keys[1:], val)
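The branches above parse the compact string formats that the special list-valued CLI options accept: labeling_channels entries look like "channel:mod1;mod2", and user_defined_modifications entries look like "ModName@Site:composition[;modloss_composition]". A standalone re-implementation of that parsing, for illustration only (the example strings are hypothetical, not taken from the peptdeep docs):

# Standalone sketch of the parsing branches above.
def parse_labeling_channel(x: str):
    # "0:Dimethyl@Any_N-term;Dimethyl@K" -> (0, ['Dimethyl@Any_N-term', 'Dimethyl@K'])
    i = x.find(":")
    k, v = x[:i], x[i + 1:]
    return (int(k) if k.isdigit() else k), v.split(";")

def parse_user_defined_mod(x: str):
    # "AlphaX@S:O(10);O(3)" -> ('AlphaX@S',
    #     {'composition': 'O(10)', 'modloss_composition': 'O(3)'})
    i = x.find(":", x.find("@"))  # look for the ":" only after the "@" in the mod name
    k, v = x[:i], x[i + 1:]
    items = v.split(";")
    mod = {"composition": items[0]}
    if len(items) > 1:
        mod["modloss_composition"] = items[1]
    return k, mod

print(parse_labeling_channel("0:Dimethyl@Any_N-term;Dimethyl@K"))
print(parse_user_defined_mod("AlphaX@S:O(10);O(3)"))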

@@ -83,8 +93,7 @@ def get_parser():
return parser

def parse_args_to_global_settings(parser, args):
args, extras = parser.parse_known_args(args)
args_dict = vars(args)
args_dict = vars(parser.parse_known_args(args)[0])
if "settings_yaml" in args_dict:
if os.path.isfile(
args_dict["settings_yaml"]
@@ -95,7 +104,15 @@ def parse_args_to_global_settings(parser, args):
else:
print(f"Settings.yaml `{args_dict['settings_yaml']}` does not exist.")
args_dict.pop("settings_yaml")
for key, val in vars(args).items():
used_args = {}
for arg in args:
if arg.startswith("--"):
arg = arg[2:].replace("--","__")
if arg in args_dict:
used_args[arg] = args_dict[arg]

for key, val in used_args.items():
keys = key.split("__")
_set_dict_val(global_settings, keys, val)
_refine_global_settings()
return global_settings
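The `used_args` loop is the behavioral change here: argparse assigns a default to every known option, so the code now scans the raw argv and applies only the options the user actually passed, leaving everything else (including values loaded from settings_yaml) untouched. A standalone sketch of that pattern; the `--task_workflow--threads` flag is hypothetical, since real peptdeep flags are generated from the keys of global_settings:

# Standalone sketch of the "only apply explicitly passed options" pattern.
import argparse

def explicitly_passed(argv, parsed_dict):
    used = {}
    for token in argv:
        if token.startswith("--"):
            # argparse turns "--a--b" into the dest "a__b", so mirror that here
            name = token[2:].replace("--", "__")
            if name in parsed_dict:
                used[name] = parsed_dict[name]
    return used

parser = argparse.ArgumentParser()
parser.add_argument("--task_workflow--threads", type=int, default=8)  # hypothetical flag

argv = ["--task_workflow--threads", "4"]
args_dict = vars(parser.parse_known_args(argv)[0])
print(explicitly_passed(argv, args_dict))                       # {'task_workflow__threads': 4}
print(explicitly_passed([], vars(parser.parse_known_args([])[0])))  # {} -> defaults are not applied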
33 changes: 5 additions & 28 deletions peptdeep/model/featurize.py
@@ -1,35 +1,12 @@
import numpy as np
import pandas as pd
from typing import List, Union
from alphabase.constants.modification import MOD_DF

from peptdeep.settings import model_const

mod_elements = model_const['mod_elements']
mod_feature_size = len(mod_elements)

mod_elem_to_idx = dict(zip(mod_elements, range(mod_feature_size)))

def _parse_mod_formula(formula):
'''
Parse a modification formula to a feature vector
'''
feature = np.zeros(mod_feature_size)
elems = formula.strip(')').split(')')
for elem in elems:
chem, num = elem.split('(')
num = int(num)
if chem in mod_elem_to_idx:
feature[mod_elem_to_idx[chem]] = num
else:
feature[-1] += num
return feature

MOD_TO_FEATURE = {}
def update_all_mod_features():
for modname, formula in MOD_DF[['mod_name','composition']].values:
MOD_TO_FEATURE[modname] = _parse_mod_formula(formula)
update_all_mod_features()
from peptdeep.settings import (
model_const, mod_feature_size, MOD_TO_FEATURE,
mod_elements, mod_elem_to_idx,
_parse_mod_formula, update_all_mod_features,
)

def parse_mod_feature(
nAA:int,
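featurize.py now re-imports the modification featurization helpers from peptdeep.settings instead of defining them locally; the parsing logic itself is unchanged. For reference, a standalone sketch of what the removed `_parse_mod_formula` computes. The element list here is a made-up subset; peptdeep reads the real one from `model_const['mod_elements']`:

import numpy as np

# Made-up element list for illustration. As in the removed code, the last
# slot also collects counts for any element not in the list.
mod_elements = ["C", "H", "N", "O", "S", "P"]
mod_elem_to_idx = {elem: i for i, elem in enumerate(mod_elements)}

def parse_mod_formula(formula: str) -> np.ndarray:
    # "H(4)C(2)O(1)" -> per-element counts as a fixed-size feature vector
    feature = np.zeros(len(mod_elements))
    for elem in formula.strip(")").split(")"):
        chem, num = elem.split("(")
        if chem in mod_elem_to_idx:
            feature[mod_elem_to_idx[chem]] = int(num)
        else:
            feature[-1] += int(num)
    return feature

print(parse_mod_formula("H(4)C(2)O(1)"))  # [2. 4. 0. 1. 0. 0.]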
15 changes: 11 additions & 4 deletions peptdeep/pretrained_models.py
@@ -29,7 +29,7 @@
ccs_to_mobility_for_df
)

from peptdeep.settings import global_settings
from peptdeep.settings import global_settings, add_user_defined_modifications
from peptdeep.utils import logging, process_bar
from peptdeep.settings import global_settings

@@ -43,7 +43,7 @@
uniform_sampling, evaluate_linear_regression
)

from peptdeep.settings import global_settings
from peptdeep.settings import global_settings, update_global_settings

pretrain_dir = os.path.join(
os.path.join(
@@ -909,8 +909,9 @@ def predict_mobility(self, precursor_df:pd.DataFrame,
precursor_df
)

def _predict_func_for_mp(self, arg_dict):
def _predict_func_for_mp(self, arg_dict:dict):
"""Internal function, for multiprocessing"""
update_global_settings(arg_dict.pop("mp_global_settings"))
return self.predict_all(
multiprocessing=False, **arg_dict
)
@@ -930,6 +931,10 @@ def predict_all_mp(self, precursor_df:pd.DataFrame,

df_groupby = precursor_df.groupby('nAA')

mgr = mp.Manager()
mp_global_settings = mgr.dict()
mp_global_settings.update(global_settings)

def get_batch_num_mp(df_groupby):
batch_num = 0
for group_len in df_groupby.size().values:
@@ -944,6 +949,7 @@ def mp_param_generator(df_groupby):
'precursor_df': df.iloc[i:i+mp_batch_size,:],
'predict_items': predict_items,
'frag_types': frag_types,
'mp_global_settings': mp_global_settings
}

precursor_df_list = []
@@ -1076,7 +1082,8 @@ def refine_df(df):
update_precursor_mz(precursor_df)

if (
self.ms2_model.device_type!='cpu' or not multiprocessing
self.ms2_model.device_type!='cpu'
or not multiprocessing or process_num <= 1
or len(precursor_df) < min_required_precursor_num_for_mp
):
refine_df(precursor_df)
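The pretrained_models.py hunks make worker processes see the parent's runtime configuration: predict_all_mp publishes global_settings through a multiprocessing Manager dict, each worker restores it via update_global_settings before predicting, and the single-process path is now also taken when process_num <= 1. A standalone sketch of that hand-off pattern; `settings` and `update_settings` stand in for peptdeep's global_settings and update_global_settings, and the keys are placeholders:

# Standalone sketch of the Manager-dict hand-off added in predict_all_mp.
import multiprocessing as mp

settings = {"instrument": "Lumos", "nce": 30}

def update_settings(shared):
    settings.update(shared)

def worker(arg_dict):
    # mirror _predict_func_for_mp: restore the parent's settings, then work
    update_settings(arg_dict.pop("mp_global_settings"))
    return arg_dict["batch"], settings["instrument"]

if __name__ == "__main__":
    settings["instrument"] = "Astral"      # parent changes a setting at runtime
    mgr = mp.Manager()
    mp_settings = mgr.dict()
    mp_settings.update(settings)
    jobs = [{"batch": i, "mp_global_settings": mp_settings} for i in range(3)]
    with mp.Pool(2) as pool:
        print(pool.map(worker, jobs))      # each worker reports 'Astral', not 'Lumos'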
(Diffs for the remaining changed files were not loaded in this view.)
