Skip to content

Commit

Permalink
Turn off identify_connected_components default (#760)
Browse files Browse the repository at this point in the history
* turn off identify_connected_components

* rename option to better reflect its purpose and shorten assert_x_params

* address remaining failed tests

* address comments by Cal and Chris

* quick fix to a docstring

* Update gmso/parameterization/parameterize.py

Co-authored-by: CalCraven <[email protected]>

* Update gmso/parameterization/topology_parameterizer.py

Co-authored-by: CalCraven <[email protected]>

---------

Co-authored-by: chrisjonesbsu <[email protected]>
Co-authored-by: CalCraven <[email protected]>
  • Loading branch information
3 people authored Sep 7, 2023
1 parent 0011e15 commit 6230bd9
Show file tree
Hide file tree
Showing 7 changed files with 68 additions and 97 deletions.
57 changes: 22 additions & 35 deletions gmso/parameterization/parameterize.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,63 +12,52 @@ def apply(
forcefields,
match_ff_by="molecule",
identify_connections=False,
identify_connected_components=True,
use_molecule_info=False,
assert_bond_params=True,
assert_angle_params=True,
assert_dihedral_params=True,
assert_improper_params=False,
speedup_by_molgraph=False,
speedup_by_moltag=False,
ignore_params=["improper"],
remove_untyped=False,
fast_copy=True,
):
"""Set Topology parameter types from GMSO ForceFields.
Parameters
----------
top: gmso.core.topology.Topology, required
top : gmso.core.topology.Topology, required
The GMSO topology on which to apply forcefields
forcefields: ForceField or dict, required
forcefields : ForceField or dict, required
The forcefield to apply. If a dictionary is used the keys are labels that match
the molecule name (specified as a label of site), and the values are gmso ForceField objects that gets applied
to the specified molecule.
Note: if a Topology with no molecule is provided, this option will only take
a ForceField object. If a dictionary of ForceFields is provided, this method will
fail.
match_ff_by: str, optional, default="molecule"
match_ff_by : str, optional, default="molecule"
They site's tag used to match the forcefields provided above to the Topology.
Options include "molecule" and "group". This option is only valid if forcefields are provided
as a dict.
identify_connections: bool, optional, default=False
identify_connections : bool, optional, default=False
If true, add connections identified using networkx graph matching to match
the topology's bonding graph to smaller sub-graphs that correspond to an angle,
dihedral, improper etc
identify_connected_components: bool, optional, default=True
speedup_by_molgraph: bool, optional, default=False
A flag to determine whether or not to search the topology for repeated disconnected
structures, otherwise known as molecules and type each molecule only once.
This option will be usefult to handle systems with many repeated small molecules,
but may slow down system with large molecule, e.g., monolayer.
use_molecule_info: bool, optional, default=False
A flag to determine whether or not to look at site.residue_name to look parameterize
each molecule only once. Currently unused.
speedup_by_moltag : bool, optional, default=False
A flag to determine whether or not to look at site.molecule_name to try to parameterize
each molecule only once. This option provides speedup for topologies with properly
assigned molecule and residue labels.
assert_bond_params : bool, optional, default=True
If True, an error is raised if parameters are not found for all system
bonds.
assert_angle_params : bool, optional, default=True
If True, an error is raised if parameters are not found for all system
angles.
assert_dihedral_params : bool, optional, default=True
If True, an error is raised if parameters are not found for all system
proper dihedrals.
assert_improper_params : bool, optional, default=False
If True, an error is raised if parameters are not found for all system
improper dihedrals.
ignore_params : set or list or tuple, optional, default=["impropers"]
Skipping the checks that make sure all connections (in the list) have a connection types.
Available options includes "bonds", "angles", "dihedrals", and "impropers".
If you wish to have all connection types checks, provides an empty set/list/tuple.
remove_untyped : bool, optional, default=False
If True, after the atomtyping and parameterization step, remove all connection
Expand All @@ -83,16 +72,14 @@ def apply(
this should be changed to False if further modification of expressions are
necessary post parameterization.
"""
ignore_params = set([option.lower() for option in ignore_params])
config = TopologyParameterizationConfig.parse_obj(
dict(
match_ff_by=match_ff_by,
identify_connections=identify_connections,
identify_connected_components=identify_connected_components,
use_molecule_info=use_molecule_info,
assert_bond_params=assert_bond_params,
assert_angle_params=assert_angle_params,
assert_dihedral_params=assert_dihedral_params,
assert_improper_params=assert_improper_params,
speedup_by_molgraph=speedup_by_molgraph,
speedup_by_moltag=speedup_by_moltag,
ignore_params=ignore_params,
remove_untyped=remove_untyped,
fast_copy=fast_copy,
)
Expand Down
68 changes: 26 additions & 42 deletions gmso/parameterization/topology_parameterizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,44 +58,24 @@ class TopologyParameterizationConfig(GMSOBase):
"angle, dihedral, improper etc",
)

identify_connected_components: bool = Field(
speedup_by_molgraph: bool = Field(
default=False,
description="A flag to determine whether or not to search the topology"
" for repeated disconnected structures, otherwise known as "
"molecules and type each molecule only once.",
)

use_molecule_info: bool = Field(
speedup_by_moltag: bool = Field(
default=False,
description="A flag to determine whether or not to look at site.molecule "
"to look parameterize each molecule only once. Will only be used if "
"identify_connected_components=False",
) # Unused

assert_bond_params: bool = Field(
default=True,
description="If True, an error is raised if parameters are not found for "
"all system bonds.",
"speedup_by_molgraph=True",
)

assert_angle_params: bool = Field(
default=True,
description="If True, an error is raised if parameters are not found for "
"all system angles",
)

assert_dihedral_params: bool = (
Field(
default=True,
description="If True, an error is raised if parameters are not found for "
"all system dihedrals.",
),
)

assert_improper_params: bool = Field(
default=False,
description="If True, an error is raised if parameters are not found for "
"all system impropers.",
ignore_params: list = Field(
default=[],
description="Skipping the checks that make sure all connections (in the list) "
"have a connection types.",
)

remove_untyped: bool = Field(
Expand Down Expand Up @@ -134,7 +114,7 @@ def get_ff(self, key=None):
else:
return self.forcefields

def _parameterize_sites(self, sites, typemap, ff, use_molecule_info=None):
def _parameterize_sites(self, sites, typemap, ff, speedup_by_moltag=None):
"""Parameterize sites with appropriate atom-types from the forcefield."""
for j, site in enumerate(sites):
site.atom_type = ff.get_potential(
Expand Down Expand Up @@ -170,16 +150,20 @@ def _parameterize_connections(
impropers = top.impropers

self._apply_connection_parameters(
bonds, ff, self.config.assert_bond_params
bonds, ff, False if "bond" in self.config.ignore_params else True
)
self._apply_connection_parameters(
angles, ff, self.config.assert_angle_params
angles, ff, False if "angle" in self.config.ignore_params else True
)
self._apply_connection_parameters(
dihedrals, ff, self.config.assert_dihedral_params
dihedrals,
ff,
False if "dihedral" in self.config.ignore_params else True,
)
self._apply_connection_parameters(
impropers, ff, self.config.assert_improper_params
impropers,
ff,
False if "improper" in self.config.ignore_params else True,
)

def _apply_connection_parameters(
Expand Down Expand Up @@ -231,7 +215,7 @@ def _apply_connection_parameters(
)

def _parameterize(
self, top, typemap, label_type=None, label=None, use_molecule_info=False
self, top, typemap, label_type=None, label=None, speedup_by_moltag=False
):
"""Parameterize a topology/subtopology based on an atomtype map."""
if label and label_type:
Expand All @@ -242,7 +226,7 @@ def _parameterize(
sites = top.sites

self._parameterize_sites(
sites, typemap, forcefield, use_molecule_info=use_molecule_info
sites, typemap, forcefield, speedup_by_moltag=speedup_by_moltag
)
self._parameterize_connections(
top,
Expand Down Expand Up @@ -353,27 +337,27 @@ def run_parameterization(self):
self.topology,
self.config.match_ff_by,
label,
self.config.use_molecule_info,
self.config.identify_connected_components,
self.config.speedup_by_moltag,
self.config.speedup_by_molgraph,
)
self._parameterize(
self.topology,
typemap,
label_type=self.config.match_ff_by,
label=label,
use_molecule_info=self.config.use_molecule_info, # This will be removed from the future iterations
speedup_by_moltag=self.config.speedup_by_moltag, # This will be removed from the future iterations
)
else:
typemap = self._get_atomtypes(
self.get_ff(),
self.topology,
use_molecule_info=self.config.use_molecule_info,
use_isomorphic_checks=self.config.identify_connected_components,
speedup_by_moltag=self.config.speedup_by_moltag,
use_isomorphic_checks=self.config.speedup_by_molgraph,
)
self._parameterize(
self.topology,
typemap,
use_molecule_info=self.config.use_molecule_info,
speedup_by_moltag=self.config.speedup_by_moltag,
)

self._set_scaling_factors() # Set global or per molecule scaling factors
Expand Down Expand Up @@ -417,7 +401,7 @@ def _get_atomtypes(
topology,
label_type=None,
label=None,
use_molecule_info=False,
speedup_by_moltag=False,
use_isomorphic_checks=False,
):
"""Run atom-typing in foyer and return the typemap."""
Expand All @@ -428,7 +412,7 @@ def _get_atomtypes(
label,
)

if use_molecule_info:
if speedup_by_moltag:
# Iterate through foyer_topology_graph, which is a subgraph of label_type
typemap, reference = dict(), dict()
for connected_component in nx.connected_components(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
class TestImpropersParameterization(ParameterizationBaseTest):
def test_improper_parameterization(self, fake_improper_ff_gmso, ethane):
ethane.identify_connections()
apply(ethane, fake_improper_ff_gmso, assert_improper_params=True)
apply(ethane, fake_improper_ff_gmso, ignore_params=list())

lib = PotentialTemplateLibrary()
template_improper_type = lib["PeriodicImproperPotential"]
Expand Down Expand Up @@ -61,7 +61,7 @@ def test_improper_parameterization(self, fake_improper_ff_gmso, ethane):

def test_improper_assertion_error(self, ethane_methane_top, oplsaa_gmso):
with pytest.raises(ParameterizationError):
apply(ethane_methane_top, oplsaa_gmso, assert_improper_params=True)
apply(ethane_methane_top, oplsaa_gmso, ignore_params=list())

@pytest.mark.parametrize(
"mol2_loc",
Expand Down
2 changes: 1 addition & 1 deletion gmso/tests/parameterization/test_opls_gmso.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def test_foyer_oplsaa_files(

gmso_top_from_pmd = from_parmed(struct, refer_type=True)
gmso_top = from_parmed(struct, refer_type=False)
apply(gmso_top, oplsaa_gmso, identify_connected_components=False)
apply(gmso_top, oplsaa_gmso, speedup_by_molgraph=False)

assert_same_atom_params(gmso_top, gmso_top_from_pmd)
assert_same_connection_params(gmso_top, gmso_top_from_pmd)
Expand Down
28 changes: 14 additions & 14 deletions gmso/tests/parameterization/test_parameterization_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,23 +137,23 @@ def test_empty_top_parameterization(self, oplsaa_gmso):
apply(top=Topology(), forcefields=oplsaa_gmso)

@pytest.mark.parametrize(
"identify_connected_components, use_molecule_info",
"speedup_by_molgraph, speedup_by_moltag",
[(False, False), (True, False), (False, True), (True, True)],
)
def test_speedup_options(
self,
ethane_box_with_methane,
oplsaa_gmso,
identify_connected_components,
use_molecule_info,
speedup_by_molgraph,
speedup_by_moltag,
):
ethane_box_with_methane.identify_connections()
apply(
ethane_box_with_methane,
oplsaa_gmso,
identify_connections=False,
identify_connected_components=identify_connected_components,
use_molecule_info=use_molecule_info,
speedup_by_molgraph=speedup_by_molgraph,
speedup_by_moltag=speedup_by_moltag,
)

molecule_labels = ethane_box_with_methane.unique_site_labels("molecule")
Expand Down Expand Up @@ -214,8 +214,8 @@ def test_match_ff_by_molecule(self, ethane_box_with_methane, oplsaa_gmso):
ff_dict,
match_ff_by="molecule",
identify_connections=False,
identify_connected_components=True,
use_molecule_info=True,
speedup_by_molgraph=True,
speedup_by_moltag=True,
)
assert ethane_box_with_methane.atom_types is not None

Expand All @@ -231,13 +231,13 @@ def test_match_ff_by_group(self, ethane_box_with_methane, oplsaa_gmso):
ff_dict,
match_ff_by="group",
identify_connections=False,
identify_connected_components=True,
use_molecule_info=True,
speedup_by_molgraph=True,
speedup_by_moltag=True,
)
assert ethane_box_with_methane.atom_types is not None

@pytest.mark.parametrize(
"identify_connected_components, use_molecule_info, match_ff_by",
"speedup_by_molgraph, speedup_by_moltag, match_ff_by",
[
(False, False, "group"),
(True, False, "group"),
Expand All @@ -253,8 +253,8 @@ def test_hierarchical_mol_structure(
self,
oplsaa_gmso,
hierarchical_top,
identify_connected_components,
use_molecule_info,
speedup_by_molgraph,
speedup_by_moltag,
match_ff_by,
):
top = deepcopy(hierarchical_top)
Expand All @@ -274,7 +274,7 @@ def test_hierarchical_mol_structure(
apply(
top,
ff_dict,
identify_connected_components=identify_connected_components,
use_molecule_info=use_molecule_info,
speedup_by_molgraph=speedup_by_molgraph,
speedup_by_moltag=speedup_by_moltag,
match_ff_by=match_ff_by,
)
2 changes: 1 addition & 1 deletion gmso/tests/parameterization/test_trappe_gmso.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def test_foyer_trappe_files(
apply(
gmso_top,
trappe_ua_gmso,
identify_connected_components=False,
speedup_by_molgraph=False,
identify_connections=True,
)
gmso_top_from_parmeterized_pmd = from_parmed(struct_pmd)
Expand Down
4 changes: 2 additions & 2 deletions gmso/utils/specific_ff_to_residue.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,10 +284,10 @@ def specific_ff_to_residue(
gmso_apply(
new_gmso_topology,
gmso_compatible_forcefield_selection,
identify_connected_components=True,
speedup_by_molgraph=True,
identify_connections=True,
match_ff_by=gmso_match_ff_by,
use_molecule_info=True,
speedup_by_moltag=True,
remove_untyped=True,
)
new_gmso_topology.update_topology()
Expand Down

0 comments on commit 6230bd9

Please sign in to comment.