From c1b6e0245674f103cc6310deee851fa98d113b26 Mon Sep 17 00:00:00 2001 From: orionarcher Date: Sun, 27 Oct 2024 17:53:24 -0400 Subject: [PATCH 1/3] Add a coerce formal charges method to openff code --- src/pymatgen/io/openff.py | 115 +++++++++++++++++++++++++++++++------- 1 file changed, 94 insertions(+), 21 deletions(-) diff --git a/src/pymatgen/io/openff.py b/src/pymatgen/io/openff.py index 5c6817f5735..fd91e14044b 100644 --- a/src/pymatgen/io/openff.py +++ b/src/pymatgen/io/openff.py @@ -3,6 +3,8 @@ from __future__ import annotations import warnings +import copy +from collections import defaultdict from pathlib import Path import numpy as np @@ -24,7 +26,40 @@ ) -def mol_graph_to_openff_mol(mol_graph: MoleculeGraph) -> tk.Molecule: +def coerce_formal_charges( + badly_charged_mol: tk.Molecule, template_mol: tk.Molecule +) -> tk.Molecule: + """Coerce formal charges on a molecule to match a template molecule. + + This is quite a hacky approach but is better than not fitting the formal charges at all. + + Args: + badly_charged_mol (tk.Molecule): The molecule with incorrectly assigned formal charges. + template_mol (tk.Molecule): The template molecule with the correct formal charges. + + Returns: + tk.Molecule: The molecule with correctly assigned formal charges. + """ + badly_charged_mol = copy.deepcopy(badly_charged_mol) + + atom_template_charges: dict[tuple[int, int], list[Quantity]] = defaultdict(list) + + # load list of formal charges + for atom in template_mol.atoms: + element_n_bonds = (atom.atomic_number, len(atom.bonds)) + atom_template_charges[element_n_bonds].append(atom.formal_charge) + + # apply formal charges + for atom in badly_charged_mol.atoms: + element_n_bonds = (atom.atomic_number, len(atom.bonds)) + atom.formal_charge = atom_template_charges[element_n_bonds].pop() + + return badly_charged_mol + + +def mol_graph_to_openff_mol( + mol_graph: MoleculeGraph, template_mol: tk.Molecule = None +) -> tk.Molecule: """ Convert a Pymatgen MoleculeGraph to an OpenFF Molecule. @@ -43,12 +78,19 @@ def mol_graph_to_openff_mol(mol_graph: MoleculeGraph) -> tk.Molecule: # TODO: should assert that there is only one molecule for i_node in range(len(mol_graph.graph.nodes)): node = mol_graph.graph.nodes[i_node] - atomic_number = node.get("atomic_number") or p_table[mol_graph.molecule[i_node].species_string] + atomic_number = ( + node.get("atomic_number") + or p_table[mol_graph.molecule[i_node].species_string] + ) # put formal charge on first atom if there is none present formal_charge = node.get("formal_charge") if formal_charge is None: - formal_charge = (i_node == 0) * int(round(mol_graph.molecule.charge, 0)) * unit.elementary_charge + formal_charge = ( + (i_node == 0) + * int(round(mol_graph.molecule.charge, 0)) + * unit.elementary_charge + ) # assume not aromatic if no info present is_aromatic = node.get("is_aromatic") or False @@ -72,6 +114,10 @@ def mol_graph_to_openff_mol(mol_graph: MoleculeGraph) -> tk.Molecule: openff_mol.add_bond(i_node, j, bond_order, is_aromatic=is_aromatic) openff_mol.add_conformer(mol_graph.molecule.cart_coords * unit.angstrom) + + if template_mol: + openff_mol = coerce_formal_charges(openff_mol, template_mol) + return openff_mol @@ -89,7 +135,11 @@ def mol_graph_from_openff_mol(molecule: tk.Molecule) -> MoleculeGraph: p_table = {el.Z: str(el) for el in Element} total_charge = cum_atoms = 0 - coords = molecule.conformers[0].magnitude if molecule.conformers is not None else np.zeros((molecule.n_atoms, 3)) + coords = ( + molecule.conformers[0].magnitude + if molecule.conformers is not None + else np.zeros((molecule.n_atoms, 3)) + ) for idx, atom in enumerate(molecule.atoms): mol_graph.insert_node( cum_atoms + idx, @@ -100,7 +150,9 @@ def mol_graph_from_openff_mol(molecule: tk.Molecule) -> MoleculeGraph: mol_graph.graph.nodes[cum_atoms + idx]["is_aromatic"] = atom.is_aromatic mol_graph.graph.nodes[cum_atoms + idx]["stereochemistry"] = atom.stereochemistry # set partial charge as a pure float - partial_charge = None if atom.partial_charge is None else atom.partial_charge.magnitude + partial_charge = ( + None if atom.partial_charge is None else atom.partial_charge.magnitude + ) mol_graph.graph.nodes[cum_atoms + idx]["partial_charge"] = partial_charge # set formal charge as a pure float formal_charge = atom.formal_charge.magnitude @@ -120,7 +172,9 @@ def mol_graph_from_openff_mol(molecule: tk.Molecule) -> MoleculeGraph: return mol_graph -def get_atom_map(inferred_mol: tk.Molecule, openff_mol: tk.Molecule) -> tuple[bool, dict[int, int]]: +def get_atom_map( + inferred_mol: tk.Molecule, openff_mol: tk.Molecule +) -> tuple[bool, dict[int, int]]: """ Compute an atom mapping between two OpenFF Molecules. @@ -141,25 +195,32 @@ def get_atom_map(inferred_mol: tk.Molecule, openff_mol: tk.Molecule) -> tuple[bo "return_atom_map": True, "formal_charge_matching": False, } - isomorphic, atom_map = tk.topology.Molecule.are_isomorphic(openff_mol, inferred_mol, **kwargs) + isomorphic, atom_map = tk.topology.Molecule.are_isomorphic( + openff_mol, inferred_mol, **kwargs + ) if isomorphic: return True, atom_map # relax stereochemistry restrictions kwargs["atom_stereochemistry_matching"] = False kwargs["bond_stereochemistry_matching"] = False - isomorphic, atom_map = tk.topology.Molecule.are_isomorphic(openff_mol, inferred_mol, **kwargs) + isomorphic, atom_map = tk.topology.Molecule.are_isomorphic( + openff_mol, inferred_mol, **kwargs + ) if isomorphic: return True, atom_map # relax bond order restrictions kwargs["bond_order_matching"] = False - isomorphic, atom_map = tk.topology.Molecule.are_isomorphic(openff_mol, inferred_mol, **kwargs) + isomorphic, atom_map = tk.topology.Molecule.are_isomorphic( + openff_mol, inferred_mol, **kwargs + ) if isomorphic: return True, atom_map return False, {} def infer_openff_mol( - mol_geometry: Molecule, + mol_geometry: MoleculeGraph | Molecule, + template_mol: tk.Molecule = None, ) -> tk.Molecule: """Infer an OpenFF Molecule from a Pymatgen Molecule. @@ -173,13 +234,19 @@ def infer_openff_mol( Returns: tk.Molecule: The inferred OpenFF Molecule. """ - mol_graph = MoleculeGraph.with_local_env_strategy(mol_geometry, OpenBabelNN()) - mol_graph = metal_edge_extender(mol_graph) - return mol_graph_to_openff_mol(mol_graph) + if isinstance(mol_geometry, Molecule): + mol_graph = MoleculeGraph.with_local_env_strategy(mol_geometry, OpenBabelNN()) + mol_graph = metal_edge_extender(mol_graph) + else: + mol_graph = mol_geometry + return mol_graph_to_openff_mol(mol_graph, template_mol=template_mol) -def add_conformer(openff_mol: tk.Molecule, geometry: Molecule | None) -> tuple[tk.Molecule, dict[int, int]]: +def add_conformer( + openff_mol: tk.Molecule, geometry: MoleculeGraph | Molecule | None +) -> tuple[tk.Molecule, dict[int, int]]: """ + Add conformers to an OpenFF Molecule based on the provided geometry. If a geometry is provided, infers an OpenFF Molecule from it, @@ -199,16 +266,18 @@ def add_conformer(openff_mol: tk.Molecule, geometry: Molecule | None) -> tuple[t """ # TODO: test this if geometry: - # for geometry in geometries: - inferred_mol = infer_openff_mol(geometry) + inferred_mol = infer_openff_mol(geometry, template_mol=openff_mol) is_isomorphic, atom_map = get_atom_map(inferred_mol, openff_mol) if not is_isomorphic: raise ValueError( f"An isomorphism cannot be found between smile {openff_mol.to_smiles()}" f"and the provided molecule {geometry}." ) - new_mol = Molecule.from_sites([geometry.sites[i] for i in atom_map.values()]) - openff_mol.add_conformer(new_mol.cart_coords * unit.angstrom) + original_mol = geometry if isinstance(geometry, Molecule) else geometry.molecule + ordered_mol = Molecule.from_sites( + [original_mol.sites[i] for i in atom_map.values()] + ) + openff_mol.add_conformer(ordered_mol.cart_coords * unit.angstrom) else: atom_map = {i: i for i in range(openff_mol.n_atoms)} openff_mol.generate_conformers(n_conformers=1) @@ -247,7 +316,9 @@ def assign_partial_charges( chargs = partial_charges[list(atom_map.values())] # type: ignore[index, call-overload] openff_mol.partial_charges = chargs * unit.elementary_charge elif openff_mol.n_atoms == 1: - openff_mol.partial_charges = np.array([openff_mol.total_charge.magnitude]) * unit.elementary_charge + openff_mol.partial_charges = ( + np.array([openff_mol.total_charge.magnitude]) * unit.elementary_charge + ) else: openff_mol.assign_partial_charges(charge_method) return openff_mol @@ -255,7 +326,7 @@ def assign_partial_charges( def create_openff_mol( smile: str, - geometry: Molecule | str | Path | None = None, + geometry: MoleculeGraph | Molecule | str | Path | None = None, charge_scaling: float = 1, partial_charges: list[float] | None = None, backup_charge_method: str = "am1bcc", @@ -289,7 +360,9 @@ def create_openff_mol( if geometry is None: raise ValueError("geometries must be set if partial_charges is set") if len(partial_charges) != len(geometry): - raise ValueError("partial charges must have same length & order as geometry") + raise ValueError( + "partial charges must have same length & order as geometry" + ) openff_mol = tk.Molecule.from_smiles(smile, allow_undefined_stereo=True) From 6d61f38ffa8e234acd38318d417ff5e64d5e8a7e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 27 Oct 2024 21:56:19 +0000 Subject: [PATCH 2/3] pre-commit auto-fixes --- src/pymatgen/io/openff.py | 59 ++++++++++----------------------------- 1 file changed, 14 insertions(+), 45 deletions(-) diff --git a/src/pymatgen/io/openff.py b/src/pymatgen/io/openff.py index fd91e14044b..3b767b1b039 100644 --- a/src/pymatgen/io/openff.py +++ b/src/pymatgen/io/openff.py @@ -2,8 +2,8 @@ from __future__ import annotations -import warnings import copy +import warnings from collections import defaultdict from pathlib import Path @@ -26,9 +26,7 @@ ) -def coerce_formal_charges( - badly_charged_mol: tk.Molecule, template_mol: tk.Molecule -) -> tk.Molecule: +def coerce_formal_charges(badly_charged_mol: tk.Molecule, template_mol: tk.Molecule) -> tk.Molecule: """Coerce formal charges on a molecule to match a template molecule. This is quite a hacky approach but is better than not fitting the formal charges at all. @@ -57,9 +55,7 @@ def coerce_formal_charges( return badly_charged_mol -def mol_graph_to_openff_mol( - mol_graph: MoleculeGraph, template_mol: tk.Molecule = None -) -> tk.Molecule: +def mol_graph_to_openff_mol(mol_graph: MoleculeGraph, template_mol: tk.Molecule = None) -> tk.Molecule: """ Convert a Pymatgen MoleculeGraph to an OpenFF Molecule. @@ -78,19 +74,12 @@ def mol_graph_to_openff_mol( # TODO: should assert that there is only one molecule for i_node in range(len(mol_graph.graph.nodes)): node = mol_graph.graph.nodes[i_node] - atomic_number = ( - node.get("atomic_number") - or p_table[mol_graph.molecule[i_node].species_string] - ) + atomic_number = node.get("atomic_number") or p_table[mol_graph.molecule[i_node].species_string] # put formal charge on first atom if there is none present formal_charge = node.get("formal_charge") if formal_charge is None: - formal_charge = ( - (i_node == 0) - * int(round(mol_graph.molecule.charge, 0)) - * unit.elementary_charge - ) + formal_charge = (i_node == 0) * int(round(mol_graph.molecule.charge, 0)) * unit.elementary_charge # assume not aromatic if no info present is_aromatic = node.get("is_aromatic") or False @@ -135,11 +124,7 @@ def mol_graph_from_openff_mol(molecule: tk.Molecule) -> MoleculeGraph: p_table = {el.Z: str(el) for el in Element} total_charge = cum_atoms = 0 - coords = ( - molecule.conformers[0].magnitude - if molecule.conformers is not None - else np.zeros((molecule.n_atoms, 3)) - ) + coords = molecule.conformers[0].magnitude if molecule.conformers is not None else np.zeros((molecule.n_atoms, 3)) for idx, atom in enumerate(molecule.atoms): mol_graph.insert_node( cum_atoms + idx, @@ -150,9 +135,7 @@ def mol_graph_from_openff_mol(molecule: tk.Molecule) -> MoleculeGraph: mol_graph.graph.nodes[cum_atoms + idx]["is_aromatic"] = atom.is_aromatic mol_graph.graph.nodes[cum_atoms + idx]["stereochemistry"] = atom.stereochemistry # set partial charge as a pure float - partial_charge = ( - None if atom.partial_charge is None else atom.partial_charge.magnitude - ) + partial_charge = None if atom.partial_charge is None else atom.partial_charge.magnitude mol_graph.graph.nodes[cum_atoms + idx]["partial_charge"] = partial_charge # set formal charge as a pure float formal_charge = atom.formal_charge.magnitude @@ -172,9 +155,7 @@ def mol_graph_from_openff_mol(molecule: tk.Molecule) -> MoleculeGraph: return mol_graph -def get_atom_map( - inferred_mol: tk.Molecule, openff_mol: tk.Molecule -) -> tuple[bool, dict[int, int]]: +def get_atom_map(inferred_mol: tk.Molecule, openff_mol: tk.Molecule) -> tuple[bool, dict[int, int]]: """ Compute an atom mapping between two OpenFF Molecules. @@ -195,24 +176,18 @@ def get_atom_map( "return_atom_map": True, "formal_charge_matching": False, } - isomorphic, atom_map = tk.topology.Molecule.are_isomorphic( - openff_mol, inferred_mol, **kwargs - ) + isomorphic, atom_map = tk.topology.Molecule.are_isomorphic(openff_mol, inferred_mol, **kwargs) if isomorphic: return True, atom_map # relax stereochemistry restrictions kwargs["atom_stereochemistry_matching"] = False kwargs["bond_stereochemistry_matching"] = False - isomorphic, atom_map = tk.topology.Molecule.are_isomorphic( - openff_mol, inferred_mol, **kwargs - ) + isomorphic, atom_map = tk.topology.Molecule.are_isomorphic(openff_mol, inferred_mol, **kwargs) if isomorphic: return True, atom_map # relax bond order restrictions kwargs["bond_order_matching"] = False - isomorphic, atom_map = tk.topology.Molecule.are_isomorphic( - openff_mol, inferred_mol, **kwargs - ) + isomorphic, atom_map = tk.topology.Molecule.are_isomorphic(openff_mol, inferred_mol, **kwargs) if isomorphic: return True, atom_map return False, {} @@ -274,9 +249,7 @@ def add_conformer( f"and the provided molecule {geometry}." ) original_mol = geometry if isinstance(geometry, Molecule) else geometry.molecule - ordered_mol = Molecule.from_sites( - [original_mol.sites[i] for i in atom_map.values()] - ) + ordered_mol = Molecule.from_sites([original_mol.sites[i] for i in atom_map.values()]) openff_mol.add_conformer(ordered_mol.cart_coords * unit.angstrom) else: atom_map = {i: i for i in range(openff_mol.n_atoms)} @@ -316,9 +289,7 @@ def assign_partial_charges( chargs = partial_charges[list(atom_map.values())] # type: ignore[index, call-overload] openff_mol.partial_charges = chargs * unit.elementary_charge elif openff_mol.n_atoms == 1: - openff_mol.partial_charges = ( - np.array([openff_mol.total_charge.magnitude]) * unit.elementary_charge - ) + openff_mol.partial_charges = np.array([openff_mol.total_charge.magnitude]) * unit.elementary_charge else: openff_mol.assign_partial_charges(charge_method) return openff_mol @@ -360,9 +331,7 @@ def create_openff_mol( if geometry is None: raise ValueError("geometries must be set if partial_charges is set") if len(partial_charges) != len(geometry): - raise ValueError( - "partial charges must have same length & order as geometry" - ) + raise ValueError("partial charges must have same length & order as geometry") openff_mol = tk.Molecule.from_smiles(smile, allow_undefined_stereo=True) From 054ae28d67d6b1d17fc36d06a3dafee60143d0ce Mon Sep 17 00:00:00 2001 From: orionarcher Date: Sat, 16 Nov 2024 11:42:47 -0500 Subject: [PATCH 3/3] add test --- tests/io/test_openff.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tests/io/test_openff.py b/tests/io/test_openff.py index 3c2def7bf4f..389e36854df 100644 --- a/tests/io/test_openff.py +++ b/tests/io/test_openff.py @@ -12,6 +12,7 @@ from pymatgen.io.openff import ( add_conformer, assign_partial_charges, + coerce_formal_charges, create_openff_mol, get_atom_map, infer_openff_mol, @@ -41,6 +42,36 @@ def mol_files(): } +def test_coerce_formal_charges(): + # Create a badly charged molecule + badly_charged = tk.Molecule.from_smiles("CCO") + badly_charged.atoms[2].formal_charge = 1 # Incorrectly assign +1 to oxygen + + # Create template molecule with correct charges + template = tk.Molecule.from_smiles("CCO") # All atoms neutral + + # Test coercion + fixed_mol = coerce_formal_charges(badly_charged, template) + + # Check charges were fixed + assert fixed_mol.atoms[2].formal_charge == 0 + assert fixed_mol != badly_charged # Should be a different object + assert badly_charged.atoms[2].formal_charge.magnitude == 1 # Original unchanged + + # Test with more complex case + badly_charged = tk.Molecule.from_smiles("F[P-](F)(F)(F)(F)F") + badly_charged.atoms[0].formal_charge = -1 # Wrong charge on F + badly_charged.atoms[1].formal_charge = 0 # Wrong charge on P + + template = tk.Molecule.from_smiles("F[P-](F)(F)(F)(F)F") # Correct charges + + fixed_mol = coerce_formal_charges(badly_charged, template) + + # Verify charges + assert fixed_mol.atoms[0].formal_charge.magnitude == 0 # F should be neutral + assert fixed_mol.atoms[1].formal_charge.magnitude == -1 # P should be -1 + + def test_mol_graph_from_atom_bonds(mol_files): pytest.importorskip("openff")