Skip to content

Commit 3d51f6d

Browse files
authored
Support unnamed molecules (#85)
* Support providing mols with no _Name entry
* Test that generation works without a mol name
* Fix path in loaded file
1 parent 4fea3d6 commit 3d51f6d

File tree

8 files changed, with 70 additions (+70) and 31 deletions (-31).

doc/source/usage/config.rst

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ Loading Default Parameters
1212
The below example shows all default parameters, accessed via the
1313
:py:mod:`e3fp.config` module.
1414

15-
.. literalinclude:: ../../../e3fp/config/defaults.cfg
15+
.. literalinclude:: ../../../src/e3fp/config/defaults.cfg
1616
:caption: `defaults.cfg <https://github.com/keiserlab/e3fp/blob/master/e3fp/config/defaults.cfg>`_
1717

1818
:py:mod:`configparser` is used internally to parse and store these

src/e3fp/conformer/generate.py

Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -120,14 +120,19 @@ def generate_conformers(
120120
conformations generated, and 2D numpy array of pairwise RMSDs between
121121
final conformations.
122122
"""
123-
if name is None:
123+
if name is None and input_mol.HasProp("_Name"):
124124
name = input_mol.GetProp("_Name")
125+
log_name = name
126+
else:
127+
log_name = "molecule"
125128

126129
if standardise:
127130
input_mol = mol_to_standardised_mol(input_mol)
128131

129132
if save:
130133
if out_file is None:
134+
if name is None:
135+
raise ValueError("Molecule is missing property '_Name', cannot save conformers.")
131136
extensions = ("", ".gz", ".bz2")
132137
if compress not in (0, 1, 2):
133138
compress = 0
@@ -139,7 +144,7 @@ def generate_conformers(
139144
logging.warning("{} already exists. Skipping.".format(out_file))
140145
return False
141146

142-
logging.info("Generating conformers for {}.".format(name))
147+
logging.info("Generating conformers for {}.".format(log_name))
143148
try:
144149
conf_gen = ConformerGenerator(
145150
num_conf=num_conf,
@@ -154,25 +159,25 @@ def generate_conformers(
154159
mol, values = conf_gen.generate_conformers(input_mol)
155160
logging.info(
156161
"Generated {:d} conformers for {}.".format(
157-
mol.GetNumConformers(), name
162+
mol.GetNumConformers(), log_name
158163
)
159164
)
160165
except Exception:
161166
logging.warning(
162-
"Problem generating conformers for {}.".format(name), exc_info=True
167+
"Problem generating conformers for {}.".format(log_name), exc_info=True
163168
)
164169
return False
165170

166171
if save:
167172
try:
168173
mol_to_sdf(mol, out_file)
169174
logging.info(
170-
"Saved conformers for {} to {}.".format(name, out_file)
175+
"Saved conformers for {} to {}.".format(log_name, out_file)
171176
)
172177
except Exception:
173178
logging.warning(
174179
"Problem saving conformers for {} to {}.".format(
175-
name, out_file
180+
log_name, out_file
176181
),
177182
exc_info=True,
178183
)

src/e3fp/conformer/generator.py

Lines changed: 13 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -206,20 +206,19 @@ def embed_molecule(self, mol):
206206
mol : RDKit Mol
207207
Molecule.
208208
"""
209-
logging.debug("Adding hydrogens for %s" % mol.GetProp("_Name"))
209+
log_name = mol.GetProp("_Name") if mol.HasProp("_Name") else "molecule"
210+
logging.debug("Adding hydrogens for %s" % log_name)
210211
mol = Chem.AddHs(mol) # add hydrogens
211-
logging.debug("Hydrogens added to %s" % mol.GetProp("_Name"))
212-
logging.debug("Sanitizing mol for %s" % mol.GetProp("_Name"))
212+
logging.debug("Hydrogens added to %s" % log_name)
213+
logging.debug("Sanitizing mol for %s" % log_name)
213214
Chem.SanitizeMol(mol)
214-
logging.debug("Mol sanitized for %s" % mol.GetProp("_Name"))
215+
logging.debug("Mol sanitized for %s" % log_name)
215216
if self.max_conformers == -1 or type(self.max_conformers) is not int:
216217
self.max_conformers = self.get_num_conformers(mol)
217218
n_confs = self.max_conformers * self.pool_multiplier
218219
if self.first_conformers == -1:
219220
self.first_conformers = self.max_conformers
220-
logging.debug(
221-
"Embedding %d conformers for %s" % (n_confs, mol.GetProp("_Name"))
222-
)
221+
logging.debug("Embedding %d conformers for %s" % (n_confs, log_name))
223222
AllChem.EmbedMultipleConfs(
224223
mol,
225224
numConfs=n_confs,
@@ -228,7 +227,7 @@ def embed_molecule(self, mol):
228227
randomSeed=self.seed,
229228
ignoreSmoothingFailures=True,
230229
)
231-
logging.debug("Conformers embedded for %s" % mol.GetProp("_Name"))
230+
logging.debug("Conformers embedded for %s" % log_name)
232231
return mol
233232

234233
def get_molecule_force_field(self, mol, conf_id=None, **kwargs):
@@ -269,11 +268,12 @@ def minimize_conformers(self, mol):
269268
mol : RDKit Mol
270269
Molecule.
271270
"""
272-
logging.debug("Minimizing conformers for %s" % mol.GetProp("_Name"))
271+
log_name = mol.GetProp("_Name") if mol.HasProp("_Name") else "molecule"
272+
logging.debug("Minimizing conformers for %s" % log_name)
273273
for conf in mol.GetConformers():
274274
ff = self.get_molecule_force_field(mol, conf_id=conf.GetId())
275275
ff.Minimize()
276-
logging.debug("Conformers minimized for %s" % mol.GetProp("_Name"))
276+
logging.debug("Conformers minimized for %s" % log_name)
277277

278278
def get_conformer_energies(self, mol):
279279
"""Calculate conformer energies.
@@ -308,7 +308,8 @@ def filter_conformers(self, mol):
308308
A new RDKit Mol containing the chosen conformers, sorted by
309309
increasing energy.
310310
"""
311-
logging.debug("Pruning conformers for %s" % mol.GetProp("_Name"))
311+
log_name = mol.GetProp("_Name") if mol.HasProp("_Name") else "molecule"
312+
logging.debug("Pruning conformers for %s" % log_name)
312313
energies = self.get_conformer_energies(mol)
313314
energy_below_threshold = np.ones_like(energies, dtype=np.bool_)
314315

@@ -378,7 +379,7 @@ def filter_conformers(self, mol):
378379
conf = mol.GetConformer(conf_ids[i])
379380
new.AddConformer(conf, assignId=True)
380381

381-
logging.debug("Conformers filtered for %s" % mol.GetProp("_Name"))
382+
logging.debug("Conformers filtered for %s" % log_name)
382383
return new, np.asarray(accepted, dtype=int), energies, rmsds
383384

384385
@staticmethod

src/e3fp/fingerprint/fprinter.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -556,13 +556,15 @@ def __init__(
556556
if i < j
557557
]
558558
if len(overlap_atoms) > 0:
559+
owning_mol = conf.GetOwningMol()
560+
name = owning_mol.GetProp("_Name") if owning_mol.HasProp("_Name") else "molecule"
559561
logging.warning(
560562
"Overlapping atoms {} in conformer {} of molecule"
561563
" {}. Fingerprinting will continue but is less "
562564
"reliable.".format(
563565
", ".join(map(repr, overlap_atoms)),
564566
conf.GetId(),
565-
conf.GetOwningMol().GetProp("_Name"),
567+
name,
566568
)
567569
)
568570

src/e3fp/fingerprint/generate.py

Lines changed: 18 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -126,7 +126,12 @@ def fprints_dict_from_mol(
126126
sdf_file : str
127127
SDF file path.
128128
"""
129-
name = mol.GetProp("_Name")
129+
if mol.HasProp("_Name"):
130+
name = mol.GetProp("_Name")
131+
log_name = name
132+
else:
133+
name = None
134+
log_name = "mol"
130135

131136
if level is None:
132137
level = -1
@@ -135,6 +140,8 @@ def fprints_dict_from_mol(
135140
bits = BITS
136141

137142
if save:
143+
if name is None:
144+
raise ValueError("Molecule is missing property '_Name', cannot save fingerprints.")
138145
filenames = []
139146
all_files_exist = True
140147
if level == -1 or not all_iters:
@@ -162,7 +169,7 @@ def fprints_dict_from_mol(
162169
if all_files_exist and not overwrite:
163170
logging.warning(
164171
"All fingerprint files for {!s} already exist. "
165-
"Skipping.".format(name)
172+
"Skipping.".format(log_name)
166173
)
167174
return {}
168175

@@ -180,7 +187,7 @@ def fprints_dict_from_mol(
180187

181188
try:
182189
fprints_dict = {}
183-
logging.info("Generating fingerprints for {!s}.".format(name))
190+
logging.info("Generating fingerprints for {!s}.".format(log_name))
184191
for j, conf in enumerate(mol.GetConformers()):
185192
if j == first:
186193
j -= 1
@@ -194,16 +201,17 @@ def fprints_dict_from_mol(
194201
level_range = range(level + 1)
195202
for i in level_range:
196203
fprint = fingerprinter.get_fingerprint_at_level(i)
197-
fprint.name = MolItemName.from_str(name).to_conf_name(j)
204+
if name is not None:
205+
fprint.name = MolItemName.from_str(name).to_conf_name(j)
198206
# if i not in fprints_dict and j != 0:
199207
# fprints_dict[i] = fprints_dict[i-1][:j]
200208
fprints_dict.setdefault(i, []).append(fprint)
201209
logging.info(
202-
"Generated {:d} fingerprints for {!s}.".format(j + 1, name)
210+
"Generated {:d} fingerprints for {!s}.".format(j + 1, log_name)
203211
)
204212
except Exception:
205213
logging.error(
206-
"Error generating fingerprints for {:s}.".format(name),
214+
"Error generating fingerprints for {:s}.".format(log_name),
207215
exc_info=True,
208216
)
209217
return {}
@@ -213,11 +221,11 @@ def fprints_dict_from_mol(
213221
fprints = fprints_dict[max(fprints_dict.keys())]
214222
try:
215223
fp.savez(filenames[0], *fprints)
216-
logging.info("Saved fingerprints for {:s}.".format(name))
224+
logging.info("Saved fingerprints for {:s}.".format(log_name))
217225
except Exception:
218226
logging.error(
219227
"Error saving fingerprints for {:s} to {:s}".format(
220-
name, filenames[0]
228+
log_name, filenames[0]
221229
),
222230
exc_info=True,
223231
)
@@ -226,11 +234,11 @@ def fprints_dict_from_mol(
226234
try:
227235
for i, fprints in sorted(fprints_dict.items()):
228236
fp.savez(filenames[i], *fprints)
229-
logging.info("Saved fingerprints for {:s}.".format(name))
237+
logging.info("Saved fingerprints for {:s}.".format(log_name))
230238
except Exception:
231239
logging.error(
232240
"Error saving fingerprints for {:s} to {:s}".format(
233-
name, filenames[i]
241+
log_name, filenames[i]
234242
),
235243
exc_info=True,
236244
)

src/e3fp/fingerprint/structs.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -263,7 +263,8 @@ def shell_to_pdb(
263263
list of str: list of PDB file lines, if `out_file` not specified
264264
"""
265265
remark = "REMARK 400"
266-
header_lines = [remark + " COMPOUND", remark + " " + mol.GetProp("_Name")]
266+
name = mol.GetProp("_Name") if mol.HasProp("_Name") else "molecule"
267+
header_lines = [remark + " COMPOUND", remark + " " + name]
267268
lines = header_lines + [
268269
"MODEL",
269270
]

tests/test_conformer.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -86,3 +86,14 @@ def test_compressed_sdf_reads_same_as_uncompressed(self):
8686
sdf_files = [SDF_FILE_COMPRESSED, SDF_FILE_UNCOMPRESSED]
8787
smiles = [Chem.MolToSmiles(mol_from_sdf(f)) for f in sdf_files]
8888
assert smiles[0] == smiles[1]
89+
90+
def test_conformer_generation_without_name(self):
91+
from e3fp.conformer.util import mol_from_smiles
92+
from e3fp.conformer.generate import generate_conformers
93+
94+
confgen_params = {"num_conf": 1, "seed": 42}
95+
smiles = "C" * 20 # long flexible molecule
96+
mol = mol_from_smiles(smiles, "tmp")
97+
mol.ClearProp("_Name")
98+
assert not mol.HasProp("_Name")
99+
generate_conformers(mol, **confgen_params)

tests/test_fingerprinting.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -925,6 +925,17 @@ def test_reordering_mols_produces_same_fprints(self):
925925

926926
assert fprints1 == fprints2
927927

928+
def test_fingerprint_generation_without_name(self):
929+
from e3fp.fingerprint import fprinter
930+
from e3fp.conformer.util import mol_from_sdf
931+
932+
mol = mol_from_sdf(PLANAR_SDF_FILE)
933+
mol.ClearProp("_Name")
934+
assert not mol.HasProp("_Name")
935+
fpr = fprinter.Fingerprinter(level=5, bits=1024, stereo=True, radius_multiplier=1.718)
936+
fpr.run(conf=0, mol=mol)
937+
fpr.get_fingerprint_at_level(5)
938+
928939

929940
class TestAtomInvariant:
930941
def test_daylight_invariants(self):

0 commit comments

Comments
 (0)