|
17 | 17 | assert version.major > 3 or (version.major == 3 and version.minor >= 7)
|
18 | 18 |
|
19 | 19 | __author__ = "Domenico Marson and Andrew Jewett"
|
20 |
| -__version__ = '1.0.0' |
21 |
| -__date__ = '2025-1-31' |
| 20 | +__version__ = '1.0.1' |
| 21 | +__date__ = '2025-2-01' |
22 | 22 |
|
23 | 23 | g_program_name = __file__.split('/')[-1]
|
24 | 24 |
|
@@ -168,84 +168,126 @@ def get_dihedrals_and_impropers(input_lines) -> tuple[list[Dihedral], list[Impro
|
168 | 168 | return loaded_dihedrals, loaded_impropers
|
169 | 169 |
|
170 | 170 |
|
171 |
| -# NOTE: PROBLEM WITH SAME-TYPES BONDED INTERACTIONS... |
172 |
| -# The same dihedral (from atom types POV) can have != parameters, based on comment... |
173 |
| -# e.g.: |
174 |
| -# 102 0.000 5.500 0.00 0.0 O -C -OH-HO carboxylic acid - aliphatic |
175 |
| -# 210 0.000 5.000 0.00 0.0 O -C -OH-HO benzoic acids |
176 |
| -# the same problem is observed also for bonds and angles... |
177 |
| -def check_uniqueness( |
| 171 | +def delete_redundant_duplicates( |
178 | 172 | interactions: list[Dihedral]|list[Improper]|list[Angle]|list[Bond],
|
179 | 173 | ) -> None:
|
| 174 | + """ OPLSAA files contain many duplicates. Decide which interactions to keep and return the list to the caller.""" |
| 175 | + |
180 | 176 | ##### My apologies for this ugly code. #####
|
181 |
| - # It wasn't planned that way, but we keep finding |
182 |
| - # more special cases that need custom code added. |
183 |
| - # PAR and SB files have grown over time and we keep |
184 |
| - # finding more redundancy problems that need fixing. |
185 |
| - |
186 |
| - # Step1: Remove duplicate interactions. |
187 |
| - # Create a dictionary of all the duplicate interactions |
188 |
| - # which share the same atom types and coefficients |
189 |
| - # (excluding comments). |
190 |
| - typescoeff_to_interaction = defaultdict(dict) |
| 177 | + # It wasn't planned that way. The PAR and SB files |
| 178 | + # that store OPLSAA parameters have grown messy over time. |
| 179 | + # We keep finding more redundancy problems that need fixing. |
| 180 | + # That's why this code has so many edge cases. |
| 181 | + |
| 182 | + # ----------- Step 1: --------------- |
| 183 | + # Organize interactions according to: |
| 184 | + # -atom type strings (a tuple which may include wildcards) |
| 185 | + # -force-field parameters (This is called "params" in the code.) |
| 186 | + # -the "paramstr" (This is the entire string following the atom type list. |
| 187 | + # including the parameters AND comments, if present.) |
| 188 | + # The result is stored in a dictionary-of-dictionary-of-dictionaries. |
| 189 | + # named "types_to_paraminteractions". Lookup interactions this way: |
| 190 | + # types_to_paraminteractions[atomtypes][params][paramstr] --> interaction |
| 191 | + # |
| 192 | + # Later, during Step2, we will use it to figure out which interactions |
| 193 | + # are redundant and can be discarded. |
| 194 | + types_to_paraminteractions = {} |
191 | 195 | for interaction in interactions:
|
192 | 196 | types = tuple(interaction.types)
|
193 |
| - # Find the text containing the force field parameters ("coeffs") |
194 |
| - coeff = interaction.coeff_line |
195 |
| - # Typically, this is "angle_coeff @angle:C3_N~_H~ 38. 118.4 # " |
196 |
| - # But we only want "38. 118.4 # ". |
| 197 | + # Find the text containing the force field parameters ("paramstr") |
| 198 | + paramstr = interaction.coeff_line |
| 199 | + # Typically, this is "angle_coeff @angle:C3_N~_H~ 38. 118.4 # WJ94" |
| 200 | + # But we only want "38. 118.4 # WJ94". |
197 | 201 | # So we ignore the text before the first two spaces:
|
198 |
| - ispace1 = coeff.find(" ") |
199 |
| - ispace2 = coeff.find(" ", ispace1+1) |
| 202 | + ispace1 = paramstr.find(" ") |
| 203 | + ispace2 = paramstr.find(" ", ispace1+1) |
200 | 204 | ispace2 = ispace2+1
|
201 |
| - coeff = coeff[ispace2:].strip() # eg. "38. 118.4 #" |
202 |
| - # Finally, strip off the comment |
203 |
| - coeff_no_comment = coeff.lstrip("#").split("#")[0].strip() |
204 |
| - # Instead of storing the interaction at (types, coeff_no_comment), |
205 |
| - # I store an additional dictionary (whose key is (types, coeff)). |
206 |
| - # That way I can keep track of all the different comments that |
207 |
| - # were added to this (otherwise identical) interaction. |
208 |
| - # We want to keep all of them, but discard the ones that are |
209 |
| - # trivially similar to each other. |
210 |
| - typescoeff_to_interaction[(types, coeff_no_comment)][(types, coeff)] = interaction |
211 |
| - # Copy the the non-redudant entries back into "interactions" again. |
212 |
| - i = 0 |
213 |
| - for coeff_no_comment, coeff_interactions in typescoeff_to_interaction.items(): |
214 |
| - for typescoeff, interaction in coeff_interactions.items(): |
215 |
| - # Edge case: We want to ignore interactions if |
216 |
| - # they lack a comment and are otherwise identical. |
217 |
| - # ...so we also check for (coeff != coeff_no_comment). |
218 |
| - coeff = typescoeff[1] |
219 |
| - coeff_blank_comment = coeff |
220 |
| - i_comment = coeff.find("#") |
221 |
| - if i_comment > 0: |
222 |
| - coeff_blank_comment = coeff[:i_comment+1] |
223 |
| - comment = coeff[i_comment+1:] |
224 |
| - if len(coeff_interactions) == 1: |
225 |
| - interactions[i] = interaction |
226 |
| - i += 1 |
227 |
| - elif coeff not in (coeff_no_comment, coeff_blank_comment): |
228 |
| - # For some reason, a lot of comments only contain ' "' |
229 |
| - # We want to skip those too. |
230 |
| - if comment.strip() != '"': |
231 |
| - interactions[i] = interaction |
232 |
| - i += 1 |
233 |
| - |
234 |
| - del interactions[i:] |
| 205 | + paramstr = paramstr[ispace2:].strip() # eg. "38. 118.4 # WJ94" |
| 206 | + # Finally, strip off the comment, leaving only the parameters |
| 207 | + params = paramstr.lstrip("#").split("#")[0].strip() # eg. "38. 118.4" |
| 208 | + if types not in types_to_paraminteractions: |
| 209 | + # Multiple interactions can exist for these same atom types |
| 210 | + # differing by either the parameters, or comments, or both. |
| 211 | + # Store a dictionary that looks up the interaction between these |
| 212 | + # atoms according to their params (without comments). |
| 213 | + types_to_paraminteractions[types] = {} # We will use this below |
| 214 | + |
| 215 | + # Create a dictionary that looks up all the interactions |
| 216 | + # which share the same types and parameters (.ie params), but may |
| 217 | + # have different comments. We might want to delete these duplicates |
| 218 | + # later, but for now, we keep track of all of them. |
| 219 | + params_to_interactions = types_to_paraminteractions[types] |
| 220 | + if params not in params_to_interactions: |
| 221 | + params_to_interactions[params] = {} |
| 222 | + paramstr_to_interactions = params_to_interactions[params] |
| 223 | + |
| 224 | + # For these interactions, its possible that multiple interactions |
| 225 | + # may exist in the file with identical atom types, parameters (coeffs) |
| 226 | + # AND comments. To get rid of these trivial duplicates, we store them in |
| 227 | + # a dictionary, indexed by the original paramstr (including comments). |
| 228 | + paramstr_to_interactions[paramstr] = interaction |
| 229 | + |
| 230 | + # ----------- Step 2: --------------- |
| 231 | + # Decide which of these interactions should be discarded. |
| 232 | + # (The interactions we want to keep will be stored in |
| 233 | + # out_interactions, which will be returned to the caller.) |
| 234 | + del interactions[:] |
| 235 | + for types, params_to_interactions in types_to_paraminteractions.items(): |
| 236 | + for params, paramstr_to_interactions in params_to_interactions.items(): |
| 237 | + for paramstr, interaction in paramstr_to_interactions.items(): |
| 238 | + discarded = True |
| 239 | + paramstr_blank_comment = paramstr |
| 240 | + i_comment = paramstr.find("#") |
| 241 | + if i_comment > 0: |
| 242 | + paramstr_blank_comment = paramstr[:i_comment+1] |
| 243 | + comment = paramstr[i_comment+1:] |
| 244 | + if len(paramstr_to_interactions) == 1: |
| 245 | + interactions.append(interaction) |
| 246 | + discarded = False |
| 247 | + elif ( |
| 248 | + paramstr not in (params, paramstr_blank_comment) # case 1 |
| 249 | + and (comment.strip() != '"') # case 2 (see below) |
| 250 | + ): |
| 251 | + # Edge Case 1: |
| 252 | + # We want to ignore interactions if |
| 253 | + # they lack a comment but they are otherwise identical. |
| 254 | + # ...so we also check to make sure that paramstr != params. |
| 255 | + # (and also paramstr != paramstr_blank_comment) |
| 256 | + # |
| 257 | + # Edge Case 2: |
| 258 | + # For some reason, a lot of comments only contain '"'. We |
| 259 | + # want to skip those too (since they are otherwise identical) |
| 260 | + # |
| 261 | + # If none of these edge-cases are true, |
| 262 | + # then we don't discard the interaction. |
| 263 | + interactions.append(interaction) |
| 264 | + discarded = False |
| 265 | + # If all of the duplicate interactions are identical except for |
| 266 | + # the comments following the params, then throw away these |
| 267 | + # duplicates (merge them into a single interaction). |
| 268 | + if (len(params_to_interactions) == 1) and (not discarded): |
| 269 | + break # skip the remaining duplicates for these atoms |
235 | 270 |
|
| 271 | + |
| 272 | +# NOTE: PROBLEM WITH SAME-TYPES BONDED INTERACTIONS... |
| 273 | +# The same dihedral (from atom types POV) can have != parameters, based on comment... |
| 274 | +# e.g.: |
| 275 | +# 102 0.000 5.500 0.00 0.0 O -C -OH-HO carboxylic acid - aliphatic |
| 276 | +# 210 0.000 5.000 0.00 0.0 O -C -OH-HO benzoic acids |
| 277 | +# the same problem is observed also for bonds and angles... |
| 278 | +def count_nonredundant_duplicates( |
| 279 | + interactions: list[Dihedral]|list[Improper]|list[Angle]|list[Bond], |
| 280 | +) -> None: |
236 | 281 | for idx, it1 in enumerate(interactions):
|
237 | 282 | for it2 in interactions[idx+1:]:
|
238 | 283 | if it1.types == it2.types:
|
239 |
| - it1_coeff = it1.coeff_line.lstrip("#").split("#")[0] |
240 |
| - it2_coeff = it2.coeff_line.lstrip("#").split("#")[0] |
241 |
| - # If we are not skipping this interaction, then |
242 |
| - # keep track of the number of duplicate interactions of this type |
243 | 284 | if it1.duplicate_count == 0:
|
244 | 285 | it1.duplicate_count = 1
|
245 | 286 | it2.duplicate_count = it1.duplicate_count + 1
|
246 | 287 |
|
247 | 288 |
|
248 | 289 |
|
| 290 | + |
249 | 291 | def sort_duplicates(
|
250 | 292 | interactions: list[Dihedral]|list[Improper]|list[Angle]|list[Bond],
|
251 | 293 | ) -> None:
|
@@ -335,10 +377,13 @@ def main(argv):
|
335 | 377 | dihedrals, impropers = get_dihedrals_and_impropers(lines_par)
|
336 | 378 |
|
337 | 379 |
|
| 380 | + # Now, let's cleanup all the lists of bonded interactions. |
| 381 | + # (Remove redundant entries, and sort by atom-type) |
338 | 382 | for interactions in [bonds, angles, dihedrals, impropers]:
|
339 | 383 | interactions.sort(key=lambda x: x.typename)
|
340 | 384 | interactions.sort(key=lambda x: x.sort_key)
|
341 |
| - check_uniqueness(interactions) |
| 385 | + delete_redundant_duplicates(interactions) |
| 386 | + count_nonredundant_duplicates(interactions) |
342 | 387 | sort_duplicates(interactions)
|
343 | 388 |
|
344 | 389 |
|
|
0 commit comments