forked from OpenChemistry/mongochemdeploy
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request OpenChemistry#82 from OpenChemistry/upload-molecul…
…es-from-qcarchive Added script to upload molecules from qcarchive
- Loading branch information
Showing
4 changed files
with
315 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
#!/usr/bin/env python3 | ||
|
||
""" | ||
This script converts qcarchive json to cjson | ||
It includes automatic bond detection, since qcarchive | ||
json does not store bonds. | ||
""" | ||
|
||
import json | ||
|
||
import click | ||
|
||
from avogadro.core import Molecule | ||
from avogadro.io import FileFormatManager | ||
from openbabel import OBMol, OBConversion | ||
|
||
|
||
# Copied from mongochemserver | ||
def avo_convert_str(str_data, in_format, out_format): | ||
mol = Molecule() | ||
conv = FileFormatManager() | ||
conv.read_string(mol, str_data, in_format) | ||
|
||
return conv.write_string(mol, out_format) | ||
|
||
|
||
# Copied from mongochemserver | ||
def cjson_to_ob_molecule(cjson): | ||
cjson_str = json.dumps(cjson) | ||
sdf_str = avo_convert_str(cjson_str, 'cjson', 'sdf') | ||
conv = OBConversion() | ||
conv.SetInFormat('sdf') | ||
conv.SetOutFormat('sdf') | ||
mol = OBMol() | ||
conv.ReadString(mol, sdf_str) | ||
return mol | ||
|
||
|
||
# Copied from mongochemserver | ||
def autodetect_bonds(cjson): | ||
mol = cjson_to_ob_molecule(cjson) | ||
mol.ConnectTheDots() | ||
mol.PerceiveBondOrders() | ||
conv = OBConversion() | ||
conv.SetInFormat('sdf') | ||
conv.SetOutFormat('sdf') | ||
sdf_str = conv.WriteString(mol) | ||
cjson_str = avo_convert_str(sdf_str, 'sdf', 'cjson') | ||
return json.loads(cjson_str) | ||
|
||
|
||
def convert_to_cjson(qcjson): | ||
cjson = {} | ||
cjson['chemicalJson'] = 1 | ||
|
||
# The qcjson geometry is in atomic units. Convert to angstrom. | ||
for i in range(len(qcjson['geometry'])): | ||
qcjson['geometry'][i] *= 0.529177249 | ||
|
||
cjson['atoms'] = { | ||
'coords': { | ||
'3d': qcjson['geometry'] | ||
}, | ||
'elements': { | ||
'number': qcjson['atomic_numbers'] | ||
} | ||
} | ||
|
||
# Auto-detect bonds, since qcjson does not store them | ||
cjson = autodetect_bonds(cjson) | ||
|
||
return cjson | ||
|
||
|
||
@click.command() | ||
@click.argument('input_file', type=click.File('r')) | ||
@click.argument('output_file', type=click.File('w')) | ||
def main(input_file, output_file): | ||
qcjson = json.load(input_file) | ||
cjson = convert_to_cjson(qcjson) | ||
json.dump(cjson, output_file) | ||
|
||
|
||
if __name__ == '__main__': | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
qcfractal | ||
girder_client | ||
avogadro | ||
openbabel |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import qcfractal.interface as ptl" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"client = ptl.FractalClient()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Shows all of the collections\n", | ||
"client.list_collections()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Pick a collection type and a collection name\n", | ||
"collection_type = 'OptimizationDataset'\n", | ||
"collection_name = 'JGI Metabolite Set 1'" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Get the dataset\n", | ||
"ds = client.get_collection(collection_type, collection_name)\n", | ||
"print(ds)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Get all of the records in the dataset\n", | ||
"records = ds.status()\n", | ||
"print(records)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Get the number of records\n", | ||
"num_records = ds.status().shape[0]\n", | ||
"print(num_records)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Look at the specifications\n", | ||
"ds.list_specifications()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Pick one\n", | ||
"specification = 'default'" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Pick a record to look at\n", | ||
"index = 0\n", | ||
"name = records.iloc[index].name\n", | ||
"print(name)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Get the record\n", | ||
"record = ds.get_record(name, specification)\n", | ||
"print(record)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Visualize the final molecule (result of optimization)\n", | ||
"mol = record.get_final_molecule()\n", | ||
"mol" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Look at the json\n", | ||
"mol.json()" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.7.3" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import json | ||
|
||
import girder_client | ||
|
||
import qcfractal.interface as ptl | ||
|
||
from convert_to_cjson import convert_to_cjson | ||
|
||
# These can be listed using the client | ||
# We might be able to come up with a method to generate these automatically | ||
collection_type = 'OptimizationDataset' | ||
collection_name = 'JGI Metabolite Set 1' | ||
specification = 'default' | ||
|
||
# Need to use a girder api key | ||
girder_api_key = '' | ||
|
||
gc = girder_client.GirderClient() | ||
gc.authenticate(apiKey=girder_api_key) | ||
|
||
client = ptl.FractalClient() | ||
|
||
print('Downloading records from:', collection_type, collection_name) | ||
ds = client.get_collection(collection_type, collection_name) | ||
records = ds.status() | ||
num_records = records.shape[0] | ||
|
||
print('Number of records found:', num_records) | ||
for i in range(num_records): | ||
|
||
name = records.iloc[i].name | ||
|
||
print('Downloading data for record:', name) | ||
|
||
try: | ||
record = ds.get_record(name, specification) | ||
|
||
if 'INCOMPLETE' in record.status: | ||
print(name, 'is incomplete. Skipping') | ||
continue | ||
|
||
final_molecule = record.get_final_molecule() | ||
qcjson = json.loads(final_molecule.json()) | ||
id = qcjson['id'] | ||
|
||
cjson = convert_to_cjson(qcjson) | ||
|
||
# The id can be used to find the molecule in qcarchive like so: | ||
# client.query_molecules(id=id) | ||
provenance = 'qcarchive molecule: ' + id | ||
params = { | ||
'cjson': json.dumps(cjson), | ||
'provenance': provenance | ||
} | ||
|
||
print('Uploading to girder...') | ||
gc.post('/molecules', json=params) | ||
|
||
except Exception as exc: | ||
print('Failed to get records for:', name) | ||
print('Exception was:', exc) | ||
continue |