diff --git a/atomate/cp2k/__init__.py b/atomate/cp2k/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/atomate/cp2k/database.py b/atomate/cp2k/database.py new file mode 100644 index 000000000..3d2b54576 --- /dev/null +++ b/atomate/cp2k/database.py @@ -0,0 +1,281 @@ +# coding: utf-8 + +from monty.json import MontyEncoder +from monty.serialization import loadfn + +""" +This module defines the database classes. +""" + +import zlib +import json +from bson import ObjectId + +from pymatgen.electronic_structure.bandstructure import ( + BandStructure, + BandStructureSymmLine, +) +from pymatgen.electronic_structure.dos import CompleteDos + +import gridfs +from pymongo import ASCENDING, DESCENDING + +from atomate.utils.database import CalcDb +from atomate.utils.utils import get_logger + +__author__ = "Nicholas Winner" + +logger = get_logger(__name__) + + +class Cp2kCalcDb(CalcDb): + """ + Class to help manage database insertions of cp2k drones + """ + + def __init__( + self, + host="localhost", + port=27017, + database="cp2k", + collection="tasks", + user=None, + password=None, + **kwargs + ): + super(Cp2kCalcDb, self).__init__( + host, port, database, collection, user, password, **kwargs + ) + + def build_indexes(self, indexes=None, background=True): + """ + Build the indexes. + + Args: + indexes (list): list of single field indexes to be built. + background (bool): Run in the background or not. + + TODO: make sure that the index building is sensible and check for + existing indexes. + """ + _indices = ( + indexes + if indexes + else [ + "formula_pretty", + "formula_anonymous", + "output.energy", + "output.energy_per_atom", + "dir_name", + ] + ) + self.collection.create_index( + "task_id", unique=True, background=background + ) + # build single field indexes + for i in _indices: + self.collection.create_index(i, background=background) + # build compound indexes + for formula in ("formula_pretty", "formula_anonymous"): + self.collection.create_index( + [ + (formula, ASCENDING), + ("output.energy", DESCENDING), + ("completed_at", DESCENDING), + ], + background=background, + ) + self.collection.create_index( + [ + (formula, ASCENDING), + ("output.energy_per_atom", DESCENDING), + ("completed_at", DESCENDING), + ], + background=background, + ) + + def insert_task(self, task_doc, use_gridfs=False): + """ + Inserts a task document (e.g., as returned by Drone.assimilate()) into the database. + Handles putting DOS, band structure and charge density into GridFS as needed. + During testing, a percentage of runs on some clusters had corrupted AECCAR files when even if everything else about the calculation looked OK. + So we do a quick check here and only record the AECCARs if they are valid + + Args: + task_doc: (dict) the task document + use_gridfs (bool) use gridfs for bandstructures and DOS + Returns: + (int) - task_id of inserted document + """ + dos = None + + # move dos BS and CHGCAR from doc to gridfs + if use_gridfs and "calcs_reversed" in task_doc: + + if ( + "dos" in task_doc["calcs_reversed"][0] + ): # only store idx=0 (last step) + dos = json.dumps( + task_doc["calcs_reversed"][0]["dos"], cls=MontyEncoder + ) + del task_doc["calcs_reversed"][0]["dos"] + + # insert the task document + t_id = self.insert(task_doc) + + # insert the dos into gridfs and update the task document + if dos: + dos_gfs_id, compression_type = self.insert_gridfs( + dos, "dos_fs", task_id=t_id + ) + self.collection.update_one( + {"task_id": t_id}, + { + "$set": { + "calcs_reversed.0.dos_compression": compression_type + } + }, + ) + self.collection.update_one( + {"task_id": t_id}, + {"$set": {"calcs_reversed.0.dos_fs_id": dos_gfs_id}}, + ) + + return t_id + + def retrieve_task(self, task_id): + """ + Retrieves a task document and unpacks the band structure and DOS as dict + + Args: + task_id: (int) task_id to retrieve + + Returns: + (dict) complete task document with BS + DOS included + + """ + task_doc = self.collection.find_one({"task_id": task_id}) + calc = task_doc["calcs_reversed"][0] + if "dos_fs_id" in calc: + dos = self.get_dos(task_id) + calc["dos"] = dos.as_dict() + return task_doc + + def insert_gridfs( + self, d, collection="fs", compress=True, oid=None, task_id=None + ): + """ + Insert the given document into GridFS. + + Args: + d (dict): the document + collection (string): the GridFS collection name + compress (bool): Whether to compress the data or not + oid (ObjectId()): the _id of the file; if specified, it must not already exist in GridFS + task_id(int or str): the task_id to store into the gridfs metadata + Returns: + file id, the type of compression used. + """ + oid = oid or ObjectId() + compression_type = None + + if compress: + d = zlib.compress(d.encode(), compress) + compression_type = "zlib" + + fs = gridfs.GridFS(self.db, collection) + if task_id: + # Putting task id in the metadata subdocument as per mongo specs: + # https://github.com/mongodb/specifications/blob/master/source/gridfs/gridfs-spec.rst#terms + fs_id = fs.put( + d, + _id=oid, + metadata={"task_id": task_id, "compression": compression_type}, + ) + else: + fs_id = fs.put( + d, _id=oid, metadata={"compression": compression_type} + ) + + return fs_id, compression_type + + def get_band_structure(self, task_id): + m_task = self.collection.find_one( + {"task_id": task_id}, {"calcs_reversed": 1} + ) + fs_id = m_task["calcs_reversed"][0]["bandstructure_fs_id"] + fs = gridfs.GridFS(self.db, "bandstructure_fs") + bs_json = zlib.decompress(fs.get(fs_id).read()) + bs_dict = json.loads(bs_json.decode()) + if bs_dict["@class"] == "BandStructure": + return BandStructure.from_dict(bs_dict) + elif bs_dict["@class"] == "BandStructureSymmLine": + return BandStructureSymmLine.from_dict(bs_dict) + else: + raise ValueError( + "Unknown class for band structure! {}".format(bs_dict["@class"]) + ) + + def get_dos(self, task_id): + m_task = self.collection.find_one( + {"task_id": task_id}, {"calcs_reversed": 1} + ) + fs_id = m_task["calcs_reversed"][0]["dos_fs_id"] + fs = gridfs.GridFS(self.db, "dos_fs") + dos_json = zlib.decompress(fs.get(fs_id).read()) + dos_dict = json.loads(dos_json.decode()) + return CompleteDos.from_dict(dos_dict) + + def reset(self): + self.collection.delete_many({}) + self.db.counter.delete_one({"_id": "taskid"}) + self.db.counter.insert_one({"_id": "taskid", "c": 0}) + self.db.dos_fs.files.delete_many({}) + self.db.dos_fs.chunks.delete_many({}) + self.build_indexes() + + # TODO: This become part of CalcDb, VASP/CP2K specific Db methods dont make sense anyway + @classmethod + def from_db_file(cls, db_file, admin=True, user_settings={}): + """ + Create MMDB from database file. File requires host, port, database, + collection, and optionally admin_user/readonly_user and + admin_password/readonly_password + + Args: + db_file (str): path to the file containing the credentials + admin (bool): whether to use the admin user + user_settings (dict): User settings to overwrite those in the db file. + Example: db_file is used to acquire all credentials, but + {'collection': 'test'} is used to overwrite the default DB insertion + collection to something else. + + Returns: + MMDb object + """ + creds = loadfn(db_file) + if user_settings: + creds.update(user_settings) + + if admin and "admin_user" not in creds and "readonly_user" in creds: + raise ValueError("Trying to use admin credentials, " + "but no admin credentials are defined. " + "Use admin=False if only read_only " + "credentials are available.") + + if admin: + user = creds.get("admin_user") + password = creds.get("admin_password") + else: + user = creds.get("readonly_user") + password = creds.get("readonly_password") + + kwargs = creds.get("mongoclient_kwargs", {}) # any other MongoClient kwargs can go here ... + + if "authsource" in creds: + kwargs["authsource"] = creds["authsource"] + else: + kwargs["authsource"] = creds["database"] + + return cls(creds["host"], int(creds.get("port", 27017)), creds["database"], creds["collection"], + user, password, **kwargs) diff --git a/atomate/cp2k/drones.py b/atomate/cp2k/drones.py new file mode 100644 index 000000000..15a5d5307 --- /dev/null +++ b/atomate/cp2k/drones.py @@ -0,0 +1,571 @@ +# coding: utf-8 + +""" +This drone assimilates directories containing the results of cp2k calculations +""" + +import os +import re +import datetime +from fnmatch import fnmatch +from collections import OrderedDict +import json +import glob +import traceback +import warnings + +from monty.io import zopen +from monty.json import jsanitize + +import numpy as np + +from pymatgen.core.composition import Composition +from pymatgen.core.structure import Structure +from pymatgen.symmetry.analyzer import SpacegroupAnalyzer +from pymatgen.io.cp2k.outputs import Cp2kOutput, Cube +from pymatgen.apps.borg.hive import AbstractDrone +from pymatgen.io.vasp.outputs import VolumetricData + +from atomate.utils.utils import get_logger, get_uri +from atomate import __version__ as atomate_version + +__author__ = "Nicholas Winner" + +logger = get_logger(__name__) + + +# TODO: Don't push until going through all this +class Cp2kDrone(AbstractDrone): + """ + Adapted from VaspDrone + """ + + __version__ = ( + atomate_version # note: the version is inserted into the task doc + ) + + # Schema def of important keys and sub-keys; used in validation + schema = { + "root": { + "schema", + "dir_name", + "chemsys", + "composition_reduced", + "formula_pretty", + "formula_reduced_abc", + "elements", + "nelements", + "formula_anonymous", + "calcs_reversed", + "completed_at", + "nsites", + "composition_unit_cell", + "input", + "output", + "state", + "analysis", + "run_stats", + }, + "input": {"atomic_kind_info", "structure"}, + "output": { + "structure", + "spacegroup", + "density", + "energy", + "energy_per_atom", + "bandgap", + "vbm", + "cbm", + "is_metal", + "forces", + "stress", + "ionic_steps" + }, + "calcs_reversed": { + "dir_name", + "run_type", + "elements", + "nelements", + "formula_pretty", + "formula_reduced_abc", + "composition_reduced", + "cp2k_version", + "formula_anonymous", + "nsites", + "composition_unit_cell", + "completed_at", + "task", + "input", + "output", + "has_cp2k_completed", + }, + "analysis": { + "delta_volume_as_percent", + "delta_volume", + "max_force", + "errors", + "warnings", + }, + } + + def __init__( + self, + runs=None, + parse_dos="auto", + parse_hartree=False, + additional_fields=None, + use_full_uri=True, + ): + """ + Initialize a cp2k drone to parse cp2k outputs + Args: + runs (list): Naming scheme for multiple calcuations in one folder e.g. ["relax1","relax2"]. + Can be subfolder or extension + parse_dos (str or bool): Whether to parse the DOS. Can be "auto", True or False. + "auto" will only parse DOS if NSW = 0, so there are no ionic steps + + additional_fields (dict): dictionary of additional fields to add to output document + use_full_uri (bool): converts the directory path to the full URI path + """ + self.runs = runs or ["precondition"] + [ + "relax" + str(i + 1) for i in range(9) + ] + self.parse_dos = parse_dos + self.parse_hartree = parse_hartree + self.additional_fields = additional_fields or {} + self.use_full_uri = use_full_uri + + def assimilate(self, path): + """ + Adapted from matgendb.creator + Parses cp2k runs and insert the result into the db. + Get the entire task doc from the cp2k.out file in the path. + Also adds some post-processed info. + + Args: + path (str): Path to the directory containing cp2k.out file + + Returns: + (dict): a task dictionary + """ + logger.info("Getting task doc for base dir :{}".format(path)) + cp2k_out_files = self.filter_files(path, file_pattern="cp2k.out") + if len(cp2k_out_files) > 0: + d = self.generate_doc(path, cp2k_out_files) + self.post_process(path, d) + else: + raise ValueError("No CP2K output files found!") + self.validate_doc(d) + return d + + def filter_files(self, path, file_pattern="cp2k.out"): + """ + Find the files that match the pattern in the given path and + return them in an ordered dictionary. The searched for files are + filtered by the run types defined in self.runs. e.g. ["relax1", "relax2", ...]. + Only 2 schemes of the file filtering is enabled: searching for run types + in the list of files and in the filenames. Modify this method if more + sophisticated filtering scheme is needed. + + Args: + path (string): path to the folder + file_pattern (string): files to be searched for + + Returns: + OrderedDict of the names of the files to be processed further. + The key is set from list of run types: self.runs + """ + processed_files = OrderedDict() + files = os.listdir(path) + for r in self.runs: + # try subfolder schema + if r in files: + for f in os.listdir(os.path.join(path, r)): + if fnmatch(f, "{}*".format(file_pattern)): + processed_files[r] = os.path.join(r, f) + # try extension schema + else: + for f in files: + if fnmatch(f, "{}.{}*".format(file_pattern, r)): + processed_files[r] = f + if len(processed_files) == 0: + # get any matching file from the folder + for f in files: + if fnmatch(f, "{}*".format(file_pattern)): + processed_files["standard"] = f + return processed_files + + def generate_doc(self, dir_name, cp2k_files): + """ + Adapted from matgendb.creator.generate_doc + """ + try: + # basic properties, incl. calcs_reversed and run_stats + fullpath = os.path.abspath(dir_name) + d = jsanitize(self.additional_fields, strict=True) + d["schema"] = {"code": "atomate", "version": Cp2kDrone.__version__} + d["dir_name"] = fullpath + d["calcs_reversed"] = [ + self.process_cp2k(dir_name, taskname, filename) + for taskname, filename in cp2k_files.items() + ] + + # TODO More run stats + run_stats = {} + for i, d_calc in enumerate(d["calcs_reversed"]): + run_stats[d_calc["task"]["name"]] = d_calc["total_time"] + d["run_stats"] = run_stats + + # TODO + # set root formula/composition keys based on initial and final calcs + d_calc_init = d["calcs_reversed"][-1] + d_calc_final = d["calcs_reversed"][0] + d["chemsys"] = "-".join(sorted(d_calc_final["elements"])) + comp = Composition(d_calc_final["composition_unit_cell"]) + d["formula_anonymous"] = comp.anonymized_formula + d[ + "formula_reduced_abc" + ] = comp.reduced_composition.alphabetical_formula + for root_key in [ + "completed_at", + "nsites", + "composition_unit_cell", + "composition_reduced", + "formula_pretty", + "elements", + "nelements", + ]: + d[root_key] = d_calc_final[root_key] + + # TODO: Should atomic kind info and DFT really be saved like this? + # Right now the input is where you save the dft parameters, but for + # things like hybrid calcluations, the dft parameters change as you + # switch from GGA to GGA hybrid. Is it maybe better to save dft + # parameters as a sequence or something? showing how they are the whole time? + d["input"] = { + "structure": d_calc_init["input"]["structure"], + "atomic_kind_info": d_calc_init["input"]["atomic_kind_info"], + "dft": d_calc_init["input"]["dft"], + "scf": d_calc_init["input"]["scf"], + } + + # store the output key based on final calc + d["output"] = { + "structure": d_calc_final["output"]["structure"], + "density": d_calc_final.pop("density"), + "energy": d_calc_final["output"]["energy"], + "energy_per_atom": d_calc_final["output"]["energy_per_atom"], + "forces": d_calc_final["output"]["ionic_steps"][-1].get( + "forces" + ), + "stress": d_calc_final["output"]["ionic_steps"][-1].get( + "stress" + ), + "ionic_steps": d_calc_final["output"]["ionic_steps"], + "cbm": d_calc_final["output"]["cbm"], + "vbm": d_calc_final["output"]["vbm"], + "bandgap": d_calc_final["output"]["bandgap"], + "efermi": d_calc_final["output"]["efermi"], + "is_metal": d_calc_final["output"]["is_metal"], + "v_hartree": d_calc_final.pop('v_hartree', None), + "v_hartree_grid": d_calc_final.pop('v_hartree_grid', None), + } + + # Store symmetry information + try: + sg = SpacegroupAnalyzer( + Structure.from_dict(d_calc_final["output"]["structure"]), + 0.1, + ) + if not sg.get_symmetry_dataset(): + sg = SpacegroupAnalyzer( + Structure.from_dict( + d_calc_final["output"]["structure"] + ), + 1e-3, + 1, + ) + d["output"]["spacegroup"] = { + "source": "spglib", + "symbol": sg.get_space_group_symbol(), + "number": sg.get_space_group_number(), + "point_group": sg.get_point_group_symbol(), + "crystal_system": sg.get_crystal_system(), + "hall": sg.get_hall(), + } + except TypeError: + d["output"]["spacegroup"] = { + "source": None, + "symbol": None, + "number": None, + "point_group": None, + "crystal_system": None, + "hall": None, + } + warnings.warn( + "Space Group could not be determined by this drone.", + Warning, + ) + + d["state"] = ( + "successful" + if all([i["has_cp2k_completed"] for i in d["calcs_reversed"]]) + else "unsuccessful" + ) + + self.set_analysis(d) + + d["last_updated"] = datetime.datetime.utcnow() + return d + + except Exception: + logger.error(traceback.format_exc()) + logger.error( + "Error in " + + os.path.abspath(dir_name) + + ".\n" + + traceback.format_exc() + ) + raise + + def process_cp2k(self, dir_name, taskname, filename): + """ + Adapted from matgendb.creator + + Process a cp2k output file. + """ + cp2k_file = os.path.join(dir_name, filename) + + out = Cp2kOutput(cp2k_file, auto_load=True) + d = out.as_dict() + + comp = Composition(d["composition"]) + d["formula_pretty"] = comp.reduced_formula + d["composition_reduced"] = comp.reduced_composition + d["composition_unit_cell"] = comp.as_dict() + d["formula_anonymous"] = comp.anonymized_formula + d["formula_reduced_abc"] = comp.reduced_composition.alphabetical_formula + d["elements"] = list(comp.as_dict().keys()) + d["nelements"] = len(self.as_dict().keys()) + d["nsites"] = len(d["input"]["structure"]["sites"]) + d["dir_name"] = os.path.abspath(dir_name) + d["completed_at"] = str( + datetime.datetime.fromtimestamp(os.path.getmtime(cp2k_file)) + ) + d["density"] = out.final_structure.density + + d["has_cp2k_completed"] = d.pop("ran_successfully") + + # store run name and location ,e.g. relax1, relax2, etc. + d["task"] = {"type": taskname, "name": taskname} + + # include output file names + d["output_file_paths"] = self.process_raw_data( + dir_name, taskname=taskname + ) + + if self.parse_dos: + d['dos'] = out.parse_pdos() + + if self.parse_hartree: + cube = Cube(out.filenames['v_hartree'][-1]) + vd = VolumetricData(structure=cube.structure, data={'total': cube.data}) + d['v_hartree'] = [ + vd.get_average_along_axis(i) for i in range(3) + ] + d['v_hartree_grid'] = [ + vd.get_axis_grid(i) for i in range(3) + ] + return d + + def process_raw_data(self, dir_name, taskname="standard"): + pass + + @staticmethod + def set_analysis(d, max_force_threshold=0.5, volume_change_threshold=0.2): + """ + Adapted from matgendb.creator + + set the 'analysis' key + """ + initial_vol = d["input"]["structure"]["lattice"]["volume"] + final_vol = d["output"]["structure"]["lattice"]["volume"] + delta_vol = final_vol - initial_vol + percent_delta_vol = 100 * delta_vol / initial_vol + warning_msgs = [] + error_msgs = [] + + # delta volume checks + if abs(percent_delta_vol) > volume_change_threshold: + warning_msgs.append( + "Volume change > {}%".format(volume_change_threshold * 100) + ) + + # max force and valid structure checks + max_force = None + calc = d["calcs_reversed"][0] + if ( + d["state"] == "successful" + and "forces" in calc["output"]["ionic_steps"][-1].keys() + ): + + # calculate max forces + forces = np.array(calc["output"]["ionic_steps"][-1]["forces"]) + # account for selective dynamics + final_structure = Structure.from_dict(calc["output"]["structure"]) + sdyn = final_structure.site_properties.get("selective_dynamics") + if sdyn: + forces[np.logical_not(sdyn)] = 0 + max_force = max(np.linalg.norm(forces, axis=1)) + + s = Structure.from_dict(d["output"]["structure"]) + if not s.is_valid(): + error_msgs.append("Bad structure (atoms are too close!)") + d["state"] = "error" + + d["analysis"] = { + "delta_volume": delta_vol, + "delta_volume_as_percent": percent_delta_vol, + "max_force": max_force, + "warnings": warning_msgs, + "errors": error_msgs, + } + + def post_process(self, dir_name, d): + """ + Post-processing for various files other than the cp2k.out file. + Looks for files: transformations.json and custodian.json. Modify this if other + output files need to be processed. + + Args: + dir_name: + The dir_name. + d: + Current doc generated. + """ + logger.info("Post-processing dir:{}".format(dir_name)) + fullpath = os.path.abspath(dir_name) + # CP2K input generated by pymatgen's alchemy has a transformations.json file that tracks + # the origin of a particular structure. If such a file is found, it is inserted into the + # task doc as d["transformations"] + transformations = {} + filenames = glob.glob(os.path.join(fullpath, "transformations.json*")) + if len(filenames) >= 1: + with zopen(filenames[0], "rt") as f: + transformations = json.load(f) + try: + m = re.match( + "(\d+)-ICSD", transformations["history"][0]["source"] + ) + if m: + d["icsd_id"] = int(m.group(1)) + except Exception as ex: + logger.warning( + "Cannot parse ICSD from transformations file." + ) + pass + else: + logger.warning("Transformations file does not exist.") + + other_parameters = transformations.get("other_parameters") + new_tags = None + if other_parameters: + # We don't want to leave tags or authors in the + # transformations file because they'd be copied into + # every structure generated after this one. + new_tags = other_parameters.pop("tags", None) + new_author = other_parameters.pop("author", None) + if new_author: + d["author"] = new_author + if not other_parameters: # if dict is now empty remove it + transformations.pop("other_parameters") + d["transformations"] = transformations + + # Calculations done using custodian has a custodian.json, + # which tracks the jobs performed and any errors detected and fixed. + # This is useful for tracking what has actually be done to get a + # result. If such a file is found, it is inserted into the task doc + # as d["custodian"] + filenames = glob.glob(os.path.join(fullpath, "custodian.json*")) + if len(filenames) >= 1: + with zopen(filenames[0], "rt") as f: + d["custodian"] = json.load(f) + # Convert to full uri path. + if self.use_full_uri: + d["dir_name"] = get_uri(dir_name) + if new_tags: + d["tags"] = new_tags + + # Calculations using custodian generate a *.orig file for the inputs + # This is useful to know how the calculation originally started + # if such files are found they are inserted into orig_inputs + filenames = glob.glob(os.path.join(fullpath, "*.orig*")) + + if len(filenames) >= 1: + d["orig_inputs"] = {} + for f in filenames: + if "INCAR.orig" in f: + d["orig_inputs"]["incar"] = Incar.from_file(f).as_dict() + if "POTCAR.orig" in f: + d["orig_inputs"]["potcar"] = Potcar.from_file(f).as_dict() + if "KPOINTS.orig" in f: + d["orig_inputs"]["kpoints"] = Kpoints.from_file(f).as_dict() + if "POSCAR.orig" in f: + d["orig_inputs"]["poscar"] = Poscar.from_file(f).as_dict() + + logger.info("Post-processed " + fullpath) + + def validate_doc(self, d): + """ + Sanity check. + Make sure all the important keys are set + """ + # TODO: @matk86 - I like the validation but I think no one will notice a failed + # validation tests which removes the usefulness of this. Any ideas to make people + # notice if the validation fails? -computron + for k, v in self.schema.items(): + if k == "calcs_reversed": + diff = v.difference(set(d.get(k, d)[0].keys())) + else: + diff = v.difference(set(d.get(k, d).keys())) + if diff: + logger.warn("The keys {0} in {1} not set".format(diff, k)) + + def get_valid_paths(self, path): + """ + There are some restrictions on the valid directory structures: + + 1. There can be only one cp2k run in each directory. Nested directories + are fine. + 2. Directories designated "relax1"..."relax9" are considered to be + parts of a multiple-optimization run. + 3. Directories containing cp2k output with ".relax1"...".relax9" are + also considered as parts of a multiple-optimization run. + """ + (parent, subdirs, files) = path + if set(self.runs).intersection(subdirs): + return [parent] + if ( + not any([parent.endswith(os.sep + r) for r in self.runs]) + and len(glob.glob(os.path.join(parent, "cp2k.out*"))) > 0 + ): + return [parent] + return [] + + def as_dict(self): + init_args = { + "parse_dos": self.parse_dos, + "additional_fields": self.additional_fields, + "use_full_uri": self.use_full_uri, + "runs": self.runs, + } + return { + "@module": self.__class__.__module__, + "@class": self.__class__.__name__, + "version": self.__class__.__version__, + "init_args": init_args, + } + + @classmethod + def from_dict(cls, d): + return cls(**d["init_args"]) diff --git a/atomate/cp2k/firetasks/__init__.py b/atomate/cp2k/firetasks/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/atomate/cp2k/firetasks/glue_tasks.py b/atomate/cp2k/firetasks/glue_tasks.py new file mode 100644 index 000000000..c3d3fa468 --- /dev/null +++ b/atomate/cp2k/firetasks/glue_tasks.py @@ -0,0 +1,158 @@ +# coding: utf-8 + +from __future__ import ( + division, + print_function, + unicode_literals, + absolute_import, +) + +""" +This module defines tasks that acts as a glue between other vasp Firetasks to allow communication +between different Firetasks and Fireworks. This module also contains tasks that affect the control +flow of the workflow, e.g. tasks to check stability or the gap is within a certain range. +""" + +import gzip +import os +import re +import pickle +import glob +from monty.io import zopen + +from pymatgen.io.cp2k.sets import Cp2kInputSet +from pymatgen.io.cp2k.inputs import Cp2kInput, Coord, Cell +from pymatgen.io.cp2k.outputs import Cp2kOutput + +from fireworks import explicit_serialize, FiretaskBase, FWAction + +from atomate.utils.utils import env_chk, get_logger +from atomate.common.firetasks.glue_tasks import ( + get_calc_loc, + PassResult, + CopyFiles, + CopyFilesFromCalcLoc, +) + +logger = get_logger(__name__) + +__author__ = "Nicholas Winner" +__email__ = "nwinner@berkeley.edu" + + +@explicit_serialize +class UpdateStructureFromPrevCalc(FiretaskBase): + """ + Using the location of a previous calculation. The CP2K output parser will + get the final structure from a previous calculation and update this FW's + cp2k_input_set using it. + """ + + required_params = ["prev_calc_loc"] + optional_params = ["cp2k_output_file"] + + def run_task(self, fw_spec): + calc_loc = ( + get_calc_loc(self.get("prev_calc_loc"), fw_spec["calc_locs"]) + if self.get("prev_calc_loc") + else {} + ) + + cp2k_input_set = fw_spec.get("cp2k_input_set") + ci = Cp2kInputSet.from_dict(cp2k_input_set) + + if self.get("cp2k_output_file"): + out = Cp2kOutput( + os.path.join(calc_loc["path"], self.get("cp2k_output_file")) + ) + else: + out = Cp2kOutput(glob.glob(calc_loc["path"] + "/cp2k.out*")[0]) + + out.parse_structures() + ci["FORCE_EVAL"]["SUBSYS"]["COORD"] = Coord(out.final_structure) + ci["FORCE_EVAL"]["SUBSYS"]["CELL"] = Cell(out.final_structure.lattice) + fw_spec["cp2k_input_set"] = ci.as_dict() + + +@explicit_serialize +class CopyCp2kOutputs(CopyFiles): + """ + Copy CP2K outputs from from one location to another, unzipping them if necessary. + Unlike VASP, which might have CONTCAR copied to POSCAR in order to continue a calculation, + Cp2k doesn't exactly use this file system. What will generally be used for is to copy + a .wfn file in order to restart a calculation or as an initial guess for a hybrid calculation. + + + Note that you must specify either "calc_loc" or "calc_dir" to indicate + the directory containing the previous VASP run. + + Required params: + (none) - but you must specify either "calc_loc" OR "calc_dir" + + Optional params: + calc_loc (str OR bool): if True will set most recent calc_loc. If str + search for the most recent calc_loc with the matching name + calc_dir (str): path to dir that contains VASP output files. + filesystem (str): remote filesystem. e.g. username@host + """ + + optional_params = ["files_to_copy", "calc_loc", "calc_dir", "filesystem"] + + def run_task(self, fw_spec): + + calc_loc = ( + get_calc_loc(self["calc_loc"], fw_spec["calc_locs"]) + if self.get("calc_loc") + else {} + ) + + files_to_copy = self.get("files_to_copy", []) + # setup the copy + self.setup_copy( + self.get("calc_dir", None), + filesystem=self.get("filesystem", None), + files_to_copy=files_to_copy, + from_path_dict=calc_loc, + ) + # do the copying + self.copy_files() + + def copy_files(self): + all_files = self.fileclient.listdir(self.from_dir) + # start file copy + for f in self.files_to_copy: + prev_path_full = os.path.join(self.from_dir, f) + dest_fname = f + dest_path = os.path.join(self.to_dir, dest_fname) + + # detect .gz extension if needed - note that monty zpath() did not seem useful here + gz_ext = "" + if not f in all_files: + for possible_ext in [".gz", ".GZ"]: + if (f + possible_ext) in all_files: + gz_ext = possible_ext + + if not (f + gz_ext) in all_files: + raise ValueError("Cannot find file: {}".format(f)) + + # copy the file (minus the relaxation extension) + self.fileclient.copy(prev_path_full + gz_ext, dest_path + gz_ext) + + # unzip the .gz if needed + if gz_ext in [".gz", ".GZ"]: + # unzip dest file + try: + f = zopen(dest_path + gz_ext, "rt") + file_content = f.read() + except (UnicodeDecodeError, AttributeError): + f = zopen(dest_path + gz_ext, "rb") + file_content = f.read() + if isinstance(file_content, (bytes, bytearray)): + with open(dest_path, "wb") as f_out: + f_out.write(file_content) + else: + with open(dest_path, "w") as f_out: + f_out.write(file_content) + + f.close() + os.remove(dest_path + gz_ext) diff --git a/atomate/cp2k/firetasks/parse_outputs.py b/atomate/cp2k/firetasks/parse_outputs.py new file mode 100644 index 000000000..4cd479ede --- /dev/null +++ b/atomate/cp2k/firetasks/parse_outputs.py @@ -0,0 +1,163 @@ +# coding: utf-8 + +import json +import os +from pydash.objects import has, get + +from atomate.vasp.config import DEFUSE_UNSUCCESSFUL +from fireworks import FiretaskBase, FWAction, explicit_serialize +from fireworks.utilities.fw_serializers import DATETIME_HANDLER + +from atomate.common.firetasks.glue_tasks import get_calc_loc +from atomate.utils.utils import env_chk +from atomate.utils.utils import get_logger +from atomate.cp2k.drones import Cp2kDrone +from atomate.cp2k.database import Cp2kCalcDb + +__author__ = "Nicholas Winner" +__email__ = "nwinner@berkeley.edu" + +logger = get_logger(__name__) + + +@explicit_serialize +class Cp2kToDb(FiretaskBase): + """ + Enter a cp2k run into the database. Uses current directory unless you + specify calc_dir or calc_loc. + + Optional params: + calc_dir (str): path to dir (on current filesystem) that contains cp2k + output files. Default: use current working directory. + calc_loc (str OR bool): if True will set most recent calc_loc. If str + search for the most recent calc_loc with the matching name + parse_dos (bool): whether to parse the DOS and store in GridFS. + Defaults to False. + bandstructure_mode (str): Set to "uniform" for uniform band structure. + Set to "line" for line mode. If not set, band structure will not + be parsed. + additional_fields (dict): dict of additional fields to add + db_file (str): path to file containing the database credentials. + Supports env_chk. Default: write data to JSON file. + fw_spec_field (str): if set, will update the task doc with the contents + of this key in the fw_spec. + defuse_unsuccessful (bool): this is a three-way toggle on what to do if + your job looks OK, but is actually unconverged (either electronic or + ionic). True -> mark job as COMPLETED, but defuse children. + False --> do nothing, continue with workflow as normal. "fizzle" + --> throw an error (mark this job as FIZZLED) + task_fields_to_push (dict): if set, will update the next Firework/Firetask + spec using fields from the task document. + Format: {key : path} -> fw.spec[key] = task_doc[path] + The path is a full mongo-style path so subdocuments can be referneced + using dot notation and array keys can be referenced using the index. + E.g "calcs_reversed.0.output.outar.run_stats" + """ + + optional_params = [ + "drone", + "calc_dir", + "calc_loc", + "parse_dos", + "bandstructure_mode", + "additional_fields", + "db_file", + "fw_spec_field", + "defuse_unsuccessful", + "task_fields_to_push", + "parse_chgcar", + "parse_aeccar", + "db_user_settings" + ] + + def run_task(self, fw_spec): + # get the directory that contains the cp2k dir to parse + calc_dir = os.getcwd() + if "calc_dir" in self: + calc_dir = self["calc_dir"] + elif self.get("calc_loc"): + calc_dir = get_calc_loc(self["calc_loc"], fw_spec["calc_locs"])[ + "path" + ] + + # parse the cp2k directory + logger.info("PARSING DIRECTORY: {}".format(calc_dir)) + + drone = Cp2kDrone( + additional_fields=self.get("additional_fields"), + parse_dos=self.get("parse_dos", False), + parse_hartree=self.get("parse_hartree", False) + ) + + # assimilate (i.e., parse) + task_doc = drone.assimilate(calc_dir) + + # Check for additional keys to set based on the fw_spec + if self.get("fw_spec_field"): + task_doc.update(self.get("fw_spec_field")) + + # get the database connection + db_file = env_chk(self.get("db_file"), fw_spec) + + # db insertion or taskdoc dump + if not db_file: + with open("task.json", "w") as f: + f.write(json.dumps(task_doc, default=DATETIME_HANDLER)) + else: + mmdb = Cp2kCalcDb.from_db_file( + db_file, admin=True, user_settings=self.get( + 'db_user_settings', {})) + t_id = mmdb.insert_task( + task_doc, use_gridfs=self.get("parse_dos", False) + ) + logger.info("Finished parsing with task_id: {}".format(t_id)) + + defuse_children = False + if task_doc["state"] != "successful": + defuse_unsuccessful = self.get( + "defuse_unsuccessful", DEFUSE_UNSUCCESSFUL + ) + if defuse_unsuccessful is True: + defuse_children = True + elif defuse_unsuccessful is False: + pass + elif defuse_unsuccessful == "fizzle": + raise RuntimeError( + "Cp2kToDb indicates that job is not successful " + "(perhaps your job did not converge within the " + "limit of electronic/ionic iterations)!" + ) + else: + raise RuntimeError( + "Unknown option for defuse_unsuccessful: " + "{}".format(defuse_unsuccessful) + ) + + task_fields_to_push = self.get("task_fields_to_push", None) + update_spec = {} + if task_fields_to_push: + if isinstance(task_fields_to_push, dict): + for key, path_in_task_doc in task_fields_to_push.items(): + if has(task_doc, path_in_task_doc): + update_spec[key] = get(task_doc, path_in_task_doc) + else: + logger.warn( + "Could not find {} in task document. Unable to push to next firetask/firework".format( + path_in_task_doc + ) + ) + else: + raise RuntimeError( + "Inappropriate type {} for task_fields_to_push. It must be a " + "dictionary of format: {key: path} where key refers to a field " + "in the spec and path is a full mongo-style path to a " + "field in the task document".format( + type(task_fields_to_push) + ) + ) + + return FWAction( + stored_data={"task_id": task_doc.get("task_id", None)}, + defuse_children=defuse_children, + update_spec=update_spec, + ) diff --git a/atomate/cp2k/firetasks/run_calc.py b/atomate/cp2k/firetasks/run_calc.py new file mode 100644 index 000000000..30f6cb44d --- /dev/null +++ b/atomate/cp2k/firetasks/run_calc.py @@ -0,0 +1,133 @@ +# coding: utf-8 + +from monty.os.path import zpath +from monty.serialization import loadfn + +""" +This module defines tasks that support running vasp in various ways. +""" + +import shlex +import os +import six +import subprocess + +from custodian import Custodian +from custodian.cp2k.handlers import UnconvergedScfErrorHandler +from custodian.cp2k.jobs import Cp2kJob + +from fireworks import explicit_serialize, FiretaskBase, FWAction + +from atomate.utils.utils import env_chk, get_logger +from atomate.vasp.config import CUSTODIAN_MAX_ERRORS + +__author__ = "Nicholas Winner " + +logger = get_logger(__name__) + + +@explicit_serialize +class RunCp2KDirect(FiretaskBase): + """ + Execute a command directly (no custodian). + + Required params: + cmd (str): the name of the full executable to run. Supports env_chk. + Optional params: + expand_vars (str): Set to true to expand variable names in the cmd. + """ + + required_params = ["cp2k_cmd"] + optional_params = ["expand_vars"] + + def run_task(self, fw_spec): + cmd = env_chk(self["cp2k_cmd"], fw_spec) + if self.get("expand_vars", False): + cmd = os.path.expandvars(cmd) + + logger.info("Running command: {}".format(cmd)) + return_code = subprocess.call(cmd, shell=True) + logger.info( + "Command {} finished running with returncode: {}".format( + cmd, return_code + ) + ) + + +@explicit_serialize +class RunCp2KCustodian(FiretaskBase): + """ + Run CP2K using custodian "on rails", i.e. in a simple way that supports most common options. + + Required params: + cp2k_cmd (str): the name of the full executable for running Cp2k. Supports env_chk. + + Optional params: + job_type: (str) - choose from "normal" (default) + handler_group: (str or [ErrorHandler]) - group of handlers to use. See handler_groups dict in the code for + the groups and complete list of handlers in each group. Alternatively, you can + specify a list of ErrorHandler objects. + max_force_threshold: (float) - if >0, adds MaxForceErrorHandler. Not recommended for + nscf runs. + scratch_dir: (str) - if specified, uses this directory as the root scratch dir. + Supports env_chk. + gzip_output: (bool) - gzip output (default=T) + max_errors: (int) - maximum # of errors to fix before giving up (default=5) + ediffg: (float) shortcut for setting EDIFFG in special custodian jobs + wall_time (int): Total wall time in seconds. Activates WalltimeHandler if set. + """ + + required_params = ["cp2k_cmd"] + optional_params = [] + + def run_task(self, fw_spec): + + handler_groups = { + "default": [UnconvergedScfErrorHandler()], + "strict": [], + "md": [], + "no_handler": [], + } + + cp2k_cmd = env_chk(self["cp2k_cmd"], fw_spec) + + if isinstance(cp2k_cmd, six.string_types): + cp2k_cmd = os.path.expandvars(cp2k_cmd) + cp2k_cmd = shlex.split(cp2k_cmd) + + # initialize variables + job_type = self.get("job_type", "normal") + scratch_dir = env_chk(self.get("scratch_dir"), fw_spec) + gzip_output = self.get("gzip_output", True) + max_errors = self.get("max_errors", CUSTODIAN_MAX_ERRORS) + + # construct jobs + if job_type == "normal": + jobs = [Cp2kJob(cp2k_cmd)] + + # construct handlers + handler_group = self.get("handler_group", "default") + if isinstance(handler_group, six.string_types): + handlers = handler_groups[handler_group] + else: + handlers = handler_group + + # construct validators + validators = [] + + c = Custodian( + handlers, + jobs, + validators=validators, + max_errors=max_errors, + scratch_dir=scratch_dir, + gzipped_output=gzip_output, + ) + + c.run() + + if os.path.exists(zpath("custodian.json")): + stored_custodian_data = { + "custodian": loadfn(zpath("custodian.json")) + } + return FWAction(stored_data=stored_custodian_data) diff --git a/atomate/cp2k/firetasks/write_inputs.py b/atomate/cp2k/firetasks/write_inputs.py new file mode 100644 index 000000000..501c4156d --- /dev/null +++ b/atomate/cp2k/firetasks/write_inputs.py @@ -0,0 +1,89 @@ +# coding: utf-8 + +from __future__ import ( + division, + print_function, + unicode_literals, + absolute_import, +) + +""" +This module defines tasks for writing vasp input sets for various types of vasp calculations +""" + +import os +from six.moves import range +from importlib import import_module +import glob + +import numpy as np + +from monty.serialization import dumpfn + +from pymatgen.io.cp2k.sets import Cp2kInputSet +from pymatgen.io.cp2k.outputs import Cp2kOutput +from fireworks import FiretaskBase, explicit_serialize + +from atomate.utils.utils import env_chk, load_class +from atomate.common.firetasks.glue_tasks import get_calc_loc + +__author__ = "Nicholas Winner" +__email__ = "nwinner@berkeley.edu" + + +@explicit_serialize +class WriteCp2kFromIOSet(FiretaskBase): + """ + Create CP2K input files using the pymatgen Cp2kInputSet object or dict representation. + + Required params: + structure (Structure): structure + cp2k_input_set (Cp2kInputSet or dict): Either a Cp2kInputSet object or a dict representation of one + + Optional params: + cp2k_input_params (dict): When using a string name for CP2K input set, use this as a dict + to specify kwargs for instantiating the input set parameters. For example, if you wan + """ + + required_params = ["structure", "cp2k_input_set"] + optional_params = ["cp2k_input_params"] + + def run_task(self, fw_spec): + if isinstance(self["cp2k_input_set"], dict): + cis = load_class( + self["cp2k_input_set"]["@module"], + self["cp2k_input_set"]["@class"] + ).from_dict(self["cp2k_input_set"]) + elif isinstance(self['cp2k_input_set'], str): + cis_cls = load_class( + "pymatgen.io.cp2k.sets", self["cp2k_input_set"] + ) + cis = cis_cls( + self["structure"], **self.get("cp2k_input_params", {}) + ) + else: + cis = self["cp2k_input_set"] + cis.silence() + cis.write_file(input_filename="cp2k.inp", output_dir=".") + + +@explicit_serialize +class WriteCp2kFromPrevious(FiretaskBase): + + optional_params = [ + "cp2k_input_params", + "prev_calc_loc", + "original_input_filename", + "new_input_filename", + ] + + def run_task(self, fw_spec): + calc_loc = get_calc_loc("prev_calc_loc", fw_spec["calc_locs"]) + input_filename = self.get("original_input_filename", "cp2k.input") + if os.path.isfile(calc_loc, input_filename): + input_path = os.path.join(calc_loc, input_filename) + else: + raise FileNotFoundError("Could not find the cp2k input file!") + + ci = Cp2kInputSet.from_file(input_path) + ci.write_file(input_filename="cp2k.inp") diff --git a/atomate/cp2k/fireworks/__init__.py b/atomate/cp2k/fireworks/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/atomate/cp2k/fireworks/core.py b/atomate/cp2k/fireworks/core.py new file mode 100644 index 000000000..9c8868893 --- /dev/null +++ b/atomate/cp2k/fireworks/core.py @@ -0,0 +1,399 @@ +# coding: utf-8 + +from __future__ import ( + absolute_import, + division, + print_function, + unicode_literals, +) + +import warnings + +""" +Defines standardized Fireworks that can be chained easily to perform various +sequences of CP2K calculations. +""" + +from fireworks.core.firework import Firework + +from pymatgen.core.structure import Structure +from pymatgen.io.cp2k.sets import ( + RelaxSet, + CellOptSet, + StaticSet, + HybridStaticSet, + HybridRelaxSet, + HybridCellOptSet +) + +from atomate.cp2k.firetasks.write_inputs import ( + WriteCp2kFromIOSet, + WriteCp2kFromPrevious, +) +from atomate.cp2k.firetasks.run_calc import RunCp2KCustodian +from atomate.cp2k.firetasks.glue_tasks import ( + UpdateStructureFromPrevCalc, + CopyCp2kOutputs, +) +from atomate.cp2k.firetasks.parse_outputs import Cp2kToDb +from atomate.common.firetasks.glue_tasks import PassCalcLocs +from atomate.cp2k.utils import optimize_structure_sc_scale + + +# TODO: almost all Fws follow a similar copy from calc loc --> write input --> run --> pass locs/data. +# Combine into single core firework maybe? + + +class BaseFW(Firework): + def __init__( + self, + structure=None, + name=None, + cp2k_input_set=None, + cp2k_input_set_params={}, + cp2k_cmd="cp2k", + prev_calc_loc=True, + prev_calc_dir=None, + db_file=None, + cp2ktodb_kwargs=None, + parents=None, + files_to_copy=[], + **kwargs + ): + """ + Standard calculation Firework. A FW generally follows the following structure: + (1) Update based on previous calculation + (2) Copy files from previous calculation + (3) Write input files as necessary + (4) Run Cp2k with custodian + (5) Pass this calculation location to the database (in order to stitch calculations together) + (6) Run CP2K drone to assimilate results of the calculation + + And this doesn't change, whether its a FW that is part of series of calculations, or a parallel + FW, or a standalone. Therefore, this BaseFW defines this general structure, and other FWs simply + specify a slightly specific variation on this. Generally, the FWs that inherit will simply specify + what the default input set is. + + Args: + structure (Structure): Input structure. Note that for prev_calc_loc jobs, the structure + is only used to set the name of the FW and any structure with the same composition + can be used. + name (str): Name for the Firework. + cp2k_input_set (Cp2kInputSet): input set to use. Needed unless a previous calculation's inputs + parameters will be used. + cp2k_input_set_params (dict): Dict of Cp2kInputSet kwargs. Remember, overriding default set parameters + is done inside of here using the "{override_default_params: {...} }" as an entry + cp2k_cmd (str): Command to run cp2k. Supports env_chk. + prev_calc_loc (bool or str): If true (default), copies outputs from previous calc. If + a str value, retrieves a previous calculation output by name. If False/None, will create + new calculation using the provided structure. + prev_calc_dir (str): Path to a previous calculation to copy from. + db_file (str): Path to file specifying db credentials. Supports env_chk. + parents (Firework): Parents of this particular Firework. FW or list of FWS. + cp2ktodb_kwargs (dict): kwargs to pass to Cp2kToDb + \*\*kwargs: Other kwargs that are passed to Firework.__init__. + """ + t = [] + + # Set up the name, the input set, and how cp2k will be pushed to DB + if "project_name" not in cp2k_input_set_params.keys(): + cp2k_input_set_params["project_name"] = name + + cp2k_input_set = cp2k_input_set or StaticSet( + structure, **cp2k_input_set_params + ) + + cp2k_input_set.silence() # Don't include section descriptions + + cp2ktodb_kwargs = cp2ktodb_kwargs or {} + if "additional_fields" not in cp2ktodb_kwargs: + cp2ktodb_kwargs["additional_fields"] = {} + cp2ktodb_kwargs["additional_fields"]["task_label"] = name + + # if continuing from prev calc, update the structure with the previous result + # For hybrid, should almost always be true (initialize with gga first) + if prev_calc_loc: + if isinstance(files_to_copy, str): + files_to_copy = [files_to_copy] + t.append( + CopyCp2kOutputs( + files_to_copy=files_to_copy, calc_loc=prev_calc_loc + ) + ) + t.append(UpdateStructureFromPrevCalc(prev_calc_loc=prev_calc_loc)) + + # if prev calc directory is being REPEATED, copy files + if prev_calc_dir: + t.append( + WriteCp2kFromPrevious( + cp2k_input_set_params=cp2k_input_set_params + ) + ) + + # else run based on the IO set + else: + t.append( + WriteCp2kFromIOSet( + structure=structure, + cp2k_input_set=cp2k_input_set, + cp2k_input_params=cp2k_input_set_params, + ) + ) + + t.append(RunCp2KCustodian(cp2k_cmd=cp2k_cmd)) + t.append(PassCalcLocs(name=name)) + t.append(Cp2kToDb(db_file=db_file, **cp2ktodb_kwargs)) + + spec = {"cp2k_input_set": cp2k_input_set.as_dict()} + spec.update(kwargs.get('spec', {})) + super().__init__( + t, + parents=parents, + name=name, + spec=spec, + **kwargs + ) + + +class StaticFW(BaseFW): + def __init__( + self, + structure=None, + name="Static", + cp2k_input_set=None, + cp2k_input_set_params={}, + cp2k_cmd="cp2k", + prev_calc_loc=True, + prev_calc_dir=None, + db_file=None, + cp2ktodb_kwargs=None, + parents=None, + files_to_copy=[], + **kwargs + ): + """ + Standard static calculation. Can start from input set or from previous calculation. + """ + cp2k_input_set = cp2k_input_set or StaticSet( + structure, project_name=name, + **cp2k_input_set_params.copy() + ) + + super().__init__( + structure=structure, + name=name, + cp2k_input_set=cp2k_input_set, + cp2k_input_set_params=cp2k_input_set_params, + cp2k_cmd=cp2k_cmd, + prev_calc_loc=prev_calc_loc, + prev_calc_dir=prev_calc_dir, + db_file=db_file, + cp2ktodb_kwargs=cp2ktodb_kwargs, + parents=parents, + files_to_copy=files_to_copy, + **kwargs + ) + + +class StaticHybridFW(BaseFW): + def __init__( + self, + structure=None, + name="HybridStatic", + cp2k_input_set=None, + cp2k_input_set_params={}, + cp2k_cmd="cp2k", + prev_calc_loc=True, + prev_calc_dir=None, + db_file=None, + cp2ktodb_kwargs=None, + parents=None, + files_to_copy=[], + **kwargs + ): + """ + Hybrid Static calculation. Presumably restarting from a previous GGA calculation. + """ + cp2k_input_set = cp2k_input_set or HybridStaticSet( + structure, project_name=name, + **cp2k_input_set_params.copy() + ) + + super().__init__( + structure=structure, + name=name, + cp2k_input_set=cp2k_input_set, + cp2k_input_set_params=cp2k_input_set_params, + cp2k_cmd=cp2k_cmd, + prev_calc_loc=prev_calc_loc, + prev_calc_dir=prev_calc_dir, + db_file=db_file, + cp2ktodb_kwargs=cp2ktodb_kwargs, + parents=parents, + files_to_copy=files_to_copy, + **kwargs + ) + + +class RelaxFW(BaseFW): + def __init__( + self, + structure=None, + name="Relax", + cp2k_input_set=None, + cp2k_input_set_params={}, + cp2k_cmd="cp2k", + prev_calc_loc=True, + prev_calc_dir=None, + db_file=None, + cp2ktodb_kwargs=None, + parents=None, + files_to_copy=[], + **kwargs + ): + """ + Geometry Relaxation Calculation. + """ + + cp2k_input_set = cp2k_input_set or RelaxSet( + structure, project_name=name, + **cp2k_input_set_params.copy() + ) + + super().__init__( + structure=structure, + name=name, + cp2k_input_set=cp2k_input_set, + cp2k_input_set_params=cp2k_input_set_params, + cp2k_cmd=cp2k_cmd, + prev_calc_loc=prev_calc_loc, + prev_calc_dir=prev_calc_dir, + db_file=db_file, + cp2ktodb_kwargs=cp2ktodb_kwargs, + parents=parents, + files_to_copy=files_to_copy, + **kwargs + ) + + +class CellOptFW(BaseFW): + def __init__( + self, + structure=None, + name="Relax", + cp2k_input_set=None, + cp2k_input_set_params={}, + cp2k_cmd="cp2k", + prev_calc_loc=True, + prev_calc_dir=None, + db_file=None, + cp2ktodb_kwargs=None, + parents=None, + files_to_copy=[], + **kwargs + ): + """ + Geometry Relaxation Calculation. + """ + + cp2k_input_set = cp2k_input_set or CellOptSet( + structure, project_name=name, + **cp2k_input_set_params.copy() + ) + + super().__init__( + structure=structure, + name=name, + cp2k_input_set=cp2k_input_set, + cp2k_input_set_params=cp2k_input_set_params, + cp2k_cmd=cp2k_cmd, + prev_calc_loc=prev_calc_loc, + prev_calc_dir=prev_calc_dir, + db_file=db_file, + cp2ktodb_kwargs=cp2ktodb_kwargs, + parents=parents, + files_to_copy=files_to_copy, + **kwargs + ) + + +class RelaxHybridFW(BaseFW): + def __init__( + self, + structure=None, + name="HybridRelax", + cp2k_input_set=None, + cp2k_input_set_params={}, + cp2k_cmd="cp2k", + prev_calc_loc=True, + prev_calc_dir=None, + db_file=None, + cp2ktodb_kwargs=None, + parents=None, + files_to_copy=[], + **kwargs + ): + """ + Hybrid Static calculation. Presumably restarting from a previous GGA calculation. + """ + + cp2k_input_set = cp2k_input_set or HybridRelaxSet( + structure, project_name=name, + **cp2k_input_set_params.copy() + ) + + super().__init__( + structure=structure, + name=name, + cp2k_input_set=cp2k_input_set, + cp2k_input_set_params=cp2k_input_set_params, + cp2k_cmd=cp2k_cmd, + prev_calc_loc=prev_calc_loc, + prev_calc_dir=prev_calc_dir, + db_file=db_file, + cp2ktodb_kwargs=cp2ktodb_kwargs, + parents=parents, + files_to_copy=files_to_copy, + **kwargs + ) + + +class CellOptHybridFW(BaseFW): + def __init__( + self, + structure=None, + name="HybridCellOpt", + cp2k_input_set=None, + cp2k_input_set_params={}, + cp2k_cmd="cp2k", + prev_calc_loc=True, + prev_calc_dir=None, + db_file=None, + cp2ktodb_kwargs=None, + parents=None, + files_to_copy=[], + **kwargs + ): + """ + Hybrid cell optimization. + """ + + cp2k_input_set = cp2k_input_set or HybridCellOptSet( + structure, project_name=name, + **cp2k_input_set_params.copy() + ) + + super().__init__( + structure=structure, + name=name, + cp2k_input_set=cp2k_input_set, + cp2k_input_set_params=cp2k_input_set_params, + cp2k_cmd=cp2k_cmd, + prev_calc_loc=prev_calc_loc, + prev_calc_dir=prev_calc_dir, + db_file=db_file, + cp2ktodb_kwargs=cp2ktodb_kwargs, + parents=parents, + files_to_copy=files_to_copy, + **kwargs + ) diff --git a/atomate/cp2k/fireworks/defects.py b/atomate/cp2k/fireworks/defects.py new file mode 100644 index 000000000..57c3cf322 --- /dev/null +++ b/atomate/cp2k/fireworks/defects.py @@ -0,0 +1,39 @@ +# coding: utf-8 + +from __future__ import ( + absolute_import, + division, + print_function, + unicode_literals, +) + +import warnings + +""" +Defines standardized Fireworks that can be chained easily to perform various +sequences of CP2K calculations. +""" + +from fireworks.core.firework import Firework + +from pymatgen.core.structure import Structure +from pymatgen.io.cp2k.sets import ( + RelaxSet, + StaticSet, + HybridStaticSet, + HybridRelaxSet, +) + +from atomate.cp2k.firetasks.write_inputs import ( + WriteCp2kFromIOSet, + WriteCp2kFromPrevious, +) +from atomate.cp2k.firetasks.run_calc import RunCp2KCustodian +from atomate.cp2k.firetasks.glue_tasks import ( + UpdateStructureFromPrevCalc, + CopyCp2kOutputs, +) +from atomate.cp2k.firetasks.parse_outputs import Cp2kToDb + +from atomate.common.firetasks.glue_tasks import PassCalcLocs + diff --git a/atomate/cp2k/utils.py b/atomate/cp2k/utils.py new file mode 100644 index 000000000..8672327cb --- /dev/null +++ b/atomate/cp2k/utils.py @@ -0,0 +1,146 @@ +from pymatgen.analysis.defects.generators import VacancyGenerator, InterstitialGenerator, SubstitutionGenerator +from pymatgen.analysis.defects.core import Vacancy +from pymatgen.core.structure import Structure +import itertools +import numpy as np + + +def get_defect_structures(structure, defect_dict): + + vacancies = defect_dict.get('vacancies') + substitutions = defect_dict.get('substitutions') + interstitials = defect_dict.get('interstititals') + defect_structures = [] + + if vacancies: + # only create vacancies of interest... + b_struct = structure.copy() # base structure (un-defective) + VG = VacancyGenerator(b_struct, include_bv_charge=False) + for _vac in VG: + vac = GhostVacancy(_vac.bulk_structure, _vac.site, _vac.charge, _vac.multiplicity) + vac_ind = vac.site.specie.symbol + if vac_ind not in vacancies.keys(): + continue + else: + if isinstance(vacancies[vac_ind], list): + for c in vacancies[vac_ind]: + v = vac.copy() + v.set_charge(c) + defect_structures.append(v) + else: + defect_structures.append(vac) + + # TODO shouldn't this be done in defect object? + for d in defect_structures: + d.bulk_structure.set_charge(d.bulk_structure.charge + d.charge) + + return defect_structures + + +def optimize_structure_sc_scale(inp_struct, final_site_no=None): + """ + A function for finding optimal supercell transformation + by maximizing the nearest image distance with the number of + atoms remaining less than final_site_no + + Args: + inp_struct: input pymatgen Structure object + final_site_no (float or int): maximum number of atoms + for final supercell + + Returns: + 3 x 1 array for supercell transformation + """ + if final_site_no <= len(inp_struct.sites): + final_site_no = len(inp_struct.sites) + + dictio={} + #consider up to a 7x7x7 supercell + for kset in itertools.product(range(1,7), range(1,7), range(1,7)): + num_sites = len(inp_struct) * np.product(kset) + if num_sites > final_site_no: + continue + + struct = inp_struct.copy() + struct.make_supercell(kset) + + #find closest image + min_dist = 1000. + for image_array in itertools.product( range(-1,2), range(-1,2), range(-1,2)): + if image_array == (0,0,0): + continue + distance = struct.get_distance(0, 0, image_array) + if distance < min_dist: + min_dist = distance + + min_dist = round(min_dist, 3) + if min_dist in dictio.keys(): + if dictio[min_dist]['num_sites'] > num_sites: + dictio[min_dist].update( {'num_sites': num_sites, 'supercell': kset[:]}) + else: + dictio[min_dist] = {'num_sites': num_sites, 'supercell': kset[:]} + + if not len(dictio.keys()): + raise RuntimeError('could not find any supercell scaling vector') + + min_dist = max( list(dictio.keys())) + biggest = dictio[ min_dist]['supercell'] + + return biggest + + +def optimize_structure_sc_scale_by_length(inp_struct, minimum_distance): + """ + Given a structure, provide the scaling matrix such that the points in the lattice maintain + a minimum distance from one another. + + Args: + inp_struct (Structure): structure to consider + minimum_distance (float or int): minimum distance desired between atoms. Note that if the + value of minimum distance is < 0, this means a scalar will be returned, defining the + minimum uniform scaling required to achieve minimum_distance. If minimum distance is + > 0, then a vector will be returned, giving the appropriate scaling in each direction + to achieve minimum_distance. + :return: + """ + if minimum_distance == 0: + raise ValueError("Cannot assign a minimum distance of 0!") + elif minimum_distance < 0: + minimum_distance = -minimum_distance + return np.ceil(minimum_distance/np.linalg.norm(inp_struct.lattice.matrix, axis=0)).astype(int) + else: + return np.ceil(minimum_distance/np.linalg.norm(inp_struct.lattice.matrix, axis=0)).astype(int) + + +class GhostVacancy(Vacancy): + """ + Current workaround for the Vacancy class in CP2K. Vacancies are normally just structures + with an atom removed, but with CP2K we want to retain the site and turn off its interaction + potential (Ghost atom) in order to avoid Basis set superposition error for localized basis. + """ + + def generate_defect_structure(self, supercell=(1, 1, 1)): + """ + Returns Defective Vacancy structure, decorated with charge + Args: + supercell (int, [3x1], or [[]] (3x3)): supercell integer, vector, or scaling matrix + """ + defect_structure = self.bulk_structure.copy() + defect_structure.make_supercell(supercell) + + # create a trivial defect structure to find where supercell transformation moves the lattice + struct_for_defect_site = Structure(self.bulk_structure.copy().lattice, + [self.site.specie], + [self.site.frac_coords], + to_unit_cell=True) + struct_for_defect_site.make_supercell(supercell) + defect_site = struct_for_defect_site[0] + + poss_deflist = sorted( + defect_structure.get_sites_in_sphere(defect_site.coords, 0.1, include_index=True), key=lambda x: x[1]) + defindex = poss_deflist[0][2] + defect_structure.add_site_property('ghost', [True if i == defindex else False for i in range(len(defect_structure))]) + defect_structure.set_charge(self.charge) + return defect_structure + + diff --git a/atomate/cp2k/workflows/__init__.py b/atomate/cp2k/workflows/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/atomate/cp2k/workflows/core.py b/atomate/cp2k/workflows/core.py new file mode 100644 index 000000000..6d74a3541 --- /dev/null +++ b/atomate/cp2k/workflows/core.py @@ -0,0 +1,414 @@ +from pymatgen.io.cp2k.sets import ( + StaticSet, + RelaxSet, + HybridStaticSet, + HybridRelaxSet, + HybridCellOptSet +) +from atomate.cp2k.fireworks.core import ( + StaticFW, + RelaxFW, + StaticHybridFW, + RelaxHybridFW, + CellOptHybridFW +) +from fireworks import Workflow +from atomate.utils.utils import get_wf_from_spec_dict +from monty.serialization import loadfn +import os + +module_dir = os.path.join(os.path.dirname(os.path.abspath(__file__))) + +ADD_NAMEFILE = True +SCRATCH_DIR = ">>scratch_dir<<" +STABILITY_CHECK = False +CP2K_CMD = ">>cp2k_cmd<<" +DB_FILE = ">>db_file<<" +ADD_WF_METADATA = True + +# TODO Incomplete. Just an idea. +def get_wf(structure, wf_filename, params=None, common_params=None, wf_metadata=None): + """ + structure, + cp2k_static_input_set=None, + cp2k_hybrid_input_set=None, + name="Hybrid-Static-WF", + user_static_settings={}, + user_hybrid_settings={}, + cp2k_cmd=CP2K_CMD, + db_file=DB_FILE, + cp2ktodb_kwargs=None, + metadata=None, + """ + + fws = [] + + sets = [ + + ] + + for p in params: + fw = p['fw'] + + mod = __import__(modulepath, globals(), locals(), [classname], 0) + return getattr(mod, classname) + + cp2k_static_input_set = cp2k_static_input_set or StaticSet( + structure, **user_static_settings + ) + cp2k_hybrid_input_set = cp2k_hybrid_input_set or HybridStaticSet( + structure, **user_hybrid_settings + ) + + gga_name = "{}-GGA-FW".format(structure.composition.reduced_formula) + hybrid_name = "{}-Hybrid-FW".format(structure.composition.reduced_formula) + restart_filename = "{}-RESTART.wfn".format(gga_name) # GGA restart WFN + + wfname = "{}:{}".format(structure.composition.reduced_formula, name) + + return Workflow(fws, name=wfname, metadata=wf_metadata) + + +def get_wf_static( + structure, + cp2k_input_set=None, + name="Static-WF", + cp2k_cmd=">>cp2k_cmd<<", + db_file=">>db_file<<", + user_cp2k_settings={}, + cp2ktodb_kwargs=None, + metadata=None, +): + """ + Returns the workflow that computes the bulk modulus by fitting to the given equation of state. + + Args: + structure (Structure): input structure. + cp2k_input_set (Cp2kInputSet): Cp2k input set to use, if not using the default. + cp2k_cmd (str): cp2k command to run. + db_file (str): path to the db file. + tag (str): something unique to identify the tasks in this workflow. If None a random uuid + will be assigned. + user_cp2k_settings (dict): If passing a non-default Cp2kInputSet, this dict contains the + kwargs to pass to the Cp2kInputSet initialization. Note that to override the cp2k input + file parameters themselves, a key of the form {'override_default_params': {...}} + + Returns: + Workflow + """ + fws = [] + + fw = StaticFW( + structure=structure, + name=name, + cp2k_input_set=cp2k_input_set, + cp2k_input_set_params=user_cp2k_settings, + cp2k_cmd=cp2k_cmd, + prev_calc_loc=False, + prev_calc_dir=None, + db_file=db_file, + cp2ktodb_kwargs=cp2ktodb_kwargs, + parents=None, + files_to_copy=None + ) + fws.append(fw) + + wfname = "{}:{}".format(structure.composition.reduced_formula, name) + + return Workflow(fws, name=wfname, metadata=metadata) + + +def get_wf_relax( + structure, + cp2k_input_set=None, + name="Relax WF", + cp2k_cmd=CP2K_CMD, + db_file=DB_FILE, + user_cp2k_settings={}, + cp2ktodb_kwargs=None, + metadata=None, +): + """ + Returns the workflow that computes the bulk modulus by fitting to the given equation of state. + + Args: + structure (Structure): input structure. + cp2k_input_set (Cp2kInputSet): for relax calculations + cp2k_cmd (str): cp2k command to run. + db_file (str): path to the db file. + tag (str): something unique to identify the tasks in this workflow. If None a random uuid + will be assigned. + + Returns: + Workflow + """ + fws = [] + + fw = RelaxFW( + structure=structure, + name=name, + cp2k_input_set=cp2k_input_set, + cp2k_input_set_params=user_cp2k_settings, + cp2k_cmd=cp2k_cmd, + prev_calc_loc=False, + prev_calc_dir=None, + db_file=db_file, + cp2ktodb_kwargs=cp2ktodb_kwargs, + parents=None, + files_to_copy=None + ) + fws.append(fw) + + wfname = "{}:{}".format(structure.composition.reduced_formula, name) + + return Workflow(fws, name=wfname, metadata=metadata) + + +def get_wf_hybrid_static( + structure, + cp2k_static_input_set=None, + cp2k_hybrid_input_set=None, + name="Hybrid-Static-WF", + user_static_settings={}, + user_hybrid_settings={}, + cp2k_cmd=CP2K_CMD, + db_file=DB_FILE, + cp2ktodb_kwargs=None, + metadata=None, +): + + fws = [] + + gga_name = "{}-GGA-FW".format(structure.composition.reduced_formula) + hybrid_name = "{}-Hybrid-FW".format(structure.composition.reduced_formula) + restart_filename = "{}-RESTART.wfn".format(gga_name) # GGA restart WFN + user_hybrid_settings['wfn_restart_file_name'] = restart_filename + + fw1 = StaticFW( + structure=structure, + name=gga_name, + cp2k_input_set=cp2k_static_input_set, + cp2k_input_set_params=user_static_settings, + cp2k_cmd=cp2k_cmd, + prev_calc_loc=False, + db_file=db_file, + cp2ktodb_kwargs=cp2ktodb_kwargs, + parents=None, + files_to_copy=None + ) + fws.append(fw1) + + fw2 = StaticHybridFW( + structure=structure, + name=hybrid_name, + cp2k_input_set=cp2k_hybrid_input_set, + cp2k_input_set_params=user_hybrid_settings, + cp2k_cmd=cp2k_cmd, + prev_calc_loc=True, + db_file=db_file, + cp2ktodb_kwargs=cp2ktodb_kwargs, + parents=fw1, + files_to_copy=restart_filename + ) + fws.append(fw2) + + wfname = "{}:{}".format(structure.composition.reduced_formula, name) + + return Workflow(fws, name=wfname, metadata=metadata) + + +def get_wf_hybrid_relax( + structure, + cp2k_static_input_set=None, + cp2k_hybrid_input_set=None, + name="Hybrid-Relax-WF", + user_static_settings={}, + user_hybrid_settings={}, + cp2k_cmd=CP2K_CMD, + db_file=DB_FILE, + cp2ktodb_kwargs=None, + metadata=None, +): + + fws = [] + + gga_name = "{}-GGA-FW".format(structure.composition.reduced_formula) + hybrid_name = "{}-Hybrid-FW".format(structure.composition.reduced_formula) + restart_filename = "{}-RESTART.wfn".format(gga_name) # GGA restart WFN + user_hybrid_settings['wfn_restart_file_name'] = restart_filename + + fw1 = StaticFW( + structure=structure, + name=gga_name, + cp2k_input_set=cp2k_static_input_set, + cp2k_input_set_params=user_static_settings, + cp2k_cmd=cp2k_cmd, + prev_calc_loc=False, + db_file=db_file, + cp2ktodb_kwargs=cp2ktodb_kwargs, + parents=None, + files_to_copy=False + ) + fws.append(fw1) + + fw2 = RelaxHybridFW( + structure=structure, + name=hybrid_name, + cp2k_input_set=cp2k_hybrid_input_set, + cp2k_input_set_params=user_hybrid_settings, + cp2k_cmd=cp2k_cmd, + prev_calc_loc=True, + db_file=db_file, + cp2ktodb_kwargs=cp2ktodb_kwargs, + parents=fw1, + files_to_copy=restart_filename + ) + fws.append(fw2) + + wfname = "{}:{}".format(structure.composition.reduced_formula, name) + + return Workflow(fws, name=wfname, metadata=metadata) + + +def get_wf_hybrid_cell_opt( + structure, + cp2k_static_input_set=None, + cp2k_hybrid_input_set=None, + name="Hybrid-CellOpt-WF", + user_static_settings={}, + user_hybrid_settings={}, + cp2k_cmd=CP2K_CMD, + db_file=DB_FILE, + cp2ktodb_kwargs=None, + metadata=None, +): + + fws = [] + + gga_name = "{}-GGA-FW".format(structure.composition.reduced_formula) + hybrid_name = "{}-Hybrid-FW".format(structure.composition.reduced_formula) + restart_filename = "{}-RESTART.wfn".format(gga_name) # GGA restart WFN + user_hybrid_settings['wfn_restart_file_name'] = restart_filename + + fw1 = StaticFW( + structure=structure, + name=gga_name, + cp2k_input_set=cp2k_static_input_set, + cp2k_input_set_params=user_static_settings, + cp2k_cmd=cp2k_cmd, + prev_calc_loc=False, + db_file=db_file, + cp2ktodb_kwargs=cp2ktodb_kwargs, + parents=None, + files_to_copy=False + ) + fws.append(fw1) + + fw2 = CellOptHybridFW( + structure=structure, + name=hybrid_name, + cp2k_input_set=cp2k_hybrid_input_set, + cp2k_input_set_params=user_hybrid_settings, + cp2k_cmd=cp2k_cmd, + prev_calc_loc=True, + db_file=db_file, + cp2ktodb_kwargs=cp2ktodb_kwargs, + parents=fw1, + files_to_copy=restart_filename + ) + fws.append(fw2) + + wfname = "{}:{}".format(structure.composition.reduced_formula, name) + + return Workflow(fws, name=wfname, metadata=metadata) + + +def get_wf_gga_relax_to_hybrid_static( + structure, + cp2k_gga_input_set=None, + cp2k_hybrid_input_set=None, + name="Hybrid-Static-WF", + user_gga_settings={}, + user_hybrid_settings={}, + cp2k_cmd=CP2K_CMD, + db_file=DB_FILE, + cp2ktodb_kwargs=None, + metadata=None, +): + + fws = [] + + gga_name = "{}-GGA-FW".format(structure.composition.reduced_formula) + hybrid_name = "{}-Hybrid-FW".format(structure.composition.reduced_formula) + restart_filename = "{}-RESTART.wfn".format(gga_name) # GGA restart WFN + user_hybrid_settings['wfn_restart_file_name'] = restart_filename + + fw1 = RelaxFW( + structure=structure, + name=gga_name, + cp2k_input_set=cp2k_gga_input_set, + cp2k_input_set_params=user_gga_settings, + cp2k_cmd=cp2k_cmd, + prev_calc_loc=False, + db_file=db_file, + cp2ktodb_kwargs=cp2ktodb_kwargs, + parents=None, + files_to_copy=None + ) + fws.append(fw1) + + fw2 = StaticHybridFW( + structure=structure, + name=hybrid_name, + cp2k_input_set=cp2k_hybrid_input_set, + cp2k_input_set_params=user_hybrid_settings, + cp2k_cmd=cp2k_cmd, + prev_calc_loc=True, + db_file=db_file, + cp2ktodb_kwargs=cp2ktodb_kwargs, + parents=fw1, + files_to_copy=restart_filename + ) + fws.append(fw2) + + wfname = "{}:{}".format(structure.composition.reduced_formula, name) + + return Workflow(fws, name=wfname, metadata=metadata) + +def get_wf_atom( + structure, + cp2k_gga_input_set=None, + cp2k_hybrid_input_set=None, + name="Hybrid-Static-WF", + user_gga_settings={}, + user_hybrid_settings={}, + cp2k_cmd=CP2K_CMD, + db_file=DB_FILE, + cp2ktodb_kwargs=None, + metadata=None, +): + + fws = [] + + hybrid_name = "{}-Hybrid-Atom-FW".format(structure.composition.reduced_formula) + + cis = HybridStaticSet(structure, hybrid_functional='PBE0', hf_fraction=1, gga_x_fraction=0) + cis.insert({'FORCE_EVAL': {'DFT': {''}}}) + + fw = StaticHybridFW( + structure=structure, + name=hybrid_name, + cp2k_input_set=cp2k_hybrid_input_set, + cp2k_input_set_params=user_hybrid_settings, + cp2k_cmd=cp2k_cmd, + prev_calc_loc=True, + db_file=db_file, + cp2ktodb_kwargs=cp2ktodb_kwargs, + parents=None, + files_to_copy=None + ) + fws.append(fw) + + wfname = "{}:{}".format(structure.composition.reduced_formula, name) + + return Workflow(fws, name=wfname, metadata=metadata) diff --git a/atomate/cp2k/workflows/defects.py b/atomate/cp2k/workflows/defects.py new file mode 100644 index 000000000..531b57f61 --- /dev/null +++ b/atomate/cp2k/workflows/defects.py @@ -0,0 +1,308 @@ +""" +This module defines a workflow for charged defects in non-metals. It is based on the original workflow +developed by Danny Broberg for VASP. +""" + +from fireworks import Workflow, Firework +from copy import deepcopy +from pymatgen.io.vasp.sets import MPRelaxSet, MVLScanRelaxSet +from pymatgen.symmetry.analyzer import SpacegroupAnalyzer + +from atomate.utils.utils import get_logger +from atomate.cp2k.fireworks.core import StaticFW, RelaxFW, StaticHybridFW, RelaxHybridFW +from atomate.cp2k.utils import get_defect_structures, optimize_structure_sc_scale, optimize_structure_sc_scale_by_length + +#from atomate.vasp.firetasks.defects import DefectSetupFiretask +#from atomate.vasp.fireworks.defects import DefectAnalysisFW + +logger = get_logger(__name__) + + +# TODO: does conventional unit cell vs. primitive matter? +# TODO: run_analysis FW for all defects in WF or should it be a part of DefectDrone +def get_wf_chg_defects(structure, + mpid=None, + name="chg_defect_wf", + cp2k_cmd=">>cp2k_cmd<<", + db_file=">>db_file<<", + cp2k_gga_input_set=None, + user_gga_settings={}, + cp2k_hybrid_input_set=None, + user_hybrid_settings={}, + cp2ktodb_kwargs={}, + diel_flag=False, + defect_dict={}, + rerelax_flag=False, + relax_flag=True, + hybrid_flag=True, + hybrid_relax=True, + minimum_distance=16): + """ + Returns a charged defect workflow. + + Firework 0 : (optional) re-relax the bulk structure that is input before running rest of workflow + Firework 1 : (optional) run hybrid calculation of bulk structure to allow for + Firework 2 : bulk supercell calculation + Firework 3 : (optional) dielectric calculation + Firework 4 - len(defectcalcs): Optimize the internal structure (fixed volume) + of each charge+defect combination. + + (note if no re-relaxation required, then 1-len(defectcalcs) will all run at same time...) + + Args: + structure (Structure): input structure to have defects run on + mpid (str): Materials Project id number to be used (for storage in metadata). + name (str): some appropriate name for the workflow. + cp2k_gga_input_set (Cp2kInputSet): User defined Cp2kInputSet to use for the GGA calculations + in this workflow in place of the default. + Default: None + user_gga_settings (dict): Dictionary defining user settings for initializing the Cp2kInputSet + for GGA calculations. See Cp2kInputSet for list of arguments given to Cp2kInputSet. + This can be more convenient to use this instead of cp2k_gga_input_set. + Default: {} + cp2k_hybrid_input_set (Cp2kInputSet): User defined Cp2kInputSet to use for the hybrid calculations + in this workflow in place of the default. + Default: None + user_hybrid_settings (dict): Dictionary defining user settings for initializing the Cp2kInputSet + for hybrid calculations. See Cp2kInputSet for list of arguments given to Cp2kInputSet. + This can be more convenient to use this instead of cp2k_gga_input_set. + Default: {} + cp2k_cmd (str): Command to run cp2k. + Default: >>cp2k_cmd<< (use env check on fworker) + db_file (str): path to file containing the database credentials. + Default: >>db_file<< (use env check on fworker) + conventional (bool): flag to use conventional structure (rather than primitive) for defect supercells, + defaults to True. + diel_flag (bool): flag to also run dielectric calculations. + (required for charge corrections to be run) defaults to True. + n_max (int): maximum supercell size to consider for supercells + + job_type (str): type of defect calculation that user desires to run + default is 'normal' which runs a GGA defect calculation + additional options are: + 'double_relaxation_run' which runs a double relaxation GGA run + 'metagga_opt_run' which runs a double relaxation with SCAN (currently + no way to turn off double relaxation approach with SCAN) + 'hse' which runs a relaxation step with GGA followed by a relaxation with HSE + NOTE: for these latter two we highly recommend that rerelax is set to True + so the bulk_structure's lattice is optimized before running defects + + vacancies (list): + If list is totally empty, all vacancies are considered (default). + If only specific vacancies are desired then add desired Element symbol to the list + ex. ['Ga'] in GaAs structure will only produce Galium vacancies + + if NO vacancies are desired, then just add an empty list to the list + ex. [ [] ] yields no vacancies + + substitutions (dict): + If dict is totally empty, all intrinsic antisites are considered (default). + If only specific antisites/substituions are desired then add vacant site type as key, with list of + sub site symbol as value + ex 1. {'Ga': ['As'] } in GaAs structure will only produce Arsenic_on_Gallium antisites + ex 2. {'Ga': ['Sb'] } in GaAs structure will only produce Antimonide_on_Gallium substitutions + + if NO antisites or substitutions are desired, then just add an empty dict + ex. {'None':{}} yields no antisites or subs + + + interstitials (list): + If list is totally empty, NO interstitial defects are considered (default). + Option 1 for generation: If one wants to use Pymatgen to predict interstitial + then list of pairs of [symbol, generation method (str)] can be provided + ex. ['Ga', 'Voronoi'] in GaAs structure will produce Galium interstitials from the + Voronoi site finding algorithm + NOTE: only options for interstitial generation are "Voronoi" and "Nils" + Option 2 for generation: If user wants to add their own interstitial sites for consideration + the list of pairs of [symbol, Interstitial object] can be provided, where the + Interstitial pymatgen.analysis.defects.core object is used to describe the defect of interest + + + initial_charges (dict): + says how to specify initial charges for each defect. + An empty dict (DEFAULT) is to do a fairly restrictive charge generation method: + for vacancies: use bond valence method to assign oxidation states and consider + negative of the vacant site's oxidation state as single charge to try + antisites and subs: use bond valence method to assign oxidation states and consider + negative of the vacant site's oxidation state as single charge to try + + added to likely charge of substitutional site (closest to zero) + interstitial: charge zero + For non empty dict, charges are specified as: + initial_charges = {'vacancies': {'Ga': [-3,2,1,0]}, + 'substitutions': {'Ga': {'As': [0]} }, + 'interstitials': {}} + in the GaAs structure this makes vacancy charges in states -3,-2,-1,0; Ga_As antisites in the q=0 state, + and all other defects will have charges generated in the restrictive automated format stated for DEFAULT + + rerelax_flag (bool): + Flag to re-relax the input structure for minimizing forces + (does volume relaxation of small primitive cell) + Default is False (no re-relaxation occurs) + Returns: + Workflow + """ + fws, parents = [], [] + + # Make supercell. Should always be done for CP2K because it is gamma point only + scale = optimize_structure_sc_scale_by_length(structure, minimum_distance=minimum_distance) + + cp2ktodb_kwargs['additional_fields'] = cp2ktodb_kwargs.get('additional_fields', {}) + cp2ktodb_kwargs['parse_hartree'] = True + + # Run the bulk structure calculation GGA-->Hybrid to get bulk band gap + # Re-relaxing the structure if not starting from a good initial state + bulk_name = "Bulk-GGA-FW" + bulk_hybrid_name = "Bulk-Hybrid-FW" + restart_filename = "{}-RESTART.wfn".format(bulk_name) # GGA restart WFN + user_gga_settings.update({'print_hartree_potential': True}) + user_hybrid_settings.update({'wfn_restart_file_name': restart_filename, + 'print_hartree_potential': True, + 'print_e_density': True}) + struc = structure.copy() # make so unscaled cell can be used for defects + struc.make_supercell(scale) + if rerelax_flag: + fws.append( + RelaxFW( + structure=struc, + name=bulk_name, + cp2k_input_set=cp2k_gga_input_set, + cp2k_input_set_params=user_gga_settings.copy(), + cp2k_cmd=cp2k_cmd, + prev_calc_loc=None, + db_file=db_file, + cp2ktodb_kwargs=cp2ktodb_kwargs.copy(), + parents=None, + files_to_copy=None + ) + ) + if hybrid_flag: + fws.append( + StaticHybridFW( + structure=struc, + name=bulk_hybrid_name, + cp2k_input_set=cp2k_hybrid_input_set, + cp2k_input_set_params=user_hybrid_settings.copy(), + cp2k_cmd=cp2k_cmd, + prev_calc_loc=bulk_name, + db_file=db_file, + cp2ktodb_kwargs=cp2ktodb_kwargs.copy(), + parents=fws[-1], + files_to_copy=restart_filename + ) + ) + else: + fws.append( + StaticFW( + structure=struc, + name=bulk_name, + cp2k_input_set=cp2k_gga_input_set, + cp2k_input_set_params=user_gga_settings.copy(), + cp2k_cmd=cp2k_cmd, + prev_calc_loc=None, + db_file=db_file, + cp2ktodb_kwargs=cp2ktodb_kwargs.copy(), + parents=None, + files_to_copy=None + ) + ) + if hybrid_flag: + fws.append( + StaticHybridFW( + structure=struc, + name=bulk_hybrid_name, + cp2k_input_set=cp2k_hybrid_input_set, + cp2k_input_set_params=user_hybrid_settings.copy(), + cp2k_cmd=cp2k_cmd, + prev_calc_loc=bulk_name, + db_file=db_file, + cp2ktodb_kwargs=cp2ktodb_kwargs.copy(), + parents=fws[-1], + files_to_copy=restart_filename + ) + ) + + # Run dielectric calculation before defects (for finite size corrections) + if diel_flag: + print("DIELECTRIC FW FOR CP2K IS NOT IMPLEMENTED") + + # Collect all defects based on the defect provided and run GGA-->Hybrid for each + defects = get_defect_structures(structure, defect_dict=defect_dict) + for i, defect in enumerate(defects): + bulk_name = 'Re-Relax-FW' if rerelax_flag else None # So prev_calc_loc can find re-relax fw + cp2ktodb_kwargs['additional_fields'].update({'defect': defect.as_dict().copy(), + 'scale': scale, + 'task_label': 'defect'}) # Keep record of defect object + gga_name = "Defect-GGA-FW-{}".format(i) # How to track the GGA FW + hybrid_name = "Defect-Hybrid-FW-{}".format(i) # How to track the hybrid FW + restart_filename = "{}-RESTART.wfn".format(gga_name) # GGA restart WFN + user_hybrid_settings.update({'wfn_restart_file_name': restart_filename}) + defect_structure = defect.generate_defect_structure(scale) + print(defect_structure.num_sites) + if relax_flag: + fws.append( + RelaxFW( + structure=defect_structure, + name=gga_name, + cp2k_input_set=cp2k_gga_input_set, + cp2k_input_set_params=user_gga_settings.copy(), + cp2k_cmd=cp2k_cmd, + prev_calc_loc=bulk_name, + db_file=db_file, + cp2ktodb_kwargs=cp2ktodb_kwargs.copy(), + parents=parents, + files_to_copy=None + ) + ) + else: + fws.append( + StaticFW( + structure=defect_structure, + name=gga_name, + cp2k_input_set=cp2k_gga_input_set, + cp2k_input_set_params=user_gga_settings.copy(), + cp2k_cmd=cp2k_cmd, + prev_calc_loc=bulk_name, + db_file=db_file, + cp2ktodb_kwargs=cp2ktodb_kwargs.copy(), + parents=parents, + files_to_copy=None + ) + ) + if hybrid_flag: + if hybrid_relax: + fws.append( + RelaxHybridFW( + structure=defect_structure, + name=hybrid_name, + cp2k_input_set=cp2k_hybrid_input_set, + cp2k_input_set_params=user_hybrid_settings.copy(), + cp2k_cmd=cp2k_cmd, + prev_calc_loc=gga_name, + db_file=db_file, + cp2ktodb_kwargs=cp2ktodb_kwargs.copy(), + parents=fws[-1], + files_to_copy=restart_filename + ) + ) + else: + fws.append( + StaticHybridFW( + structure=defect_structure, + name=hybrid_name, + cp2k_input_set=cp2k_hybrid_input_set, + cp2k_input_set_params=user_hybrid_settings.copy(), + cp2k_cmd=cp2k_cmd, + prev_calc_loc=gga_name, + db_file=db_file, + cp2ktodb_kwargs=cp2ktodb_kwargs.copy(), + parents=fws[-1], + files_to_copy=restart_filename + ) + ) + + wfname = "{}:{}".format(structure.composition.reduced_formula, name) + final_wf = Workflow(fws, name=wfname) + + return final_wf + + diff --git a/atomate/cp2k/workflows/library/static.yaml b/atomate/cp2k/workflows/library/static.yaml new file mode 100644 index 000000000..2eae6b1db --- /dev/null +++ b/atomate/cp2k/workflows/library/static.yaml @@ -0,0 +1,7 @@ +fireworks: + - fw: atomate.cp2k.fireworks.core.StaticFW + - fw: atomate.cp2k.fireworks.core.HybridStaticFW + params: + parents: 1 + hybrid_functional: PBE0 + files_to_copy: \ No newline at end of file