Semi-automate a cluster expansion
Note
In the context of WFacer documentation, semi-automation refers to the manual execution of scripts for structure generation and model fitting, while Jobflow or Fireworks manages only the computation of each individual enumerated structure.
The following scripts demonstrate how to use classes and utility functions to manually perform the semi-automated steps in a cluster expansion iteration.
At the beginning of the first iteration, parameters for the cluster expansion and first-principles calculations must be initialized. The following script provides an example of doing so:
"""An example to semi-automate with fireworks. Generate initial document."""
import json
from monty.json import jsanitize
from pymatgen.core import Lattice, Structure
from WFacer.jobs import initialize_document
# Primitive cell: BCC Al-Li with a 50/50 disordered occupancy on both sites.
mixed_occupancy = {"Al": 0.5, "Li": 0.5}
alli_prim = Structure(
    lattice=Lattice.cubic(3.75),
    species=[dict(mixed_occupancy), dict(mixed_occupancy)],
    coords=[[0, 0, 0], [0.5, 0.5, 0.5]],
)


def _generator_kwargs(incar_settings, kpoints_settings):
    """Assemble atomate2 input-generator kwargs using the PBE_54 POTCARs."""
    return {
        "user_incar_settings": incar_settings,
        "user_kpoints_settings": kpoints_settings,
        "user_potcar_functional": "PBE_54",
    }


# Three-stage calculation: loose relax, then tight relax, then static.
relax_kwargs = _generator_kwargs(
    {"ISMEAR": 1, "SIGMA": 0.2, "ENCUT": 300, "EDIFF": 1e-5, "EDIFFG": -0.02},
    {"reciprocal_density": 100},
)
tight_kwargs = _generator_kwargs(
    {"ISMEAR": 2, "SIGMA": 0.2, "ENCUT": 520, "EDIFF": 1e-5, "EDIFFG": -0.01},
    {"reciprocal_density": 400},
)
static_kwargs = _generator_kwargs(
    {"ENCUT": 680, "EDIFF": 1e-6, "EDIFFG": -0.01},
    {"reciprocal_density": 800},
)

# Workflow options: 3x3x3 (54-site) supercell objective, Lasso with
# grid-search is the (default) fitting scheme.
options = {
    "objective_num_sites": 54,
    "comp_enumeration_step": 3,
    "n_parallel": 8,
    "add_tight_relax": True,
    "relax_generator_kwargs": relax_kwargs,
    "tight_generator_kwargs": tight_kwargs,
    "static_generator_kwargs": static_kwargs,
    "cutoffs": {2: 9.0, 3: 8.0, 4: 5.0},
}

# Build the initial output document and persist it for later iterations.
document = initialize_document(alli_prim, "alli_bcc_ce", options=options)
data = jsanitize(document, strict=True, enum_values=True)
with open("document.json", "w") as fout:
    json.dump(data, fout)
Using the cluster expansion constructed in the last iteration, you can enumerate new structures to be added in the current iteration and compute them with atomate2:
"""Generate structures for a next iteration."""
from warnings import warn
from fireworks import LaunchPad
from jobflow.managers.fireworks import flow_to_workflow
from monty.serialization import dumpfn
from pydantic import parse_file_as
from WFacer.jobs import enumerate_structures, get_structure_calculation_flows
from WFacer.schema import CeOutputsDocument
def __main__():
    """Enumerate structures for the next iteration and submit their workflows.

    Loads the cluster expansion document from ``document.json``, stops early
    if the model has converged or the iteration budget is exhausted, then
    enumerates new training structures, converts their calculation flows to
    Fireworks workflows, adds them to the LaunchPad, and saves the
    enumeration output to ``enum_iter_{iter_id}.json``.
    """
    document = parse_file_as(CeOutputsDocument, "document.json")
    iter_id = document.last_iter_id + 1
    max_iter = document.ce_options["max_iter"]
    # Guard: do not enumerate past the iteration budget.
    if iter_id >= max_iter and not document.converged:
        warn(
            f"Maximum number of iterations: {max_iter}"
            f" reached, but cluster expansion model is"
            f" still not converged!"
        )
        return
    # Guard: nothing to do once the model has converged.
    if document.converged:
        warn("Model already converged! No need for further operation!")
        return
    print("Enumerating structures!")
    enum_output = enumerate_structures(last_ce_document=document)
    flows = get_structure_calculation_flows(enum_output, document)
    workflows = [flow_to_workflow(f) for f in flows]
    # Add workflows to launchpad to launch.
    # Remember to set my_qadapter.yaml to rlaunch singleshot, then
    # use qlaunch rapidfire to launch.
    print("Adding workflows!")
    lpad = LaunchPad.auto_load()
    lpad.bulk_add_wfs(workflows)
    # Persist the enumeration output; the fitting script reads this file.
    print("Saving enumerated structures.")
    enum_fname = f"enum_iter_{iter_id}.json"
    dumpfn(enum_output, enum_fname)
# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()
In the final step, you would like to refit a cluster expansion model using the updated training set:
"""Parse and fit cluster expansion."""
import json
from emmet.core.tasks import TaskDoc # emmet-core >= 0.60.0
from jobflow import SETTINGS
from monty.json import jsanitize
from monty.serialization import loadfn
from pydantic import parse_file_as, parse_obj_as
from WFacer.jobs import fit_calculations, parse_calculations, update_document
from WFacer.schema import CeOutputsDocument
Execute this once all queue tasks have been completed and no job is lost.
def __main__():
    """Parse finished static TaskDocs, refit the CE, and update document.json.

    Loads the previous cluster expansion document and the enumeration output
    of the current iteration, queries the Jobflow store for each structure's
    static task document by its workflow name, then parses, refits, and
    writes the updated document back to ``document.json``.
    """
    document = parse_file_as(CeOutputsDocument, "document.json")
    iter_id = document.last_iter_id + 1
    project_name = document.project_name
    enum_output = loadfn(f"enum_iter_{iter_id}.json")
    print("Loading TaskDocs!")
    store = SETTINGS.JOB_STORE
    store.connect()
    new_structures = enum_output["new_structures"]
    # Global index offset: structures from earlier iterations come first.
    if document.enumerated_structures is None:
        struct_id = 0
    else:
        struct_id = len(document.enumerated_structures)
    taskdocs = []
    for i, structure in enumerate(new_structures):
        fid = i + struct_id
        # Must match the job names assigned when the workflows were created.
        supposed_name = project_name + f"_iter_{iter_id}_enum_{fid}" + "_static"
        try:
            data = store.query_one({"name": supposed_name}, load=True)
            doc = parse_obj_as(TaskDoc, data)
        except Exception as exc:
            # Best effort: a lost or failed job yields a None placeholder,
            # but report it so the missing structure is not silently dropped.
            print(f"Could not load task document {supposed_name}: {exc}")
            doc = None
        taskdocs.append(doc)
    print("Parsing task documents!")
    parse_output = parse_calculations(taskdocs, enum_output, document)
    print("Fitting calculations!")
    fit_output = fit_calculations(parse_output, document)
    print("Updating output document!")
    new_document = update_document(enum_output, parse_output, fit_output, document)
    new_data = jsanitize(new_document, strict=True, enum_values=True)
    with open("document.json", "w") as fout:
        json.dump(new_data, fout)
    print("Updated document saved! Check with generate.py!")
# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()