Semi-automate a cluster expansion
Note
In the context of WFacer documentation, semi-automation refers to the manual execution of scripts for structure generation and model fitting, while Jobflow or Fireworks manages only the computation of each individual enumerated structure.
The following scripts demonstrate how to use classes and utility functions to manually perform the semi-automated steps in a cluster expansion iteration.
At the beginning of the first iteration, parameters for the cluster expansion and first-principles calculations must be initialized. The following script provides an example of doing so:
"""An example to semi-automate with fireworks. Generate initial document."""
import json
from monty.json import jsanitize
from pymatgen.core import Lattice, Structure
from WFacer.jobs import initialize_document
# Primitive cell: BCC Al-Li with a 50/50 disordered occupancy on both sites.
mixed_occupancy = {"Al": 0.5, "Li": 0.5}
alli_prim = Structure(
    lattice=Lattice.cubic(3.75),
    species=[dict(mixed_occupancy), dict(mixed_occupancy)],
    coords=[[0, 0, 0], [0.5, 0.5, 0.5]],
)


def _generator_kwargs(incar_settings, kpoints_settings):
    """Assemble atomate2 input-generator kwargs using the PBE_54 POTCARs."""
    return {
        "user_incar_settings": incar_settings,
        "user_kpoints_settings": kpoints_settings,
        "user_potcar_functional": "PBE_54",
    }


# Three-stage calculation: loose relax, then tight relax, then static.
relax_kwargs = _generator_kwargs(
    {"ISMEAR": 1, "SIGMA": 0.2, "ENCUT": 300, "EDIFF": 1e-5, "EDIFFG": -0.02},
    {"reciprocal_density": 100},
)
tight_kwargs = _generator_kwargs(
    {"ISMEAR": 2, "SIGMA": 0.2, "ENCUT": 520, "EDIFF": 1e-5, "EDIFFG": -0.01},
    {"reciprocal_density": 400},
)
static_kwargs = _generator_kwargs(
    {"ENCUT": 680, "EDIFF": 1e-6, "EDIFFG": -0.01},
    {"reciprocal_density": 800},
)

# Workflow options: 3x3x3 (54-site) supercell objective, Lasso with
# grid-search is the (default) fitting scheme.
options = {
    "objective_num_sites": 54,
    "comp_enumeration_step": 3,
    "n_parallel": 8,
    "add_tight_relax": True,
    "relax_generator_kwargs": relax_kwargs,
    "tight_generator_kwargs": tight_kwargs,
    "static_generator_kwargs": static_kwargs,
    "cutoffs": {2: 9.0, 3: 8.0, 4: 5.0},
}

# Build the initial output document and persist it for later iterations.
document = initialize_document(alli_prim, "alli_bcc_ce", options=options)
data = jsanitize(document, strict=True, enum_values=True)
with open("document.json", "w") as fout:
    json.dump(data, fout)
Using the cluster expansion constructed in the last iteration, you can enumerate new structures to be added in the current iteration and compute them with atomate2:
"""Generate structures for a next iteration."""
from warnings import warn
from fireworks import LaunchPad
from jobflow.managers.fireworks import flow_to_workflow
from monty.serialization import dumpfn
from pydantic import parse_file_as
from WFacer.jobs import enumerate_structures, get_structure_calculation_flows
from WFacer.schema import CeOutputsDocument
def __main__():
    """Enumerate structures for the next iteration and submit their workflows.

    Loads the cluster expansion document from ``document.json``, stops early
    if the model has converged or the iteration budget is exhausted, then
    enumerates new training structures, converts their calculation flows to
    Fireworks workflows, adds them to the LaunchPad, and saves the
    enumeration output to ``enum_iter_{iter_id}.json``.
    """
    document = parse_file_as(CeOutputsDocument, "document.json")
    iter_id = document.last_iter_id + 1
    max_iter = document.ce_options["max_iter"]
    # Guard: do not enumerate past the iteration budget.
    if iter_id >= max_iter and not document.converged:
        warn(
            f"Maximum number of iterations: {max_iter}"
            f" reached, but cluster expansion model is"
            f" still not converged!"
        )
        return
    # Guard: nothing to do once the model has converged.
    if document.converged:
        warn("Model already converged! No need for further operation!")
        return
    print("Enumerating structures!")
    enum_output = enumerate_structures(last_ce_document=document)
    flows = get_structure_calculation_flows(enum_output, document)
    workflows = [flow_to_workflow(f) for f in flows]
    # Add workflows to launchpad to launch.
    # Remember to set my_qadapter.yaml to rlaunch singleshot, then
    # use qlaunch rapidfire to launch.
    print("Adding workflows!")
    lpad = LaunchPad.auto_load()
    lpad.bulk_add_wfs(workflows)
    # Persist the enumeration output; the fitting script reads this file.
    print("Saving enumerated structures.")
    enum_fname = f"enum_iter_{iter_id}.json"
    dumpfn(enum_output, enum_fname)
# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()
In the final step, you would like to refit a cluster expansion model using the updated training set:
"""Parse and fit cluster expansion."""
import json
from emmet.core.tasks import TaskDoc # emmet-core >= 0.60.0
from jobflow import SETTINGS
from monty.json import jsanitize
from monty.serialization import loadfn
from pydantic import parse_file_as, parse_obj_as
from WFacer.jobs import fit_calculations, parse_calculations, update_document
from WFacer.schema import CeOutputsDocument
Execute this once all queue tasks have been completed and no job is lost.
def __main__():
    """Parse finished static TaskDocs, refit the CE, and update document.json.

    Loads the previous cluster expansion document and the enumeration output
    of the current iteration, queries the Jobflow store for each structure's
    static task document by its workflow name, then parses, refits, and
    writes the updated document back to ``document.json``.
    """
    document = parse_file_as(CeOutputsDocument, "document.json")
    iter_id = document.last_iter_id + 1
    project_name = document.project_name
    enum_output = loadfn(f"enum_iter_{iter_id}.json")
    print("Loading TaskDocs!")
    store = SETTINGS.JOB_STORE
    store.connect()
    new_structures = enum_output["new_structures"]
    # Global index offset: structures from earlier iterations come first.
    if document.enumerated_structures is None:
        struct_id = 0
    else:
        struct_id = len(document.enumerated_structures)
    taskdocs = []
    for i, structure in enumerate(new_structures):
        fid = i + struct_id
        # Must match the job names assigned when the workflows were created.
        supposed_name = project_name + f"_iter_{iter_id}_enum_{fid}" + "_static"
        try:
            data = store.query_one({"name": supposed_name}, load=True)
            doc = parse_obj_as(TaskDoc, data)
        except Exception as exc:
            # Best effort: a lost or failed job yields a None placeholder,
            # but report it so the missing structure is not silently dropped.
            print(f"Could not load task document {supposed_name}: {exc}")
            doc = None
        taskdocs.append(doc)
    print("Parsing task documents!")
    parse_output = parse_calculations(taskdocs, enum_output, document)
    print("Fitting calculations!")
    fit_output = fit_calculations(parse_output, document)
    print("Updating output document!")
    new_document = update_document(enum_output, parse_output, fit_output, document)
    new_data = jsanitize(new_document, strict=True, enum_values=True)
    with open("document.json", "w") as fout:
        json.dump(new_data, fout)
    print("Updated document saved! Check with generate.py!")
# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()