Source code for smol.moca.kernel.wanglandau

"""
A Wang-Landau kernel for multicanonical estimation of the density of states.

Based on: https://link.aps.org/doi/10.1103/PhysRevLett.86.2050
"""

__author__ = "Luis Barroso-Luque, Fengyu Xie"

from functools import partial
from math import log

import numpy as np

from smol.moca.kernel.base import ALL_MCUSHERS, MCKernel



[docs]
class WangLandau(MCKernel):
    """A Wang-Landau sampling kernel.

    Allows wang-landau sampling method. Stores histogram and DOS in its instance.
    """

    valid_mcushers = ALL_MCUSHERS
    valid_bias = None  # Wang-Landau does not need bias.

    def __init__(
        self,
        ensemble,
        step_type,
        min_enthalpy,
        max_enthalpy,
        bin_size,
        *args,
        flatness=0.8,
        mod_factor=1.0,
        check_period=1000,
        update_period=1,
        mod_update=None,
        seed=None,
        **kwargs,
    ):
        """Initialize a WangLandau Kernel.

        Args:
            ensemble (Ensemble):
                The ensemble object to use to generate samples
            step_type (str):
                An MC step type corresponding to an MCUsher. See valid_mcushers
            min_enthalpy (float):
                The minimum enthalpy to sample. Enthalpy means the dot product
                of natural parameters and ensemble features. In canonical ensemble,
                it shall be the energy. In semigrand ensemble, it shall be E- mu N.
                Unit is eV scaled at the super-cell.
            max_enthalpy (float):
                The maximum enthalpy to sample.
            bin_size (float):
                The enthalpy bin size to determine different states.
            flatness (float): optional
                The flatness factor used when checking histogram flatness.
                Must be between 0 and 1.
            mod_factor (float):
                log of the initial modification factor used to update the
                DOS/entropy.
                Default is 1, which means the DOS shall be multiplied by e^1.
            check_period (int): optional
                The period in number of steps for the histogram flatness to be
                checked.
            update_period (int): optional
                The period in number of steps to update the histogram and entropy.
                In some cases periods slightly larger than each step, may allow
                more efficient exploration of energy space.
            mod_update (float|Callable): optional
                A function used to update the fill factor when the histogram
                satisfies the flatness criteria. It must monotonically decrease
                to 0.
                When a number is provided, at each update we will divide the
                mod factor by this number.
            seed (int): optional
                non-negative integer to seed the PRNG
            args:
                Positional arguments to instantiate MCUsher.
            kwargs:
                Keyword arguments to instantiate MCUsher.
        """
        if min_enthalpy > max_enthalpy:
            raise ValueError("min_enthalpy can not be larger than max_enthalpy.")
        if (max_enthalpy - min_enthalpy) / bin_size <= 1:
            raise ValueError(
                "The values provided for min and max enthalpy and bin sizer result in a single bin!"
            )
        if mod_factor <= 0:
            raise ValueError("mod_factor must be greater than 0.")

        self.flatness = flatness
        self.check_period = check_period
        self.update_period = update_period
        self._m = mod_factor
        self._window = (min_enthalpy, max_enthalpy, bin_size)

        if callable(mod_update):
            self._mod_update = mod_update
        elif mod_update is not None:
            self._mod_update = partial(_divide, m=mod_update)
        else:
            self._mod_update = partial(_divide, m=2.0)

        self._levels = np.arange(min_enthalpy, max_enthalpy, bin_size)

        # The correct initialization will be handled in set_aux_state.
        self._current_enthalpy = np.inf
        self._current_features = np.zeros(len(ensemble.natural_parameters))
        self._entropy = np.zeros(len(self._levels))
        self._histogram = np.zeros(len(self._levels), dtype=int)
        # Used to compute average features only.
        self._occurrences = np.zeros(len(self._levels), dtype=int)
        # Mean feature vector per level.
        self._mean_features = np.zeros(
            (len(self._levels), len(ensemble.natural_parameters))
        )
        self._steps_counter = 0  # Save number of valid states elapsed.

        # Population of initial trace included here.
        super().__init__(
            ensemble=ensemble, step_type=step_type, *args, seed=seed, **kwargs
        )

        if self.bias is not None:
            raise ValueError("Cannot apply bias to Wang-Landau simulation!")

        # add inputs to specification
        self.spec.min_enthalpy = min_enthalpy
        self.spec.max_enthalpy = max_enthalpy
        self.spec.bin_size = bin_size
        self.spec.flatness = flatness
        self.spec.check_period = check_period
        self.spec.update_period = update_period
        self.spec.levels = self._levels.tolist()

        # Additional clean-ups.
        self._histogram[:] = 0
        self._occurrences[:] = 0
        self._entropy[:] = 0
        self._steps_counter = 0  # Log n_steps elapsed.

    @property
    def bin_size(self):
        """Enthalpy bin size."""
        return self._window[2]

    @property
    def levels(self):
        """Visited enthalpy levels."""
        return self._levels[self._entropy > 0]

    @property
    def entropy(self):
        """log(dos) on visited levels."""
        return self._entropy[self._entropy > 0]

    @property
    def dos(self):
        """Density of states on visited levels."""
        return np.exp(self.entropy - self.entropy.min())

    @property
    def histogram(self):
        """Histogram on visited levels."""
        return self._histogram[self._entropy > 0]

    @property
    def mod_factor(self):
        """Mod factor."""
        return self._m

    def _get_bin_id(self, e):
        """Get bin index of an enthalpy."""
        # Does not check if enthalpy is in window.
        if e == np.inf:  # This happens at init.
            return np.inf
        return int((e - self._window[0]) // self._window[2])

    def _get_bin_enthalpy(self, bin_id):
        """Get enthalpy form bin index."""
        return bin_id * self._window[2] + self._window[0]

    def _accept_step(self, occupancy, step):
        bin_id = self._get_bin_id(self._current_enthalpy)
        new_enthalpy = self._current_enthalpy + self.trace.delta_trace.enthalpy

        # reject if outside of window
        if new_enthalpy < self._window[0] or new_enthalpy >= self._window[1]:
            self.trace.accepted = np.array(False)
        else:
            new_bin_id = self._get_bin_id(new_enthalpy)
            entropy = self._entropy[bin_id]
            new_entropy = self._entropy[new_bin_id]
            log_factor = self.mcusher.compute_log_priori_factor(occupancy, step)
            exponent = entropy - new_entropy + log_factor
            self.trace.accepted = np.array(
                True if exponent >= 0 else exponent > log(self._rng.random())
            )
        return self.trace.accepted

    def _do_accept_step(self, occupancy, step):
        """Accept/reject a given step and populate trace and aux states accordingly.

        Args:
            occupancy (ndarray):
                Current occupancy
            step (Sequence of tuple):
                Sequence of tuples of ints representing an MC step

        Returns:
            ndarray: new occupancy
        """
        occupancy = super()._do_accept_step(occupancy, step)
        self._current_features += self.trace.delta_trace.features
        self._current_enthalpy += self.trace.delta_trace.enthalpy

        return occupancy

    def _do_post_step(self):
        """Populate trace and aux states for a step regardless of acceptance.

        Populate histogram and entropy, and update counters accordingly.
        """
        bin_id = self._get_bin_id(self._current_enthalpy)

        # Only if bin_id is valid, because bin_id is not checked.
        if 0 <= bin_id < len(self._levels):
            # compute the cumulative statistics
            self._steps_counter += 1
            total = self._occurrences[bin_id]
            self._mean_features[bin_id, :] = (
                1
                / (total + 1)
                * (self._current_features + total * self._mean_features[bin_id, :])
            )
            # update histogram, entropy and occurrences each update_period steps
            if self._steps_counter % self.update_period == 0:
                # Add to DOS and histogram
                self._entropy[bin_id] += self._m
                self._histogram[bin_id] += 1
                self._occurrences[bin_id] += 1

        # fill up values in the trace
        self.trace.histogram = self._histogram
        self.trace.occurrences = self._occurrences
        self.trace.entropy = self._entropy
        self.trace.cumulative_mean_features = self._mean_features
        self.trace.mod_factor = np.array([self._m])

        if self._steps_counter % self.check_period == 0:
            # remove zero entropy entries (use entropy instead of histogram to mask so
            # that once a state is visited it is always considered there after)
            histogram = self._histogram[self._entropy > 0]
            # Check that at least two distinct bins have been visited, then check flatness
            if (
                len(histogram) >= 2
                and (histogram > self.flatness * histogram.mean()).all()
            ):
                # reset histogram and decrease modification factor
                self._histogram[:] = 0
                self._m = self._mod_update(self._m)

        return self.trace


[docs]
    def compute_initial_trace(self, occupancy):
        """Compute initial values for sample trace given an occupancy.

        Add the micro-canonical entropy and histograms to the trace.
        Args:
            occupancy (ndarray):
                Initial occupancy
        Returns:
            Trace
        """
        trace = super().compute_initial_trace(occupancy)

        # This will not be counting the state corresponding to the given occupancy.
        # but that is ok since we are not recording the initial trace when sampling.
        trace.histogram = self._histogram
        trace.occurrences = self._occurrences
        trace.entropy = self._entropy
        trace.cumulative_mean_features = self._mean_features
        trace.mod_factor = self._m

        return trace



[docs]
    def set_aux_state(self, occupancy, *args, **kwargs):
        """Set the auxiliary occupancies based on an occupancy.

        This is necessary for WangLandau to work properly because
        it needs to store the current enthalpy and features.
        """
        features = np.array(self.ensemble.compute_feature_vector(occupancy))
        enthalpy = np.dot(features, self.natural_params)
        self._current_features = features
        self._current_enthalpy = enthalpy
        self.mcusher.set_aux_state(occupancy)




def _divide(x, m):
    """Use to allow WangLandau to be pickled."""
    return x / m