Source code for sparselm.model._miqp._best_subset

"""MIQP based solvers for Best Subset Selection solutions.

Allows hierarchy constraints similar to mixed L0 solvers.
"""

from __future__ import annotations

__author__ = "Luis Barroso-Luque"

from numbers import Real
from types import SimpleNamespace
from typing import Any

import cvxpy as cp
from numpy.typing import ArrayLike, NDArray
from sklearn.utils._param_validation import Interval

from sparselm.model._base import TikhonovMixin

from ._base import MIQPl0


[docs]class BestSubsetSelection(MIQPl0):
    r"""MIQP Best Subset Selection Regressor.

    Generalized best subset that allows grouping subsets.

    Args:
        groups (ArrayLike):
            array-like of integers specifying groups. Length should be the
            same as model, where each integer entry specifies the group
            each parameter corresponds to. If no grouping is required,
            simply pass a list of all different numbers, i.e. using range.
        sparse_bound (int):
            Upper bound on sparsity. The upper bound on total number of
            nonzero coefficients.
        big_M (float):
            Upper bound on the norm of coefficients associated with each
            groups of coefficients :math:`||\beta_c||_2`.
        hierarchy (list):
            A list of lists of integers storing hierarchy relations between
            coefficients.
            Each sublist contains indices of other coefficients
            on which the coefficient associated with each element of
            the list depends. i.e. hierarchy = [[1, 2], [0], []] mean that
            coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no
            dependence.
        ignore_psd_check (bool):
            Whether to ignore cvxpy's PSD checks of matrix used in
            quadratic form. Default is True to avoid raising errors for
            poorly conditioned matrices. But if you want to be strict set
            to False.
        fit_intercept (bool):
            Whether the intercept should be estimated or not.
            If False, the data is assumed to be already centered.
        copy_X (bool):
            If True, X will be copied; else, it may be overwritten.
        warm_start (bool):
            When set to True, reuse the solution of the previous call to
            fit as initialization, otherwise, just erase the previous
            solution.
        solver (str):
            cvxpy backend solver to use. Supported solvers are listed here:
            https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options
        solver_options (dict):
            dictionary of keyword arguments passed to cvxpy solve.
            See docs in CVXRegressor for more information.

    Attributes:
        coef_ (NDArray):
            Parameter vector (:math:`\beta` in the cost function formula) of shape (n_features,).
        intercept_ (float):
            Independent term in decision function.
        canonicals_ (SimpleNamespace):
            Namespace that contains underlying cvxpy objects used to define
            the optimization problem. The objects included are the following:
                - objective - the objective function.
                - beta - variable to be optimized (corresponds to the estimated coef_ attribute).
                - parameters - hyper-parameters
                - auxiliaries - auxiliary variables and expressions
                - constraints - solution constraints

    Notes:
        Installation of Gurobi is not a must, but highly recommended. An open source alternative
        is SCIP. ECOS_BB also works but can be very slow, and has recurring correctness issues.
        See the Mixed-integer programs section of the cvxpy docs:
        https://www.cvxpy.org/tutorial/advanced/index.html

    WARNING:
        Even with gurobi solver, this can take a very long time to converge for large problems and under-determined
        problems.
    """

    _cvx_parameter_constraints: dict[str, list[Any]] = {
        "sparse_bound": [Interval(type=Real, left=0, right=None, closed="left")],
        **MIQPl0._cvx_parameter_constraints,
    }

    def __init__(
        self,
        groups: ArrayLike | None = None,
        sparse_bound=100,
        big_M: int = 100,
        hierarchy: list[list[int]] | None = None,
        ignore_psd_check: bool = True,
        fit_intercept: bool = False,
        copy_X: bool = True,
        warm_start: bool = False,
        solver: str | None = None,
        solver_options: dict | None = None,
    ):
        super().__init__(
            groups=groups,
            big_M=big_M,
            hierarchy=hierarchy,
            ignore_psd_check=ignore_psd_check,
            fit_intercept=fit_intercept,
            copy_X=copy_X,
            warm_start=warm_start,
            solver=solver,
            solver_options=solver_options,
        )
        self.sparse_bound = sparse_bound

    def _generate_constraints(
        self,
        X: ArrayLike,
        y: ArrayLike,
        beta: cp.Variable,
        parameters: SimpleNamespace | None = None,
        auxiliaries: SimpleNamespace | None = None,
    ) -> list[cp.constraints]:
        """Generate the constraints for best subset selection."""
        constraints = super()._generate_constraints(X, y, beta, parameters, auxiliaries)
        constraints += [cp.sum(auxiliaries.z0) <= parameters.sparse_bound]
        return constraints


[docs]class RidgedBestSubsetSelection(TikhonovMixin, BestSubsetSelection):
    r"""MIQP best subset selection Regressor with Ridge/Tihkonov regularization.

    Args:
        groups (ArrayLike):
            array-like of integers specifying groups. Length should be the
            same as model, where each integer entry specifies the group
            each parameter corresponds to. If no grouping is required,
            simply pass a list of all different numbers, i.e. using range.
        sparse_bound (int):
            Upper bound on sparsity. The upper bound on total number of
            nonzero coefficients.
        eta (float):
            L2 regularization hyper-parameter.
        big_M (float):
            Upper bound on the norm of coefficients associated with each
            groups of coefficients :math:`||\beta_c||_2`.
        hierarchy (list):
            A list of lists of integers storing hierarchy relations between
            coefficients.
            Each sublist contains indices of other coefficients
            on which the coefficient associated with each element of
            the list depends. i.e. hierarchy = [[1, 2], [0], []] mean that
            coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no
            dependence.
        tikhonov_w (np.array):
            Matrix to add weights to L2 regularization.
        ignore_psd_check (bool):
            Whether to ignore cvxpy's PSD checks of matrix used in
            quadratic form. Default is True to avoid raising errors for
            poorly conditioned matrices. But if you want to be strict set
            to False.
        fit_intercept (bool):
            Whether the intercept should be estimated or not.
            If False, the data is assumed to be already centered.
        copy_X (bool):
            If True, X will be copied; else, it may be overwritten.
        warm_start (bool):
            When set to True, reuse the solution of the previous call to
            fit as initialization, otherwise, just erase the previous
            solution.
        solver (str):
            cvxpy backend solver to use. Supported solvers are listed here:
            https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options
        solver_options (dict):
            dictionary of keyword arguments passed to cvxpy solve.
            See docs in CVXRegressor for more information.

    Attributes:
        coef_ (NDArray):
            Parameter vector (:math:`\beta` in the cost function formula) of shape (n_features,).
        intercept_ (float):
            Independent term in decision function.
        canonicals_ (SimpleNamespace):
            Namespace that contains underlying cvxpy objects used to define
            the optimization problem. The objects included are the following:
                - objective - the objective function.
                - beta - variable to be optimized (corresponds to the estimated coef_ attribute).
                - parameters - hyper-parameters
                - auxiliaries - auxiliary variables and expressions
                - constraints - solution constraints

    Notes:
        Installation of Gurobi is not a must, but highly recommended. An open source alternative
        is SCIP. ECOS_BB also works but can be very slow, and has recurring correctness issues.
        See the Mixed-integer programs section of the cvxpy docs:
        https://www.cvxpy.org/tutorial/advanced/index.html

    WARNING:
        Even with gurobi solver, this can take a very long time to converge for large problems and under-determined
        problems.
    """

    _cvx_parameter_constraints: dict[str, list[Any]] = {
        "eta": [Interval(type=Real, left=0.0, right=None, closed="left")],
        **BestSubsetSelection._cvx_parameter_constraints,
    }

    def __init__(
        self,
        groups: ArrayLike | None = None,
        sparse_bound: int = 100,
        eta: float = 1.0,
        big_M: int = 100,
        hierarchy: list[list[int]] | None = None,
        tikhonov_w: NDArray[float] | None = None,
        ignore_psd_check: bool = True,
        fit_intercept: bool = False,
        copy_X: bool = True,
        warm_start: bool = False,
        solver: str | None = None,
        solver_options: dict | None = None,
    ):
        super().__init__(
            groups=groups,
            sparse_bound=sparse_bound,
            big_M=big_M,
            hierarchy=hierarchy,
            ignore_psd_check=ignore_psd_check,
            fit_intercept=fit_intercept,
            copy_X=copy_X,
            warm_start=warm_start,
            solver=solver,
            solver_options=solver_options,
        )
        self.tikhonov_w = tikhonov_w
        self.eta = eta