"""A set of generalized adaptive lasso Regressors.

* Adaptive Lasso
* Adaptive Group Lasso
* Adaptive Overlap Group Lasso
* Adaptive Sparse Group Lasso
* Adaptive Ridged Group Lasso

Regressors follow the scikit-learn interface, but use cvxpy to set up and
solve the optimization problem.

NOTE: In certain cases these can yield infeasible problems. This can cause
processes to die and, as a result, make a calculation hang indefinitely when
using them in a multiprocess model selection tool such as sklearn's
GridSearchCV with n_jobs > 1.

In that case, either tweak the settings/solvers so that this does not happen,
or run with n_jobs=1 (which may take a while to solve).
"""

from __future__ import annotations

__author__ = "Luis Barroso-Luque"

import warnings
from numbers import Integral, Real
from types import SimpleNamespace
from typing import Any, Callable

import cvxpy as cp
import numpy as np
from numpy.typing import ArrayLike, NDArray
from sklearn.utils._param_validation import Interval

from ._lasso import (
    GroupLasso,
    Lasso,
    OverlapGroupLasso,
    RidgedGroupLasso,
    SparseGroupLasso,
)


class AdaptiveLasso(Lasso):
    r"""Adaptive Lasso implementation. Also known as iteratively re-weighted Lasso.

    Regularized regression objective:

    .. math::

        \min_{\beta} || X \beta - y ||^2_2 + \alpha ||w^T \beta||_1

    Where w represents a vector of weights that is iteratively updated.

    Args:
        alpha (float):
            Regularization hyper-parameter.
        max_iter (int):
            Maximum number of re-weighting iteration steps.
        eps (float):
            Value added to the denominator of the weights.
        tol (float):
            Absolute convergence tolerance for the difference between weights
            at successive steps.
        update_function (Callable): optional
            A function with signature f(beta, eps) used to update the weights
            at each iteration. Default is 1/(|beta| + eps).
        fit_intercept (bool):
            Whether the intercept should be estimated or not. If False, the
            data is assumed to be already centered.
        copy_X (bool):
            If True, X will be copied; else, it may be overwritten.
        warm_start (bool):
            When set to True, reuse the solution of the previous call to fit
            as initialization; otherwise, just erase the previous solution.
        solver (str):
            cvxpy backend solver to use. Supported solvers are listed here:
            https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options
        solver_options (dict):
            Dictionary of keyword arguments passed to cvxpy solve.
            See docs in CVXRegressor for more information.

    Attributes:
        coef_ (NDArray):
            Parameter vector (:math:`\beta` in the cost function formula) of
            shape (n_features,).
        intercept_ (float):
            Independent term in decision function.
        canonicals_ (SimpleNamespace):
            Namespace that contains underlying cvxpy objects used to define
            the optimization problem. The objects included are the following:

            - objective - the objective function.
            - beta - variable to be optimized (corresponds to the estimated
              coef_ attribute).
            - parameters - hyper-parameters
            - auxiliaries - auxiliary variables and expressions
            - constraints - solution constraints
    """

    _parameter_constraints: dict[str, list[Any]] = {
        "tol": [Interval(type=Real, left=0.0, right=1.0, closed="both")],
        "max_iter": [Interval(type=Integral, left=0, right=None, closed="left")],
        "eps": [Interval(type=Real, left=0.0, right=1.0, closed="both")],
        "update_function": [callable, None],
        **Lasso._parameter_constraints,
    }

    def __init__(
        self,
        alpha: float = 1.0,
        max_iter: int = 3,
        eps: float = 1e-6,
        tol: float = 1e-10,
        update_function: Callable[[float, float], float] | None = None,
        fit_intercept: bool = False,
        copy_X: bool = True,
        warm_start: bool = True,
        solver: str | None = None,
        solver_options: dict[str, Any] | None = None,
        **kwargs,
    ):
        super().__init__(
            alpha=alpha,
            fit_intercept=fit_intercept,
            copy_X=copy_X,
            warm_start=warm_start,
            solver=solver,
            solver_options=solver_options,
            **kwargs,
        )
        self.tol = tol
        self.max_iter = max_iter
        self.eps = eps
        self.update_function = update_function

    def _validate_params(self, X: ArrayLike, y: ArrayLike) -> None:
        super()._validate_params(X, y)
        if self.max_iter == 1:
            warnings.warn(
                "max_iter is set to 1. It should ideally be set > 1, otherwise "
                "consider using a non-adaptive Regressor",
                UserWarning,
            )

    def _set_param_values(self) -> None:
        """Set parameter values."""
        super()._set_param_values()
        length = len(self.canonicals_.parameters.adaptive_weights.value)
        self.canonicals_.parameters.adaptive_weights.value = self.alpha * np.ones(
            length
        )

    def _generate_params(self, X: ArrayLike, y: ArrayLike) -> SimpleNamespace | None:
        """Generate parameters for the problem."""
        parameters = super()._generate_params(X, y)
        parameters.adaptive_weights = cp.Parameter(
            shape=X.shape[1], nonneg=True, value=self.alpha * np.ones(X.shape[1])
        )
        return parameters

    def _generate_regularization(
        self,
        X: ArrayLike,
        beta: cp.Variable,
        parameters: SimpleNamespace,
        auxiliaries: SimpleNamespace | None = None,
    ) -> cp.Expression:
        """Generate regularization term."""
        return cp.norm1(cp.multiply(parameters.adaptive_weights, beta))

    def _get_update_function(self) -> Callable[[float, float], float]:
        # default matches the docstring: 1/(|beta| + eps); alpha is applied
        # once, in _iterative_update
        if self.update_function is None:
            return lambda beta, eps: 1.0 / (abs(beta) + eps)
        return self.update_function

    @staticmethod
    def _get_weights_value(parameters: SimpleNamespace) -> NDArray[float]:
        """Simply return a copy of the value of the adaptive weights."""
        return parameters.adaptive_weights.value.copy()

    def _check_convergence(
        self, parameters: SimpleNamespace, previous_weights: ArrayLike
    ) -> bool:
        """Check if the weights have converged to the set tolerance."""
        current_weights = parameters.adaptive_weights.value
        return np.linalg.norm(current_weights - previous_weights) <= self.tol

    def _iterative_update(
        self,
        beta: ArrayLike,
        parameters: SimpleNamespace,
        auxiliaries: SimpleNamespace | None = None,
    ) -> None:
        """Update the adaptive weights."""
        update = self._get_update_function()
        parameters.adaptive_weights.value = self.alpha * update(beta, self.eps)

    def _solve(
        self, X: ArrayLike, y: ArrayLike, solver_options: dict, *args, **kwargs
    ) -> NDArray[float]:
        """Solve the Lasso problem, iteratively updating the adaptive weights."""
        previous_weights = self._get_weights_value(self.canonicals_.parameters)
        for i in range(self.max_iter):
            self.canonicals_.problem.solve(
                solver=self.solver, warm_start=self.warm_start, **solver_options
            )
            if (
                self.canonicals_.beta.value is None
                and self.canonicals_.problem.value == -np.inf
            ):
                raise RuntimeError(f"{self.canonicals_.problem} is infeasible.")
            self.n_iter_ = i + 1  # save number of iterations for sklearn
            self._iterative_update(
                self.canonicals_.beta.value,
                self.canonicals_.parameters,
                self.canonicals_.auxiliaries,
            )
            # check convergence
            if self._check_convergence(self.canonicals_.parameters, previous_weights):
                break
            previous_weights = self._get_weights_value(self.canonicals_.parameters)
        return self.canonicals_.beta.value
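
# A minimal usage sketch (assumed import path sparselm.model; synthetic data
# for illustration). Each re-weighting iteration solves the weighted l1
# problem and then sets w_i = alpha / (|beta_i| + eps), so large coefficients
# are penalized less on the next solve. A custom update_function swaps in a
# different re-weighting rule:
#
#     import numpy as np
#     from sparselm.model import AdaptiveLasso
#
#     rng = np.random.default_rng(0)
#     X = rng.normal(size=(100, 20))
#     true_coef = np.zeros(20)
#     true_coef[:3] = [4.0, -2.0, 1.0]  # only 3 informative features
#     y = X @ true_coef + 0.05 * rng.normal(size=100)
#
#     regressor = AdaptiveLasso(alpha=1e-1, max_iter=5)
#     regressor.fit(X, y)
#
#     # e.g. the w_i = 1 / (|beta_i|**0.5 + eps) re-weighting variant:
#     regressor = AdaptiveLasso(
#         alpha=1e-1,
#         update_function=lambda beta, eps: 1.0 / (np.abs(beta) ** 0.5 + eps),
#     )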


class AdaptiveGroupLasso(AdaptiveLasso, GroupLasso):
    r"""Adaptive Group Lasso, iteratively re-weighted group lasso.

    Regularized regression objective:

    .. math::

        \min_{\beta} || X \beta - y ||^2_2 + \alpha \sum_{G} w_G ||\beta_G||_2

    Where w_G represents the group weights that are iteratively updated.

    Args:
        groups (list or ndarray):
            array-like of integers specifying groups. Length should be the
            same as the number of features, where each integer entry specifies
            the group each parameter corresponds to.
        alpha (float):
            Regularization hyper-parameter.
        group_weights (ndarray): optional
            Weights for each group to use in the regularization term. The
            default is to use the sqrt of the group sizes; however, any weight
            can be specified. The array must be the same length as the groups
            given. If you need all groups weighted equally, just pass an array
            of ones.
        max_iter (int):
            Maximum number of re-weighting iteration steps.
        eps (float):
            Value added to the denominator of the weights.
        tol (float):
            Absolute convergence tolerance for the difference between weights
            at successive steps.
        update_function (Callable): optional
            A function with signature f(group_norms, eps) used to update the
            weights at each iteration, where group_norms are the norms of the
            coefficients beta for each group. Default is 1/(group_norms + eps).
        standardize (bool): optional
            Whether to standardize the group regularization penalty using the
            feature matrix. See the following for reference:
            http://faculty.washington.edu/nrsimon/standGL.pdf
        fit_intercept (bool):
            Whether the intercept should be estimated or not. If False, the
            data is assumed to be already centered.
        copy_X (bool):
            If True, X will be copied; else, it may be overwritten.
        warm_start (bool):
            When set to True, reuse the solution of the previous call to fit
            as initialization; otherwise, just erase the previous solution.
        solver (str):
            cvxpy backend solver to use. Supported solvers are listed here:
            https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options
        solver_options (dict):
            Dictionary of keyword arguments passed to cvxpy solve.
            See docs in CVXRegressor for more information.

    Attributes:
        coef_ (NDArray):
            Parameter vector (:math:`\beta` in the cost function formula) of
            shape (n_features,).
        intercept_ (float):
            Independent term in decision function.
        canonicals_ (SimpleNamespace):
            Namespace that contains underlying cvxpy objects used to define
            the optimization problem. The objects included are the following:

            - objective - the objective function.
            - beta - variable to be optimized (corresponds to the estimated
              coef_ attribute).
            - parameters - hyper-parameters
            - auxiliaries - auxiliary variables and expressions
            - constraints - solution constraints
    """

    def __init__(
        self,
        groups: ArrayLike | None = None,
        alpha: float = 1.0,
        group_weights: ArrayLike | None = None,
        max_iter: int = 3,
        eps: float = 1e-6,
        tol: float = 1e-10,
        update_function: Callable[[float, float], float] | None = None,
        standardize: bool = False,
        fit_intercept: bool = False,
        copy_X: bool = True,
        warm_start: bool = True,
        solver: str | None = None,
        solver_options: dict[str, Any] | None = None,
        **kwargs,
    ):
        # call with keywords to avoid MRO issues
        super().__init__(
            groups=groups,
            alpha=alpha,
            group_weights=group_weights,
            max_iter=max_iter,
            eps=eps,
            tol=tol,
            update_function=update_function,
            standardize=standardize,
            fit_intercept=fit_intercept,
            copy_X=copy_X,
            warm_start=warm_start,
            solver=solver,
            solver_options=solver_options,
            **kwargs,
        )

    def _generate_params(self, X: ArrayLike, y: ArrayLike) -> SimpleNamespace | None:
        # skip AdaptiveLasso in super
        parameters = super(AdaptiveLasso, self)._generate_params(X, y)
        n_groups = X.shape[1] if self.groups is None else len(np.unique(self.groups))
        parameters.adaptive_weights = cp.Parameter(
            shape=n_groups,
            nonneg=True,
            value=self.alpha * np.ones(n_groups),
        )
        return parameters

    def _generate_regularization(
        self,
        X: ArrayLike,
        beta: cp.Variable,
        parameters: SimpleNamespace,
        auxiliaries: SimpleNamespace | None = None,
    ) -> cp.Expression:
        return parameters.adaptive_weights @ auxiliaries.group_norms

    def _iterative_update(
        self,
        beta: ArrayLike,
        parameters: SimpleNamespace,
        auxiliaries: SimpleNamespace | None = None,
    ) -> None:
        update = self._get_update_function()
        parameters.adaptive_weights.value = (
            self.alpha * parameters.group_weights
        ) * update(auxiliaries.group_norms.value, self.eps)
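
# A usage sketch (assumed import path sparselm.model; synthetic groups for
# illustration). groups maps each feature to a group index, and whole groups
# are kept or zeroed out together:
#
#     import numpy as np
#     from sparselm.model import AdaptiveGroupLasso
#
#     groups = np.array([0, 0, 1, 1, 2, 2])  # features 0-1 -> group 0, etc.
#     regressor = AdaptiveGroupLasso(groups=groups, alpha=1e-2, max_iter=5)
#     # regressor.fit(X, y)  # X must have 6 columns here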


class AdaptiveOverlapGroupLasso(OverlapGroupLasso, AdaptiveGroupLasso):
    r"""Adaptive Overlap Group Lasso implementation.

    Regularized regression objective:

    .. math::

        \min_{\beta} || X \beta - y ||^2_2 + \alpha \sum_{G} w_G ||\beta_G||_2

    Where G represents groups of features/coefficients, and overlapping groups
    are acceptable, meaning a coefficient can be in more than one group.

    Args:
        group_list (list of lists):
            list of lists of integers specifying groups. The length of the
            outer list should be the same as the number of features. Each
            inner list holds integers specifying the groups the coefficient
            at that index belongs to, i.e. [[1, 2], [2, 3], [1, 2, 3]] means
            the first coefficient belongs to groups 1 and 2, the second to
            groups 2 and 3, and the third to groups 1, 2, and 3. In other
            words, the 3 groups would be: (0, 2), (0, 1, 2), (1, 2).
        alpha (float):
            Regularization hyper-parameter.
        group_weights (ndarray): optional
            Weights for each group to use in the regularization term. The
            default is to use the sqrt of the group sizes; however, any weight
            can be specified. The array must be the same length as the number
            of different groups given. If you need all groups weighted equally,
            just pass an array of ones.
        max_iter (int):
            Maximum number of re-weighting iteration steps.
        eps (float):
            Value added to the denominator of the weights.
        tol (float):
            Absolute convergence tolerance for the difference between weights
            at successive steps.
        update_function (Callable): optional
            A function with signature f(group_norms, eps) used to update the
            weights at each iteration, where group_norms are the norms of the
            coefficients beta for each group. Default is 1/(group_norms + eps).
        standardize (bool): optional
            Whether to standardize the group regularization penalty using the
            feature matrix. See the following for reference:
            http://faculty.washington.edu/nrsimon/standGL.pdf
        fit_intercept (bool):
            Whether the intercept should be estimated or not. If False, the
            data is assumed to be already centered.
        copy_X (bool):
            If True, X will be copied; else, it may be overwritten.
        warm_start (bool):
            When set to True, reuse the solution of the previous call to fit
            as initialization; otherwise, just erase the previous solution.
        solver (str):
            cvxpy backend solver to use. Supported solvers are listed here:
            https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options
        solver_options (dict):
            Dictionary of keyword arguments passed to cvxpy solve.
            See docs in CVXRegressor for more information.

    Attributes:
        coef_ (NDArray):
            Parameter vector (:math:`\beta` in the cost function formula) of
            shape (n_features,).
        intercept_ (float):
            Independent term in decision function.
        canonicals_ (SimpleNamespace):
            Namespace that contains underlying cvxpy objects used to define
            the optimization problem. The objects included are the following:

            - objective - the objective function.
            - beta - variable to be optimized (corresponds to the estimated
              coef_ attribute).
            - parameters - hyper-parameters
            - auxiliaries - auxiliary variables and expressions
            - constraints - solution constraints
    """

    def __init__(
        self,
        group_list: list[list[int]] | None = None,
        alpha: float = 1.0,
        group_weights: ArrayLike | None = None,
        max_iter: int = 3,
        eps: float = 1e-6,
        tol: float = 1e-10,
        update_function: Callable[[float, float], float] | None = None,
        standardize: bool = False,
        fit_intercept: bool = False,
        copy_X: bool = True,
        warm_start: bool = True,
        solver: str | None = None,
        solver_options: dict[str, Any] | None = None,
    ):
        # call with keywords to avoid MRO issues
        super().__init__(
            group_list=group_list,
            alpha=alpha,
            group_weights=group_weights,
            max_iter=max_iter,
            eps=eps,
            tol=tol,
            update_function=update_function,
            standardize=standardize,
            fit_intercept=fit_intercept,
            copy_X=copy_X,
            warm_start=warm_start,
            solver=solver,
            solver_options=solver_options,
        )

    def _generate_params(self, X: ArrayLike, y: ArrayLike) -> SimpleNamespace | None:
        parameters = super()._generate_params(X, y)
        if self.group_list is None:
            n_groups = X.shape[1]
        else:
            n_groups = len(np.unique([gid for grp in self.group_list for gid in grp]))
        parameters.adaptive_weights = cp.Parameter(
            shape=n_groups,
            nonneg=True,
            value=self.alpha * np.ones(n_groups),
        )
        return parameters

    def _generate_objective(
        self,
        X: ArrayLike,
        y: ArrayLike,
        beta: cp.Variable,
        parameters: SimpleNamespace | None = None,
        auxiliaries: SimpleNamespace | None = None,
    ) -> cp.Expression:
        return AdaptiveGroupLasso._generate_objective(
            self, X, y, beta, parameters, auxiliaries
        )

    def _solve(
        self, X: ArrayLike, y: ArrayLike, solver_options: dict, *args, **kwargs
    ) -> NDArray[float]:
        extended_indices = self.canonicals_.auxiliaries.extended_coef_indices
        beta = AdaptiveGroupLasso._solve(
            self, X[:, extended_indices], y, solver_options, *args, **kwargs
        )
        # collapse the extended coefficients back onto the original features
        beta = np.array([sum(beta[extended_indices == i]) for i in range(X.shape[1])])
        return beta
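
# A usage sketch reusing the docstring's group_list example (import path
# sparselm.model assumed):
#
#     from sparselm.model import AdaptiveOverlapGroupLasso
#
#     # coefficient 0 is in groups 1 and 2, coefficient 1 in groups 2 and 3,
#     # and coefficient 2 in groups 1, 2, and 3
#     group_list = [[1, 2], [2, 3], [1, 2, 3]]
#     regressor = AdaptiveOverlapGroupLasso(group_list=group_list, alpha=1e-2)
#     # regressor.fit(X, y)  # X must have 3 columns here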


class AdaptiveSparseGroupLasso(AdaptiveLasso, SparseGroupLasso):
    r"""Adaptive Sparse Group Lasso, iteratively re-weighted sparse group lasso.

    Regularized regression objective:

    .. math::

        \min_{\beta} || X \beta - y ||^2_2
        + \alpha r ||w^T \beta||_1
        + \alpha (1 - r) \sum_{G} v_G ||\beta_G||_2

    Where w and v represent vectors of weights that are iteratively updated,
    and r is the l1 ratio.

    Args:
        groups (list or ndarray):
            array-like of integers specifying groups. Length should be the
            same as the number of features, where each integer entry specifies
            the group each parameter corresponds to.
        l1_ratio (float):
            Mixing parameter between l1 and group lasso regularization.
        alpha (float):
            Regularization hyper-parameter.
        group_weights (ndarray): optional
            Weights for each group to use in the regularization term. The
            default is to use the sqrt of the group sizes; however, any weight
            can be specified. The array must be the same length as the groups
            given. If you need all groups weighted equally, just pass an array
            of ones.
        max_iter (int):
            Maximum number of re-weighting iteration steps.
        eps (float):
            Value added to the denominator of the weights.
        tol (float):
            Absolute convergence tolerance for the difference between weights
            at successive steps.
        update_function (Callable): optional
            A function with signature f(group_norms, eps) used to update the
            weights at each iteration, where group_norms are the norms of the
            coefficients beta for each group. Default is 1/(group_norms + eps).
        standardize (bool): optional
            Whether to standardize the group regularization penalty using the
            feature matrix. See the following for reference:
            http://faculty.washington.edu/nrsimon/standGL.pdf
        fit_intercept (bool):
            Whether the intercept should be estimated or not. If False, the
            data is assumed to be already centered.
        copy_X (bool):
            If True, X will be copied; else, it may be overwritten.
        warm_start (bool):
            When set to True, reuse the solution of the previous call to fit
            as initialization; otherwise, just erase the previous solution.
        solver (str):
            cvxpy backend solver to use. Supported solvers are listed here:
            https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options
        solver_options (dict):
            Dictionary of keyword arguments passed to cvxpy solve.
            See docs in CVXRegressor for more information.

    Attributes:
        coef_ (NDArray):
            Parameter vector (:math:`\beta` in the cost function formula) of
            shape (n_features,).
        intercept_ (float):
            Independent term in decision function.
        canonicals_ (SimpleNamespace):
            Namespace that contains underlying cvxpy objects used to define
            the optimization problem. The objects included are the following:

            - objective - the objective function.
            - beta - variable to be optimized (corresponds to the estimated
              coef_ attribute).
            - parameters - hyper-parameters
            - auxiliaries - auxiliary variables and expressions
            - constraints - solution constraints
    """

    def __init__(
        self,
        groups: ArrayLike | None = None,
        l1_ratio: float = 0.5,
        alpha: float = 1.0,
        group_weights: ArrayLike | None = None,
        max_iter: int = 3,
        eps: float = 1e-6,
        tol: float = 1e-10,
        update_function: Callable[[float, float], float] | None = None,
        standardize: bool = False,
        fit_intercept: bool = False,
        copy_X: bool = True,
        warm_start: bool = True,
        solver: str | None = None,
        solver_options: dict[str, Any] | None = None,
    ):
        # call with keywords to avoid MRO issues
        super().__init__(
            groups=groups,
            l1_ratio=l1_ratio,
            alpha=alpha,
            group_weights=group_weights,
            max_iter=max_iter,
            eps=eps,
            tol=tol,
            update_function=update_function,
            standardize=standardize,
            fit_intercept=fit_intercept,
            copy_X=copy_X,
            warm_start=warm_start,
            solver=solver,
            solver_options=solver_options,
        )

    def _set_param_values(self) -> None:
        SparseGroupLasso._set_param_values(self)
        # group weights are scaled by lambda2 and coef weights by lambda1,
        # consistent with _generate_params and _iterative_update below
        group_weights = self.canonicals_.parameters.adaptive_group_weights.value
        group_weights = self.canonicals_.parameters.lambda2.value * np.ones_like(
            group_weights
        )
        self.canonicals_.parameters.adaptive_group_weights.value = group_weights
        coef_weights = self.canonicals_.parameters.adaptive_coef_weights.value
        coef_weights = self.canonicals_.parameters.lambda1.value * np.ones_like(
            coef_weights
        )
        self.canonicals_.parameters.adaptive_coef_weights.value = coef_weights

    def _generate_params(self, X: ArrayLike, y: ArrayLike) -> SimpleNamespace | None:
        # skip AdaptiveLasso in super
        parameters = SparseGroupLasso._generate_params(self, X, y)
        n_groups = X.shape[1] if self.groups is None else len(np.unique(self.groups))
        parameters.adaptive_coef_weights = cp.Parameter(
            shape=X.shape[1],
            nonneg=True,
            value=parameters.lambda1.value * np.ones(X.shape[1]),
        )
        parameters.adaptive_group_weights = cp.Parameter(
            shape=n_groups,
            nonneg=True,
            value=parameters.lambda2.value * np.ones(n_groups),
        )
        return parameters

    def _generate_regularization(
        self,
        X: ArrayLike,
        beta: cp.Variable,
        parameters: SimpleNamespace,
        auxiliaries: SimpleNamespace | None = None,
    ) -> cp.Expression:
        group_regularization = (
            parameters.adaptive_group_weights @ auxiliaries.group_norms
        )
        l1_regularization = cp.norm1(
            cp.multiply(parameters.adaptive_coef_weights, beta)
        )
        return group_regularization + l1_regularization

    @staticmethod
    def _get_weights_value(parameters: SimpleNamespace) -> NDArray[float]:
        """Simply return a copy of the value of the adaptive weights."""
        # np.concatenate always allocates a new array, so this is a copy
        concat_weights = np.concatenate(
            (
                parameters.adaptive_group_weights.value,
                parameters.adaptive_coef_weights.value,
            )
        )
        return concat_weights

    def _check_convergence(
        self, parameters: SimpleNamespace, previous_weights: ArrayLike
    ) -> bool:
        """Check if the weights have converged to the set tolerance."""
        # This technically checks the norm of the concatenation instead of the
        # sum of the norms of each weight vector, so it is a slightly tighter
        # tolerance.
        current_weights = np.concatenate(
            (
                parameters.adaptive_group_weights.value,
                parameters.adaptive_coef_weights.value,
            )
        )
        return np.linalg.norm(current_weights - previous_weights) <= self.tol

    def _iterative_update(
        self,
        beta: ArrayLike,
        parameters: SimpleNamespace,
        auxiliaries: SimpleNamespace | None = None,
    ) -> None:
        update = self._get_update_function()
        parameters.adaptive_coef_weights.value = (
            self.canonicals_.parameters.lambda1.value * update(beta, self.eps)
        )
        parameters.adaptive_group_weights.value = (
            self.canonicals_.parameters.lambda2.value * parameters.group_weights
        ) * update(auxiliaries.group_norms.value, self.eps)
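
# A usage sketch (assumed import path sparselm.model). l1_ratio interpolates
# between the pure adaptive group lasso (l1_ratio=0) and the pure adaptive
# lasso (l1_ratio=1):
#
#     import numpy as np
#     from sparselm.model import AdaptiveSparseGroupLasso
#
#     groups = np.array([0, 0, 0, 1, 1, 1])
#     regressor = AdaptiveSparseGroupLasso(
#         groups=groups, l1_ratio=0.5, alpha=1e-2, max_iter=5
#     )
#     # regressor.fit(X, y)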


class AdaptiveRidgedGroupLasso(AdaptiveGroupLasso, RidgedGroupLasso):
    r"""Adaptive Ridged Group Lasso implementation.

    Regularized regression objective:

    .. math::

        \min_{\beta} || X \beta - y ||^2_2 + \alpha \sum_{G} w_G ||\beta_G||_2
        + \sum_{G} \delta_G ||\beta_G||^2_2

    Where G represents groups of features/coefficients, w_G represents the
    group weights that are updated iteratively, and \delta_G the fixed ridge
    penalties given by delta. The adaptive iterative weights are only applied
    to the group norm and not to the ridge portion.

    For details on proper standardization refer to:
    http://faculty.washington.edu/nrsimon/standGL.pdf

    Args:
        groups (list or ndarray):
            array-like of integers specifying groups. Length should be the
            same as the number of features, where each integer entry specifies
            the group each parameter corresponds to.
        alpha (float):
            Regularization hyper-parameter.
        delta (ndarray): optional
            Positive 1D array. Regularization vector for ridge penalty.
        group_weights (ndarray): optional
            Weights for each group to use in the regularization term. The
            default is to use the sqrt of the group sizes; however, any weight
            can be specified. The array must be the same length as the groups
            given. If you need all groups weighted equally, just pass an array
            of ones.
        max_iter (int):
            Maximum number of re-weighting iteration steps.
        eps (float):
            Value added to the denominator of the weights.
        tol (float):
            Absolute convergence tolerance for the difference between weights
            at successive steps.
        update_function (Callable): optional
            A function with signature f(group_norms, eps) used to update the
            weights at each iteration, where group_norms are the norms of the
            coefficients beta for each group. Default is 1/(group_norms + eps).
        standardize (bool): optional
            Whether to standardize the group regularization penalty using the
            feature matrix. See the following for reference:
            http://faculty.washington.edu/nrsimon/standGL.pdf
        fit_intercept (bool):
            Whether the intercept should be estimated or not. If False, the
            data is assumed to be already centered.
        copy_X (bool):
            If True, X will be copied; else, it may be overwritten.
        warm_start (bool):
            When set to True, reuse the solution of the previous call to fit
            as initialization; otherwise, just erase the previous solution.
        solver (str):
            cvxpy backend solver to use. Supported solvers are listed here:
            https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options
        solver_options (dict):
            Dictionary of keyword arguments passed to cvxpy solve.
            See docs in CVXRegressor for more information.

    Attributes:
        coef_ (NDArray):
            Parameter vector (:math:`\beta` in the cost function formula) of
            shape (n_features,).
        intercept_ (float):
            Independent term in decision function.
        canonicals_ (SimpleNamespace):
            Namespace that contains underlying cvxpy objects used to define
            the optimization problem. The objects included are the following:

            - objective - the objective function.
            - beta - variable to be optimized (corresponds to the estimated
              coef_ attribute).
            - parameters - hyper-parameters
            - auxiliaries - auxiliary variables and expressions
            - constraints - solution constraints
    """

    def __init__(
        self,
        groups: ArrayLike | None = None,
        alpha: float = 1.0,
        delta: ArrayLike = (1.0,),
        group_weights: ArrayLike | None = None,
        max_iter: int = 3,
        eps: float = 1e-6,
        tol: float = 1e-10,
        update_function: Callable[[float, float], float] | None = None,
        standardize: bool = False,
        fit_intercept: bool = False,
        copy_X: bool = True,
        warm_start: bool = True,
        solver: str | None = None,
        solver_options: dict[str, Any] | None = None,
    ):
        super().__init__(
            groups=groups,
            alpha=alpha,
            delta=delta,
            max_iter=max_iter,
            eps=eps,
            tol=tol,
            update_function=update_function,
            group_weights=group_weights,
            standardize=standardize,
            fit_intercept=fit_intercept,
            copy_X=copy_X,
            warm_start=warm_start,
            solver=solver,
            solver_options=solver_options,
        )

    def _generate_params(self, X: ArrayLike, y: ArrayLike) -> SimpleNamespace | None:
        return super()._generate_params(X, y)

    def _generate_regularization(
        self,
        X: ArrayLike,
        beta: cp.Variable,
        parameters: SimpleNamespace,
        auxiliaries: SimpleNamespace | None = None,
    ) -> cp.Expression:
        group_regularization = AdaptiveGroupLasso._generate_regularization(
            self, X, beta, parameters, auxiliaries
        )
        # repetitive code... (duplicates the group masks built in RidgedGroupLasso)
        groups = np.arange(X.shape[1]) if self.groups is None else self.groups
        group_masks = [groups == i for i in np.sort(np.unique(groups))]
        ridge = cp.hstack([cp.sum_squares(beta[mask]) for mask in group_masks])
        ridge_regularization = 0.5 * parameters.delta @ ridge
        return group_regularization + ridge_regularization
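
# A usage sketch (assumed import path sparselm.model; the per-group delta
# shape is an illustrative assumption). delta sets the fixed ridge penalty
# and is not re-weighted; only the group-norm weights w_G are updated:
#
#     import numpy as np
#     from sparselm.model import AdaptiveRidgedGroupLasso
#
#     groups = np.array([0, 0, 1, 1])  # 2 groups of 2 features each
#     regressor = AdaptiveRidgedGroupLasso(
#         groups=groups, alpha=1e-2, delta=np.array([1e-3, 1e-3])
#     )
#     # regressor.fit(X, y)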