Source code for sparselm.tools

"""A variety of tools for fitting linear regression models to polish CE."""

from __future__ import annotations

__author__ = "Luis Barroso-Luque"

import warnings
from functools import wraps

import numpy as np
from numpy.typing import ArrayLike


def _expand_bound(bound, fill_value, size):
    """Return ``bound`` as a float array with one entry per constrained index."""
    if bound is None:
        return np.full(size, fill_value, dtype=float)
    bound = np.asarray(bound, dtype=float)
    try:
        # broadcast_to treats Python scalars, numpy scalars, 0-d arrays and
        # full-length sequences uniformly; copy() drops the read-only view.
        return np.broadcast_to(bound, (size,)).copy()
    except ValueError as err:
        raise ValueError(
            f"Bounds must be a scalar or have one entry per index ({size})."
        ) from err


def constrain_coefficients(
    indices: ArrayLike,
    high: float | ArrayLike = None,
    low: float | ArrayLike = None,
):
    """Constrain a fit method to keep coefficients within a specified range.

    Use this as a standard decorator with parameters:

    - At runtime:
      coefs = constrain_coefficients(indices, high, low)(fit_method)(X, y)
    - In fit_method definitions:
      @constrain_coefficients(indices, high, low)
      def your_fit_method(X, y):

    Args:
        indices (array or list):
            indices of coefficients to constrain
        high (float or array):
            upper bound for indices. A scalar applies to every index; None
            means no upper bound.
        low (float or array):
            lower bounds for indices. A scalar applies to every index; None
            means no lower bound.

    Returns:
        callable: a decorator that takes a fit method and returns the
        constrained version of it.

    Raises:
        ValueError: if a bound is neither a scalar nor has one entry per index.
    """
    indices = np.atleast_1d(np.asarray(indices))
    if indices.size == 0:
        # np.asarray([]) defaults to float64, which numpy rejects as an index.
        indices = indices.astype(int)

    high = _expand_bound(high, np.inf, len(indices))
    low = _expand_bound(low, -np.inf, len(indices))

    def decorate_fit_method(fit_method):
        """Decorate a fit method so the selected coefficients respect the bounds.

        Args:
            fit_method (callable):
                the fit_method you will use to fit your regression model.
                Must take the feature matrix X and target vector y as first
                arguments. (i.e. fit_method(X, y, *args, **kwargs))
        """

        @wraps(fit_method)
        def wrapped(X, y, *args, **kwargs):
            coefs = fit_method(X, y, *args, **kwargs)
            above_range = coefs[indices] > high
            below_range = coefs[indices] < low

            # TODO do not set features to zero, do the fit without them instead
            if above_range.any() or below_range.any():
                X_, y_ = X.copy(), y.copy()
                if not np.issubdtype(y_.dtype, np.inexact):
                    # In-place subtraction of a float correction would fail
                    # on an integer target vector.
                    y_ = y_.astype(float)

                # Move the contribution of every violating coefficient, pinned
                # at its bound, into the target and drop its feature column.
                y_ -= np.sum(X_[:, indices[above_range]] * high[above_range], axis=1)
                X_[:, indices[above_range]] = 0.0
                y_ -= np.sum(X_[:, indices[below_range]] * low[below_range], axis=1)
                X_[:, indices[below_range]] = 0.0

                coefs = fit_method(X_, y_, *args, **kwargs)
                coefs[indices[above_range]] = high[above_range]
                coefs[indices[below_range]] = low[below_range]

                # check if new coefficients are now out of range
                above_range = coefs[indices] > high
                below_range = coefs[indices] < low
                if above_range.any() or below_range.any():
                    warnings.warn(
                        "Running the constrained fit has resulted in new out of"
                        " range coefficients that were not so in the unconstrained"
                        " fit.\n"
                        "Double check the sensibility of the bounds you provided!",
                        RuntimeWarning,
                    )
            return coefs

        return wrapped

    return decorate_fit_method
def r2_score_to_cv_error(
    score: float, y: ArrayLike, y_pred: ArrayLike, weights: ArrayLike | None = None
) -> float:
    """Convert r2 score to cross-validation error.

    The value returned is ``sqrt((1 - score) * d)``, where ``d`` is the
    weighted mean of the squared differences between ``y`` and ``y_pred``.

    Args:
        score (float):
            An r2 score obtained from cross validation.
        y (ArrayLike): 1D
            The target vector.
        y_pred (ArrayLike): 1D
            The fitted vector.
        weights (ArrayLike): 1D
            The weights of each sample. Default to 1.

    Returns:
        float:
            The CV error

    Raises:
        ValueError: if weights do not match the length of y, contain negative
            values, or are all (close to) zero.
    """
    # Accept any array-like (lists, tuples, ...) as the annotations promise;
    # plain Python sequences do not support elementwise subtraction.
    y = np.asarray(y)
    y_pred = np.asarray(y_pred)

    if weights is None:
        weights = np.ones(len(y))
    weights = np.array(weights)

    if len(weights) != len(y):
        raise ValueError("Weights given but not the same length as sample.")
    if np.any(weights < 0) or np.allclose(weights, 0):
        raise ValueError("Weights can not be negative or all zero.")

    # Weighted mean of squared differences between target and fitted values.
    denominator = (weights * (y - y_pred) ** 2).sum() / weights.sum()
    return np.sqrt((1 - score) * denominator)