Module dropkick.logistic

Logistic regression model functions for dropkick

Source code
"""
Logistic regression model functions for dropkick
"""
import numpy as np
import scanpy as sc
from scipy import stats
from scipy.special import expit
from scipy.sparse import issparse, csc_matrix
from sklearn.base import BaseEstimator
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.utils import check_array, check_X_y
from sklearn.utils.multiclass import check_classification_targets
from .errors import _check_error_flag

from _glmnet import lognet, splognet, lsolns
from .util import (
    _fix_lambda_path,
    _check_user_lambda,
    _interpolate_model,
    _score_lambda_path,
)


class LogitNet(BaseEstimator):
    """
    Logistic Regression with elastic net penalty.
    This is a wrapper for the glmnet function lognet.

    Parameters
    ----------
    alpha : float, default 1
        The alpha parameter, 0 <= alpha <= 1, 0 for ridge, 1 for lasso

    n_lambda : int, default 100
        Maximum number of lambda values to compute

    min_lambda_ratio : float, default 1e-4
        In combination with `n_lambda`, the ratio of the smallest and largest
        values of lambda computed.

    lambda_path : array, default None
        In place of supplying n_lambda, provide an array of specific values
        to compute. The specified values must be in decreasing order. When
        None, the path of lambda values will be determined automatically. A
        maximum of `n_lambda` values will be computed.

    standardize : bool, default True
        Standardize input features prior to fitting. The final coefficients
        will be on the scale of the original data regardless of the value
        of standardize.

    fit_intercept : bool, default True
        Include an intercept term in the model.

    lower_limits : array, (shape n_features,) default -infinity
        Array of lower limits for each coefficient, must be non-positive.
        Can be a single value (which is then replicated), else an array
        corresponding to the number of features.

    upper_limits : array, (shape n_features,) default +infinity
        Array of upper limits for each coefficient, must be positive.
        See lower_limits.

    cut_point : float, default 1
        The cut point to use for selecting lambda_best: the largest lambda
        such that
            cv_score(lambda) >= cv_score(lambda_max) - cut_point * standard_error(lambda_max)

    n_splits : int, default 3
        Number of cross validation folds for computing performance metrics and
        determining `lambda_best_` and `lambda_max_`. If non-zero, must be
        at least 3.

    scoring : string, callable or None, default None
        Scoring method for model selection during cross validation. When None,
        defaults to classification score. Valid options are `accuracy`,
        `roc_auc`, `average_precision`, `precision`, `recall`. Alternatively,
        supply a function or callable object with the following signature
        ``scorer(estimator, X, y)``. Note, the scoring function affects the
        selection of `lambda_best_` and `lambda_max_`; fitting the same data
        with different scoring methods will result in the selection of
        different models.

    n_jobs : int, default 1
        Maximum number of threads for computing cross validation metrics.

    tol : float, default 1e-7
        Convergence tolerance.

    max_iter : int, default 100000
        Maximum passes over the data.

    random_state : number, default None
        Seed for the random number generator. The glmnet solver is not
        deterministic; this seed is used for determining the cv folds.

    max_features : int
        Optional maximum number of features with nonzero coefficients after
        regularization. If not set, defaults to X.shape[1] during fit.
        Note, this will be ignored if the user specifies lambda_path.

    verbose : bool, default False
        When False, some warnings and log messages are suppressed.

    Attributes
    ----------
    classes_ : array, shape (n_classes,)
        The distinct classes/labels found in y.

    n_lambda_ : int
        The number of lambda values found by glmnet. Note, this may be less
        than the number specified via n_lambda.

    lambda_path_ : array, shape (n_lambda_,)
        The values of lambda found by glmnet, in decreasing order.

    coef_path_ : array, shape (n_classes, n_features, n_lambda_)
        The set of coefficients for each value of lambda in lambda_path_.

    coef_ : array, shape (n_classes, n_features)
        The coefficients corresponding to lambda_best_.

    intercept_ : array, shape (n_classes,)
        The intercept corresponding to lambda_best_.

    intercept_path_ : array, shape (n_classes, n_lambda_)
        The set of intercepts for each value of lambda in lambda_path_.

    cv_mean_score_ : array, shape (n_lambda_,)
        The mean cv score for each value of lambda. This will be set by fit
        when n_splits >= 3.

    cv_standard_error_ : array, shape (n_lambda_,)
        The standard error of the mean cv score for each value of lambda.
        This will be set by fit when n_splits >= 3.

    lambda_max_ : float
        The value of lambda that gives the best performance in cross
        validation.

    lambda_best_ : float
        The largest value of lambda which is greater than lambda_max_ and
        performs within cut_point * standard error of lambda_max_.
    """

    CV = StratifiedKFold

    def __init__(
        self,
        alpha=1,
        n_lambda=100,
        min_lambda_ratio=1e-4,
        lambda_path=None,
        standardize=True,
        fit_intercept=True,
        lower_limits=-np.inf,
        upper_limits=np.inf,
        cut_point=1.0,
        n_splits=3,
        scoring=None,
        n_jobs=1,
        tol=1e-7,
        max_iter=100000,
        random_state=None,
        max_features=None,
        verbose=False,
    ):

        self.alpha = alpha
        self.n_lambda = n_lambda
        self.min_lambda_ratio = min_lambda_ratio
        self.lambda_path = lambda_path
        self.standardize = standardize
        self.lower_limits = lower_limits
        self.upper_limits = upper_limits
        self.fit_intercept = fit_intercept
        self.cut_point = cut_point
        self.n_splits = n_splits
        self.scoring = scoring
        self.n_jobs = n_jobs
        self.tol = tol
        self.max_iter = max_iter
        self.random_state = random_state
        self.max_features = max_features
        self.verbose = verbose

    def fit(self, adata, y, n_hvgs, sample_weight=None, relative_penalties=None):
        """
        Fit the model to training data. If n_splits >= 3, also run n-fold
        cross validation on all values in lambda_path.

        The model will be fit n+1 times. On the first pass, the lambda_path
        will be determined; on the remaining passes, the model performance for
        each value of lambda will be scored. After cross validation, the
        attribute `cv_mean_score_` will contain the mean score over all folds
        for each value of lambda, and `cv_standard_error_` will contain the
        standard error of `cv_mean_score_` for each value of lambda. The value
        of lambda which achieves the best performance in cross validation will
        be saved to `lambda_max_`. Additionally, the largest value of lambda
        s.t.:
            cv_score(l) >= cv_score(lambda_max_) -\
                           cut_point * standard_error(lambda_max_)
        will be saved to `lambda_best_`.

        Parameters
        ----------
        adata : anndata.AnnData, shape (n_samples, n_features)
            Input features in .X

        y : array, shape (n_samples,)
            Target values

        n_hvgs : int
            Number of highly-variable genes to feature-select

        sample_weight : array, shape (n_samples,)
            Optional weight vector for observations

        relative_penalties : array, shape (n_features,)
            Optional relative weight vector for penalty.
            0 entries remove penalty.

        Returns
        -------
        self : object
            Returns self.
        """
        # X is scaled counts for all cells in HVGs only for first run
        X = adata[:, adata.var.highly_variable].X.copy()

        X, y = check_X_y(X, y, accept_sparse="csr", ensure_min_samples=2)
        if sample_weight is None:
            sample_weight = np.ones(X.shape[0])
        else:
            sample_weight = np.asarray(sample_weight)

        if not np.isscalar(self.lower_limits):
            self.lower_limits = np.asarray(self.lower_limits)
            if len(self.lower_limits) != X.shape[1]:
                raise ValueError("lower_limits must equal number of features")

        if not np.isscalar(self.upper_limits):
            self.upper_limits = np.asarray(self.upper_limits)
            if len(self.upper_limits) != X.shape[1]:
                raise ValueError("upper_limits must equal number of features")

        if (
            any(self.lower_limits > 0)
            if isinstance(self.lower_limits, np.ndarray)
            else self.lower_limits > 0
        ):
            raise ValueError("lower_limits must be non-positive")

        if (
            any(self.upper_limits < 0)
            if isinstance(self.upper_limits, np.ndarray)
            else self.upper_limits < 0
        ):
            raise ValueError("upper_limits must be positive")

        if self.alpha > 1 or self.alpha < 0:
            raise ValueError("alpha must be between 0 and 1")

        # fit the model
        self._fit(X, y, sample_weight, relative_penalties)

        # score each model on the path of lambda values found by glmnet and
        # select the best scoring
        if self.n_splits >= 3:
            self._cv = self.CV(
                n_splits=self.n_splits, shuffle=True, random_state=self.random_state
            )

            cv_scores, _hvgs = _score_lambda_path(
                self,
                adata,
                y,
                n_hvgs,
                sample_weight,
                relative_penalties,
                self.scoring,
                n_jobs=self.n_jobs,
                verbose=self.verbose,
            )

            self.cv_mean_score_ = np.atleast_1d(np.mean(cv_scores, axis=0))
            self.cv_standard_error_ = np.atleast_1d(stats.sem(cv_scores))

            self.lambda_max_inx_ = np.argmax(self.cv_mean_score_)
            self.lambda_max_ = self.lambda_path_[self.lambda_max_inx_]

            target_score = (
                self.cv_mean_score_[self.lambda_max_inx_]
                - self.cut_point * self.cv_standard_error_[self.lambda_max_inx_]
            )

            self.lambda_best_inx_ = np.argwhere(self.cv_mean_score_ >= target_score)[0]
            self.lambda_best_ = self.lambda_path_[self.lambda_best_inx_]

            self.coef_ = self.coef_path_[..., self.lambda_best_inx_]
            self.coef_ = self.coef_.squeeze(axis=self.coef_.ndim - 1)
            self.intercept_ = self.intercept_path_[..., self.lambda_best_inx_].squeeze()
            if self.intercept_.shape == ():  # convert 0d array to scalar
                self.intercept_ = float(self.intercept_)

        return self

    def _fit(self, X, y, sample_weight=None, relative_penalties=None):
        if self.lambda_path is not None:
            n_lambda = len(self.lambda_path)
            min_lambda_ratio = 1.0
        else:
            n_lambda = self.n_lambda
            min_lambda_ratio = self.min_lambda_ratio

        check_classification_targets(y)
        self.classes_ = np.unique(y)  # the output of np.unique is sorted
        n_classes = len(self.classes_)
        if n_classes < 2:
            raise ValueError("Training data need to contain at least 2 " "classes.")

        # glmnet requires the labels as a one-hot-encoded array of
        # (n_samples, n_classes)
        if n_classes == 2:
            # Normally we use 1/0 for the positive and negative classes. Since
            # np.unique sorts the output, the negative class will be in the 0th
            # column. We want a model predicting the positive class, not the
            # negative class, so we flip the columns here (the != condition).
            #
            # Broadcast comparison of self.classes_ to all rows of y. See the
            # numpy rules on broadcasting for more info, essentially this
            # "reshapes" y to (n_samples, n_classes) and self.classes_ to
            # (n_samples, n_classes) and performs an element-wise comparison
            # resulting in _y with shape (n_samples, n_classes).
            _y = (y[:, None] != self.classes_).astype(np.float64, order="F")
        else:
            # multinomial case, glmnet uses the entire array so we can
            # keep the original order.
            _y = (y[:, None] == self.classes_).astype(np.float64, order="F")

        # use sample weights, making sure all weights are positive
        # this is inspired by the R wrapper for glmnet, in lognet.R
        if sample_weight is not None:
            weight_gt_0 = sample_weight > 0
            sample_weight = sample_weight[weight_gt_0]
            _y = _y[weight_gt_0, :]
            X = X[weight_gt_0, :]
            _y = _y * np.expand_dims(sample_weight, 1)

        # we need some sort of "offset" array for glmnet
        # an array of shape (n_examples, n_classes)
        offset = np.zeros((X.shape[0], n_classes), dtype=np.float64, order="F")

        # no variables are excluded from the fit
        exclude_vars = 0

        # how much each feature should be penalized relative to the others
        # this may be useful to expose to the caller if there are vars that
        # must be included in the final model or there is some prior knowledge
        # about how important some vars are relative to others, see the glmnet
        # vignette:
        # http://web.stanford.edu/~hastie/glmnet/glmnet_alpha.html
        if relative_penalties is None:
            relative_penalties = np.ones(X.shape[1], dtype=np.float64, order="F")

        coef_bounds = np.empty((2, X.shape[1]), dtype=np.float64, order="F")
        coef_bounds[0, :] = self.lower_limits
        coef_bounds[1, :] = self.upper_limits

        if n_classes == 2:
            # binomial, tell glmnet there is only one class
            # otherwise we will get a coef matrix with two dimensions
            # where each pair are equal in magnitude and opposite in sign
            # also since the magnitudes are constrained to sum to one, the
            # returned coefficients would be one half of the proper values
            n_classes = 1

        # This is a stopping criterion (nx)
        # R defaults to nx = num_features, and ne = num_features + 1
        if self.max_features is None:
            max_features = X.shape[1]
        else:
            max_features = self.max_features

        if issparse(X):
            _x = csc_matrix(X, dtype=np.float64, copy=True)

            (
                self.n_lambda_,
                self.intercept_path_,
                ca,
                ia,
                nin,
                _,  # dev0
                _,  # dev
                self.lambda_path_,
                _,  # nlp
                jerr,
            ) = splognet(
                self.alpha,
                _x.shape[0],
                _x.shape[1],
                n_classes,
                _x.data,
                _x.indptr + 1,  # Fortran uses 1-based indexing
                _x.indices + 1,
                _y,
                offset,
                exclude_vars,
                relative_penalties,
                coef_bounds,
                max_features,
                X.shape[1] + 1,
                min_lambda_ratio,
                self.lambda_path,
                self.tol,
                n_lambda,
                self.standardize,
                self.fit_intercept,
                self.max_iter,
                0,
            )
        else:  # not sparse
            # some notes: glmnet requires both x and y to be float64; the two
            # arrays may also be overwritten during the fitting process, so
            # they need to be copied prior to calling lognet. The fortran
            # wrapper will copy any arrays passed to a wrapped function if
            # they are not in the fortran layout; to avoid making extra
            # copies, ensure x and y are `F_CONTIGUOUS` prior to calling
            # lognet.
            _x = X.astype(dtype=np.float64, order="F", copy=True)

            (
                self.n_lambda_,
                self.intercept_path_,
                ca,
                ia,
                nin,
                _,  # dev0
                _,  # dev
                self.lambda_path_,
                _,  # nlp
                jerr,
            ) = lognet(
                self.alpha,
                n_classes,
                _x,
                _y,
                offset,
                exclude_vars,
                relative_penalties,
                coef_bounds,
                X.shape[1] + 1,
                min_lambda_ratio,
                self.lambda_path,
                self.tol,
                max_features,
                n_lambda,
                self.standardize,
                self.fit_intercept,
                self.max_iter,
                0,
            )

        # raises RuntimeError if self.jerr_ is nonzero
        self.jerr_ = jerr
        _check_error_flag(self.jerr_, verbose_convergence=self.verbose)

        # glmnet may not return the requested number of lambda values, so we
        # need to trim the trailing zeros from the returned path so
        # len(lambda_path_) is equal to n_lambda_
        self.lambda_path_ = self.lambda_path_[: self.n_lambda_]
        # also fix the first value of lambda
        self.lambda_path_ = _fix_lambda_path(self.lambda_path_)
        self.intercept_path_ = self.intercept_path_[:, : self.n_lambda_]
        # also trim the compressed coefficient matrix
        ca = ca[:, :, : self.n_lambda_]
        # and trim the array of n_coef per lambda (may or may not be non-zero)
        nin = nin[: self.n_lambda_]
        # decompress the coefficients returned by glmnet, see doc.py
        self.coef_path_ = lsolns(X.shape[1], ca, ia, nin)
        # coef_path_ has shape (n_features, n_classes, n_lambda), we should
        # match shape for scikit-learn models:
        # (n_classes, n_features, n_lambda)
        self.coef_path_ = np.transpose(self.coef_path_, axes=(1, 0, 2))

        return self

    def decision_function(self, X, lamb=None):
        lambda_best = None
        if hasattr(self, "lambda_best_"):
            lambda_best = self.lambda_best_

        lamb = _check_user_lambda(self.lambda_path_, lambda_best, lamb)
        coef, intercept = _interpolate_model(
            self.lambda_path_, self.coef_path_, self.intercept_path_, lamb
        )

        # coef must be (n_classes, n_features, n_lambda)
        if coef.ndim != 3:
            # we must be working with an intercept only model
            coef = coef[:, :, np.newaxis]
        # intercept must be (n_classes, n_lambda)
        if intercept.ndim != 2:
            intercept = intercept[:, np.newaxis]

        X = check_array(X, accept_sparse="csr")
        # return (n_samples, n_classes, n_lambda)
        z = np.empty((X.shape[0], coef.shape[0], coef.shape[-1]))
        # well... sometimes we just need a for loop
        for c in range(coef.shape[0]):  # all classes
            for l in range(coef.shape[-1]):  # all values of lambda
                z[:, c, l] = X.dot(coef[c, :, l])
        z += intercept

        # drop the last dimension (lambda) when we are predicting for a single
        # value of lambda, and drop the middle dimension (class) when we are
        # predicting from a binomial model (for consistency with scikit-learn)
        return z.squeeze()

    def predict_proba(self, X, lamb=None):
        """Probability estimates for each class given X.

        The returned estimates are in the same order as the values in
        classes_.

        Parameters
        ----------
        X : array, shape (n_samples, n_features)

        lamb : array, shape (n_lambda,)
            Values of lambda from lambda_path_ from which to make predictions.
            If no values are provided, the returned predictions will be those
            corresponding to lambda_best_. The values of lamb must also be in
            the range of lambda_path_; values greater than max(lambda_path_)
            or less than min(lambda_path_) will be clipped.

        Returns
        -------
        T : array, shape (n_samples, n_classes) or (n_samples, n_classes, n_lambda)
        """
        z = self.decision_function(X, lamb)
        expit(z, z)  # logistic sigmoid applied to z in place

        # reshape z to (n_samples, n_classes, n_lambda)
        n_lambda = len(np.atleast_1d(lamb))
        z = z.reshape(X.shape[0], -1, n_lambda)

        if z.shape[1] == 1:
            # binomial, for consistency and to match scikit-learn, add the
            # complement so z has shape (n_samples, 2, n_lambda)
            z = np.concatenate((1 - z, z), axis=1)
        else:
            # normalize for multinomial
            z /= np.expand_dims(z.sum(axis=1), axis=1)

        if n_lambda == 1:
            z = z.squeeze(axis=-1)
        return z

    def predict(self, X, lamb=None):
        """Predict class labels for samples in X.

        Parameters
        ----------
        X : array, shape (n_samples, n_features)

        lamb : array, shape (n_lambda,)
            Values of lambda from lambda_path_ from which to make predictions.
            If no values are provided for lamb, the returned predictions will
            be those corresponding to lambda_best_. The values of lamb must
            also be in the range of lambda_path_; values greater than
            max(lambda_path_) or less than min(lambda_path_) will be clipped.

        Returns
        -------
        T : array, shape (n_samples,) or (n_samples, n_lambda)
            Predicted class labels for each sample given each value of lambda
        """

        scores = self.predict_proba(X, lamb)
        indices = scores.argmax(axis=1)

        return self.classes_[indices]

    def score(self, X, y, lamb=None):
        """Returns the mean accuracy on the given test data and labels.

        Parameters
        ----------
        X : array, shape (n_samples, n_features)
            Test samples

        y : array, shape (n_samples,)
            True labels for X

        lamb : array, shape (n_lambda,)
            Values from lambda_path_ for which to score predictions.

        Returns
        -------
        score : array, shape (n_lambda,)
            Mean accuracy for each value of lambda.
        """
        pred = self.predict(X, lamb=lamb)
        return np.apply_along_axis(accuracy_score, 0, pred, y)

Classes

class LogitNet (alpha=1, n_lambda=100, min_lambda_ratio=0.0001, lambda_path=None, standardize=True, fit_intercept=True, lower_limits=-inf, upper_limits=inf, cut_point=1.0, n_splits=3, scoring=None, n_jobs=1, tol=1e-07, max_iter=100000, random_state=None, max_features=None, verbose=False)

Logistic Regression with elastic net penalty. This is a wrapper for the glmnet function lognet.

Parameters

alpha : float, default 1
The alpha parameter, 0 <= alpha <= 1, 0 for ridge, 1 for lasso
n_lambda : int, default 100
Maximum number of lambda values to compute
min_lambda_ratio : float, default 1e-4
In combination with n_lambda, the ratio of the smallest and largest values of lambda computed.
lambda_path : array, default None
In place of supplying n_lambda, provide an array of specific values to compute. The specified values must be in decreasing order. When None, the path of lambda values will be determined automatically. A maximum of n_lambda values will be computed.
standardize : bool, default True
Standardize input features prior to fitting. The final coefficients will be on the scale of the original data regardless of the value of standardize.
fit_intercept : bool, default True
Include an intercept term in the model.
lower_limits : array, (shape n_features,) default -infinity
Array of lower limits for each coefficient, must be non-positive. Can be a single value (which is then replicated), else an array corresponding to the number of features.
upper_limits : array, (shape n_features,) default +infinity
Array of upper limits for each coefficient, must be positive. See lower_limits.
cut_point : float, default 1
The cut point to use for selecting lambda_best: the largest lambda such that cv_score(lambda) >= cv_score(lambda_max) - cut_point * standard_error(lambda_max).
n_splits : int, default 3
Number of cross validation folds for computing performance metrics and determining lambda_best_ and lambda_max_. If non-zero, must be at least 3.
scoring : string, callable or None, default None
Scoring method for model selection during cross validation. When None, defaults to classification score. Valid options are accuracy, roc_auc, average_precision, precision, recall. Alternatively, supply a function or callable object with the following signature scorer(estimator, X, y). Note, the scoring function affects the selection of lambda_best_ and lambda_max_; fitting the same data with different scoring methods will result in the selection of different models.
n_jobs : int, default 1
Maximum number of threads for computing cross validation metrics.
tol : float, default 1e-7
Convergence tolerance.
max_iter : int, default 100000
Maximum passes over the data.
random_state : number, default None
Seed for the random number generator. The glmnet solver is not deterministic; this seed is used for determining the cv folds.
max_features : int
Optional maximum number of features with nonzero coefficients after regularization. If not set, defaults to X.shape[1] during fit. Note, this will be ignored if the user specifies lambda_path.
verbose : bool, default False
When False, some warnings and log messages are suppressed.
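
A minimal construction sketch using a few of the parameters above; the specific values are illustrative assumptions, not recommendations:

>>> from dropkick.logistic import LogitNet
>>> model = LogitNet(
...     alpha=0.1,        # mostly-ridge elastic net penalty
...     n_lambda=100,     # up to 100 values on the regularization path
...     n_splits=5,       # 5-fold stratified cross validation
...     random_state=0,   # reproducible cv folds
... )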

Attributes

classes_ : array, shape (n_classes,)
The distinct classes/labels found in y.
n_lambda_ : int
The number of lambda values found by glmnet. Note, this may be less than the number specified via n_lambda.
lambda_path_ : array, shape (n_lambda_,)
The values of lambda found by glmnet, in decreasing order.
coef_path_ : array, shape (n_classes, n_features, n_lambda_)
The set of coefficients for each value of lambda in lambda_path_.
coef_ : array, shape (n_classes, n_features)
The coefficients corresponding to lambda_best_.
intercept_ : array, shape (n_classes,)
The intercept corresponding to lambda_best_.
intercept_path_ : array, shape (n_classes, n_lambda_)
The set of intercepts for each value of lambda in lambda_path_.
cv_mean_score_ : array, shape (n_lambda_,)
The mean cv score for each value of lambda. This will be set by fit when n_splits >= 3.
cv_standard_error_ : array, shape (n_lambda_,)
The standard error of the mean cv score for each value of lambda. This will be set by fit when n_splits >= 3.
lambda_max_ : float
The value of lambda that gives the best performance in cross validation.
lambda_best_ : float
The largest value of lambda which is greater than lambda_max_ and performs within cut_point * standard error of lambda_max_.
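
A sketch of fitting and inspecting the attributes above, assuming `adata` is an AnnData with a boolean adata.var.highly_variable column already populated and `y` is a binary label vector of length adata.n_obs (both hypothetical names):

>>> model = LogitNet(alpha=0.1, n_splits=5, random_state=0)
>>> model = model.fit(adata, y, n_hvgs=2000)
>>> path = model.lambda_path_      # (n_lambda_,) decreasing lambda values
>>> best = model.lambda_best_      # lambda selected by the cut_point rule
>>> coef = model.coef_             # coefficients at lambda_best_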

Ancestors

  • sklearn.base.BaseEstimator

Class variables

var CV

Stratified K-Folds cross-validator.

Provides train/test indices to split data in train/test sets.

This cross-validation object is a variation of KFold that returns stratified folds. The folds are made by preserving the percentage of samples for each class.

Read more in the scikit-learn User Guide.

Parameters

n_splits : int, default=5

Number of folds. Must be at least 2.

Changed in version: 0.22

n_splits default value changed from 3 to 5.

shuffle : bool, default=False
Whether to shuffle each class's samples before splitting into batches. Note that the samples within each split will not be shuffled.
random_state : int, RandomState instance or None, default=None
When shuffle is True, random_state affects the ordering of the indices, which controls the randomness of each fold for each class. Otherwise, leave random_state as None. Pass an int for reproducible output across multiple function calls. See the scikit-learn Glossary entry for random_state.

Examples

>>> import numpy as np
>>> from sklearn.model_selection import StratifiedKFold
>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
>>> y = np.array([0, 0, 1, 1])
>>> skf = StratifiedKFold(n_splits=2)
>>> skf.get_n_splits(X, y)
2
>>> print(skf)
StratifiedKFold(n_splits=2, random_state=None, shuffle=False)
>>> for train_index, test_index in skf.split(X, y):
...     print("TRAIN:", train_index, "TEST:", test_index)
...     X_train, X_test = X[train_index], X[test_index]
...     y_train, y_test = y[train_index], y[test_index]
TRAIN: [1 3] TEST: [0 2]
TRAIN: [0 2] TEST: [1 3]

Notes

The implementation is designed to:

  • Generate test sets such that all contain the same distribution of classes, or as close as possible.
  • Be invariant to class label: relabelling y = ["Happy", "Sad"] to y = [1, 0] should not change the indices generated.
  • Preserve order dependencies in the dataset ordering, when shuffle=False: all samples from class k in some test set were contiguous in y, or separated in y by samples from classes other than k.
  • Generate test sets where the smallest and largest differ by at most one sample.

Changed in version: 0.22

The previous implementation did not follow the last constraint.

See Also

RepeatedStratifiedKFold
Repeats Stratified K-Fold n times.

Methods

def decision_function(self, X, lamb=None)
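
decision_function carries no docstring; per the source listing above, it returns the linear predictor (log-odds) X @ coef + intercept for each class and each requested value of lambda. A usage sketch, assuming a fitted `model` and a feature matrix `X` with the same columns the model was fit on (both hypothetical names):

>>> z = model.decision_function(X)   # log-odds at lambda_best_, shape (n_samples,)
>>> z_path = model.decision_function(X, lamb=model.lambda_path_)  # (n_samples, n_lambda_)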
def fit(self, adata, y, n_hvgs, sample_weight=None, relative_penalties=None)

Fit the model to training data. If n_splits >= 3, also run n-fold cross validation on all values in lambda_path.

The model will be fit n+1 times. On the first pass, the lambda_path will be determined; on the remaining passes, the model performance for each value of lambda will be scored. After cross validation, the attribute cv_mean_score_ will contain the mean score over all folds for each value of lambda, and cv_standard_error_ will contain the standard error of cv_mean_score_ for each value of lambda. The value of lambda which achieves the best performance in cross validation will be saved to lambda_max_. Additionally, the largest value of lambda s.t. cv_score(l) >= cv_score(lambda_max_) - cut_point * standard_error(lambda_max_) will be saved to lambda_best_.

Parameters

adata : anndata.AnnData, shape (n_samples, n_features)
Input features in .X
y : array, shape (n_samples,)
Target values
n_hvgs : int
Number of highly-variable genes to feature-select
sample_weight : array, shape (n_samples,)
Optional weight vector for observations
relative_penalties : array, shape (n_features,)
Optional relative weight vector for penalty. 0 entries remove penalty.

Returns

self : object
Returns self.
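
The cut_point selection described above can be reproduced from the fitted attributes. A sketch, assuming a fitted `model` with n_splits >= 3:

>>> import numpy as np
>>> best = model.lambda_max_inx_
>>> target = (model.cv_mean_score_[best]
...           - model.cut_point * model.cv_standard_error_[best])
>>> # lambda_path_ is decreasing, so the first index meeting the target
>>> # corresponds to the largest qualifying lambda
>>> inx = np.argwhere(model.cv_mean_score_ >= target)[0]
>>> bool(model.lambda_path_[inx] == model.lambda_best_)
True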
def predict(self, X, lamb=None)

Predict class labels for samples in X.

Parameters

X : array, shape (n_samples, n_features)
 
lamb : array, shape (n_lambda,)
Values of lambda from lambda_path_ from which to make predictions. If no values are provided for lamb, the returned predictions will be those corresponding to lambda_best_. The values of lamb must also be in the range of lambda_path_; values greater than max(lambda_path_) or less than min(lambda_path_) will be clipped.

Returns

T : array, shape (n_samples,) or (n_samples, n_lambda)
Predicted class labels for each sample given each value of lambda
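
A usage sketch, assuming the fitted `model` and hypothetical feature matrix `X` from the earlier examples:

>>> labels = model.predict(X)                    # labels at lambda_best_
>>> path_labels = model.predict(X, lamb=model.lambda_path_[:5])
>>> path_labels.shape                            # (n_samples, 5)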
def predict_proba(self, X, lamb=None)

Probability estimates for each class given X.

The returned estimates are in the same order as the values in classes_.

Parameters

X : array, shape (n_samples, n_features)
 
lamb : array, shape (n_lambda,)
Values of lambda from lambda_path_ from which to make predictions. If no values are provided, the returned predictions will be those corresponding to lambda_best_. The values of lamb must also be in the range of lambda_path_; values greater than max(lambda_path_) or less than min(lambda_path_) will be clipped.

Returns

T : array, shape (n_samples, n_classes) or (n_samples, n_classes, n_lambda)
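
A sketch for a fitted binomial `model` (hypothetical `X` as above). In the binomial case the complement column is added, so the two columns sum to one and follow the order of classes_:

>>> import numpy as np
>>> proba = model.predict_proba(X)      # shape (n_samples, 2)
>>> bool(np.allclose(proba.sum(axis=1), 1.0))
True
>>> positive = proba[:, 1]              # probability of the positive class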
 
def score(self, X, y, lamb=None)

Returns the mean accuracy on the given test data and labels.

Parameters

X : array, shape (n_samples, n_features)
Test samples
y : array, shape (n_samples,)
True labels for X
lamb : array, shape (n_lambda,)
Values from lambda_path_ for which to score predictions.

Returns

score : array, shape (n_lambda,)
Mean accuracy for each value of lambda.
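
A sketch, assuming the fitted `model`, feature matrix `X`, and true labels `y` from the earlier examples:

>>> acc = model.score(X, y)                                # accuracy at lambda_best_
>>> acc_path = model.score(X, y, lamb=model.lambda_path_)  # one score per lambda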