diff --git a/examples/pcovc/KPCovC_Comparison.py b/examples/pcovc/KPCovC_Comparison.py index 5028a9b5c..f47cf9b4e 100644 --- a/examples/pcovc/KPCovC_Comparison.py +++ b/examples/pcovc/KPCovC_Comparison.py @@ -33,6 +33,7 @@ random_state = 0 n_components = 2 +scale_z = True # %% # @@ -85,7 +86,7 @@ # Both PCA and PCovC fail to produce linearly separable latent space # maps. We will need a kernel method to effectively separate the moon classes. -mixing = 0.10 +mixing = 0.5 alpha_d = 0.5 alpha_p = 0.4 @@ -95,6 +96,7 @@ n_components=n_components, random_state=random_state, mixing=mixing, + scale_z=scale_z, classifier=LinearSVC(), ): "PCovC", } @@ -138,6 +140,7 @@ random_state=random_state, mixing=mixing, center=center, + scale_z=scale_z, **kernel_params, ): {"title": "Kernel PCovC", "eps": 2}, } @@ -220,6 +223,7 @@ mixing=mixing, classifier=model, center=center, + scale_z=scale_z, **models[model]["kernel_params"], ) t_kpcovc_train = kpcovc.fit_transform(X_train_scaled, y_train) diff --git a/examples/pcovc/KPCovC_Hyperparameters.py b/examples/pcovc/KPCovC_Hyperparameters.py index ce3948e25..d848d70bb 100644 --- a/examples/pcovc/KPCovC_Hyperparameters.py +++ b/examples/pcovc/KPCovC_Hyperparameters.py @@ -65,7 +65,8 @@ fig, axs = plt.subplots(2, len(kernels), figsize=(len(kernels) * 4, 8)) center = True -mixing = 0.10 +mixing = 0.5 +scale_z = True for i, kernel in enumerate(kernels): kpca = KernelPCA( @@ -83,6 +84,7 @@ random_state=random_state, **kernel_params.get(kernel, {}), center=center, + scale_z=scale_z, ) t_kpcovc = kpcovc.fit_transform(X_scaled, y) @@ -118,7 +120,7 @@ kpcovc = KernelPCovC( n_components=n_components, random_state=random_state, - mixing=mixing, + mixing=0.1, center=center, kernel="rbf", gamma=gamma, diff --git a/examples/pcovc/PCovC_Hyperparameters.py b/examples/pcovc/PCovC_Hyperparameters.py index 22989a95d..427ba6dab 100644 --- a/examples/pcovc/PCovC_Hyperparameters.py +++ b/examples/pcovc/PCovC_Hyperparameters.py @@ -77,6 +77,7 @@ n_components=n_components, random_state=random_state, classifier=LogisticRegressionCV(), + scale_z=True, ) pcovc.fit(X_scaled, y) @@ -120,6 +121,7 @@ n_components=n_components, random_state=random_state, classifier=model, + scale_z=True, ) pcovc.fit(X_scaled, y) diff --git a/src/skmatter/decomposition/_kernel_pcovc.py b/src/skmatter/decomposition/_kernel_pcovc.py index e8965a223..f8a32edf4 100644 --- a/src/skmatter/decomposition/_kernel_pcovc.py +++ b/src/skmatter/decomposition/_kernel_pcovc.py @@ -1,3 +1,4 @@ +import warnings import numpy as np from sklearn import clone @@ -16,7 +17,7 @@ from sklearn.linear_model._base import LinearClassifierMixin from sklearn.utils.multiclass import check_classification_targets, type_of_target -from skmatter.preprocessing import KernelNormalizer +from skmatter.preprocessing import KernelNormalizer, StandardFlexibleScaler from skmatter.utils import check_cl_fit from skmatter.decomposition import _BaseKPCov @@ -86,6 +87,9 @@ class KernelPCovC(LinearClassifierMixin, _BaseKPCov): If None, ``sklearn.linear_model.LogisticRegression()`` is used as the classifier. + scale_z: bool, default=False + Whether to scale Z prior to eigendecomposition. + kernel : {"linear", "poly", "rbf", "sigmoid", "precomputed"} or callable, default="linear" Kernel. @@ -116,6 +120,14 @@ class KernelPCovC(LinearClassifierMixin, _BaseKPCov): and for matrix inversions. Must be of range [0.0, infinity). + z_mean_tol: float, default=1e-12 + Tolerance for the column means of Z. + Must be of range [0.0, infinity). + + z_var_tol: float, default=1.5 + Tolerance for the column variances of Z. + Must be of range [0.0, infinity). + n_jobs : int, default=None The number of parallel jobs to run. :obj:`None` means 1 unless in a :obj:`joblib.parallel_backend` context. @@ -167,6 +179,9 @@ class KernelPCovC(LinearClassifierMixin, _BaseKPCov): The data used to fit the model. This attribute is used to build kernels from new data. + scale_z: bool + Whether Z is being scaled prior to eigendecomposition. + Examples -------- >>> import numpy as np @@ -174,7 +189,7 @@ class KernelPCovC(LinearClassifierMixin, _BaseKPCov): >>> from sklearn.preprocessing import StandardScaler >>> X = np.array([[-2, 3, -1, 0], [2, 0, -3, 1], [3, 0, -1, 3], [2, -2, 1, 0]]) >>> X = StandardScaler().fit_transform(X) - >>> Y = np.array([[2], [0], [1], [2]]) + >>> Y = np.array([2, 0, 1, 2]) >>> kpcovc = KernelPCovC( ... mixing=0.1, ... n_components=2, @@ -200,6 +215,7 @@ def __init__( n_components=None, svd_solver="auto", classifier=None, + scale_z=False, kernel="linear", gamma=None, degree=3, @@ -208,6 +224,8 @@ def __init__( center=False, fit_inverse_transform=False, tol=1e-12, + z_mean_tol=1e-12, + z_var_tol=1.5, n_jobs=None, iterated_power="auto", random_state=None, @@ -229,6 +247,9 @@ def __init__( fit_inverse_transform=fit_inverse_transform, ) self.classifier = classifier + self.scale_z = scale_z + self.z_mean_tol = z_mean_tol + self.z_var_tol = z_var_tol def fit(self, X, Y, W=None): r"""Fit the model with X and Y. @@ -323,6 +344,25 @@ def fit(self, X, Y, W=None): W = LogisticRegression().fit(K, Y).coef_.T Z = K @ W + if self.scale_z: + Z = StandardFlexibleScaler().fit_transform(Z) + + z_means_ = np.mean(Z, axis=0) + z_vars_ = np.var(Z, axis=0) + + if np.max(np.abs(z_means_)) > self.z_mean_tol: + warnings.warn( + "This class does not automatically center Z, and the column means " + "of Z are greater than the supplied tolerance. We recommend scaling " + "Z (and the weights) by setting `scale_z=True`." + ) + + if np.max(z_vars_) > self.z_var_tol: + warnings.warn( + "This class does not automatically scale Z, and the column variances " + "of Z are greater than the supplied tolerance. We recommend scaling " + "Z (and the weights) by setting `scale_z=True`." + ) self._fit(K, Z, W) diff --git a/src/skmatter/decomposition/_pcovc.py b/src/skmatter/decomposition/_pcovc.py index e0cee034e..167410e35 100644 --- a/src/skmatter/decomposition/_pcovc.py +++ b/src/skmatter/decomposition/_pcovc.py @@ -16,6 +16,8 @@ from sklearn.utils.validation import check_is_fitted, validate_data from skmatter.decomposition import _BasePCov from skmatter.utils import check_cl_fit +from skmatter.preprocessing import StandardFlexibleScaler +import warnings class PCovC(LinearClassifierMixin, _BasePCov): @@ -96,6 +98,14 @@ class PCovC(LinearClassifierMixin, _BasePCov): Tolerance for singular values computed by svd_solver == 'arpack'. Must be of range [0.0, infinity). + z_mean_tol: float, default=1e-12 + Tolerance for the column means of Z. + Must be of range [0.0, infinity). + + z_var_tol: float, default=1.5 + Tolerance for the column variances of Z. + Must be of range [0.0, infinity). + space: {'feature', 'sample', 'auto'}, default='auto' whether to compute the PCovC in ``sample`` or ``feature`` space. The default is equal to ``sample`` when :math:`{n_{samples} < n_{features}}` @@ -123,6 +133,9 @@ class PCovC(LinearClassifierMixin, _BasePCov): If None, ``sklearn.linear_model.LogisticRegression()`` is used as the classifier. + scale_z: bool, default=False + Whether to scale Z prior to eigendecomposition. + iterated_power : int or 'auto', default='auto' Number of iterations for the power method computed by svd_solver == 'randomized'. @@ -143,6 +156,14 @@ class PCovC(LinearClassifierMixin, _BasePCov): Tolerance for singular values computed by svd_solver == 'arpack'. Must be of range [0.0, infinity). + z_mean_tol: float + Tolerance for the column means of Z. + Must be of range [0.0, infinity). + + z_var_tol: float + Tolerance for the column variances of Z. + Must be of range [0.0, infinity). + space: {'feature', 'sample', 'auto'}, default='auto' whether to compute the PCovC in ``sample`` or ``feature`` space. The default is equal to ``sample`` when :math:`{n_{samples} < n_{features}}` @@ -174,6 +195,9 @@ class PCovC(LinearClassifierMixin, _BasePCov): the projector, or weights, from the latent-space projection :math:`\mathbf{T}` to the class confidence scores :math:`\mathbf{Z}` + scale_z: bool + Whether Z is being scaled prior to eigendecomposition + explained_variance_ : numpy.ndarray of shape (n_components,) The amount of variance explained by each of the selected components. Equal to n_components largest eigenvalues @@ -208,8 +232,11 @@ def __init__( n_components=None, svd_solver="auto", tol=1e-12, + z_mean_tol=1e-12, + z_var_tol=1.5, space="auto", classifier=None, + scale_z=False, iterated_power="auto", random_state=None, whiten=False, @@ -225,6 +252,9 @@ def __init__( whiten=whiten, ) self.classifier = classifier + self.scale_z = scale_z + self.z_mean_tol = z_mean_tol + self.z_var_tol = z_var_tol def fit(self, X, Y, W=None): r"""Fit the model with X and Y. @@ -291,7 +321,7 @@ def fit(self, X, Y, W=None): classifier = self.classifier self.z_classifier_ = check_cl_fit(classifier, X, Y) - W = self.z_classifier_.coef_.T + W = self.z_classifier_.coef_.T.copy() else: # If precomputed, use default classifier to predict Y from T @@ -301,6 +331,28 @@ def fit(self, X, Y, W=None): Z = X @ W + if self.scale_z: + z_scaler = StandardFlexibleScaler().fit(Z) + Z = z_scaler.transform(Z) + W /= z_scaler.scale_.reshape(1, -1) + + z_means_ = np.mean(Z, axis=0) + z_vars_ = np.var(Z, axis=0) + + if np.max(np.abs(z_means_)) > self.z_mean_tol: + warnings.warn( + "This class does not automatically center Z, and the column means " + "of Z are greater than the supplied tolerance. We recommend scaling " + "Z (and the weights) by setting `scale_z=True`." + ) + + if np.max(z_vars_) > self.z_var_tol: + warnings.warn( + "This class does not automatically scale Z, and the column variances " + "of Z are greater than the supplied tolerance. We recommend scaling " + "Z (and the weights) by setting `scale_z=True`." + ) + if self.space_ == "feature": self._fit_feature_space(X, Y, Z) else: diff --git a/tests/test_kernel_pcovc.py b/tests/test_kernel_pcovc.py index 9b29b8437..0809a480c 100644 --- a/tests/test_kernel_pcovc.py +++ b/tests/test_kernel_pcovc.py @@ -1,4 +1,5 @@ import unittest +import warnings import numpy as np from sklearn import exceptions @@ -34,10 +35,12 @@ def __init__(self, *args, **kwargs): lambda mixing=0.5, classifier=LogisticRegression(), n_components=4, + scale_z=True, **kwargs: KernelPCovC( mixing=mixing, classifier=classifier, n_components=n_components, + scale_z=scale_z, svd_solver=kwargs.pop("svd_solver", "full"), **kwargs, ) @@ -327,6 +330,44 @@ def test_precomputed_classification(self): self.assertTrue(np.linalg.norm(t3 - t2) < self.error_tol) self.assertTrue(np.linalg.norm(t3 - t1) < self.error_tol) + def test_scale_z_parameter(self): + """Check that changing scale_z changes the eigendecomposition.""" + kpcovc_scaled = self.model(scale_z=True) + kpcovc_scaled.fit(self.X, self.Y) + + kpcovc_unscaled = self.model(scale_z=False) + kpcovc_unscaled.fit(self.X, self.Y) + assert not np.allclose(kpcovc_scaled.pkt_, kpcovc_unscaled.pkt_) + + def test_z_scaling(self): + """ + Check that KPCovC raises a warning if Z is not of scale, and does not + if it is. + """ + kpcovc = self.model(n_components=2, scale_z=True) + + with warnings.catch_warnings(): + kpcovc.fit(self.X, self.Y) + warnings.simplefilter("error") + self.assertEqual(1 + 1, 2) + + kpcovc = self.model(n_components=2, scale_z=False, z_mean_tol=0, z_var_tol=0) + + with warnings.catch_warnings(record=True) as w: + kpcovc.fit(self.X, self.Y) + self.assertEqual( + str(w[0].message), + "This class does not automatically center Z, and the column means " + "of Z are greater than the supplied tolerance. We recommend scaling " + "Z (and the weights) by setting `scale_z=True`.", + ) + self.assertEqual( + str(w[1].message), + "This class does not automatically scale Z, and the column variances " + "of Z are greater than the supplied tolerance. We recommend scaling " + "Z (and the weights) by setting `scale_z=True`.", + ) + class KernelTests(KernelPCovCBaseTest): def test_kernel_types(self): diff --git a/tests/test_pcovc.py b/tests/test_pcovc.py index 8607a2e2a..f552323ee 100644 --- a/tests/test_pcovc.py +++ b/tests/test_pcovc.py @@ -19,8 +19,11 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.model = ( - lambda mixing=0.5, classifier=LogisticRegression(), **kwargs: PCovC( - mixing=mixing, classifier=classifier, **kwargs + lambda mixing=0.5, + classifier=LogisticRegression(), + scale_z=True, + **kwargs: PCovC( + mixing=mixing, classifier=classifier, scale_z=scale_z, **kwargs ) ) @@ -402,6 +405,35 @@ def test_centering(self): "mean is greater than the supplied tolerance.", ) + def test_z_scaling(self): + """ + Check that PCovC raises a warning if Z is not of scale, and does not + if it is. + """ + pcovc = self.model(n_components=2, scale_z=True) + + with warnings.catch_warnings(): + pcovc.fit(self.X, self.Y) + warnings.simplefilter("error") + self.assertEqual(1 + 1, 2) + + pcovc = self.model(n_components=2, scale_z=False, z_mean_tol=0, z_var_tol=0) + + with warnings.catch_warnings(record=True) as w: + pcovc.fit(self.X, self.Y) + self.assertEqual( + str(w[0].message), + "This class does not automatically center Z, and the column means " + "of Z are greater than the supplied tolerance. We recommend scaling " + "Z (and the weights) by setting `scale_z=True`.", + ) + self.assertEqual( + str(w[1].message), + "This class does not automatically scale Z, and the column variances " + "of Z are greater than the supplied tolerance. We recommend scaling " + "Z (and the weights) by setting `scale_z=True`.", + ) + def test_T_shape(self): """Check that PCovC returns a latent space projection consistent with the shape of the input matrix. @@ -464,6 +496,9 @@ def test_default_ncomponents(self): self.assertEqual(pcovc.n_components_, min(self.X.shape)) def test_prefit_classifier(self): + """Check that a passed prefit classifier is not modified in + PCovC's `fit` call. + """ classifier = LinearSVC() classifier.fit(self.X, self.Y) pcovc = self.model(mixing=0.5, classifier=classifier) @@ -575,6 +610,17 @@ def test_incompatible_coef_shape(self): % (len(pcovc_multi.classes_), self.X.shape[1], cl_binary.coef_.shape), ) + def test_scale_z_parameter(self): + """Check that changing scale_z changes the eigendecomposition.""" + pcovc_scaled = self.model(scale_z=True) + pcovc_scaled.fit(self.X, self.Y) + + pcovc_unscaled = self.model(scale_z=False) + pcovc_unscaled.fit(self.X, self.Y) + assert not np.allclose( + pcovc_scaled.singular_values_, pcovc_unscaled.singular_values_ + ) + if __name__ == "__main__": unittest.main(verbosity=2)