diff --git a/CHANGELOG b/CHANGELOG index 679df31ae..46008bb47 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -13,6 +13,7 @@ The rules for CHANGELOG file: 0.3.0 (XXXX/XX/XX) ------------------ +- Fix rendering issues for ``SparseKDE`` and ``QuickShift`` (#236) - Updating ``FPS`` to allow a numpy array of ints as an initialize parameter (#145) - Supported Python versions are now ranging from 3.9 - 3.12. - Updating ``skmatter.datasets`` submodule to support sklearn 1.5.0 (#229) diff --git a/examples/pcovr/PCovR_Regressors.py b/examples/pcovr/PCovR_Regressors.py index 0b62cac2b..777009d56 100644 --- a/examples/pcovr/PCovR_Regressors.py +++ b/examples/pcovr/PCovR_Regressors.py @@ -55,10 +55,11 @@ # Use a fitted regressor # ---------------------- # -# You can pass a fitted regressor to PCovR to rely on the predetermined -# regression parameters. Currently, scikit-matter supports ``scikit-learn`` -# classes ``LinearModel``, ``Ridge``, and ``RidgeCV``, with plans to support anu -# regressor with similar architecture in the future. +# You can pass a fitted regressor to ``PCovR`` to rely on the predetermined regression +# parameters. Currently, scikit-matter supports ``scikit-learn`` classes +# :class:`LinearModel <sklearn.linear_model.LinearRegression>`, :class:`Ridge +# <sklearn.linear_model.Ridge>`, and :class:`RidgeCV <sklearn.linear_model.RidgeCV>`, +# with plans to support any regressor with similar architecture in the future. 
regressor = Ridge(alpha=1e-6, fit_intercept=False, tol=1e-12) diff --git a/examples/selection/FeatureSelection-WHODataset.py b/examples/selection/FeatureSelection-WHODataset.py index afbdb519e..e7abbc6ab 100644 --- a/examples/selection/FeatureSelection-WHODataset.py +++ b/examples/selection/FeatureSelection-WHODataset.py @@ -120,7 +120,7 @@ # ^^^^^^^^ -pcur = PCovCUR(n_to_select=n_select, progress_bar=True, mixing=0.0) +pcur = PCovCUR(n_to_select=n_select, progress_bar=True, mixing=1e-3) pcur.fit(X_train, yp_train) # %% diff --git a/pyproject.toml b/pyproject.toml index 20399c980..e05c76026 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,8 @@ classifiers = [ "Topic :: Scientific/Engineering", ] dependencies = [ - "scikit-learn>=1.1.0", + "scikit-learn < 1.6.0", + "scipy < 1.15.0", ] dynamic = ["version"] diff --git a/src/skmatter/clustering/_quick_shift.py b/src/skmatter/clustering/_quick_shift.py index 92fc4eb6e..1e7b1c0d3 100644 --- a/src/skmatter/clustering/_quick_shift.py +++ b/src/skmatter/clustering/_quick_shift.py @@ -1,7 +1,6 @@ -from typing import Callable, Union +from typing import Callable, Optional import numpy as np -from numpy.typing import ArrayLike from sklearn.base import BaseEstimator from tqdm import tqdm @@ -34,19 +33,19 @@ class QuickShift(BaseEstimator): scale : float, default=1.0 Distance cutoff scaling factor used during the QS clustering. It will be squared since the squared distance is used in this class. - metric : Callable[[ArrayLike, ArrayLike, bool, dict], ArrayLike], \ - default= :func:`skmatter.metrics.pairwise_euclidean_distances()` + metric : Callable, default=None The metric to use. Your metric should be able to take at least three arguments in secquence: `X`, `Y`, and `squared=True`. Here, `X` and `Y` are two array-like of shape (n_samples, n_components). The return of the metric is an array-like of shape (n_samples, n_samples). 
If you want to use periodic boundary conditions, be sure to provide the cell length in the ``metric_params`` and - provide a metric that can take the cell argument. + provide a metric that can take the cell argument. If :obj:`None`, the + :func:`skmatter.metrics.periodic_pairwise_euclidean_distances()` is used. metric_params : dict, default=None Additional parameters to be passed to the use of metric. i.e. the dimension of a rectangular cell of side length :math:`a_i` - for :func:`skmatter.metrics.pairwise_euclidean_distances()` - `{'cell_length': [a_1, a_2, ..., a_n]}` + for :func:`skmatter.metrics.periodic_pairwise_euclidean_distances()` + ``{'cell_length': [a_1, a_2, ..., a_n]}`` Attributes ---------- @@ -97,13 +96,11 @@ class QuickShift(BaseEstimator): def __init__( self, - dist_cutoff_sq: Union[float, None] = None, - gabriel_shell: Union[int, None] = None, + dist_cutoff_sq: Optional[float] = None, + gabriel_shell: Optional[int] = None, scale: float = 1.0, - metric: Callable[ - [ArrayLike, ArrayLike, bool, dict], ArrayLike - ] = periodic_pairwise_euclidean_distances, - metric_params: Union[dict, None] = None, + metric: Optional[Callable] = None, + metric_params: Optional[dict] = None, ): if (dist_cutoff_sq is None) and (gabriel_shell is None): raise ValueError("Either dist_cutoff or gabriel_depth must be set.") @@ -115,6 +112,10 @@ def __init__( self.metric_params = ( metric_params if metric_params is not None else {"cell_length": None} ) + + if metric is None: + metric = periodic_pairwise_euclidean_distances + self.metric = lambda X, Y: metric(X, Y, squared=True, **self.metric_params) if isinstance(self.metric_params, dict): self.cell = self.metric_params["cell_length"] diff --git a/src/skmatter/neighbors/_sparsekde.py b/src/skmatter/neighbors/_sparsekde.py index b98c6a9e1..bbb7c1ea9 100644 --- a/src/skmatter/neighbors/_sparsekde.py +++ b/src/skmatter/neighbors/_sparsekde.py @@ -1,8 +1,7 @@ import warnings -from typing import Callable, Union +from typing 
import Callable, Optional, Union import numpy as np -from numpy.typing import ArrayLike from scipy.special import logsumexp as LSE from sklearn.base import BaseEstimator from sklearn.utils.validation import check_is_fitted, check_random_state @@ -33,19 +32,18 @@ class SparseKDE(BaseEstimator): weights: numpy.ndarray, default=None Weights of the descriptors. If None, all weights are set to `1/n_descriptors`. - metric : Callable[[ArrayLike, ArrayLike, bool, dict], ArrayLike], - default=:func:`skmatter.metrics.pairwise_euclidean_distances()` + metric : Callable, default=None The metric to use. Your metric should be able to take at least three arguments in secquence: `X`, `Y`, and `squared=True`. Here, `X` and `Y` are two array-like of shape (n_samples, n_components). The return of the metric is an array-like of - shape (n_samples, n_samples). If you want to use periodic boundary - conditions, be sure to provide the cell size in the metric_params and - provide a metric that can take the cell argument. + shape (n_samples, n_samples). If you want to use periodic boundary conditions, + be sure to provide the cell size in the metric_params and provide a metric that + can take the cell argument. If :obj:`None`, the + :func:`skmatter.metrics.periodic_pairwise_euclidean_distances()` is used. metric_params : dict, default=None - Additional parameters to be passed to the use of - metric. i.e. the cell dimension for - :func:`skmatter.metrics.pairwise_euclidean_distances()` - `{'cell_length': [side_length_1, ..., side_length_n]}` + Additional parameters to be passed to the use of metric. i.e. the cell + dimension for :func:`skmatter.metrics.periodic_pairwise_euclidean_distances()` + ``{'cell_length': [side_length_1, ..., side_length_n]}`` fspread : float, default=-1.0 The fractional "space" occupied by the voronoi cell of each grid. Use this when each cell is of a similar size. 
@@ -106,11 +104,9 @@ class SparseKDE(BaseEstimator): def __init__( self, descriptors: np.ndarray, - weights: Union[np.ndarray, None] = None, - metric: Callable[ - [ArrayLike, ArrayLike, bool, dict], ArrayLike - ] = periodic_pairwise_euclidean_distances, - metric_params: Union[dict, None] = None, + weights: Optional[np.ndarray] = None, + metric: Optional[Callable] = None, + metric_params: Optional[dict] = None, fspread: float = -1.0, fpoints: float = 0.15, kernel: str = "gaussian", @@ -119,6 +115,10 @@ def __init__( self.metric_params = ( metric_params if metric_params is not None else {"cell_length": None} ) + + if metric is None: + metric = periodic_pairwise_euclidean_distances + self.metric = lambda X, Y: metric(X, Y, squared=True, **self.metric_params) self.cell = metric_params["cell_length"] if metric_params is not None else None self._check_dimension(descriptors) diff --git a/tests/.coverage.tsf-492-wpa-0-247.epfl.ch.11311.XpjwIfdx b/tests/.coverage.tsf-492-wpa-0-247.epfl.ch.11311.XpjwIfdx new file mode 100644 index 000000000..5d637632b Binary files /dev/null and b/tests/.coverage.tsf-492-wpa-0-247.epfl.ch.11311.XpjwIfdx differ