Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix rendering issues and limit dependencies #236

Merged
merged 2 commits into from
Feb 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ The rules for CHANGELOG file:

0.3.0 (XXXX/XX/XX)
------------------
- Fix rendering issues for `SparseKDE` and `QuickShift` (#236)
- Updating ``FPS`` to allow a numpy array of ints as an initialize parameter (#145)
- Supported Python versions are now ranging from 3.9 - 3.12.
- Updating ``skmatter.datasets`` submodule to support sklearn 1.5.0 (#229)
Expand Down
9 changes: 5 additions & 4 deletions examples/pcovr/PCovR_Regressors.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,11 @@
# Use a fitted regressor
# ----------------------
#
# You can pass a fitted regressor to PCovR to rely on the predetermined
# regression parameters. Currently, scikit-matter supports ``scikit-learn``
# classes ``LinearModel``, ``Ridge``, and ``RidgeCV``, with plans to support anu
# regressor with similar architecture in the future.
# You can pass a fitted regressor to ``PCovR`` to rely on the predetermined regression
# parameters. Currently, scikit-matter supports ``scikit-learn`` classes
# :class:`LinearModel <sklearn.linear_model.LinearModel>`, :class:`Ridge
# <sklearn.linear_model.Ridge>`, and :class:`RidgeCV <sklearn.linear_model.RidgeCV>`,
# with plans to support any regressor with similar architecture in the future.

regressor = Ridge(alpha=1e-6, fit_intercept=False, tol=1e-12)

Expand Down
2 changes: 1 addition & 1 deletion examples/selection/FeatureSelection-WHODataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@
# ^^^^^^^^


pcur = PCovCUR(n_to_select=n_select, progress_bar=True, mixing=0.0)
pcur = PCovCUR(n_to_select=n_select, progress_bar=True, mixing=1e-3)
pcur.fit(X_train, yp_train)

# %%
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ classifiers = [
"Topic :: Scientific/Engineering",
]
dependencies = [
"scikit-learn>=1.1.0",
"scikit-learn < 1.6.0",
"scipy < 1.15.0",
]
dynamic = ["version"]

Expand Down
27 changes: 14 additions & 13 deletions src/skmatter/clustering/_quick_shift.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from typing import Callable, Union
from typing import Callable, Optional

import numpy as np
from numpy.typing import ArrayLike
from sklearn.base import BaseEstimator
from tqdm import tqdm

Expand Down Expand Up @@ -34,19 +33,19 @@ class QuickShift(BaseEstimator):
scale : float, default=1.0
Distance cutoff scaling factor used during the QS clustering. It will be squared
since the squared distance is used in this class.
metric : Callable[[ArrayLike, ArrayLike, bool, dict], ArrayLike], \
default= :func:`skmatter.metrics.pairwise_euclidean_distances()`
metric : Callable, default=None
The metric to use. Your metric should be able to take at least three arguments
in sequence: `X`, `Y`, and `squared=True`. Here, `X` and `Y` are two array-like
of shape (n_samples, n_components). The return of the metric is an array-like of
shape (n_samples, n_samples). If you want to use periodic boundary
conditions, be sure to provide the cell length in the ``metric_params`` and
provide a metric that can take the cell argument.
provide a metric that can take the cell argument. If :obj:`None`, the
:func:`skmatter.metrics.periodic_pairwise_euclidean_distances()` is used.
metric_params : dict, default=None
Additional parameters to be passed to the use of
metric. i.e. the dimension of a rectangular cell of side length :math:`a_i`
for :func:`skmatter.metrics.pairwise_euclidean_distances()`
`{'cell_length': [a_1, a_2, ..., a_n]}`
for :func:`skmatter.metrics.periodic_pairwise_euclidean_distances()`
``{'cell_length': [a_1, a_2, ..., a_n]}``

Attributes
----------
Expand Down Expand Up @@ -97,13 +96,11 @@ class QuickShift(BaseEstimator):

def __init__(
self,
dist_cutoff_sq: Union[float, None] = None,
gabriel_shell: Union[int, None] = None,
dist_cutoff_sq: Optional[float] = None,
gabriel_shell: Optional[int] = None,
scale: float = 1.0,
metric: Callable[
[ArrayLike, ArrayLike, bool, dict], ArrayLike
] = periodic_pairwise_euclidean_distances,
metric_params: Union[dict, None] = None,
metric: Optional[Callable] = None,
metric_params: Optional[dict] = None,
):
if (dist_cutoff_sq is None) and (gabriel_shell is None):
raise ValueError("Either dist_cutoff or gabriel_depth must be set.")
Expand All @@ -115,6 +112,10 @@ def __init__(
self.metric_params = (
metric_params if metric_params is not None else {"cell_length": None}
)

if metric is None:
metric = periodic_pairwise_euclidean_distances

self.metric = lambda X, Y: metric(X, Y, squared=True, **self.metric_params)
if isinstance(self.metric_params, dict):
self.cell = self.metric_params["cell_length"]
Expand Down
32 changes: 16 additions & 16 deletions src/skmatter/neighbors/_sparsekde.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import warnings
from typing import Callable, Union
from typing import Callable, Optional, Union

import numpy as np
from numpy.typing import ArrayLike
from scipy.special import logsumexp as LSE
from sklearn.base import BaseEstimator
from sklearn.utils.validation import check_is_fitted, check_random_state
Expand Down Expand Up @@ -33,19 +32,18 @@ class SparseKDE(BaseEstimator):
weights: numpy.ndarray, default=None
Weights of the descriptors.
If None, all weights are set to `1/n_descriptors`.
metric : Callable[[ArrayLike, ArrayLike, bool, dict], ArrayLike],
default=:func:`skmatter.metrics.pairwise_euclidean_distances()`
metric : Callable, default=None
The metric to use. Your metric should be able to take at least three arguments
in sequence: `X`, `Y`, and `squared=True`. Here, `X` and `Y` are two array-like
of shape (n_samples, n_components). The return of the metric is an array-like of
shape (n_samples, n_samples). If you want to use periodic boundary
conditions, be sure to provide the cell size in the metric_params and
provide a metric that can take the cell argument.
shape (n_samples, n_samples). If you want to use periodic boundary conditions,
be sure to provide the cell size in the metric_params and provide a metric that
can take the cell argument. If :obj:`None`, the
:func:`skmatter.metrics.periodic_pairwise_euclidean_distances()` is used.
metric_params : dict, default=None
Additional parameters to be passed to the use of
metric. i.e. the cell dimension for
:func:`skmatter.metrics.pairwise_euclidean_distances()`
`{'cell_length': [side_length_1, ..., side_length_n]}`
Additional parameters to be passed to the use of metric. i.e. the cell
dimension for :func:`skmatter.metrics.periodic_pairwise_euclidean_distances()`
``{'cell_length': [side_length_1, ..., side_length_n]}``
fspread : float, default=-1.0
The fractional "space" occupied by the voronoi cell of each grid. Use this when
each cell is of a similar size.
Expand Down Expand Up @@ -106,11 +104,9 @@ class SparseKDE(BaseEstimator):
def __init__(
self,
descriptors: np.ndarray,
weights: Union[np.ndarray, None] = None,
metric: Callable[
[ArrayLike, ArrayLike, bool, dict], ArrayLike
] = periodic_pairwise_euclidean_distances,
metric_params: Union[dict, None] = None,
weights: Optional[np.ndarray] = None,
metric: Optional[Callable] = None,
metric_params: Optional[dict] = None,
fspread: float = -1.0,
fpoints: float = 0.15,
kernel: str = "gaussian",
Expand All @@ -119,6 +115,10 @@ def __init__(
self.metric_params = (
metric_params if metric_params is not None else {"cell_length": None}
)

if metric is None:
metric = periodic_pairwise_euclidean_distances

self.metric = lambda X, Y: metric(X, Y, squared=True, **self.metric_params)
self.cell = metric_params["cell_length"] if metric_params is not None else None
self._check_dimension(descriptors)
Expand Down
Binary file not shown.