|
1 | 1 | #
|
2 |
| -# Copyright (c) 2020-2023, NVIDIA CORPORATION. |
| 2 | +# Copyright (c) 2020-2025, NVIDIA CORPORATION. |
3 | 3 | #
|
4 | 4 | # Licensed under the Apache License, Version 2.0 (the "License");
|
5 | 5 | # you may not use this file except in compliance with the License.
|
|
13 | 13 | # See the License for the specific language governing permissions and
|
14 | 14 | # limitations under the License.
|
15 | 15 | #
|
| 16 | +import warnings |
16 | 17 |
|
17 |
| -from cuml.internals.input_utils import input_to_cupy_array |
18 | 18 | import cuml.internals
|
| 19 | +from cuml.internals.input_utils import input_to_cupy_array |
19 | 20 | from cuml.internals.safe_imports import cpu_only_import
|
20 | 21 | from cuml.internals.safe_imports import gpu_only_import
|
21 | 22 |
|
22 | 23 | cp = gpu_only_import("cupy")
|
| 24 | +cudf = gpu_only_import("cudf") |
23 | 25 | np = cpu_only_import("numpy")
|
24 | 26 |
|
25 | 27 |
|
def _input_to_cupy_or_cudf_series(x, check_rows=None):
    """Coerce the input to a 1D cupy array or cudf Series.

    For classification problems we need to support the full range
    of supported input dtypes. cupy cannot support string labels,
    and cudf cannot support float16. To handle this, we prefer cudf
    if the input is cudf, otherwise try to coerce to cupy, falling
    back to cudf if the dtype isn't supported.

    Parameters
    ----------
    x : array-like
        The labels to coerce.
    check_rows : int, optional
        If given, the number of rows the coerced output must have.

    Returns
    -------
    out : cupy.ndarray or cudf.Series
        The coerced labels. A cudf Series result has its index reset.

    Raises
    ------
    ValueError
        If the input has more than one column, or if ``check_rows`` is
        given and does not match the number of rows.
    """
    if isinstance(x, cudf.Series):
        # Drop the index so comparisons don't try to align on index
        out = x.reset_index(drop=True)
        n_cols = 1
    else:
        try:
            out, _, n_cols, _ = input_to_cupy_array(x)
        except ValueError:
            # Unsupported dtype, use cudf instead
            # Drop the index so comparisons don't try to align on index
            out = cudf.Series(x, nan_as_null=False, copy=False).reset_index(
                drop=True
            )
            n_cols = 1
        else:
            out = out.squeeze()  # drop length-1 axes, e.g. (n, 1) -> (n,)
            if out.ndim == 0:
                # A single sample squeezes all the way down to a 0-d array,
                # on which `len` raises TypeError. Restore the 1D shape.
                out = out.reshape(1)

    n_rows = len(out)

    if n_cols > 1:
        raise ValueError(f"Expected 1 column but got {n_cols} columns.")
    if check_rows is not None and n_rows != check_rows:
        raise ValueError(f"Expected {check_rows} rows but got {n_rows} rows.")

    return out
| 61 | + |
| 62 | + |
@cuml.internals.api_return_any()
def accuracy_score(
    y_true, y_pred, *, sample_weight=None, normalize=True, **kwargs
):
    """
    Accuracy classification score.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Ground truth (correct) labels.
    y_pred : array-like of shape (n_samples,)
        Predicted labels.
    sample_weight : array-like of shape (n_samples,), optional
        Sample weights.
    normalize : bool
        If ``False``, return the number of correctly classified samples.
        Otherwise, return the fraction of correctly classified samples.
    **kwargs
        Only the deprecated ``convert_dtype`` and ``handle`` keywords are
        accepted. They are ignored and trigger a ``FutureWarning``.

    Returns
    -------
    score : float
        The fraction of correctly classified samples, or the number of correctly
        classified samples if ``normalize == False``.

    Raises
    ------
    TypeError
        If a keyword argument other than the deprecated ``convert_dtype``
        or ``handle`` is passed.
    """

    if kwargs:
        # `**kwargs` exists only to keep the deprecated keywords working.
        # Anything else (e.g. a misspelled `sample_weight`) must fail loudly
        # instead of being silently dropped behind a deprecation warning.
        unexpected = sorted(set(kwargs) - {"convert_dtype", "handle"})
        if unexpected:
            raise TypeError(
                "accuracy_score() got unexpected keyword argument(s): "
                + ", ".join(repr(name) for name in unexpected)
            )
        warnings.warn(
            "`convert_dtype` and `handle` were deprecated from `accuracy_score` "
            "in version 25.04 and will be removed in 25.06.",
            FutureWarning,
        )

    y_true = _input_to_cupy_or_cudf_series(y_true)
    y_pred = _input_to_cupy_or_cudf_series(y_pred, check_rows=len(y_true))

    # Categorical dtypes in cudf currently don't coerce nicely on equality,
    # we need to manually cast to cudf.Series and align dtypes.
    # This whole code block can be removed once
    # https://github.com/rapidsai/cudf/issues/18196 is resolved.
    if y_true.dtype == "category":
        if y_pred.dtype != y_true.dtype:
            y_pred = cudf.Series(y_pred, copy=False, nan_as_null=False).astype(
                y_true.dtype
            )
    elif y_pred.dtype == "category":
        y_true = cudf.Series(y_true, copy=False, nan_as_null=False).astype(
            y_pred.dtype
        )

    if sample_weight is not None:
        # `check_cols=1` means the array holds one value per row, so
        # flattening always gives shape (n_samples,). `squeeze` is avoided
        # because it would collapse a single weight into a 0-d array that
        # no longer lines up with the 1D `correct` vector below.
        sample_weight = input_to_cupy_array(
            sample_weight,
            check_dtype=[np.float32, np.float64, np.int32, np.int64],
            check_cols=1,
            check_rows=len(y_true),
        ).array.reshape(-1)

    # Elementwise match between truth and prediction
    correct = y_true == y_pred

    if normalize:
        # Weighted (or plain, when weights is None) fraction of matches
        return float(cp.average(correct, weights=sample_weight))
    elif sample_weight is not None:
        # Sum of the weights of the correctly classified samples
        return float(cp.dot(correct, sample_weight))
    else:
        return float(cp.count_nonzero(correct))
| 129 | + |
| 130 | + |
26 | 131 | @cuml.internals.api_return_any()
|
27 | 132 | def log_loss(
|
28 | 133 | y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None
|
|
0 commit comments