Mypy fixes (#786)
* Mypy fixes

* backwards compat

* Fix mistake: get rid of Any import

* Try to get lint passing

* Ignore type setting

Co-authored-by: Tom Augspurger <[email protected]>
jsignell and TomAugspurger committed Feb 15, 2021
1 parent 09d4ec0 commit be99d37
Showing 7 changed files with 32 additions and 23 deletions.
9 changes: 8 additions & 1 deletion dask_ml/_typing.py
```diff
@@ -1,14 +1,21 @@
-from typing import TypeVar, Union
+from typing import Any, Sequence, TypeVar, Union
 
 import dask.dataframe as dd
 import numpy as np
 from dask.array import Array
 from pandas import DataFrame, Index, Series
 
+try:
+    from numpy.typing import DTypeLike
+except ImportError:
+    DTypeLike = Any  # type: ignore
+
+
 AnyArrayLike = TypeVar("AnyArrayLike", Index, Series, Array, np.ndarray)
 ArrayLike = TypeVar("ArrayLike", Array, np.ndarray)
 FrameOrSeriesUnion = Union[DataFrame, Series, dd.Series, dd.DataFrame]
 SeriesType = Union[dd.Series, Series]
 DataFrameType = Union[DataFrame, dd.DataFrame]
 Number = Union[int, float, np.float64, np.int64, np.int32]
 Int = Union[int, np.int64, np.int32]
+NDArrayOrScalar = Union[np.ndarray, Sequence[Number], Number]
```
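
The try/except block is the usual version guard for `numpy.typing`, which only ships with NumPy 1.20 and later; older installs fall back to `Any`. A minimal sketch of the pattern and how the resulting alias gets used (`as_dtype` is a hypothetical helper, not part of dask-ml):

```python
from typing import Any

import numpy as np

try:
    from numpy.typing import DTypeLike  # added in numpy 1.20
except ImportError:
    # Re-binding the alias to Any changes its kind, so mypy needs the ignore.
    DTypeLike = Any  # type: ignore


def as_dtype(value: DTypeLike = np.float64) -> np.dtype:
    # np.dtype() normalizes everything DTypeLike covers: scalar types,
    # strings such as "float64", and existing np.dtype instances.
    return np.dtype(value)


as_dtype("int32")  # dtype('int32')
```
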
6 changes: 3 additions & 3 deletions dask_ml/metrics/pairwise.py
```diff
@@ -16,7 +16,7 @@
 
 
 def pairwise_distances_argmin_min(
-    X: ArrayLike,
+    X: da.Array,
     Y: ArrayLike,
     axis: int = 1,
     metric: Union[str, Callable[[ArrayLike, ArrayLike], float]] = "euclidean",
@@ -47,7 +47,7 @@ def pairwise_distances_argmin_min(
 
 
 def pairwise_distances(
-    X: ArrayLike,
+    X: da.Array,
     Y: ArrayLike,
     metric: Union[str, Callable[[ArrayLike, ArrayLike], float]] = "euclidean",
     n_jobs: Optional[int] = None,
@@ -105,7 +105,7 @@ def euclidean_distances(
 
 
 def check_pairwise_arrays(
-    X: ArrayLike, Y: ArrayLike, precomputed: bool = False
+    X: ArrayLike, Y: Optional[ArrayLike], precomputed: bool = False
 ) -> Tuple[ArrayLike, ArrayLike]:
     # XXX
     if Y is None:
```
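
The `Optional[ArrayLike]` on `check_pairwise_arrays` matches its `if Y is None` branch: under mypy's strict optional checking, `None` is not a member of a plain `ArrayLike` parameter. A toy sketch of the rule (`check_pair` is a hypothetical function, not the real API):

```python
from typing import Optional, Tuple

import numpy as np


def check_pair(X: np.ndarray, Y: Optional[np.ndarray]) -> Tuple[np.ndarray, np.ndarray]:
    if Y is None:
        # mypy narrows Y to np.ndarray after this branch,
        # so the non-optional return type still checks.
        Y = X
    return X, Y


check_pair(np.eye(2), None)  # valid only because Y is Optional
```
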
3 changes: 2 additions & 1 deletion dask_ml/metrics/regression.py
```diff
@@ -99,7 +99,8 @@ def r2_score(
     nonzero_denominator = denominator != 0
     nonzero_numerator = numerator != 0
     valid_score = nonzero_denominator & nonzero_numerator
-    output_scores = da.ones([y_true.shape[1]], chunks=y_true.chunks[1])
+    output_chunks = getattr(y_true, "chunks", [None, None])[1]
+    output_scores = da.ones([y_true.shape[1]], chunks=output_chunks)
     with np.errstate(all="ignore"):
         output_scores[valid_score] = 1 - (
             numerator[valid_score] / denominator[valid_score]
```
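
The `getattr` fallback works around the fact that `r2_score` accepts both dask and numpy arrays, and numpy arrays have no `.chunks` attribute. A runnable sketch of the trick (`column_chunks` is a hypothetical helper):

```python
import dask.array as da
import numpy as np


def column_chunks(a):
    # Second entry of .chunks for dask arrays, None for numpy arrays,
    # so the same expression works for both input kinds.
    return getattr(a, "chunks", [None, None])[1]


column_chunks(np.ones((4, 3)))                 # None
column_chunks(da.ones((4, 3), chunks=(2, 3)))  # (3,)
```
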
2 changes: 1 addition & 1 deletion dask_ml/preprocessing/_block_transformer.py
```diff
@@ -71,7 +71,7 @@ def __init__(
         validate: bool = False,
         **kw_args: Any
     ):
-        self.func = func
+        self.func: Callable[..., Union[ArrayLike, DataFrameType]] = func
         self.validate = validate
         self.kw_args = kw_args
 
```
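
Here the annotation is written on the *attribute* assignment rather than left to inference. A minimal sketch of the general pattern, with a hypothetical stand-in class:

```python
from typing import Any, Callable, Union

import numpy as np
import pandas as pd


class FuncHolder:
    """Hypothetical stand-in for the BlockTransformer pattern."""

    def __init__(self, func: Callable[..., Union[np.ndarray, pd.DataFrame]], **kw_args: Any):
        # Annotating at the assignment site declares the attribute's type
        # explicitly instead of leaving mypy to infer it.
        self.func: Callable[..., Union[np.ndarray, pd.DataFrame]] = func
        self.kw_args = kw_args
```
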
4 changes: 2 additions & 2 deletions dask_ml/preprocessing/_encoders.py
```diff
@@ -7,7 +7,7 @@
 import sklearn.preprocessing
 
 from .._compat import SK_024
-from .._typing import ArrayLike, DataFrameType, SeriesType
+from .._typing import ArrayLike, DataFrameType, DTypeLike, SeriesType
 from ..utils import check_array
 from .label import _encode, _encode_dask_array
 
@@ -116,7 +116,7 @@ def __init__(
         categories: Union[str, ArrayLike] = "auto",
         drop: Optional[bool] = None,
         sparse: bool = True,
-        dtype: np.dtype = np.float64,
+        dtype: DTypeLike = np.float64,
         handle_unknown: str = "error",
     ):
         if drop is not None:
```
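
The `dtype` fix reflects that `np.float64` is a scalar *class*, not an `np.dtype` instance, so the old annotation contradicted its own default. `DTypeLike` covers classes, strings, and dtype objects alike. Sketch (`normalize_dtype` is a hypothetical helper):

```python
import numpy as np
from numpy.typing import DTypeLike  # numpy >= 1.20


def normalize_dtype(dtype: DTypeLike = np.float64) -> np.dtype:
    # np.dtype() converts any DTypeLike value to a concrete dtype object.
    return np.dtype(dtype)


isinstance(np.float64, np.dtype)  # False: why `dtype: np.dtype` failed mypy
normalize_dtype("int32")          # dtype('int32')
```
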
29 changes: 15 additions & 14 deletions dask_ml/preprocessing/data.py
```diff
@@ -21,14 +21,14 @@
 from dask_ml._utils import copy_learned_attributes
 from dask_ml.utils import check_array, handle_zeros_in_scale
 
-from .._typing import ArrayLike, DataFrameType, SeriesType
+from .._typing import ArrayLike, DataFrameType, NDArrayOrScalar, SeriesType
 
 _PANDAS_VERSION = LooseVersion(pd.__version__)
 _HAS_CTD = _PANDAS_VERSION >= "0.21.0"
 BOUNDS_THRESHOLD = 1e-7
 
 
-def _handle_zeros_in_scale(scale: np.ndarray, copy=True):
+def _handle_zeros_in_scale(scale: NDArrayOrScalar, copy=True):
     """Makes sure that whenever scale is zero, we handle it correctly.
     This happens in most scalers when we have constant features."""
```
```diff
@@ -75,7 +75,7 @@ def fit(
         values = compute(*attributes.values())
         for k, v in zip(attributes, values):
             setattr(self, k, v)
-        self.n_features_in_ = X.shape[1]
+        self.n_features_in_: int = X.shape[1]
         return self
 
     def partial_fit(
@@ -142,7 +142,7 @@ def fit(
         values = compute(*attributes.values())
         for k, v in zip(attributes, values):
             setattr(self, k, v)
-        self.n_features_in_ = X.shape[1]
+        self.n_features_in_: int = X.shape[1]
         return self
 
     def partial_fit(
@@ -227,7 +227,7 @@ def fit(
         self.center_: List[float] = quantiles[:, 1]
         self.scale_: List[float] = quantiles[:, 2] - quantiles[:, 0]
         self.scale_ = _handle_zeros_in_scale(self.scale_, copy=False)
-        self.n_features_in_ = X.shape[1]
+        self.n_features_in_: int = X.shape[1]
         return self
 
     def transform(
```
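
All three `fit` hunks make the same change: the other learned attributes are set dynamically through `setattr`, which mypy cannot see, so `n_features_in_` gets an explicit annotation at its direct assignment. A condensed, hypothetical sketch of the pattern:

```python
import numpy as np


class ScalerSketch:
    """Hypothetical condensed version of the fit() pattern above."""

    def fit(self, X: np.ndarray) -> "ScalerSketch":
        attributes = {"mean_": X.mean(axis=0), "var_": X.var(axis=0)}
        for k, v in attributes.items():
            # setattr is opaque to mypy; these attributes stay untyped
            setattr(self, k, v)
        # a direct, annotated assignment declares n_features_in_ explicitly
        self.n_features_in_: int = X.shape[1]
        return self
```
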
```diff
@@ -338,6 +338,7 @@ def _dense_fit(
         references = self.references_ * 100
         quantiles = [da.percentile(col, references) for col in X.T]
         (self.quantiles_,) = compute(da.vstack(quantiles).T)
+        return None
 
     def _transform(
         self, X: Union[ArrayLike, DataFrameType], inverse: bool = False
```
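
The explicit `return None` is a common way to satisfy mypy's default `warn_no_return` check; the excerpt does not show `_dense_fit`'s declared return type, so this is one plausible reading, illustrated with a hypothetical function:

```python
from typing import Optional

import numpy as np


def dense_quantiles(X: np.ndarray, ready: bool) -> Optional[np.ndarray]:
    # With warn_no_return (on by default), a function declared Optional[...]
    # may not fall off the end once any branch returns a value; the
    # explicit `return None` keeps every path consistent.
    if ready:
        return np.quantile(X, [0.25, 0.5, 0.75], axis=0)
    return None
```
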
```diff
@@ -352,7 +353,7 @@ def _transform(
         return da.vstack(transformed, allow_unknown_chunksizes=True).T
 
     def _transform_col(
-        self, X_col: ArrayLike, quantiles: ArrayLike, inverse: bool
+        self, X_col: da.Array, quantiles: ArrayLike, inverse: bool
     ) -> ArrayLike:
         output_distribution = self.output_distribution
 
```
```diff
@@ -761,17 +762,17 @@ def inverse_transform(self, X: Union[ArrayLike, DataFrameType]) -> DataFrameType
         if unknown:
             lengths = blockwise(len, "i", X[:, 0], "i", dtype="i8").compute()
             X = X.copy()
-            chunks: ArrayLike = (tuple(lengths), X.chunks[1])
+            chunks: tuple = (tuple(lengths), X.chunks[1])
             X._chunks = chunks
 
         X = dd.from_dask_array(X, columns=self.transformed_columns_)
 
         big = isinstance(X, dd.DataFrame)
 
         if big:
-            chunks = np.array(X.divisions)
-            chunks[-1] = chunks[-1] + 1
-            chunks = tuple(chunks[1:] - chunks[:-1])
+            divisions = np.array(X.divisions)
+            divisions[-1] = divisions[-1] + 1
+            chunks = tuple(divisions[1:] - divisions[:-1])
 
         non_cat = X[list(self.non_categorical_columns_)]
 
@@ -986,17 +987,17 @@ def inverse_transform(
         if unknown:
             lengths = blockwise(len, "i", X[:, 0], "i", dtype="i8").compute()
             X = X.copy()
-            chunks: ArrayLike = (tuple(lengths), X.chunks[1])
+            chunks: tuple = (tuple(lengths), X.chunks[1])
             X._chunks = chunks
 
         X = dd.from_dask_array(X, columns=self.columns_)
 
         big = isinstance(X, dd.DataFrame)
 
         if big:
-            chunks = np.array(X.divisions)
-            chunks[-1] = chunks[-1] + 1
-            chunks = tuple(chunks[1:] - chunks[:-1])
+            divisions = np.array(X.divisions)
+            divisions[-1] = divisions[-1] + 1
+            chunks = tuple(divisions[1:] - divisions[:-1])
 
         X = X.copy()
         for col in self.categorical_columns_:
```
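
Both `inverse_transform` hunks fix the same complaint: `chunks` was bound first to a tuple and later to a numpy array, and mypy expects a name to keep one type within a scope, so the intermediate array is renamed `divisions`. A standalone sketch of the arithmetic, with made-up division values:

```python
import numpy as np

# Dask dataframe divisions are partition boundaries; consecutive
# differences give per-partition row counts, i.e. dask "chunks".
divisions = np.array([0, 4, 8, 10])  # made-up divisions for 11 rows
divisions[-1] = divisions[-1] + 1    # the last division is inclusive
chunks = tuple(divisions[1:] - divisions[:-1])
print(chunks)                        # (4, 4, 3)
```
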
2 changes: 1 addition & 1 deletion dask_ml/preprocessing/label.py
```diff
@@ -197,7 +197,7 @@ def inverse_transform(self, y: Union[ArrayLike, SeriesType]):
 
 
 def _encode_categorical(
-    values: np.ndarray, uniques: np.ndarray = None, encode: bool = False
+    values: pd.Series, uniques: Optional[np.ndarray] = None, encode: bool = False
 ):
     new_uniques = np.asarray(values.cat.categories)
 
```
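
Two fixes in one line: `uniques: np.ndarray = None` is an implicit-Optional default, which PEP 484 disallows and mypy rejects when `no_implicit_optional` is enabled, and `values` must be a categorical `pd.Series` for the `values.cat.categories` access in the body. Sketch (`encode_categorical_sketch` is a hypothetical cut-down version):

```python
from typing import Optional

import numpy as np
import pandas as pd


def encode_categorical_sketch(
    values: pd.Series, uniques: Optional[np.ndarray] = None
) -> np.ndarray:
    # .cat exists only on a Series with categorical dtype, which is why
    # the parameter is typed pd.Series rather than np.ndarray.
    new_uniques = np.asarray(values.cat.categories)
    return uniques if uniques is not None else new_uniques


s = pd.Series(["a", "b", "a"], dtype="category")
encode_categorical_sketch(s)  # array(['a', 'b'], dtype=object)
```
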
