
Commit c7eada3

Compatibility with XGBoost 2.0 (#296)
Updates in tests and API calls for compatibility with XGBoost 2.0.
1 parent bd7e799 commit c7eada3

7 files changed (+68 -253 lines)

setup.py (+1 -1)

@@ -11,7 +11,7 @@
     "distributed computing framework Ray.",
     url="https://github.com/ray-project/xgboost_ray",
     install_requires=[
-        "ray>=2.0",
+        "ray>=2.7",
         "numpy>=1.16",
         "pandas",
         "wrapt>=1.12.1",

xgboost_ray/main.py (+18 -4)

@@ -25,7 +25,7 @@ class EarlyStopException(XGBoostError):
     pass


-# From xgboost>=1.7.0, rabit is replaced by a collective communicator
+# From xgboost>=1.7.0, rabit is replaced by a collective communicator.
 try:
     from xgboost.collective import CommunicatorContext

@@ -53,7 +53,10 @@ class EarlyStopException(XGBoostError):
     remove_placement_group,
 )
 from ray.util.queue import Queue
-from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy
+from ray.util.scheduling_strategies import (
+    NodeAffinitySchedulingStrategy,
+    PlacementGroupSchedulingStrategy,
+)

 from xgboost_ray.util import Event, MultiActorTask, force_on_current_node

@@ -136,6 +139,10 @@ class _XGBoostEnv:
     # when new actors become available
     ELASTIC_RESTART_GRACE_PERIOD_S: int = 10

+    # Whether to allow soft-placement of communication processes. If True,
+    # the Queue and Event actors may be scheduled on non-driver nodes.
+    COMMUNICATION_SOFT_PLACEMENT: bool = True
+
     def __getattribute__(self, item):
         old_val = super(_XGBoostEnv, self).__getattribute__(item)
         new_val = _get_environ(item, old_val)
@@ -985,8 +992,15 @@ def _create_communication_processes(added_tune_callback: bool = False):
     else:
         # Create Queue and Event actors and make sure to colocate with
         # driver node.
-        node_ip = get_node_ip_address()
-        placement_option.update({"resources": {f"node:{node_ip}": 0.01}})
+        node_id = ray.get_runtime_context().get_node_id()
+        placement_option.update(
+            {
+                "scheduling_strategy": NodeAffinitySchedulingStrategy(
+                    node_id=node_id,
+                    soft=ENV.COMMUNICATION_SOFT_PLACEMENT,
+                )
+            }
+        )
     queue = Queue(actor_options=placement_option)  # Queue actor
     stop_event = Event(actor_options=placement_option)  # Stop event actor
     return queue, stop_event
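The main.py change replaces the old trick of reserving a sliver of the driver node's `node:<ip>` custom resource with Ray's `NodeAffinitySchedulingStrategy`, pinning the communication Queue and Event actors to the driver's node; the new `COMMUNICATION_SOFT_PLACEMENT` toggle controls whether that pin is a hard requirement or a soft preference. Below is a minimal, standalone sketch of the same scheduling pattern; the `Sink` actor and variable names are illustrative only, not xgboost_ray code.

import ray
from ray.util.scheduling_strategies import NodeAffinitySchedulingStrategy

ray.init()


@ray.remote
class Sink:
    """Toy stand-in for the Queue/Event communication actors."""

    def echo(self, item):
        return item


# Node running this driver process.
driver_node_id = ray.get_runtime_context().get_node_id()

# soft=True mirrors COMMUNICATION_SOFT_PLACEMENT=True: prefer the driver's
# node, but fall back to another node if it cannot host the actor.
sink = Sink.options(
    scheduling_strategy=NodeAffinitySchedulingStrategy(
        node_id=driver_node_id, soft=True
    )
).remote()

print(ray.get(sink.echo.remote("colocated with the driver")))

In the diff itself the strategy is passed through `placement_option`, which is handed to `Queue` and `Event` as `actor_options` and forwarded to the underlying actors.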

xgboost_ray/sklearn.py (+16 -136)

@@ -169,11 +169,6 @@ def validate_or_none(meta: Optional[List], name: str) -> List:
     return train_dmatrix, evals


-try:
-    from xgboost.sklearn import _convert_ntree_limit
-except ImportError:
-    _convert_ntree_limit = None
-
 try:
     from xgboost.sklearn import _cls_predict_proba
 except ImportError:
@@ -195,10 +190,6 @@ def _cls_predict_proba(n_classes: int, prediction, vstack: Callable):
     _is_cudf_ser = None
     _is_cupy_array = None

-try:
-    from xgboost.compat import XGBoostLabelEncoder
-except ImportError:
-    from sklearn.preprocessing import LabelEncoder as XGBoostLabelEncoder

 _RAY_PARAMS_DOC = """ray_params : None or RayParams or Dict
     Parameters to configure Ray-specific behavior.
@@ -367,27 +358,15 @@ def _ray_predict(
         self: "XGBModel",
         X,
         output_margin=False,
-        ntree_limit=None,
         validate_features=True,
         base_margin=None,
         iteration_range=None,
         ray_params: Union[None, RayParams, Dict] = None,
         _remote: Optional[bool] = None,
         ray_dmatrix_params: Optional[Dict] = None,
+        **kwargs,
     ):
         """Distributed predict via Ray"""
-        compat_predict_kwargs = {}
-        if _convert_ntree_limit is not None:
-            iteration_range = _convert_ntree_limit(
-                self.get_booster(), ntree_limit, iteration_range
-            )
-            iteration_range = self._get_iteration_range(iteration_range)
-            compat_predict_kwargs["iteration_range"] = iteration_range
-        else:
-            if ntree_limit is None:
-                ntree_limit = getattr(self, "best_ntree_limit", 0)
-            compat_predict_kwargs["ntree_limit"] = ntree_limit
-
         ray_params = self._ray_set_ray_params_n_jobs(ray_params, self.n_jobs)
         ray_dmatrix_params = ray_dmatrix_params or {}

@@ -407,7 +386,7 @@ def _ray_predict(
             validate_features=validate_features,
             ray_params=ray_params,
             _remote=_remote,
-            **compat_predict_kwargs,
+            **kwargs,
         )

     def _ray_get_wrap_evaluation_matrices_compat_kwargs(
@@ -589,24 +568,24 @@ def predict(
         self,
         X,
         output_margin=False,
-        ntree_limit=None,
         validate_features=True,
         base_margin=None,
         iteration_range=None,
         ray_params: Union[None, RayParams, Dict] = None,
         _remote: Optional[bool] = None,
         ray_dmatrix_params: Optional[Dict] = None,
+        **kwargs,
     ):
         return self._ray_predict(
             X,
             output_margin=output_margin,
-            ntree_limit=ntree_limit,
             validate_features=validate_features,
             base_margin=base_margin,
             iteration_range=iteration_range,
             ray_params=ray_params,
             _remote=_remote,
             ray_dmatrix_params=ray_dmatrix_params,
+            **kwargs,
         )

     predict.__doc__ = _treat_X_doc(_get_doc(XGBRegressor.predict)) + _RAY_PARAMS_DOC
@@ -702,22 +681,13 @@ def fit(
             )

         if train_dmatrix is not None:
-            if not hasattr(self, "use_label_encoder"):
-                warnings.warn(
-                    "If X is a RayDMatrix, no label encoding"
-                    " will be performed. Ensure the labels are"
-                    " encoded."
-                )
-            elif self.use_label_encoder:
-                raise ValueError(
-                    "X cannot be a RayDMatrix if `use_label_encoder` " "is set to True"
-                )
             if "num_class" not in params:
                 raise ValueError(
                     "`num_class` must be set during initalization if X"
                     " is a RayDMatrix"
                 )
-            self.classes_ = list(range(0, params["num_class"]))
+            if XGBOOST_VERSION < Version("2.0.0"):
+                self.classes_ = list(range(0, params["num_class"]))
             self.n_classes_ = params["num_class"]
             if self.n_classes_ <= 2:
                 params.pop("num_class")
@@ -730,7 +700,10 @@ def fit(
                 "Please reshape the input data X into 2-dimensional " "matrix."
             )

-        label_transform = self._ray_fit_preprocess(y)
+        label_transform = lambda x: x  # noqa: E731
+        if XGBOOST_VERSION < Version("2.0.0"):
+            self.classes_ = np.unique(y)
+        self.n_classes_ = len(np.unique(y))

         if callable(self.objective):
             obj = _objective_decorator(self.objective)
@@ -819,99 +792,31 @@ def fit(

     fit.__doc__ = _treat_X_doc(_get_doc(XGBClassifier.fit)) + _RAY_PARAMS_DOC

-    def _ray_fit_preprocess(self, y) -> Callable:
-        """This has been separated out so that it can be easily overwritten
-        should a future xgboost version remove label encoding"""
-        # pylint: disable = attribute-defined-outside-init,too-many-statements
-        can_use_label_encoder = True
-        use_label_encoder = getattr(self, "use_label_encoder", True)
-        label_encoding_check_error = (
-            "The label must consist of integer "
-            "labels of form 0, 1, 2, ..., [num_class - 1]."
-        )
-        label_encoder_deprecation_msg = (
-            "The use of label encoder in XGBClassifier is deprecated and will "
-            "be removed in a future release. To remove this warning, do the "
-            "following: 1) Pass option use_label_encoder=False when "
-            "constructing XGBClassifier object; and 2) Encode your labels (y) "
-            "as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1]."
-        )
-
-        # ray: modified this to allow for compatibility with legacy xgboost
-        if (_is_cudf_df and _is_cudf_df(y)) or (_is_cudf_ser and _is_cudf_ser(y)):
-            import cupy as cp  # pylint: disable=E0401
-
-            self.classes_ = cp.unique(y.values)
-            self.n_classes_ = len(self.classes_)
-            can_use_label_encoder = False
-            expected_classes = cp.arange(self.n_classes_)
-            if (
-                self.classes_.shape != expected_classes.shape
-                or not (self.classes_ == expected_classes).all()
-            ):
-                raise ValueError(label_encoding_check_error)
-        elif _is_cupy_array and _is_cupy_array(y):
-            import cupy as cp  # pylint: disable=E0401
-
-            self.classes_ = cp.unique(y)
-            self.n_classes_ = len(self.classes_)
-            can_use_label_encoder = False
-            expected_classes = cp.arange(self.n_classes_)
-            if (
-                self.classes_.shape != expected_classes.shape
-                or not (self.classes_ == expected_classes).all()
-            ):
-                raise ValueError(label_encoding_check_error)
-        else:
-            self.classes_ = np.unique(y)
-            self.n_classes_ = len(self.classes_)
-            if not use_label_encoder and (
-                not np.array_equal(self.classes_, np.arange(self.n_classes_))
-            ):
-                raise ValueError(label_encoding_check_error)
-
-        if use_label_encoder:
-            if not can_use_label_encoder:
-                raise ValueError(
-                    "The option use_label_encoder=True is incompatible with "
-                    "inputs of type cuDF or cuPy. Please set "
-                    "use_label_encoder=False when constructing XGBClassifier "
-                    "object. NOTE:" + label_encoder_deprecation_msg
-                )
-            if hasattr(self, "use_label_encoder"):
-                warnings.warn(label_encoder_deprecation_msg, UserWarning)
-            self._le = XGBoostLabelEncoder().fit(y)
-            label_transform = self._le.transform
-        else:
-            label_transform = lambda x: x  # noqa: E731
-
-        return label_transform
-
     def _can_use_inplace_predict(self) -> bool:
         return False

     def predict(
         self,
         X,
         output_margin=False,
-        ntree_limit=None,
         validate_features=True,
         base_margin=None,
         iteration_range: Optional[Tuple[int, int]] = None,
         ray_params: Union[None, RayParams, Dict] = None,
         _remote: Optional[bool] = None,
         ray_dmatrix_params: Optional[Dict] = None,
+        **kwargs,
     ):
         class_probs = self._ray_predict(
             X=X,
             output_margin=output_margin,
-            ntree_limit=ntree_limit,
             validate_features=validate_features,
             base_margin=base_margin,
             iteration_range=iteration_range,
             ray_params=ray_params,
             _remote=_remote,
             ray_dmatrix_params=ray_dmatrix_params,
+            **kwargs,
         )
         if output_margin:
             # If output_margin is active, simply return the scores
@@ -934,25 +839,25 @@ def predict(
     def predict_proba(
         self,
         X,
-        ntree_limit=None,
         validate_features=False,
         base_margin=None,
         iteration_range: Optional[Tuple[int, int]] = None,
         ray_params: Union[None, RayParams, Dict] = None,
         _remote: Optional[bool] = None,
         ray_dmatrix_params: Optional[Dict] = None,
+        **kwargs,
     ) -> np.ndarray:

         class_probs = self._ray_predict(
             X=X,
             output_margin=self.objective == "multi:softmax",
-            ntree_limit=ntree_limit,
             validate_features=validate_features,
             base_margin=base_margin,
             iteration_range=iteration_range,
             ray_params=ray_params,
             _remote=_remote,
             ray_dmatrix_params=ray_dmatrix_params,
+            **kwargs,
         )
         # If model is loaded from a raw booster there's no `n_classes_`
         return _cls_predict_proba(
@@ -979,31 +884,6 @@ class RayXGBRFClassifier(RayXGBClassifier):
         def __init__(self, *args, **kwargs):
             raise ValueError("RayXGBRFClassifier not available with xgboost<1.0.0")

-    # use_label_encoder added in xgboost commit
-    # c8ec62103a36f1717d032b1ddff2bf9e0642508a (1.3.0)
-    elif "use_label_encoder" in inspect.signature(XGBRFClassifier.__init__).parameters:
-
-        @_deprecate_positional_args
-        @_xgboost_version_warn
-        def __init__(
-            self,
-            *,
-            learning_rate=1,
-            subsample=0.8,
-            colsample_bynode=0.8,
-            reg_lambda=1e-5,
-            use_label_encoder=True,
-            **kwargs,
-        ):
-            super().__init__(
-                learning_rate=learning_rate,
-                subsample=subsample,
-                colsample_bynode=colsample_bynode,
-                reg_lambda=reg_lambda,
-                use_label_encoder=use_label_encoder,
-                **kwargs,
-            )
-
     else:

         @_deprecate_positional_args
@@ -1172,24 +1052,24 @@ def predict(
         self,
         X,
         output_margin=False,
-        ntree_limit=None,
         validate_features=True,
         base_margin=None,
         iteration_range=None,
         ray_params: Union[None, RayParams, Dict] = None,
         _remote: Optional[bool] = None,
         ray_dmatrix_params: Optional[Dict] = None,
+        **kwargs,
     ):
         return self._ray_predict(
             X,
             output_margin=output_margin,
-            ntree_limit=ntree_limit,
             validate_features=validate_features,
             base_margin=base_margin,
             iteration_range=iteration_range,
             ray_params=ray_params,
             _remote=_remote,
             ray_dmatrix_params=ray_dmatrix_params,
+            **kwargs,
         )

     predict.__doc__ = _treat_X_doc(_get_doc(XGBRanker.predict)) + _RAY_PARAMS_DOC
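The sklearn.py changes track XGBoost 2.0's scikit-learn API: the long-removed `ntree_limit` argument and the label-encoder compatibility shims (`_convert_ntree_limit`, `XGBoostLabelEncoder`, `_ray_fit_preprocess`) are dropped, extra keyword arguments are now forwarded to xgboost via `**kwargs`, and the manual `classes_` assignment is gated to xgboost < 2.0. For callers, tree selection at prediction time moves from `ntree_limit` to `iteration_range`, which these wrappers already expose. A hedged usage sketch follows; the toy dataset, estimator settings, and variable names are illustrative only.

import numpy as np

from xgboost_ray import RayParams, RayXGBClassifier

# Illustrative toy data; any 2-D feature matrix with integer labels works.
X = np.random.rand(200, 4)
y = np.random.randint(0, 2, size=200)

clf = RayXGBClassifier(n_estimators=50)
clf.fit(X, y, ray_params=RayParams(num_actors=2))

# xgboost < 2.0 style (removed): clf.predict(X, ntree_limit=20)
# xgboost >= 2.0 style: select trees with iteration_range instead.
preds = clf.predict(
    X, iteration_range=(0, 20), ray_params=RayParams(num_actors=2)
)
proba = clf.predict_proba(
    X, iteration_range=(0, 20), ray_params=RayParams(num_actors=2)
)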
