58
58
from scipy import sparse as sp
59
59
60
60
if sklearn_check_version ('1.2' ):
61
- from sklearn .utils ._param_validation import Interval
61
+ from sklearn .utils ._param_validation import Interval , StrOptions
62
62
63
63
64
64
class BaseRandomForest (ABC ):
@@ -193,7 +193,8 @@ class RandomForestClassifier(sklearn_RandomForestClassifier, BaseRandomForest):
193
193
_parameter_constraints : dict = {
194
194
** sklearn_RandomForestClassifier ._parameter_constraints ,
195
195
"max_bins" : [Interval (numbers .Integral , 2 , None , closed = "left" )],
196
- "min_bin_size" : [Interval (numbers .Integral , 1 , None , closed = "left" )]
196
+ "min_bin_size" : [Interval (numbers .Integral , 1 , None , closed = "left" )],
197
+ "splitter_mode" : [StrOptions ({"best" , "random" })]
197
198
}
198
199
199
200
if sklearn_check_version ('1.0' ):
@@ -218,7 +219,8 @@ def __init__(
218
219
ccp_alpha = 0.0 ,
219
220
max_samples = None ,
220
221
max_bins = 256 ,
221
- min_bin_size = 1 ):
222
+ min_bin_size = 1 ,
223
+ splitter_mode = 'best' ):
222
224
super (RandomForestClassifier , self ).__init__ (
223
225
n_estimators = n_estimators ,
224
226
criterion = criterion ,
@@ -243,6 +245,7 @@ def __init__(
243
245
self .max_bins = max_bins
244
246
self .min_bin_size = min_bin_size
245
247
self .min_impurity_split = None
248
+ self .splitter_mode = splitter_mode
246
249
# self._estimator = DecisionTreeClassifier()
247
250
else :
248
251
def __init__ (self ,
@@ -266,7 +269,8 @@ def __init__(self,
266
269
ccp_alpha = 0.0 ,
267
270
max_samples = None ,
268
271
max_bins = 256 ,
269
- min_bin_size = 1 ):
272
+ min_bin_size = 1 ,
273
+ splitter_mode = 'best' ):
270
274
super (RandomForestClassifier , self ).__init__ (
271
275
n_estimators = n_estimators ,
272
276
criterion = criterion ,
@@ -294,6 +298,7 @@ def __init__(self,
294
298
self .max_bins = max_bins
295
299
self .min_bin_size = min_bin_size
296
300
self .min_impurity_split = None
301
+ self .splitter_mode = splitter_mode
297
302
# self._estimator = DecisionTreeClassifier()
298
303
299
304
def fit (self , X , y , sample_weight = None ):
@@ -529,6 +534,11 @@ def _estimators_(self):
529
534
def _onedal_cpu_supported (self , method_name , * data ):
530
535
if method_name == 'ensemble.RandomForestClassifier.fit' :
531
536
ready , X , y , sample_weight = self ._onedal_ready (* data )
537
+ if self .splitter_mode == 'random' :
538
+ warnings .warn ("'random' splitter mode supports GPU devices only "
539
+ "and requires oneDAL version >= 2023.1.1. "
540
+ "Using 'best' mode instead." , RuntimeWarning )
541
+ self .splitter_mode = 'best'
532
542
if not ready :
533
543
return False
534
544
elif sp .issparse (X ):
@@ -570,6 +580,11 @@ def _onedal_cpu_supported(self, method_name, *data):
570
580
def _onedal_gpu_supported (self , method_name , * data ):
571
581
if method_name == 'ensemble.RandomForestClassifier.fit' :
572
582
ready , X , y , sample_weight = self ._onedal_ready (* data )
583
+ if self .splitter_mode == 'random' and \
584
+ not daal_check_version ((2023 , 'P' , 101 )):
585
+ warnings .warn ("'random' splitter mode requires OneDAL >= 2023.1.1. "
586
+ "Using 'best' mode instead." , RuntimeWarning )
587
+ self .splitter_mode = 'best'
573
588
if not ready :
574
589
return False
575
590
elif sp .issparse (X ):
@@ -687,6 +702,8 @@ def _onedal_fit(self, X, y, sample_weight=None, queue=None):
687
702
'min_bin_size' : self .min_bin_size ,
688
703
'max_samples' : self .max_samples
689
704
}
705
+ if daal_check_version ((2023 , 'P' , 101 )):
706
+ onedal_params ['splitter_mode' ] = self .splitter_mode
690
707
self ._cached_estimators_ = None
691
708
692
709
# Compute
@@ -729,7 +746,8 @@ class RandomForestRegressor(sklearn_RandomForestRegressor, BaseRandomForest):
729
746
_parameter_constraints : dict = {
730
747
** sklearn_RandomForestRegressor ._parameter_constraints ,
731
748
"max_bins" : [Interval (numbers .Integral , 2 , None , closed = "left" )],
732
- "min_bin_size" : [Interval (numbers .Integral , 1 , None , closed = "left" )]
749
+ "min_bin_size" : [Interval (numbers .Integral , 1 , None , closed = "left" )],
750
+ "splitter_mode" : [StrOptions ({"best" , "random" })]
733
751
}
734
752
735
753
if sklearn_check_version ('1.0' ):
@@ -754,7 +772,8 @@ def __init__(
754
772
ccp_alpha = 0.0 ,
755
773
max_samples = None ,
756
774
max_bins = 256 ,
757
- min_bin_size = 1 ):
775
+ min_bin_size = 1 ,
776
+ splitter_mode = 'best' ):
758
777
super (RandomForestRegressor , self ).__init__ (
759
778
n_estimators = n_estimators ,
760
779
criterion = criterion ,
@@ -778,6 +797,7 @@ def __init__(
778
797
self .max_bins = max_bins
779
798
self .min_bin_size = min_bin_size
780
799
self .min_impurity_split = None
800
+ self .splitter_mode = splitter_mode
781
801
else :
782
802
def __init__ (self ,
783
803
n_estimators = 100 , * ,
@@ -799,7 +819,8 @@ def __init__(self,
799
819
ccp_alpha = 0.0 ,
800
820
max_samples = None ,
801
821
max_bins = 256 ,
802
- min_bin_size = 1 ):
822
+ min_bin_size = 1 ,
823
+ splitter_mode = 'best' ):
803
824
super (RandomForestRegressor , self ).__init__ (
804
825
n_estimators = n_estimators ,
805
826
criterion = criterion ,
@@ -826,6 +847,7 @@ def __init__(self,
826
847
self .max_bins = max_bins
827
848
self .min_bin_size = min_bin_size
828
849
self .min_impurity_split = None
850
+ self .splitter_mode = splitter_mode
829
851
830
852
@property
831
853
def _estimators_ (self ):
@@ -902,6 +924,11 @@ def _onedal_ready(self, X, y, sample_weight):
902
924
def _onedal_cpu_supported (self , method_name , * data ):
903
925
if method_name == 'ensemble.RandomForestRegressor.fit' :
904
926
ready , X , y , sample_weight = self ._onedal_ready (* data )
927
+ if self .splitter_mode == 'random' :
928
+ warnings .warn ("'random' splitter mode supports GPU devices only "
929
+ "and requires oneDAL version >= 2023.1.1. "
930
+ "Using 'best' mode instead." , RuntimeWarning )
931
+ self .splitter_mode = 'best'
905
932
if not ready :
906
933
return False
907
934
elif not (self .oob_score and daal_check_version (
@@ -947,6 +974,11 @@ def _onedal_cpu_supported(self, method_name, *data):
947
974
def _onedal_gpu_supported (self , method_name , * data ):
948
975
if method_name == 'ensemble.RandomForestRegressor.fit' :
949
976
ready , X , y , sample_weight = self ._onedal_ready (* data )
977
+ if self .splitter_mode == 'random' and \
978
+ not daal_check_version ((2023 , 'P' , 101 )):
979
+ warnings .warn ("'random' splitter mode requires OneDAL >= 2023.1.1. "
980
+ "Using 'best' mode instead." , RuntimeWarning )
981
+ self .splitter_mode = 'best'
950
982
if not ready :
951
983
return False
952
984
elif not (self .oob_score and daal_check_version (
@@ -1035,6 +1067,8 @@ def _onedal_fit(self, X, y, sample_weight=None, queue=None):
1035
1067
'variable_importance_mode' : 'mdi' ,
1036
1068
'max_samples' : self .max_samples
1037
1069
}
1070
+ if daal_check_version ((2023 , 'P' , 101 )):
1071
+ onedal_params ['splitter_mode' ] = self .splitter_mode
1038
1072
self ._cached_estimators_ = None
1039
1073
self ._onedal_estimator = self ._onedal_regressor (** onedal_params )
1040
1074
self ._onedal_estimator .fit (X , y , sample_weight , queue = queue )
0 commit comments