1
1
"""
2
2
==========================================================================================
3
- [Pre-v1] Estimating prediction intervals of Gamma distributed target
3
+ Use MAPIE on data with gamma distribution
4
4
==========================================================================================
5
- **Note: we recently released MAPIE v1.0.0, which introduces breaking API changes.**
6
- **This notebook hasn't been updated to the new API yet.**
7
5
8
- This example uses :class:`~mapie.regression.MapieRegressor` to estimate
6
+
7
+ This example uses :class:`~mapie_v1.regression.CrossConformalRegressor` to estimate
9
8
prediction intervals associated with Gamma distributed target.
10
9
The limit of the absolute residual conformity score is illustrated.
11
10
17
16
The data is modelled by a Random Forest model
18
17
:class:`~sklearn.ensemble.RandomForestRegressor` with a fixed parameter set.
19
18
The prediction intervals are determined by means of the MAPIE regressor
20
- :class:`~mapie .regression.MapieRegressor ` considering two conformity scores:
21
- :class:`~mapie.conformity_scores.AbsoluteConformityScore ` which
19
+ :class:`~mapie_v1.regression.CrossConformalRegressor` considering two conformity scores:
20
+ ``"absolute"`` which
22
21
considers the absolute residuals as the conformity scores and
23
- :class:`~mapie.conformity_scores.GammaConformityScore ` which
22
+ ``"gamma"`` which
24
23
considers the residuals divided by the predicted means as conformity scores.
25
24
We consider the standard CV+ resampling method.
26
25
31
30
overcomes this issue by considering prediction intervals with width
32
31
proportional to the predicted mean. For low prices, the Gamma prediction
33
32
intervals are narrower than the default ones, conversely to high prices
34
- for which the conficence intervals are higher but visually more relevant.
33
+ for which the prediction intervals are wider but visually more relevant.
35
34
The empirical coverage is similar between the two conformity scores.
36
35
"""
37
36
import matplotlib .pyplot as plt
43
42
from sklearn .ensemble import RandomForestRegressor
44
43
from sklearn .model_selection import train_test_split
45
44
46
- from mapie .conformity_scores import GammaConformityScore
47
45
from mapie .metrics import regression_coverage_score
48
- from mapie .regression import MapieRegressor
46
+ from mapie_v1 .regression import CrossConformalRegressor
49
47
50
- random_state = 42
48
+ RANDOM_STATE = 42
51
49
52
50
# Parameters
53
51
features = [
59
57
]
60
58
target = "SalePrice"
61
59
62
- alpha = 0.05
63
- rf_kwargs = {"n_estimators" : 10 , "random_state" : random_state }
60
+ confidence_level = 0.95
61
+ rf_kwargs = {"n_estimators" : 10 , "random_state" : RANDOM_STATE }
64
62
model = RandomForestRegressor (** rf_kwargs )
65
63
66
64
##############################################################################
69
67
#
70
68
# We start by loading a dataset with a target following approximately
71
69
# a Gamma distribution.
72
- # The :class:`~mapie.conformity_scores.GammaConformityScore`` is relevant
73
- # in such cases.
74
70
# Two sub datasets are extracted: the training and test ones.
75
71
76
72
dataset_url = (
85
81
X = data [features ]
86
82
y = data [target ]
87
83
88
- X_train , X_test , y_train , y_test = train_test_split (
89
- X [features ], y , test_size = 0.2 , random_state = random_state
84
+ X_train_conformalize , X_test , y_train_conformalize , y_test = train_test_split (
85
+ X [features ], y , test_size = 0.2 , random_state = RANDOM_STATE
90
86
)
91
87
92
88
##############################################################################
95
91
#
96
92
# Two models are trained with two different conformity scores:
97
93
#
98
- # - :class:`~mapie.conformity_scores.AbsoluteConformityScore ` (default
99
- # conformity score) relevant for target positive as well as negative.
94
+ # - ``conformity_score = "absolute"`` (default
95
+ # conformity score) is relevant for target positive as well as negative.
100
96
# The prediction interval widths are, in this case, approximately the same
101
97
# over the range of prediction.
102
98
#
103
- # - :class:`~mapie.conformity_scores.GammaConformityScore` relevant for target
99
+ # - ``conformity_score = "gamma"`` is relevant for target
104
100
# following roughly a Gamma distribution. The prediction interval widths
105
101
# scale with the predicted value.
106
102
107
103
##############################################################################
108
104
# First, train model with
109
- # :class:`~mapie.conformity_scores.AbsoluteConformityScore`.
110
- mapie = MapieRegressor (model , random_state = random_state )
111
- mapie .fit (X_train , y_train )
112
- y_pred_absconfscore , y_pis_absconfscore = mapie .predict (
113
- X_test , alpha = alpha , ensemble = True
105
+ # ``conformity_score = "absolute"``.
106
+ mapie = CrossConformalRegressor (
107
+ model , confidence_level = confidence_level , conformity_score = "absolute"
108
+ )
109
+ mapie .fit_conformalize (X_train_conformalize , y_train_conformalize )
110
+ y_pred_absconfscore , y_pis_absconfscore = mapie .predict_interval (
111
+ X_test
114
112
)
115
113
116
114
coverage_absconfscore = regression_coverage_score (
@@ -138,14 +136,14 @@ def get_yerr(y_pred, y_pis):
138
136
)
139
137
140
138
##############################################################################
141
- # Then, train the model with
142
- # :class:`~mapie.conformity_scores.GammaConformityScore `.
143
- mapie = MapieRegressor (
144
- model , conformity_score = GammaConformityScore (), random_state = random_state
139
+ # Then, train the model with:
140
+ # ``conformity_score = "gamma"``.
141
+ mapie = CrossConformalRegressor (
142
+ model , confidence_level = confidence_level , conformity_score = "gamma"
145
143
)
146
- mapie .fit ( X_train , y_train )
147
- y_pred_gammaconfscore , y_pis_gammaconfscore = mapie .predict (
148
- X_test , alpha = [ alpha ], ensemble = True
144
+ mapie .fit_conformalize ( X_train_conformalize , y_train_conformalize )
145
+ y_pred_gammaconfscore , y_pis_gammaconfscore = mapie .predict_interval (
146
+ X_test
149
147
)
150
148
151
149
coverage_gammaconfscore = regression_coverage_score (
@@ -164,9 +162,9 @@ def get_yerr(y_pred, y_pis):
164
162
#
165
163
# Once the models have been trained, we now compare the prediction intervals
166
164
# obtained from the two conformity scores. We can see that the
167
- # :class:`~mapie.conformity_scores.AbsoluteConformityScore` generates
165
+ # ``"absolute"`` conformity score generates
168
166
# prediction interval with almost the same width for all the predicted values.
169
- # Conversely, the `mapie.conformity_scores.GammaConformityScore`
167
+ # Conversely, the ``"gamma"`` conformity score
170
168
# yields prediction interval with width scaling with the predicted values.
171
169
#
172
170
# The choice of the conformity score depends on the problem we face.
@@ -209,7 +207,7 @@ def get_yerr(y_pred, y_pis):
209
207
axs [1 , img_id ].set_ylim ([ymin , ymax ])
210
208
211
209
fig .suptitle (
212
- f"Predicted values with the prediction intervals of level { alpha } "
210
+ f"Predicted values with the prediction intervals of level { confidence_level } "
213
211
)
214
212
plt .subplots_adjust (wspace = 0.3 , hspace = 0.3 )
215
213
plt .show ()
0 commit comments