Skip to content

Commit 9552e6c

Browse files
ENH: implement train_conformalize_test_split (#629)
* Implement train_conformalize_test_split
* Add unit tests for train_conformalize_test_split
* Rename mapie_v1._utils to mapie_v1.utils
* Use train_conformalize_test_split in regression documentation examples

---------

Co-authored-by: Valentin Laurent <[email protected]>
1 parent 636e4ea commit 9552e6c

10 files changed

+427
-156
lines changed

examples/regression/1-quickstart/plot_prefit.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,12 @@
2424
import scipy
2525
from lightgbm import LGBMRegressor
2626
from matplotlib import pyplot as plt
27-
from sklearn.model_selection import train_test_split
2827
from sklearn.neural_network import MLPRegressor
2928

3029
from mapie._typing import NDArray
3130
from mapie.metrics import regression_coverage_score
3231
from mapie_v1.regression import SplitConformalRegressor, ConformalizedQuantileRegressor
32+
from mapie_v1.utils import train_conformalize_test_split
3333

3434
warnings.filterwarnings("ignore")
3535

@@ -56,13 +56,13 @@ def f(x: NDArray) -> NDArray:
5656
X = np.linspace(0, 1, n_samples)
5757
y = f(X) + rng.normal(0, sigma, n_samples)
5858

59-
# Train/validation/test split
60-
X_train_conformalize, X_test, y_train_conformalize, y_test = train_test_split(
61-
X, y, test_size=1 / 10, random_state=RANDOM_STATE
62-
)
63-
X_train, X_conformalize, y_train, y_conformalize = train_test_split(
64-
X_train_conformalize, y_train_conformalize,
65-
test_size=1 / 9, random_state=RANDOM_STATE
59+
# Train/conformalize/test split
60+
(
61+
X_train, X_conformalize, X_test, y_train, y_conformalize, y_test
62+
) = train_conformalize_test_split(
63+
X, y,
64+
train_size=0.8, conformalize_size=0.1, test_size=0.1,
65+
random_state=RANDOM_STATE
6666
)
6767

6868

examples/regression/1-quickstart/plot_toy_model.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -8,20 +8,21 @@
88
import numpy as np
99
from matplotlib import pyplot as plt
1010
from sklearn.datasets import make_regression
11-
from sklearn.model_selection import train_test_split
1211

1312
from mapie.metrics import regression_coverage_score
1413
from mapie_v1.regression import SplitConformalRegressor
14+
from mapie_v1.utils import train_conformalize_test_split
1515

1616
RANDOM_STATE = 42
1717

1818
X, y = make_regression(n_samples=500, n_features=1, noise=20, random_state=RANDOM_STATE)
1919

20-
X_train, X_test_conformalize, y_train, y_test_conformalize = train_test_split(
21-
X, y, test_size=0.4, random_state=RANDOM_STATE
22-
)
23-
X_test, X_conformalize, y_test, y_conformalize = train_test_split(
24-
X_test_conformalize, y_test_conformalize, test_size=0.5, random_state=RANDOM_STATE
20+
(
21+
X_train, X_conformalize, X_test, y_train, y_conformalize, y_test
22+
) = train_conformalize_test_split(
23+
X, y,
24+
train_size=0.6, conformalize_size=0.2, test_size=0.2,
25+
random_state=RANDOM_STATE
2526
)
2627

2728
confidence_level = [0.95, 0.68]

examples/regression/2-advanced-analysis/plot_ResidualNormalisedScore_tutorial.py

+8-9
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from mapie.conformity_scores import ResidualNormalisedScore
2424
from mapie.metrics import regression_coverage_score_v2, regression_ssc_score
2525
from mapie_v1.regression import SplitConformalRegressor
26+
from mapie_v1.utils import train_conformalize_test_split
2627

2728
warnings.filterwarnings("ignore")
2829

@@ -64,17 +65,15 @@
6465

6566
np.array(X)
6667
np.array(y)
67-
X_train_conformalize, X_test, y_train_conformalize, y_test = train_test_split(
68-
X,
69-
y,
70-
random_state=RANDOM_STATE,
71-
test_size=0.02
72-
)
73-
X_train, X_conformalize, y_train, y_conformalize = train_test_split(
74-
X_train_conformalize,
75-
y_train_conformalize,
68+
69+
(
70+
X_train, X_conformalize, X_test, y_train, y_conformalize, y_test
71+
) = train_conformalize_test_split(
72+
X, y,
73+
train_size=0.7, conformalize_size=0.28, test_size=0.02,
7674
random_state=RANDOM_STATE
7775
)
76+
7877
X_conformalize_prefit, X_res, y_conformalize_prefit, y_res = train_test_split(
7978
X_conformalize,
8079
y_conformalize,

examples/regression/2-advanced-analysis/plot_conformal_predictive_distribution.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,11 @@
2323
from matplotlib import pyplot as plt
2424
from sklearn.datasets import make_regression
2525
from sklearn.linear_model import LinearRegression
26-
from sklearn.model_selection import train_test_split
2726

2827
from mapie.conformity_scores import (AbsoluteConformityScore,
2928
ResidualNormalisedScore)
3029
from mapie_v1.regression import SplitConformalRegressor
30+
from mapie_v1.utils import train_conformalize_test_split
3131

3232
warnings.filterwarnings('ignore')
3333

@@ -43,12 +43,13 @@
4343
X, y = make_regression(
4444
n_samples=1000, n_features=1, noise=20, random_state=RANDOM_STATE
4545
)
46-
X_train_conformalize, X_test, y_train_conformalize, y_test = train_test_split(
47-
X, y, test_size=0.3, random_state=RANDOM_STATE
48-
)
4946

50-
X_train, X_conformalize, y_train, y_conformalize = train_test_split(
51-
X_train_conformalize, y_train_conformalize, test_size=0.3, random_state=RANDOM_STATE
47+
(
48+
X_train, X_conformalize, X_test, y_train, y_conformalize, y_test
49+
) = train_conformalize_test_split(
50+
X, y,
51+
train_size=0.6, conformalize_size=0.2, test_size=0.2,
52+
random_state=RANDOM_STATE
5253
)
5354

5455

examples/regression/2-advanced-analysis/plot_cqr_symmetry_difference.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@
1111
from matplotlib import pyplot as plt
1212
from sklearn.datasets import make_regression
1313
from sklearn.ensemble import GradientBoostingRegressor
14-
from sklearn.model_selection import train_test_split
1514

1615
from mapie.metrics import regression_coverage_score
1716
from mapie_v1.regression import ConformalizedQuantileRegressor
17+
from mapie_v1.utils import train_conformalize_test_split
1818

1919
RANDOM_STATE = 1
2020

@@ -25,13 +25,14 @@
2525
n_samples=1000, n_features=1, noise=20, random_state=RANDOM_STATE
2626
)
2727

28-
X_train_conformalize, X_test, y_train_conformalize, y_test = train_test_split(
29-
X, y, test_size=0.3, random_state=RANDOM_STATE
28+
(
29+
X_train, X_conformalize, X_test, y_train, y_conformalize, y_test
30+
) = train_conformalize_test_split(
31+
X, y,
32+
train_size=0.6, conformalize_size=0.2, test_size=0.2,
33+
random_state=RANDOM_STATE
3034
)
3135

32-
X_train, X_conformalize, y_train, y_conformalize = train_test_split(
33-
X_train_conformalize, y_train_conformalize, test_size=0.3, random_state=RANDOM_STATE
34-
)
3536

3637
# Define confidence level
3738
confidence_level = 0.8

mapie_v1/_utils.py

-116
This file was deleted.

mapie_v1/classification.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from mapie._typing import ArrayLike, NDArray
1212
from mapie.classification import MapieClassifier
1313
from mapie.conformity_scores import BaseClassificationScore
14-
from mapie_v1._utils import (
14+
from mapie_v1.utils import (
1515
transform_confidence_level_to_alpha_list,
1616
prepare_params,
1717
cast_predictions_to_ndarray_tuple,

mapie_v1/regression.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from mapie.regression import MapieRegressor, MapieQuantileRegressor
1414
from mapie.utils import check_estimator_fit_predict
1515
from mapie_v1.conformity_scores._utils import check_and_select_conformity_score
16-
from mapie_v1._utils import (
16+
from mapie_v1.utils import (
1717
transform_confidence_level_to_alpha_list,
1818
check_if_param_in_allowed_values,
1919
check_cv_not_string,

0 commit comments

Comments
 (0)