+from __future__ import annotations
+
 import os
 from pathlib import Path

 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 import seaborn as sns
 from matplotlib import animation
-from scipy.stats import norm
+from matplotlib.figure import Figure
+from scipy.stats import gamma, norm
 from sklearn.datasets import fetch_california_housing

+import cunumeric as cn
 import legateboost as lb

 sns.set()
@@ -26,23 +30,38 @@
 n_frames = 2 if os.environ.get("CI") else 40


-def fit_normal_distribution():
+def fit_normal_distribution() -> tuple[lb.LBRegressor, list[cn.ndarray]]:
+    obj = lb.NormalObjective()
     model = lb.LBRegressor(
         verbose=True,
         init="average",
         base_models=(lb.models.Tree(max_depth=2),),
         n_estimators=n_estimators,
         learning_rate=0.1,
         random_state=rs,
-        objective="normal",
+        objective=obj,
+    )
+    return model, [model.partial_fit(X, y).predict(X_test) for _ in range(n_frames)]
+
+
+def fit_gamma_distribution() -> tuple[lb.LBRegressor, list[cn.ndarray]]:
+    obj = lb.GammaObjective()
+    model = lb.LBRegressor(
+        verbose=True,
+        init="average",
+        base_models=(lb.models.Tree(max_depth=2),),
+        n_estimators=n_estimators,
+        learning_rate=0.1,
+        random_state=rs,
+        objective=obj,
     )
     return model, [model.partial_fit(X, y).predict(X_test) for _ in range(n_frames)]
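Both fit helpers build their frame list the same way: each partial_fit call continues boosting the same model for another n_estimators rounds and the test-set prediction is recorded as one frame, which is why the figure title later reports (i + 1) * n_estimators iterations. The list comprehension is equivalent to an explicit loop along these lines (a sketch only; collect_frames is an illustrative name, not part of this change):

def collect_frames(model: lb.LBRegressor, n: int) -> list[cn.ndarray]:
    # Each partial_fit call adds n_estimators more boosting rounds to the
    # existing model; the prediction on X_test is captured as one frame.
    frames = []
    for _ in range(n):
        model.partial_fit(X, y)
        frames.append(model.predict(X_test))
    return frames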


 quantiles = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])


-def fit_quantile_regression():
+def fit_quantile_regression() -> tuple[lb.LBRegressor, list]:
     model = lb.LBRegressor(
         verbose=True,
         base_models=(lb.models.Tree(max_depth=2),),
@@ -55,12 +74,15 @@ def fit_quantile_regression():


 normal_model, normal_preds = fit_normal_distribution()
+gamma_model, gamma_preds = fit_gamma_distribution()
 quantile_model, quantile_preds = fit_quantile_regression()

-fig, ax = plt.subplots(1, 2, figsize=(12, 6))
+fig, ax = plt.subplots(1, 3, figsize=(12, 6))
+

+def animate(i: int) -> tuple[Figure]:
+    lower, upper = -0.5, 6.5

-def animate(i):
     fig.suptitle(
         "Distribution of House Values: Boosting iterations {}".format(
             (i + 1) * n_estimators
@@ -70,12 +92,13 @@ def animate(i):
     # Plot the normal distribution
     ax[0].cla()
     ax[0].set_title("Normal Distribution - 95% Confidence Interval")
+    norm_obj = lb.NormalObjective()
     data = pd.DataFrame(
         {
             feature_name: X_test[:, 0],
             "y": y_test,
-            "Predicted house value": normal_preds[i][:, 0],
-            "sigma": np.exp(normal_preds[i][:, 1]),
+            "Predicted house value": norm_obj.mean(normal_preds[i]),
+            "sigma": norm_obj.var(normal_preds[i]),
         }
     ).sort_values(by=feature_name)
     sns.lineplot(
@@ -89,15 +112,42 @@ def animate(i):
         0.95, loc=data["Predicted house value"], scale=data["sigma"]
     )
     ax[0].fill_between(data[feature_name], interval[0], interval[1], alpha=0.2)
-    ax[0].set_ylim(-0.5, 5.5)
+    ax[0].set_ylim(lower, upper)

     sns.scatterplot(
         x=feature_name, y="y", data=data, ax=ax[0], s=15, color=".2", alpha=0.2
     )

-    # Plot the quantile regression
+    # Plot the gamma distribution
     ax[1].cla()
-    ax[1].set_title("Quantile Regression")
+    ax[1].set_title("Gamma Distribution - 95% Confidence Interval")
+    gamma_obj = lb.GammaObjective()
+    data = pd.DataFrame(
+        {
+            feature_name: X_test[:, 0],
+            "y": y_test,
+            "Predicted house value": gamma_obj.mean(gamma_preds[i]),
+            "shape": gamma_obj.shape(gamma_preds[i]),
+            "scale": gamma_obj.scale(gamma_preds[i]),
+        }
+    ).sort_values(by=feature_name)
+    sns.lineplot(
+        x=feature_name,
+        y="Predicted house value",
+        data=data[[feature_name, "Predicted house value"]],
+        ax=ax[1],
+        errorbar=("sd", 0),
+    )
+    interval = gamma.interval(0.95, data["shape"], scale=data["scale"])
+    ax[1].fill_between(data[feature_name], interval[0], interval[1], alpha=0.2)
+    ax[1].set_ylim(lower, upper)
+    sns.scatterplot(
+        x=feature_name, y="y", data=data, ax=ax[1], s=15, color=".2", alpha=0.2
+    )
+
+    # Plot the quantile regression
+    ax[2].cla()
+    ax[2].set_title("Quantile Regression")

     data = {
         feature_name: X_test[:, 0],
@@ -115,14 +165,14 @@ def animate(i):
         y="Predicted house value",
         data=lines,
         style="quantile",
-        ax=ax[1],
+        ax=ax[2],
         dashes=dashes,
         errorbar=("sd", 0),
     )
-    ax[1].set_ylim(-0.5, 5.5)
+    ax[2].set_ylim(lower, upper)

     sns.scatterplot(
-        x=feature_name, y="y", data=data, ax=ax[1], s=15, color=".2", alpha=0.2
+        x=feature_name, y="y", data=data, ax=ax[2], s=15, color=".2", alpha=0.2
     )

     plt.tight_layout()
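plt.tight_layout() is the last line visible in this hunk; the animation itself is driven later in the script, outside the shown changes. For orientation only, a hypothetical sketch, assuming the standard matplotlib animation API already imported above, of how an animate callback like this is typically wired up:

# Hypothetical wiring, not the file's actual code: FuncAnimation calls
# animate(i) for i in range(n_frames) and records each figure state.
anim = animation.FuncAnimation(fig, animate, frames=n_frames, interval=500)
anim.save("output.gif", writer="pillow")  # illustrative file name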