Commit 6e2e40b

added compat entry
1 parent f16770d commit 6e2e40b

10 files changed: +264 −176 lines changed

Project.toml

+1
@@ -15,6 +15,7 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 CategoricalArrays = "0.10"
 MLJBase = "0.20, 0.21"
 MLJModelInterface = "1"
+NaturalSort = "1"
 Plots = "1"
 julia = "1.7"
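
The added line is the only substantive change in the commit: under Pkg's compat notation, `NaturalSort = "1"` allows any NaturalSort release in the `[1.0.0, 2.0.0)` range. A quick way to check that the environment still resolves with the new bound is a sketch along these lines (assuming the working directory is the package root):

using Pkg

# Activate the project that owns this Project.toml and re-resolve the
# manifest against the updated [compat] bounds.
Pkg.activate(".")
Pkg.resolve()

# Show which version of NaturalSort was picked (must be a 1.x release).
Pkg.status("NaturalSort")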

dev/quick_tour/notebook.jl

+118 −76
@@ -7,7 +7,14 @@ using InteractiveUtils
 # This Pluto notebook uses @bind for interactivity. When running this notebook outside of Pluto, the following 'mock version' of @bind gives bound variables a default value (instead of an error).
 macro bind(def, element)
     quote
-        local iv = try Base.loaded_modules[Base.PkgId(Base.UUID("6e696c72-6542-2067-7265-42206c756150"), "AbstractPlutoDingetjes")].Bonds.initial_value catch; b -> missing; end
+        local iv = try
+            Base.loaded_modules[Base.PkgId(
+                Base.UUID("6e696c72-6542-2067-7265-42206c756150"),
+                "AbstractPlutoDingetjes",
+            )].Bonds.initial_value
+        catch
+            b -> missing
+        end
         local el = $(esc(element))
         global $(esc(def)) = Core.applicable(Base.get, el) ? Base.get(el) : iv(el)
         el
@@ -16,17 +23,17 @@ end
 
 # ╔═╡ aad62ef1-4136-4732-a9e6-3746524978ee
 begin
-    using ConformalPrediction
-    using Distributions
-    using EvoTrees
-    using LightGBM
-    using MLJ
-    using MLJDecisionTreeInterface
-    using MLJLinearModels
-    using NearestNeighborModels
-    using Plots
-    using PlutoUI
-    include("utils.jl")
+    using ConformalPrediction
+    using Distributions
+    using EvoTrees
+    using LightGBM
+    using MLJ
+    using MLJDecisionTreeInterface
+    using MLJLinearModels
+    using NearestNeighborModels
+    using Plots
+    using PlutoUI
+    include("utils.jl")
 end;
 
 # ╔═╡ bc0d7575-dabd-472d-a0ce-db69d242ced8
@@ -49,18 +56,18 @@ First, we create a simple helper function that generates our data:
 
 # ╔═╡ 2f1c8da3-77dc-4bd7-8fa4-7669c2861aaa
 begin
-    function get_data(N=600, xmax=3.0, noise=0.5; fun::Function=fun(X) = X * sin(X))
-        # Inputs:
-        d = Distributions.Uniform(-xmax, xmax)
-        X = rand(d, N)
-        X = MLJ.table(reshape(X, :, 1))
-
-        # Outputs:
-        ε = randn(N) .* noise
-        y = @.(fun(X.x1)) + ε
-        y = vec(y)
-        return X, y
-    end
+    function get_data(N = 600, xmax = 3.0, noise = 0.5; fun::Function = fun(X) = X * sin(X))
+        # Inputs:
+        d = Distributions.Uniform(-xmax, xmax)
+        X = rand(d, N)
+        X = MLJ.table(reshape(X, :, 1))
+
+        # Outputs:
+        ε = randn(N) .* noise
+        y = @.(fun(X.x1)) + ε
+        y = vec(y)
+        return X, y
+    end
 end;
 
 # ╔═╡ eb251479-ce0f-4158-8627-099da3516c73
@@ -78,20 +85,27 @@ The slides can be used to change the number of observations `N`, the maximum (an
 
 # ╔═╡ 931ce259-d5fb-4a56-beb8-61a69a2fc09e
 begin
-    data_dict = Dict(
-        "N" => (500:100:5000,1000),
-        "noise" => (0.1:0.1:1.0,0.5),
-        "xmax" => (1:10,5),
-    )
-    @bind data_specs multi_slider(data_dict, title="Parameters")
+    data_dict = Dict(
+        "N" => (500:100:5000, 1000),
+        "noise" => (0.1:0.1:1.0, 0.5),
+        "xmax" => (1:10, 5),
+    )
+    @bind data_specs multi_slider(data_dict, title = "Parameters")
 end
 
 # ╔═╡ f0106aa5-b1c5-4857-af94-2711f80d25a8
 begin
-    X, y = get_data(data_specs.N, data_specs.xmax, data_specs.noise; fun=f)
-    scatter(X.x1, y, label="Observed data")
-    xrange = range(-data_specs.xmax,data_specs.xmax,length=50)
-    plot!(xrange, @.(f(xrange)), lw=4, label="Ground truth", ls=:dash, colour=:black)
+    X, y = get_data(data_specs.N, data_specs.xmax, data_specs.noise; fun = f)
+    scatter(X.x1, y, label = "Observed data")
+    xrange = range(-data_specs.xmax, data_specs.xmax, length = 50)
+    plot!(
+        xrange,
+        @.(f(xrange)),
+        lw = 4,
+        label = "Ground truth",
+        ls = :dash,
+        colour = :black,
+    )
 end
 
 # ╔═╡ 2fe1065e-d1b8-4e3c-930c-654f50349222
@@ -111,7 +125,7 @@ To start with, let's split our data into a training and test set:
 """
 
 # ╔═╡ 3a4fe2bc-387c-4d7e-b45f-292075a01bcd
-train, test = partition(eachindex(y), 0.4, 0.4, shuffle=true);
+train, test = partition(eachindex(y), 0.4, 0.4, shuffle = true);
 
 # ╔═╡ a34b8c07-08e0-4a0e-a0f9-8054b41b038b
 md"Now let's choose a model for our regression task:"
@@ -121,8 +135,8 @@ md"Now let's choose a model for our regression task:"
 
 # ╔═╡ 292978a2-1941-44d3-af5b-13456d16b656
 begin
-    Model = eval(tested_atomic_models[:regression][model_name])
-    model = Model()
+    Model = eval(tested_atomic_models[:regression][model_name])
+    model = Model()
 end;
 
 # ╔═╡ 10340f3f-7981-42da-846a-7599a9edb7f3
@@ -137,7 +151,7 @@ mach_raw = machine(model, X, y);
 md"Then we fit the machine to the training data:"
 
 # ╔═╡ aabfbbfb-7fb0-4f37-9a05-b96207636232
-MLJ.fit!(mach_raw, rows=train, verbosity=0);
+MLJ.fit!(mach_raw, rows = train, verbosity = 0);
 
 # ╔═╡ 5506e1b5-5f2f-4972-a845-9c0434d4b31c
 md"""
@@ -146,13 +160,20 @@ The chart below shows the resulting point predictions for the test data set:
 
 # ╔═╡ 9bb977fe-d7e0-4420-b472-a50e8bd6d94f
 begin
-    Xtest = MLJ.matrix(selectrows(X, test))
-    ytest = y[test]
-    ŷ = MLJ.predict(mach_raw, Xtest)
-    scatter(vec(Xtest), vec(ytest), label="Observed")
-    _order = sortperm(vec(Xtest))
-    plot!(vec(Xtest)[_order], vec(ŷ)[_order], lw=4, label="Predicted")
-    plot!(xrange, @.(f(xrange)), lw=2, ls=:dash, colour=:black, label="Ground truth")
+    Xtest = MLJ.matrix(selectrows(X, test))
+    ytest = y[test]
+    ŷ = MLJ.predict(mach_raw, Xtest)
+    scatter(vec(Xtest), vec(ytest), label = "Observed")
+    _order = sortperm(vec(Xtest))
+    plot!(vec(Xtest)[_order], vec(ŷ)[_order], lw = 4, label = "Predicted")
+    plot!(
+        xrange,
+        @.(f(xrange)),
+        lw = 2,
+        ls = :dash,
+        colour = :black,
+        label = "Ground truth",
+    )
 end
 
 # ╔═╡ 36eef47f-ad55-49be-ac60-7aa1cf50e61a
@@ -186,7 +207,7 @@ Then we fit the machine to the data:
 """
 
 # ╔═╡ 6b574688-ff3c-441a-a616-169685731883
-MLJ.fit!(mach, rows=train, verbosity=0);
+MLJ.fit!(mach, rows = train, verbosity = 0);
 
 # ╔═╡ da6e8f90-a3f9-4d06-86ab-b0f6705bbf54
 md"""
@@ -196,15 +217,22 @@ Now let us look at the predictions for our test data again. The chart below show
 """
 
 # ╔═╡ 797746e9-235f-4fb1-8cdb-9be295b54bbe
-@bind coverage Slider(0.1:0.1:1.0, default=0.8, show_value=true)
+@bind coverage Slider(0.1:0.1:1.0, default = 0.8, show_value = true)
 
 # ╔═╡ ad3e290b-c1f5-4008-81c7-a1a56ab10563
 begin
-    _conf_model = conformal_model(model, coverage=coverage)
-    _mach = machine(_conf_model, X, y)
-    MLJ.fit!(_mach, rows=train, verbosity=0)
-    plot(_mach.model, _mach.fitresult, Xtest, ytest, zoom=0, observed_lab="Test points")
-    plot!(xrange, @.(f(xrange)), lw=2, ls=:dash, colour=:black, label="Ground truth")
+    _conf_model = conformal_model(model, coverage = coverage)
+    _mach = machine(_conf_model, X, y)
+    MLJ.fit!(_mach, rows = train, verbosity = 0)
+    plot(_mach.model, _mach.fitresult, Xtest, ytest, zoom = 0, observed_lab = "Test points")
+    plot!(
+        xrange,
+        @.(f(xrange)),
+        lw = 2,
+        ls = :dash,
+        colour = :black,
+        label = "Ground truth",
+    )
 end
 
 # ╔═╡ b3a88859-0442-41ff-bfea-313437042830
@@ -225,29 +253,32 @@ To verify the marginal coverage property empirically we can look at the empirica
 
 # ╔═╡ d1140af9-608a-4669-9595-aee72ffbaa46
 begin
-    model_evaluation = evaluate!(_mach,operation=MLJ.predict,measure=emp_coverage, verbosity=0);
-    println("Empirical coverage: $(round(model_evaluation.measurement[1], digits=3))")
-    println("Coverage per fold: $(round.(model_evaluation.per_fold[1], digits=3))")
+    model_evaluation =
+        evaluate!(_mach, operation = MLJ.predict, measure = emp_coverage, verbosity = 0)
+    println("Empirical coverage: $(round(model_evaluation.measurement[1], digits=3))")
+    println("Coverage per fold: $(round.(model_evaluation.per_fold[1], digits=3))")
 end
 
 # ╔═╡ f742440b-258e-488a-9c8b-c9267cf1fb99
 begin
-    ncal = Int(conf_model.train_ratio * data_specs.N)
-    Markdown.parse("""
-    The empirical coverage rate should be close to the desired level of coverage. In most cases it will be slightly higher, since ``(1-\\alpha)`` is a lower bound.
-
-    > Found an empirical coverage rate that is slightly lower than desired? The coverage property is "marginal" in the sense that the probability averaged over the randomness in the data. For most purposes a large enough calibration set size (``n>1000``) mitigates that randomness enough. Depending on your choices above, the calibration set may be quite small (currently $ncal), which can lead to **coverage slack** (see Section 3 in the [tutorial](https://arxiv.org/pdf/2107.07511.pdf)).
-
-    ### *So what's happening under the hood?*
-
-    Inductive Conformal Prediction (also referred to as Split Conformal Prediction) broadly speaking works as follows:
-
-    1. Partition the training into a proper training set and a separate calibration set
-    2. Train the machine learning model on the proper training set.
-    3. Using some heuristic notion of uncertainty (e.g. absolute error in the regression case) compute nonconformity scores using the calibration data and the fitted model.
-    4. For the given coverage ratio compute the corresponding quantile of the empirical distribution of nonconformity scores.
-    5. For the given quantile and test sample ``X_{\\text{test}}``, form the corresponding conformal prediction set like so: ``C(X_{\\text{test}})=\\{y:s(X_{\\text{test}},y) \\le \\hat{q}\\}``
-    """)
+    ncal = Int(conf_model.train_ratio * data_specs.N)
+    Markdown.parse(
+        """
+        The empirical coverage rate should be close to the desired level of coverage. In most cases it will be slightly higher, since ``(1-\\alpha)`` is a lower bound.
+
+        > Found an empirical coverage rate that is slightly lower than desired? The coverage property is "marginal" in the sense that the probability averaged over the randomness in the data. For most purposes a large enough calibration set size (``n>1000``) mitigates that randomness enough. Depending on your choices above, the calibration set may be quite small (currently $ncal), which can lead to **coverage slack** (see Section 3 in the [tutorial](https://arxiv.org/pdf/2107.07511.pdf)).
+
+        ### *So what's happening under the hood?*
+
+        Inductive Conformal Prediction (also referred to as Split Conformal Prediction) broadly speaking works as follows:
+
+        1. Partition the training into a proper training set and a separate calibration set
+        2. Train the machine learning model on the proper training set.
+        3. Using some heuristic notion of uncertainty (e.g. absolute error in the regression case) compute nonconformity scores using the calibration data and the fitted model.
+        4. For the given coverage ratio compute the corresponding quantile of the empirical distribution of nonconformity scores.
+        5. For the given quantile and test sample ``X_{\\text{test}}``, form the corresponding conformal prediction set like so: ``C(X_{\\text{test}})=\\{y:s(X_{\\text{test}},y) \\le \\hat{q}\\}``
+        """,
+    )
 end
 
 # ╔═╡ 74444c01-1a0a-47a7-9b14-749946614f07
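
As an aside, steps 3 to 5 of the recipe quoted in the hunk above can be made concrete with a minimal standalone sketch; the calibration residuals below are invented for illustration and are not taken from the notebook:

# Hypothetical calibration targets and point predictions; in the notebook these
# would come from the fitted model on the held-out calibration split.
y_cal = [1.0, 0.4, -0.2, 1.3, 0.8, -0.5]
ŷ_cal = [0.8, 0.5, 0.1, 1.0, 0.6, -0.1]

# Step 3: nonconformity scores (absolute residuals in the regression case).
scores = abs.(y_cal .- ŷ_cal)

# Step 4: conformal quantile for a target coverage of 80% (α = 0.2), taken as
# the ⌈(n+1)(1-α)⌉-th smallest score.
n = length(scores)
α = 0.2
k = ceil(Int, (n + 1) * (1 - α))
q̂ = sort(scores)[min(k, n)]

# Step 5: the prediction set for a new point prediction ŷ is the interval [ŷ - q̂, ŷ + q̂].
prediction_interval(ŷ) = (ŷ - q̂, ŷ + q̂)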
@@ -267,14 +298,25 @@ Quite cool, right? Using a single API call we are able to generate rigorous pred
 
 
 # ╔═╡ 824bd383-2fcb-4888-8ad1-260c85333edf
-@bind xmax_ood Slider(data_specs.xmax:(data_specs.xmax+5), default=(data_specs.xmax), show_value=true)
+@bind xmax_ood Slider(
+    data_specs.xmax:(data_specs.xmax+5),
+    default = (data_specs.xmax),
+    show_value = true,
+)
 
 # ╔═╡ 072cc72d-20a2-4ee9-954c-7ea70dfb8eea
 begin
-    Xood, yood = get_data(data_specs.N, xmax_ood, data_specs.noise; fun=f)
-    plot(_mach.model, _mach.fitresult, Xood, yood, zoom=0, observed_lab="Test points")
-    xood_range = range(-xmax_ood,xmax_ood,length=50)
-    plot!(xood_range, @.(f(xood_range)), lw=2, ls=:dash, colour=:black, label="Ground truth")
+    Xood, yood = get_data(data_specs.N, xmax_ood, data_specs.noise; fun = f)
+    plot(_mach.model, _mach.fitresult, Xood, yood, zoom = 0, observed_lab = "Test points")
+    xood_range = range(-xmax_ood, xmax_ood, length = 50)
+    plot!(
+        xood_range,
+        @.(f(xood_range)),
+        lw = 2,
+        ls = :dash,
+        colour = :black,
+        label = "Ground truth",
+    )
 end
 
 # ╔═╡ 4f41ec7c-aedd-475f-942d-33e2d1174902
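
Aside from the reformatting above (keyword arguments written as `key = value`, long calls split across lines, indentation normalized), the notebook's conformal-regression workflow is unchanged. As a rough, self-contained sketch of that workflow: the notebook selects models via `tested_atomic_models[:regression]`, which is not reproduced here, so a plain `LinearRegressor` from MLJLinearModels stands in, and the toy data merely imitates the notebook's `get_data` helper:

using ConformalPrediction
using MLJ
using MLJLinearModels

# Toy data: y = x*sin(x) + noise, mirroring the notebook's get_data helper.
X = MLJ.table(reshape(collect(range(-3.0, 3.0, length = 500)), :, 1))
y = [x * sin(x) + 0.5 * randn() for x in X.x1]

train, test = partition(eachindex(y), 0.4, 0.4, shuffle = true)

# Wrap any MLJ regressor in a conformal model with the target coverage.
model = LinearRegressor()
conf_model = conformal_model(model, coverage = 0.8)
mach = machine(conf_model, X, y)
MLJ.fit!(mach, rows = train, verbosity = 0)

# Predictions are now intervals rather than points.
ŷ = MLJ.predict(mach, selectrows(X, test))

# Empirical coverage, as in the notebook's evaluate! cell (emp_coverage is
# assumed to be the empirical-coverage measure used there).
model_evaluation =
    evaluate!(mach, operation = MLJ.predict, measure = emp_coverage, verbosity = 0)
println("Empirical coverage: $(round(model_evaluation.measurement[1], digits = 3))")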

dev/quick_tour/utils.jl

+7 −7
@@ -1,19 +1,19 @@
-function multi_slider(vals::Dict; title="")
-
+function multi_slider(vals::Dict; title = "")
+
     return PlutoUI.combine() do Child
-
+
         inputs = [
             md""" $(_name): $(
             Child(_name, Slider(_vals[1], default=_vals[2], show_value=true))
             )"""
-
+
             for (_name, _vals) in vals
         ]
-
+
         md"""
         #### $title
         $(inputs)
         """
     end
-
-end
+
+end
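
The utils.jl changes are whitespace-only. For reference, the notebook uses this helper roughly as follows; the dictionary values below are copied from the notebook's "Parameters" cell, and everything else is illustrative:

using PlutoUI
include("utils.jl")   # brings multi_slider into scope

# Each entry maps a parameter name to (values, default); multi_slider combines
# one Slider per entry into a single PlutoUI widget.
data_dict = Dict(
    "N" => (500:100:5000, 1000),
    "noise" => (0.1:0.1:1.0, 0.5),
    "xmax" => (1:10, 5),
)
widget = multi_slider(data_dict, title = "Parameters")

# In the notebook this widget is bound with `@bind data_specs ...`, after which
# data_specs.N, data_specs.noise and data_specs.xmax hold the chosen values.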

0 commit comments
