1d_gp.py
"""
"""
# Please see `linear_ode.md` for initialization documentation.
#
import os

import numpy as np
import pandas as pd

from xstanpy.incremental import *
from xstanpy.plot import *

os.environ['CMDSTAN'] = '../cmdstan'
# While the following looks identical to the `linear_ode.md` example,
# under the hood there is one important difference: for the linear ODE
# example the dimensionality of the posterior was independent of the
# `slice_variables` and the `refinement_variable`, whereas for the Gaussian process model
# the value of `gp_no_basis_functions` influences the dimensionality of the posterior.
# This does not affect Stan's regular warm-up/sampling or the `Incremental` warm-up,
# but it forces us to change the `Adaptive` warm-up.
# **Currently, the below only works because there is some
# [prolongation](https://en.wikipedia.org/wiki/Multigrid_method) happening
# under the hood. For almost any other model, simply specifying a data variable
# which influences the dimensionality of the posterior as the `refinement_variable`
# will not work!**
model = Model(
'stan/1d_gp.stan',
slice_variables={'no_observations': ('xi_observed', 'y_observed')},
refinement_variable='gp_no_basis_functions'
)
model.compilation_process.debug()
#~ There are two main obstacles to using the `Adaptive` warm-up to reconfigure
# posteriors for which refining the approximation increases the dimensionality
# of the approximate posterior:
#
# * First, we have to be able to take draws from the lower dimensional posterior
# and "prolongate" them onto the higher dimensional posterior.
# * Second, to be able to do PSIS, we have to determine the appropriate
# (log) posterior density of these prolongated draws.
#
# For the prolongation there are several options, but two appear most plausible to me
# in the current setting:
#
# * fill the values of the "extra" basis function weights with zero, or
# * fill the values of the "extra" basis function weights with draws from the prior.
#
# If we fill the extra values with zero, then the states of the Gaussian process
# will stay the same after prolongation. If, on the other hand, we fill the extra
# values with draws from the prior, the states of the Gaussian process will generally
# not stay the same.
#
# While the first option may sound reasonable, it is the wrong choice[^2]
# in this setting, because the raw IS-weights passed to PSIS would all be identical.
# With the second option, on the other hand, we can reinterpret the draws from the lower
# dimensional posterior as stemming from a model in the higher dimensional space
# where the **basis functions** of the new **basis function weights** are identically zero.
# **The IS-weights are then fully determined by the likelihood.**
#
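# As an illustrative sketch of the second option (a hypothetical helper, not part of
# `xstanpy`): the extra basis function weights get filled with draws from their prior.
# Because the proposal density of such a prolongated draw is the product of the lower
# dimensional posterior density and that prior, the prior terms cancel in the importance
# ratio and, up to a constant, only the likelihood ratio remains.
def prolongate_with_prior_draws(unit_weights, target_no_basis_functions, rng):
    """Pad draws of the standard normal basis function weights with prior draws.

    Illustrative only: up to a constant, the raw log IS-weights of the padded draws
    reduce to log p_new(y | theta, theta_extra) - log p_old(y | theta).
    """
    no_draws, no_basis_functions = unit_weights.shape
    extra = rng.standard_normal(
        (no_draws, target_no_basis_functions - no_basis_functions)
    )
    return np.concatenate([unit_weights, extra], axis=1)
# For example, `prolongate_with_prior_draws(draws, 256, np.random.default_rng(0))`
# would pad a `(no_draws, 128)` array of weights up to 256 basis functions.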
# Implementation-wise, the following currently happens:
#
# * The class `Adaptive` calls the function `Adaptive.data_reconfiguration` for each
# intermediate posterior.
# * There, `DrawsPool.psis` gets called.
# * This passes the prolongation bucket
# via `DrawsPool.draws_for` and `PosteriorDraws.draws_for` to `Model.draws_for`.
# * There, if the posteriors have the same dimensionality, nothing special happens.
# If, however, the dimensionality of the target posterior is larger than the dimensionality
# of the source posterior, the unconstrained parameter values get padded with draws
# from a standard normal. **This is very non-portable and has to be changed. For
# the current model this works because only the dimensionality of the `unit_weights`
# changes, and these in fact have a standard normal prior.**
# * Back in `DrawsPool.psis`, the prolongated draws are used to compute the
# log-likelihood difference.
# * The only extra work happens in `HMC.adaptation_for` via `HMC.pooled_metric_for`,
# where the metric from the lower dimensional posterior gets extended by an identity
# matrix in the new part of the diagonal (a rough sketch follows below). **This is of course also not portable**.
#
# [^2]: Or maybe not?
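# As a rough sketch of that last point (a hypothetical helper, not the actual
# `HMC.pooled_metric_for` code): the adapted entries of a diagonal metric are kept and
# the new coordinates, i.e. the freshly added basis function weights, get unit entries.
def extend_diagonal_metric(metric_diagonal, target_dim):
    """Extend an adapted diagonal metric with an identity block (illustrative only)."""
    return np.concatenate([
        metric_diagonal,
        np.ones(target_dim - len(metric_diagonal))
    ])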
# What follows now is equivalent to what happens for the `linear_ode.md` example, with the
# only difference that different configurations have a different number of observations
# instead of a different number of (ODE) states.
configs = dict(
weakly_informative=10,
informative=100,
strongly_informative=1000,
incredibly_informative=10000,
)
for config_name, no_observations in configs.items():
np.random.seed(0)
config = Data(dict(no_observations=no_observations), name=config_name)
x_observed = np.cumsum(np.random.uniform(size=no_observations))
x_observed = x_observed / x_observed[-1]
xi_observed = 2 * x_observed - 1
prior_data = dict(
no_observations=no_observations,
xi_observed=xi_observed,
y_observed=np.zeros(no_observations),
# Make data more interesting
gp_lengthscale_multiplier=.1,
gp_sigma_multiplier=1,
gp_support_fraction=5/6,
gp_no_basis_functions=256,
likelihood=0
)
prior = Posterior(model, prior_data)
prior_fit = HMC.stan_regular(prior, warmup_no_draws=100, sampling_no_draws=100)
for idx in np.random.choice(len(prior_fit.samples.gp_unit_lengthscale.array), size=3):
posterior = prior.updated(dict(
y_observed=prior_fit.samples.y_generated.array[idx],
likelihood=1
))
#~ This again looks deceptively similar to what is done for the linear ODE example.
# **While with the proper preparation the call signature should stay the same,
# this preparation generally still has to happen in a custom `Model` subclass.**
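# Purely as a hedged sketch (hypothetical class and body; only the existence of
# `Model.draws_for` is taken from the description above), such a subclass might hook in
# roughly like this:
#
#     class CustomGPModel(Model):
#         def draws_for(self, *args, **kwargs):
#             # A real implementation would perform the model-specific prolongation
#             # here, e.g. map the old basis function weights onto the refined basis
#             # instead of relying on the generic standard normal padding.
#             return super().draws_for(*args, **kwargs)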
adaptive = Adaptive(
posterior,
callback=lambda sequence: print(sequence[-1].posterior.integer_data),
)
#~ This is again just the same as for the linear ODE example.
#
fits = {
'adaptive': adaptive,
}
# To lower the runtime we only look at the following configuration
for warmup_no_draws in [200]:
key = f'{warmup_no_draws:04d}'
fits[f'stan_{key}'] = HMC.stan_regular(
posterior,
warmup_no_draws=warmup_no_draws,
sampling_no_draws=100
)
df = pd.DataFrame({
name: fit.information for name, fit in fits.items()
}).T.fillna(dict(relative_efficiency=1, pareto_shape_estimate=-np.inf))
suptitle = df.to_string(float_format='{:.2f}'.format)
print(suptitle)
Figure([
[
Ax([
LinePlot(prior_fit.samples.y_generated.array[idx], color='black', marker='x', linestyle='', label='measurements'),
] + [
FillPlot(fit.samples.y_computed.array, alpha=.5, label=key)
for key, fit in fits.items()
] + [
LinePlot(prior_fit.samples.y_computed.array[idx], color='black', label='true values'),
],
title='values & measurements',
show_legend=False
)
],
[
Ax([
FillPlot(fit.samples.gp_scaled_weights.array, alpha=.5, label=key)
for key, fit in fits.items()
] + [
FillPlot([
-prior_fit.samples.gp_scaled_weights_sigma.array[idx],
prior_fit.samples.gp_scaled_weights_sigma.array[idx]
], alpha=.25, color='black', label='|z|<=1'),
LinePlot(prior_fit.samples.gp_scaled_weights.array[idx], color='black', label='true values'),
],
title='scaled basis function weights',
xlabel='basis function index',
ylabel='scaled weight',
xlim=[0, 2*adaptive.sequence[-1].posterior.data['gp_no_basis_functions']],
show_legend=False
)
],
[
Ax([
FillPlot(fit.samples.gp_unit_weights.array, alpha=.5, label=key)
for key, fit in fits.items()
] + [
FillPlot([
-1-0*prior_fit.samples.gp_scaled_weights_sigma.array[idx],
+1+0*prior_fit.samples.gp_scaled_weights_sigma.array[idx]
], alpha=.25, color='black', label='|z|<=1'),
LinePlot(prior_fit.samples.gp_unit_weights.array[idx], color='black', label='true values'),
],
title='unit basis function weights',
xlabel='basis function index',
ylabel='unit weight',
xlim=[0, 2*adaptive.sequence[-1].posterior.data['gp_no_basis_functions']],
show_legend=False
)
],
[
Ax([
ScatterPlot(
fit.samples.gp_unit_lengthscale.array,
fit.samples.gp_unit_sigma.array,
label=key
)
for key, fit in fits.items()
] + [
VerticalLine(
prior_fit.samples.gp_unit_lengthscale.array[idx], color='black'
),
HorizontalLine(
prior_fit.samples.gp_unit_sigma.array[idx], color='black'
)
],
xlabel='gp_unit_lengthscale', ylabel='gp_unit_sigma',
xscale='log', yscale='log')
]
],
col_width=16,
suptitle=suptitle,
suptitle_family='monospace',
show_legend='row'
).save(
f'figs/{model.name}/{config.name}/{posterior.hash}.png'
)