Skip to content

Commit 82af29c

Browse files
authored
Regularized Regression with interactive controls
1 parent ee18ca4 commit 82af29c

File tree

1 file changed

+368
-0
lines changed

1 file changed

+368
-0
lines changed

Diff for: Interactive ML-1.ipynb

+368
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,368 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# Interactive Machine Learning Demo"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": 1,
13+
"metadata": {
14+
"collapsed": true
15+
},
16+
"outputs": [],
17+
"source": [
18+
"from ipywidgets import interact, interactive, IntSlider, Layout, interact_manual\n",
19+
"import ipywidgets as widgets\n",
20+
"from IPython.display import display\n",
21+
"\n",
22+
"import numpy as np\n",
23+
"import matplotlib.pyplot as plt\n",
24+
"#%matplotlib inline\n",
25+
"\n",
26+
"import pandas as pd"
27+
]
28+
},
29+
{
30+
"cell_type": "markdown",
31+
"metadata": {},
32+
"source": [
33+
"## Linear Regression and Regularization"
34+
]
35+
},
36+
{
37+
"cell_type": "markdown",
38+
"metadata": {},
39+
"source": [
40+
"### Variables"
41+
]
42+
},
43+
{
44+
"cell_type": "code",
45+
"execution_count": 2,
46+
"metadata": {
47+
"collapsed": true
48+
},
49+
"outputs": [],
50+
"source": [
51+
"N_samples = 25\n",
52+
"x_min = -5\n",
53+
"x_max = 5\n",
54+
"x1= np.linspace(x_min,x_max,N_samples*5)\n",
55+
"x= np.random.choice(x1,size=N_samples)\n",
56+
"noise_std=1\n",
57+
"noise_mean=0\n",
58+
"noise_magnitude = 2"
59+
]
60+
},
61+
{
62+
"cell_type": "markdown",
63+
"metadata": {},
64+
"source": [
65+
"### Function definition (generates noisy samples around the ideal curve and plots both)"
66+
]
67+
},
68+
{
69+
"cell_type": "code",
70+
"execution_count": 3,
71+
"metadata": {
72+
"collapsed": true
73+
},
74+
"outputs": [],
75+
"source": [
76+
"def func_gen(N_samples,x_min,x_max,noise_magnitude,noise_sd,noise_mean):\n",
77+
" x1= np.linspace(x_min,x_max,N_samples*5)\n",
78+
" x= np.random.choice(x1,size=N_samples)\n",
79+
" y=2*x-0.6*x**2+0.2*x**3+18*np.sin(x)\n",
80+
" y1=2*x1-0.6*x1**2+0.2*x1**3+18*np.sin(x1)\n",
81+
" y= y+noise_magnitude*np.random.normal(loc=noise_mean,scale=noise_sd,size=N_samples)\n",
82+
" plt.figure(figsize=(8,5))\n",
83+
" plt.plot(x1,y1,c='k',lw=2)\n",
84+
" plt.scatter(x,y,edgecolors='k',c='yellow',s=60)\n",
85+
" plt.grid(True)\n",
86+
" plt.show()\n",
87+
" return (x,y,x1,y1)"
88+
]
89+
},
90+
{
91+
"cell_type": "markdown",
92+
"metadata": {},
93+
"source": [
94+
"### Call the `interactive` widget with the data-generating function, which re-plots the data in real time"
95+
]
96+
},
97+
{
98+
"cell_type": "code",
99+
"execution_count": 4,
100+
"metadata": {
101+
"scrolled": false
102+
},
103+
"outputs": [
104+
{
105+
"data": {
106+
"application/vnd.jupyter.widget-view+json": {
107+
"model_id": "2838efed54074b06bec67d01ad5bee7e",
108+
"version_major": 2,
109+
"version_minor": 0
110+
},
111+
"text/html": [
112+
"<p>Failed to display Jupyter Widget of type <code>interactive</code>.</p>\n",
113+
"<p>\n",
114+
" If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean\n",
115+
" that the widgets JavaScript is still loading. If this message persists, it\n",
116+
" likely means that the widgets JavaScript library is either not installed or\n",
117+
" not enabled. See the <a href=\"https://ipywidgets.readthedocs.io/en/stable/user_install.html\">Jupyter\n",
118+
" Widgets Documentation</a> for setup instructions.\n",
119+
"</p>\n",
120+
"<p>\n",
121+
" If you're reading this message in another frontend (for example, a static\n",
122+
" rendering on GitHub or <a href=\"https://nbviewer.jupyter.org/\">NBViewer</a>),\n",
123+
" it may mean that your frontend doesn't currently support widgets.\n",
124+
"</p>\n"
125+
],
126+
"text/plain": [
127+
"interactive(children=(Dropdown(description='N_samples', options={'Low (50 samples)': 50, 'High (200 samples)': 200}, value=50), IntSlider(value=-3, description='x_min', max=0, min=-5), IntSlider(value=2, description='x_max', max=5), IntSlider(value=2, description='noise_magnitude', max=5), FloatSlider(value=0.5, description='noise_sd', max=1.0, min=0.1), FloatSlider(value=0.0, description='noise_mean', max=2.0, min=-2.0, step=0.5), Output()), _dom_classes=('widget-interact',))"
128+
]
129+
},
130+
"metadata": {},
131+
"output_type": "display_data"
132+
}
133+
],
134+
"source": [
135+
"p=interactive(func_gen,N_samples={'Low (50 samples)':50,'High (200 samples)':200},x_min=(-5,0,1), x_max=(0,5,1),\n",
136+
" noise_magnitude=(0,5,1),noise_sd=(0.1,1,0.1),noise_mean=(-2,2,0.5))\n",
137+
"display(p)"
138+
]
139+
},
140+
{
141+
"cell_type": "markdown",
142+
"metadata": {},
143+
"source": [
144+
"### Extract the data"
145+
]
146+
},
147+
{
148+
"cell_type": "code",
149+
"execution_count": 5,
150+
"metadata": {
151+
"collapsed": true
152+
},
153+
"outputs": [],
154+
"source": [
155+
"x,y,x1,y1 = p.result"
156+
]
157+
},
158+
{
159+
"cell_type": "markdown",
160+
"metadata": {},
161+
"source": [
162+
"### Load scikit-learn libraries"
163+
]
164+
},
165+
{
166+
"cell_type": "code",
167+
"execution_count": 6,
168+
"metadata": {
169+
"collapsed": true
170+
},
171+
"outputs": [],
172+
"source": [
173+
"from sklearn.model_selection import train_test_split\n",
174+
"from sklearn.preprocessing import PolynomialFeatures\n",
175+
"from sklearn.linear_model import LassoCV\n",
176+
"from sklearn.linear_model import RidgeCV\n",
177+
"from sklearn.linear_model import LinearRegression\n",
178+
"from sklearn.pipeline import make_pipeline"
179+
]
180+
},
181+
{
182+
"cell_type": "markdown",
183+
"metadata": {},
184+
"source": [
185+
"### Machine learning (regression) model encapsulated within a function "
186+
]
187+
},
188+
{
189+
"cell_type": "code",
190+
"execution_count": 9,
191+
"metadata": {
192+
"collapsed": true
193+
},
194+
"outputs": [],
195+
"source": [
196+
"lasso_eps = 0.01\n",
197+
"lasso_nalpha=20\n",
198+
"lasso_iter=3000\n",
199+
"ridge_alphas = (0.001,0.01,0.1,1)\n",
200+
"\n",
201+
"def func_fit(model_type,test_size,degree):\n",
202+
" X_train, X_test, y_train, y_test = train_test_split(x,y,test_size=test_size,random_state=55)\n",
203+
" \n",
204+
" t1=np.min(X_test)\n",
205+
" t2=np.max(X_test)\n",
206+
" t3=np.min(y_test)\n",
207+
" t4=np.max(y_test)\n",
208+
" \n",
209+
" t5=np.min(X_train)\n",
210+
" t6=np.max(X_train)\n",
211+
" t7=np.min(y_train)\n",
212+
" t8=np.max(y_train)\n",
213+
" \n",
214+
" posx_test=t1+(t2-t1)*0.7\n",
215+
" posx_train=t5+(t6-t5)*0.7\n",
216+
" posy_test=t3+(t4-t3)*0.2\n",
217+
" posy_train=t7+(t8-t7)*0.2\n",
218+
" \n",
219+
" if (model_type=='Linear regression'):\n",
220+
" model = make_pipeline(PolynomialFeatures(degree,interaction_only=False), \n",
221+
" LinearRegression(normalize=True))\n",
222+
" if (model_type=='LASSO with CV'): \n",
223+
" model = make_pipeline(PolynomialFeatures(degree,interaction_only=False), \n",
224+
" LassoCV(eps=lasso_eps,n_alphas=lasso_nalpha,max_iter=lasso_iter,normalize=True,cv=5))\n",
225+
" \n",
226+
" if (model_type=='Ridge with CV'): \n",
227+
" model = make_pipeline(PolynomialFeatures(degree,interaction_only=False), \n",
228+
" RidgeCV(alphas=ridge_alphas,normalize=True,cv=5))\n",
229+
" \n",
230+
" X_train=X_train.reshape(-1,1)\n",
231+
" X_test=X_test.reshape(-1,1)\n",
232+
" \n",
233+
" model.fit(X_train,y_train)\n",
234+
" \n",
235+
" train_pred = np.array(model.predict(X_train))\n",
236+
" train_score = model.score(X_train,y_train)\n",
237+
" \n",
238+
" test_pred = np.array(model.predict(X_test))\n",
239+
" test_score = model.score(X_test,y_test)\n",
240+
" \n",
241+
" RMSE_test=np.sqrt(np.mean(np.square(test_pred-y_test)))\n",
242+
" RMSE_train=np.sqrt(np.mean(np.square(train_pred-y_train)))\n",
243+
" \n",
244+
" print(\"Test score: {}, Training score: {}\".format(test_score,train_score))\n",
245+
" \n",
246+
" print(\"RMSE Test: {}, RMSE train: {}\".format(RMSE_test,RMSE_train))\n",
247+
" \n",
248+
" plt.figure(figsize=(12,4))\n",
249+
" \n",
250+
" plt.subplot(1,2,1)\n",
251+
" plt.title(\"Test set performance\\n\",fontsize=16)\n",
252+
" plt.xlabel(\"X-test\",fontsize=13)\n",
253+
" plt.ylabel(\"y-test\",fontsize=13)\n",
254+
" plt.scatter(X_test,y_test,edgecolors='k',c='blue',s=60)\n",
255+
" plt.scatter(X_test,test_pred,edgecolors='k',c='yellow',s=60)\n",
256+
" plt.grid(True)\n",
257+
" plt.legend(['Actual test values','Predicted values'])\n",
258+
" plt.text(x=posx_test,y=posy_test,s='Test score: %.3f'%(test_score),fontsize=15)\n",
259+
" \n",
260+
" plt.subplot(1,2,2)\n",
261+
" plt.title(\"Training set performance\\n\",fontsize=16)\n",
262+
" plt.xlabel(\"X-train\",fontsize=13)\n",
263+
" plt.ylabel(\"y-train\",fontsize=13)\n",
264+
" plt.scatter(X_train,y_train,c='blue')\n",
265+
" plt.scatter(X_train,train_pred,c='yellow')\n",
266+
" plt.grid(True)\n",
267+
" plt.legend(['Actual training values','Fitted values'])\n",
268+
" plt.text(x=posx_train,y=posy_train,s='Training score: %.3f'%(train_score),fontsize=15)\n",
269+
" \n",
270+
" plt.show()\n",
271+
" \n",
272+
" return (train_score,test_score) "
273+
]
274+
},
275+
{
276+
"cell_type": "markdown",
277+
"metadata": {},
278+
"source": [
279+
"### Run the encapsulated ML function with the ipywidgets `interactive` control"
280+
]
281+
},
282+
{
283+
"cell_type": "code",
284+
"execution_count": 11,
285+
"metadata": {},
286+
"outputs": [
287+
{
288+
"data": {
289+
"application/vnd.jupyter.widget-view+json": {
290+
"model_id": "c07f081012e6401a8e5a47a104103310",
291+
"version_major": 2,
292+
"version_minor": 0
293+
},
294+
"text/html": [
295+
"<p>Failed to display Jupyter Widget of type <code>interactive</code>.</p>\n",
296+
"<p>\n",
297+
" If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean\n",
298+
" that the widgets JavaScript is still loading. If this message persists, it\n",
299+
" likely means that the widgets JavaScript library is either not installed or\n",
300+
" not enabled. See the <a href=\"https://ipywidgets.readthedocs.io/en/stable/user_install.html\">Jupyter\n",
301+
" Widgets Documentation</a> for setup instructions.\n",
302+
"</p>\n",
303+
"<p>\n",
304+
" If you're reading this message in another frontend (for example, a static\n",
305+
" rendering on GitHub or <a href=\"https://nbviewer.jupyter.org/\">NBViewer</a>),\n",
306+
" it may mean that your frontend doesn't currently support widgets.\n",
307+
"</p>\n"
308+
],
309+
"text/plain": [
310+
"interactive(children=(RadioButtons(description='Choose Model', layout=Layout(width='250px'), options=('Linear regression', 'LASSO with CV', 'Ridge with CV'), style=DescriptionStyle(description_width='initial'), value='Linear regression'), Dropdown(description='Test set size', options={'10% of data': 0.1, '20% of data': 0.2, '30% of data': 0.3, '40% of data': 0.4, '50% of data': 0.5}, style=DescriptionStyle(description_width='initial'), value=0.1), IntSlider(value=1, continuous_update=False, description='Polynomial degree', max=10, min=1), Output(layout=Layout(height='350px'))), _dom_classes=('widget-interact',))"
311+
]
312+
},
313+
"metadata": {},
314+
"output_type": "display_data"
315+
}
316+
],
317+
"source": [
318+
"style = {'description_width': 'initial'}\n",
319+
"# Continuous_update = False for IntSlider control to stop continuous model evaluation while the slider is being dragged\n",
320+
"m = interactive(func_fit,model_type=widgets.RadioButtons(options=['Linear regression','LASSO with CV', 'Ridge with CV'],\n",
321+
" description = \"Choose Model\",style=style,\n",
322+
" layout=Layout(width='250px')),\n",
323+
" test_size=widgets.Dropdown(options={\"10% of data\":0.1,\"20% of data\":0.2, \"30% of data\":0.3,\n",
324+
" \"40% of data\":0.4,\"50% of data\":0.5},\n",
325+
" description=\"Test set size\",style=style),\n",
326+
" degree=widgets.IntSlider(min=1,max=10,step=1,description= 'Polynomial degree',\n",
327+
" stye=style,continuous_update=False))\n",
328+
"\n",
329+
"# Set the height of the control.children[-1] so that the output does not jump and flicker\n",
330+
"output = m.children[-1]\n",
331+
"output.layout.height = '350px'\n",
332+
"\n",
333+
"# Display the control\n",
334+
"display(m)"
335+
]
336+
},
337+
{
338+
"cell_type": "code",
339+
"execution_count": null,
340+
"metadata": {
341+
"collapsed": true
342+
},
343+
"outputs": [],
344+
"source": []
345+
}
346+
],
347+
"metadata": {
348+
"kernelspec": {
349+
"display_name": "Python 3",
350+
"language": "python",
351+
"name": "python3"
352+
},
353+
"language_info": {
354+
"codemirror_mode": {
355+
"name": "ipython",
356+
"version": 3
357+
},
358+
"file_extension": ".py",
359+
"mimetype": "text/x-python",
360+
"name": "python",
361+
"nbconvert_exporter": "python",
362+
"pygments_lexer": "ipython3",
363+
"version": "3.6.2"
364+
}
365+
},
366+
"nbformat": 4,
367+
"nbformat_minor": 2
368+
}

0 commit comments

Comments
 (0)