Regularized Regression with interactive controls

tirthajyoti · web-flow · commit 82af29c2862b · 2017-12-12T18:42:58.000-08:00
diff --git a/Interactive ML-1.ipynb b/Interactive ML-1.ipynb
@@ -0,0 +1,368 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Interactive Machine Learning Demo"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "from ipywidgets import interact, interactive, IntSlider, Layout, interact_manual\n",
+    "import ipywidgets as widgets\n",
+    "from IPython.display import display\n",
+    "\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "#%matplotlib inline\n",
+    "\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Linear Regression and Regularization"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Variables"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Sampling setup: a grid of 5*N_samples evenly spaced points on [x_min, x_max]\n",
+    "N_samples = 25\n",
+    "x_min = -5\n",
+    "x_max = 5\n",
+    "x1 = np.linspace(x_min, x_max, N_samples * 5)\n",
+    "# N_samples grid points drawn with np.random.choice (unseeded, so they differ per run)\n",
+    "x = np.random.choice(x1, size=N_samples)\n",
+    "\n",
+    "# Noise settings\n",
+    "noise_std = 1\n",
+    "noise_mean = 0\n",
+    "noise_magnitude = 2"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Function definitions (ideal fitting function and actual data generating function with noise)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def func_gen(N_samples,x_min,x_max,noise_magnitude,noise_sd,noise_mean):\n",
+    "    \"\"\"Sample noisy points from a fixed curve, plot them, and return the data.\n",
+    "\n",
+    "    The curve is 2*x - 0.6*x**2 + 0.2*x**3 + 18*sin(x). A grid of 5*N_samples\n",
+    "    evenly spaced points spans [x_min, x_max]; N_samples of those grid points are\n",
+    "    drawn with np.random.choice, and Gaussian noise (mean noise_mean, standard\n",
+    "    deviation noise_sd) scaled by noise_magnitude is added to their curve values.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    tuple\n",
+    "        (x, y, x1, y1): the drawn points, their noisy values, the full grid,\n",
+    "        and the noise-free curve evaluated on the grid.\n",
+    "    \"\"\"\n",
+    "    def curve(t):\n",
+    "        # Noise-free generating function, shared by the samples and the grid\n",
+    "        return 2*t-0.6*t**2+0.2*t**3+18*np.sin(t)\n",
+    "\n",
+    "    grid = np.linspace(x_min,x_max,N_samples*5)\n",
+    "    picked = np.random.choice(grid,size=N_samples)\n",
+    "    noise = np.random.normal(loc=noise_mean,scale=noise_sd,size=N_samples)\n",
+    "    noisy = curve(picked)+noise_magnitude*noise\n",
+    "    grid_curve = curve(grid)\n",
+    "\n",
+    "    # Black line: noise-free curve; yellow markers: noisy samples\n",
+    "    plt.figure(figsize=(8,5))\n",
+    "    plt.plot(grid,grid_curve,c='k',lw=2)\n",
+    "    plt.scatter(picked,noisy,edgecolors='k',c='yellow',s=60)\n",
+    "    plt.grid(True)\n",
+    "    plt.show()\n",
+    "\n",
+    "    return (picked,noisy,grid,grid_curve)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Call the 'interactive' widget with the data-generating function, which also plots the data in real time"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "2838efed54074b06bec67d01ad5bee7e",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/html": [
+       "<p>Failed to display Jupyter Widget of type <code>interactive</code>.</p>\n",
+       "<p>\n",
+       "  If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean\n",
+       "  that the widgets JavaScript is still loading. If this message persists, it\n",
+       "  likely means that the widgets JavaScript library is either not installed or\n",
+       "  not enabled. See the <a href=\"https://ipywidgets.readthedocs.io/en/stable/user_install.html\">Jupyter\n",
+       "  Widgets Documentation</a> for setup instructions.\n",
+       "</p>\n",
+       "<p>\n",
+       "  If you're reading this message in another frontend (for example, a static\n",
+       "  rendering on GitHub or <a href=\"https://nbviewer.jupyter.org/\">NBViewer</a>),\n",
+       "  it may mean that your frontend doesn't currently support widgets.\n",
+       "</p>\n"
+      ],
+      "text/plain": [
+       "interactive(children=(Dropdown(description='N_samples', options={'Low (50 samples)': 50, 'High (200 samples)': 200}, value=50), IntSlider(value=-3, description='x_min', max=0, min=-5), IntSlider(value=2, description='x_max', max=5), IntSlider(value=2, description='noise_magnitude', max=5), FloatSlider(value=0.5, description='noise_sd', max=1.0, min=0.1), FloatSlider(value=0.0, description='noise_mean', max=2.0, min=-2.0, step=0.5), Output()), _dom_classes=('widget-interact',))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# One control per func_gen argument: the dict becomes a dropdown,\n",
+    "# each (min, max, step) tuple becomes a slider.\n",
+    "p = interactive(\n",
+    "    func_gen,\n",
+    "    N_samples={'Low (50 samples)':50,'High (200 samples)':200},\n",
+    "    x_min=(-5,0,1),\n",
+    "    x_max=(0,5,1),\n",
+    "    noise_magnitude=(0,5,1),\n",
+    "    noise_sd=(0.1,1,0.1),\n",
+    "    noise_mean=(-2,2,0.5),\n",
+    ")\n",
+    "display(p)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Extract the data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# p.result is the (x, y, x1, y1) tuple returned by func_gen through the widget\n",
+    "x, y, x1, y1 = p.result"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Load scikit-learn libraries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.preprocessing import PolynomialFeatures\n",
+    "from sklearn.linear_model import LassoCV\n",
+    "from sklearn.linear_model import RidgeCV\n",
+    "from sklearn.linear_model import LinearRegression\n",
+    "from sklearn.pipeline import make_pipeline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Machine learning (regression) model encapsulated within a function "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# LassoCV settings (passed as eps, n_alphas and max_iter)\n",
+    "lasso_eps = 0.01\n",
+    "lasso_nalpha = 20\n",
+    "lasso_iter = 3000\n",
+    "# Candidate regularization strengths handed to RidgeCV\n",
+    "ridge_alphas = (0.001, 0.01, 0.1, 1)\n",
+    "\n",
+    "def func_fit(model_type,test_size,degree):\n",
+    "    \"\"\"Fit the selected polynomial regression model and plot test/training fits.\n",
+    "\n",
+    "    Uses the notebook-level arrays `x` and `y` plus the `lasso_*` / `ridge_alphas`\n",
+    "    settings; none of them are passed in, so they must exist before this runs.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    model_type : str\n",
+    "        'Linear regression', 'LASSO with CV' or 'Ridge with CV'.\n",
+    "    test_size : float\n",
+    "        Forwarded to train_test_split as `test_size`.\n",
+    "    degree : int\n",
+    "        Polynomial degree forwarded to PolynomialFeatures.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    tuple\n",
+    "        (train_score, test_score) from `model.score` on each split.\n",
+    "\n",
+    "    Raises\n",
+    "    ------\n",
+    "    ValueError\n",
+    "        If `model_type` is not one of the three supported names.\n",
+    "    \"\"\"\n",
+    "    # NOTE(review): `normalize=True` is kept from the original code; scikit-learn\n",
+    "    # deprecated this argument in 1.0 and removed it in 1.2 -- confirm the\n",
+    "    # installed version before upgrading.\n",
+    "    if model_type=='Linear regression':\n",
+    "        estimator = LinearRegression(normalize=True)\n",
+    "    elif model_type=='LASSO with CV':\n",
+    "        estimator = LassoCV(eps=lasso_eps,n_alphas=lasso_nalpha,max_iter=lasso_iter,normalize=True,cv=5)\n",
+    "    elif model_type=='Ridge with CV':\n",
+    "        estimator = RidgeCV(alphas=ridge_alphas,normalize=True,cv=5)\n",
+    "    else:\n",
+    "        # Fail fast with a clear message rather than an UnboundLocalError at model.fit\n",
+    "        raise ValueError('Unknown model_type: {!r}'.format(model_type))\n",
+    "    model = make_pipeline(PolynomialFeatures(degree,interaction_only=False),estimator)\n",
+    "\n",
+    "    X_train, X_test, y_train, y_test = train_test_split(x,y,test_size=test_size,random_state=55)\n",
+    "\n",
+    "    # The score labels sit at 70% of the x-range and 20% of the y-range of each split\n",
+    "    x_lo_test, x_hi_test = np.min(X_test), np.max(X_test)\n",
+    "    y_lo_test, y_hi_test = np.min(y_test), np.max(y_test)\n",
+    "    x_lo_train, x_hi_train = np.min(X_train), np.max(X_train)\n",
+    "    y_lo_train, y_hi_train = np.min(y_train), np.max(y_train)\n",
+    "\n",
+    "    posx_test=x_lo_test+(x_hi_test-x_lo_test)*0.7\n",
+    "    posx_train=x_lo_train+(x_hi_train-x_lo_train)*0.7\n",
+    "    posy_test=y_lo_test+(y_hi_test-y_lo_test)*0.2\n",
+    "    posy_train=y_lo_train+(y_hi_train-y_lo_train)*0.2\n",
+    "\n",
+    "    # The pipeline is fitted on a single-column 2-D feature matrix\n",
+    "    X_train=X_train.reshape(-1,1)\n",
+    "    X_test=X_test.reshape(-1,1)\n",
+    "\n",
+    "    model.fit(X_train,y_train)\n",
+    "\n",
+    "    train_pred = np.array(model.predict(X_train))\n",
+    "    train_score = model.score(X_train,y_train)\n",
+    "\n",
+    "    test_pred = np.array(model.predict(X_test))\n",
+    "    test_score = model.score(X_test,y_test)\n",
+    "\n",
+    "    RMSE_test=np.sqrt(np.mean(np.square(test_pred-y_test)))\n",
+    "    RMSE_train=np.sqrt(np.mean(np.square(train_pred-y_train)))\n",
+    "\n",
+    "    print(\"Test score: {}, Training score: {}\".format(test_score,train_score))\n",
+    "\n",
+    "    print(\"RMSE Test: {}, RMSE train: {}\".format(RMSE_test,RMSE_train))\n",
+    "\n",
+    "    plt.figure(figsize=(12,4))\n",
+    "\n",
+    "    # Left panel: held-out data (blue) against the model's predictions (yellow)\n",
+    "    plt.subplot(1,2,1)\n",
+    "    plt.title(\"Test set performance\\n\",fontsize=16)\n",
+    "    plt.xlabel(\"X-test\",fontsize=13)\n",
+    "    plt.ylabel(\"y-test\",fontsize=13)\n",
+    "    plt.scatter(X_test,y_test,edgecolors='k',c='blue',s=60)\n",
+    "    plt.scatter(X_test,test_pred,edgecolors='k',c='yellow',s=60)\n",
+    "    plt.grid(True)\n",
+    "    plt.legend(['Actual test values','Predicted values'])\n",
+    "    plt.text(x=posx_test,y=posy_test,s='Test score: %.3f'%(test_score),fontsize=15)\n",
+    "\n",
+    "    # Right panel: training data (blue) against the fitted values (yellow)\n",
+    "    plt.subplot(1,2,2)\n",
+    "    plt.title(\"Training set performance\\n\",fontsize=16)\n",
+    "    plt.xlabel(\"X-train\",fontsize=13)\n",
+    "    plt.ylabel(\"y-train\",fontsize=13)\n",
+    "    plt.scatter(X_train,y_train,c='blue')\n",
+    "    plt.scatter(X_train,train_pred,c='yellow')\n",
+    "    plt.grid(True)\n",
+    "    plt.legend(['Actual training values','Fitted values'])\n",
+    "    plt.text(x=posx_train,y=posy_train,s='Training score: %.3f'%(train_score),fontsize=15)\n",
+    "\n",
+    "    plt.show()\n",
+    "\n",
+    "    return (train_score,test_score)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Run the encapsulated ML function with the ipywidgets `interactive` control"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "c07f081012e6401a8e5a47a104103310",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/html": [
+       "<p>Failed to display Jupyter Widget of type <code>interactive</code>.</p>\n",
+       "<p>\n",
+       "  If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean\n",
+       "  that the widgets JavaScript is still loading. If this message persists, it\n",
+       "  likely means that the widgets JavaScript library is either not installed or\n",
+       "  not enabled. See the <a href=\"https://ipywidgets.readthedocs.io/en/stable/user_install.html\">Jupyter\n",
+       "  Widgets Documentation</a> for setup instructions.\n",
+       "</p>\n",
+       "<p>\n",
+       "  If you're reading this message in another frontend (for example, a static\n",
+       "  rendering on GitHub or <a href=\"https://nbviewer.jupyter.org/\">NBViewer</a>),\n",
+       "  it may mean that your frontend doesn't currently support widgets.\n",
+       "</p>\n"
+      ],
+      "text/plain": [
+       "interactive(children=(RadioButtons(description='Choose Model', layout=Layout(width='250px'), options=('Linear regression', 'LASSO with CV', 'Ridge with CV'), style=DescriptionStyle(description_width='initial'), value='Linear regression'), Dropdown(description='Test set size', options={'10% of data': 0.1, '20% of data': 0.2, '30% of data': 0.3, '40% of data': 0.4, '50% of data': 0.5}, style=DescriptionStyle(description_width='initial'), value=0.1), IntSlider(value=1, continuous_update=False, description='Polynomial degree', max=10, min=1), Output(layout=Layout(height='350px'))), _dom_classes=('widget-interact',))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# 'initial' description width lets each label take its natural width instead of being truncated\n",
+    "style = {'description_width': 'initial'}\n",
+    "\n",
+    "# continuous_update=False on the IntSlider stops the model being re-evaluated while the slider is dragged\n",
+    "m = interactive(\n",
+    "    func_fit,\n",
+    "    model_type=widgets.RadioButtons(\n",
+    "        options=['Linear regression','LASSO with CV', 'Ridge with CV'],\n",
+    "        description=\"Choose Model\",\n",
+    "        style=style,\n",
+    "        layout=Layout(width='250px'),\n",
+    "    ),\n",
+    "    test_size=widgets.Dropdown(\n",
+    "        options={\"10% of data\":0.1,\"20% of data\":0.2, \"30% of data\":0.3,\n",
+    "                 \"40% of data\":0.4,\"50% of data\":0.5},\n",
+    "        description=\"Test set size\",\n",
+    "        style=style,\n",
+    "    ),\n",
+    "    # `style=` was misspelled `stye=`, so the slider never received the description style\n",
+    "    degree=widgets.IntSlider(\n",
+    "        min=1,max=10,step=1,\n",
+    "        description='Polynomial degree',\n",
+    "        style=style,\n",
+    "        continuous_update=False,\n",
+    "    ),\n",
+    ")\n",
+    "\n",
+    "# Fix the height of the Output widget (the last child) so the plot does not jump and flicker on refresh\n",
+    "output = m.children[-1]\n",
+    "output.layout.height = '350px'\n",
+    "\n",
+    "# Display the control\n",
+    "display(m)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}