amammi
diff --git a/Diff for: ‎Lesson05/5Activity.ipynb renamed to ‎Lesson05/Activity01.ipynb
+45-63 b/Diff for: ‎Lesson05/5Activity.ipynb renamed to ‎Lesson05/Activity01.ipynb
+45-63
@@ -14,20 +14,20 @@
   },
   "cells": [
     {
+      "cell_type": "code",
       "metadata": {
         "id": "Y-IWQkicJBML",
         "colab_type": "code",
+        "outputId": "c27555c3-d97c-4b30-85e6-056ee57b620f",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 71
-        },
-        "outputId": "c27555c3-d97c-4b30-85e6-056ee57b620f"
+        }
       },
-      "cell_type": "code",
       "source": [
         "!pip install pyspark"
       ],
-      "execution_count": 1,
+      "execution_count": 0,
       "outputs": [
         {
           "output_type": "stream",
@@ -40,12 +40,12 @@
       ]
     },
     {
+      "cell_type": "code",
       "metadata": {
         "id": "HuUYUBZOKJmO",
         "colab_type": "code",
         "colab": {}
       },
-      "cell_type": "code",
       "source": [
         "from pyspark import SparkContext\n",
         "sc = SparkContext()\n",
@@ -56,54 +56,36 @@
       "outputs": []
     },
     {
+      "cell_type": "markdown",
       "metadata": {
         "id": "Fh9724PMLRzi",
         "colab_type": "text"
       },
-      "cell_type": "markdown",
       "source": [
         ""
       ]
     },
     {
+      "cell_type": "code",
       "metadata": {
         "id": "YTuBXHztKLlR",
         "colab_type": "code",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 438
-        },
-        "outputId": "6d8d25e9-d440-4c60-c78e-d48c3a754351"
+        "colab": {}
       },
-      "cell_type": "code",
       "source": [
         "from pyspark import SparkContext\n",
         "sc = SparkContext()"
       ],
-      "execution_count": 3,
-      "outputs": [
-        {
-          "output_type": "error",
-          "ename": "ValueError",
-          "evalue": "ignored",
-          "traceback": [
-            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-            "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
-            "\u001b[0;32m<ipython-input-3-33ce3f59c0b1>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mpyspark\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mSparkContext\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0msc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mSparkContext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
-            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pyspark/context.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, gateway, jsc, profiler_cls)\u001b[0m\n\u001b[1;32m    113\u001b[0m         \"\"\"\n\u001b[1;32m    114\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_callsite\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfirst_spark_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mCallSite\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 115\u001b[0;31m         \u001b[0mSparkContext\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_ensure_initialized\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgateway\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mgateway\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconf\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mconf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    116\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    117\u001b[0m             self._do_init(master, appName, sparkHome, pyFiles, environment, batchSize, serializer,\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pyspark/context.py\u001b[0m in \u001b[0;36m_ensure_initialized\u001b[0;34m(cls, instance, gateway, conf)\u001b[0m\n\u001b[1;32m    312\u001b[0m                         \u001b[0;34m\" created by %s at %s:%s \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    313\u001b[0m                         % (currentAppName, currentMaster,\n\u001b[0;32m--> 314\u001b[0;31m                             callsite.function, callsite.file, callsite.linenum))\n\u001b[0m\u001b[1;32m    315\u001b[0m                 \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    316\u001b[0m                     \u001b[0mSparkContext\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_active_spark_context\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minstance\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;31mValueError\u001b[0m: Cannot run multiple SparkContexts at once; existing SparkContext(app=pyspark-shell, master=local[*]) created by __init__ at <ipython-input-2-817b62176d5d>:2 "
-          ]
-        }
-      ]
+      "execution_count": 0,
+      "outputs": []
     },
     {
+      "cell_type": "code",
       "metadata": {
         "id": "n4uh3owiKOxo",
         "colab_type": "code",
         "colab": {}
       },
-      "cell_type": "code",
       "source": [
         "from pyspark.sql import SQLContext\n",
         "sqlc = SQLContext(sc)"
@@ -112,34 +94,34 @@
       "outputs": []
     },
     {
+      "cell_type": "code",
       "metadata": {
         "id": "RYAvQpGuKV6j",
         "colab_type": "code",
         "colab": {}
       },
-      "cell_type": "code",
       "source": [
         ""
       ],
       "execution_count": 0,
       "outputs": []
     },
     {
+      "cell_type": "code",
       "metadata": {
         "id": "vkQ94MizKb-q",
         "colab_type": "code",
+        "outputId": "0919208d-e967-4bee-b010-2347dbb885c1",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 204
-        },
-        "outputId": "0919208d-e967-4bee-b010-2347dbb885c1"
+        }
       },
-      "cell_type": "code",
       "source": [
         "df = sqlc.read.format('com.databricks.spark.csv').options(header = 'true', inferschema = 'true').load('iris.csv')\n",
         "df.show(5)\n"
       ],
-      "execution_count": 6,
+      "execution_count": 0,
       "outputs": [
         {
           "output_type": "stream",
@@ -161,12 +143,12 @@
       ]
     },
     {
+      "cell_type": "code",
       "metadata": {
         "id": "ivN5jB6kKkL-",
         "colab_type": "code",
         "colab": {}
       },
-      "cell_type": "code",
       "source": [
         "from pyspark.sql.functions import mean\n",
         "avg_sl = df.select(mean('Sepallength')).toPandas()['avg(Sepallength)']\n"
@@ -175,22 +157,22 @@
       "outputs": []
     },
     {
+      "cell_type": "code",
       "metadata": {
         "id": "6dPaqDT5Kt-t",
         "colab_type": "code",
+        "outputId": "4a87a76b-b7b7-4aad-c3d5-0de36350fcb0",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 136
-        },
-        "outputId": "4a87a76b-b7b7-4aad-c3d5-0de36350fcb0"
+        }
       },
-      "cell_type": "code",
       "source": [
         "y = df\n",
         "y = y.na.fill(float(avg_sl),['Sepallength'])\n",
         "y.describe().show(1)\n"
       ],
-      "execution_count": 9,
+      "execution_count": 0,
       "outputs": [
         {
           "output_type": "stream",
@@ -208,12 +190,12 @@
       ]
     },
     {
+      "cell_type": "code",
       "metadata": {
         "id": "hZ3h4SwmKw5H",
         "colab_type": "code",
         "colab": {}
       },
-      "cell_type": "code",
       "source": [
         "from pyspark.mllib.stat import Statistics\n",
         "import pandas as pd\n"
@@ -222,25 +204,25 @@
       "outputs": []
     },
     {
+      "cell_type": "code",
       "metadata": {
         "id": "EYhtf-TDK2om",
         "colab_type": "code",
         "colab": {}
       },
-      "cell_type": "code",
       "source": [
         "z = y.fillna(1)"
       ],
       "execution_count": 0,
       "outputs": []
     },
     {
+      "cell_type": "code",
       "metadata": {
         "id": "xTEVwpImK4S4",
         "colab_type": "code",
         "colab": {}
       },
-      "cell_type": "code",
       "source": [
         "a = z.drop('Species') \n",
         "features = a.rdd.map(lambda row: row[0:])\n"
@@ -249,35 +231,35 @@
       "outputs": []
     },
     {
+      "cell_type": "code",
       "metadata": {
         "id": "uHjpSzLhK6Hd",
         "colab_type": "code",
         "colab": {}
       },
-      "cell_type": "code",
       "source": [
         "correlation_matrix = Statistics.corr(features, method=\"pearson\")"
       ],
       "execution_count": 0,
       "outputs": []
     },
     {
+      "cell_type": "code",
       "metadata": {
         "id": "90YKC2n5K61I",
         "colab_type": "code",
+        "outputId": "fd7c04d5-ea85-4477-adbf-cd104606880e",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 173
-        },
-        "outputId": "fd7c04d5-ea85-4477-adbf-cd104606880e"
+        }
       },
-      "cell_type": "code",
       "source": [
         "correlation_df = pd.DataFrame(correlation_matrix)\n",
         "correlation_df.index, correlation_df.columns = a.columns, a.columns\n",
         "correlation_df\n"
       ],
-      "execution_count": 14,
+      "execution_count": 0,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -356,22 +338,22 @@
       ]
     },
     {
+      "cell_type": "code",
       "metadata": {
         "id": "u6lph3zyK8wZ",
         "colab_type": "code",
+        "outputId": "8b987d54-97cc-4d61-cca5-9968fbbc020d",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 34
-        },
-        "outputId": "8b987d54-97cc-4d61-cca5-9968fbbc020d"
+        }
       },
-      "cell_type": "code",
       "source": [
         "import pandas as pd\n",
         "dat = y.toPandas()\n",
         "type(dat)\n"
       ],
-      "execution_count": 15,
+      "execution_count": 0,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -388,24 +370,24 @@
       ]
     },
     {
+      "cell_type": "code",
       "metadata": {
         "id": "Wgsl7lqvK-y_",
         "colab_type": "code",
+        "outputId": "fc9658c0-cbb9-4e37-cd2f-26b529a82c00",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 369
-        },
-        "outputId": "fc9658c0-cbb9-4e37-cd2f-26b529a82c00"
+        }
       },
-      "cell_type": "code",
       "source": [
         "import matplotlib.pyplot as plt\n",
         "import seaborn as sns\n",
         "%matplotlib inline\n",
         "sns.lmplot(x = \"Sepallength\", y = \"Petallength\", data = dat)\n",
         "plt.show()\n"
       ],
-      "execution_count": 16,
+      "execution_count": 0,
       "outputs": [
         {
           "output_type": "display_data",
@@ -422,22 +404,22 @@
       ]
     },
     {
+      "cell_type": "code",
       "metadata": {
         "id": "I9llgXa5LB3Y",
         "colab_type": "code",
+        "outputId": "192b63b3-db45-4b3d-b880-8adf67cc5dbf",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 369
-        },
-        "outputId": "192b63b3-db45-4b3d-b880-8adf67cc5dbf"
+        }
       },
-      "cell_type": "code",
       "source": [
         "import seaborn as sns\n",
         "sns.lmplot(x = \"Sepallength\", y = \"Petalwidth\", data = dat)\n",
         "plt.show()\n"
       ],
-      "execution_count": 17,
+      "execution_count": 0,
       "outputs": [
         {
           "output_type": "display_data",
@@ -454,21 +436,21 @@
       ]
     },
     {
+      "cell_type": "code",
       "metadata": {
         "id": "-0P4bwUoLDWh",
         "colab_type": "code",
+        "outputId": "85034f66-1b36-4903-e097-a7075bb34eb8",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 369
-        },
-        "outputId": "85034f66-1b36-4903-e097-a7075bb34eb8"
+        }
       },
-      "cell_type": "code",
       "source": [
         "sns.lmplot(x = \"Petallength\", y = \"Petalwidth\", data = dat)\n",
         "plt.show()\n"
       ],
-      "execution_count": 18,
+      "execution_count": 0,
       "outputs": [
         {
           "output_type": "display_data",