From fe7b6bbd814de622927bc4a71f516894c7e9bb84 Mon Sep 17 00:00:00 2001
From: hemantsinalkar-ssk
<87474238+hemantsinalkar-ssk@users.noreply.github.com>
Date: Thu, 3 Oct 2024 16:46:13 +0530
Subject: [PATCH 1/3] - Added cell to install tensorflow libraries
---
.../labs/improve_data_quality.ipynb | 3500 +++++++--------
.../solutions/improve_data_quality.ipynb | 3742 +++++++++--------
2 files changed, 3629 insertions(+), 3613 deletions(-)
diff --git a/courses/machine_learning/deepdive2/launching_into_ml/labs/improve_data_quality.ipynb b/courses/machine_learning/deepdive2/launching_into_ml/labs/improve_data_quality.ipynb
index 56d0c5cfb6..02889af0ba 100644
--- a/courses/machine_learning/deepdive2/launching_into_ml/labs/improve_data_quality.ipynb
+++ b/courses/machine_learning/deepdive2/launching_into_ml/labs/improve_data_quality.ipynb
@@ -37,6 +37,14 @@
"!sudo chown -R jupyter:jupyter /home/jupyter/training-data-analyst"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Install the TensorFlow libraries by running the following command in a code cell:\n",
+ "`!pip install tensorflow`"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {
@@ -388,92 +396,92 @@
"execution_count": 8,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " Zip Code \n",
- " Vehicles \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " count \n",
- " 497.00000 \n",
- " 496.000000 \n",
- " \n",
- " \n",
- " mean \n",
- " 89838.23340 \n",
- " 74.512097 \n",
- " \n",
- " \n",
- " std \n",
- " 3633.35609 \n",
- " 243.839871 \n",
- " \n",
- " \n",
- " min \n",
- " 9001.00000 \n",
- " 1.000000 \n",
- " \n",
- " \n",
- " 25% \n",
- " 90001.00000 \n",
- " 14.000000 \n",
- " \n",
- " \n",
- " 50% \n",
- " 90001.00000 \n",
- " 25.000000 \n",
- " \n",
- " \n",
- " 75% \n",
- " 90001.00000 \n",
- " 56.250000 \n",
- " \n",
- " \n",
- " max \n",
- " 90002.00000 \n",
- " 3178.000000 \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Zip Code Vehicles\n",
- "count 497.00000 496.000000\n",
- "mean 89838.23340 74.512097\n",
- "std 3633.35609 243.839871\n",
- "min 9001.00000 1.000000\n",
- "25% 90001.00000 14.000000\n",
- "50% 90001.00000 25.000000\n",
- "75% 90001.00000 56.250000\n",
- "max 90002.00000 3178.000000"
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Zip Code \n",
+ " Vehicles \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " count \n",
+ " 497.00000 \n",
+ " 496.000000 \n",
+ " \n",
+ " \n",
+ " mean \n",
+ " 89838.23340 \n",
+ " 74.512097 \n",
+ " \n",
+ " \n",
+ " std \n",
+ " 3633.35609 \n",
+ " 243.839871 \n",
+ " \n",
+ " \n",
+ " min \n",
+ " 9001.00000 \n",
+ " 1.000000 \n",
+ " \n",
+ " \n",
+ " 25% \n",
+ " 90001.00000 \n",
+ " 14.000000 \n",
+ " \n",
+ " \n",
+ " 50% \n",
+ " 90001.00000 \n",
+ " 25.000000 \n",
+ " \n",
+ " \n",
+ " 75% \n",
+ " 90001.00000 \n",
+ " 56.250000 \n",
+ " \n",
+ " \n",
+ " max \n",
+ " 90002.00000 \n",
+ " 3178.000000 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Zip Code Vehicles\n",
+ "count 497.00000 496.000000\n",
+ "mean 89838.23340 74.512097\n",
+ "std 3633.35609 243.839871\n",
+ "min 9001.00000 1.000000\n",
+ "25% 90001.00000 14.000000\n",
+ "50% 90001.00000 25.000000\n",
+ "75% 90001.00000 56.250000\n",
+ "max 90002.00000 3178.000000"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
"df_transport.describe()"
@@ -491,149 +499,149 @@
"execution_count": 9,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " Date \n",
- " Zip Code \n",
- " Model Year \n",
- " Make \n",
- " Light_Duty \n",
- " Vehicles \n",
- " \n",
- " \n",
- " Fuel \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " Battery Electric \n",
- " 10/1/2018 \n",
- " 90000.0 \n",
- " <2006 \n",
- " OTHER/UNK \n",
- " No \n",
- " 4.0 \n",
- " \n",
- " \n",
- " Diesel and Diesel Hybrid \n",
- " 10/1/2018 \n",
- " 90000.0 \n",
- " <2006 \n",
- " OTHER/UNK \n",
- " No \n",
- " 55.0 \n",
- " \n",
- " \n",
- " Flex-Fuel \n",
- " 10/14/2018 \n",
- " 90001.0 \n",
- " 2007 \n",
- " Type_A \n",
- " Yes \n",
- " 78.0 \n",
- " \n",
- " \n",
- " Gasoline \n",
- " 10/1/2018 \n",
- " 90000.0 \n",
- " 2006 \n",
- " OTHER/UNK \n",
- " Yes \n",
- " 1.0 \n",
- " \n",
- " \n",
- " Hybrid Gasoline \n",
- " 10/24/2018 \n",
- " 90001.0 \n",
- " 2009 \n",
- " OTHER/UNK \n",
- " Yes \n",
- " 18.0 \n",
- " \n",
- " \n",
- " Natural Gas \n",
- " 10/25/2018 \n",
- " 90001.0 \n",
- " 2009 \n",
- " OTHER/UNK \n",
- " No \n",
- " 2.0 \n",
- " \n",
- " \n",
- " Other \n",
- " 10/8/2018 \n",
- " 90000.0 \n",
- " <2006 \n",
- " OTHER/UNK \n",
- " Yes \n",
- " 6.0 \n",
- " \n",
- " \n",
- " Plug-in Hybrid \n",
- " 11/2/2018 \n",
- " 90001.0 \n",
- " 2012 \n",
- " OTHER/UNK \n",
- " Yes \n",
- " 1.0 \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Date Zip Code Model Year Make \\\n",
- "Fuel \n",
- "Battery Electric 10/1/2018 90000.0 <2006 OTHER/UNK \n",
- "Diesel and Diesel Hybrid 10/1/2018 90000.0 <2006 OTHER/UNK \n",
- "Flex-Fuel 10/14/2018 90001.0 2007 Type_A \n",
- "Gasoline 10/1/2018 90000.0 2006 OTHER/UNK \n",
- "Hybrid Gasoline 10/24/2018 90001.0 2009 OTHER/UNK \n",
- "Natural Gas 10/25/2018 90001.0 2009 OTHER/UNK \n",
- "Other 10/8/2018 90000.0 <2006 OTHER/UNK \n",
- "Plug-in Hybrid 11/2/2018 90001.0 2012 OTHER/UNK \n",
- "\n",
- " Light_Duty Vehicles \n",
- "Fuel \n",
- "Battery Electric No 4.0 \n",
- "Diesel and Diesel Hybrid No 55.0 \n",
- "Flex-Fuel Yes 78.0 \n",
- "Gasoline Yes 1.0 \n",
- "Hybrid Gasoline Yes 18.0 \n",
- "Natural Gas No 2.0 \n",
- "Other Yes 6.0 \n",
- "Plug-in Hybrid Yes 1.0 "
- ]
- },
- "execution_count": 11,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Date \n",
+ " Zip Code \n",
+ " Model Year \n",
+ " Make \n",
+ " Light_Duty \n",
+ " Vehicles \n",
+ " \n",
+ " \n",
+ " Fuel \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Battery Electric \n",
+ " 10/1/2018 \n",
+ " 90000.0 \n",
+ " <2006 \n",
+ " OTHER/UNK \n",
+ " No \n",
+ " 4.0 \n",
+ " \n",
+ " \n",
+ " Diesel and Diesel Hybrid \n",
+ " 10/1/2018 \n",
+ " 90000.0 \n",
+ " <2006 \n",
+ " OTHER/UNK \n",
+ " No \n",
+ " 55.0 \n",
+ " \n",
+ " \n",
+ " Flex-Fuel \n",
+ " 10/14/2018 \n",
+ " 90001.0 \n",
+ " 2007 \n",
+ " Type_A \n",
+ " Yes \n",
+ " 78.0 \n",
+ " \n",
+ " \n",
+ " Gasoline \n",
+ " 10/1/2018 \n",
+ " 90000.0 \n",
+ " 2006 \n",
+ " OTHER/UNK \n",
+ " Yes \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ " Hybrid Gasoline \n",
+ " 10/24/2018 \n",
+ " 90001.0 \n",
+ " 2009 \n",
+ " OTHER/UNK \n",
+ " Yes \n",
+ " 18.0 \n",
+ " \n",
+ " \n",
+ " Natural Gas \n",
+ " 10/25/2018 \n",
+ " 90001.0 \n",
+ " 2009 \n",
+ " OTHER/UNK \n",
+ " No \n",
+ " 2.0 \n",
+ " \n",
+ " \n",
+ " Other \n",
+ " 10/8/2018 \n",
+ " 90000.0 \n",
+ " <2006 \n",
+ " OTHER/UNK \n",
+ " Yes \n",
+ " 6.0 \n",
+ " \n",
+ " \n",
+ " Plug-in Hybrid \n",
+ " 11/2/2018 \n",
+ " 90001.0 \n",
+ " 2012 \n",
+ " OTHER/UNK \n",
+ " Yes \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Date Zip Code Model Year Make \\\n",
+ "Fuel \n",
+ "Battery Electric 10/1/2018 90000.0 <2006 OTHER/UNK \n",
+ "Diesel and Diesel Hybrid 10/1/2018 90000.0 <2006 OTHER/UNK \n",
+ "Flex-Fuel 10/14/2018 90001.0 2007 Type_A \n",
+ "Gasoline 10/1/2018 90000.0 2006 OTHER/UNK \n",
+ "Hybrid Gasoline 10/24/2018 90001.0 2009 OTHER/UNK \n",
+ "Natural Gas 10/25/2018 90001.0 2009 OTHER/UNK \n",
+ "Other 10/8/2018 90000.0 <2006 OTHER/UNK \n",
+ "Plug-in Hybrid 11/2/2018 90001.0 2012 OTHER/UNK \n",
+ "\n",
+ " Light_Duty Vehicles \n",
+ "Fuel \n",
+ "Battery Electric No 4.0 \n",
+ "Diesel and Diesel Hybrid No 55.0 \n",
+ "Flex-Fuel Yes 78.0 \n",
+ "Gasoline Yes 1.0 \n",
+ "Hybrid Gasoline Yes 18.0 \n",
+ "Natural Gas No 2.0 \n",
+ "Other Yes 6.0 \n",
+ "Plug-in Hybrid Yes 1.0 "
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
"df_transport.groupby('Fuel').first() # Get the first entry for each month. "
@@ -660,23 +668,23 @@
"execution_count": 10,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/plain": [
- "Date 2\n",
- "Zip Code 2\n",
- "Model Year 2\n",
- "Fuel 2\n",
- "Make 3\n",
- "Light_Duty 3\n",
- "Vehicles 3\n",
- "dtype: int64"
- ]
- },
- "execution_count": 12,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/plain": [
+ "Date 2\n",
+ "Zip Code 2\n",
+ "Model Year 2\n",
+ "Fuel 2\n",
+ "Make 3\n",
+ "Light_Duty 3\n",
+ "Vehicles 3\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
"df_transport.isnull().sum()"
@@ -694,36 +702,36 @@
"execution_count": 11,
"metadata": {},
"outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "0 10/1/2018\n",
- "1 10/1/2018\n",
- "2 NaN\n",
- "3 10/1/2018\n",
- "4 10/1/2018\n",
- " ... \n",
- "494 12/3/2018\n",
- "495 12/4/2018\n",
- "496 12/5/2018\n",
- "497 12/6/2018\n",
- "498 12/7/2018\n",
- "Name: Date, Length: 499, dtype: object\n",
- "0 False\n",
- "1 False\n",
- "2 True\n",
- "3 False\n",
- "4 False\n",
- " ... \n",
- "494 False\n",
- "495 False\n",
- "496 False\n",
- "497 False\n",
- "498 False\n",
- "Name: Date, Length: 499, dtype: bool\n"
- ]
- }
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0 10/1/2018\n",
+ "1 10/1/2018\n",
+ "2 NaN\n",
+ "3 10/1/2018\n",
+ "4 10/1/2018\n",
+ " ... \n",
+ "494 12/3/2018\n",
+ "495 12/4/2018\n",
+ "496 12/5/2018\n",
+ "497 12/6/2018\n",
+ "498 12/7/2018\n",
+ "Name: Date, Length: 499, dtype: object\n",
+ "0 False\n",
+ "1 False\n",
+ "2 True\n",
+ "3 False\n",
+ "4 False\n",
+ " ... \n",
+ "494 False\n",
+ "495 False\n",
+ "496 False\n",
+ "497 False\n",
+ "498 False\n",
+ "Name: Date, Length: 499, dtype: bool\n"
+ ]
+ }
],
"source": [
"print (df_transport['Date'])\n",
@@ -735,36 +743,36 @@
"execution_count": 12,
"metadata": {},
"outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "0 OTHER/UNK\n",
- "1 NaN\n",
- "2 OTHER/UNK\n",
- "3 OTHER/UNK\n",
- "4 OTHER/UNK\n",
- " ... \n",
- "494 Type_I\n",
- "495 Type_B\n",
- "496 Type_C\n",
- "497 Type_J\n",
- "498 Type_J\n",
- "Name: Make, Length: 499, dtype: object\n",
- "0 False\n",
- "1 True\n",
- "2 False\n",
- "3 False\n",
- "4 False\n",
- " ... \n",
- "494 False\n",
- "495 False\n",
- "496 False\n",
- "497 False\n",
- "498 False\n",
- "Name: Make, Length: 499, dtype: bool\n"
- ]
- }
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0 OTHER/UNK\n",
+ "1 NaN\n",
+ "2 OTHER/UNK\n",
+ "3 OTHER/UNK\n",
+ "4 OTHER/UNK\n",
+ " ... \n",
+ "494 Type_I\n",
+ "495 Type_B\n",
+ "496 Type_C\n",
+ "497 Type_J\n",
+ "498 Type_J\n",
+ "Name: Make, Length: 499, dtype: object\n",
+ "0 False\n",
+ "1 True\n",
+ "2 False\n",
+ "3 False\n",
+ "4 False\n",
+ " ... \n",
+ "494 False\n",
+ "495 False\n",
+ "496 False\n",
+ "497 False\n",
+ "498 False\n",
+ "Name: Make, Length: 499, dtype: bool\n"
+ ]
+ }
],
"source": [
"print (df_transport['Make'])\n",
@@ -776,36 +784,36 @@
"execution_count": 13,
"metadata": {},
"outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "0 2006\n",
- "1 2014\n",
- "2 NaN\n",
- "3 2017\n",
- "4 <2006\n",
- " ... \n",
- "494 2010\n",
- "495 2010\n",
- "496 2010\n",
- "497 2010\n",
- "498 2010\n",
- "Name: Model Year, Length: 499, dtype: object\n",
- "0 False\n",
- "1 False\n",
- "2 True\n",
- "3 False\n",
- "4 False\n",
- " ... \n",
- "494 False\n",
- "495 False\n",
- "496 False\n",
- "497 False\n",
- "498 False\n",
- "Name: Model Year, Length: 499, dtype: bool\n"
- ]
- }
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0 2006\n",
+ "1 2014\n",
+ "2 NaN\n",
+ "3 2017\n",
+ "4 <2006\n",
+ " ... \n",
+ "494 2010\n",
+ "495 2010\n",
+ "496 2010\n",
+ "497 2010\n",
+ "498 2010\n",
+ "Name: Model Year, Length: 499, dtype: object\n",
+ "0 False\n",
+ "1 False\n",
+ "2 True\n",
+ "3 False\n",
+ "4 False\n",
+ " ... \n",
+ "494 False\n",
+ "495 False\n",
+ "496 False\n",
+ "497 False\n",
+ "498 False\n",
+ "Name: Model Year, Length: 499, dtype: bool\n"
+ ]
+ }
],
"source": [
"print (df_transport['Model Year'])\n",
@@ -826,29 +834,29 @@
"execution_count": 14,
"metadata": {},
"outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Rows : 499\n",
- "Columns : 7\n",
- "\n",
- "Features : \n",
- " ['Date', 'Zip Code', 'Model Year', 'Fuel', 'Make', 'Light_Duty', 'Vehicles']\n",
- "\n",
- "Unique values : \n",
- " Date 130\n",
- "Zip Code 4\n",
- "Model Year 15\n",
- "Fuel 8\n",
- "Make 43\n",
- "Light_Duty 2\n",
- "Vehicles 151\n",
- "dtype: int64\n",
- "\n",
- "Missing values : 17\n"
- ]
- }
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Rows : 499\n",
+ "Columns : 7\n",
+ "\n",
+ "Features : \n",
+ " ['Date', 'Zip Code', 'Model Year', 'Fuel', 'Make', 'Light_Duty', 'Vehicles']\n",
+ "\n",
+ "Unique values : \n",
+ " Date 130\n",
+ "Zip Code 4\n",
+ "Model Year 15\n",
+ "Fuel 8\n",
+ "Make 43\n",
+ "Light_Duty 2\n",
+ "Vehicles 151\n",
+ "dtype: int64\n",
+ "\n",
+ "Missing values : 17\n"
+ ]
+ }
],
"source": [
"print (\"Rows : \" ,df_transport.shape[0])\n",
@@ -870,104 +878,104 @@
"execution_count": 15,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " Date \n",
- " Zip Code \n",
- " Model Year \n",
- " Fuel \n",
- " Make \n",
- " Light_Duty \n",
- " Vehicles \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 494 \n",
- " 12/3/2018 \n",
- " 90002.0 \n",
- " 2010 \n",
- " Gasoline \n",
- " Type_I \n",
- " Yes \n",
- " 11.0 \n",
- " \n",
- " \n",
- " 495 \n",
- " 12/4/2018 \n",
- " 90002.0 \n",
- " 2010 \n",
- " Gasoline \n",
- " Type_B \n",
- " Yes \n",
- " 58.0 \n",
- " \n",
- " \n",
- " 496 \n",
- " 12/5/2018 \n",
- " 90002.0 \n",
- " 2010 \n",
- " Gasoline \n",
- " Type_C \n",
- " Yes \n",
- " 45.0 \n",
- " \n",
- " \n",
- " 497 \n",
- " 12/6/2018 \n",
- " 90002.0 \n",
- " 2010 \n",
- " Gasoline \n",
- " Type_J \n",
- " Yes \n",
- " 82.0 \n",
- " \n",
- " \n",
- " 498 \n",
- " 12/7/2018 \n",
- " 90002.0 \n",
- " 2010 \n",
- " Gasoline \n",
- " Type_J \n",
- " Yes \n",
- " 12.0 \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Date Zip Code Model Year Fuel Make Light_Duty Vehicles\n",
- "494 12/3/2018 90002.0 2010 Gasoline Type_I Yes 11.0\n",
- "495 12/4/2018 90002.0 2010 Gasoline Type_B Yes 58.0\n",
- "496 12/5/2018 90002.0 2010 Gasoline Type_C Yes 45.0\n",
- "497 12/6/2018 90002.0 2010 Gasoline Type_J Yes 82.0\n",
- "498 12/7/2018 90002.0 2010 Gasoline Type_J Yes 12.0"
- ]
- },
- "execution_count": 17,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Date \n",
+ " Zip Code \n",
+ " Model Year \n",
+ " Fuel \n",
+ " Make \n",
+ " Light_Duty \n",
+ " Vehicles \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 494 \n",
+ " 12/3/2018 \n",
+ " 90002.0 \n",
+ " 2010 \n",
+ " Gasoline \n",
+ " Type_I \n",
+ " Yes \n",
+ " 11.0 \n",
+ " \n",
+ " \n",
+ " 495 \n",
+ " 12/4/2018 \n",
+ " 90002.0 \n",
+ " 2010 \n",
+ " Gasoline \n",
+ " Type_B \n",
+ " Yes \n",
+ " 58.0 \n",
+ " \n",
+ " \n",
+ " 496 \n",
+ " 12/5/2018 \n",
+ " 90002.0 \n",
+ " 2010 \n",
+ " Gasoline \n",
+ " Type_C \n",
+ " Yes \n",
+ " 45.0 \n",
+ " \n",
+ " \n",
+ " 497 \n",
+ " 12/6/2018 \n",
+ " 90002.0 \n",
+ " 2010 \n",
+ " Gasoline \n",
+ " Type_J \n",
+ " Yes \n",
+ " 82.0 \n",
+ " \n",
+ " \n",
+ " 498 \n",
+ " 12/7/2018 \n",
+ " 90002.0 \n",
+ " 2010 \n",
+ " Gasoline \n",
+ " Type_J \n",
+ " Yes \n",
+ " 12.0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Date Zip Code Model Year Fuel Make Light_Duty Vehicles\n",
+ "494 12/3/2018 90002.0 2010 Gasoline Type_I Yes 11.0\n",
+ "495 12/4/2018 90002.0 2010 Gasoline Type_B Yes 58.0\n",
+ "496 12/5/2018 90002.0 2010 Gasoline Type_C Yes 45.0\n",
+ "497 12/6/2018 90002.0 2010 Gasoline Type_J Yes 82.0\n",
+ "498 12/7/2018 90002.0 2010 Gasoline Type_J Yes 12.0"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
"df_transport.tail()"
@@ -1030,27 +1038,27 @@
"execution_count": 16,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/plain": [
- "Date 2\n",
- "Zip Code 2\n",
- "Model Year 2\n",
- "Fuel 2\n",
- "Make 3\n",
- "Light_Duty 3\n",
- "Vehicles 3\n",
- "dtype: int64"
- ]
- },
- "execution_count": 18,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# TODO 1a\n",
- "# TODO -- Your code here.\n"
+ {
+ "data": {
+ "text/plain": [
+ "Date 2\n",
+ "Zip Code 2\n",
+ "Model Year 2\n",
+ "Fuel 2\n",
+ "Make 3\n",
+ "Light_Duty 3\n",
+ "Vehicles 3\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# TODO 1a\n",
+ "# TODO -- Your code here.\n"
]
},
{
@@ -1082,23 +1090,23 @@
"execution_count": 18,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/plain": [
- "Date 0\n",
- "Zip Code 0\n",
- "Model Year 0\n",
- "Fuel 0\n",
- "Make 0\n",
- "Light_Duty 0\n",
- "Vehicles 0\n",
- "dtype: int64"
- ]
- },
- "execution_count": 20,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/plain": [
+ "Date 0\n",
+ "Zip Code 0\n",
+ "Model Year 0\n",
+ "Fuel 0\n",
+ "Make 0\n",
+ "Light_Duty 0\n",
+ "Vehicles 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
"# TODO 1c\n",
@@ -1149,26 +1157,26 @@
"execution_count": 20,
"metadata": {},
"outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "RangeIndex: 499 entries, 0 to 498\n",
- "Data columns (total 7 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 Date 499 non-null datetime64[ns]\n",
- " 1 Zip Code 499 non-null float64 \n",
- " 2 Model Year 499 non-null object \n",
- " 3 Fuel 499 non-null object \n",
- " 4 Make 499 non-null object \n",
- " 5 Light_Duty 499 non-null object \n",
- " 6 Vehicles 499 non-null float64 \n",
- "dtypes: datetime64[ns](1), float64(2), object(4)\n",
- "memory usage: 27.4+ KB\n"
- ]
- }
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 499 entries, 0 to 498\n",
+ "Data columns (total 7 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 Date 499 non-null datetime64[ns]\n",
+ " 1 Zip Code 499 non-null float64 \n",
+ " 2 Model Year 499 non-null object \n",
+ " 3 Fuel 499 non-null object \n",
+ " 4 Make 499 non-null object \n",
+ " 5 Light_Duty 499 non-null object \n",
+ " 6 Vehicles 499 non-null float64 \n",
+ "dtypes: datetime64[ns](1), float64(2), object(4)\n",
+ "memory usage: 27.4+ KB\n"
+ ]
+ }
],
"source": [
"# TODO 2b\n",
@@ -1187,29 +1195,29 @@
"execution_count": 21,
"metadata": {},
"outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "RangeIndex: 499 entries, 0 to 498\n",
- "Data columns (total 10 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 Date 499 non-null datetime64[ns]\n",
- " 1 Zip Code 499 non-null float64 \n",
- " 2 Model Year 499 non-null object \n",
- " 3 Fuel 499 non-null object \n",
- " 4 Make 499 non-null object \n",
- " 5 Light_Duty 499 non-null object \n",
- " 6 Vehicles 499 non-null float64 \n",
- " 7 year 499 non-null int64 \n",
- " 8 month 499 non-null int64 \n",
- " 9 day 499 non-null int64 \n",
- "dtypes: datetime64[ns](1), float64(2), int64(3), object(4)\n",
- "memory usage: 39.1+ KB\n"
- ]
- }
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 499 entries, 0 to 498\n",
+ "Data columns (total 10 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 Date 499 non-null datetime64[ns]\n",
+ " 1 Zip Code 499 non-null float64 \n",
+ " 2 Model Year 499 non-null object \n",
+ " 3 Fuel 499 non-null object \n",
+ " 4 Make 499 non-null object \n",
+ " 5 Light_Duty 499 non-null object \n",
+ " 6 Vehicles 499 non-null float64 \n",
+ " 7 year 499 non-null int64 \n",
+ " 8 month 499 non-null int64 \n",
+ " 9 day 499 non-null int64 \n",
+ "dtypes: datetime64[ns](1), float64(2), int64(3), object(4)\n",
+ "memory usage: 39.1+ KB\n"
+ ]
+ }
],
"source": [
"df_transport['year'] = df_transport['Date'].dt.year\n",
@@ -1232,151 +1240,151 @@
"execution_count": 22,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " Date \n",
- " Zip Code \n",
- " Model Year \n",
- " Fuel \n",
- " Make \n",
- " Light_Duty \n",
- " Vehicles \n",
- " year \n",
- " day \n",
- " \n",
- " \n",
- " month \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 1 \n",
- " 2019-01-01 \n",
- " 90001.0 \n",
- " 2016 \n",
- " Gasoline \n",
- " Type_G \n",
- " Yes \n",
- " 18.0 \n",
- " 2019 \n",
- " 1 \n",
- " \n",
- " \n",
- " 2 \n",
- " 2019-02-01 \n",
- " 90001.0 \n",
- " 2017 \n",
- " Gasoline \n",
- " Type_D \n",
- " Yes \n",
- " 13.0 \n",
- " 2019 \n",
- " 1 \n",
- " \n",
- " \n",
- " 3 \n",
- " 2019-03-01 \n",
- " 90001.0 \n",
- " 2018 \n",
- " Gasoline \n",
- " Type_C \n",
- " Yes \n",
- " 32.0 \n",
- " 2019 \n",
- " 1 \n",
- " \n",
- " \n",
- " 10 \n",
- " 2018-10-01 \n",
- " 90000.0 \n",
- " 2006 \n",
- " Gasoline \n",
- " OTHER/UNK \n",
- " Yes \n",
- " 1.0 \n",
- " 2018 \n",
- " 1 \n",
- " \n",
- " \n",
- " 11 \n",
- " 2018-11-01 \n",
- " 90001.0 \n",
- " 2007 \n",
- " Gasoline \n",
- " Type_M \n",
- " Yes \n",
- " 15.0 \n",
- " 2018 \n",
- " 1 \n",
- " \n",
- " \n",
- " 12 \n",
- " 2018-12-02 \n",
- " 90001.0 \n",
- " 2015 \n",
- " Gasoline \n",
- " Type_G \n",
- " Yes \n",
- " 19.0 \n",
- " 2018 \n",
- " 2 \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Date Zip Code Model Year Fuel Make Light_Duty \\\n",
- "month \n",
- "1 2019-01-01 90001.0 2016 Gasoline Type_G Yes \n",
- "2 2019-02-01 90001.0 2017 Gasoline Type_D Yes \n",
- "3 2019-03-01 90001.0 2018 Gasoline Type_C Yes \n",
- "10 2018-10-01 90000.0 2006 Gasoline OTHER/UNK Yes \n",
- "11 2018-11-01 90001.0 2007 Gasoline Type_M Yes \n",
- "12 2018-12-02 90001.0 2015 Gasoline Type_G Yes \n",
- "\n",
- " Vehicles year day \n",
- "month \n",
- "1 18.0 2019 1 \n",
- "2 13.0 2019 1 \n",
- "3 32.0 2019 1 \n",
- "10 1.0 2018 1 \n",
- "11 15.0 2018 1 \n",
- "12 19.0 2018 2 "
- ]
- },
- "execution_count": 24,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Date \n",
+ " Zip Code \n",
+ " Model Year \n",
+ " Fuel \n",
+ " Make \n",
+ " Light_Duty \n",
+ " Vehicles \n",
+ " year \n",
+ " day \n",
+ " \n",
+ " \n",
+ " month \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2019-01-01 \n",
+ " 90001.0 \n",
+ " 2016 \n",
+ " Gasoline \n",
+ " Type_G \n",
+ " Yes \n",
+ " 18.0 \n",
+ " 2019 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 2019-02-01 \n",
+ " 90001.0 \n",
+ " 2017 \n",
+ " Gasoline \n",
+ " Type_D \n",
+ " Yes \n",
+ " 13.0 \n",
+ " 2019 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 2019-03-01 \n",
+ " 90001.0 \n",
+ " 2018 \n",
+ " Gasoline \n",
+ " Type_C \n",
+ " Yes \n",
+ " 32.0 \n",
+ " 2019 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 10 \n",
+ " 2018-10-01 \n",
+ " 90000.0 \n",
+ " 2006 \n",
+ " Gasoline \n",
+ " OTHER/UNK \n",
+ " Yes \n",
+ " 1.0 \n",
+ " 2018 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 11 \n",
+ " 2018-11-01 \n",
+ " 90001.0 \n",
+ " 2007 \n",
+ " Gasoline \n",
+ " Type_M \n",
+ " Yes \n",
+ " 15.0 \n",
+ " 2018 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 12 \n",
+ " 2018-12-02 \n",
+ " 90001.0 \n",
+ " 2015 \n",
+ " Gasoline \n",
+ " Type_G \n",
+ " Yes \n",
+ " 19.0 \n",
+ " 2018 \n",
+ " 2 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Date Zip Code Model Year Fuel Make Light_Duty \\\n",
+ "month \n",
+ "1 2019-01-01 90001.0 2016 Gasoline Type_G Yes \n",
+ "2 2019-02-01 90001.0 2017 Gasoline Type_D Yes \n",
+ "3 2019-03-01 90001.0 2018 Gasoline Type_C Yes \n",
+ "10 2018-10-01 90000.0 2006 Gasoline OTHER/UNK Yes \n",
+ "11 2018-11-01 90001.0 2007 Gasoline Type_M Yes \n",
+ "12 2018-12-02 90001.0 2015 Gasoline Type_G Yes \n",
+ "\n",
+ " Vehicles year day \n",
+ "month \n",
+ "1 18.0 2019 1 \n",
+ "2 13.0 2019 1 \n",
+ "3 32.0 2019 1 \n",
+ "10 1.0 2018 1 \n",
+ "11 15.0 2018 1 \n",
+ "12 19.0 2018 2 "
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
"# Here, we are creating a new dataframe called \"grouped_data\" and grouping by on the column \"Make\"\n",
@@ -1419,7 +1427,7 @@
},
{
"data": {
- "image/png": "\n",
+ "image/png": "",
"text/plain": [
""
]
@@ -1458,84 +1466,84 @@
"execution_count": 24,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " date \n",
- " zipcode \n",
- " modelyear \n",
- " fuel \n",
- " make \n",
- " lightduty \n",
- " vehicles \n",
- " year \n",
- " month \n",
- " day \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 0 \n",
- " 2018-10-01 \n",
- " 90000.0 \n",
- " 2006 \n",
- " Gasoline \n",
- " OTHER/UNK \n",
- " Yes \n",
- " 1.0 \n",
- " 2018 \n",
- " 10 \n",
- " 1 \n",
- " \n",
- " \n",
- " 1 \n",
- " 2018-10-01 \n",
- " 90001.0 \n",
- " 2014 \n",
- " Gasoline \n",
- " OTHER/UNK \n",
- " Yes \n",
- " 1.0 \n",
- " 2018 \n",
- " 10 \n",
- " 1 \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " date zipcode modelyear fuel make lightduty vehicles \\\n",
- "0 2018-10-01 90000.0 2006 Gasoline OTHER/UNK Yes 1.0 \n",
- "1 2018-10-01 90001.0 2014 Gasoline OTHER/UNK Yes 1.0 \n",
- "\n",
- " year month day \n",
- "0 2018 10 1 \n",
- "1 2018 10 1 "
- ]
- },
- "execution_count": 26,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " date \n",
+ " zipcode \n",
+ " modelyear \n",
+ " fuel \n",
+ " make \n",
+ " lightduty \n",
+ " vehicles \n",
+ " year \n",
+ " month \n",
+ " day \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 2018-10-01 \n",
+ " 90000.0 \n",
+ " 2006 \n",
+ " Gasoline \n",
+ " OTHER/UNK \n",
+ " Yes \n",
+ " 1.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2018-10-01 \n",
+ " 90001.0 \n",
+ " 2014 \n",
+ " Gasoline \n",
+ " OTHER/UNK \n",
+ " Yes \n",
+ " 1.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " date zipcode modelyear fuel make lightduty vehicles \\\n",
+ "0 2018-10-01 90000.0 2006 Gasoline OTHER/UNK Yes 1.0 \n",
+ "1 2018-10-01 90001.0 2014 Gasoline OTHER/UNK Yes 1.0 \n",
+ "\n",
+ " year month day \n",
+ "0 2018 10 1 \n",
+ "1 2018 10 1 "
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
"# TODO 3a\n",
@@ -1578,30 +1586,30 @@
"execution_count": 26,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/plain": [
- "2007 53\n",
- "2008 45\n",
- "2006 36\n",
- "2010 34\n",
- "2014 31\n",
- "2015 30\n",
- "2017 29\n",
- "2016 29\n",
- "2013 27\n",
- "2009 25\n",
- "2012 25\n",
- "2011 24\n",
- "2018 23\n",
- "2019 5\n",
- "Name: modelyear, dtype: int64"
- ]
- },
- "execution_count": 28,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/plain": [
+ "2007 53\n",
+ "2008 45\n",
+ "2006 36\n",
+ "2010 34\n",
+ "2014 31\n",
+ "2015 30\n",
+ "2017 29\n",
+ "2016 29\n",
+ "2013 27\n",
+ "2009 25\n",
+ "2012 25\n",
+ "2011 24\n",
+ "2018 23\n",
+ "2019 5\n",
+ "Name: modelyear, dtype: int64"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
"df['modelyear'].value_counts(0)"
@@ -1635,18 +1643,18 @@
"execution_count": 27,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/plain": [
- "Yes 374\n",
- "No 42\n",
- "Name: lightduty, dtype: int64"
- ]
- },
- "execution_count": 29,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/plain": [
+ "Yes 374\n",
+ "No 42\n",
+ "Name: lightduty, dtype: int64"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
"df['lightduty'].value_counts(0)"
@@ -1664,18 +1672,18 @@
"execution_count": 28,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/plain": [
- "1 374\n",
- "0 42\n",
- "Name: lightduty, dtype: int64"
- ]
- },
- "execution_count": 30,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/plain": [
+ "1 374\n",
+ "0 42\n",
+ "Name: lightduty, dtype: int64"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
"df.loc[:,'lightduty'] = df['lightduty'].apply(lambda x: 0 if x=='No' else 1)\n",
@@ -1687,129 +1695,129 @@
"execution_count": 29,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " date \n",
- " zipcode \n",
- " modelyear \n",
- " fuel \n",
- " make \n",
- " lightduty \n",
- " vehicles \n",
- " year \n",
- " month \n",
- " day \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 0 \n",
- " 2018-10-01 \n",
- " 90000.0 \n",
- " 2006 \n",
- " Gasoline \n",
- " OTHER/UNK \n",
- " 1 \n",
- " 1.0 \n",
- " 2018 \n",
- " 10 \n",
- " 1 \n",
- " \n",
- " \n",
- " 1 \n",
- " 2018-10-01 \n",
- " 90001.0 \n",
- " 2014 \n",
- " Gasoline \n",
- " OTHER/UNK \n",
- " 1 \n",
- " 1.0 \n",
- " 2018 \n",
- " 10 \n",
- " 1 \n",
- " \n",
- " \n",
- " 3 \n",
- " 2018-10-01 \n",
- " 90000.0 \n",
- " 2017 \n",
- " Gasoline \n",
- " OTHER/UNK \n",
- " 1 \n",
- " 1.0 \n",
- " 2018 \n",
- " 10 \n",
- " 1 \n",
- " \n",
- " \n",
- " 16 \n",
- " 2018-10-09 \n",
- " 90001.0 \n",
- " 2006 \n",
- " Diesel and Diesel Hybrid \n",
- " Type_C \n",
- " 0 \n",
- " 16.0 \n",
- " 2018 \n",
- " 10 \n",
- " 9 \n",
- " \n",
- " \n",
- " 17 \n",
- " 2018-10-10 \n",
- " 90001.0 \n",
- " 2006 \n",
- " Diesel and Diesel Hybrid \n",
- " OTHER/UNK \n",
- " 0 \n",
- " 23.0 \n",
- " 2018 \n",
- " 10 \n",
- " 10 \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " date zipcode modelyear fuel make \\\n",
- "0 2018-10-01 90000.0 2006 Gasoline OTHER/UNK \n",
- "1 2018-10-01 90001.0 2014 Gasoline OTHER/UNK \n",
- "3 2018-10-01 90000.0 2017 Gasoline OTHER/UNK \n",
- "16 2018-10-09 90001.0 2006 Diesel and Diesel Hybrid Type_C \n",
- "17 2018-10-10 90001.0 2006 Diesel and Diesel Hybrid OTHER/UNK \n",
- "\n",
- " lightduty vehicles year month day \n",
- "0 1 1.0 2018 10 1 \n",
- "1 1 1.0 2018 10 1 \n",
- "3 1 1.0 2018 10 1 \n",
- "16 0 16.0 2018 10 9 \n",
- "17 0 23.0 2018 10 10 "
- ]
- },
- "execution_count": 31,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " date \n",
+ " zipcode \n",
+ " modelyear \n",
+ " fuel \n",
+ " make \n",
+ " lightduty \n",
+ " vehicles \n",
+ " year \n",
+ " month \n",
+ " day \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 2018-10-01 \n",
+ " 90000.0 \n",
+ " 2006 \n",
+ " Gasoline \n",
+ " OTHER/UNK \n",
+ " 1 \n",
+ " 1.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2018-10-01 \n",
+ " 90001.0 \n",
+ " 2014 \n",
+ " Gasoline \n",
+ " OTHER/UNK \n",
+ " 1 \n",
+ " 1.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 2018-10-01 \n",
+ " 90000.0 \n",
+ " 2017 \n",
+ " Gasoline \n",
+ " OTHER/UNK \n",
+ " 1 \n",
+ " 1.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 16 \n",
+ " 2018-10-09 \n",
+ " 90001.0 \n",
+ " 2006 \n",
+ " Diesel and Diesel Hybrid \n",
+ " Type_C \n",
+ " 0 \n",
+ " 16.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 9 \n",
+ " \n",
+ " \n",
+ " 17 \n",
+ " 2018-10-10 \n",
+ " 90001.0 \n",
+ " 2006 \n",
+ " Diesel and Diesel Hybrid \n",
+ " OTHER/UNK \n",
+ " 0 \n",
+ " 23.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 10 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " date zipcode modelyear fuel make \\\n",
+ "0 2018-10-01 90000.0 2006 Gasoline OTHER/UNK \n",
+ "1 2018-10-01 90001.0 2014 Gasoline OTHER/UNK \n",
+ "3 2018-10-01 90000.0 2017 Gasoline OTHER/UNK \n",
+ "16 2018-10-09 90001.0 2006 Diesel and Diesel Hybrid Type_C \n",
+ "17 2018-10-10 90001.0 2006 Diesel and Diesel Hybrid OTHER/UNK \n",
+ "\n",
+ " lightduty vehicles year month day \n",
+ "0 1 1.0 2018 10 1 \n",
+ "1 1 1.0 2018 10 1 \n",
+ "3 1 1.0 2018 10 1 \n",
+ "16 0 16.0 2018 10 9 \n",
+ "17 0 23.0 2018 10 10 "
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
"# Confirm that \"lightduty\" has been converted.\n",
@@ -1840,219 +1848,219 @@
"execution_count": 30,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " zipcode \n",
- " modelyear_2007 \n",
- " modelyear_2008 \n",
- " modelyear_2009 \n",
- " modelyear_2010 \n",
- " modelyear_2011 \n",
- " modelyear_2012 \n",
- " modelyear_2013 \n",
- " modelyear_2014 \n",
- " modelyear_2015 \n",
- " ... \n",
- " make_Type_P \n",
- " make_Type_Q \n",
- " make_Type_R \n",
- " make_Type_S \n",
- " make_Type_T \n",
- " make_Type_U \n",
- " make_Type_V \n",
- " make_Type_W \n",
- " make_Type_X \n",
- " make_Type_Y \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 0 \n",
- " 90000.0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- " 1 \n",
- " 90001.0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 1 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- " 3 \n",
- " 90000.0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- " 16 \n",
- " 90001.0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- " 17 \n",
- " 90001.0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- "
\n",
- "
5 rows × 49 columns
\n",
- "
"
- ],
- "text/plain": [
- " zipcode modelyear_2007 modelyear_2008 modelyear_2009 modelyear_2010 \\\n",
- "0 90000.0 0 0 0 0 \n",
- "1 90001.0 0 0 0 0 \n",
- "3 90000.0 0 0 0 0 \n",
- "16 90001.0 0 0 0 0 \n",
- "17 90001.0 0 0 0 0 \n",
- "\n",
- " modelyear_2011 modelyear_2012 modelyear_2013 modelyear_2014 \\\n",
- "0 0 0 0 0 \n",
- "1 0 0 0 1 \n",
- "3 0 0 0 0 \n",
- "16 0 0 0 0 \n",
- "17 0 0 0 0 \n",
- "\n",
- " modelyear_2015 ... make_Type_P make_Type_Q make_Type_R make_Type_S \\\n",
- "0 0 ... 0 0 0 0 \n",
- "1 0 ... 0 0 0 0 \n",
- "3 0 ... 0 0 0 0 \n",
- "16 0 ... 0 0 0 0 \n",
- "17 0 ... 0 0 0 0 \n",
- "\n",
- " make_Type_T make_Type_U make_Type_V make_Type_W make_Type_X \\\n",
- "0 0 0 0 0 0 \n",
- "1 0 0 0 0 0 \n",
- "3 0 0 0 0 0 \n",
- "16 0 0 0 0 0 \n",
- "17 0 0 0 0 0 \n",
- "\n",
- " make_Type_Y \n",
- "0 0 \n",
- "1 0 \n",
- "3 0 \n",
- "16 0 \n",
- "17 0 \n",
- "\n",
- "[5 rows x 49 columns]"
- ]
- },
- "execution_count": 32,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " zipcode \n",
+ " modelyear_2007 \n",
+ " modelyear_2008 \n",
+ " modelyear_2009 \n",
+ " modelyear_2010 \n",
+ " modelyear_2011 \n",
+ " modelyear_2012 \n",
+ " modelyear_2013 \n",
+ " modelyear_2014 \n",
+ " modelyear_2015 \n",
+ " ... \n",
+ " make_Type_P \n",
+ " make_Type_Q \n",
+ " make_Type_R \n",
+ " make_Type_S \n",
+ " make_Type_T \n",
+ " make_Type_U \n",
+ " make_Type_V \n",
+ " make_Type_W \n",
+ " make_Type_X \n",
+ " make_Type_Y \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 90000.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 90001.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 90000.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 16 \n",
+ " 90001.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 17 \n",
+ " 90001.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
5 rows × 49 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " zipcode modelyear_2007 modelyear_2008 modelyear_2009 modelyear_2010 \\\n",
+ "0 90000.0 0 0 0 0 \n",
+ "1 90001.0 0 0 0 0 \n",
+ "3 90000.0 0 0 0 0 \n",
+ "16 90001.0 0 0 0 0 \n",
+ "17 90001.0 0 0 0 0 \n",
+ "\n",
+ " modelyear_2011 modelyear_2012 modelyear_2013 modelyear_2014 \\\n",
+ "0 0 0 0 0 \n",
+ "1 0 0 0 1 \n",
+ "3 0 0 0 0 \n",
+ "16 0 0 0 0 \n",
+ "17 0 0 0 0 \n",
+ "\n",
+ " modelyear_2015 ... make_Type_P make_Type_Q make_Type_R make_Type_S \\\n",
+ "0 0 ... 0 0 0 0 \n",
+ "1 0 ... 0 0 0 0 \n",
+ "3 0 ... 0 0 0 0 \n",
+ "16 0 ... 0 0 0 0 \n",
+ "17 0 ... 0 0 0 0 \n",
+ "\n",
+ " make_Type_T make_Type_U make_Type_V make_Type_W make_Type_X \\\n",
+ "0 0 0 0 0 0 \n",
+ "1 0 0 0 0 0 \n",
+ "3 0 0 0 0 0 \n",
+ "16 0 0 0 0 0 \n",
+ "17 0 0 0 0 0 \n",
+ "\n",
+ " make_Type_Y \n",
+ "0 0 \n",
+ "1 0 \n",
+ "3 0 \n",
+ "16 0 \n",
+ "17 0 \n",
+ "\n",
+ "[5 rows x 49 columns]"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
"# Making dummy variables for categorical data with more inputs. \n",
@@ -2073,212 +2081,212 @@
"execution_count": 31,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " date \n",
- " zipcode \n",
- " modelyear \n",
- " fuel \n",
- " make \n",
- " lightduty \n",
- " vehicles \n",
- " year \n",
- " month \n",
- " day \n",
- " ... \n",
- " make_Type_P \n",
- " make_Type_Q \n",
- " make_Type_R \n",
- " make_Type_S \n",
- " make_Type_T \n",
- " make_Type_U \n",
- " make_Type_V \n",
- " make_Type_W \n",
- " make_Type_X \n",
- " make_Type_Y \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 0 \n",
- " 2018-10-01 \n",
- " 90000.0 \n",
- " 2006 \n",
- " Gasoline \n",
- " OTHER/UNK \n",
- " 1 \n",
- " 1.0 \n",
- " 2018 \n",
- " 10 \n",
- " 1 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- " 1 \n",
- " 2018-10-01 \n",
- " 90001.0 \n",
- " 2014 \n",
- " Gasoline \n",
- " OTHER/UNK \n",
- " 1 \n",
- " 1.0 \n",
- " 2018 \n",
- " 10 \n",
- " 1 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- " 3 \n",
- " 2018-10-01 \n",
- " 90000.0 \n",
- " 2017 \n",
- " Gasoline \n",
- " OTHER/UNK \n",
- " 1 \n",
- " 1.0 \n",
- " 2018 \n",
- " 10 \n",
- " 1 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- " 16 \n",
- " 2018-10-09 \n",
- " 90001.0 \n",
- " 2006 \n",
- " Diesel and Diesel Hybrid \n",
- " Type_C \n",
- " 0 \n",
- " 16.0 \n",
- " 2018 \n",
- " 10 \n",
- " 9 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- " 17 \n",
- " 2018-10-10 \n",
- " 90001.0 \n",
- " 2006 \n",
- " Diesel and Diesel Hybrid \n",
- " OTHER/UNK \n",
- " 0 \n",
- " 23.0 \n",
- " 2018 \n",
- " 10 \n",
- " 10 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- "
\n",
- "
5 rows × 59 columns
\n",
- "
"
- ],
- "text/plain": [
- " date zipcode modelyear fuel make \\\n",
- "0 2018-10-01 90000.0 2006 Gasoline OTHER/UNK \n",
- "1 2018-10-01 90001.0 2014 Gasoline OTHER/UNK \n",
- "3 2018-10-01 90000.0 2017 Gasoline OTHER/UNK \n",
- "16 2018-10-09 90001.0 2006 Diesel and Diesel Hybrid Type_C \n",
- "17 2018-10-10 90001.0 2006 Diesel and Diesel Hybrid OTHER/UNK \n",
- "\n",
- " lightduty vehicles year month day ... make_Type_P make_Type_Q \\\n",
- "0 1 1.0 2018 10 1 ... 0 0 \n",
- "1 1 1.0 2018 10 1 ... 0 0 \n",
- "3 1 1.0 2018 10 1 ... 0 0 \n",
- "16 0 16.0 2018 10 9 ... 0 0 \n",
- "17 0 23.0 2018 10 10 ... 0 0 \n",
- "\n",
- " make_Type_R make_Type_S make_Type_T make_Type_U make_Type_V \\\n",
- "0 0 0 0 0 0 \n",
- "1 0 0 0 0 0 \n",
- "3 0 0 0 0 0 \n",
- "16 0 0 0 0 0 \n",
- "17 0 0 0 0 0 \n",
- "\n",
- " make_Type_W make_Type_X make_Type_Y \n",
- "0 0 0 0 \n",
- "1 0 0 0 \n",
- "3 0 0 0 \n",
- "16 0 0 0 \n",
- "17 0 0 0 \n",
- "\n",
- "[5 rows x 59 columns]"
- ]
- },
- "execution_count": 33,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " date \n",
+ " zipcode \n",
+ " modelyear \n",
+ " fuel \n",
+ " make \n",
+ " lightduty \n",
+ " vehicles \n",
+ " year \n",
+ " month \n",
+ " day \n",
+ " ... \n",
+ " make_Type_P \n",
+ " make_Type_Q \n",
+ " make_Type_R \n",
+ " make_Type_S \n",
+ " make_Type_T \n",
+ " make_Type_U \n",
+ " make_Type_V \n",
+ " make_Type_W \n",
+ " make_Type_X \n",
+ " make_Type_Y \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 2018-10-01 \n",
+ " 90000.0 \n",
+ " 2006 \n",
+ " Gasoline \n",
+ " OTHER/UNK \n",
+ " 1 \n",
+ " 1.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 1 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2018-10-01 \n",
+ " 90001.0 \n",
+ " 2014 \n",
+ " Gasoline \n",
+ " OTHER/UNK \n",
+ " 1 \n",
+ " 1.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 1 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 2018-10-01 \n",
+ " 90000.0 \n",
+ " 2017 \n",
+ " Gasoline \n",
+ " OTHER/UNK \n",
+ " 1 \n",
+ " 1.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 1 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 16 \n",
+ " 2018-10-09 \n",
+ " 90001.0 \n",
+ " 2006 \n",
+ " Diesel and Diesel Hybrid \n",
+ " Type_C \n",
+ " 0 \n",
+ " 16.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 9 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 17 \n",
+ " 2018-10-10 \n",
+ " 90001.0 \n",
+ " 2006 \n",
+ " Diesel and Diesel Hybrid \n",
+ " OTHER/UNK \n",
+ " 0 \n",
+ " 23.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 10 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
5 rows × 59 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " date zipcode modelyear fuel make \\\n",
+ "0 2018-10-01 90000.0 2006 Gasoline OTHER/UNK \n",
+ "1 2018-10-01 90001.0 2014 Gasoline OTHER/UNK \n",
+ "3 2018-10-01 90000.0 2017 Gasoline OTHER/UNK \n",
+ "16 2018-10-09 90001.0 2006 Diesel and Diesel Hybrid Type_C \n",
+ "17 2018-10-10 90001.0 2006 Diesel and Diesel Hybrid OTHER/UNK \n",
+ "\n",
+ " lightduty vehicles year month day ... make_Type_P make_Type_Q \\\n",
+ "0 1 1.0 2018 10 1 ... 0 0 \n",
+ "1 1 1.0 2018 10 1 ... 0 0 \n",
+ "3 1 1.0 2018 10 1 ... 0 0 \n",
+ "16 0 16.0 2018 10 9 ... 0 0 \n",
+ "17 0 23.0 2018 10 10 ... 0 0 \n",
+ "\n",
+ " make_Type_R make_Type_S make_Type_T make_Type_U make_Type_V \\\n",
+ "0 0 0 0 0 0 \n",
+ "1 0 0 0 0 0 \n",
+ "3 0 0 0 0 0 \n",
+ "16 0 0 0 0 0 \n",
+ "17 0 0 0 0 0 \n",
+ "\n",
+ " make_Type_W make_Type_X make_Type_Y \n",
+ "0 0 0 0 \n",
+ "1 0 0 0 \n",
+ "3 0 0 0 \n",
+ "16 0 0 0 \n",
+ "17 0 0 0 \n",
+ "\n",
+ "[5 rows x 59 columns]"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
"# TODO 4a\n",
@@ -2307,212 +2315,212 @@
"execution_count": 33,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " lightduty \n",
- " vehicles \n",
- " year \n",
- " month \n",
- " day \n",
- " modelyear_2007 \n",
- " modelyear_2008 \n",
- " modelyear_2009 \n",
- " modelyear_2010 \n",
- " modelyear_2011 \n",
- " ... \n",
- " make_Type_P \n",
- " make_Type_Q \n",
- " make_Type_R \n",
- " make_Type_S \n",
- " make_Type_T \n",
- " make_Type_U \n",
- " make_Type_V \n",
- " make_Type_W \n",
- " make_Type_X \n",
- " make_Type_Y \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 0 \n",
- " 1 \n",
- " 1.0 \n",
- " 2018 \n",
- " 10 \n",
- " 1 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- " 1 \n",
- " 1 \n",
- " 1.0 \n",
- " 2018 \n",
- " 10 \n",
- " 1 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- " 3 \n",
- " 1 \n",
- " 1.0 \n",
- " 2018 \n",
- " 10 \n",
- " 1 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- " 16 \n",
- " 0 \n",
- " 16.0 \n",
- " 2018 \n",
- " 10 \n",
- " 9 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- " 17 \n",
- " 0 \n",
- " 23.0 \n",
- " 2018 \n",
- " 10 \n",
- " 10 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- "
\n",
- "
5 rows × 53 columns
\n",
- "
"
- ],
- "text/plain": [
- " lightduty vehicles year month day modelyear_2007 modelyear_2008 \\\n",
- "0 1 1.0 2018 10 1 0 0 \n",
- "1 1 1.0 2018 10 1 0 0 \n",
- "3 1 1.0 2018 10 1 0 0 \n",
- "16 0 16.0 2018 10 9 0 0 \n",
- "17 0 23.0 2018 10 10 0 0 \n",
- "\n",
- " modelyear_2009 modelyear_2010 modelyear_2011 ... make_Type_P \\\n",
- "0 0 0 0 ... 0 \n",
- "1 0 0 0 ... 0 \n",
- "3 0 0 0 ... 0 \n",
- "16 0 0 0 ... 0 \n",
- "17 0 0 0 ... 0 \n",
- "\n",
- " make_Type_Q make_Type_R make_Type_S make_Type_T make_Type_U \\\n",
- "0 0 0 0 0 0 \n",
- "1 0 0 0 0 0 \n",
- "3 0 0 0 0 0 \n",
- "16 0 0 0 0 0 \n",
- "17 0 0 0 0 0 \n",
- "\n",
- " make_Type_V make_Type_W make_Type_X make_Type_Y \n",
- "0 0 0 0 0 \n",
- "1 0 0 0 0 \n",
- "3 0 0 0 0 \n",
- "16 0 0 0 0 \n",
- "17 0 0 0 0 \n",
- "\n",
- "[5 rows x 53 columns]"
- ]
- },
- "execution_count": 35,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " lightduty \n",
+ " vehicles \n",
+ " year \n",
+ " month \n",
+ " day \n",
+ " modelyear_2007 \n",
+ " modelyear_2008 \n",
+ " modelyear_2009 \n",
+ " modelyear_2010 \n",
+ " modelyear_2011 \n",
+ " ... \n",
+ " make_Type_P \n",
+ " make_Type_Q \n",
+ " make_Type_R \n",
+ " make_Type_S \n",
+ " make_Type_T \n",
+ " make_Type_U \n",
+ " make_Type_V \n",
+ " make_Type_W \n",
+ " make_Type_X \n",
+ " make_Type_Y \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 1 \n",
+ " 1.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 1 \n",
+ " 1.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 1 \n",
+ " 1.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 16 \n",
+ " 0 \n",
+ " 16.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 9 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 17 \n",
+ " 0 \n",
+ " 23.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 10 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
5 rows × 53 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " lightduty vehicles year month day modelyear_2007 modelyear_2008 \\\n",
+ "0 1 1.0 2018 10 1 0 0 \n",
+ "1 1 1.0 2018 10 1 0 0 \n",
+ "3 1 1.0 2018 10 1 0 0 \n",
+ "16 0 16.0 2018 10 9 0 0 \n",
+ "17 0 23.0 2018 10 10 0 0 \n",
+ "\n",
+ " modelyear_2009 modelyear_2010 modelyear_2011 ... make_Type_P \\\n",
+ "0 0 0 0 ... 0 \n",
+ "1 0 0 0 ... 0 \n",
+ "3 0 0 0 ... 0 \n",
+ "16 0 0 0 ... 0 \n",
+ "17 0 0 0 ... 0 \n",
+ "\n",
+ " make_Type_Q make_Type_R make_Type_S make_Type_T make_Type_U \\\n",
+ "0 0 0 0 0 0 \n",
+ "1 0 0 0 0 0 \n",
+ "3 0 0 0 0 0 \n",
+ "16 0 0 0 0 0 \n",
+ "17 0 0 0 0 0 \n",
+ "\n",
+ " make_Type_V make_Type_W make_Type_X make_Type_Y \n",
+ "0 0 0 0 0 \n",
+ "1 0 0 0 0 \n",
+ "3 0 0 0 0 \n",
+ "16 0 0 0 0 \n",
+ "17 0 0 0 0 \n",
+ "\n",
+ "[5 rows x 53 columns]"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
"# Confirm that 'zipcode','modelyear', 'fuel', and 'make' have been dropped.\n",
@@ -2539,16 +2547,16 @@
"execution_count": 34,
"metadata": {},
"outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Unique values of month: [10 11 12 1 2 3]\n",
- "Unique values of day: [ 1 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31\n",
- " 2 3 4 5 6 7 8]\n",
- "Unique values of year: [2018 2019]\n"
- ]
- }
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Unique values of month: [10 11 12 1 2 3]\n",
+ "Unique values of day: [ 1 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31\n",
+ " 2 3 4 5 6 7 8]\n",
+ "Unique values of year: [2018 2019]\n"
+ ]
+ }
],
"source": [
"print ('Unique values of month:',df.month.unique())\n",
@@ -2592,184 +2600,184 @@
"execution_count": 39,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " lightduty \n",
- " vehicles \n",
- " modelyear_2007 \n",
- " modelyear_2008 \n",
- " modelyear_2009 \n",
- " modelyear_2010 \n",
- " modelyear_2011 \n",
- " modelyear_2012 \n",
- " modelyear_2013 \n",
- " modelyear_2014 \n",
- " ... \n",
- " make_Type_T \n",
- " make_Type_U \n",
- " make_Type_V \n",
- " make_Type_W \n",
- " make_Type_X \n",
- " make_Type_Y \n",
- " day_sin \n",
- " day_cos \n",
- " month_sin \n",
- " month_cos \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 495 \n",
- " 1 \n",
- " 58.0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 1 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0.724793 \n",
- " 0.688967 \n",
- " -0.5 \n",
- " 0.866025 \n",
- " \n",
- " \n",
- " 496 \n",
- " 1 \n",
- " 45.0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 1 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0.848644 \n",
- " 0.528964 \n",
- " -0.5 \n",
- " 0.866025 \n",
- " \n",
- " \n",
- " 497 \n",
- " 1 \n",
- " 82.0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 1 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0.937752 \n",
- " 0.347305 \n",
- " -0.5 \n",
- " 0.866025 \n",
- " \n",
- " \n",
- " 498 \n",
- " 1 \n",
- " 12.0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 1 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0.988468 \n",
- " 0.151428 \n",
- " -0.5 \n",
- " 0.866025 \n",
- " \n",
- " \n",
- "
\n",
- "
4 rows × 54 columns
\n",
- "
"
- ],
- "text/plain": [
- " lightduty vehicles modelyear_2007 modelyear_2008 modelyear_2009 \\\n",
- "495 1 58.0 0 0 0 \n",
- "496 1 45.0 0 0 0 \n",
- "497 1 82.0 0 0 0 \n",
- "498 1 12.0 0 0 0 \n",
- "\n",
- " modelyear_2010 modelyear_2011 modelyear_2012 modelyear_2013 \\\n",
- "495 1 0 0 0 \n",
- "496 1 0 0 0 \n",
- "497 1 0 0 0 \n",
- "498 1 0 0 0 \n",
- "\n",
- " modelyear_2014 ... make_Type_T make_Type_U make_Type_V make_Type_W \\\n",
- "495 0 ... 0 0 0 0 \n",
- "496 0 ... 0 0 0 0 \n",
- "497 0 ... 0 0 0 0 \n",
- "498 0 ... 0 0 0 0 \n",
- "\n",
- " make_Type_X make_Type_Y day_sin day_cos month_sin month_cos \n",
- "495 0 0 0.724793 0.688967 -0.5 0.866025 \n",
- "496 0 0 0.848644 0.528964 -0.5 0.866025 \n",
- "497 0 0 0.937752 0.347305 -0.5 0.866025 \n",
- "498 0 0 0.988468 0.151428 -0.5 0.866025 \n",
- "\n",
- "[4 rows x 54 columns]"
- ]
- },
- "execution_count": 38,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " lightduty \n",
+ " vehicles \n",
+ " modelyear_2007 \n",
+ " modelyear_2008 \n",
+ " modelyear_2009 \n",
+ " modelyear_2010 \n",
+ " modelyear_2011 \n",
+ " modelyear_2012 \n",
+ " modelyear_2013 \n",
+ " modelyear_2014 \n",
+ " ... \n",
+ " make_Type_T \n",
+ " make_Type_U \n",
+ " make_Type_V \n",
+ " make_Type_W \n",
+ " make_Type_X \n",
+ " make_Type_Y \n",
+ " day_sin \n",
+ " day_cos \n",
+ " month_sin \n",
+ " month_cos \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 495 \n",
+ " 1 \n",
+ " 58.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0.724793 \n",
+ " 0.688967 \n",
+ " -0.5 \n",
+ " 0.866025 \n",
+ " \n",
+ " \n",
+ " 496 \n",
+ " 1 \n",
+ " 45.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0.848644 \n",
+ " 0.528964 \n",
+ " -0.5 \n",
+ " 0.866025 \n",
+ " \n",
+ " \n",
+ " 497 \n",
+ " 1 \n",
+ " 82.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0.937752 \n",
+ " 0.347305 \n",
+ " -0.5 \n",
+ " 0.866025 \n",
+ " \n",
+ " \n",
+ " 498 \n",
+ " 1 \n",
+ " 12.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0.988468 \n",
+ " 0.151428 \n",
+ " -0.5 \n",
+ " 0.866025 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
4 rows × 54 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " lightduty vehicles modelyear_2007 modelyear_2008 modelyear_2009 \\\n",
+ "495 1 58.0 0 0 0 \n",
+ "496 1 45.0 0 0 0 \n",
+ "497 1 82.0 0 0 0 \n",
+ "498 1 12.0 0 0 0 \n",
+ "\n",
+ " modelyear_2010 modelyear_2011 modelyear_2012 modelyear_2013 \\\n",
+ "495 1 0 0 0 \n",
+ "496 1 0 0 0 \n",
+ "497 1 0 0 0 \n",
+ "498 1 0 0 0 \n",
+ "\n",
+ " modelyear_2014 ... make_Type_T make_Type_U make_Type_V make_Type_W \\\n",
+ "495 0 ... 0 0 0 0 \n",
+ "496 0 ... 0 0 0 0 \n",
+ "497 0 ... 0 0 0 0 \n",
+ "498 0 ... 0 0 0 0 \n",
+ "\n",
+ " make_Type_X make_Type_Y day_sin day_cos month_sin month_cos \n",
+ "495 0 0 0.724793 0.688967 -0.5 0.866025 \n",
+ "496 0 0 0.848644 0.528964 -0.5 0.866025 \n",
+ "497 0 0 0.937752 0.347305 -0.5 0.866025 \n",
+ "498 0 0 0.988468 0.151428 -0.5 0.866025 \n",
+ "\n",
+ "[4 rows x 54 columns]"
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
- "# scroll left to see the converted month and day coluumns.\n",
+ "# scroll left to see the converted month and day columns.\n",
diff --git a/courses/machine_learning/deepdive2/launching_into_ml/solutions/improve_data_quality.ipynb b/courses/machine_learning/deepdive2/launching_into_ml/solutions/improve_data_quality.ipynb
index 66319e42a6..9d6c877338 100644
--- a/courses/machine_learning/deepdive2/launching_into_ml/solutions/improve_data_quality.ipynb
+++ b/courses/machine_learning/deepdive2/launching_into_ml/solutions/improve_data_quality.ipynb
@@ -38,6 +38,14 @@
"!sudo chown -R jupyter:jupyter /home/jupyter/training-data-analyst"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null, "metadata": {}, "outputs": [],
+ "source": [
+ "# Install TensorFlow Libraries\n",
+ "!pip install tensorflow"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -301,21 +309,21 @@
"metadata": {},
"outputs": [
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "RangeIndex: 499 entries, 0 to 498\n",
- "Data columns (total 7 columns):\n",
- "Date 497 non-null object\n",
- "Zip Code 497 non-null object\n",
- "Model Year 497 non-null object\n",
- "Fuel 497 non-null object\n",
- "Make 496 non-null object\n",
- "Light_Duty 496 non-null object\n",
- "Vehicles 496 non-null float64\n",
- "dtypes: float64(2), object(5)\n",
- "memory usage: 27.4+ KB\n"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 499 entries, 0 to 498\n",
+ "Data columns (total 7 columns):\n",
+ "Date 497 non-null object\n",
+ "Zip Code 497 non-null object\n",
+ "Model Year 497 non-null object\n",
+ "Fuel 497 non-null object\n",
+ "Make 496 non-null object\n",
+ "Light_Duty 496 non-null object\n",
+ "Vehicles 496 non-null float64\n",
+ "dtypes: float64(2), object(5)\n",
+ "memory usage: 27.4+ KB\n"
]
}
],
@@ -337,36 +345,36 @@
"metadata": {},
"outputs": [
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " Date Zip Code Model Year Fuel Make \\\n",
- "0 10/1/2018 90000 2006 Gasoline OTHER/UNK \n",
- "1 10/1/2018 NaN 2014 Gasoline NaN \n",
- "2 NaN 90000 NaN Gasoline OTHER/UNK \n",
- "3 10/1/2018 90000 2017 Gasoline OTHER/UNK \n",
- "4 10/1/2018 90000 <2006 Diesel and Diesel Hybrid OTHER/UNK \n",
- ".. ... ... ... ... ... \n",
- "494 6/7/2019 90003 2012 Gasoline Type_R \n",
- "495 6/8/2019 90003 2012 Hybrid Gasoline OTHER/UNK \n",
- "496 6/9/2019 90003 2012 Hybrid Gasoline Type_Q \n",
- "497 6/10/2019 90003 2012 Natural Gas OTHER/UNK \n",
- "498 6/11/2019 90003 2012 Plug-in Hybrid OTHER/UNK \n",
- "\n",
- " Light_Duty Vehicles \n",
- "0 NaN 1.0 \n",
- "1 Yes 1.0 \n",
- "2 Yes NaN \n",
- "3 Yes 1.0 \n",
- "4 No 55.0 \n",
- ".. ... ... \n",
- "494 Yes 26.0 \n",
- "495 Yes 4.0 \n",
- "496 Yes 25.0 \n",
- "497 Yes 1.0 \n",
- "498 Yes 3.0 \n",
- "\n",
- "[499 rows x 7 columns] 5\n"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " Date Zip Code Model Year Fuel Make \\\n",
+ "0 10/1/2018 90000 2006 Gasoline OTHER/UNK \n",
+ "1 10/1/2018 NaN 2014 Gasoline NaN \n",
+ "2 NaN 90000 NaN Gasoline OTHER/UNK \n",
+ "3 10/1/2018 90000 2017 Gasoline OTHER/UNK \n",
+ "4 10/1/2018 90000 <2006 Diesel and Diesel Hybrid OTHER/UNK \n",
+ ".. ... ... ... ... ... \n",
+ "494 6/7/2019 90003 2012 Gasoline Type_R \n",
+ "495 6/8/2019 90003 2012 Hybrid Gasoline OTHER/UNK \n",
+ "496 6/9/2019 90003 2012 Hybrid Gasoline Type_Q \n",
+ "497 6/10/2019 90003 2012 Natural Gas OTHER/UNK \n",
+ "498 6/11/2019 90003 2012 Plug-in Hybrid OTHER/UNK \n",
+ "\n",
+ " Light_Duty Vehicles \n",
+ "0 NaN 1.0 \n",
+ "1 Yes 1.0 \n",
+ "2 Yes NaN \n",
+ "3 Yes 1.0 \n",
+ "4 No 55.0 \n",
+ ".. ... ... \n",
+ "494 Yes 26.0 \n",
+ "495 Yes 4.0 \n",
+ "496 Yes 25.0 \n",
+ "497 Yes 1.0 \n",
+ "498 Yes 3.0 \n",
+ "\n",
+ "[499 rows x 7 columns] 5\n"
]
}
],
@@ -389,92 +397,92 @@
"execution_count": 9,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " Zip Code \n",
- " Vehicles \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " count \n",
- " 497.00000 \n",
- " 496.000000 \n",
- " \n",
- " \n",
- " mean \n",
- " 89838.23340 \n",
- " 74.512097 \n",
- " \n",
- " \n",
- " std \n",
- " 3633.35609 \n",
- " 243.839871 \n",
- " \n",
- " \n",
- " min \n",
- " 9001.00000 \n",
- " 1.000000 \n",
- " \n",
- " \n",
- " 25% \n",
- " 90001.00000 \n",
- " 14.000000 \n",
- " \n",
- " \n",
- " 50% \n",
- " 90001.00000 \n",
- " 25.000000 \n",
- " \n",
- " \n",
- " 75% \n",
- " 90001.00000 \n",
- " 56.250000 \n",
- " \n",
- " \n",
- " max \n",
- " 90002.00000 \n",
- " 3178.000000 \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Zip Code Vehicles\n",
- "count 497.00000 496.000000\n",
- "mean 89838.23340 74.512097\n",
- "std 3633.35609 243.839871\n",
- "min 9001.00000 1.000000\n",
- "25% 90001.00000 14.000000\n",
- "50% 90001.00000 25.000000\n",
- "75% 90001.00000 56.250000\n",
- "max 90002.00000 3178.000000"
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Zip Code \n",
+ " Vehicles \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " count \n",
+ " 497.00000 \n",
+ " 496.000000 \n",
+ " \n",
+ " \n",
+ " mean \n",
+ " 89838.23340 \n",
+ " 74.512097 \n",
+ " \n",
+ " \n",
+ " std \n",
+ " 3633.35609 \n",
+ " 243.839871 \n",
+ " \n",
+ " \n",
+ " min \n",
+ " 9001.00000 \n",
+ " 1.000000 \n",
+ " \n",
+ " \n",
+ " 25% \n",
+ " 90001.00000 \n",
+ " 14.000000 \n",
+ " \n",
+ " \n",
+ " 50% \n",
+ " 90001.00000 \n",
+ " 25.000000 \n",
+ " \n",
+ " \n",
+ " 75% \n",
+ " 90001.00000 \n",
+ " 56.250000 \n",
+ " \n",
+ " \n",
+ " max \n",
+ " 90002.00000 \n",
+ " 3178.000000 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Zip Code Vehicles\n",
+ "count 497.00000 496.000000\n",
+ "mean 89838.23340 74.512097\n",
+ "std 3633.35609 243.839871\n",
+ "min 9001.00000 1.000000\n",
+ "25% 90001.00000 14.000000\n",
+ "50% 90001.00000 25.000000\n",
+ "75% 90001.00000 56.250000\n",
+ "max 90002.00000 3178.000000"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
"# We can use .describe() to see some summary statistics for the numeric fields in our dataframe.\n",
@@ -493,149 +501,149 @@
"execution_count": 10,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " Date \n",
- " Zip Code \n",
- " Model Year \n",
- " Make \n",
- " Light_Duty \n",
- " Vehicles \n",
- " \n",
- " \n",
- " Fuel \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " Battery Electric \n",
- " 10/1/2018 \n",
- " 90000.0 \n",
- " <2006 \n",
- " OTHER/UNK \n",
- " No \n",
- " 4.0 \n",
- " \n",
- " \n",
- " Diesel and Diesel Hybrid \n",
- " 10/1/2018 \n",
- " 90000.0 \n",
- " <2006 \n",
- " OTHER/UNK \n",
- " No \n",
- " 55.0 \n",
- " \n",
- " \n",
- " Flex-Fuel \n",
- " 10/14/2018 \n",
- " 90001.0 \n",
- " 2007 \n",
- " Type_A \n",
- " Yes \n",
- " 78.0 \n",
- " \n",
- " \n",
- " Gasoline \n",
- " 10/1/2018 \n",
- " 90000.0 \n",
- " 2006 \n",
- " OTHER/UNK \n",
- " Yes \n",
- " 1.0 \n",
- " \n",
- " \n",
- " Hybrid Gasoline \n",
- " 10/24/2018 \n",
- " 90001.0 \n",
- " 2009 \n",
- " OTHER/UNK \n",
- " Yes \n",
- " 18.0 \n",
- " \n",
- " \n",
- " Natural Gas \n",
- " 10/25/2018 \n",
- " 90001.0 \n",
- " 2009 \n",
- " OTHER/UNK \n",
- " No \n",
- " 2.0 \n",
- " \n",
- " \n",
- " Other \n",
- " 10/8/2018 \n",
- " 90000.0 \n",
- " <2006 \n",
- " OTHER/UNK \n",
- " Yes \n",
- " 6.0 \n",
- " \n",
- " \n",
- " Plug-in Hybrid \n",
- " 11/2/2018 \n",
- " 90001.0 \n",
- " 2012 \n",
- " OTHER/UNK \n",
- " Yes \n",
- " 1.0 \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Date Zip Code Model Year Make \\\n",
- "Fuel \n",
- "Battery Electric 10/1/2018 90000.0 <2006 OTHER/UNK \n",
- "Diesel and Diesel Hybrid 10/1/2018 90000.0 <2006 OTHER/UNK \n",
- "Flex-Fuel 10/14/2018 90001.0 2007 Type_A \n",
- "Gasoline 10/1/2018 90000.0 2006 OTHER/UNK \n",
- "Hybrid Gasoline 10/24/2018 90001.0 2009 OTHER/UNK \n",
- "Natural Gas 10/25/2018 90001.0 2009 OTHER/UNK \n",
- "Other 10/8/2018 90000.0 <2006 OTHER/UNK \n",
- "Plug-in Hybrid 11/2/2018 90001.0 2012 OTHER/UNK \n",
- "\n",
- " Light_Duty Vehicles \n",
- "Fuel \n",
- "Battery Electric No 4.0 \n",
- "Diesel and Diesel Hybrid No 55.0 \n",
- "Flex-Fuel Yes 78.0 \n",
- "Gasoline Yes 1.0 \n",
- "Hybrid Gasoline Yes 18.0 \n",
- "Natural Gas No 2.0 \n",
- "Other Yes 6.0 \n",
- "Plug-in Hybrid Yes 1.0 "
- ]
- },
- "execution_count": 11,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Date \n",
+ " Zip Code \n",
+ " Model Year \n",
+ " Make \n",
+ " Light_Duty \n",
+ " Vehicles \n",
+ " \n",
+ " \n",
+ " Fuel \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Battery Electric \n",
+ " 10/1/2018 \n",
+ " 90000.0 \n",
+ " <2006 \n",
+ " OTHER/UNK \n",
+ " No \n",
+ " 4.0 \n",
+ " \n",
+ " \n",
+ " Diesel and Diesel Hybrid \n",
+ " 10/1/2018 \n",
+ " 90000.0 \n",
+ " <2006 \n",
+ " OTHER/UNK \n",
+ " No \n",
+ " 55.0 \n",
+ " \n",
+ " \n",
+ " Flex-Fuel \n",
+ " 10/14/2018 \n",
+ " 90001.0 \n",
+ " 2007 \n",
+ " Type_A \n",
+ " Yes \n",
+ " 78.0 \n",
+ " \n",
+ " \n",
+ " Gasoline \n",
+ " 10/1/2018 \n",
+ " 90000.0 \n",
+ " 2006 \n",
+ " OTHER/UNK \n",
+ " Yes \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ " Hybrid Gasoline \n",
+ " 10/24/2018 \n",
+ " 90001.0 \n",
+ " 2009 \n",
+ " OTHER/UNK \n",
+ " Yes \n",
+ " 18.0 \n",
+ " \n",
+ " \n",
+ " Natural Gas \n",
+ " 10/25/2018 \n",
+ " 90001.0 \n",
+ " 2009 \n",
+ " OTHER/UNK \n",
+ " No \n",
+ " 2.0 \n",
+ " \n",
+ " \n",
+ " Other \n",
+ " 10/8/2018 \n",
+ " 90000.0 \n",
+ " <2006 \n",
+ " OTHER/UNK \n",
+ " Yes \n",
+ " 6.0 \n",
+ " \n",
+ " \n",
+ " Plug-in Hybrid \n",
+ " 11/2/2018 \n",
+ " 90001.0 \n",
+ " 2012 \n",
+ " OTHER/UNK \n",
+ " Yes \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Date Zip Code Model Year Make \\\n",
+ "Fuel \n",
+ "Battery Electric 10/1/2018 90000.0 <2006 OTHER/UNK \n",
+ "Diesel and Diesel Hybrid 10/1/2018 90000.0 <2006 OTHER/UNK \n",
+ "Flex-Fuel 10/14/2018 90001.0 2007 Type_A \n",
+ "Gasoline 10/1/2018 90000.0 2006 OTHER/UNK \n",
+ "Hybrid Gasoline 10/24/2018 90001.0 2009 OTHER/UNK \n",
+ "Natural Gas 10/25/2018 90001.0 2009 OTHER/UNK \n",
+ "Other 10/8/2018 90000.0 <2006 OTHER/UNK \n",
+ "Plug-in Hybrid 11/2/2018 90001.0 2012 OTHER/UNK \n",
+ "\n",
+ " Light_Duty Vehicles \n",
+ "Fuel \n",
+ "Battery Electric No 4.0 \n",
+ "Diesel and Diesel Hybrid No 55.0 \n",
+ "Flex-Fuel Yes 78.0 \n",
+ "Gasoline Yes 1.0 \n",
+ "Hybrid Gasoline Yes 18.0 \n",
+ "Natural Gas No 2.0 \n",
+ "Other Yes 6.0 \n",
+ "Plug-in Hybrid Yes 1.0 "
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
"# The .groupby() function is used for spliting the data into groups based on some criteria.\n",
@@ -666,23 +674,23 @@
"execution_count": 11,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/plain": [
- "Date 2\n",
- "Zip Code 2\n",
- "Model Year 2\n",
- "Fuel 2\n",
- "Make 3\n",
- "Light_Duty 3\n",
- "Vehicles 3\n",
- "dtype: int64"
- ]
- },
- "execution_count": 12,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/plain": [
+ "Date 2\n",
+ "Zip Code 2\n",
+ "Model Year 2\n",
+ "Fuel 2\n",
+ "Make 3\n",
+ "Light_Duty 3\n",
+ "Vehicles 3\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
"df_transport.isnull().sum()"
@@ -700,36 +708,36 @@
"execution_count": 12,
"metadata": {},
"outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "0 10/1/2018\n",
- "1 10/1/2018\n",
- "2 NaN\n",
- "3 10/1/2018\n",
- "4 10/1/2018\n",
- " ... \n",
- "494 12/3/2018\n",
- "495 12/4/2018\n",
- "496 12/5/2018\n",
- "497 12/6/2018\n",
- "498 12/7/2018\n",
- "Name: Date, Length: 499, dtype: object\n",
- "0 False\n",
- "1 False\n",
- "2 True\n",
- "3 False\n",
- "4 False\n",
- " ... \n",
- "494 False\n",
- "495 False\n",
- "496 False\n",
- "497 False\n",
- "498 False\n",
- "Name: Date, Length: 499, dtype: bool\n"
- ]
- }
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0 10/1/2018\n",
+ "1 10/1/2018\n",
+ "2 NaN\n",
+ "3 10/1/2018\n",
+ "4 10/1/2018\n",
+ " ... \n",
+ "494 12/3/2018\n",
+ "495 12/4/2018\n",
+ "496 12/5/2018\n",
+ "497 12/6/2018\n",
+ "498 12/7/2018\n",
+ "Name: Date, Length: 499, dtype: object\n",
+ "0 False\n",
+ "1 False\n",
+ "2 True\n",
+ "3 False\n",
+ "4 False\n",
+ " ... \n",
+ "494 False\n",
+ "495 False\n",
+ "496 False\n",
+ "497 False\n",
+ "498 False\n",
+ "Name: Date, Length: 499, dtype: bool\n"
+ ]
+ }
],
"source": [
"print (df_transport['Date'])\n",
@@ -741,36 +749,36 @@
"execution_count": 13,
"metadata": {},
"outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "0 OTHER/UNK\n",
- "1 NaN\n",
- "2 OTHER/UNK\n",
- "3 OTHER/UNK\n",
- "4 OTHER/UNK\n",
- " ... \n",
- "494 Type_I\n",
- "495 Type_B\n",
- "496 Type_C\n",
- "497 Type_J\n",
- "498 Type_J\n",
- "Name: Make, Length: 499, dtype: object\n",
- "0 False\n",
- "1 True\n",
- "2 False\n",
- "3 False\n",
- "4 False\n",
- " ... \n",
- "494 False\n",
- "495 False\n",
- "496 False\n",
- "497 False\n",
- "498 False\n",
- "Name: Make, Length: 499, dtype: bool\n"
- ]
- }
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0 OTHER/UNK\n",
+ "1 NaN\n",
+ "2 OTHER/UNK\n",
+ "3 OTHER/UNK\n",
+ "4 OTHER/UNK\n",
+ " ... \n",
+ "494 Type_I\n",
+ "495 Type_B\n",
+ "496 Type_C\n",
+ "497 Type_J\n",
+ "498 Type_J\n",
+ "Name: Make, Length: 499, dtype: object\n",
+ "0 False\n",
+ "1 True\n",
+ "2 False\n",
+ "3 False\n",
+ "4 False\n",
+ " ... \n",
+ "494 False\n",
+ "495 False\n",
+ "496 False\n",
+ "497 False\n",
+ "498 False\n",
+ "Name: Make, Length: 499, dtype: bool\n"
+ ]
+ }
],
"source": [
"print (df_transport['Make'])\n",
@@ -782,36 +790,36 @@
"execution_count": 14,
"metadata": {},
"outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "0 2006\n",
- "1 2014\n",
- "2 NaN\n",
- "3 2017\n",
- "4 <2006\n",
- " ... \n",
- "494 2010\n",
- "495 2010\n",
- "496 2010\n",
- "497 2010\n",
- "498 2010\n",
- "Name: Model Year, Length: 499, dtype: object\n",
- "0 False\n",
- "1 False\n",
- "2 True\n",
- "3 False\n",
- "4 False\n",
- " ... \n",
- "494 False\n",
- "495 False\n",
- "496 False\n",
- "497 False\n",
- "498 False\n",
- "Name: Model Year, Length: 499, dtype: bool\n"
- ]
- }
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0 2006\n",
+ "1 2014\n",
+ "2 NaN\n",
+ "3 2017\n",
+ "4 <2006\n",
+ " ... \n",
+ "494 2010\n",
+ "495 2010\n",
+ "496 2010\n",
+ "497 2010\n",
+ "498 2010\n",
+ "Name: Model Year, Length: 499, dtype: object\n",
+ "0 False\n",
+ "1 False\n",
+ "2 True\n",
+ "3 False\n",
+ "4 False\n",
+ " ... \n",
+ "494 False\n",
+ "495 False\n",
+ "496 False\n",
+ "497 False\n",
+ "498 False\n",
+ "Name: Model Year, Length: 499, dtype: bool\n"
+ ]
+ }
],
"source": [
"print (df_transport['Model Year'])\n",
@@ -837,29 +845,29 @@
"execution_count": 15,
"metadata": {},
"outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Rows : 499\n",
- "Columns : 7\n",
- "\n",
- "Features : \n",
- " ['Date', 'Zip Code', 'Model Year', 'Fuel', 'Make', 'Light_Duty', 'Vehicles']\n",
- "\n",
- "Unique values : \n",
- " Date 130\n",
- "Zip Code 4\n",
- "Model Year 15\n",
- "Fuel 8\n",
- "Make 43\n",
- "Light_Duty 2\n",
- "Vehicles 151\n",
- "dtype: int64\n",
- "\n",
- "Missing values : 17\n"
- ]
- }
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Rows : 499\n",
+ "Columns : 7\n",
+ "\n",
+ "Features : \n",
+ " ['Date', 'Zip Code', 'Model Year', 'Fuel', 'Make', 'Light_Duty', 'Vehicles']\n",
+ "\n",
+ "Unique values : \n",
+ " Date 130\n",
+ "Zip Code 4\n",
+ "Model Year 15\n",
+ "Fuel 8\n",
+ "Make 43\n",
+ "Light_Duty 2\n",
+ "Vehicles 151\n",
+ "dtype: int64\n",
+ "\n",
+ "Missing values : 17\n"
+ ]
+ }
],
"source": [
"# In Python shape() is used in pandas to give the number of rows/columns.\n",
@@ -884,181 +892,181 @@
"execution_count": 16,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " Date \n",
- " Zip Code \n",
- " Model Year \n",
- " Fuel \n",
- " Make \n",
- " Light_Duty \n",
- " Vehicles \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 494 \n",
- " 12/3/2018 \n",
- " 90002.0 \n",
- " 2010 \n",
- " Gasoline \n",
- " Type_I \n",
- " Yes \n",
- " 11.0 \n",
- " \n",
- " \n",
- " 495 \n",
- " 12/4/2018 \n",
- " 90002.0 \n",
- " 2010 \n",
- " Gasoline \n",
- " Type_B \n",
- " Yes \n",
- " 58.0 \n",
- " \n",
- " \n",
- " 496 \n",
- " 12/5/2018 \n",
- " 90002.0 \n",
- " 2010 \n",
- " Gasoline \n",
- " Type_C \n",
- " Yes \n",
- " 45.0 \n",
- " \n",
- " \n",
- " 497 \n",
- " 12/6/2018 \n",
- " 90002.0 \n",
- " 2010 \n",
- " Gasoline \n",
- " Type_J \n",
- " Yes \n",
- " 82.0 \n",
- " \n",
- " \n",
- " 498 \n",
- " 12/7/2018 \n",
- " 90002.0 \n",
- " 2010 \n",
- " Gasoline \n",
- " Type_J \n",
- " Yes \n",
- " 12.0 \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Date Zip Code Model Year Fuel Make Light_Duty Vehicles\n",
- "494 12/3/2018 90002.0 2010 Gasoline Type_I Yes 11.0\n",
- "495 12/4/2018 90002.0 2010 Gasoline Type_B Yes 58.0\n",
- "496 12/5/2018 90002.0 2010 Gasoline Type_C Yes 45.0\n",
- "497 12/6/2018 90002.0 2010 Gasoline Type_J Yes 82.0\n",
- "498 12/7/2018 90002.0 2010 Gasoline Type_J Yes 12.0"
- ]
- },
- "execution_count": 17,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Output the last five rows in the dataset.\n",
- "df_transport.tail()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### What Are Our Data Quality Issues?\n",
- "\n",
- "1. **Data Quality Issue #1**: \n",
- "> **Missing Values**:\n",
- "Each feature column has multiple missing values. In fact, we have a total of 18 missing values.\n",
- "2. **Data Quality Issue #2**: \n",
- "> **Date DataType**: Date is shown as an \"object\" datatype and should be a datetime. In addition, Date is in one column. Our business requirement is to see the Date parsed out to year, month, and day. \n",
- "3. **Data Quality Issue #3**: \n",
- "> **Model Year**: We are only interested in years greater than 2006, not \"<2006\".\n",
- "4. **Data Quality Issue #4**: \n",
- "> **Categorical Columns**: The feature column \"Light_Duty\" is categorical and has a \"Yes/No\" choice. We cannot feed values like this into a machine learning model. In addition, we need to \"one-hot encode the remaining \"string\"/\"object\" columns.\n",
- "5. **Data Quality Issue #5**: \n",
- "> **Temporal Features**: How do we handle year, month, and day?\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Data Quality Issue #1: \n",
- "##### Resolving Missing Values"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Most algorithms do not accept missing values. Yet, when we see missing values in our dataset, there is always a tendency to just \"drop all the rows\" with missing values. Although Pandas will fill in the blank space with “NaN\", we should \"handle\" them in some way.\n",
- "\n",
- "While all the methods to handle missing values is beyond the scope of this lab, there are a few methods you should consider. For numeric columns, use the \"mean\" values to fill in the missing numeric values. For categorical columns, use the \"mode\" (or most frequent values) to fill in missing categorical values. \n",
- "\n",
- "In this lab, we use the .apply and Lambda functions to fill every column with its own most frequent value. You'll learn more about Lambda functions later in the lab."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Let's check again for missing values by showing how many rows contain NaN values for each feature column."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Date 2\n",
- "Zip Code 2\n",
- "Model Year 2\n",
- "Fuel 2\n",
- "Make 3\n",
- "Light_Duty 3\n",
- "Vehicles 3\n",
- "dtype: int64"
- ]
- },
- "execution_count": 18,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# The isnull() method is used to check and manage NULL values in a data frame.\n",
- "# TODO 1a\n",
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Date \n",
+ " Zip Code \n",
+ " Model Year \n",
+ " Fuel \n",
+ " Make \n",
+ " Light_Duty \n",
+ " Vehicles \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 494 \n",
+ " 12/3/2018 \n",
+ " 90002.0 \n",
+ " 2010 \n",
+ " Gasoline \n",
+ " Type_I \n",
+ " Yes \n",
+ " 11.0 \n",
+ " \n",
+ " \n",
+ " 495 \n",
+ " 12/4/2018 \n",
+ " 90002.0 \n",
+ " 2010 \n",
+ " Gasoline \n",
+ " Type_B \n",
+ " Yes \n",
+ " 58.0 \n",
+ " \n",
+ " \n",
+ " 496 \n",
+ " 12/5/2018 \n",
+ " 90002.0 \n",
+ " 2010 \n",
+ " Gasoline \n",
+ " Type_C \n",
+ " Yes \n",
+ " 45.0 \n",
+ " \n",
+ " \n",
+ " 497 \n",
+ " 12/6/2018 \n",
+ " 90002.0 \n",
+ " 2010 \n",
+ " Gasoline \n",
+ " Type_J \n",
+ " Yes \n",
+ " 82.0 \n",
+ " \n",
+ " \n",
+ " 498 \n",
+ " 12/7/2018 \n",
+ " 90002.0 \n",
+ " 2010 \n",
+ " Gasoline \n",
+ " Type_J \n",
+ " Yes \n",
+ " 12.0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Date Zip Code Model Year Fuel Make Light_Duty Vehicles\n",
+ "494 12/3/2018 90002.0 2010 Gasoline Type_I Yes 11.0\n",
+ "495 12/4/2018 90002.0 2010 Gasoline Type_B Yes 58.0\n",
+ "496 12/5/2018 90002.0 2010 Gasoline Type_C Yes 45.0\n",
+ "497 12/6/2018 90002.0 2010 Gasoline Type_J Yes 82.0\n",
+ "498 12/7/2018 90002.0 2010 Gasoline Type_J Yes 12.0"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Output the last five rows in the dataset.\n",
+ "df_transport.tail()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### What Are Our Data Quality Issues?\n",
+ "\n",
+ "1. **Data Quality Issue #1**: \n",
+ "> **Missing Values**:\n",
+ "Each feature column has multiple missing values. In fact, we have a total of 17 missing values.\n",
+ "2. **Data Quality Issue #2**: \n",
+ "> **Date DataType**: Date is shown as an \"object\" datatype and should be a datetime. In addition, Date is in one column. Our business requirement is to see the Date parsed out to year, month, and day. \n",
+ "3. **Data Quality Issue #3**: \n",
+ "> **Model Year**: We are only interested in model years 2006 and later, not \"<2006\".\n",
+ "4. **Data Quality Issue #4**: \n",
+ "> **Categorical Columns**: The feature column \"Light_Duty\" is categorical and has a \"Yes/No\" choice. We cannot feed values like this into a machine learning model. In addition, we need to one-hot encode the remaining \"string\"/\"object\" columns.\n",
+ "5. **Data Quality Issue #5**: \n",
+ "> **Temporal Features**: How do we handle year, month, and day?\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Data Quality Issue #1: \n",
+ "##### Resolving Missing Values"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Most algorithms do not accept missing values. Yet, when we see missing values in our dataset, there is always a tendency to just \"drop all the rows\" with missing values. Although Pandas will fill in the blank space with \"NaN\", we should \"handle\" them in some way.\n",
+ "\n",
+ "While covering all the methods to handle missing values is beyond the scope of this lab, there are a few methods you should consider. For numeric columns, use the \"mean\" values to fill in the missing numeric values. For categorical columns, use the \"mode\" (or most frequent values) to fill in missing categorical values. \n",
+ "\n",
+ "In this lab, we use the .apply and Lambda functions to fill every column with its own most frequent value. You'll learn more about Lambda functions later in the lab."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's check again for missing values by showing how many rows contain NaN values for each feature column."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Date 2\n",
+ "Zip Code 2\n",
+ "Model Year 2\n",
+ "Fuel 2\n",
+ "Make 3\n",
+ "Light_Duty 3\n",
+ "Vehicles 3\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# The isnull() method is used to check and manage NULL values in a data frame.\n",
+ "# TODO 1a\n",
"df_transport.isnull().sum()"
]
},
@@ -1093,23 +1101,23 @@
"execution_count": 19,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/plain": [
- "Date 0\n",
- "Zip Code 0\n",
- "Model Year 0\n",
- "Fuel 0\n",
- "Make 0\n",
- "Light_Duty 0\n",
- "Vehicles 0\n",
- "dtype: int64"
- ]
- },
- "execution_count": 20,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/plain": [
+ "Date 0\n",
+ "Zip Code 0\n",
+ "Model Year 0\n",
+ "Fuel 0\n",
+ "Make 0\n",
+ "Light_Duty 0\n",
+ "Vehicles 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
"# The isnull() method is used to check and manage NULL values in a data frame.\n",
@@ -1142,26 +1150,26 @@
"execution_count": 21,
"metadata": {},
"outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "RangeIndex: 499 entries, 0 to 498\n",
- "Data columns (total 7 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 Date 499 non-null datetime64[ns]\n",
- " 1 Zip Code 499 non-null float64 \n",
- " 2 Model Year 499 non-null object \n",
- " 3 Fuel 499 non-null object \n",
- " 4 Make 499 non-null object \n",
- " 5 Light_Duty 499 non-null object \n",
- " 6 Vehicles 499 non-null float64 \n",
- "dtypes: datetime64[ns](1), float64(2), object(4)\n",
- "memory usage: 27.4+ KB\n"
- ]
- }
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 499 entries, 0 to 498\n",
+ "Data columns (total 7 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 Date 499 non-null datetime64[ns]\n",
+ " 1 Zip Code 499 non-null float64 \n",
+ " 2 Model Year 499 non-null object \n",
+ " 3 Fuel 499 non-null object \n",
+ " 4 Make 499 non-null object \n",
+ " 5 Light_Duty 499 non-null object \n",
+ " 6 Vehicles 499 non-null float64 \n",
+ "dtypes: datetime64[ns](1), float64(2), object(4)\n",
+ "memory usage: 27.4+ KB\n"
+ ]
+ }
],
"source": [
"# Date is now converted and will display the concise summary of an dataframe.\n",
@@ -1174,29 +1182,29 @@
"execution_count": 22,
"metadata": {},
"outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "RangeIndex: 499 entries, 0 to 498\n",
- "Data columns (total 10 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 Date 499 non-null datetime64[ns]\n",
- " 1 Zip Code 499 non-null float64 \n",
- " 2 Model Year 499 non-null object \n",
- " 3 Fuel 499 non-null object \n",
- " 4 Make 499 non-null object \n",
- " 5 Light_Duty 499 non-null object \n",
- " 6 Vehicles 499 non-null float64 \n",
- " 7 year 499 non-null int64 \n",
- " 8 month 499 non-null int64 \n",
- " 9 day 499 non-null int64 \n",
- "dtypes: datetime64[ns](1), float64(2), int64(3), object(4)\n",
- "memory usage: 39.1+ KB\n"
- ]
- }
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 499 entries, 0 to 498\n",
+ "Data columns (total 10 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 Date 499 non-null datetime64[ns]\n",
+ " 1 Zip Code 499 non-null float64 \n",
+ " 2 Model Year 499 non-null object \n",
+ " 3 Fuel 499 non-null object \n",
+ " 4 Make 499 non-null object \n",
+ " 5 Light_Duty 499 non-null object \n",
+ " 6 Vehicles 499 non-null float64 \n",
+ " 7 year 499 non-null int64 \n",
+ " 8 month 499 non-null int64 \n",
+ " 9 day 499 non-null int64 \n",
+ "dtypes: datetime64[ns](1), float64(2), int64(3), object(4)\n",
+ "memory usage: 39.1+ KB\n"
+ ]
+ }
],
"source": [
"# Now we will parse Date into three columns that is year, month, and day.\n",
@@ -1223,151 +1231,151 @@
"execution_count": 23,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " Date \n",
- " Zip Code \n",
- " Model Year \n",
- " Fuel \n",
- " Make \n",
- " Light_Duty \n",
- " Vehicles \n",
- " year \n",
- " day \n",
- " \n",
- " \n",
- " month \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 1 \n",
- " 2019-01-01 \n",
- " 90001.0 \n",
- " 2016 \n",
- " Gasoline \n",
- " Type_G \n",
- " Yes \n",
- " 18.0 \n",
- " 2019 \n",
- " 1 \n",
- " \n",
- " \n",
- " 2 \n",
- " 2019-02-01 \n",
- " 90001.0 \n",
- " 2017 \n",
- " Gasoline \n",
- " Type_D \n",
- " Yes \n",
- " 13.0 \n",
- " 2019 \n",
- " 1 \n",
- " \n",
- " \n",
- " 3 \n",
- " 2019-03-01 \n",
- " 90001.0 \n",
- " 2018 \n",
- " Gasoline \n",
- " Type_C \n",
- " Yes \n",
- " 32.0 \n",
- " 2019 \n",
- " 1 \n",
- " \n",
- " \n",
- " 10 \n",
- " 2018-10-01 \n",
- " 90000.0 \n",
- " 2006 \n",
- " Gasoline \n",
- " OTHER/UNK \n",
- " Yes \n",
- " 1.0 \n",
- " 2018 \n",
- " 1 \n",
- " \n",
- " \n",
- " 11 \n",
- " 2018-11-01 \n",
- " 90001.0 \n",
- " 2007 \n",
- " Gasoline \n",
- " Type_M \n",
- " Yes \n",
- " 15.0 \n",
- " 2018 \n",
- " 1 \n",
- " \n",
- " \n",
- " 12 \n",
- " 2018-12-02 \n",
- " 90001.0 \n",
- " 2015 \n",
- " Gasoline \n",
- " Type_G \n",
- " Yes \n",
- " 19.0 \n",
- " 2018 \n",
- " 2 \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Date Zip Code Model Year Fuel Make Light_Duty \\\n",
- "month \n",
- "1 2019-01-01 90001.0 2016 Gasoline Type_G Yes \n",
- "2 2019-02-01 90001.0 2017 Gasoline Type_D Yes \n",
- "3 2019-03-01 90001.0 2018 Gasoline Type_C Yes \n",
- "10 2018-10-01 90000.0 2006 Gasoline OTHER/UNK Yes \n",
- "11 2018-11-01 90001.0 2007 Gasoline Type_M Yes \n",
- "12 2018-12-02 90001.0 2015 Gasoline Type_G Yes \n",
- "\n",
- " Vehicles year day \n",
- "month \n",
- "1 18.0 2019 1 \n",
- "2 13.0 2019 1 \n",
- "3 32.0 2019 1 \n",
- "10 1.0 2018 1 \n",
- "11 15.0 2018 1 \n",
- "12 19.0 2018 2 "
- ]
- },
- "execution_count": 24,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Date \n",
+ " Zip Code \n",
+ " Model Year \n",
+ " Fuel \n",
+ " Make \n",
+ " Light_Duty \n",
+ " Vehicles \n",
+ " year \n",
+ " day \n",
+ " \n",
+ " \n",
+ " month \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2019-01-01 \n",
+ " 90001.0 \n",
+ " 2016 \n",
+ " Gasoline \n",
+ " Type_G \n",
+ " Yes \n",
+ " 18.0 \n",
+ " 2019 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 2019-02-01 \n",
+ " 90001.0 \n",
+ " 2017 \n",
+ " Gasoline \n",
+ " Type_D \n",
+ " Yes \n",
+ " 13.0 \n",
+ " 2019 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 2019-03-01 \n",
+ " 90001.0 \n",
+ " 2018 \n",
+ " Gasoline \n",
+ " Type_C \n",
+ " Yes \n",
+ " 32.0 \n",
+ " 2019 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 10 \n",
+ " 2018-10-01 \n",
+ " 90000.0 \n",
+ " 2006 \n",
+ " Gasoline \n",
+ " OTHER/UNK \n",
+ " Yes \n",
+ " 1.0 \n",
+ " 2018 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 11 \n",
+ " 2018-11-01 \n",
+ " 90001.0 \n",
+ " 2007 \n",
+ " Gasoline \n",
+ " Type_M \n",
+ " Yes \n",
+ " 15.0 \n",
+ " 2018 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 12 \n",
+ " 2018-12-02 \n",
+ " 90001.0 \n",
+ " 2015 \n",
+ " Gasoline \n",
+ " Type_G \n",
+ " Yes \n",
+ " 19.0 \n",
+ " 2018 \n",
+ " 2 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Date Zip Code Model Year Fuel Make Light_Duty \\\n",
+ "month \n",
+ "1 2019-01-01 90001.0 2016 Gasoline Type_G Yes \n",
+ "2 2019-02-01 90001.0 2017 Gasoline Type_D Yes \n",
+ "3 2019-03-01 90001.0 2018 Gasoline Type_C Yes \n",
+ "10 2018-10-01 90000.0 2006 Gasoline OTHER/UNK Yes \n",
+ "11 2018-11-01 90001.0 2007 Gasoline Type_M Yes \n",
+ "12 2018-12-02 90001.0 2015 Gasoline Type_G Yes \n",
+ "\n",
+ " Vehicles year day \n",
+ "month \n",
+ "1 18.0 2019 1 \n",
+ "2 13.0 2019 1 \n",
+ "3 32.0 2019 1 \n",
+ "10 1.0 2018 1 \n",
+ "11 15.0 2018 1 \n",
+ "12 19.0 2018 2 "
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
"# Here, we are creating a new dataframe called \"grouped_data\" and grouping by on the column \"Make\"\n",
@@ -1410,7 +1418,7 @@
},
{
"data": {
- "image/png": "\n",
+ "image/png": "",
"text/plain": [
""
]
@@ -1454,104 +1462,104 @@
"execution_count": 25,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " date \n",
- " zipcode \n",
- " modelyear \n",
- " fuel \n",
- " make \n",
- " lightduty \n",
- " vehicles \n",
- " year \n",
- " month \n",
- " day \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 0 \n",
- " 2018-10-01 \n",
- " 90000.0 \n",
- " 2006 \n",
- " Gasoline \n",
- " OTHER/UNK \n",
- " Yes \n",
- " 1.0 \n",
- " 2018 \n",
- " 10 \n",
- " 1 \n",
- " \n",
- " \n",
- " 1 \n",
- " 2018-10-01 \n",
- " 90001.0 \n",
- " 2014 \n",
- " Gasoline \n",
- " OTHER/UNK \n",
- " Yes \n",
- " 1.0 \n",
- " 2018 \n",
- " 10 \n",
- " 1 \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " date zipcode modelyear fuel make lightduty vehicles \\\n",
- "0 2018-10-01 90000.0 2006 Gasoline OTHER/UNK Yes 1.0 \n",
- "1 2018-10-01 90001.0 2014 Gasoline OTHER/UNK Yes 1.0 \n",
- "\n",
- " year month day \n",
- "0 2018 10 1 \n",
- "1 2018 10 1 "
- ]
- },
- "execution_count": 26,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Let's remove all the spaces for feature columns by renaming them.\n",
- "# TODO 3a\n",
- "df_transport.rename(columns = { 'Date': 'date', 'Zip Code':'zipcode', 'Model Year': 'modelyear', 'Fuel': 'fuel', 'Make': 'make', 'Light_Duty': 'lightduty', 'Vehicles': 'vehicles'}, inplace = True) \n",
- "\n",
- "# Output the first two rows.\n",
- "df_transport.head(2)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- " **Note:** Next we create a copy of the dataframe to avoid the \"SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame\" warning. Run the cell to remove the value '<2006' from the modelyear feature column. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 26,
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " date \n",
+ " zipcode \n",
+ " modelyear \n",
+ " fuel \n",
+ " make \n",
+ " lightduty \n",
+ " vehicles \n",
+ " year \n",
+ " month \n",
+ " day \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 2018-10-01 \n",
+ " 90000.0 \n",
+ " 2006 \n",
+ " Gasoline \n",
+ " OTHER/UNK \n",
+ " Yes \n",
+ " 1.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2018-10-01 \n",
+ " 90001.0 \n",
+ " 2014 \n",
+ " Gasoline \n",
+ " OTHER/UNK \n",
+ " Yes \n",
+ " 1.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " date zipcode modelyear fuel make lightduty vehicles \\\n",
+ "0 2018-10-01 90000.0 2006 Gasoline OTHER/UNK Yes 1.0 \n",
+ "1 2018-10-01 90001.0 2014 Gasoline OTHER/UNK Yes 1.0 \n",
+ "\n",
+ " year month day \n",
+ "0 2018 10 1 \n",
+ "1 2018 10 1 "
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Let's remove all the spaces for feature columns by renaming them.\n",
+ "# TODO 3a\n",
+ "df_transport.rename(columns = { 'Date': 'date', 'Zip Code':'zipcode', 'Model Year': 'modelyear', 'Fuel': 'fuel', 'Make': 'make', 'Light_Duty': 'lightduty', 'Vehicles': 'vehicles'}, inplace = True) \n",
+ "\n",
+ "# Output the first two rows.\n",
+ "df_transport.head(2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ " **Note:** Next we create a copy of the dataframe to avoid the \"SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame\" warning. Run the cell to remove the value '<2006' from the modelyear feature column. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
@@ -1565,30 +1573,30 @@
"execution_count": 27,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/plain": [
- "2007 53\n",
- "2008 45\n",
- "2006 36\n",
- "2010 34\n",
- "2014 31\n",
- "2015 30\n",
- "2017 29\n",
- "2016 29\n",
- "2013 27\n",
- "2009 25\n",
- "2012 25\n",
- "2011 24\n",
- "2018 23\n",
- "2019 5\n",
- "Name: modelyear, dtype: int64"
- ]
- },
- "execution_count": 28,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/plain": [
+ "2007 53\n",
+ "2008 45\n",
+ "2006 36\n",
+ "2010 34\n",
+ "2014 31\n",
+ "2015 30\n",
+ "2017 29\n",
+ "2016 29\n",
+ "2013 27\n",
+ "2009 25\n",
+ "2012 25\n",
+ "2011 24\n",
+ "2018 23\n",
+ "2019 5\n",
+ "Name: modelyear, dtype: int64"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
"# Here we will confirm that the modelyear value '<2006' has been removed by doing a value count.\n",
@@ -1616,18 +1624,18 @@
"execution_count": 28,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/plain": [
- "Yes 374\n",
- "No 42\n",
- "Name: lightduty, dtype: int64"
- ]
- },
- "execution_count": 29,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/plain": [
+ "Yes 374\n",
+ "No 42\n",
+ "Name: lightduty, dtype: int64"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
"# Lets count the number of \"Yes\" and\"No's\" in the 'lightduty' feature column.\n",
@@ -1639,18 +1647,18 @@
"execution_count": 29,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/plain": [
- "1 374\n",
- "0 42\n",
- "Name: lightduty, dtype: int64"
- ]
- },
- "execution_count": 30,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/plain": [
+ "1 374\n",
+ "0 42\n",
+ "Name: lightduty, dtype: int64"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
"# Let's convert the Yes to 1 and No to 0.\n",
@@ -1664,129 +1672,129 @@
"execution_count": 30,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " date \n",
- " zipcode \n",
- " modelyear \n",
- " fuel \n",
- " make \n",
- " lightduty \n",
- " vehicles \n",
- " year \n",
- " month \n",
- " day \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 0 \n",
- " 2018-10-01 \n",
- " 90000.0 \n",
- " 2006 \n",
- " Gasoline \n",
- " OTHER/UNK \n",
- " 1 \n",
- " 1.0 \n",
- " 2018 \n",
- " 10 \n",
- " 1 \n",
- " \n",
- " \n",
- " 1 \n",
- " 2018-10-01 \n",
- " 90001.0 \n",
- " 2014 \n",
- " Gasoline \n",
- " OTHER/UNK \n",
- " 1 \n",
- " 1.0 \n",
- " 2018 \n",
- " 10 \n",
- " 1 \n",
- " \n",
- " \n",
- " 3 \n",
- " 2018-10-01 \n",
- " 90000.0 \n",
- " 2017 \n",
- " Gasoline \n",
- " OTHER/UNK \n",
- " 1 \n",
- " 1.0 \n",
- " 2018 \n",
- " 10 \n",
- " 1 \n",
- " \n",
- " \n",
- " 16 \n",
- " 2018-10-09 \n",
- " 90001.0 \n",
- " 2006 \n",
- " Diesel and Diesel Hybrid \n",
- " Type_C \n",
- " 0 \n",
- " 16.0 \n",
- " 2018 \n",
- " 10 \n",
- " 9 \n",
- " \n",
- " \n",
- " 17 \n",
- " 2018-10-10 \n",
- " 90001.0 \n",
- " 2006 \n",
- " Diesel and Diesel Hybrid \n",
- " OTHER/UNK \n",
- " 0 \n",
- " 23.0 \n",
- " 2018 \n",
- " 10 \n",
- " 10 \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " date zipcode modelyear fuel make \\\n",
- "0 2018-10-01 90000.0 2006 Gasoline OTHER/UNK \n",
- "1 2018-10-01 90001.0 2014 Gasoline OTHER/UNK \n",
- "3 2018-10-01 90000.0 2017 Gasoline OTHER/UNK \n",
- "16 2018-10-09 90001.0 2006 Diesel and Diesel Hybrid Type_C \n",
- "17 2018-10-10 90001.0 2006 Diesel and Diesel Hybrid OTHER/UNK \n",
- "\n",
- " lightduty vehicles year month day \n",
- "0 1 1.0 2018 10 1 \n",
- "1 1 1.0 2018 10 1 \n",
- "3 1 1.0 2018 10 1 \n",
- "16 0 16.0 2018 10 9 \n",
- "17 0 23.0 2018 10 10 "
- ]
- },
- "execution_count": 31,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " date \n",
+ " zipcode \n",
+ " modelyear \n",
+ " fuel \n",
+ " make \n",
+ " lightduty \n",
+ " vehicles \n",
+ " year \n",
+ " month \n",
+ " day \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 2018-10-01 \n",
+ " 90000.0 \n",
+ " 2006 \n",
+ " Gasoline \n",
+ " OTHER/UNK \n",
+ " 1 \n",
+ " 1.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2018-10-01 \n",
+ " 90001.0 \n",
+ " 2014 \n",
+ " Gasoline \n",
+ " OTHER/UNK \n",
+ " 1 \n",
+ " 1.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 2018-10-01 \n",
+ " 90000.0 \n",
+ " 2017 \n",
+ " Gasoline \n",
+ " OTHER/UNK \n",
+ " 1 \n",
+ " 1.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 16 \n",
+ " 2018-10-09 \n",
+ " 90001.0 \n",
+ " 2006 \n",
+ " Diesel and Diesel Hybrid \n",
+ " Type_C \n",
+ " 0 \n",
+ " 16.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 9 \n",
+ " \n",
+ " \n",
+ " 17 \n",
+ " 2018-10-10 \n",
+ " 90001.0 \n",
+ " 2006 \n",
+ " Diesel and Diesel Hybrid \n",
+ " OTHER/UNK \n",
+ " 0 \n",
+ " 23.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 10 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " date zipcode modelyear fuel make \\\n",
+ "0 2018-10-01 90000.0 2006 Gasoline OTHER/UNK \n",
+ "1 2018-10-01 90001.0 2014 Gasoline OTHER/UNK \n",
+ "3 2018-10-01 90000.0 2017 Gasoline OTHER/UNK \n",
+ "16 2018-10-09 90001.0 2006 Diesel and Diesel Hybrid Type_C \n",
+ "17 2018-10-10 90001.0 2006 Diesel and Diesel Hybrid OTHER/UNK \n",
+ "\n",
+ " lightduty vehicles year month day \n",
+ "0 1 1.0 2018 10 1 \n",
+ "1 1 1.0 2018 10 1 \n",
+ "3 1 1.0 2018 10 1 \n",
+ "16 0 16.0 2018 10 9 \n",
+ "17 0 23.0 2018 10 10 "
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
"# Confirm that \"lightduty\" has been converted.\n",
@@ -1816,219 +1824,219 @@
"execution_count": 31,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " zipcode \n",
- " modelyear_2007 \n",
- " modelyear_2008 \n",
- " modelyear_2009 \n",
- " modelyear_2010 \n",
- " modelyear_2011 \n",
- " modelyear_2012 \n",
- " modelyear_2013 \n",
- " modelyear_2014 \n",
- " modelyear_2015 \n",
- " ... \n",
- " make_Type_P \n",
- " make_Type_Q \n",
- " make_Type_R \n",
- " make_Type_S \n",
- " make_Type_T \n",
- " make_Type_U \n",
- " make_Type_V \n",
- " make_Type_W \n",
- " make_Type_X \n",
- " make_Type_Y \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 0 \n",
- " 90000.0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- " 1 \n",
- " 90001.0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 1 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- " 3 \n",
- " 90000.0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- " 16 \n",
- " 90001.0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- " 17 \n",
- " 90001.0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- "
\n",
- "
5 rows × 49 columns
\n",
- "
"
- ],
- "text/plain": [
- " zipcode modelyear_2007 modelyear_2008 modelyear_2009 modelyear_2010 \\\n",
- "0 90000.0 0 0 0 0 \n",
- "1 90001.0 0 0 0 0 \n",
- "3 90000.0 0 0 0 0 \n",
- "16 90001.0 0 0 0 0 \n",
- "17 90001.0 0 0 0 0 \n",
- "\n",
- " modelyear_2011 modelyear_2012 modelyear_2013 modelyear_2014 \\\n",
- "0 0 0 0 0 \n",
- "1 0 0 0 1 \n",
- "3 0 0 0 0 \n",
- "16 0 0 0 0 \n",
- "17 0 0 0 0 \n",
- "\n",
- " modelyear_2015 ... make_Type_P make_Type_Q make_Type_R make_Type_S \\\n",
- "0 0 ... 0 0 0 0 \n",
- "1 0 ... 0 0 0 0 \n",
- "3 0 ... 0 0 0 0 \n",
- "16 0 ... 0 0 0 0 \n",
- "17 0 ... 0 0 0 0 \n",
- "\n",
- " make_Type_T make_Type_U make_Type_V make_Type_W make_Type_X \\\n",
- "0 0 0 0 0 0 \n",
- "1 0 0 0 0 0 \n",
- "3 0 0 0 0 0 \n",
- "16 0 0 0 0 0 \n",
- "17 0 0 0 0 0 \n",
- "\n",
- " make_Type_Y \n",
- "0 0 \n",
- "1 0 \n",
- "3 0 \n",
- "16 0 \n",
- "17 0 \n",
- "\n",
- "[5 rows x 49 columns]"
- ]
- },
- "execution_count": 32,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " zipcode \n",
+ " modelyear_2007 \n",
+ " modelyear_2008 \n",
+ " modelyear_2009 \n",
+ " modelyear_2010 \n",
+ " modelyear_2011 \n",
+ " modelyear_2012 \n",
+ " modelyear_2013 \n",
+ " modelyear_2014 \n",
+ " modelyear_2015 \n",
+ " ... \n",
+ " make_Type_P \n",
+ " make_Type_Q \n",
+ " make_Type_R \n",
+ " make_Type_S \n",
+ " make_Type_T \n",
+ " make_Type_U \n",
+ " make_Type_V \n",
+ " make_Type_W \n",
+ " make_Type_X \n",
+ " make_Type_Y \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 90000.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 90001.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 90000.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 16 \n",
+ " 90001.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 17 \n",
+ " 90001.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
5 rows × 49 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " zipcode modelyear_2007 modelyear_2008 modelyear_2009 modelyear_2010 \\\n",
+ "0 90000.0 0 0 0 0 \n",
+ "1 90001.0 0 0 0 0 \n",
+ "3 90000.0 0 0 0 0 \n",
+ "16 90001.0 0 0 0 0 \n",
+ "17 90001.0 0 0 0 0 \n",
+ "\n",
+ " modelyear_2011 modelyear_2012 modelyear_2013 modelyear_2014 \\\n",
+ "0 0 0 0 0 \n",
+ "1 0 0 0 1 \n",
+ "3 0 0 0 0 \n",
+ "16 0 0 0 0 \n",
+ "17 0 0 0 0 \n",
+ "\n",
+ " modelyear_2015 ... make_Type_P make_Type_Q make_Type_R make_Type_S \\\n",
+ "0 0 ... 0 0 0 0 \n",
+ "1 0 ... 0 0 0 0 \n",
+ "3 0 ... 0 0 0 0 \n",
+ "16 0 ... 0 0 0 0 \n",
+ "17 0 ... 0 0 0 0 \n",
+ "\n",
+ " make_Type_T make_Type_U make_Type_V make_Type_W make_Type_X \\\n",
+ "0 0 0 0 0 0 \n",
+ "1 0 0 0 0 0 \n",
+ "3 0 0 0 0 0 \n",
+ "16 0 0 0 0 0 \n",
+ "17 0 0 0 0 0 \n",
+ "\n",
+ " make_Type_Y \n",
+ "0 0 \n",
+ "1 0 \n",
+ "3 0 \n",
+ "16 0 \n",
+ "17 0 \n",
+ "\n",
+ "[5 rows x 49 columns]"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
"# Making dummy variables for categorical data with more inputs. \n",
@@ -2043,212 +2051,212 @@
"execution_count": 32,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " date \n",
- " zipcode \n",
- " modelyear \n",
- " fuel \n",
- " make \n",
- " lightduty \n",
- " vehicles \n",
- " year \n",
- " month \n",
- " day \n",
- " ... \n",
- " make_Type_P \n",
- " make_Type_Q \n",
- " make_Type_R \n",
- " make_Type_S \n",
- " make_Type_T \n",
- " make_Type_U \n",
- " make_Type_V \n",
- " make_Type_W \n",
- " make_Type_X \n",
- " make_Type_Y \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 0 \n",
- " 2018-10-01 \n",
- " 90000.0 \n",
- " 2006 \n",
- " Gasoline \n",
- " OTHER/UNK \n",
- " 1 \n",
- " 1.0 \n",
- " 2018 \n",
- " 10 \n",
- " 1 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- " 1 \n",
- " 2018-10-01 \n",
- " 90001.0 \n",
- " 2014 \n",
- " Gasoline \n",
- " OTHER/UNK \n",
- " 1 \n",
- " 1.0 \n",
- " 2018 \n",
- " 10 \n",
- " 1 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- " 3 \n",
- " 2018-10-01 \n",
- " 90000.0 \n",
- " 2017 \n",
- " Gasoline \n",
- " OTHER/UNK \n",
- " 1 \n",
- " 1.0 \n",
- " 2018 \n",
- " 10 \n",
- " 1 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- " 16 \n",
- " 2018-10-09 \n",
- " 90001.0 \n",
- " 2006 \n",
- " Diesel and Diesel Hybrid \n",
- " Type_C \n",
- " 0 \n",
- " 16.0 \n",
- " 2018 \n",
- " 10 \n",
- " 9 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- " 17 \n",
- " 2018-10-10 \n",
- " 90001.0 \n",
- " 2006 \n",
- " Diesel and Diesel Hybrid \n",
- " OTHER/UNK \n",
- " 0 \n",
- " 23.0 \n",
- " 2018 \n",
- " 10 \n",
- " 10 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- "
\n",
- "
5 rows × 59 columns
\n",
- "
"
- ],
- "text/plain": [
- " date zipcode modelyear fuel make \\\n",
- "0 2018-10-01 90000.0 2006 Gasoline OTHER/UNK \n",
- "1 2018-10-01 90001.0 2014 Gasoline OTHER/UNK \n",
- "3 2018-10-01 90000.0 2017 Gasoline OTHER/UNK \n",
- "16 2018-10-09 90001.0 2006 Diesel and Diesel Hybrid Type_C \n",
- "17 2018-10-10 90001.0 2006 Diesel and Diesel Hybrid OTHER/UNK \n",
- "\n",
- " lightduty vehicles year month day ... make_Type_P make_Type_Q \\\n",
- "0 1 1.0 2018 10 1 ... 0 0 \n",
- "1 1 1.0 2018 10 1 ... 0 0 \n",
- "3 1 1.0 2018 10 1 ... 0 0 \n",
- "16 0 16.0 2018 10 9 ... 0 0 \n",
- "17 0 23.0 2018 10 10 ... 0 0 \n",
- "\n",
- " make_Type_R make_Type_S make_Type_T make_Type_U make_Type_V \\\n",
- "0 0 0 0 0 0 \n",
- "1 0 0 0 0 0 \n",
- "3 0 0 0 0 0 \n",
- "16 0 0 0 0 0 \n",
- "17 0 0 0 0 0 \n",
- "\n",
- " make_Type_W make_Type_X make_Type_Y \n",
- "0 0 0 0 \n",
- "1 0 0 0 \n",
- "3 0 0 0 \n",
- "16 0 0 0 \n",
- "17 0 0 0 \n",
- "\n",
- "[5 rows x 59 columns]"
- ]
- },
- "execution_count": 33,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " date \n",
+ " zipcode \n",
+ " modelyear \n",
+ " fuel \n",
+ " make \n",
+ " lightduty \n",
+ " vehicles \n",
+ " year \n",
+ " month \n",
+ " day \n",
+ " ... \n",
+ " make_Type_P \n",
+ " make_Type_Q \n",
+ " make_Type_R \n",
+ " make_Type_S \n",
+ " make_Type_T \n",
+ " make_Type_U \n",
+ " make_Type_V \n",
+ " make_Type_W \n",
+ " make_Type_X \n",
+ " make_Type_Y \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 2018-10-01 \n",
+ " 90000.0 \n",
+ " 2006 \n",
+ " Gasoline \n",
+ " OTHER/UNK \n",
+ " 1 \n",
+ " 1.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 1 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2018-10-01 \n",
+ " 90001.0 \n",
+ " 2014 \n",
+ " Gasoline \n",
+ " OTHER/UNK \n",
+ " 1 \n",
+ " 1.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 1 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 2018-10-01 \n",
+ " 90000.0 \n",
+ " 2017 \n",
+ " Gasoline \n",
+ " OTHER/UNK \n",
+ " 1 \n",
+ " 1.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 1 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 16 \n",
+ " 2018-10-09 \n",
+ " 90001.0 \n",
+ " 2006 \n",
+ " Diesel and Diesel Hybrid \n",
+ " Type_C \n",
+ " 0 \n",
+ " 16.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 9 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 17 \n",
+ " 2018-10-10 \n",
+ " 90001.0 \n",
+ " 2006 \n",
+ " Diesel and Diesel Hybrid \n",
+ " OTHER/UNK \n",
+ " 0 \n",
+ " 23.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 10 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
5 rows × 59 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " date zipcode modelyear fuel make \\\n",
+ "0 2018-10-01 90000.0 2006 Gasoline OTHER/UNK \n",
+ "1 2018-10-01 90001.0 2014 Gasoline OTHER/UNK \n",
+ "3 2018-10-01 90000.0 2017 Gasoline OTHER/UNK \n",
+ "16 2018-10-09 90001.0 2006 Diesel and Diesel Hybrid Type_C \n",
+ "17 2018-10-10 90001.0 2006 Diesel and Diesel Hybrid OTHER/UNK \n",
+ "\n",
+ " lightduty vehicles year month day ... make_Type_P make_Type_Q \\\n",
+ "0 1 1.0 2018 10 1 ... 0 0 \n",
+ "1 1 1.0 2018 10 1 ... 0 0 \n",
+ "3 1 1.0 2018 10 1 ... 0 0 \n",
+ "16 0 16.0 2018 10 9 ... 0 0 \n",
+ "17 0 23.0 2018 10 10 ... 0 0 \n",
+ "\n",
+ " make_Type_R make_Type_S make_Type_T make_Type_U make_Type_V \\\n",
+ "0 0 0 0 0 0 \n",
+ "1 0 0 0 0 0 \n",
+ "3 0 0 0 0 0 \n",
+ "16 0 0 0 0 0 \n",
+ "17 0 0 0 0 0 \n",
+ "\n",
+ " make_Type_W make_Type_X make_Type_Y \n",
+ "0 0 0 0 \n",
+ "1 0 0 0 \n",
+ "3 0 0 0 \n",
+ "16 0 0 0 \n",
+ "17 0 0 0 \n",
+ "\n",
+ "[5 rows x 59 columns]"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
"# Merging (concatenate) original data frame with 'dummy' dataframe.\n",
@@ -2273,212 +2281,212 @@
"execution_count": 34,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " lightduty \n",
- " vehicles \n",
- " year \n",
- " month \n",
- " day \n",
- " modelyear_2007 \n",
- " modelyear_2008 \n",
- " modelyear_2009 \n",
- " modelyear_2010 \n",
- " modelyear_2011 \n",
- " ... \n",
- " make_Type_P \n",
- " make_Type_Q \n",
- " make_Type_R \n",
- " make_Type_S \n",
- " make_Type_T \n",
- " make_Type_U \n",
- " make_Type_V \n",
- " make_Type_W \n",
- " make_Type_X \n",
- " make_Type_Y \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 0 \n",
- " 1 \n",
- " 1.0 \n",
- " 2018 \n",
- " 10 \n",
- " 1 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- " 1 \n",
- " 1 \n",
- " 1.0 \n",
- " 2018 \n",
- " 10 \n",
- " 1 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- " 3 \n",
- " 1 \n",
- " 1.0 \n",
- " 2018 \n",
- " 10 \n",
- " 1 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- " 16 \n",
- " 0 \n",
- " 16.0 \n",
- " 2018 \n",
- " 10 \n",
- " 9 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- " 17 \n",
- " 0 \n",
- " 23.0 \n",
- " 2018 \n",
- " 10 \n",
- " 10 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " \n",
- " \n",
- "
\n",
- "
5 rows × 53 columns
\n",
- "
"
- ],
- "text/plain": [
- " lightduty vehicles year month day modelyear_2007 modelyear_2008 \\\n",
- "0 1 1.0 2018 10 1 0 0 \n",
- "1 1 1.0 2018 10 1 0 0 \n",
- "3 1 1.0 2018 10 1 0 0 \n",
- "16 0 16.0 2018 10 9 0 0 \n",
- "17 0 23.0 2018 10 10 0 0 \n",
- "\n",
- " modelyear_2009 modelyear_2010 modelyear_2011 ... make_Type_P \\\n",
- "0 0 0 0 ... 0 \n",
- "1 0 0 0 ... 0 \n",
- "3 0 0 0 ... 0 \n",
- "16 0 0 0 ... 0 \n",
- "17 0 0 0 ... 0 \n",
- "\n",
- " make_Type_Q make_Type_R make_Type_S make_Type_T make_Type_U \\\n",
- "0 0 0 0 0 0 \n",
- "1 0 0 0 0 0 \n",
- "3 0 0 0 0 0 \n",
- "16 0 0 0 0 0 \n",
- "17 0 0 0 0 0 \n",
- "\n",
- " make_Type_V make_Type_W make_Type_X make_Type_Y \n",
- "0 0 0 0 0 \n",
- "1 0 0 0 0 \n",
- "3 0 0 0 0 \n",
- "16 0 0 0 0 \n",
- "17 0 0 0 0 \n",
- "\n",
- "[5 rows x 53 columns]"
- ]
- },
- "execution_count": 35,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " lightduty \n",
+ " vehicles \n",
+ " year \n",
+ " month \n",
+ " day \n",
+ " modelyear_2007 \n",
+ " modelyear_2008 \n",
+ " modelyear_2009 \n",
+ " modelyear_2010 \n",
+ " modelyear_2011 \n",
+ " ... \n",
+ " make_Type_P \n",
+ " make_Type_Q \n",
+ " make_Type_R \n",
+ " make_Type_S \n",
+ " make_Type_T \n",
+ " make_Type_U \n",
+ " make_Type_V \n",
+ " make_Type_W \n",
+ " make_Type_X \n",
+ " make_Type_Y \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 1 \n",
+ " 1.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 1 \n",
+ " 1.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 1 \n",
+ " 1.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 16 \n",
+ " 0 \n",
+ " 16.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 9 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 17 \n",
+ " 0 \n",
+ " 23.0 \n",
+ " 2018 \n",
+ " 10 \n",
+ " 10 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
5 rows × 53 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " lightduty vehicles year month day modelyear_2007 modelyear_2008 \\\n",
+ "0 1 1.0 2018 10 1 0 0 \n",
+ "1 1 1.0 2018 10 1 0 0 \n",
+ "3 1 1.0 2018 10 1 0 0 \n",
+ "16 0 16.0 2018 10 9 0 0 \n",
+ "17 0 23.0 2018 10 10 0 0 \n",
+ "\n",
+ " modelyear_2009 modelyear_2010 modelyear_2011 ... make_Type_P \\\n",
+ "0 0 0 0 ... 0 \n",
+ "1 0 0 0 ... 0 \n",
+ "3 0 0 0 ... 0 \n",
+ "16 0 0 0 ... 0 \n",
+ "17 0 0 0 ... 0 \n",
+ "\n",
+ " make_Type_Q make_Type_R make_Type_S make_Type_T make_Type_U \\\n",
+ "0 0 0 0 0 0 \n",
+ "1 0 0 0 0 0 \n",
+ "3 0 0 0 0 0 \n",
+ "16 0 0 0 0 0 \n",
+ "17 0 0 0 0 0 \n",
+ "\n",
+ " make_Type_V make_Type_W make_Type_X make_Type_Y \n",
+ "0 0 0 0 0 \n",
+ "1 0 0 0 0 \n",
+ "3 0 0 0 0 \n",
+ "16 0 0 0 0 \n",
+ "17 0 0 0 0 \n",
+ "\n",
+ "[5 rows x 53 columns]"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
"# Confirm that 'zipcode','modelyear', 'fuel', and 'make' have been dropped.\n",
@@ -2502,16 +2510,16 @@
"execution_count": 35,
"metadata": {},
"outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Unique values of month: [10 11 12 1 2 3]\n",
- "Unique values of day: [ 1 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31\n",
- " 2 3 4 5 6 7 8]\n",
- "Unique values of year: [2018 2019]\n"
- ]
- }
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Unique values of month: [10 11 12 1 2 3]\n",
+ "Unique values of day: [ 1 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31\n",
+ " 2 3 4 5 6 7 8]\n",
+ "Unique values of year: [2018 2019]\n"
+ ]
+ }
],
"source": [
"# Let's print the unique values for \"month\", \"day\" and \"year\" in our dataset. \n",
@@ -2551,184 +2559,184 @@
"execution_count": 37,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " lightduty \n",
- " vehicles \n",
- " modelyear_2007 \n",
- " modelyear_2008 \n",
- " modelyear_2009 \n",
- " modelyear_2010 \n",
- " modelyear_2011 \n",
- " modelyear_2012 \n",
- " modelyear_2013 \n",
- " modelyear_2014 \n",
- " ... \n",
- " make_Type_T \n",
- " make_Type_U \n",
- " make_Type_V \n",
- " make_Type_W \n",
- " make_Type_X \n",
- " make_Type_Y \n",
- " day_sin \n",
- " day_cos \n",
- " month_sin \n",
- " month_cos \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 495 \n",
- " 1 \n",
- " 58.0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 1 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0.724793 \n",
- " 0.688967 \n",
- " -0.5 \n",
- " 0.866025 \n",
- " \n",
- " \n",
- " 496 \n",
- " 1 \n",
- " 45.0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 1 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0.848644 \n",
- " 0.528964 \n",
- " -0.5 \n",
- " 0.866025 \n",
- " \n",
- " \n",
- " 497 \n",
- " 1 \n",
- " 82.0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 1 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0.937752 \n",
- " 0.347305 \n",
- " -0.5 \n",
- " 0.866025 \n",
- " \n",
- " \n",
- " 498 \n",
- " 1 \n",
- " 12.0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 1 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0.988468 \n",
- " 0.151428 \n",
- " -0.5 \n",
- " 0.866025 \n",
- " \n",
- " \n",
- "
\n",
- "
4 rows × 54 columns
\n",
- "
"
- ],
- "text/plain": [
- " lightduty vehicles modelyear_2007 modelyear_2008 modelyear_2009 \\\n",
- "495 1 58.0 0 0 0 \n",
- "496 1 45.0 0 0 0 \n",
- "497 1 82.0 0 0 0 \n",
- "498 1 12.0 0 0 0 \n",
- "\n",
- " modelyear_2010 modelyear_2011 modelyear_2012 modelyear_2013 \\\n",
- "495 1 0 0 0 \n",
- "496 1 0 0 0 \n",
- "497 1 0 0 0 \n",
- "498 1 0 0 0 \n",
- "\n",
- " modelyear_2014 ... make_Type_T make_Type_U make_Type_V make_Type_W \\\n",
- "495 0 ... 0 0 0 0 \n",
- "496 0 ... 0 0 0 0 \n",
- "497 0 ... 0 0 0 0 \n",
- "498 0 ... 0 0 0 0 \n",
- "\n",
- " make_Type_X make_Type_Y day_sin day_cos month_sin month_cos \n",
- "495 0 0 0.724793 0.688967 -0.5 0.866025 \n",
- "496 0 0 0.848644 0.528964 -0.5 0.866025 \n",
- "497 0 0 0.937752 0.347305 -0.5 0.866025 \n",
- "498 0 0 0.988468 0.151428 -0.5 0.866025 \n",
- "\n",
- "[4 rows x 54 columns]"
- ]
- },
- "execution_count": 38,
- "metadata": {},
- "output_type": "execute_result"
- }
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " lightduty \n",
+ " vehicles \n",
+ " modelyear_2007 \n",
+ " modelyear_2008 \n",
+ " modelyear_2009 \n",
+ " modelyear_2010 \n",
+ " modelyear_2011 \n",
+ " modelyear_2012 \n",
+ " modelyear_2013 \n",
+ " modelyear_2014 \n",
+ " ... \n",
+ " make_Type_T \n",
+ " make_Type_U \n",
+ " make_Type_V \n",
+ " make_Type_W \n",
+ " make_Type_X \n",
+ " make_Type_Y \n",
+ " day_sin \n",
+ " day_cos \n",
+ " month_sin \n",
+ " month_cos \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 495 \n",
+ " 1 \n",
+ " 58.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0.724793 \n",
+ " 0.688967 \n",
+ " -0.5 \n",
+ " 0.866025 \n",
+ " \n",
+ " \n",
+ " 496 \n",
+ " 1 \n",
+ " 45.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0.848644 \n",
+ " 0.528964 \n",
+ " -0.5 \n",
+ " 0.866025 \n",
+ " \n",
+ " \n",
+ " 497 \n",
+ " 1 \n",
+ " 82.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0.937752 \n",
+ " 0.347305 \n",
+ " -0.5 \n",
+ " 0.866025 \n",
+ " \n",
+ " \n",
+ " 498 \n",
+ " 1 \n",
+ " 12.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0.988468 \n",
+ " 0.151428 \n",
+ " -0.5 \n",
+ " 0.866025 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
4 rows × 54 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " lightduty vehicles modelyear_2007 modelyear_2008 modelyear_2009 \\\n",
+ "495 1 58.0 0 0 0 \n",
+ "496 1 45.0 0 0 0 \n",
+ "497 1 82.0 0 0 0 \n",
+ "498 1 12.0 0 0 0 \n",
+ "\n",
+ " modelyear_2010 modelyear_2011 modelyear_2012 modelyear_2013 \\\n",
+ "495 1 0 0 0 \n",
+ "496 1 0 0 0 \n",
+ "497 1 0 0 0 \n",
+ "498 1 0 0 0 \n",
+ "\n",
+ " modelyear_2014 ... make_Type_T make_Type_U make_Type_V make_Type_W \\\n",
+ "495 0 ... 0 0 0 0 \n",
+ "496 0 ... 0 0 0 0 \n",
+ "497 0 ... 0 0 0 0 \n",
+ "498 0 ... 0 0 0 0 \n",
+ "\n",
+ " make_Type_X make_Type_Y day_sin day_cos month_sin month_cos \n",
+ "495 0 0 0.724793 0.688967 -0.5 0.866025 \n",
+ "496 0 0 0.848644 0.528964 -0.5 0.866025 \n",
+ "497 0 0 0.937752 0.347305 -0.5 0.866025 \n",
+ "498 0 0 0.988468 0.151428 -0.5 0.866025 \n",
+ "\n",
+ "[4 rows x 54 columns]"
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
"source": [
"# scroll left to see the converted month and day coluumns.\n",
From b07839faaa8d8ea6453e5f085599aea4f5b96522 Mon Sep 17 00:00:00 2001
From: hemantsinalkar-ssk
<87474238+hemantsinalkar-ssk@users.noreply.github.com>
Date: Thu, 3 Oct 2024 16:54:00 +0530
Subject: [PATCH 2/3] - Updated the cell in code format
---
.../launching_into_ml/labs/improve_data_quality.ipynb | 6 ++++--
.../launching_into_ml/solutions/improve_data_quality.ipynb | 4 +++-
2 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/courses/machine_learning/deepdive2/launching_into_ml/labs/improve_data_quality.ipynb b/courses/machine_learning/deepdive2/launching_into_ml/labs/improve_data_quality.ipynb
index 02889af0ba..0f02fcf271 100644
--- a/courses/machine_learning/deepdive2/launching_into_ml/labs/improve_data_quality.ipynb
+++ b/courses/machine_learning/deepdive2/launching_into_ml/labs/improve_data_quality.ipynb
@@ -38,10 +38,12 @@
]
},
{
- "cell_type": "markdown",
+ "cell_type": "code",
+ "execution_count": null,
"metadata": {},
+ "outputs": [],
"source": [
- "Install TensorFlow Libraries\n",
+ "# Install TensorFlow Libraries\n",
"!pip install tensorflow"
]
},
diff --git a/courses/machine_learning/deepdive2/launching_into_ml/solutions/improve_data_quality.ipynb b/courses/machine_learning/deepdive2/launching_into_ml/solutions/improve_data_quality.ipynb
index 9d6c877338..3f63bf6966 100644
--- a/courses/machine_learning/deepdive2/launching_into_ml/solutions/improve_data_quality.ipynb
+++ b/courses/machine_learning/deepdive2/launching_into_ml/solutions/improve_data_quality.ipynb
@@ -39,8 +39,10 @@
]
},
{
- "cell_type": "markdown",
+ "cell_type": "code",
+ "execution_count": null,
"metadata": {},
+ "outputs": [],
"source": [
"# Install TensorFlow Libraries\n",
"!pip install tensorflow"
From 83a9a4aa9dd74daf47796d3652e77b3cd1917da5 Mon Sep 17 00:00:00 2001
From: hemantsinalkar-ssk
<87474238+hemantsinalkar-ssk@users.noreply.github.com>
Date: Thu, 3 Oct 2024 17:28:37 +0530
Subject: [PATCH 3/3] - Added comment for describing the cell
---
.../deepdive2/launching_into_ml/labs/improve_data_quality.ipynb | 1 +
1 file changed, 1 insertion(+)
diff --git a/courses/machine_learning/deepdive2/launching_into_ml/labs/improve_data_quality.ipynb b/courses/machine_learning/deepdive2/launching_into_ml/labs/improve_data_quality.ipynb
index 0f02fcf271..535a330cb4 100644
--- a/courses/machine_learning/deepdive2/launching_into_ml/labs/improve_data_quality.ipynb
+++ b/courses/machine_learning/deepdive2/launching_into_ml/labs/improve_data_quality.ipynb
@@ -34,6 +34,7 @@
"metadata": {},
"outputs": [],
"source": [
+ "# Use the chown command to change ownership of the repository to the jupyter user\n",
"!sudo chown -R jupyter:jupyter /home/jupyter/training-data-analyst"
]
},