Skip to content

Commit f633ac2

Browse files
committed
Fixed bug in testing set definition
1 parent ca98a65 commit f633ac2

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

Lab6-Cats-v-Dogs.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Lab6-Cats-v-Dogs.ipynb","version":"0.3.2","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"},"accelerator":"GPU"},"cells":[{"metadata":{"id":"7v55rWlQehzL","colab_type":"text"},"cell_type":"markdown","source":["## Training with a Larger Dataset - Cats and Dogs\n","\n","In the previous lab you trained a classifier with a horses-v-humans dataset. You saw that despite getting great training results, when you tried to do classification with real images, there were many errors, due primarily to overfitting -- where the network does very well with data that it has previously seen, but poorly with data it hasn't!\n","\n","In this lab you'll look at a real, and very large dataset, and see the impact this has to avoid overfitting."]},{"metadata":{"id":"dn-6c02VmqiN","colab_type":"code","colab":{}},"cell_type":"code","source":["import os\n","import zipfile\n","import random\n","import tensorflow as tf\n","from tensorflow.keras.optimizers import RMSprop\n","from tensorflow.keras.preprocessing.image import ImageDataGenerator\n","from shutil import copyfile"],"execution_count":0,"outputs":[]},{"metadata":{"id":"3sd9dQWa23aj","colab_type":"code","colab":{}},"cell_type":"code","source":["# If the URL doesn't work, visit https://www.microsoft.com/en-us/download/confirmation.aspx?id=54765\n","# And right click on the 'Download Manually' link to get a new URL to the dataset\n","\n","# Note: This is a very large dataset and will take time to download\n","\n","!wget --no-check-certificate \\\n"," \"https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_3367a.zip\" \\\n"," -O \"/tmp/cats-and-dogs.zip\"\n","\n","local_zip = '/tmp/cats-and-dogs.zip'\n","zip_ref = zipfile.ZipFile(local_zip, 
'r')\n","zip_ref.extractall('/tmp')\n","zip_ref.close()\n"],"execution_count":0,"outputs":[]},{"metadata":{"id":"DM851ZmN28J3","colab_type":"code","colab":{}},"cell_type":"code","source":["print(len(os.listdir('/tmp/PetImages/Cat/')))\n","print(len(os.listdir('/tmp/PetImages/Dog/')))\n","\n","# Expected Output:\n","# 12501\n","# 12501"],"execution_count":0,"outputs":[]},{"metadata":{"id":"F-QkLjxpmyK2","colab_type":"code","colab":{}},"cell_type":"code","source":["try:\n"," os.mkdir('/tmp/cats-v-dogs')\n"," os.mkdir('/tmp/cats-v-dogs/training')\n"," os.mkdir('/tmp/cats-v-dogs/testing')\n"," os.mkdir('/tmp/cats-v-dogs/training/cats')\n"," os.mkdir('/tmp/cats-v-dogs/training/dogs')\n"," os.mkdir('/tmp/cats-v-dogs/testing/cats')\n"," os.mkdir('/tmp/cats-v-dogs/testing/dogs')\n","except OSError:\n"," pass"],"execution_count":0,"outputs":[]},{"metadata":{"id":"zvSODo0f9LaU","colab_type":"code","colab":{}},"cell_type":"code","source":["def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE):\n"," files = []\n"," for filename in os.listdir(SOURCE):\n"," file = SOURCE + filename\n"," if os.path.getsize(file) > 0:\n"," files.append(filename)\n"," else:\n"," print(filename + \" is zero length, so ignoring.\")\n","\n"," training_length = int(len(files) * SPLIT_SIZE)\n"," testing_length = int(len(files) - training_length)\n"," shuffled_set = random.sample(files, len(files))\n"," training_set = shuffled_set[0:training_length]\n"," testing_set = shuffled_set[:testing_length]\n","\n"," for filename in training_set:\n"," this_file = SOURCE + filename\n"," destination = TRAINING + filename\n"," copyfile(this_file, destination)\n","\n"," for filename in testing_set:\n"," this_file = SOURCE + filename\n"," destination = TESTING + filename\n"," copyfile(this_file, destination)\n","\n","\n","CAT_SOURCE_DIR = \"/tmp/PetImages/Cat/\"\n","TRAINING_CATS_DIR = \"/tmp/cats-v-dogs/training/cats/\"\n","TESTING_CATS_DIR = \"/tmp/cats-v-dogs/testing/cats/\"\n","DOG_SOURCE_DIR = 
\"/tmp/PetImages/Dog/\"\n","TRAINING_DOGS_DIR = \"/tmp/cats-v-dogs/training/dogs/\"\n","TESTING_DOGS_DIR = \"/tmp/cats-v-dogs/testing/dogs/\"\n","\n","split_size = .9\n","split_data(CAT_SOURCE_DIR, TRAINING_CATS_DIR, TESTING_CATS_DIR, split_size)\n","split_data(DOG_SOURCE_DIR, TRAINING_DOGS_DIR, TESTING_DOGS_DIR, split_size)\n","\n","# Expected output\n","# 666.jpg is zero length, so ignoring\n","# 11702.jpg is zero length, so ignoring"],"execution_count":0,"outputs":[]},{"metadata":{"id":"hwHXFhVG3786","colab_type":"code","colab":{}},"cell_type":"code","source":["print(len(os.listdir('/tmp/cats-v-dogs/training/cats/')))\n","print(len(os.listdir('/tmp/cats-v-dogs/training/dogs/')))\n","print(len(os.listdir('/tmp/cats-v-dogs/testing/cats/')))\n","print(len(os.listdir('/tmp/cats-v-dogs/testing/dogs/')))\n","\n","# Expected output:\n","# 11250\n","# 11250\n","# 1250\n","# 1250"],"execution_count":0,"outputs":[]},{"metadata":{"id":"-BQrav4anTmj","colab_type":"code","colab":{}},"cell_type":"code","source":["model = tf.keras.models.Sequential([\n"," tf.keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=(150, 150, 3)),\n"," tf.keras.layers.MaxPooling2D(2, 2),\n"," tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),\n"," tf.keras.layers.MaxPooling2D(2, 2),\n"," tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),\n"," tf.keras.layers.MaxPooling2D(2, 2),\n"," tf.keras.layers.Flatten(),\n"," tf.keras.layers.Dense(512, activation='relu'),\n"," tf.keras.layers.Dense(1, activation='sigmoid')\n","])\n","\n","model.compile(optimizer=RMSprop(lr=0.001), loss='binary_crossentropy', metrics=['acc'])\n"],"execution_count":0,"outputs":[]},{"metadata":{"id":"fQrZfVgz4j2g","colab_type":"code","colab":{}},"cell_type":"code","source":["\n","TRAINING_DIR = \"/tmp/cats-v-dogs/training/\"\n","train_datagen = ImageDataGenerator(rescale=1.0/255.)\n","train_generator = train_datagen.flow_from_directory(TRAINING_DIR,\n"," batch_size=100,\n"," class_mode='binary',\n"," 
target_size=(150, 150))\n","\n","VALIDATION_DIR = \"/tmp/cats-v-dogs/testing/\"\n","validation_datagen = ImageDataGenerator(rescale=1.0/255.)\n","validation_generator = validation_datagen.flow_from_directory(VALIDATION_DIR,\n"," batch_size=100,\n"," class_mode='binary',\n"," target_size=(150, 150))\n","\n","# Expected Output:\n","# Found 22498 images belonging to 2 classes.\n","# Found 2500 images belonging to 2 classes."],"execution_count":0,"outputs":[]},{"metadata":{"id":"5qE1G6JB4fMn","colab_type":"code","colab":{}},"cell_type":"code","source":["# Note that this may take some time.\n","history = model.fit_generator(train_generator,\n"," epochs=15,\n"," verbose=1,\n"," validation_data=validation_generator)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"MWZrJN4-65RC","colab_type":"code","colab":{}},"cell_type":"code","source":["%matplotlib inline\n","\n","import matplotlib.image as mpimg\n","import matplotlib.pyplot as plt\n","\n","#-----------------------------------------------------------\n","# Retrieve a list of list results on training and test data\n","# sets for each training epoch\n","#-----------------------------------------------------------\n","acc=history.history['acc']\n","val_acc=history.history['val_acc']\n","loss=history.history['loss']\n","val_loss=history.history['val_loss']\n","\n","epochs=range(len(acc)) # Get number of epochs\n","\n","#------------------------------------------------\n","# Plot training and validation accuracy per epoch\n","#------------------------------------------------\n","plt.plot(epochs, acc, 'r', \"Training Accuracy\")\n","plt.plot(epochs, val_acc, 'b', \"Validation Accuracy\")\n","plt.title('Training and validation accuracy')\n","plt.figure()\n","\n","#------------------------------------------------\n","# Plot training and validation loss per epoch\n","#------------------------------------------------\n","plt.plot(epochs, loss, 'r', \"Training Loss\")\n","plt.plot(epochs, val_loss, 'b', \"Validation 
Loss\")\n","plt.figure()\n","\n","\n","# Desired output. Charts with training and validation metrics. No crash :)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"LqL6FYUrtXpf","colab_type":"code","colab":{}},"cell_type":"code","source":["# Here's a codeblock just for fun. You should be able to upload an image here \n","# and have it classified without crashing\n","import numpy as np\n","from google.colab import files\n","from keras.preprocessing import image\n","\n","uploaded = files.upload()\n","\n","for fn in uploaded.keys():\n"," \n"," # predicting images\n"," path = '/content/' + fn\n"," img = image.load_img(path, target_size=(150, 150))\n"," x = image.img_to_array(img)\n"," x = np.expand_dims(x, axis=0)\n","\n"," images = np.vstack([x])\n"," classes = model.predict(images, batch_size=10)\n"," print(classes[0])\n"," if classes[0]>0.5:\n"," print(fn + \" is a dog\")\n"," else:\n"," print(fn + \" is a cat\")"],"execution_count":0,"outputs":[]}]}
1+
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Lab6-Cats-v-Dogs.ipynb","version":"0.3.2","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"},"accelerator":"GPU"},"cells":[{"metadata":{"id":"7v55rWlQehzL","colab_type":"text"},"cell_type":"markdown","source":["## Training with a Larger Dataset - Cats and Dogs\n","\n","In the previous lab you trained a classifier with a horses-v-humans dataset. You saw that despite getting great training results, when you tried to do classification with real images, there were many errors, due primarily to overfitting -- where the network does very well with data that it has previously seen, but poorly with data it hasn't!\n","\n","In this lab you'll look at a real, and very large dataset, and see the impact this has to avoid overfitting."]},{"metadata":{"id":"dn-6c02VmqiN","colab_type":"code","colab":{}},"cell_type":"code","source":["import os\n","import zipfile\n","import random\n","import tensorflow as tf\n","from tensorflow.keras.optimizers import RMSprop\n","from tensorflow.keras.preprocessing.image import ImageDataGenerator\n","from shutil import copyfile"],"execution_count":0,"outputs":[]},{"metadata":{"id":"3sd9dQWa23aj","colab_type":"code","colab":{}},"cell_type":"code","source":["# If the URL doesn't work, visit https://www.microsoft.com/en-us/download/confirmation.aspx?id=54765\n","# And right click on the 'Download Manually' link to get a new URL to the dataset\n","\n","# Note: This is a very large dataset and will take time to download\n","\n","!wget --no-check-certificate \\\n"," \"https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_3367a.zip\" \\\n"," -O \"/tmp/cats-and-dogs.zip\"\n","\n","local_zip = '/tmp/cats-and-dogs.zip'\n","zip_ref = zipfile.ZipFile(local_zip, 
'r')\n","zip_ref.extractall('/tmp')\n","zip_ref.close()\n"],"execution_count":0,"outputs":[]},{"metadata":{"id":"DM851ZmN28J3","colab_type":"code","colab":{}},"cell_type":"code","source":["print(len(os.listdir('/tmp/PetImages/Cat/')))\n","print(len(os.listdir('/tmp/PetImages/Dog/')))\n","\n","# Expected Output:\n","# 12501\n","# 12501"],"execution_count":0,"outputs":[]},{"metadata":{"id":"F-QkLjxpmyK2","colab_type":"code","colab":{}},"cell_type":"code","source":["try:\n"," os.mkdir('/tmp/cats-v-dogs')\n"," os.mkdir('/tmp/cats-v-dogs/training')\n"," os.mkdir('/tmp/cats-v-dogs/testing')\n"," os.mkdir('/tmp/cats-v-dogs/training/cats')\n"," os.mkdir('/tmp/cats-v-dogs/training/dogs')\n"," os.mkdir('/tmp/cats-v-dogs/testing/cats')\n"," os.mkdir('/tmp/cats-v-dogs/testing/dogs')\n","except OSError:\n"," pass"],"execution_count":0,"outputs":[]},{"metadata":{"id":"zvSODo0f9LaU","colab_type":"code","colab":{}},"cell_type":"code","source":["def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE):\n"," files = []\n"," for filename in os.listdir(SOURCE):\n"," file = SOURCE + filename\n"," if os.path.getsize(file) > 0:\n"," files.append(filename)\n"," else:\n"," print(filename + \" is zero length, so ignoring.\")\n","\n"," training_length = int(len(files) * SPLIT_SIZE)\n"," testing_length = int(len(files) - training_length)\n"," shuffled_set = random.sample(files, len(files))\n"," training_set = shuffled_set[0:training_length]\n"," testing_set = shuffled_set[-testing_length:]\n","\n"," for filename in training_set:\n"," this_file = SOURCE + filename\n"," destination = TRAINING + filename\n"," copyfile(this_file, destination)\n","\n"," for filename in testing_set:\n"," this_file = SOURCE + filename\n"," destination = TESTING + filename\n"," copyfile(this_file, destination)\n","\n","\n","CAT_SOURCE_DIR = \"/tmp/PetImages/Cat/\"\n","TRAINING_CATS_DIR = \"/tmp/cats-v-dogs/training/cats/\"\n","TESTING_CATS_DIR = \"/tmp/cats-v-dogs/testing/cats/\"\n","DOG_SOURCE_DIR = 
\"/tmp/PetImages/Dog/\"\n","TRAINING_DOGS_DIR = \"/tmp/cats-v-dogs/training/dogs/\"\n","TESTING_DOGS_DIR = \"/tmp/cats-v-dogs/testing/dogs/\"\n","\n","split_size = .9\n","split_data(CAT_SOURCE_DIR, TRAINING_CATS_DIR, TESTING_CATS_DIR, split_size)\n","split_data(DOG_SOURCE_DIR, TRAINING_DOGS_DIR, TESTING_DOGS_DIR, split_size)\n","\n","# Expected output\n","# 666.jpg is zero length, so ignoring\n","# 11702.jpg is zero length, so ignoring"],"execution_count":0,"outputs":[]},{"metadata":{"id":"hwHXFhVG3786","colab_type":"code","colab":{}},"cell_type":"code","source":["print(len(os.listdir('/tmp/cats-v-dogs/training/cats/')))\n","print(len(os.listdir('/tmp/cats-v-dogs/training/dogs/')))\n","print(len(os.listdir('/tmp/cats-v-dogs/testing/cats/')))\n","print(len(os.listdir('/tmp/cats-v-dogs/testing/dogs/')))\n","\n","# Expected output:\n","# 11250\n","# 11250\n","# 1250\n","# 1250"],"execution_count":0,"outputs":[]},{"metadata":{"id":"-BQrav4anTmj","colab_type":"code","colab":{}},"cell_type":"code","source":["model = tf.keras.models.Sequential([\n"," tf.keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=(150, 150, 3)),\n"," tf.keras.layers.MaxPooling2D(2, 2),\n"," tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),\n"," tf.keras.layers.MaxPooling2D(2, 2),\n"," tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),\n"," tf.keras.layers.MaxPooling2D(2, 2),\n"," tf.keras.layers.Flatten(),\n"," tf.keras.layers.Dense(512, activation='relu'),\n"," tf.keras.layers.Dense(1, activation='sigmoid')\n","])\n","\n","model.compile(optimizer=RMSprop(lr=0.001), loss='binary_crossentropy', metrics=['acc'])\n"],"execution_count":0,"outputs":[]},{"metadata":{"id":"fQrZfVgz4j2g","colab_type":"code","colab":{}},"cell_type":"code","source":["\n","TRAINING_DIR = \"/tmp/cats-v-dogs/training/\"\n","train_datagen = ImageDataGenerator(rescale=1.0/255.)\n","train_generator = train_datagen.flow_from_directory(TRAINING_DIR,\n"," batch_size=100,\n"," class_mode='binary',\n"," 
target_size=(150, 150))\n","\n","VALIDATION_DIR = \"/tmp/cats-v-dogs/testing/\"\n","validation_datagen = ImageDataGenerator(rescale=1.0/255.)\n","validation_generator = validation_datagen.flow_from_directory(VALIDATION_DIR,\n"," batch_size=100,\n"," class_mode='binary',\n"," target_size=(150, 150))\n","\n","# Expected Output:\n","# Found 22498 images belonging to 2 classes.\n","# Found 2500 images belonging to 2 classes."],"execution_count":0,"outputs":[]},{"metadata":{"id":"5qE1G6JB4fMn","colab_type":"code","colab":{}},"cell_type":"code","source":["# Note that this may take some time.\n","history = model.fit_generator(train_generator,\n"," epochs=15,\n"," verbose=1,\n"," validation_data=validation_generator)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"MWZrJN4-65RC","colab_type":"code","colab":{}},"cell_type":"code","source":["%matplotlib inline\n","\n","import matplotlib.image as mpimg\n","import matplotlib.pyplot as plt\n","\n","#-----------------------------------------------------------\n","# Retrieve a list of list results on training and test data\n","# sets for each training epoch\n","#-----------------------------------------------------------\n","acc=history.history['acc']\n","val_acc=history.history['val_acc']\n","loss=history.history['loss']\n","val_loss=history.history['val_loss']\n","\n","epochs=range(len(acc)) # Get number of epochs\n","\n","#------------------------------------------------\n","# Plot training and validation accuracy per epoch\n","#------------------------------------------------\n","plt.plot(epochs, acc, 'r', \"Training Accuracy\")\n","plt.plot(epochs, val_acc, 'b', \"Validation Accuracy\")\n","plt.title('Training and validation accuracy')\n","plt.figure()\n","\n","#------------------------------------------------\n","# Plot training and validation loss per epoch\n","#------------------------------------------------\n","plt.plot(epochs, loss, 'r', \"Training Loss\")\n","plt.plot(epochs, val_loss, 'b', \"Validation 
Loss\")\n","plt.figure()\n","\n","\n","# Desired output. Charts with training and validation metrics. No crash :)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"LqL6FYUrtXpf","colab_type":"code","colab":{}},"cell_type":"code","source":["# Here's a codeblock just for fun. You should be able to upload an image here \n","# and have it classified without crashing\n","import numpy as np\n","from google.colab import files\n","from keras.preprocessing import image\n","\n","uploaded = files.upload()\n","\n","for fn in uploaded.keys():\n"," \n"," # predicting images\n"," path = '/content/' + fn\n"," img = image.load_img(path, target_size=(150, 150))\n"," x = image.img_to_array(img)\n"," x = np.expand_dims(x, axis=0)\n","\n"," images = np.vstack([x])\n"," classes = model.predict(images, batch_size=10)\n"," print(classes[0])\n"," if classes[0]>0.5:\n"," print(fn + \" is a dog\")\n"," else:\n"," print(fn + \" is a cat\")"],"execution_count":0,"outputs":[]}]}

0 commit comments

Comments (0)