Chapter 05 complete

SuperSecureHuman · SuperSecureHuman · commit 87f9bf8ae715 · 2022-02-17T20:41:26.000+05:30
diff --git a/Chapter 05/1.Complete_Code.ipynb b/Chapter 05/1.Complete_Code.ipynb
@@ -0,0 +1,237 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import nnfs\n",
+    "from nnfs.datasets import spiral_data\n",
+    "nnfs.init()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Dense layer\n",
+    "class Layer_Dense:\n",
+    "\n",
+    "    # Layer initialization\n",
+    "    def __init__(self, n_inputs, n_neurons):\n",
+    "        # Initialize weights and biases\n",
+    "        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)\n",
+    "        self.biases = np.zeros((1, n_neurons))\n",
+    "\n",
+    "    # Forward pass\n",
+    "    def forward(self, inputs):\n",
+    "        # Calculate output values from inputs, weights and biases\n",
+    "        self.output = np.dot(inputs, self.weights) + self.biases"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ReLU activation\n",
+    "class Activation_ReLU:\n",
+    "\n",
+    "    # Forward pass\n",
+    "    def forward(self, inputs):\n",
+    "        # Calculate output values from inputs\n",
+    "        self.output = np.maximum(0, inputs)\n",
+    "\n",
+    "\n",
+    "\n",
+    "# Softmax activation\n",
+    "class Activation_Softmax:\n",
+    "\n",
+    "    # Forward pass\n",
+    "    def forward(self, inputs):\n",
+    "\n",
+    "        # Get unnormalized probabilities\n",
+    "        exp_values = np.exp(inputs - np.max(inputs, axis=1,\n",
+    "                                            keepdims=True))\n",
+    "        # Normalize them for each sample\n",
+    "        probabilities = exp_values / np.sum(exp_values, axis=1,\n",
+    "                                            keepdims=True)\n",
+    "\n",
+    "        self.output = probabilities"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Common loss class\n",
+    "class Loss:\n",
+    "\n",
+    "    # Calculates the data and regularization losses\n",
+    "    # given model output and ground truth values\n",
+    "    def calculate(self, output, y):\n",
+    "\n",
+    "        # Calculate sample losses\n",
+    "        sample_losses = self.forward(output, y)\n",
+    "\n",
+    "        # Calculate mean loss\n",
+    "        data_loss = np.mean(sample_losses)\n",
+    "\n",
+    "        # Return loss\n",
+    "        return data_loss\n",
+    "\n",
+    "\n",
+    "# Cross-entropy loss\n",
+    "class Loss_CategoricalCrossentropy(Loss):\n",
+    "\n",
+    "    # Forward pass\n",
+    "    def forward(self, y_pred, y_true):\n",
+    "\n",
+    "        # Number of samples in a batch\n",
+    "        samples = len(y_pred)\n",
+    "\n",
+    "        # Clip data to prevent division by 0\n",
+    "        # Clip both sides to not drag mean towards any value\n",
+    "        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)\n",
+    "\n",
+    "\n",
+    "        # Probabilities for target values -\n",
+    "        # only if categorical labels\n",
+    "        if len(y_true.shape) == 1:\n",
+    "            correct_confidences = y_pred_clipped[\n",
+    "                range(samples),\n",
+    "                y_true\n",
+    "            ]\n",
+    "\n",
+    "        # Mask values - only for one-hot encoded labels\n",
+    "        elif len(y_true.shape) == 2:\n",
+    "            correct_confidences = np.sum(\n",
+    "                y_pred_clipped*y_true,\n",
+    "                axis=1\n",
+    "            )\n",
+    "\n",
+    "        # Losses\n",
+    "        negative_log_likelihoods = -np.log(correct_confidences)\n",
+    "        return negative_log_likelihoods"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create dataset\n",
+    "X, y = spiral_data(samples=100, classes=3)\n",
+    "\n",
+    "# Create Dense layer with 2 input features and 3 output values\n",
+    "dense1 = Layer_Dense(2, 3)\n",
+    "\n",
+    "# Create ReLU activation (to be used with Dense layer):\n",
+    "activation1 = Activation_ReLU()\n",
+    "\n",
+    "# Create second Dense layer with 3 input features (as we take output\n",
+    "# of previous layer here) and 3 output values\n",
+    "dense2 = Layer_Dense(3, 3)\n",
+    "\n",
+    "# Create Softmax activation (to be used with Dense layer):\n",
+    "activation2 = Activation_Softmax()\n",
+    "\n",
+    "# Create loss function\n",
+    "loss_function = Loss_CategoricalCrossentropy()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[0.33333334 0.33333334 0.33333334]\n",
+      " [0.3333332  0.3333332  0.33333364]\n",
+      " [0.3333329  0.33333293 0.3333342 ]\n",
+      " [0.3333326  0.33333263 0.33333477]\n",
+      " [0.33333233 0.3333324  0.33333528]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Perform a forward pass of our training data through this layer\n",
+    "dense1.forward(X)\n",
+    "\n",
+    "# Perform a forward pass through activation function\n",
+    "# it takes the output of first dense layer here\n",
+    "activation1.forward(dense1.output)\n",
+    "\n",
+    "\n",
+    "# Perform a forward pass through second Dense layer\n",
+    "# it takes outputs of activation function of first layer as inputs\n",
+    "dense2.forward(activation1.output)\n",
+    "\n",
+    "# Perform a forward pass through activation function\n",
+    "# it takes the output of second dense layer here\n",
+    "activation2.forward(dense2.output)\n",
+    "\n",
+    "# Let's see output of the first few samples:\n",
+    "print(activation2.output[:5])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "loss: 1.0986104\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Perform a forward pass through loss function\n",
+    "# it takes the output of second dense layer here and returns loss\n",
+    "loss = loss_function.calculate(activation2.output, y)\n",
+    "\n",
+    "# Print loss value\n",
+    "print('loss:', loss)"
+   ]
+  }
+ ],
+ "metadata": {
+  "interpreter": {
+   "hash": "9aeb3df5fbd6abcbd1bcdbd5cd117a35ff65e92160c2808640f7d55a504d1a5d"
+  },
+  "kernelspec": {
+   "display_name": "Python 3.8.12 ('python38')",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.12"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Chapter 05/README.md b/Chapter 05/README.md
@@ -0,0 +1,116 @@
+## Calculating Network Error with Loss
+
+---
+
+With any neural network, the aim is to reduce the error in the network. To do this, we first need to calculate the error. Once we have the error, we can apply some math magic to manipulate the weights and biases in such a way that the error is reduced.
+
+The loss function, also known as the cost function, is used to calculate the error. Since the loss is the model's error, we would ideally want it to be 0.
+
+### Categorical Cross Entropy
+
+
+
+![](./assets/Categorical_Cross_Entropy.png)
+
+This is the equation for categorical cross entropy.
+
+Imagine we have outputs of `[0.1, 0.7, 0.2]` and targets of `[0, 1, 0]`, then the calculation would be:
+
+```python
+LOSS = -((0 * log(0.1)) + (1 * log(0.7)) + (0 * log(0.2)))
+# (I am too lazy to calculate this by hand; with the natural log it is -log(0.7), roughly 0.357)
+```
+
+Now we will look at an example in Python:
+
+```python
+import math
+
+#Output of the softmax layer
+softmax_output = [0.7, 0.1, 0.2]
+
+#The actual output we need
+target_output = [1,0,0]
+
+#Calculate the loss
+loss = -1 * (
+    math.log(softmax_output[0]) * target_output[0] +
+    math.log(softmax_output[1]) * target_output[1] +
+    math.log(softmax_output[2]) * target_output[2] 
+)
+
+print(loss)
+
+>>>
+0.3566749439387324
+```
+
+Since we are mainly working with a classification problem, every target value will be 0 except for the one class that is the correct answer, which will be 1.
+
+So, what we can do is ignore all the other outputs and calculate the loss only for the class whose output was supposed to be 1, because all the other outputs are multiplied by 0.
+
+Now we will try to dynamically calculate the entropy:
+
+```python
+softmax_outputs = [ [0.7, 0.1, 0.2],
+                    [0.1, 0.5, 0.4],
+                    [0.02, 0.9, 0.08]]
+
+class_targets = [0, 1, 1]
+
+for targ_idx, distribution in zip(class_targets, softmax_outputs):
+    print(distribution[targ_idx])
+>>>
+0.7
+0.5
+0.9
+```
+
+
+You can see in this sample that by using `zip()`, we iterated over the produced outputs and the desired outputs together. Now that we are sure that it selected the right output, we can use it to calculate the loss.
+
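+Now we make a class to implement this in Python. It inherits from the common `Loss` base class, which is defined in the complete code linked at the end of this page.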
+
+```python
+# Cross-entropy loss
+class Loss_CategoricalCrossentropy(Loss):
+
+    # Forward pass
+    def forward(self, y_pred, y_true):
+
+        # Number of samples in a batch
+        samples = len(y_pred)
+
+        # Clip data to prevent division by 0
+        # Clip both sides to not drag mean towards any value
+        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)
+
+
+        # Probabilities for target values -
+        # only if categorical labels
+        if len(y_true.shape) == 1:
+            correct_confidences = y_pred_clipped[
+                range(samples),
+                y_true
+            ]
+
+        # Mask values - only for one-hot encoded labels
+        elif len(y_true.shape) == 2:
+            correct_confidences = np.sum(
+                y_pred_clipped*y_true,
+                axis=1
+            )
+
+        # Losses
+        negative_log_likelihoods = -np.log(correct_confidences)
+        return negative_log_likelihoods
+```
+
+
+Now we will update the complete code up to now
+
+[Complete code up to now](./1.Complete_Code.ipynb)
+
+---
+
+Chapter 5 of nnfs book
diff --git a/Chapter 05/assets/Categorical_Cross_Entropy.png b/Chapter 05/assets/Categorical_Cross_Entropy.png