Fix softmax overflow
Paul Ilioaica committed Sep 26, 2024
1 parent 186792a commit d05a6b6
Showing 4 changed files with 2,730 additions and 154 deletions.
3 changes: 1 addition & 2 deletions src/loss_functions/cce.py
@@ -31,8 +31,7 @@ def backward(self):
             sample_size = len(self.y_pred[batch])
             for sample in range(sample_size):
                 for i in range(len(self.y_pred[batch][sample])):
-                    pred_value = max(self.y_pred[batch][sample][i].value, self.epsilon)
-                    self.y_pred[batch][sample][i].grad = -self.y_true[batch][sample][i].value / pred_value / batch_size
+                    self.y_pred[batch][sample][i].grad = (self.y_pred[batch][sample][i].value - self.y_true[batch][sample][i].value)
                     self.y_pred[batch][sample][i].backward()

     def __repr__(self):
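Note: the new gradient appears to rely on the standard identity for cross-entropy composed with softmax: with p = softmax(z) and L = -sum_i y_i * log(p_i), the gradient with respect to the logits is dL/dz_i = p_i - y_i. A minimal standalone check of that identity, using plain floats rather than this repository's Value/autograd classes (all names below are illustrative only):

import math

def softmax(z):
    m = max(z)                                   # shift for numerical stability
    e = [math.exp(v - m) for v in z]
    s = sum(e)
    return [v / s for v in e]

def cross_entropy(z, y):                         # L = -sum(y_i * log(p_i)) on softmax(z)
    return -sum(t * math.log(q) for t, q in zip(y, softmax(z)))

z = [2.0, -1.0, 0.5]                             # logits
y = [1.0, 0.0, 0.0]                              # one-hot target
analytic = [p - t for p, t in zip(softmax(z), y)]

eps = 1e-6
numeric = []
for i in range(len(z)):
    hi, lo = z[:], z[:]
    hi[i] += eps
    lo[i] -= eps
    numeric.append((cross_entropy(hi, y) - cross_entropy(lo, y)) / (2 * eps))

print(analytic)                                  # the two lists agree to ~1e-9
print(numeric)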
6 changes: 3 additions & 3 deletions src/models/simple_model.py
@@ -1,17 +1,17 @@
from nn.linear_layer import LinearLayer
from activation_functions.relu import ReLUActivation
from activation_functions.sigmoid import SigmoidActivation
from normalization.softmax import Softmax
from nn.module import Module

class SimpleModel(Module):
    def __init__(self, input_size, hidden_size, output_size):
        self.layer1 = LinearLayer(input_size, hidden_size)
        self.layer2 = LinearLayer(hidden_size, output_size)
        self.relu = ReLUActivation()
        self.sigmoid = SigmoidActivation()
        self.softmax = Softmax()

    def __call__(self, x):
        x = self.layer1(x)
        x = self.relu(x)
        x = self.sigmoid(x)
        x = self.layer2(x)
        return self.softmax(x)
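For context, a minimal plain-Python sketch of the pipeline this model wires together (linear -> ReLU -> sigmoid -> linear -> softmax); the weights, dimensions, and helper names below are invented for illustration and are not part of the repository:

import math, random

def linear(x, w, b):                             # one dense layer: y = W x + b
    return [sum(wi * xi for wi, xi in zip(row, x)) + bi for row, bi in zip(w, b)]

def relu(x):
    return [max(0.0, v) for v in x]

def sigmoid(x):
    return [1.0 / (1.0 + math.exp(-v)) for v in x]

def softmax(x):
    m = max(x)
    e = [math.exp(v - m) for v in x]
    return [v / sum(e) for v in e]

random.seed(0)
input_size, hidden_size, output_size = 4, 8, 3
w1 = [[random.gauss(0, 1) for _ in range(input_size)] for _ in range(hidden_size)]
b1 = [0.0] * hidden_size
w2 = [[random.gauss(0, 1) for _ in range(hidden_size)] for _ in range(output_size)]
b2 = [0.0] * output_size

x = [0.5, -1.2, 3.0, 0.7]
h = sigmoid(relu(linear(x, w1, b1)))             # hidden activations squashed into (0, 1)
probs = softmax(linear(h, w2, b2))
print(probs, sum(probs))                         # class probabilities, sum is 1.0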
20 changes: 15 additions & 5 deletions src/normalization/softmax.py
@@ -2,17 +2,27 @@
 import math

 class Softmax(NormalizationFunction):
+    def __init__(self, temperature=3, clip_value=50):
+        self.temperature = temperature
+        self.clip_value = clip_value

     def _softmax(self, x):
         max_value = max([i.value for i in x])
-        e_x = [math.exp(i.value - max_value) for i in x]
-        return [i / sum(e_x) for i in e_x]

+        clipped_logits = [(i.value - max_value) / self.temperature for i in x]
+        clipped_logits = [min(self.clip_value, max(-self.clip_value, logit)) for logit in clipped_logits]

+        log_sum_exp = math.log(sum([math.exp(logit) for logit in clipped_logits]))
+        softmax_values = [math.exp(logit - log_sum_exp) for logit in clipped_logits]

+        return softmax_values

     def forward(self, input):
         softmax_values = self._softmax(input)
         return softmax_values

     def _build_backward_function(self, input, out):
         def _backward():
-            input.grad += out.value * (1 - out.value) if out.requires_grad else 0
+            if out.requires_grad:
+                input.grad += out.value * (1 - out.value) * out.grad / self.temperature
         return _backward
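A standalone sketch of the stabilization applied above, using plain floats instead of the repository's Value objects (the default temperature and clip_value mirror the constructor shown; the function name is illustrative). Subtracting the running maximum, dividing by the temperature, clamping to +/- clip_value, and normalizing via log-sum-exp keeps every exponent in a representable range, whereas a naive math.exp(1000) raises OverflowError:

import math

def stable_softmax(values, temperature=3, clip_value=50):
    m = max(values)
    logits = [(v - m) / temperature for v in values]                  # largest logit becomes 0
    logits = [min(clip_value, max(-clip_value, l)) for l in logits]   # clamp the extremes
    log_sum_exp = math.log(sum(math.exp(l) for l in logits))          # always finite
    return [math.exp(l - log_sum_exp) for l in logits]

print(stable_softmax([1000.0, 999.0, 0.0]))                           # finite probabilities, no overflow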
