-
Notifications
You must be signed in to change notification settings - Fork 2
/
softmax_loss_layer.h
120 lines (93 loc) · 2.9 KB
/
softmax_loss_layer.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#pragma once
#include "layer.h"
#include "util.h"
namespace con {
// Softmax + cross-entropy loss layer stacked on top of `prev`.
//
// forward() converts the previous layer's per-sample scores into softmax
// probabilities (stored in `output`) and caches the mean negative
// log-likelihood of the labels given to setLabels() in `l`.
// backProp() writes the gradient of that mean loss with respect to the
// previous layer's scores into `errors`.
//
// Precondition: setLabels() must have supplied at least `num` labels, each in
// [0, depth), before loss()/forward()/backProp() run; labels are used as
// indices without range checks.
class SoftmaxLossLayer : public Layer {
public:
  // Constructs the layer from `prev`, forwarding prev->num and prev->depth to
  // the Layer constructor, and sizes the per-sample scratch buffers.
  SoftmaxLossLayer(const string &name, Layer *prev) : Layer(name, prev->num, 1, 1, prev->depth, prev) {
    reshape(num, 1, 1, depth, &e);
    reshape(num, 1, 1, depth, &subtract);
    sumE.resize(num);
    maxProb.resize(num);
  }

  // Replaces the contents of `results` with the predicted class index of
  // every sample (see getResult()).
  void getResults(vector<int> *results) {
    results->clear();
    results->reserve(num);
    for (int i = 0; i < num; i++) {
      results->push_back(getResult(i));
    }
  }

  // Returns the index of the largest entry of output[i]; on ties the lowest
  // index wins because the comparison is strict.
  int getResult(const int &i) {
    int best = 0;
    for (int j = 1; j < depth; j++) {
      if (output[i][j] > output[i][best]) {
        best = j;
      }
    }
    return best;
  }

  // Stores a copy of the target class index for each sample.
  void setLabels(const vector<int> &l) {
    labels = l;
  }

  // Computes softmax probabilities into `output` and returns the mean
  // negative log-likelihood of `labels` over the `num` samples.
  // Also refreshes the caches maxProb, subtract, e and sumE that backProp()
  // reads afterwards.
  Real loss() {
    Real total = 0;
    for (int n = 0; n < num; n++) {
      // Locate the largest input score. Shifting every score by it keeps all
      // exponents <= 0, so exp() cannot overflow.
      maxProb[n] = 0;
      for (int i = 1; i < depth; i++) {
        if (prev->output[n][i] > prev->output[n][maxProb[n]]) {
          maxProb[n] = i;
        }
      }
      const Real maxScore = prev->output[n][maxProb[n]];

      sumE[n] = 0;
      for (int i = 0; i < depth; i++) {
        subtract[n][i] = prev->output[n][i] - maxScore;
        e[n][i] = exp(subtract[n][i]);
        sumE[n] += e[n][i];
      }
      for (int i = 0; i < depth; i++) {
        output[n][i] = e[n][i] / sumE[n];
      }

      // log(e^s / sumE) == s - log(sumE). Evaluating it in this form stays
      // finite even when exp(s) underflows to zero for the label, whereas
      // log(output[n][label]) would yield -inf.
      total -= subtract[n][labels[n]] - log(sumE[n]);
    }
    total /= num;
    return total;
  }

  // Runs loss() and caches its value in `l`.
  void forward() {
    l = loss();
  }

  // d(log(x)) / d(x)
  // Retained as part of the public interface; backProp() does not call it.
  Real logDerivative(const Real &x) {
    return 1.0 / x;
  }

  // d(x / (x + a)) / d(x)
  // Retained as part of the public interface; backProp() does not call it.
  Real fractionXADerivative(const Real &x, const Real &a) {
    return a / sqr(x + a);
  }

  // d(a / (x + b)) / d(x)
  // Retained as part of the public interface; backProp() does not call it.
  Real fractionXABDerivative(const Real &x, const Real &a, const Real &b) {
    return -a / sqr(x + b);
  }

  // Fills `errors` with d(mean loss) / d(prev->output) using the values
  // cached by the latest loss()/forward() call. `nextErrors` is not read.
  //
  // Chaining the derivatives of -log, the softmax fraction and exp collapses
  // to (p_i - [i == label]) / num, with p_i = e[n][i] / sumE[n]. Computing
  // that closed form directly avoids the -inf * 0 = NaN that the factor-by-
  // factor product produces when e[n][label] underflows to zero.
  void backProp(const vector<Vec> &nextErrors) {
    clear(&errors);
    for (int n = 0; n < num; n++) {
      const int label = labels[n];
      for (int i = 0; i < depth; i++) {
        const Real target = (i == label) ? 1 : 0;
        errors[n][i] = (e[n][i] / sumE[n] - target) / num;
      }
    }
  }

  // Intentionally empty: nothing is updated by this layer.
  void applyUpdate(const Real &lr, const Real &momentum, const Real &decay) {}

  vector<int> labels;    // target class index per sample, set by setLabels()
  vector<int> maxProb;   // per sample: index of the largest input score
  vector<Vec> e;         // per sample: exp(score - max score)
  vector<Vec> subtract;  // per sample: score - max score
  Vec sumE;              // per sample: sum of e[n][*]
  Real l;                // loss value cached by the last forward()
};
}