Reworked Layers Phase 1 #334

Open · wants to merge 1 commit into master
@@ -1,6 +1,6 @@
op {
graph_op_name: "SoftmaxCrossEntropyWithLogits"
endpoint {
name: "nn.raw.SoftmaxCrossEntropyWithLogits"
name: "nn.SoftmaxCrossEntropyWithLogits"
}
}
@@ -1,6 +1,6 @@
op {
graph_op_name: "SparseSoftmaxCrossEntropyWithLogits"
endpoint {
name: "nn.raw.SparseSoftmaxCrossEntropyWithLogits"
name: "nn.SparseSoftmaxCrossEntropyWithLogits"
}
}
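
For context, a minimal sketch of what this endpoint rename means at generated call sites, assuming an Ops instance named tf and TFloat32 operands features and labels (the variable names are illustrative, not part of this diff):

// Previously the raw op was grouped under nn.raw:
//   SoftmaxCrossEntropyWithLogits<TFloat32> xent =
//       tf.nn.raw.softmaxCrossEntropyWithLogits(features, labels);
// With the endpoint change it is reachable directly under nn:
SoftmaxCrossEntropyWithLogits<TFloat32> xent =
    tf.nn.softmaxCrossEntropyWithLogits(features, labels);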
@@ -83,7 +83,6 @@
import org.tensorflow.op.nn.Relu;
import org.tensorflow.op.nn.Relu6;
import org.tensorflow.op.nn.Selu;
import org.tensorflow.op.nn.SigmoidCrossEntropyWithLogits;
import org.tensorflow.op.nn.Softmax;
import org.tensorflow.op.nn.SoftmaxCrossEntropyWithLogits;
import org.tensorflow.op.nn.Softsign;
@@ -103,16 +102,13 @@
* @see {@link Ops}
*/
public final class NnOps {
public final NnRawOps raw;

private final Scope scope;

private final Ops ops;

NnOps(Ops ops) {
this.scope = ops.scope();
this.ops = ops;
raw = new NnRawOps(ops);
}

/**
@@ -1797,55 +1793,6 @@ public <T extends TNumber> Selu<T> selu(Operand<T> features) {
return Selu.create(scope, features);
}

/**
* Computes sigmoid cross entropy given <code>logits</code>.
*
* <p>Measures the probability error in discrete classification tasks in which each class is
* independent and not mutually exclusive. For instance, one could perform multilabel
* classification where a picture can contain both an elephant and a dog at the same time.
*
* <p>For brevity, let <code>x = logits</code>, <code>z = labels</code>. The logistic loss in
* pseudo-code is
*
* <pre>
* z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
* = z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))
* = z * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x)))
* = z * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x)))
* = (1 - z) * x + log(1 + exp(-x))
* = x - x * z + log(1 + exp(-x))
* </pre>
*
* <p>For <code>x < 0</code>, to avoid overflow in <code>exp(-x)</code>, we reformulate the above
*
* <pre>
* x - x * z + log(1 + exp(-x))
* = log(exp(x)) - x * z + log(1 + exp(-x))
* = - x * z + log(1 + exp(x))
* </pre>
*
* <p>Hence, to ensure stability and avoid overflow, the implementation uses this equivalent
* formulation
*
* <pre>
* max(x, 0) - x * z + log(1 + exp(-abs(x)))
* </pre>
*
* <p><code>logits</code> and <code>labels</code> must have the same type and shape.
*
* <p>
*
* @param labels the labels
* @param logits the logits of type float32 or float64
* @param <T> the type of labels and logits
* @return the component-wise logistic losses.
* @throws IllegalArgumentException if logits' and labels' do not have the same shape
*/
public <T extends TNumber> Operand<T> sigmoidCrossEntropyWithLogits(Operand<T> labels,
Operand<T> logits) {
return SigmoidCrossEntropyWithLogits.sigmoidCrossEntropyWithLogits(scope, labels, logits);
}
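
The numerically stable formulation documented above, max(x, 0) - x * z + log(1 + exp(-abs(x))), can still be expressed directly against the core math ops. A minimal sketch, assuming an Ops instance tf; the helper name and placement are illustrative, not part of this change:

import org.tensorflow.Operand;
import org.tensorflow.op.Ops;
import org.tensorflow.types.family.TNumber;

public class StableSigmoidXentSketch {
  // Element-wise max(x, 0) - x * z + log(1 + exp(-abs(x))), x = logits, z = labels
  static <T extends TNumber> Operand<T> sigmoidCrossEntropy(
      Ops tf, Operand<T> labels, Operand<T> logits) {
    Operand<T> relu = tf.math.maximum(logits, tf.zerosLike(logits));  // max(x, 0)
    Operand<T> negAbs = tf.math.neg(tf.math.abs(logits));             // -abs(x)
    Operand<T> softplus = tf.math.log1p(tf.math.exp(negAbs));         // log(1 + exp(-abs(x)))
    return tf.math.add(tf.math.sub(relu, tf.math.mul(logits, labels)), softplus);
  }
}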

/**
* Computes softmax activations.
* For each batch {@code i} and class {@code j} we have
@@ -1863,53 +1810,20 @@ public <T extends TNumber> Softmax<T> softmax(Operand<T> logits) {
}

/**
* Computes softmax cross entropy between <code>logits</code> and <code>labels</code>.
*
* <p>Measures the probability error in discrete classification tasks in which the classes are
* mutually exclusive (each entry is in exactly one class). For example, each CIFAR-10 image is
* labeled with one and only one label: an image can be a dog or a truck, but not both.
*
* <p><b>NOTE:</b>
*
* <p>While the classes are mutually exclusive, their probabilities need not be. All that is
* required is that each row of <code>labels</code> is a valid probability distribution. If they
* are not, the computation of the gradient will be incorrect.
*
* <p>If using exclusive <code>labels</code> (wherein one and only one class is true at a time),
* see {@link org.tensorflow.op.NnOps#sparseSoftmaxCrossEntropyWithLogits}
*
* <p>Usage:
*
* <pre>
* Operand&lt;TFloat32&gt; logits =
* tf.constant(new float[][] {{4.0F, 2.0F, 1.0F}, {0.0F, 5.0F, 1.0F}} );
* Operand&lt;TFloat32&gt; labels =
* tf.constant(new float[][] {{1.0F, 0.0F, 0.0F}, {0.0F, 0.8F, 0.2F}} );
* Operand&lt;TFloat32&gt; output =
* tf.nn.softmaxCrossEntropyWithLogits(labels, logits, -1);
* // output Shape = [2]
* // dataType = FLOAT (1)
* // values { 0.169846, 0.824745 }
* </pre>
*
* <p>Backpropagation will happen into both <code>logits</code> and <code>labels</code>. To
* disallow backpropagation into <code>labels</code>, pass label tensors through <code>
* tf.stopGradient</code> before feeding it to this function.
* Computes softmax cross entropy cost and gradients to backpropagate.
* Inputs are the logits, not probabilities.
*
* @param labels Each vector along the class dimension should hold a valid probability
* distribution e.g. for the case in which labels are of shape <code>[batch_size, num_classes]
* </code>, each row of <code>labels[i]</code> must be a valid probability distribution.
* @param logits Per-label activations, typically a linear output. These activation energies are
* interpreted as unnormalized log probabilities.
* @param axis The class dimension. -1 is the last dimension.
* @param <T> the number type of the operands
* @return the softmax cross entropy loss. Its type is the same as <code>logits</code> and its
* shape is the same as <code>labels</code> except that it does not have the last dimension of
* <code>labels</code>.
* @param <T> data type for {@code loss} output
* @param features batch_size x num_classes matrix
* @param labels batch_size x num_classes matrix
* The caller must ensure that each batch of labels represents a valid
* probability distribution.
* @param <T> data type for {@code SoftmaxCrossEntropyWithLogits} output and operands
* @return a new instance of SoftmaxCrossEntropyWithLogits
*/
public <T extends TNumber, U extends TNumber> Operand<T> softmaxCrossEntropyWithLogits(
Operand<U> labels, Operand<T> logits, int axis) {
return SoftmaxCrossEntropyWithLogits.softmaxCrossEntropyWithLogits(scope, labels, logits, axis);
public <T extends TNumber> SoftmaxCrossEntropyWithLogits<T> softmaxCrossEntropyWithLogits(
Operand<T> features, Operand<T> labels) {
return SoftmaxCrossEntropyWithLogits.create(scope, features, labels);
}
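
A hedged usage sketch of the new nn endpoint, reusing the example values from the removed Javadoc and assuming an Ops instance tf; loss() and backprop() are the accessors of the generated raw op class:

Operand<TFloat32> features = tf.constant(new float[][] {{4.0f, 2.0f, 1.0f}, {0.0f, 5.0f, 1.0f}});
Operand<TFloat32> labels   = tf.constant(new float[][] {{1.0f, 0.0f, 0.0f}, {0.0f, 0.8f, 0.2f}});
SoftmaxCrossEntropyWithLogits<TFloat32> xent =
    tf.nn.softmaxCrossEntropyWithLogits(features, labels);
Operand<TFloat32> loss = xent.loss();          // per-example loss, shape [2]
Operand<TFloat32> backprop = xent.backprop();  // gradient w.r.t. features, shape [2, 3]

Note that, unlike the removed wrapper, the raw op takes batch_size x num_classes matrices with features first, and has no axis parameter.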

/**
@@ -2096,50 +2010,23 @@ public <T extends TType> SpaceToDepth<T> spaceToDepth(Operand<T> input, Long blo
}

/**
* Computes sparse softmax cross entropy between <code>logits</code> and <code>labels</code>.
*
* <p>Measures the probability error in discrete classification tasks in which the classes are
* mutually exclusive (each entry is in exactly one class). For example, each CIFAR-10 image is
* labeled with one and only one label: an image can be a dog or a truck, but not both.
*
* <p><b>NOTE:</b>
*
* <p>For this operation, the probability of a given label is considered exclusive. That is, soft
* classes are not allowed, and the <code>labels</code> vector must provide a single specific
* index for the true class for each row of <code>logits</code> (each minibatch entry). For soft
* softmax classification with a probability distribution for each entry, {@link
* org.tensorflow.op.NnOps#softmaxCrossEntropyWithLogits}.
*
* <p><b>WARNING:</b>
* Computes softmax cross entropy cost and gradients to backpropagate.
* Unlike {@code SoftmaxCrossEntropyWithLogits}, this operation does not accept
* a matrix of label probabilities, but rather a single label per row
* of features. This label is considered to have probability 1.0 for the
* given row.
* <p>Inputs are the logits, not probabilities.
*
* <p>This op expects unscaled logits, since it performs a <code>softmax</code> on <code>logits
* </code> internally for efficiency. Do not call this op with the output of <code>softmax</code>,
* as it will produce incorrect results.
*
* <p>A common use case is to have logits of shape <code>[batchSize, numClasses]</code> and have
* labels of shape <code>[batchSize]</code>, but higher dimensions are supported, in which case
* the <code>dim</code>-th dimension is assumed to be of size <code>numClasses</code>. <code>
* logits</code> must have the <code>dataType</code> of <code>TFloat16</code>, <code>TFloat32</code>
* , or <code>TFloat64</code>, and <code>labels</code> must have the dtype of <code>TInt32</code>
* or <code>TInt64</code>.
*
* @param labels <code>Tensor</code> of shape <code>[d_0, d_1, ..., d_{r-1}]</code> (where <code>r
* </code> is rank of <code>labels</code> and result) and the dataType is <code>TInt32</code>
* or <code>TInt64</code>. Each entry in <code>labels</code> must be an index in <code>[0,
* numClasses)</code>. Other values will raise an exception when this op is run on CPU, and
* return <code>NaN</code> for corresponding loss and gradient rows on GPU.
* @param logits Per-label activations (typically a linear output) of shape <code>[d_0, d_1, ...,
* d_{r-1}, numClasses]</code> and dataType of <code>TFloat16</code>, <code>TFloat32</code>,
* or <code>TFloat64</code>. These activation energies are interpreted as unnormalized log
* probabilities.
* @return A <code>Tensor</code> of the same shape as <code>labels</code> and of the same type as
* <code>logits</code> with the softmax cross entropy loss.
* @throws IllegalArgumentException If logits are scalars (need to have rank >= 1) or if the rank
* of the labels is not equal to the rank of the logits minus one.
*/
public <T extends TNumber, U extends TNumber> Operand sparseSoftmaxCrossEntropyWithLogits(
Operand<T> labels, Operand<U> logits) {
return SparseSoftmaxCrossEntropyWithLogits.sparseSoftmaxCrossEntropyWithLogits(scope, labels, logits);
* @param <T> data type for {@code loss} output
* @param features batch_size x num_classes matrix
* @param labels batch_size vector with values in [0, num_classes).
* This is the label for the given minibatch entry.
* @param <T> data type for {@code SparseSoftmaxCrossEntropyWithLogits} output and operands
* @return a new instance of SparseSoftmaxCrossEntropyWithLogits
*/
public <T extends TNumber> SparseSoftmaxCrossEntropyWithLogits<T> sparseSoftmaxCrossEntropyWithLogits(
Operand<T> features, Operand<? extends TNumber> labels) {
return SparseSoftmaxCrossEntropyWithLogits.create(scope, features, labels);
}
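
A similar hedged sketch for the sparse variant, where labels are class indices rather than probability rows (names and values are illustrative):

Operand<TFloat32> features = tf.constant(new float[][] {{4.0f, 2.0f, 1.0f}, {0.0f, 5.0f, 1.0f}});
Operand<TInt32> labels = tf.constant(new int[] {0, 1});  // one index in [0, num_classes) per row
SparseSoftmaxCrossEntropyWithLogits<TFloat32> xent =
    tf.nn.sparseSoftmaxCrossEntropyWithLogits(features, labels);
Operand<TFloat32> loss = xent.loss();          // per-example loss, shape [2]
Operand<TFloat32> backprop = xent.backprop();  // gradient w.r.t. features, shape [2, 3]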

/**
@@ -15,7 +15,7 @@

// This class has been generated, DO NOT EDIT!

package org.tensorflow.op.nn.raw;
package org.tensorflow.op.nn;

import org.tensorflow.Operand;
import org.tensorflow.Operation;
@@ -34,7 +34,7 @@
* @param <T> data type for {@code loss} output
*/
@Operator(
group = "nn.raw"
group = "nn"
)
public final class SoftmaxCrossEntropyWithLogits<T extends TNumber> extends RawOp {
/**
@@ -15,7 +15,7 @@

// This class has been generated, DO NOT EDIT!

package org.tensorflow.op.nn.raw;
package org.tensorflow.op.nn;

import org.tensorflow.Operand;
import org.tensorflow.Operation;
@@ -38,7 +38,7 @@
* @param <T> data type for {@code loss} output
*/
@Operator(
group = "nn.raw"
group = "nn"
)
public final class SparseSoftmaxCrossEntropyWithLogits<T extends TNumber> extends RawOp {
/**
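
Call sites that reference the generated classes directly pick up the package move; a sketch of the import change implied by this diff:

// import org.tensorflow.op.nn.raw.SoftmaxCrossEntropyWithLogits;        // before
// import org.tensorflow.op.nn.raw.SparseSoftmaxCrossEntropyWithLogits;  // before
import org.tensorflow.op.nn.SoftmaxCrossEntropyWithLogits;
import org.tensorflow.op.nn.SparseSoftmaxCrossEntropyWithLogits;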
Binary file modified tensorflow-core/tensorflow-core-api/src/gen/resources/ops.pb
@@ -0,0 +1,111 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
=======================================================================*/
package org.tensorflow.framework.layers;

import static org.tensorflow.framework.utils.CastHelper.cast;

import java.util.ArrayList;
import java.util.List;
import org.tensorflow.Operand;
import org.tensorflow.ndarray.Shape;
import org.tensorflow.op.Ops;
import org.tensorflow.types.TBool;
import org.tensorflow.types.family.TFloating;
import org.tensorflow.types.family.TType;

/**
* Layer that applies an activation function to an output.
*
* @param <T> the data type for the layer's weights and computation.
*/
public class Activation<T extends TFloating> extends Layer<T> {
private final org.tensorflow.framework.activations.Activation<T> activation;

/**
* Creates an Activation layer using {@link Class#getSimpleName()} for the name.
*
* @param activation the activation to apply
* @param type the data type for the weights and computation
*/
public Activation(org.tensorflow.framework.activations.Activation<T> activation, Class<T> type) {
this(null, activation, type, null);
}

/**
* Creates an Activation layer using {@link Class#getSimpleName()} for the name.
*
* @param activation the activation to apply
* @param type the data type for the weights and computation
* @param options the layer's options, may be null
*/
public Activation(
org.tensorflow.framework.activations.Activation<T> activation,
Class<T> type,
Options options) {
this(null, activation, type, options);
}

/**
* Creates an Activation layer
*
* @param name the unique name for this layer, if null will use {@link Class#getSimpleName()} for
* the name.
* @param activation the activation to apply
* @param type the data type for the weights and computation
*/
public Activation(
String name, org.tensorflow.framework.activations.Activation<T> activation, Class<T> type) {
this(name, activation, type, null);
}
/**
* Creates an Activation layer
*
* @param name the unique name for this layer, if null will use {@link Class#getSimpleName()} for
* the name.
* @param activation the activation to apply
* @param type the data type for the weights and computation
* @param options the layer's options, may be null
*/
public Activation(
String name,
org.tensorflow.framework.activations.Activation<T> activation,
Class<T> type,
Options options) {
super(name, true, type, options);
this.activation = activation;
}

/** {@inheritDoc} */
@Override
public <U extends TType> List<Operand<U>> call(
Ops tf,
List<Operand<? extends TType>> inputs,
List<Operand<TBool>> masks,
boolean training,
Class<U> resultType) {
Ops ltf = init(tf);
List<Operand<U>> results = new ArrayList<>();
inputs.forEach(
input ->
results.add(cast(ltf, activation.call(ltf, cast(ltf, input, getType())), resultType)));
return callPostProcess(results, training);
}

/** {@inheritDoc} */
@Override
public List<Shape> computeOutputShape(List<Shape> inputShapes) {
return inputShapes;
}
}
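
A hedged sketch of how the new layer might be driven, assuming an Ops instance tf and an already-constructed framework activation (construction of the activation itself is outside this diff, and passing null masks with training=false is an assumption about the Layer contract):

import java.util.Collections;
import java.util.List;
import org.tensorflow.Operand;
import org.tensorflow.framework.layers.Activation;
import org.tensorflow.op.Ops;
import org.tensorflow.types.TFloat32;
import org.tensorflow.types.family.TType;

public class ActivationLayerSketch {
  static Operand<TFloat32> apply(
      Ops tf,
      org.tensorflow.framework.activations.Activation<TFloat32> activation,
      Operand<TFloat32> input) {
    // Two-arg constructor shown in this diff: activation + data type
    Activation<TFloat32> layer = new Activation<>(activation, TFloat32.class);
    List<Operand<? extends TType>> inputs = Collections.singletonList(input);
    // masks = null, training = false (inference); result cast to TFloat32
    return layer.call(tf, inputs, null, false, TFloat32.class).get(0);
  }
}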