
Commit 8ec4233

Merge pull request #13 from rsepassi/push

v1.0.5

2 parents 7ec178b + 8195f34

File tree: 9 files changed, +238 -35 lines

.gitignore (+5)

```diff
@@ -0,0 +1,5 @@
+# Compiled python modules.
+*.pyc
+
+# Python egg metadata, regenerated from source files by setuptools.
+/*.egg-info
```

README.md (+48)

````diff
@@ -6,6 +6,7 @@ version](https://badge.fury.io/py/tensor2tensor.svg)](https://badge.fury.io/py/t
 Issues](https://img.shields.io/github/issues/tensorflow/tensor2tensor.svg)](https://github.com/tensorflow/tensor2tensor/issues)
 [![Contributions
 welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg)](CONTRIBUTING.md)
+[![Gitter](https://img.shields.io/gitter/room/nwjs/nw.js.svg)](https://gitter.im/tensor2tensor/Lobby)
 [![License](https://img.shields.io/badge/License-Apache%202.0-brightgreen.svg)](https://opensource.org/licenses/Apache-2.0)
 
 [T2T](https://github.com/tensorflow/tensor2tensor) is a modular and extensible
@@ -22,6 +23,8 @@ send along a pull request to add your data-set or model.
 See [our contribution
 doc](CONTRIBUTING.md) for details and our [open
 issues](https://github.com/tensorflow/tensor2tensor/issues).
+And chat with us and other users on
+[Gitter](https://gitter.im/tensor2tensor/Lobby).
 
 ---
 
@@ -95,7 +98,14 @@ cat $DECODE_FILE.$MODEL.$HPARAMS.beam$BEAM_SIZE.alpha$ALPHA.decodes
 ## Installation
 
 ```
+# Assumes tensorflow or tensorflow-gpu installed
 pip install tensor2tensor
+
+# Installs with tensorflow-gpu requirement
+pip install tensor2tensor[tensorflow_gpu]
+
+# Installs with tensorflow (cpu) requirement
+pip install tensor2tensor[tensorflow]
 ```
 
 Binaries:
@@ -191,6 +201,44 @@ related flags control local and distributed training/evaluation
 
 ---
 
+## Adding your own components
+
+T2T's components are registered using a central registration mechanism that
+enables easily adding new ones and easily swapping amongst them by command-line
+flag. You can add your own components without editing the T2T codebase by
+specifying the `--t2t_usr_dir` flag in `t2t-trainer`.
+
+You can currently do so for models, hyperparameter sets, and modalities. Please
+do submit a pull request if your component might be useful to others.
+
+Here's an example with a new hyperparameter set:
+
+```python
+# In ~/usr/t2t_usr/my_registrations.py
+
+from tensor2tensor.models import transformer
+from tensor2tensor.utils import registry
+
+@registry.register_hparams
+def transformer_my_very_own_hparams_set():
+  hparams = transformer.transformer_base()
+  hparams.hidden_size = 1024
+  ...
+```
+
+```python
+# In ~/usr/t2t_usr/__init__.py
+import my_registrations
+```
+
+```
+t2t-trainer --t2t_usr_dir=~/usr/t2t_usr --registry_help
+```
+
+You'll see under the registered HParams your
+`transformer_my_very_own_hparams_set`, which you can directly use on the command
+line with the `--hparams_set` flag.
+
 ## Adding a dataset
 
 See the [data generators
````
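To make the "central registration mechanism" in the README addition concrete, here is a minimal, self-contained sketch of the decorator-based registry idea: a plain dict populated by a decorator and queried by name. This illustrates the pattern only; it is not T2T's actual `registry` implementation, and all names in it are hypothetical.

```python
# Hypothetical minimal registry; not T2T's actual registry module.
_HPARAMS_SETS = {}


def register_hparams(fn):
  """Store the decorated function under its own name and return it unchanged."""
  _HPARAMS_SETS[fn.__name__] = fn
  return fn


@register_hparams
def my_hparams_set():
  return {"hidden_size": 1024}


# A trainer can now look up a component by the name given on the command line.
print(_HPARAMS_SETS["my_hparams_set"]())  # {'hidden_size': 1024}
```

Because registration happens as a side effect of importing the module, simply importing a user directory (as `--t2t_usr_dir` does below) is enough to make new components visible.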

setup.py (+5 -2)

```diff
@@ -5,7 +5,7 @@
 
 setup(
     name='tensor2tensor',
-    version='1.0.4',
+    version='1.0.5',
     description='Tensor2Tensor',
     author='Google Inc.',
     author_email='[email protected]',
@@ -17,8 +17,11 @@
         'numpy',
         'sympy',
         'six',
-        'tensorflow-gpu>=1.2.0rc1',
     ],
+    extras_require={
+        'tensorflow': ['tensorflow>=1.2.0rc1'],
+        'tensorflow_gpu': ['tensorflow-gpu>=1.2.0rc1'],
+    },
     classifiers=[
         'Development Status :: 4 - Beta',
         'Intended Audience :: Developers',
```
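The effect of this change is that `tensorflow-gpu` is no longer a hard requirement; users choose a TensorFlow flavor through a setuptools extra, matching the new `pip install tensor2tensor[...]` lines in the README. A minimal standalone sketch of the `extras_require` pattern (hypothetical package name, not T2T's actual setup.py):

```python
# Hypothetical minimal setup.py demonstrating extras_require.
from setuptools import setup

setup(
    name='mypkg',  # hypothetical package name
    version='0.1',
    install_requires=['numpy'],  # always installed
    extras_require={
        # Pulled in only by `pip install mypkg[tensorflow]` or
        # `pip install mypkg[tensorflow_gpu]`.
        'tensorflow': ['tensorflow>=1.2.0rc1'],
        'tensorflow_gpu': ['tensorflow-gpu>=1.2.0rc1'],
    },
)
```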

tensor2tensor/bin/t2t-trainer (+29 -1)

```diff
@@ -29,17 +29,45 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import importlib
+import os
+import sys
+
 # Dependency imports
 
 from tensor2tensor.utils import trainer_utils as utils
 
 import tensorflow as tf
 
-FLAGS = tf.flags.FLAGS
+flags = tf.flags
+FLAGS = flags.FLAGS
+
+flags.DEFINE_string("t2t_usr_dir", "",
+                    "Path to a Python module that will be imported. The "
+                    "__init__.py file should include the necessary imports. "
+                    "The imported files should contain registrations, "
+                    "e.g. @registry.register_model calls, that will then be "
+                    "available to the t2t-trainer.")
+
+
+def import_usr_dir():
+  """Import module at FLAGS.t2t_usr_dir, if provided."""
+  if not FLAGS.t2t_usr_dir:
+    return
+  dir_path = os.path.expanduser(FLAGS.t2t_usr_dir)
+  if dir_path[-1] == "/":
+    dir_path = dir_path[:-1]
+  containing_dir, module_name = os.path.split(dir_path)
+  tf.logging.info("Importing user module %s from path %s", module_name,
+                  containing_dir)
+  sys.path.insert(0, containing_dir)
+  importlib.import_module(module_name)
+  sys.path.pop(0)
 
 
 def main(_):
   tf.logging.set_verbosity(tf.logging.INFO)
+  import_usr_dir()
   utils.log_registry()
   utils.validate_flags()
   utils.run(
```
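The import mechanism above is a temporary `sys.path` manipulation: the user directory's parent goes onto the path, the directory is imported as a package (triggering its registrations), and the path entry is popped. A minimal standalone sketch of the same pattern, runnable outside T2T; the `try`/`finally` cleanup is a small hardening that is not in the committed code, and `usr_dir` is any directory containing an `__init__.py`:

```python
import importlib
import os
import sys


def import_module_from_dir(usr_dir):
  """Import the package that lives at usr_dir and return the module object."""
  dir_path = os.path.expanduser(usr_dir).rstrip("/")
  containing_dir, module_name = os.path.split(dir_path)
  # Make the parent directory importable just long enough to import.
  sys.path.insert(0, containing_dir)
  try:
    return importlib.import_module(module_name)
  finally:
    sys.path.pop(0)


# Example (hypothetical path): mod = import_module_from_dir("~/usr/t2t_usr")
```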

tensor2tensor/data_generators/lm_example.py (+1 -1)

```diff
@@ -38,7 +38,7 @@
 tar xvf 1-billion-word-language-modeling-benchmark-r13output.tar.gz
 
 # replace oov words with UNK
-./blaze-bin/third_party/py/tensor2tensor/data_generators/replace_oov \
+$BINARYDIR/replace_oov \
   --vocab_file=$DATADIR/vocab-2016-09-10.txt \
   --in_filepattern=\
 $DATADIR/1-billion-word-language-modeling-benchmark-r13output/\
```

tensor2tensor/models/common_attention.py (+93 -14)

```diff
@@ -271,10 +271,14 @@ def attention_image_summary(attn, image_shapes=None):
 
   Args:
     attn: a Tensor with shape [batch, num_heads, query_length, memory_length]
-    image_shapes: optional quadruple of integer scalars.
+    image_shapes: optional tuple of integer scalars.
       If the query positions and memory positions represent the
-      pixels of a flattened image, then pass in their dimensions:
+      pixels of flattened images, then pass in their dimensions:
         (query_rows, query_cols, memory_rows, memory_cols).
+      If the query positions and memory positions represent the
+      pixels x channels of flattened images, then pass in their dimensions:
+        (query_rows, query_cols, query_channels,
+         memory_rows, memory_cols, memory_channels).
   """
   num_heads = attn.get_shape().as_list()[1]
   # [batch, query_length, memory_length, num_heads]
@@ -286,10 +290,20 @@ def attention_image_summary(attn, image_shapes=None):
   image = split_last_dimension(image, 3)
   image = tf.reduce_max(image, 4)
   if image_shapes is not None:
-    q_rows, q_cols, m_rows, m_cols = list(image_shapes)
-    image = tf.reshape(image, [-1, q_rows, q_cols, m_rows, m_cols, 3])
-    image = tf.transpose(image, [0, 1, 3, 2, 4, 5])
-    image = tf.reshape(image, [-1, q_rows * m_rows, q_cols * m_cols, 3])
+    if len(image_shapes) == 4:
+      q_rows, q_cols, m_rows, m_cols = list(image_shapes)
+      image = tf.reshape(image, [-1, q_rows, q_cols, m_rows, m_cols, 3])
+      image = tf.transpose(image, [0, 1, 3, 2, 4, 5])
+      image = tf.reshape(image, [-1, q_rows * m_rows, q_cols * m_cols, 3])
+    else:
+      assert len(image_shapes) == 6
+      q_rows, q_cols, q_channels, m_rows, m_cols, m_channels = list(
+          image_shapes)
+      image = tf.reshape(image, [-1, q_rows, q_cols, q_channels,
+                                 m_rows, m_cols, m_channels, 3])
+      image = tf.transpose(image, [0, 1, 4, 3, 2, 5, 6, 7])
+      image = tf.reshape(image, [-1, q_rows * m_rows * q_channels,
+                                 q_cols * m_cols * m_channels, 3])
   tf.summary.image("attention", image, max_outputs=1)
```
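The four-tuple branch above tiles, for each query pixel, that pixel's full memory attention map into a grid. A small numpy sketch of the same reshape/transpose sequence, just to make the shape bookkeeping concrete (toy sizes, no TF required):

```python
import numpy as np

batch, q_rows, q_cols, m_rows, m_cols = 1, 2, 2, 3, 2

# Stand-in for `image`: [batch, query_length, memory_length, 3].
image = np.zeros((batch, q_rows * q_cols, m_rows * m_cols, 3), np.float32)

x = image.reshape(-1, q_rows, q_cols, m_rows, m_cols, 3)
x = x.transpose(0, 1, 3, 2, 4, 5)  # interleave memory rows into query rows
x = x.reshape(-1, q_rows * m_rows, q_cols * m_cols, 3)
print(x.shape)  # (1, 6, 4, 3): one m_rows x m_cols tile per query pixel
```

The six-tuple branch generalizes this with an extra channel axis on each side, using the same reshape, transpose, reshape recipe.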
```diff
@@ -310,10 +324,8 @@ def dot_product_attention(q,
     bias: bias Tensor (see attention_bias())
     dropout_rate: a floating point number
     summaries: a boolean
-    image_shapes: optional quadruple of integer scalars for image summary.
-      If the query positions and memory positions represent the
-      pixels of a flattened image, then pass in their dimensions:
-        (query_rows, query_cols, memory_rows, memory_cols).
+    image_shapes: optional tuple of integer scalars.
+      see comments for attention_image_summary()
     name: an optional string
 
   Returns:
@@ -356,10 +368,8 @@ def multihead_attention(query_antecedent,
     num_heads: an integer dividing total_key_depth and total_value_depth
     dropout_rate: a floating point number
     summaries: a boolean
-    image_shapes: optional quadruple of integer scalars for image summary.
-      If the query positions and memory positions represent the
-      pixels of a flattened image, then pass in their dimensions:
-        (query_rows, query_cols, memory_rows, memory_cols).
+    image_shapes: optional tuple of integer scalars.
+      see comments for attention_image_summary()
     name: an optional string
 
   Returns:
```
```diff
@@ -398,3 +408,72 @@ def multihead_attention(query_antecedent,
   x = combine_heads(x)
   x = common_layers.conv1d(x, output_depth, 1, name="output_transform")
   return x
+
+
+def parameter_attention(x,
+                        total_key_depth,
+                        total_value_depth,
+                        output_depth,
+                        memory_rows,
+                        num_heads,
+                        dropout_rate,
+                        name=None):
+  """Attention over parameters.
+
+  We use the same multi-headed attention as in the other layers, but the memory
+  keys and values are model parameters. There are no linear transformations
+  on the keys or values.
+
+  We are also a bit more careful about memory usage, since the number of
+  memory positions may be very large.
+
+  Args:
+    x: a Tensor with shape [batch, length_q, channels]
+    total_key_depth: an integer
+    total_value_depth: an integer
+    output_depth: an integer
+    memory_rows: an integer
+    num_heads: an integer dividing total_key_depth and total_value_depth
+    dropout_rate: a floating point number
+    name: an optional string
+
+  Returns:
+    A Tensor.
+  """
+  with tf.variable_scope(name, default_name="parameter_attention",
+                         values=[x]):
+    head_size_k = total_key_depth // num_heads
+    head_size_v = total_value_depth // num_heads
+    var_shape_k = [num_heads, memory_rows, head_size_k]
+    var_shape_v = [num_heads, memory_rows, head_size_v]
+    k = tf.get_variable(
+        "k", var_shape_k,
+        initializer=tf.random_normal_initializer(
+            0, output_depth ** -0.5)) * (num_heads ** 0.5)
+    v = tf.get_variable(
+        "v", var_shape_v,
+        initializer=tf.random_normal_initializer(
+            0, output_depth ** -0.5)) * (output_depth ** 0.5)
+    batch_size = tf.shape(x)[0]
+    length = tf.shape(x)[1]
+    q = common_layers.conv1d(x, total_key_depth, 1, name="q_transform")
+    if dropout_rate:
+      # This is a cheaper form of attention dropout where we use the same
+      # dropout decisions across batch elements and query positions,
+      # but different decisions across heads and memory positions.
+      v = tf.nn.dropout(v, 1.0 - dropout_rate,
+                        noise_shape=[num_heads, memory_rows, 1])
+    # query is [batch, length, hidden_size]
+    # reshape and transpose it to [heads, batch * length, head_size]
+    q = tf.reshape(q, [batch_size, length, num_heads, head_size_k])
+    q = tf.transpose(q, [2, 0, 1, 3])
+    q = tf.reshape(q, [num_heads, batch_size * length, head_size_k])
+    weights = tf.matmul(q, k, transpose_b=True)
+    weights = tf.nn.softmax(weights)
+    y = tf.matmul(weights, v)
+    y = tf.reshape(y, [num_heads, batch_size, length, head_size_v])
+    y = tf.transpose(y, [1, 2, 0, 3])
+    y = tf.reshape(y, [batch_size, length, total_value_depth])
+    y.set_shape([None, None, total_value_depth])
+    y = common_layers.conv1d(y, output_depth, 1, name="output_transform")
+    return y
```
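To see the shape bookkeeping in `parameter_attention`, here is a plain-numpy sketch of the core computation; random arrays stand in for the learned `k` and `v` variables and the `q` projection, and no TF is required:

```python
import numpy as np

num_heads, memory_rows = 4, 10
head_size_k = head_size_v = 8
batch, length = 2, 5

# Stand-ins for the learned variables and the projected query.
q = np.random.randn(batch, length, num_heads * head_size_k).astype(np.float32)
k = np.random.randn(num_heads, memory_rows, head_size_k).astype(np.float32)
v = np.random.randn(num_heads, memory_rows, head_size_v).astype(np.float32)

# Fold batch and length together so each head does one big matmul.
qh = q.reshape(batch, length, num_heads, head_size_k)
qh = qh.transpose(2, 0, 1, 3).reshape(num_heads, batch * length, head_size_k)
w = qh @ k.transpose(0, 2, 1)        # [heads, batch*length, memory_rows]
w = np.exp(w - w.max(-1, keepdims=True))
w /= w.sum(-1, keepdims=True)        # softmax over memory rows
y = w @ v                            # [heads, batch*length, head_size_v]
y = y.reshape(num_heads, batch, length, head_size_v).transpose(1, 2, 0, 3)
y = y.reshape(batch, length, num_heads * head_size_v)
print(y.shape)  # (2, 5, 32) == [batch, length, total_value_depth]
```

Folding batch and length into one axis is what keeps memory usage modest when `memory_rows` is large: each head performs a single matmul against its parameter matrix rather than one per position.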

tensor2tensor/models/modalities.py (+3 -3)

```diff
@@ -441,8 +441,8 @@ class IdentityModality(modality.Modality):
   def targets_dimensionality(self):
     return self._vocab_size
 
-  def inputs_bottom_simple(self, inputs):
-    return tf.to_float(inputs)
+  def bottom(self, x):
+    return tf.to_float(x)
 
-  def targets_top_simple(self, body_output, _):
+  def top(self, body_output, _):
     return body_output
```
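This rename moves `IdentityModality` onto the shorter `bottom`/`top` method names. A minimal sketch of a custom modality written against the new names; the base class comes from the diff above, but its import path is an assumption, not something this commit confirms:

```python
import tensorflow as tf

# Assumed import path for the Modality base class.
from tensor2tensor.utils import modality


class FloatIdentityModality(modality.Modality):
  """Pass-through modality mirroring IdentityModality's new interface."""

  def bottom(self, x):
    # Map raw input values into the model's floating-point space.
    return tf.to_float(x)

  def top(self, body_output, _):
    # Return the body output unchanged as the model's prediction.
    return body_output
```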
