Merge pull request #19 from ma7555/master
@ma7555: Upgrade to TF 1.15.4, Fix Bug & Clean Code
husseinmozannar authored Aug 4, 2023
2 parents 24c5ec8 + ebacc7c commit 1229d34
Showing 12 changed files with 77 additions and 92 deletions.
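
Most of the diff below applies one mechanical pattern: TF 1.x symbols that were removed or relocated in later releases (`tf.logging`, `tf.get_variable`, `tf.gfile`, `tf.FixedLenFeature`, `tf.to_int32`, ...) are swapped for their `tf.compat.v1.*` or `tf.io.*` equivalents, so the code runs on TF 1.15.4 and stays importable under newer TensorFlow builds. A minimal standalone sketch of the pattern (not code from this repository):

```python
import tensorflow as tf

# Removed TF 1.x symbol         ->  replacement used throughout this commit
# tf.logging.info(...)          ->  tf.compat.v1.logging.info(...)
# tf.gfile.MakeDirs(path)       ->  tf.io.gfile.makedirs(path)
# tf.FixedLenFeature(...)       ->  tf.io.FixedLenFeature(...)
# tf.to_int32(t)                ->  tf.cast(t, tf.int32)

tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
tf.compat.v1.logging.info("TensorFlow version: %s", tf.__version__)
```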
93 changes: 45 additions & 48 deletions bert/Bert_model.py
@@ -379,29 +379,29 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length,
         end_position = 0

       if example_index < 20:
-        tf.logging.info("*** Example ***")
-        tf.logging.info("unique_id: %s" % (unique_id))
-        tf.logging.info("example_index: %s" % (example_index))
-        tf.logging.info("doc_span_index: %s" % (doc_span_index))
-        tf.logging.info("tokens: %s" % " ".join(
+        tf.compat.v1.logging.info("*** Example ***")
+        tf.compat.v1.logging.info("unique_id: %s" % (unique_id))
+        tf.compat.v1.logging.info("example_index: %s" % (example_index))
+        tf.compat.v1.logging.info("doc_span_index: %s" % (doc_span_index))
+        tf.compat.v1.logging.info("tokens: %s" % " ".join(
             [tokenization.printable_text(x) for x in tokens]))
-        tf.logging.info("token_to_orig_map: %s" % " ".join(
+        tf.compat.v1.logging.info("token_to_orig_map: %s" % " ".join(
             ["%d:%d" % (x, y) for (x, y) in six.iteritems(token_to_orig_map)]))
-        tf.logging.info("token_is_max_context: %s" % " ".join([
+        tf.compat.v1.logging.info("token_is_max_context: %s" % " ".join([
             "%d:%s" % (x, y) for (x, y) in six.iteritems(token_is_max_context)
         ]))
-        tf.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
-        tf.logging.info(
+        tf.compat.v1.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
+        tf.compat.v1.logging.info(
            "input_mask: %s" % " ".join([str(x) for x in input_mask]))
-        tf.logging.info(
+        tf.compat.v1.logging.info(
            "segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
        if is_training and example.is_impossible:
-          tf.logging.info("impossible example")
+          tf.compat.v1.logging.info("impossible example")
        if is_training and not example.is_impossible:
          answer_text = " ".join(tokens[start_position:(end_position + 1)])
-          tf.logging.info("start_position: %d" % (start_position))
-          tf.logging.info("end_position: %d" % (end_position))
-          tf.logging.info(
+          tf.compat.v1.logging.info("start_position: %d" % (start_position))
+          tf.compat.v1.logging.info("end_position: %d" % (end_position))
+          tf.compat.v1.logging.info(
              "answer: %s" % (tokenization.printable_text(answer_text)))

     feature = InputFeatures(
@@ -494,11 +494,11 @@ def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
   seq_length = final_hidden_shape[1]
   hidden_size = final_hidden_shape[2]

-  output_weights = tf.get_variable(
+  output_weights = tf.compat.v1.get_variable(
       "cls/squad/output_weights", [2, hidden_size],
       initializer=tf.truncated_normal_initializer(stddev=0.02))

-  output_bias = tf.get_variable(
+  output_bias = tf.compat.v1.get_variable(
       "cls/squad/output_bias", [2], initializer=tf.zeros_initializer())

   final_hidden_matrix = tf.reshape(final_hidden,
@@ -524,9 +524,9 @@ def model_fn_builder(bert_config, init_checkpoint, learning_rate,
   def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
     """The `model_fn` for TPUEstimator."""

-    tf.logging.info("*** Features ***")
+    tf.compat.v1.logging.info("*** Features ***")
     for name in sorted(features.keys()):
-      tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))
+      tf.compat.v1.logging.info(" name = %s, shape = %s" % (name, features[name].shape))

     unique_ids = features["unique_ids"]
     input_ids = features["input_ids"]
@@ -543,7 +543,7 @@ def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
         segment_ids=segment_ids,
         use_one_hot_embeddings=use_one_hot_embeddings)

-    tvars = tf.trainable_variables()
+    tvars = tf.compat.v1.trainable_variables()

     initialized_variable_names = {}
     scaffold_fn = None
@@ -553,19 +553,19 @@ def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
       if use_tpu:

         def tpu_scaffold():
-          tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
+          tf.compat.v1.train.init_from_checkpoint(init_checkpoint, assignment_map)
           return tf.train.Scaffold()

         scaffold_fn = tpu_scaffold
       else:
-        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
+        tf.compat.v1.train.init_from_checkpoint(init_checkpoint, assignment_map)

-    tf.logging.info("**** Trainable Variables ****")
+    tf.compat.v1.logging.info("**** Trainable Variables ****")
     for var in tvars:
       init_string = ""
       if var.name in initialized_variable_names:
         init_string = ", *INIT_FROM_CKPT*"
-      tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
+      tf.compat.v1.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                       init_string)

     output_spec = None
@@ -617,10 +617,10 @@ def input_fn_builder(input_file, seq_length, is_training, drop_remainder):
   """Creates an `input_fn` closure to be passed to TPUEstimator."""

   name_to_features = {
-      "unique_ids": tf.FixedLenFeature([], tf.int64),
-      "input_ids": tf.FixedLenFeature([seq_length], tf.int64),
-      "input_mask": tf.FixedLenFeature([seq_length], tf.int64),
-      "segment_ids": tf.FixedLenFeature([seq_length], tf.int64),
+      "unique_ids": tf.io.FixedLenFeature([], tf.int64),
+      "input_ids": tf.io.FixedLenFeature([seq_length], tf.int64),
+      "input_mask": tf.io.FixedLenFeature([seq_length], tf.int64),
+      "segment_ids": tf.io.FixedLenFeature([seq_length], tf.int64),
   }

   if is_training:
@@ -629,14 +629,14 @@ def input_fn_builder(input_file, seq_length, is_training, drop_remainder):

   def _decode_record(record, name_to_features):
     """Decodes a record to a TensorFlow example."""
-    example = tf.parse_single_example(record, name_to_features)
+    example = tf.io.parse_single_example(record, name_to_features)

     # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
     # So cast all int64 to int32.
     for name in list(example.keys()):
       t = example[name]
       if t.dtype == tf.int64:
-        t = tf.to_int32(t)
+        t = tf.cast(t, tf.int32)
       example[name] = t

     return example
@@ -647,13 +647,10 @@ def input_fn(params):

     # For training, we want a lot of parallel reading and shuffling.
     # For eval, we want no shuffling and parallel reading doesn't matter.
-    d = tf.data.TFRecordDataset(input_file)
-
-    d = d.apply(
-        tf.contrib.data.map_and_batch(
-            lambda record: _decode_record(record, name_to_features),
-            batch_size=batch_size,
-            drop_remainder=drop_remainder))
+    d = tf.data.TFRecordDataset(input_file)\
+        .map(lambda record: _decode_record(record, name_to_features))\
+        .batch(batch_size=batch_size,
+               drop_remainder=drop_remainder)

     return d
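
`tf.contrib` (and with it `tf.contrib.data.map_and_batch`) is gone in TF 2.x, so the pipeline is rebuilt from core `tf.data` ops: `.map()` followed by `.batch()` is functionally equivalent here. A minimal sketch of the same pattern, assuming a made-up record layout with a single `input_ids` feature:

```python
import tensorflow as tf

name_to_features = {"input_ids": tf.io.FixedLenFeature([4], tf.int64)}

def _decode(record):
  example = tf.io.parse_single_example(record, name_to_features)
  # tf.Example stores integers as int64; cast down to int32 as the TPU path expects.
  return {name: tf.cast(t, tf.int32) for name, t in example.items()}

def make_dataset(input_file, batch_size, drop_remainder=False):
  d = tf.data.TFRecordDataset(input_file)
  return d.map(_decode).batch(batch_size, drop_remainder=drop_remainder)
```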

@@ -865,7 +862,7 @@ def _strip_spaces(text):
   start_position = tok_text.find(pred_text)
   if start_position == -1:
     if verbose_logging:
-      tf.logging.info(
+      tf.compat.v1.logging.info(
           "Unable to find text: '%s' in '%s'" % (pred_text, orig_text))
     return orig_text
   end_position = start_position + len(pred_text) - 1
@@ -875,7 +872,7 @@ def _strip_spaces(text):

   if len(orig_ns_text) != len(tok_ns_text):
     if verbose_logging:
-      tf.logging.info("Length not equal after stripping spaces: '%s' vs '%s'",
+      tf.compat.v1.logging.info("Length not equal after stripping spaces: '%s' vs '%s'",
                       orig_ns_text, tok_ns_text)
     return orig_text

@@ -893,7 +890,7 @@ def _strip_spaces(text):

   if orig_start_position is None:
     if verbose_logging:
-      tf.logging.info("Couldn't map start position")
+      tf.compat.v1.logging.info("Couldn't map start position")
     return orig_text

   orig_end_position = None
@@ -904,7 +901,7 @@ def _strip_spaces(text):

   if orig_end_position is None:
     if verbose_logging:
-      tf.logging.info("Couldn't map end position")
+      tf.compat.v1.logging.info("Couldn't map end position")
     return orig_text

   output_text = orig_text[orig_start_position:(orig_end_position + 1)]
@@ -953,7 +950,7 @@ def __init__(self, filename, is_training):
     self.filename = filename
     self.is_training = is_training
     self.num_features = 0
-    self._writer = tf.python_io.TFRecordWriter(filename)
+    self._writer = tf.io.TFRecordWriter(filename)

   def process_feature(self, feature):
     """Write a InputFeature to the TFRecordWriter as a tf.train.Example."""
@@ -991,9 +988,9 @@ def __init__(self, config, vocab, output):
     self.vocab_file = vocab
     self.output_dir = output
     self.init_checkpoint = output
-    tf.logging.set_verbosity(0)
+    tf.compat.v1.logging.set_verbosity(0)
     self.bert_config = modeling.BertConfig.from_json_file(self.bert_config_file)
-    tf.gfile.MakeDirs(self.output_dir)
+    tf.io.gfile.makedirs(self.output_dir)
     self.tokenizer = tokenization.FullTokenizer(
         vocab_file=self.vocab_file, do_lower_case=do_lower_case)
     is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
@@ -1076,10 +1073,10 @@ def append_feature(feature):
         output_fn=append_feature)
     eval_writer.close()

-    tf.logging.info("***** Running predictions *****")
-    tf.logging.info(" Num orig examples = %d", len(eval_examples))
-    tf.logging.info(" Num split examples = %d", len(eval_features))
-    tf.logging.info(" Batch size = %d", predict_batch_size)
+    tf.compat.v1.logging.info("***** Running predictions *****")
+    tf.compat.v1.logging.info(" Num orig examples = %d", len(eval_examples))
+    tf.compat.v1.logging.info(" Num split examples = %d", len(eval_features))
+    tf.compat.v1.logging.info(" Batch size = %d", predict_batch_size)

     all_results = []

@@ -1129,7 +1126,7 @@ def append_feature(feature):
         output_fn=append_feature)
     eval_writer.close()

-    tf.logging.info("***** Running predictions on single example *****")
+    tf.compat.v1.logging.info("***** Running predictions on single example *****")

     all_results = []

46 changes: 23 additions & 23 deletions bert/modeling.py
@@ -89,7 +89,7 @@ def from_dict(cls, json_object):
   @classmethod
   def from_json_file(cls, json_file):
     """Constructs a `BertConfig` from a json file of parameters."""
-    with tf.gfile.GFile(json_file, "r") as reader:
+    with tf.io.gfile.GFile(json_file, "r") as reader:
       text = reader.read()
     return cls.from_dict(json.loads(text))

@@ -169,8 +169,8 @@ def __init__(self,
     if token_type_ids is None:
       token_type_ids = tf.zeros(shape=[batch_size, seq_length], dtype=tf.int32)

-    with tf.variable_scope(scope, default_name="bert"):
-      with tf.variable_scope("embeddings"):
+    with tf.compat.v1.variable_scope(scope, default_name="bert"):
+      with tf.compat.v1.variable_scope("embeddings"):
         # Perform embedding lookup on the word ids.
         (self.embedding_output, self.embedding_table) = embedding_lookup(
             input_ids=input_ids,
@@ -194,7 +194,7 @@ def __init__(self,
             max_position_embeddings=config.max_position_embeddings,
             dropout_prob=config.hidden_dropout_prob)

-      with tf.variable_scope("encoder"):
+      with tf.compat.v1.variable_scope("encoder"):
         # This converts a 2D mask of shape [batch_size, seq_length] to a 3D
         # mask of shape [batch_size, seq_length, seq_length] which is used
         # for the attention scores.
@@ -222,11 +222,11 @@ def __init__(self,
       # [batch_size, hidden_size]. This is necessary for segment-level
       # (or segment-pair-level) classification tasks where we need a fixed
       # dimensional representation of the segment.
-      with tf.variable_scope("pooler"):
+      with tf.compat.v1.variable_scope("pooler"):
         # We "pool" the model by simply taking the hidden state corresponding
         # to the first token. We assume that this has been pre-trained
         first_token_tensor = tf.squeeze(self.sequence_output[:, 0:1, :], axis=1)
-        self.pooled_output = tf.layers.dense(
+        self.pooled_output = tf.compat.v1.layers.dense(
             first_token_tensor,
             config.hidden_size,
             activation=tf.tanh,
@@ -274,7 +274,7 @@ def gelu(input_tensor):
   Returns:
     `input_tensor` with the GELU activation applied.
   """
-  cdf = 0.5 * (1.0 + tf.erf(input_tensor / tf.sqrt(2.0)))
+  cdf = 0.5 * (1.0 + tf.math.erf(input_tensor / tf.sqrt(2.0)))
   return input_tensor * cdf
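
The changed line is the exact GELU, x·Φ(x), written with the error function; `tf.erf` simply moved to `tf.math.erf`. For reference, a small NumPy sketch comparing it against the common tanh approximation (illustrative only, not part of the repository):

```python
import numpy as np
from scipy.special import erf

def gelu_exact(x):
  # x * Phi(x), with Phi the standard normal CDF; matches the TF code above.
  return x * 0.5 * (1.0 + erf(x / np.sqrt(2.0)))

def gelu_tanh(x):
  # Tanh approximation from the original GELU paper.
  return 0.5 * x * (1.0 + np.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * x ** 3)))

x = np.linspace(-4.0, 4.0, 81)
print(np.max(np.abs(gelu_exact(x) - gelu_tanh(x))))  # small; the two agree to ~3 decimals
```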


@@ -408,7 +408,7 @@ def embedding_lookup(input_ids,
   if input_ids.shape.ndims == 2:
     input_ids = tf.expand_dims(input_ids, axis=[-1])

-  embedding_table = tf.get_variable(
+  embedding_table = tf.compat.v1.get_variable(
       name=word_embedding_name,
       shape=[vocab_size, embedding_size],
       initializer=create_initializer(initializer_range))
@@ -475,7 +475,7 @@ def embedding_postprocessor(input_tensor,
     if token_type_ids is None:
       raise ValueError("`token_type_ids` must be specified if"
                        "`use_token_type` is True.")
-    token_type_table = tf.get_variable(
+    token_type_table = tf.compat.v1.get_variable(
         name=token_type_embedding_name,
         shape=[token_type_vocab_size, width],
         initializer=create_initializer(initializer_range))
@@ -489,9 +489,9 @@ def embedding_postprocessor(input_tensor,
     output += token_type_embeddings

   if use_position_embeddings:
-    assert_op = tf.assert_less_equal(seq_length, max_position_embeddings)
+    assert_op = tf.compat.v1.assert_less_equal(seq_length, max_position_embeddings)
     with tf.control_dependencies([assert_op]):
-      full_position_embeddings = tf.get_variable(
+      full_position_embeddings = tf.compat.v1.get_variable(
           name=position_embedding_name,
           shape=[max_position_embeddings, width],
           initializer=create_initializer(initializer_range))
@@ -665,23 +665,23 @@ def transpose_for_scores(input_tensor, batch_size, num_attention_heads,
   to_tensor_2d = reshape_to_matrix(to_tensor)

   # `query_layer` = [B*F, N*H]
-  query_layer = tf.layers.dense(
+  query_layer = tf.compat.v1.layers.dense(
       from_tensor_2d,
       num_attention_heads * size_per_head,
       activation=query_act,
       name="query",
       kernel_initializer=create_initializer(initializer_range))

   # `key_layer` = [B*T, N*H]
-  key_layer = tf.layers.dense(
+  key_layer = tf.compat.v1.layers.dense(
       to_tensor_2d,
       num_attention_heads * size_per_head,
       activation=key_act,
       name="key",
       kernel_initializer=create_initializer(initializer_range))

   # `value_layer` = [B*T, N*H]
-  value_layer = tf.layers.dense(
+  value_layer = tf.compat.v1.layers.dense(
       to_tensor_2d,
       num_attention_heads * size_per_head,
       activation=value_act,
@@ -826,12 +826,12 @@ def transformer_model(input_tensor,

   all_layer_outputs = []
   for layer_idx in range(num_hidden_layers):
-    with tf.variable_scope("layer_%d" % layer_idx):
+    with tf.compat.v1.variable_scope("layer_%d" % layer_idx):
       layer_input = prev_output

-      with tf.variable_scope("attention"):
+      with tf.compat.v1.variable_scope("attention"):
         attention_heads = []
-        with tf.variable_scope("self"):
+        with tf.compat.v1.variable_scope("self"):
           attention_head = attention_layer(
               from_tensor=layer_input,
               to_tensor=layer_input,
@@ -856,25 +856,25 @@ def transformer_model(input_tensor,

         # Run a linear projection of `hidden_size` then add a residual
         # with `layer_input`.
-        with tf.variable_scope("output"):
-          attention_output = tf.layers.dense(
+        with tf.compat.v1.variable_scope("output"):
+          attention_output = tf.compat.v1.layers.dense(
               attention_output,
               hidden_size,
               kernel_initializer=create_initializer(initializer_range))
           attention_output = dropout(attention_output, hidden_dropout_prob)
           attention_output = layer_norm(attention_output + layer_input)

       # The activation is only applied to the "intermediate" hidden layer.
-      with tf.variable_scope("intermediate"):
-        intermediate_output = tf.layers.dense(
+      with tf.compat.v1.variable_scope("intermediate"):
+        intermediate_output = tf.compat.v1.layers.dense(
            attention_output,
            intermediate_size,
            activation=intermediate_act_fn,
            kernel_initializer=create_initializer(initializer_range))

       # Down-project back to `hidden_size` then add the residual.
-      with tf.variable_scope("output"):
-        layer_output = tf.layers.dense(
+      with tf.compat.v1.variable_scope("output"):
+        layer_output = tf.compat.v1.layers.dense(
            intermediate_output,
            hidden_size,
            kernel_initializer=create_initializer(initializer_range))
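
The three commented stages in the hunk above (project the attention output and add the `layer_input` residual, widen to `intermediate_size` with the activation, then project back to `hidden_size` and add another residual) are the standard Transformer feed-forward block. A compact sketch of the same wiring using TF 2.x-style Keras layers, purely illustrative and not the repository's implementation (dropout omitted for brevity):

```python
import tensorflow as tf

def gelu(x):
  return x * 0.5 * (1.0 + tf.math.erf(x / tf.sqrt(2.0)))

def transformer_ffn_block(attention_output, layer_input,
                          hidden_size=768, intermediate_size=3072):
  # Project the attention output to hidden_size, add the residual, layer-norm.
  x = tf.keras.layers.Dense(hidden_size)(attention_output)
  x = tf.keras.layers.LayerNormalization()(x + layer_input)
  # Widen with GELU, project back down, add the second residual, layer-norm.
  h = tf.keras.layers.Dense(intermediate_size, activation=gelu)(x)
  y = tf.keras.layers.Dense(hidden_size)(h)
  return tf.keras.layers.LayerNormalization()(y + x)
```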
2 changes: 1 addition & 1 deletion bert/optimization.py
@@ -81,7 +81,7 @@ def create_optimizer(loss, init_lr, num_train_steps, num_warmup_steps, use_tpu):
   return train_op


-class AdamWeightDecayOptimizer(tf.train.Optimizer):
+class AdamWeightDecayOptimizer(tf.compat.v1.train.Optimizer):
   """A basic Adam optimizer that includes "correct" L2 weight decay."""

   def __init__(self,
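
`tf.train.Optimizer` is only reachable as `tf.compat.v1.train.Optimizer` in TF 2.x, which is why the base class changes; the subclass keeps overriding `apply_gradients` as before. A minimal sketch of that subclassing pattern (a toy plain-SGD optimizer, not the repository's `AdamWeightDecayOptimizer`), assuming TF 1.x graph mode:

```python
import tensorflow as tf

class PlainSGDOptimizer(tf.compat.v1.train.Optimizer):
  """Toy optimizer: applies param <- param - lr * grad via assign ops."""

  def __init__(self, learning_rate, name="PlainSGDOptimizer"):
    super(PlainSGDOptimizer, self).__init__(False, name)
    self.learning_rate = learning_rate

  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    assignments = []
    for grad, param in grads_and_vars:
      if grad is None or param is None:
        continue
      assignments.append(param.assign(param - self.learning_rate * grad))
    return tf.group(*assignments, name=name)
```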