Merge pull request #19 from ma7555/master
@ma7555: Upgrade to TF 1.15.4, Fix Bug & Clean Code
husseinmozannar authored Aug 4, 2023
2 parents 24c5ec8 + ebacc7c commit 1229d34
Showing 12 changed files with 77 additions and 92 deletions.
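
Most of the diff below applies one mechanical pattern: TF 1.x symbols that were removed or relocated in later releases (`tf.logging`, `tf.get_variable`, `tf.gfile`, `tf.FixedLenFeature`, `tf.to_int32`, ...) are swapped for their `tf.compat.v1.*` or `tf.io.*` equivalents, so the code runs on TF 1.15.4 and stays importable under newer TensorFlow builds. A minimal standalone sketch of the pattern (not code from this repository):

```python
import tensorflow as tf

# Removed TF 1.x symbol         ->  replacement used throughout this commit
# tf.logging.info(...)          ->  tf.compat.v1.logging.info(...)
# tf.gfile.MakeDirs(path)       ->  tf.io.gfile.makedirs(path)
# tf.FixedLenFeature(...)       ->  tf.io.FixedLenFeature(...)
# tf.to_int32(t)                ->  tf.cast(t, tf.int32)

tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
tf.compat.v1.logging.info("TensorFlow version: %s", tf.__version__)
```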
93 changes: 45 additions & 48 deletions bert/Bert_model.py
@@ -379,29 +379,29 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length,
         end_position = 0

       if example_index < 20:
-        tf.logging.info("*** Example ***")
-        tf.logging.info("unique_id: %s" % (unique_id))
-        tf.logging.info("example_index: %s" % (example_index))
-        tf.logging.info("doc_span_index: %s" % (doc_span_index))
-        tf.logging.info("tokens: %s" % " ".join(
+        tf.compat.v1.logging.info("*** Example ***")
+        tf.compat.v1.logging.info("unique_id: %s" % (unique_id))
+        tf.compat.v1.logging.info("example_index: %s" % (example_index))
+        tf.compat.v1.logging.info("doc_span_index: %s" % (doc_span_index))
+        tf.compat.v1.logging.info("tokens: %s" % " ".join(
             [tokenization.printable_text(x) for x in tokens]))
-        tf.logging.info("token_to_orig_map: %s" % " ".join(
+        tf.compat.v1.logging.info("token_to_orig_map: %s" % " ".join(
             ["%d:%d" % (x, y) for (x, y) in six.iteritems(token_to_orig_map)]))
-        tf.logging.info("token_is_max_context: %s" % " ".join([
+        tf.compat.v1.logging.info("token_is_max_context: %s" % " ".join([
             "%d:%s" % (x, y) for (x, y) in six.iteritems(token_is_max_context)
         ]))
-        tf.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
-        tf.logging.info(
+        tf.compat.v1.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
+        tf.compat.v1.logging.info(
            "input_mask: %s" % " ".join([str(x) for x in input_mask]))
-        tf.logging.info(
+        tf.compat.v1.logging.info(
            "segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
        if is_training and example.is_impossible:
-          tf.logging.info("impossible example")
+          tf.compat.v1.logging.info("impossible example")
        if is_training and not example.is_impossible:
          answer_text = " ".join(tokens[start_position:(end_position + 1)])
-          tf.logging.info("start_position: %d" % (start_position))
-          tf.logging.info("end_position: %d" % (end_position))
-          tf.logging.info(
+          tf.compat.v1.logging.info("start_position: %d" % (start_position))
+          tf.compat.v1.logging.info("end_position: %d" % (end_position))
+          tf.compat.v1.logging.info(
              "answer: %s" % (tokenization.printable_text(answer_text)))

     feature = InputFeatures(
@@ -494,11 +494,11 @@ def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
   seq_length = final_hidden_shape[1]
   hidden_size = final_hidden_shape[2]

-  output_weights = tf.get_variable(
+  output_weights = tf.compat.v1.get_variable(
       "cls/squad/output_weights", [2, hidden_size],
       initializer=tf.truncated_normal_initializer(stddev=0.02))

-  output_bias = tf.get_variable(
+  output_bias = tf.compat.v1.get_variable(
       "cls/squad/output_bias", [2], initializer=tf.zeros_initializer())

   final_hidden_matrix = tf.reshape(final_hidden,
@@ -524,9 +524,9 @@ def model_fn_builder(bert_config, init_checkpoint, learning_rate,
   def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
     """The `model_fn` for TPUEstimator."""

-    tf.logging.info("*** Features ***")
+    tf.compat.v1.logging.info("*** Features ***")
     for name in sorted(features.keys()):
-      tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))
+      tf.compat.v1.logging.info(" name = %s, shape = %s" % (name, features[name].shape))

     unique_ids = features["unique_ids"]
     input_ids = features["input_ids"]
@@ -543,7 +543,7 @@ def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
         segment_ids=segment_ids,
         use_one_hot_embeddings=use_one_hot_embeddings)

-    tvars = tf.trainable_variables()
+    tvars = tf.compat.v1.trainable_variables()

     initialized_variable_names = {}
     scaffold_fn = None
@@ -553,19 +553,19 @@ def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
       if use_tpu:

         def tpu_scaffold():
-          tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
+          tf.compat.v1.train.init_from_checkpoint(init_checkpoint, assignment_map)
           return tf.train.Scaffold()

         scaffold_fn = tpu_scaffold
       else:
-        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
+        tf.compat.v1.train.init_from_checkpoint(init_checkpoint, assignment_map)

-    tf.logging.info("**** Trainable Variables ****")
+    tf.compat.v1.logging.info("**** Trainable Variables ****")
     for var in tvars:
       init_string = ""
       if var.name in initialized_variable_names:
         init_string = ", *INIT_FROM_CKPT*"
-      tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
+      tf.compat.v1.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                       init_string)

     output_spec = None
@@ -617,10 +617,10 @@ def input_fn_builder(input_file, seq_length, is_training, drop_remainder):
   """Creates an `input_fn` closure to be passed to TPUEstimator."""

   name_to_features = {
-      "unique_ids": tf.FixedLenFeature([], tf.int64),
-      "input_ids": tf.FixedLenFeature([seq_length], tf.int64),
-      "input_mask": tf.FixedLenFeature([seq_length], tf.int64),
-      "segment_ids": tf.FixedLenFeature([seq_length], tf.int64),
+      "unique_ids": tf.io.FixedLenFeature([], tf.int64),
+      "input_ids": tf.io.FixedLenFeature([seq_length], tf.int64),
+      "input_mask": tf.io.FixedLenFeature([seq_length], tf.int64),
+      "segment_ids": tf.io.FixedLenFeature([seq_length], tf.int64),
   }

   if is_training:
@@ -629,14 +629,14 @@ def input_fn_builder(input_file, seq_length, is_training, drop_remainder):

   def _decode_record(record, name_to_features):
     """Decodes a record to a TensorFlow example."""
-    example = tf.parse_single_example(record, name_to_features)
+    example = tf.io.parse_single_example(record, name_to_features)

     # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
     # So cast all int64 to int32.
     for name in list(example.keys()):
       t = example[name]
       if t.dtype == tf.int64:
-        t = tf.to_int32(t)
+        t = tf.cast(t, tf.int32)
       example[name] = t

     return example
@@ -647,13 +647,10 @@ def input_fn(params):

     # For training, we want a lot of parallel reading and shuffling.
     # For eval, we want no shuffling and parallel reading doesn't matter.
-    d = tf.data.TFRecordDataset(input_file)
-
-    d = d.apply(
-        tf.contrib.data.map_and_batch(
-            lambda record: _decode_record(record, name_to_features),
-            batch_size=batch_size,
-            drop_remainder=drop_remainder))
+    d = tf.data.TFRecordDataset(input_file)\
+        .map(lambda record: _decode_record(record, name_to_features))\
+        .batch(batch_size=batch_size,
+               drop_remainder=drop_remainder)

     return d
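
`tf.contrib` (and with it `tf.contrib.data.map_and_batch`) is gone in TF 2.x, so the pipeline is rebuilt from core `tf.data` ops: `.map()` followed by `.batch()` is functionally equivalent here. A minimal sketch of the same pattern, assuming a made-up record layout with a single `input_ids` feature:

```python
import tensorflow as tf

name_to_features = {"input_ids": tf.io.FixedLenFeature([4], tf.int64)}

def _decode(record):
  example = tf.io.parse_single_example(record, name_to_features)
  # tf.Example stores integers as int64; cast down to int32 as the TPU path expects.
  return {name: tf.cast(t, tf.int32) for name, t in example.items()}

def make_dataset(input_file, batch_size, drop_remainder=False):
  d = tf.data.TFRecordDataset(input_file)
  return d.map(_decode).batch(batch_size, drop_remainder=drop_remainder)
```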

@@ -865,7 +862,7 @@ def _strip_spaces(text):
   start_position = tok_text.find(pred_text)
   if start_position == -1:
     if verbose_logging:
-      tf.logging.info(
+      tf.compat.v1.logging.info(
           "Unable to find text: '%s' in '%s'" % (pred_text, orig_text))
     return orig_text
   end_position = start_position + len(pred_text) - 1
@@ -875,7 +872,7 @@ def _strip_spaces(text):

   if len(orig_ns_text) != len(tok_ns_text):
     if verbose_logging:
-      tf.logging.info("Length not equal after stripping spaces: '%s' vs '%s'",
+      tf.compat.v1.logging.info("Length not equal after stripping spaces: '%s' vs '%s'",
                       orig_ns_text, tok_ns_text)
     return orig_text

@@ -893,7 +890,7 @@ def _strip_spaces(text):

   if orig_start_position is None:
     if verbose_logging:
-      tf.logging.info("Couldn't map start position")
+      tf.compat.v1.logging.info("Couldn't map start position")
     return orig_text

   orig_end_position = None
@@ -904,7 +901,7 @@ def _strip_spaces(text):

   if orig_end_position is None:
     if verbose_logging:
-      tf.logging.info("Couldn't map end position")
+      tf.compat.v1.logging.info("Couldn't map end position")
     return orig_text

   output_text = orig_text[orig_start_position:(orig_end_position + 1)]
@@ -953,7 +950,7 @@ def __init__(self, filename, is_training):
     self.filename = filename
     self.is_training = is_training
     self.num_features = 0
-    self._writer = tf.python_io.TFRecordWriter(filename)
+    self._writer = tf.io.TFRecordWriter(filename)

   def process_feature(self, feature):
     """Write a InputFeature to the TFRecordWriter as a tf.train.Example."""
@@ -991,9 +988,9 @@ def __init__(self, config, vocab, output):
     self.vocab_file = vocab
     self.output_dir = output
     self.init_checkpoint = output
-    tf.logging.set_verbosity(0)
+    tf.compat.v1.logging.set_verbosity(0)
     self.bert_config = modeling.BertConfig.from_json_file(self.bert_config_file)
-    tf.gfile.MakeDirs(self.output_dir)
+    tf.io.gfile.makedirs(self.output_dir)
     self.tokenizer = tokenization.FullTokenizer(
         vocab_file=self.vocab_file, do_lower_case=do_lower_case)
     is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
@@ -1076,10 +1073,10 @@ def append_feature(feature):
         output_fn=append_feature)
     eval_writer.close()

-    tf.logging.info("***** Running predictions *****")
-    tf.logging.info(" Num orig examples = %d", len(eval_examples))
-    tf.logging.info(" Num split examples = %d", len(eval_features))
-    tf.logging.info(" Batch size = %d", predict_batch_size)
+    tf.compat.v1.logging.info("***** Running predictions *****")
+    tf.compat.v1.logging.info(" Num orig examples = %d", len(eval_examples))
+    tf.compat.v1.logging.info(" Num split examples = %d", len(eval_features))
+    tf.compat.v1.logging.info(" Batch size = %d", predict_batch_size)

     all_results = []

@@ -1129,7 +1126,7 @@ def append_feature(feature):
         output_fn=append_feature)
     eval_writer.close()

-    tf.logging.info("***** Running predictions on single example *****")
+    tf.compat.v1.logging.info("***** Running predictions on single example *****")

     all_results = []

46 changes: 23 additions & 23 deletions bert/modeling.py
@@ -89,7 +89,7 @@ def from_dict(cls, json_object):
   @classmethod
   def from_json_file(cls, json_file):
     """Constructs a `BertConfig` from a json file of parameters."""
-    with tf.gfile.GFile(json_file, "r") as reader:
+    with tf.io.gfile.GFile(json_file, "r") as reader:
       text = reader.read()
     return cls.from_dict(json.loads(text))

@@ -169,8 +169,8 @@ def __init__(self,
     if token_type_ids is None:
       token_type_ids = tf.zeros(shape=[batch_size, seq_length], dtype=tf.int32)

-    with tf.variable_scope(scope, default_name="bert"):
-      with tf.variable_scope("embeddings"):
+    with tf.compat.v1.variable_scope(scope, default_name="bert"):
+      with tf.compat.v1.variable_scope("embeddings"):
         # Perform embedding lookup on the word ids.
         (self.embedding_output, self.embedding_table) = embedding_lookup(
             input_ids=input_ids,
@@ -194,7 +194,7 @@ def __init__(self,
             max_position_embeddings=config.max_position_embeddings,
             dropout_prob=config.hidden_dropout_prob)

-      with tf.variable_scope("encoder"):
+      with tf.compat.v1.variable_scope("encoder"):
         # This converts a 2D mask of shape [batch_size, seq_length] to a 3D
         # mask of shape [batch_size, seq_length, seq_length] which is used
         # for the attention scores.
@@ -222,11 +222,11 @@ def __init__(self,
       # [batch_size, hidden_size]. This is necessary for segment-level
       # (or segment-pair-level) classification tasks where we need a fixed
       # dimensional representation of the segment.
-      with tf.variable_scope("pooler"):
+      with tf.compat.v1.variable_scope("pooler"):
         # We "pool" the model by simply taking the hidden state corresponding
         # to the first token. We assume that this has been pre-trained
         first_token_tensor = tf.squeeze(self.sequence_output[:, 0:1, :], axis=1)
-        self.pooled_output = tf.layers.dense(
+        self.pooled_output = tf.compat.v1.layers.dense(
             first_token_tensor,
             config.hidden_size,
             activation=tf.tanh,
@@ -274,7 +274,7 @@ def gelu(input_tensor):
   Returns:
     `input_tensor` with the GELU activation applied.
   """
-  cdf = 0.5 * (1.0 + tf.erf(input_tensor / tf.sqrt(2.0)))
+  cdf = 0.5 * (1.0 + tf.math.erf(input_tensor / tf.sqrt(2.0)))
   return input_tensor * cdf
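
The changed line is the exact GELU, x·Φ(x), written with the error function; `tf.erf` simply moved to `tf.math.erf`. For reference, a small NumPy sketch comparing it against the common tanh approximation (illustrative only, not part of the repository):

```python
import numpy as np
from scipy.special import erf

def gelu_exact(x):
  # x * Phi(x), with Phi the standard normal CDF; matches the TF code above.
  return x * 0.5 * (1.0 + erf(x / np.sqrt(2.0)))

def gelu_tanh(x):
  # Tanh approximation from the original GELU paper.
  return 0.5 * x * (1.0 + np.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * x ** 3)))

x = np.linspace(-4.0, 4.0, 81)
print(np.max(np.abs(gelu_exact(x) - gelu_tanh(x))))  # small; the two agree to ~3 decimals
```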


@@ -408,7 +408,7 @@ def embedding_lookup(input_ids,
   if input_ids.shape.ndims == 2:
     input_ids = tf.expand_dims(input_ids, axis=[-1])

-  embedding_table = tf.get_variable(
+  embedding_table = tf.compat.v1.get_variable(
       name=word_embedding_name,
       shape=[vocab_size, embedding_size],
       initializer=create_initializer(initializer_range))
@@ -475,7 +475,7 @@ def embedding_postprocessor(input_tensor,
     if token_type_ids is None:
       raise ValueError("`token_type_ids` must be specified if"
                        "`use_token_type` is True.")
-    token_type_table = tf.get_variable(
+    token_type_table = tf.compat.v1.get_variable(
         name=token_type_embedding_name,
         shape=[token_type_vocab_size, width],
         initializer=create_initializer(initializer_range))
@@ -489,9 +489,9 @@ def embedding_postprocessor(input_tensor,
     output += token_type_embeddings

   if use_position_embeddings:
-    assert_op = tf.assert_less_equal(seq_length, max_position_embeddings)
+    assert_op = tf.compat.v1.assert_less_equal(seq_length, max_position_embeddings)
     with tf.control_dependencies([assert_op]):
-      full_position_embeddings = tf.get_variable(
+      full_position_embeddings = tf.compat.v1.get_variable(
           name=position_embedding_name,
           shape=[max_position_embeddings, width],
           initializer=create_initializer(initializer_range))
@@ -665,23 +665,23 @@ def transpose_for_scores(input_tensor, batch_size, num_attention_heads,
   to_tensor_2d = reshape_to_matrix(to_tensor)

   # `query_layer` = [B*F, N*H]
-  query_layer = tf.layers.dense(
+  query_layer = tf.compat.v1.layers.dense(
       from_tensor_2d,
       num_attention_heads * size_per_head,
       activation=query_act,
       name="query",
       kernel_initializer=create_initializer(initializer_range))

   # `key_layer` = [B*T, N*H]
-  key_layer = tf.layers.dense(
+  key_layer = tf.compat.v1.layers.dense(
       to_tensor_2d,
       num_attention_heads * size_per_head,
       activation=key_act,
       name="key",
       kernel_initializer=create_initializer(initializer_range))

   # `value_layer` = [B*T, N*H]
-  value_layer = tf.layers.dense(
+  value_layer = tf.compat.v1.layers.dense(
       to_tensor_2d,
       num_attention_heads * size_per_head,
       activation=value_act,
@@ -826,12 +826,12 @@ def transformer_model(input_tensor,

   all_layer_outputs = []
   for layer_idx in range(num_hidden_layers):
-    with tf.variable_scope("layer_%d" % layer_idx):
+    with tf.compat.v1.variable_scope("layer_%d" % layer_idx):
       layer_input = prev_output

-      with tf.variable_scope("attention"):
+      with tf.compat.v1.variable_scope("attention"):
         attention_heads = []
-        with tf.variable_scope("self"):
+        with tf.compat.v1.variable_scope("self"):
           attention_head = attention_layer(
               from_tensor=layer_input,
               to_tensor=layer_input,
@@ -856,25 +856,25 @@ def transformer_model(input_tensor,

         # Run a linear projection of `hidden_size` then add a residual
         # with `layer_input`.
-        with tf.variable_scope("output"):
-          attention_output = tf.layers.dense(
+        with tf.compat.v1.variable_scope("output"):
+          attention_output = tf.compat.v1.layers.dense(
               attention_output,
               hidden_size,
               kernel_initializer=create_initializer(initializer_range))
           attention_output = dropout(attention_output, hidden_dropout_prob)
           attention_output = layer_norm(attention_output + layer_input)

       # The activation is only applied to the "intermediate" hidden layer.
-      with tf.variable_scope("intermediate"):
-        intermediate_output = tf.layers.dense(
+      with tf.compat.v1.variable_scope("intermediate"):
+        intermediate_output = tf.compat.v1.layers.dense(
            attention_output,
            intermediate_size,
            activation=intermediate_act_fn,
            kernel_initializer=create_initializer(initializer_range))

       # Down-project back to `hidden_size` then add the residual.
-      with tf.variable_scope("output"):
-        layer_output = tf.layers.dense(
+      with tf.compat.v1.variable_scope("output"):
+        layer_output = tf.compat.v1.layers.dense(
            intermediate_output,
            hidden_size,
            kernel_initializer=create_initializer(initializer_range))
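
The three commented stages in the hunk above (project the attention output and add the `layer_input` residual, widen to `intermediate_size` with the activation, then project back to `hidden_size` and add another residual) are the standard Transformer feed-forward block. A compact sketch of the same wiring using TF 2.x-style Keras layers, purely illustrative and not the repository's implementation (dropout omitted for brevity):

```python
import tensorflow as tf

def gelu(x):
  return x * 0.5 * (1.0 + tf.math.erf(x / tf.sqrt(2.0)))

def transformer_ffn_block(attention_output, layer_input,
                          hidden_size=768, intermediate_size=3072):
  # Project the attention output to hidden_size, add the residual, layer-norm.
  x = tf.keras.layers.Dense(hidden_size)(attention_output)
  x = tf.keras.layers.LayerNormalization()(x + layer_input)
  # Widen with GELU, project back down, add the second residual, layer-norm.
  h = tf.keras.layers.Dense(intermediate_size, activation=gelu)(x)
  y = tf.keras.layers.Dense(hidden_size)(h)
  return tf.keras.layers.LayerNormalization()(y + x)
```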
2 changes: 1 addition & 1 deletion bert/optimization.py
@@ -81,7 +81,7 @@ def create_optimizer(loss, init_lr, num_train_steps, num_warmup_steps, use_tpu):
   return train_op


-class AdamWeightDecayOptimizer(tf.train.Optimizer):
+class AdamWeightDecayOptimizer(tf.compat.v1.train.Optimizer):
   """A basic Adam optimizer that includes "correct" L2 weight decay."""

   def __init__(self,
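
`tf.train.Optimizer` is only reachable as `tf.compat.v1.train.Optimizer` in TF 2.x, which is why the base class changes; the subclass keeps overriding `apply_gradients` as before. A minimal sketch of that subclassing pattern (a toy plain-SGD optimizer, not the repository's `AdamWeightDecayOptimizer`), assuming TF 1.x graph mode:

```python
import tensorflow as tf

class PlainSGDOptimizer(tf.compat.v1.train.Optimizer):
  """Toy optimizer: applies param <- param - lr * grad via assign ops."""

  def __init__(self, learning_rate, name="PlainSGDOptimizer"):
    super(PlainSGDOptimizer, self).__init__(False, name)
    self.learning_rate = learning_rate

  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    assignments = []
    for grad, param in grads_and_vars:
      if grad is None or param is None:
        continue
      assignments.append(param.assign(param - self.learning_rate * grad))
    return tf.group(*assignments, name=name)
```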