intel
diff --git a/‎benchmarks/image_recognition/tensorflow/densenet169/README.md
+3-3 b/‎benchmarks/image_recognition/tensorflow/densenet169/README.md
+3-3
diff --git a/‎benchmarks/image_recognition/tensorflow/inceptionv3/README.md
+8-8 b/‎benchmarks/image_recognition/tensorflow/inceptionv3/README.md
+8-8
diff --git a/‎benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md
+16-16 b/‎benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md
+16-16
diff --git a/‎benchmarks/language_modeling/tensorflow/bert_large/README.md
+12-12 b/‎benchmarks/language_modeling/tensorflow/bert_large/README.md
+12-12
diff --git a/‎benchmarks/language_translation/tensorflow/bert/README.md
+1-1 b/‎benchmarks/language_translation/tensorflow/bert/README.md
+1-1
diff --git a/‎benchmarks/language_translation/tensorflow/mlperf_gnmt/README.md
+3-3 b/‎benchmarks/language_translation/tensorflow/mlperf_gnmt/README.md
+3-3
diff --git a/‎benchmarks/language_translation/tensorflow/transformer_lt_official/README.md
+2-2 b/‎benchmarks/language_translation/tensorflow/transformer_lt_official/README.md
+2-2
@@ -58,7 +58,7 @@ following modes/precisions:
         --batch-size 100 \
         --socket-id 0 \
         --in-graph /home/<user>/densenet169_fp32_pretrained_model.pb \
-        --docker-image intel/intel-optimized-tensorflow:2.2.0 \
+        --docker-image intel/intel-optimized-tensorflow:2.3.0 \
         -- input_height=224 input_width=224 warmup_steps=20 steps=100 \
         input_layer="input" output_layer="densenet169/predictions/Reshape_1"
     ```
@@ -74,7 +74,7 @@ following modes/precisions:
         --batch-size 1 \
         --socket-id 0 \
         --in-graph /home/<user>/densenet169_fp32_pretrained_model.pb \
-        --docker-image intel/intel-optimized-tensorflow:2.2.0 \
+        --docker-image intel/intel-optimized-tensorflow:2.3.0 \
         -- input_height=224 input_width=224 warmup_steps=20 steps=100 \
         input_layer="input" output_layer="densenet169/predictions/Reshape_1"
     ```
@@ -91,7 +91,7 @@ following modes/precisions:
         --batch-size 100 \
         --socket-id 0 \
         --in-graph /home/<user>/densenet169_fp32_pretrained_model.pb \
-        --docker-image intel/intel-optimized-tensorflow:2.2.0 \
+        --docker-image intel/intel-optimized-tensorflow:2.3.0 \
         --data-location /home/<user>/imagenet_validation_dataset \
         -- input_height=224 input_width=224 \
         input_layer="input" output_layer="densenet169/predictions/Reshape_1"
 
@@ -97,7 +97,7 @@ python launch_benchmark.py \
     --framework tensorflow \
     --accuracy-only \
     --batch-size 100 \
-    --docker-image intel/intel-optimized-tensorflow:2.2.0 \
+    --docker-image intel/intel-optimized-tensorflow:2.3.0 \
     --in-graph /home/<user>/inceptionv3_int8_pretrained_model.pb \
     --data-location /home/<user>/datasets/ImageNet_TFRecords
 ```
@@ -118,7 +118,7 @@ python launch_benchmark.py \
     --benchmark-only \
     --batch-size 1 \
     --socket-id 0 \
-    --docker-image intel/intel-optimized-tensorflow:2.2.0 \
+    --docker-image intel/intel-optimized-tensorflow:2.3.0 \
     --in-graph /home/<user>/inceptionv3_int8_pretrained_model.pb \
     --data-location /home/<user>/datasets/ImageNet_TFRecords \
     -- warmup_steps=50 steps=500
@@ -135,7 +135,7 @@ python launch_benchmark.py \
     --benchmark-only \
     --batch-size 1 \
     --socket-id 0 \
-    --docker-image intel/intel-optimized-tensorflow:2.2.0 \
+    --docker-image intel/intel-optimized-tensorflow:2.3.0 \
     --in-graph /home/<user>/inceptionv3_int8_pretrained_model.pb \
     -- warmup_steps=50 steps=500
 ```
@@ -151,7 +151,7 @@ python launch_benchmark.py \
     --benchmark-only \
     --batch-size 128 \
     --socket-id 0 \
-    --docker-image intel/intel-optimized-tensorflow:2.2.0 \
+    --docker-image intel/intel-optimized-tensorflow:2.3.0 \
     --in-graph /home/<user>/inceptionv3_int8_pretrained_model.pb \
     --data-location /home/<user>/datasets/ImageNet_TFRecords \
     -- warmup_steps=50 steps=500
@@ -168,7 +168,7 @@ python launch_benchmark.py \
     --benchmark-only \
     --batch-size 128 \
     --socket-id 0 \
-    --docker-image intel/intel-optimized-tensorflow:2.2.0 \
+    --docker-image intel/intel-optimized-tensorflow:2.3.0 \
     --in-graph /home/<user>/inceptionv3_int8_pretrained_model.pb \
     -- warmup_steps=50 steps=500
 ```
@@ -258,7 +258,7 @@ python launch_benchmark.py \
     --framework tensorflow \
     --batch-size 1 \
     --socket-id 0 \
-    --docker-image intel/intel-optimized-tensorflow:2.2.0 \
+    --docker-image intel/intel-optimized-tensorflow:2.3.0 \
     --in-graph /home/<user>/inceptionv3_fp32_pretrained_model.pb
 ```
 Example log tail when running for online inference:
@@ -289,7 +289,7 @@ python launch_benchmark.py \
     --framework tensorflow \
     --batch-size 128 \
     --socket-id 0 \
-    --docker-image intel/intel-optimized-tensorflow:2.2.0 \
+    --docker-image intel/intel-optimized-tensorflow:2.3.0 \
     --in-graph /home/<user>/inceptionv3_fp32_pretrained_model.pb
 ```
 Example log tail when running for batch inference:
@@ -321,7 +321,7 @@ python launch_benchmark.py \
     --accuracy-only \
     --batch-size 100 \
     --data-location /dataset/Imagenet_Validation \
-    --docker-image intel/intel-optimized-tensorflow:2.2.0 \
+    --docker-image intel/intel-optimized-tensorflow:2.3.0 \
     --in-graph /home/<user>/inceptionv3_fp32_pretrained_model.pb
 ```
 Example log tail when running for accuracy:
 
@@ -47,10 +47,10 @@ $ git clone https://github.com/IntelAI/models.git
 The optimized ResNet50v1.5 model files are attached to the [intelai/models](https://github.com/intelai/models) repo and
 located at `models/models/image_recognition/tensorflow/resnet50v1_5/`.
 
-    The docker image (`intel/intel-optimized-tensorflow:2.2.0`)
+    The docker image (`intel/intel-optimized-tensorflow:2.3.0`)
     used in the commands above was built using
     [TensorFlow](https://github.com/tensorflow/tensorflow.git) master for TensorFlow
-    version 2.2.0.
+    version 2.3.0.
 
 * Calculate the model accuracy. The required parameters include: the `ImageNet` dataset location (from step 1),
 the pre-trained `resnet50v1_5_int8_pretrained_model.pb` input graph file (from step 2), and the `--accuracy-only` flag.
@@ -66,7 +66,7 @@ $ python launch_benchmark.py \
     --mode inference \
     --batch-size=100 \
     --accuracy-only \
-    --docker-image intel/intel-optimized-tensorflow:2.2.0
+    --docker-image intel/intel-optimized-tensorflow:2.3.0
 ```
 The log file is saved to the value of `--output-dir`.
 
@@ -105,7 +105,7 @@ $ python launch_benchmark.py \
     --mode inference \
     --batch-size=128 \
     --benchmark-only \
-    --docker-image intel/intel-optimized-tensorflow:2.2.0 \
+    --docker-image intel/intel-optimized-tensorflow:2.3.0 \
     -- warmup_steps=50 steps=500
 ```
 The tail of the log output when the benchmarking completes should look
@@ -164,7 +164,7 @@ $ python launch_benchmark.py \
     --mode inference \
     --batch-size=1 \
     --socket-id=0 \
-    --docker-image intel/intel-optimized-tensorflow:2.2.0
+    --docker-image intel/intel-optimized-tensorflow:2.3.0
 ```
 
 The log file is saved to the value of `--output-dir`.
@@ -202,7 +202,7 @@ $ python launch_benchmark.py \
     --mode inference \
     --batch-size=128 \
     --socket-id=0 \
-    --docker-image intel/intel-optimized-tensorflow:2.2.0
+    --docker-image intel/intel-optimized-tensorflow:2.3.0
 ```
 
 The log file is saved to the value of `--output-dir`.
@@ -243,7 +243,7 @@ $ python launch_benchmark.py \
     --batch-size 100 \
     --socket-id=0 \
     --data-location /home/<user>/dataset/ImageNetData_directory \
-    --docker-image intel/intel-optimized-tensorflow:2.2.0
+    --docker-image intel/intel-optimized-tensorflow:2.3.0
 ```
 
 The log file is saved to the value of `--output-dir`.
@@ -280,7 +280,7 @@ $ python launch_benchmark.py \
     --batch-size 100 \
     --socket-id=0 \
     --data-location /home/<user>/dataset/ImageNetData_directory \
-    --docker-image intel/intel-optimized-tensorflow:2.2.0
+    --docker-image intel/intel-optimized-tensorflow:2.3.0
 ```
 The results file will be written to the
 `models/benchmarks/common/tensorflow/logs` directory, unless another
@@ -344,7 +344,7 @@ $ python launch_benchmark.py \
     --mode inference \
     --batch-size=1 \
     --socket-id 0 \
-    --docker-image=intel/intel-optimized-tensorflow:tensorflow-2.2-bf16-nightly
+    --docker-image=intel/intel-optimized-tensorflow:2.3.0
 ```
 
 The log file is saved to the value of `--output-dir`.
@@ -380,7 +380,7 @@ $ python launch_benchmark.py \
     --mode inference \
     --batch-size=128 \
     --socket-id 0 \
-    --docker-image=intel/intel-optimized-tensorflow:tensorflow-2.2-bf16-nightly
+    --docker-image=intel/intel-optimized-tensorflow:2.3.0
 ```
 
 The log file is saved to the value of `--output-dir`.
@@ -419,7 +419,7 @@ $ python launch_benchmark.py \
     --batch-size 100 \
     --socket-id 0 \
     --data-location /home/<user>/dataset/ImageNetData_directory \
-    --docker-image=intel/intel-optimized-tensorflow:tensorflow-2.2-bf16-nightly
+    --docker-image=intel/intel-optimized-tensorflow:2.3.0
 ```
 
 The log file is saved to the value of `--output-dir`.
@@ -454,7 +454,7 @@ $ python launch_benchmark.py \
     --batch-size 100 \
     --socket-id 0 \
     --data-location /home/<user>/dataset/ImageNetData_directory \
-    --docker-image=intel/intel-optimized-tensorflow:tensorflow-2.2-bf16-nightly
+    --docker-image=intel/intel-optimized-tensorflow:2.3.0
 ```
 The results file will be written to the
 `models/benchmarks/common/tensorflow/logs` directory, unless another
@@ -555,7 +555,7 @@ $ python launch_benchmark.py \
          --framework tensorflow \
          --checkpoint <location_to_store_training_checkpoints> \
          --data-location=/home/<user>/dataset/ImageNetData_directory \
-         --docker-image=intel/intel-optimized-tensorflow:tensorflow-2.2-bf16-nightly
+         --docker-image=intel/intel-optimized-tensorflow:2.3.0
 ```
 
 This run will take a considerable amount of time since it is running for
@@ -585,7 +585,7 @@ $ python launch_benchmark.py \
          --framework tensorflow \
          --data-location=/home/<user>/dataset/ImageNetData_directory \
          --mpi_num_processes=2 \
-         --docker-image=intel/intel-optimized-tensorflow:tensorflow-2.2-bf16-nightly
+         --docker-image=intel/intel-optimized-tensorflow:2.3.0
 ```
 The above distributed training runs one MPI process per socket. To maximize performance, users can run more than one (commonly two) MPI processes per socket. The following command launches 4 MPI processes over 2 sockets. Note that in this case we need to reduce OMP_NUM_THREADS and intra_op_parallelism_threads by half (sometimes minus one or two more for performance; e.g. half of 28 is 14, and 12 can be used for good performance). This is controlled by "-a <half the amount of cores per socket or less>". The batch size can remain the same for weak scaling, or be reduced by half for strong scaling.
 
@@ -598,7 +598,7 @@ $ python launch_benchmark.py \
          --data-location=/home/<user>/dataset/ImageNetData_directory \
          --mpi_num_processes=4 \
          --mpi_num_processes_per_socket=2 \
-         --docker-image=intel/intel-optimized-tensorflow:tensorflow-2.2-bf16-nightly \
+         --docker-image=intel/intel-optimized-tensorflow:2.3.0 \
          -a <half the amount of cores per socket or less>
 ```
 
@@ -613,7 +613,7 @@ $ python launch_benchmark.py \
          --data-location=/home/<user>/dataset/ImageNetData_directory \
          --mpi_num_processes=2 \
          --mpi_num_processes_per_socket=1 \
-         --docker-image=intel/intel-optimized-tensorflow:tensorflow-2.2-bf16-nightly \
+         --docker-image=intel/intel-optimized-tensorflow:2.3.0 \
          -a <half the amount of cores per socket or less>
 ```
 
 
@@ -50,7 +50,7 @@ python launch_benchmark.py \
     --mode=training \
     --framework=tensorflow \
     --batch-size=24 \
-    --docker-image intel/intel-optimized-tensorflow:tensorflow-2.2-bf16-nightly \
+    --docker-image intel/intel-optimized-tensorflow:2.3.0 \
     --volume $BERT_LARGE_DIR:$BERT_LARGE_DIR \
     --volume $SQUAD_DIR:$SQUAD_DIR \
     -- train_option=SQuAD \
@@ -66,7 +66,7 @@ python launch_benchmark.py \
        max_seq_length=384 \
        doc_stride=128 \
        optimized_softmax=True \
-       experimental_gelu=True \
+       experimental_gelu=False \
        do_lower_case=True
 
 ```
@@ -111,7 +111,7 @@ python launch_benchmark.py \
     --framework=tensorflow \
     --batch-size=24 \
     --mpi_num_processes=<num_of_sockets> \
-    --docker-image intel/intel-optimized-tensorflow:tensorflow-2.2-bf16-nightly \
+    --docker-image intel/intel-optimized-tensorflow:2.3.0 \
     --volume $BERT_LARGE_DIR:$BERT_LARGE_DIR \
     --volume $SQUAD_DIR:$SQUAD_DIR \
     -- train_option=SQuAD \
@@ -127,7 +127,7 @@ python launch_benchmark.py \
        max_seq_length=384 \
        doc_stride=128 \
        optimized_softmax=True \
-       experimental_gelu=True \
+       experimental_gelu=False \
        do_lower_case=True
 ```
 The results file will be written to the
@@ -148,7 +148,7 @@ python launch_benchmark.py \
     --mode=training \
     --framework=tensorflow \
     --batch-size=32 \
-    --docker-image intel/intel-optimized-tensorflow:tensorflow-2.2-bf16-nightly \
+    --docker-image intel/intel-optimized-tensorflow:2.3.0 \
     --volume $BERT_BASE_DIR:$BERT_BASE_DIR \
     --volume $GLUE_DIR:$GLUE_DIR \
     -- train-option=Classifier \
@@ -163,7 +163,7 @@ python launch_benchmark.py \
        learning-rate=2e-5 \
        num-train-epochs=30 \
        optimized_softmax=True \
-       experimental_gelu=True \
+       experimental_gelu=False \
        do-lower-case=True
 
 ```
@@ -185,7 +185,7 @@ python launch_benchmark.py \
     --framework=tensorflow \
     --batch-size=32 \
     --mpi_num_processes=<num_of_sockets> \
-    --docker-image intel/intel-optimized-tensorflow:tensorflow-2.2-bf16-nightly \
+    --docker-image intel/intel-optimized-tensorflow:2.3.0 \
     --volume $BERT_LARGE_DIR:$BERT_LARGE_DIR \
     --volume $GLUE_DIR:$GLUE_DIR \
     -- train-option=Classifier \
@@ -227,7 +227,7 @@ python launch_benchmark.py \
     --socket-id=0 \
     --num-intra-threads=24 \
     --num-inter-threads=1 \
-    --docker-image intel/intel-optimized-tensorflow:tensorflow-2.2-bf16-nightly \
+    --docker-image intel/intel-optimized-tensorflow:2.3.0 \
     --volume $BERT_LARGE_DIR:$BERT_LARGE_DIR \
     --volume $PRETRAINING_DATA_DIR:$PRETRAINING_DATA_DIR \
     -- train-option=Pretraining \
@@ -262,7 +262,7 @@ python launch_benchmark.py \
     --num-intra-threads=22 \
     --num-inter-threads=1 \
     --mpi_num_processes=4 \
-    --docker-image intel/intel-optimized-tensorflow:tensorflow-2.2-bf16-nightly \
+    --docker-image intel/intel-optimized-tensorflow:2.3.0 \
     --volume $BERT_LARGE_DIR:$BERT_LARGE_DIR \
     --volume $PRETRAINING_DATA_DIR:$PRETRAINING_DATA_DIR \
     -- train-option=Pretraining \
@@ -344,7 +344,7 @@ FP32 training instructions are the same as Bfloat16 training instructions above,
             --checkpoint /home/<user>/bert_large_checkpoints \
             --output-dir /home/<user>/bert-squad-output \
             --benchmark-only \
-            --docker-image intel/intel-optimized-tensorflow:tensorflow-2.2-bf16-nightly \
+            --docker-image intel/intel-optimized-tensorflow:2.3.0 \
             -- infer_option=SQuAD
         ```
 
@@ -359,7 +359,7 @@ FP32 training instructions are the same as Bfloat16 training instructions above,
             --data-location /home/<user>/wwm_uncased_L-24_H-1024_A-16 \
             --checkpoint /home/<user>/bert_large_checkpoints \
             --output-dir /home/<user>/bert-squad-output \
-            --docker-image intel/intel-optimized-tensorflow:tensorflow-2.2-bf16-nightly \
+            --docker-image intel/intel-optimized-tensorflow:2.3.0 \
             -- profile=True infer_option=SQuAD
         ```
 
@@ -374,7 +374,7 @@ FP32 training instructions are the same as Bfloat16 training instructions above,
             --data-location /home/<user>/wwm_uncased_L-24_H-1024_A-16 \
             --checkpoint /home/<user>/bert_large_checkpoints \
             --output-dir /home/<user>/bert-squad-output \
-            --docker-image intel/intel-optimized-tensorflow:tensorflow-2.2-bf16-nightly \
+            --docker-image intel/intel-optimized-tensorflow:2.3.0 \
             --accuracy-only \
             -- infer_option=SQuAD
         ```
 
@@ -75,7 +75,7 @@ You can also use the helper script [download_glue_data.py](https://gist.github.c
      learning-rate=2e-5 \
      num_train_epochs=3.0
    ```
-   Using `--docker-image intel/intel-optimized-tensorflow:2.2.0` to run with Intel optimized TensorFlow docker container.
+   Use `--docker-image intel/intel-optimized-tensorflow:2.3.0` to run with the Intel optimized TensorFlow docker container.
 
 
 6. The log file is saved to the `models/benchmarks/common/tensorflow/logs` directory. Below are examples of what the tail of your log file should look like for the different configs.
 
@@ -69,7 +69,7 @@ python launch_benchmark.py \
 --batch-size 1 \
 --socket-id 0 \
 --data-location /home/<user>/nmt/data \
---docker-image intel/intel-optimized-tensorflow:2.2.0 \
+--docker-image intel/intel-optimized-tensorflow:2.3.0 \
 --in-graph /home/<user>/mlperf_gnmt_fp32_pretrained_model.pb \
 --benchmark-only
 ```
@@ -84,7 +84,7 @@ python launch_benchmark.py \
 --batch-size 32 \
 --socket-id 0 \
 --data-location /home/<user>/nmt/data \
---docker-image intel/intel-optimized-tensorflow:2.2.0 \
+--docker-image intel/intel-optimized-tensorflow:2.3.0 \
 --in-graph /home/<user>/mlperf_gnmt_fp32_pretrained_model.pb \
 --benchmark-only
 ```
@@ -99,7 +99,7 @@ python launch_benchmark.py \
 --batch-size 32 \
 --socket-id 0 \
 --data-location /home/<user>/nmt/data \
---docker-image intel/intel-optimized-tensorflow:2.2.0 \
+--docker-image intel/intel-optimized-tensorflow:2.3.0 \
 --in-graph /home/<user>/mlperf_gnmt_fp32_pretrained_model.pb \
 --accuracy-only
 ```
@@ -52,7 +52,7 @@ python launch_benchmark.py \
     --framework tensorflow \
     --batch-size 1 \
     --socket-id 0 \
-    --docker-image intel/intel-optimized-tensorflow:2.2.0 \
+    --docker-image intel/intel-optimized-tensorflow:2.3.0 \
     --in-graph /home/<user>/transformer_lt_official_fp32_pretrained_model/graph/fp32_graphdef.pb \
     --data-location /home/<user>/transformer_lt_official_fp32_pretrained_model/data \
     -- file=newstest2014.en \
@@ -71,7 +71,7 @@ python launch_benchmark.py \
     --framework tensorflow \
     --batch-size 64 \
     --socket-id 0 \
-    --docker-image intel/intel-optimized-tensorflow:2.2.0 \
+    --docker-image intel/intel-optimized-tensorflow:2.3.0 \
     --in-graph /home/<user>/transformer_lt_official_fp32_pretrained_model/graph/fp32_graphdef.pb \
     --data-location /home/<user>/transformer_lt_official_fp32_pretrained_model/data \
     -- file=newstest2014.en \