Skip to content

Commit 9c8b3e1

Browse files
authored
Implemented Pytorch CPU Automation for DLRMv1 (#2426)
Added TEST_MODE for inference
1 parent df6ce76 commit 9c8b3e1

File tree

6 files changed

+163
-8
lines changed

6 files changed

+163
-8
lines changed

models_v2/pytorch/dlrm/inference/cpu/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ export WEIGHT_PATH=<path to the tb00_40M.pt file>
6666
6767
| **Parameter** | **export command** |
6868
|:---------------------------:|:------------------------------------------------------------------------------------:|
69-
| **THROUHGPUT** (leave unset if accuracy) | `export THROUGHPUT=true` |
69+
| **TEST_MODE** (THROUGHPUT, ACCURACY) | `export TEST_MODE=THROUGHPUT` |
7070
| **DATASET_DIR** | `export DATASET_DIR=<path-to-dlrm_data> or <path-to-preprocessed-data>` |
7171
| **WEIGHT_PATH** | `export WEIGHT_PATH=<path to the tb00_40M.pt file>` |
7272
| **BATCH_SIZE** (optional) | `export BATCH_SIZE=10000` |

models_v2/pytorch/dlrm/inference/cpu/run_model.sh

+12-7
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,15 @@
1414
# limitations under the License.
1515
#
1616

17+
# Validate TEST_MODE up front: later in this script the log file name and the
# launch command are both chosen by comparing TEST_MODE against "THROUGHPUT".
if [[ "$TEST_MODE" == "THROUGHPUT" ]]; then
    echo "TEST_MODE set to THROUGHPUT"
elif [[ "$TEST_MODE" == "ACCURACY" ]]; then
    echo "TEST_MODE set to ACCURACY"
else
    echo "Please set TEST_MODE to THROUGHPUT or ACCURACY"
    # Exit non-zero so that callers can detect the misconfiguration. A bare
    # `exit` would return the status of the echo above (0) and the failure
    # would be reported as success.
    exit 1
fi
25+
1726
MODEL_DIR=${MODEL_DIR-$PWD}
1827
if [ ! -e "${MODEL_DIR}/../../common/dlrm_s_pytorch.py" ]; then
1928
echo "Could not find the script of dlrm_s_pytorch.py. Please set environment variable '\${MODEL_DIR}'."
@@ -22,10 +31,6 @@ if [ ! -e "${MODEL_DIR}/../../common/dlrm_s_pytorch.py" ]; then
2231
fi
2332
MODEL_SCRIPT=${MODEL_DIR}/../../common/dlrm_s_pytorch.py
2433

25-
echo "PRECISION: ${PRECISION}"
26-
echo "DATASET_DIR: ${DATASET_DIR}"
27-
echo "OUTPUT_DIR: ${OUTPUT_DIR}"
28-
2934
if [ -z "${OUTPUT_DIR}" ]; then
3035
echo "The required environment variable OUTPUT_DIR has not been set"
3136
exit 1
@@ -82,15 +87,15 @@ else
8287
fi
8388

8489
export OMP_NUM_THREADS=$CORES_PER_SOCKET
85-
if [ "$THROUGHPUT" ]; then
90+
if [ "$TEST_MODE" == "THROUGHPUT" ]; then
8691
LOG="${LOG}/throughput.log"
8792
else
8893
LOG="${LOG}/accuracy.log"
8994
fi
9095

9196
TORCH_INDUCTOR=${TORCH_INDUCTOR:-"0"}
9297

93-
if [ "$THROUGHPUT" ]; then
98+
if [ "$TEST_MODE" == "THROUGHPUT" ]; then
9499
if [[ "0" == ${TORCH_INDUCTOR} ]];then
95100
python -m intel_extension_for_pytorch.cpu.launch --throughput_mode --memory-allocator tcmalloc $MODEL_SCRIPT \
96101
--raw-data-file=${DATASET_DIR}/day --processed-data-file=${DATASET_DIR}/terabyte_processed.npz \
@@ -146,7 +151,7 @@ throughput="N/A"
146151
accuracy="N/A"
147152
latency="N/A"
148153

149-
if [ "$THROUGHPUT" ]; then
154+
if [ "$TEST_MODE" == "THROUGHPUT" ]; then
150155
throughput=$(grep 'Throughput:' ${LOG} |sed -e 's/.*Throughput//;s/[^0-9.]//g' |awk '
151156
BEGIN {
152157
sum = 0;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#!/bin/bash
#
# CI entry point for the DLRMv1 PyTorch CPU inference test.
#
# Usage:
#   test_model.sh PRECISION IS_LKG_DROP TEST_MODE DATASET_DIR WEIGHT_PATH
#
# Environment:
#   OUTPUT_DIR  optional; when unset, a per-precision directory under
#               tests/cicd/pytorch/dlrm/inference/cpu/output/ in the current
#               working directory is used.
#   WORKSPACE   root of the pytorch_setup, jemalloc and tcmalloc trees that
#               are referenced below.
#
# The relative paths in this script are resolved against the directory the
# script is started from.
set -e

echo "Setup PyTorch Test Enviroment for DLRMv1 Inference"

PRECISION=$1
OUTPUT_DIR=${OUTPUT_DIR-"$(pwd)/tests/cicd/pytorch/dlrm/inference/cpu/output/${PRECISION}"}
is_lkg_drop=$2
TEST_MODE=$3
DATASET_DIR=$4
WEIGHT_PATH=$5

# Create the output directory in case it doesn't already exist.
# Quoted so that a path containing whitespace stays a single argument.
mkdir -p "${OUTPUT_DIR}"

# Only activate the "pytorch" environment when the caller asks for it.
if [[ "${is_lkg_drop}" == "true" ]]; then
  source "${WORKSPACE}/pytorch_setup/bin/activate" pytorch
fi

export LD_PRELOAD="${WORKSPACE}/jemalloc/lib/libjemalloc.so":"${WORKSPACE}/tcmalloc/lib/libtcmalloc.so":"/usr/local/lib/libiomp5.so":$LD_PRELOAD
export MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:9000000000,muzzy_decay_ms:9000000000"
export DNNL_MAX_CPU_ISA=AVX512_CORE_AMX

# Install dependency
cd models_v2/pytorch/dlrm/inference/cpu
./setup.sh

# Run script, handing the settings over as environment variables.
OUTPUT_DIR="${OUTPUT_DIR}" \
PRECISION="${PRECISION}" \
DATASET_DIR="${DATASET_DIR}" \
WEIGHT_PATH="${WEIGHT_PATH}" \
TEST_MODE="${TEST_MODE}" \
  ./run_model.sh
cd -
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# DLRMv1 PyTorch CPU inference CI tests: one entry per precision and test mode.
# Each entry runs the shared CPU setup script followed by the inference
# test_model.sh, passing the values from its `env` mapping as arguments.
# NOTE(review): WEIGHT_PATH is a directory here, while the model README
# describes it as the path to the tb00_40M.pt file - confirm which is expected.

# --- Throughput runs ---
inference-fp32-throughput:
  cmd: >-
    bash $GITHUB_WORKSPACE/tests/cicd/pytorch/pyt_cpu_setup.sh ${FRAMEWORK_VERSION} ${IS_LKG_DROP} ${AIKIT_VERSION};
    bash $GITHUB_WORKSPACE/tests/cicd/pytorch/dlrm/inference/cpu/test_model.sh ${PRECISION} ${IS_LKG_DROP} ${TEST_MODE} ${DATASET_DIR} ${WEIGHT_PATH}
  env:
    PRECISION: 'FP32'
    TEST_MODE: 'THROUGHPUT'
    DATASET_DIR: '/pytorch/dlrm_data/'
    WEIGHT_PATH: '/pytorch/dlrm_weights/'
inference-bf32-throughput:
  cmd: >-
    bash $GITHUB_WORKSPACE/tests/cicd/pytorch/pyt_cpu_setup.sh ${FRAMEWORK_VERSION} ${IS_LKG_DROP} ${AIKIT_VERSION};
    bash $GITHUB_WORKSPACE/tests/cicd/pytorch/dlrm/inference/cpu/test_model.sh ${PRECISION} ${IS_LKG_DROP} ${TEST_MODE} ${DATASET_DIR} ${WEIGHT_PATH}
  env:
    PRECISION: 'BF32'
    TEST_MODE: 'THROUGHPUT'
    DATASET_DIR: '/pytorch/dlrm_data/'
    WEIGHT_PATH: '/pytorch/dlrm_weights/'
inference-bf16-throughput:
  cmd: >-
    bash $GITHUB_WORKSPACE/tests/cicd/pytorch/pyt_cpu_setup.sh ${FRAMEWORK_VERSION} ${IS_LKG_DROP} ${AIKIT_VERSION};
    bash $GITHUB_WORKSPACE/tests/cicd/pytorch/dlrm/inference/cpu/test_model.sh ${PRECISION} ${IS_LKG_DROP} ${TEST_MODE} ${DATASET_DIR} ${WEIGHT_PATH}
  env:
    PRECISION: 'BF16'
    TEST_MODE: 'THROUGHPUT'
    DATASET_DIR: '/pytorch/dlrm_data/'
    WEIGHT_PATH: '/pytorch/dlrm_weights/'
inference-int8-throughput:
  cmd: >-
    bash $GITHUB_WORKSPACE/tests/cicd/pytorch/pyt_cpu_setup.sh ${FRAMEWORK_VERSION} ${IS_LKG_DROP} ${AIKIT_VERSION};
    bash $GITHUB_WORKSPACE/tests/cicd/pytorch/dlrm/inference/cpu/test_model.sh ${PRECISION} ${IS_LKG_DROP} ${TEST_MODE} ${DATASET_DIR} ${WEIGHT_PATH}
  env:
    PRECISION: 'INT8'
    TEST_MODE: 'THROUGHPUT'
    DATASET_DIR: '/pytorch/dlrm_data/'
    WEIGHT_PATH: '/pytorch/dlrm_weights/'

# --- Accuracy runs ---
inference-fp32-accuracy:
  cmd: >-
    bash $GITHUB_WORKSPACE/tests/cicd/pytorch/pyt_cpu_setup.sh ${FRAMEWORK_VERSION} ${IS_LKG_DROP} ${AIKIT_VERSION};
    bash $GITHUB_WORKSPACE/tests/cicd/pytorch/dlrm/inference/cpu/test_model.sh ${PRECISION} ${IS_LKG_DROP} ${TEST_MODE} ${DATASET_DIR} ${WEIGHT_PATH}
  env:
    PRECISION: 'FP32'
    TEST_MODE: 'ACCURACY'
    DATASET_DIR: '/pytorch/dlrm_data/'
    WEIGHT_PATH: '/pytorch/dlrm_weights/'
inference-bf32-accuracy:
  cmd: >-
    bash $GITHUB_WORKSPACE/tests/cicd/pytorch/pyt_cpu_setup.sh ${FRAMEWORK_VERSION} ${IS_LKG_DROP} ${AIKIT_VERSION};
    bash $GITHUB_WORKSPACE/tests/cicd/pytorch/dlrm/inference/cpu/test_model.sh ${PRECISION} ${IS_LKG_DROP} ${TEST_MODE} ${DATASET_DIR} ${WEIGHT_PATH}
  env:
    PRECISION: 'BF32'
    TEST_MODE: 'ACCURACY'
    DATASET_DIR: '/pytorch/dlrm_data/'
    WEIGHT_PATH: '/pytorch/dlrm_weights/'
inference-bf16-accuracy:
  cmd: >-
    bash $GITHUB_WORKSPACE/tests/cicd/pytorch/pyt_cpu_setup.sh ${FRAMEWORK_VERSION} ${IS_LKG_DROP} ${AIKIT_VERSION};
    bash $GITHUB_WORKSPACE/tests/cicd/pytorch/dlrm/inference/cpu/test_model.sh ${PRECISION} ${IS_LKG_DROP} ${TEST_MODE} ${DATASET_DIR} ${WEIGHT_PATH}
  env:
    PRECISION: 'BF16'
    TEST_MODE: 'ACCURACY'
    DATASET_DIR: '/pytorch/dlrm_data/'
    WEIGHT_PATH: '/pytorch/dlrm_weights/'
inference-int8-accuracy:
  cmd: >-
    bash $GITHUB_WORKSPACE/tests/cicd/pytorch/pyt_cpu_setup.sh ${FRAMEWORK_VERSION} ${IS_LKG_DROP} ${AIKIT_VERSION};
    bash $GITHUB_WORKSPACE/tests/cicd/pytorch/dlrm/inference/cpu/test_model.sh ${PRECISION} ${IS_LKG_DROP} ${TEST_MODE} ${DATASET_DIR} ${WEIGHT_PATH}
  env:
    PRECISION: 'INT8'
    TEST_MODE: 'ACCURACY'
    DATASET_DIR: '/pytorch/dlrm_data/'
    WEIGHT_PATH: '/pytorch/dlrm_weights/'
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#!/bin/bash
#
# CI entry point for the DLRMv1 PyTorch CPU training test.
#
# Usage:
#   test_model.sh PRECISION IS_LKG_DROP DATASET_DIR
#
# Environment:
#   OUTPUT_DIR  optional; when unset, a per-precision directory under
#               tests/cicd/pytorch/dlrm/training/cpu/output/ in the current
#               working directory is used.
#   WORKSPACE   root of the pytorch_setup, jemalloc and tcmalloc trees that
#               are referenced below.
#   TEST_MODE   not assigned by this script; whatever the caller's
#               environment holds (possibly empty) is forwarded to
#               run_model.sh.
#
# The relative paths in this script are resolved against the directory the
# script is started from.
set -e

echo "Setup PyTorch Test Enviroment for DLRMv1 Training"

PRECISION=$1
OUTPUT_DIR=${OUTPUT_DIR-"$(pwd)/tests/cicd/pytorch/dlrm/training/cpu/output/${PRECISION}"}
is_lkg_drop=$2
DATASET_DIR=$3

# Create the output directory in case it doesn't already exist.
# Quoted so that a path containing whitespace stays a single argument.
mkdir -p "${OUTPUT_DIR}"

# Only activate the "pytorch" environment when the caller asks for it.
if [[ "${is_lkg_drop}" == "true" ]]; then
  source "${WORKSPACE}/pytorch_setup/bin/activate" pytorch
fi

export LD_PRELOAD="${WORKSPACE}/jemalloc/lib/libjemalloc.so":"${WORKSPACE}/tcmalloc/lib/libtcmalloc.so":"/usr/local/lib/libiomp5.so":$LD_PRELOAD
export MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:9000000000,muzzy_decay_ms:9000000000"
export DNNL_MAX_CPU_ISA=AVX512_CORE_AMX

# Install dependency
cd models_v2/pytorch/dlrm/training/cpu
pip install -r requirements.txt

# Run script, handing the settings over as environment variables.
OUTPUT_DIR="${OUTPUT_DIR}" \
PRECISION="${PRECISION}" \
DATASET_DIR="${DATASET_DIR}" \
TEST_MODE="${TEST_MODE}" \
  ./run_model.sh
cd -
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# DLRMv1 PyTorch CPU training CI tests: one entry per precision.
# Each entry runs the shared CPU setup script followed by the training
# test_model.sh, passing PRECISION and DATASET_DIR from its `env` mapping.
# All entries use the `training-` prefix because every one of them invokes
# the training test script.
training-fp32:
  cmd:
    bash $GITHUB_WORKSPACE/tests/cicd/pytorch/pyt_cpu_setup.sh ${FRAMEWORK_VERSION} ${IS_LKG_DROP} ${AIKIT_VERSION};
    bash $GITHUB_WORKSPACE/tests/cicd/pytorch/dlrm/training/cpu/test_model.sh ${PRECISION} ${IS_LKG_DROP} ${DATASET_DIR}
  env:
    PRECISION: "fp32"
    DATASET_DIR: "/pytorch/dlrm_terabyte_preprocessed/"
training-bf32:
  cmd:
    bash $GITHUB_WORKSPACE/tests/cicd/pytorch/pyt_cpu_setup.sh ${FRAMEWORK_VERSION} ${IS_LKG_DROP} ${AIKIT_VERSION};
    bash $GITHUB_WORKSPACE/tests/cicd/pytorch/dlrm/training/cpu/test_model.sh ${PRECISION} ${IS_LKG_DROP} ${DATASET_DIR}
  env:
    PRECISION: "bf32"
    DATASET_DIR: "/pytorch/dlrm_terabyte_preprocessed/"
training-bf16:
  cmd:
    bash $GITHUB_WORKSPACE/tests/cicd/pytorch/pyt_cpu_setup.sh ${FRAMEWORK_VERSION} ${IS_LKG_DROP} ${AIKIT_VERSION};
    bash $GITHUB_WORKSPACE/tests/cicd/pytorch/dlrm/training/cpu/test_model.sh ${PRECISION} ${IS_LKG_DROP} ${DATASET_DIR}
  env:
    PRECISION: "bf16"
    DATASET_DIR: "/pytorch/dlrm_terabyte_preprocessed/"

0 commit comments

Comments
 (0)