Skip to content

Commit 483f52b

Browse files
committed
Merge branch 'add_some_jobs_dracorno' into 'main'
[READY] Add jobs 'hps_plugin_benchmark', '147gb_model_benchmark'. See merge request dl/hugectr/hugectr!1435
2 parents 64d89f5 + 9524386 commit 483f52b

File tree

2 files changed

+34
-2
lines changed

2 files changed

+34
-2
lines changed

ci/dracorno/ci.yml

+26 -2
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ py_single_node:
151151
variables:
152152
GPFSFOLDER: $DRACO_LOGDIR/py_single_node
153153
CONT: $TRAIN_IMAGE_VERSIONED
154-
MOUNTS: ${DRACO_DATASET}:${DATASET_MOUNT},${DRACO_WDL_PARQUET_DATASET}:${NEW_CRITEO_MOUNT}
154+
MOUNTS: ${DRACO_DATASET}:${DATASET_MOUNT},${DRACO_DATASET_NEW_CRITEO}:${NEW_CRITEO_MOUNT}
155155
TEST_CMD: ./ci/integration_test/py_interface/py_single_node.sub
156156

157157
hugectr2onnx:
@@ -175,7 +175,7 @@ ebc_multi_node:
175175
GPFSFOLDER: $DRACO_LOGDIR/ebc_multi_node
176176
CONT: $TRAIN_IMAGE_MULTINODE_VERSIONED
177177
MOUNTS: ${DRACO_DATASET_NEW_CRITEO}:${DATASET_MOUNT}
178-
WALLTIME: "00:45:00"
178+
WALLTIME: "01:00:00"
179179
DGXNNODES: 2
180180
TEST_CMD: ./ci/integration_test/ebc/ebc.sub
181181

@@ -214,6 +214,30 @@ s3_backend_test:
214214
DGXNNODES: 1
215215
TEST_CMD: ./ci/integration_test/s3/s3_backend_test.sub
216216

217+
hps_plugin_benchmark:
218+
extends: .dracorno_test_job
219+
needs:
220+
- pipeline: $PARENT_PIPELINE_ID
221+
job: build_tf_hps_trt_plugin
222+
variables:
223+
GPFSFOLDER: $DRACO_LOGDIR/hps_plugin_benchmark
224+
CONT: $TF_TRT_IMAGE_VERSIONED
225+
MOUNTS: ${DATA_PREFIX}/fs1/projects/gpu_compute/datasets/hugectr-ci/hpc-hugectr/hps_tf_benchmark/hps_plugin_ci_model_repo:/model_repo,${DATA_PREFIX}/fs1/projects/gpu_compute/datasets/hugectr-ci/hpc-hugectr/hps_tf_benchmark/perf_data:/perf_data
226+
WALLTIME: "00:45:00"
227+
TEST_CMD: ./ci/benchmark/hps_plugin_benchmark/run.sub
228+
229+
147gb_model_benchmark:
230+
extends: .dracorno_test_job
231+
needs:
232+
- pipeline: $PARENT_PIPELINE_ID
233+
job: build_tf_hps_trt_plugin
234+
variables:
235+
GPFSFOLDER: $DRACO_LOGDIR/147gb_model_benchmark
236+
CONT: $TF_TRT_IMAGE_VERSIONED
237+
MOUNTS: ${DATA_PREFIX}/fs1/projects/gpu_compute/datasets/hugectr-ci/hpc-hugectr/hps_tf_benchmark/147gb_ci_model_repo:/model_repo
238+
WALLTIME: "00:45:00"
239+
TEST_CMD: ./ci/benchmark/147gb_model_benchmark/run.sub
240+
217241
#SOK ut tests
218242
sparse_operation_kit_ut-TF1:
219243
extends:

ci/template.yml

+8
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,14 @@ stages:
384384
GIT_CLONE_PATH: /lustre/fsw/devtech/hpc-hugectr/hugectr-ci/$CI_CONCURRENT_ID/$CI_PROJECT_NAME
385385
stage: post_test
386386

387+
.dracorno_post_test_job:
388+
extends:
389+
- .dracorno_test_job
390+
- .hugectr:rules:test_in_child
391+
variables:
392+
WALLTIME: "00:30:00"
393+
stage: post_test
394+
387395
.cluster_post_test_job_daily:
388396
extends:
389397
- .cluster_test_job

0 commit comments

Comments
 (0)