Skip to content

Commit 7f5fd9e

Browse files
yingcanw authored and EmmaQiaoCh committed
Fix hps profiler issue with only cpu query case
1 parent 4f00f85 commit 7f5fd9e

File tree

2 files changed

+33
-10
lines changed

2 files changed

+33
-10
lines changed

HugeCTR/src/hps/embedding_cache.cpp

+10-7
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ void EmbeddingCache<TypeHashKey>::lookup(size_t const table_id, float* const d_v
296296
HCTR_LIB_THROW(
297297
cudaMemcpyAsync(d_vectors, workspace_handler.h_missing_emb_vec_[table_id],
298298
num_keys * cache_config_.embedding_vec_size_[table_id] * sizeof(float),
299-
cudaMemcpyHostToDevice, stream));
299+
cudaMemcpyDefault, stream));
300300
HCTR_LIB_THROW(cudaStreamSynchronize(stream));
301301
parameter_server_->free_buffer(memory_block);
302302
}
@@ -552,12 +552,15 @@ void EmbeddingCache<TypeHashKey>::finalize() {
552552
template <typename TypeHashKey>
553553
void EmbeddingCache<TypeHashKey>::insert_stream_for_sync(
554554
std::vector<cudaStream_t> lookup_streams_) {
555-
if (lookup_streams_.size() != gpu_emb_caches_.size()) {
556-
HCTR_OWN_THROW(Error_t::WrongInput,
557-
"The number of lookup streams is not equal to the number of embedding tables.");
558-
}
559-
for (size_t idx = 0; idx < lookup_streams_.size(); ++idx) {
560-
gpu_emb_caches_[idx]->Record(lookup_streams_[idx]);
555+
if (cache_config_.use_gpu_embedding_cache_) {
556+
if (lookup_streams_.size() != gpu_emb_caches_.size()) {
557+
HCTR_OWN_THROW(
558+
Error_t::WrongInput,
559+
"The number of lookup streams is not equal to the number of embedding tables.");
560+
}
561+
for (size_t idx = 0; idx < lookup_streams_.size(); ++idx) {
562+
gpu_emb_caches_[idx]->Record(lookup_streams_[idx]);
563+
}
561564
}
562565
}
563566

test/inference/hps/lookup_session_test.py

+23-3
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,13 @@ def hps_dlpack(model_name, embedding_file_list, data_file, enable_cache, cache_t
146146
True,
147147
hugectr.inference.EmbeddingCacheType_t.Dynamic,
148148
)
149+
h1, h2 = hps_dlpack(
150+
model_name,
151+
embedding_file_list,
152+
data_file,
153+
False,
154+
hugectr.inference.EmbeddingCacheType_t.Dynamic,
155+
)
149156
u1, u2 = hps_dlpack(
150157
model_name, embedding_file_list, data_file, True, hugectr.inference.EmbeddingCacheType_t.UVM
151158
)
@@ -173,15 +180,28 @@ def hps_dlpack(model_name, embedding_file_list, data_file, enable_cache, cache_t
173180
diff = u2.reshape(1, 26 * 16) - d2.reshape(1, 26 * 16)
174181
if diff.mean() > 1e-3:
175182
raise RuntimeError(
176-
"The lookup results of UVM cache are consistent with Dynamic cache: {}".format(
183+
"The lookup results of UVM cache are not consistent with Dynamic cache: {}".format(
184+
diff.mean()
185+
)
186+
)
187+
sys.exit(1)
188+
else:
189+
print(
190+
"The lookup results on UVM are consistent with Dynamic cache, mse: {}".format(
191+
diff.mean()
192+
)
193+
)
194+
diff = h2.reshape(1, 26 * 16) - d2.reshape(1, 26 * 16)
195+
if diff.mean() > 1e-3:
196+
raise RuntimeError(
197+
"The lookup results of Database backend are not consistent with Dynamic cache: {}".format(
177198
diff.mean()
178199
)
179200
)
180201
sys.exit(1)
181202
else:
182203
print(
183-
"Pytorch dlpack on cpu results are consistent with native HPS lookup api, mse: {}".format(
204+
"The lookup results on Database backend are consistent with Dynamic cache, mse: {}".format(
184205
diff.mean()
185206
)
186207
)
187-
# hps_dlpack(model_name, network_file, dense_file, embedding_file_list, data_file, False)

0 commit comments

Comments (0)