Skip to content

Commit

Permalink
Merge Pull Request #13474 from trilinos/Trilinos/siefert1-c556693
Browse files (browse the repository at this point in the history)
Automatically Merged using Trilinos Pull Request AutoTester
PR Title: Tpetra: Fixing AMD Unified Memory issue in packCrs[Graph|Matrix] test / ImportExport2 test compile error
PR Author: csiefer2
  • Loading branch information
trilinos-autotester authored Sep 25, 2024
2 parents 7769bd4 + 777c71e commit 0b2099d
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 6 deletions.
11 changes: 7 additions & 4 deletions packages/tpetra/core/src/Tpetra_Details_packCrsGraph_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,11 @@ class NumPacketsAndOffsetsFunctor{
int getError () const {
  // Host-side accessor: copy the value stored in error_ into a host
  // mirror and return it.
  auto error_h = Kokkos::create_mirror_view (error_);
  // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
  // Deliberately use the deep_copy overload that takes no execution
  // space instance.
  // Note: In the UVM case, the overload taking an execution space
  // instance would be a no-op and thus not fence, so the value might
  // not be correct on return.  In the non-UVM case, create_mirror_view
  // will block for the allocation.
  Kokkos::deep_copy (error_h, error_);

  return error_h ();
}

Expand Down Expand Up @@ -267,7 +270,7 @@ computeNumPacketsAndOffsets(const OutputOffsetsViewType& outputOffsets,
<< " != numRowsToPack = " << numRowsToPack << ".");

functor_type f (outputOffsets, counts, rowOffsets, lclRowInds, lclRowPids);
Kokkos::parallel_scan (range_type (0, numRowsToPack + 1), f);
Kokkos::parallel_scan ("Tpetra::Details::computeNumPacketsAndOffsets::scan", range_type (0, numRowsToPack + 1), f);

// At least in debug mode, this functor checks for errors.
const int errCode = f.getError ();
Expand Down Expand Up @@ -579,7 +582,7 @@ do_pack(const LocalGraph& local_graph,

typename pack_functor_type::value_type result;
range_type range (0, num_packets_per_lid.extent (0));
Kokkos::parallel_reduce (range, f, result);
Kokkos::parallel_reduce ("Tpetra::Details::computeNumPacketsAndOffsets::reduce",range, f, result);

if (result.first != 0) {
// We can't deep_copy from AnonymousSpace Views, so we can't
Expand Down
6 changes: 4 additions & 2 deletions packages/tpetra/core/src/Tpetra_Details_packCrsMatrix_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,10 +209,12 @@ class NumPacketsAndOffsetsFunctor {

//! Host function for getting the error.
int getError () const {
  // Copy the value stored in error_ into a host mirror so it can be
  // read and returned on the host.
  auto error_h = Kokkos::create_mirror_view (error_);
  // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
  // Deliberately use the deep_copy overload that takes no execution
  // space instance.
  // Note: In the UVM case, the overload taking an execution space
  // instance would be a no-op and thus not fence, so the value might
  // not be correct on return.  In the non-UVM case, create_mirror_view
  // will block for the allocation.
  Kokkos::deep_copy (error_h, error_);
  return error_h ();
}

Expand Down

0 comments on commit 0b2099d

Please sign in to comment.