Skip to content

Commit

Permalink
Tpetra: CrsMatrix getLocalDiagCopy hang, see issue trilinos#13498
Browse files Browse the repository at this point in the history
Adding a new unit-test that covers code path for fillComplete and
fillActive states. The code paths that handled both these cases is
now merged and follows the fillComplete case as a staticCrsGraph is
assumed when calling getLocalDiagCopy.

Signed-off-by: Luc Berger-Vergiat <[email protected]>
  • Loading branch information
lucbv committed Nov 5, 2024
1 parent 1ff6055 commit f2e80bc
Show file tree
Hide file tree
Showing 4 changed files with 162 additions and 41 deletions.
58 changes: 26 additions & 32 deletions packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3271,22 +3271,22 @@ CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
"a view with global column indices by calling getGlobalRowCopy().");

const RowInfo rowInfo = staticGraph_->getRowInfo (localRow);
if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
rowInfo.numEntries > 0) {
indices = staticGraph_->lclIndsUnpacked_wdv.getHostSubview(
rowInfo.offset1D,
rowInfo.numEntries,
Access::ReadOnly);
values = valuesUnpacked_wdv.getHostSubview(rowInfo.offset1D,
rowInfo.numEntries,
Access::ReadOnly);
}
else {
// This does the right thing (reports an empty row) if the input
// row is invalid.
indices = local_inds_host_view_type();
values = values_host_view_type();
}
// if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
// rowInfo.numEntries > 0) {
// indices = staticGraph_->lclIndsUnpacked_wdv.getHostSubview(
// rowInfo.offset1D,
// rowInfo.numEntries,
// Access::ReadOnly);
// values = valuesUnpacked_wdv.getHostSubview(rowInfo.offset1D,
// rowInfo.numEntries,
// Access::ReadOnly);
// }
// else {
// // This does the right thing (reports an empty row) if the input
// // row is invalid.
// indices = local_inds_host_view_type();
// values = values_host_view_type();
// }

#ifdef HAVE_TPETRA_DEBUG
const char suffix[] = ". This should never happen. Please report this "
Expand Down Expand Up @@ -3602,23 +3602,17 @@ CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
"diag.getMap ()->isCompatible (A.getRowMap ());");
#endif // HAVE_TPETRA_DEBUG

if (this->isFillComplete ()) {
const auto D_lcl = diag.getLocalViewDevice(Access::OverwriteAll);
// 1-D subview of the first (and only) column of D_lcl.
const auto D_lcl_1d =
Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
const auto D_lcl = diag.getLocalViewDevice(Access::OverwriteAll);
// 1-D subview of the first (and only) column of D_lcl.
const auto D_lcl_1d =
Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);

const auto lclRowMap = rowMap.getLocalMap ();
const auto lclColMap = colMap.getLocalMap ();
using ::Tpetra::Details::getDiagCopyWithoutOffsets;
(void) getDiagCopyWithoutOffsets (D_lcl_1d, lclRowMap,
lclColMap,
getLocalMatrixDevice ());
}
else {
using ::Tpetra::Details::getLocalDiagCopyWithoutOffsetsNotFillComplete;
(void) getLocalDiagCopyWithoutOffsetsNotFillComplete (diag, *this);
}
const auto lclRowMap = rowMap.getLocalMap ();
const auto lclColMap = colMap.getLocalMap ();
using ::Tpetra::Details::getDiagCopyWithoutOffsets;
(void) getDiagCopyWithoutOffsets (D_lcl_1d, lclRowMap,
lclColMap,
getLocalMatrixDevice ());
}

template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,23 +38,23 @@ namespace Details {
template<class SC, class LO, class GO, class NT>
class GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor {
public:
typedef ::Tpetra::RowMatrix<SC, LO, GO, NT> row_matrix_type;
typedef ::Tpetra::Vector<SC, LO, GO, NT> vec_type;
using row_matrix_type = ::Tpetra::RowMatrix<SC, LO, GO, NT>;
using vec_type = ::Tpetra::Vector<SC, LO, GO, NT>;

typedef typename vec_type::impl_scalar_type IST;
using IST = typename vec_type::impl_scalar_type;
// The output Vector determines the execution space.

using host_execution_space = typename vec_type::dual_view_type::t_host::execution_space;
private:
typedef typename vec_type::dual_view_type::t_host::execution_space host_execution_space;
typedef typename vec_type::map_type map_type;
using map_type = typename vec_type::map_type;

static bool
graphIsSorted (const row_matrix_type& A)
{
using Teuchos::RCP;
using Teuchos::rcp_dynamic_cast;
typedef Tpetra::CrsGraph<LO, GO, NT> crs_graph_type;
typedef Tpetra::RowGraph<LO, GO, NT> row_graph_type;
using crs_graph_type = Tpetra::CrsGraph<LO, GO, NT>;
using row_graph_type = Tpetra::RowGraph<LO, GO, NT>;

// We conservatively assume not sorted. RowGraph lacks an
// "isSorted" predicate, so we can't know for sure unless the cast
Expand Down Expand Up @@ -108,6 +108,7 @@ class GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor {

void operator () (const LO& lclRowInd, LO& errCount) const {
using KokkosSparse::findRelOffset;
Kokkos::printf("Not hanging yet [0]\n");

D_lcl_1d_(lclRowInd) = Kokkos::ArithTraits<IST>::zero ();
const GO gblInd = lclRowMap_.getGlobalElement (lclRowInd);
Expand All @@ -119,19 +120,23 @@ class GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor {
else { // row index is also in the column Map on this process
typename row_matrix_type::local_inds_host_view_type lclColInds;
typename row_matrix_type::values_host_view_type curVals;
Kokkos::printf("Not hanging yet [1]\n");
A_.getLocalRowView(lclRowInd, lclColInds, curVals);
Kokkos::printf("Not hanging yet [2]\n");
LO numEnt = lclColInds.extent(0);
// The search hint is always zero, since we only call this
// once per row of the matrix.
const LO hint = 0;
const LO offset =
findRelOffset (lclColInds, numEnt, lclColInd, hint, sorted_);
Kokkos::printf("Not hanging yet [3]\n");
if (offset == numEnt) { // didn't find the diagonal column index
errCount++;
}
else {
D_lcl_1d_(lclRowInd) = curVals[offset];
}
Kokkos::printf("Not hanging yet [4]\n");
}
}

Expand All @@ -155,8 +160,7 @@ getLocalDiagCopyWithoutOffsetsNotFillComplete ( ::Tpetra::Vector<SC, LO, GO, NT>
using Teuchos::outArg;
using Teuchos::REDUCE_MIN;
using Teuchos::reduceAll;
typedef GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor<SC,
LO, GO, NT> functor_type;
using functor_type = GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor<SC, LO, GO, NT>;

// The functor's constructor does error checking and executes the
// thread-parallel kernel.
Expand Down Expand Up @@ -212,6 +216,8 @@ getLocalDiagCopyWithoutOffsetsNotFillComplete ( ::Tpetra::Vector<SC, LO, GO, NT>
}
}
else { // ! debug
Kokkos::printf("\n\n\nRunning on Kokkos::Serial: %s\n", std::is_same_v<typename functor_type::host_execution_space, Kokkos::Serial> ? "true" : "false");

functor_type functor (lclNumErrs, diag, A);
}

Expand Down
9 changes: 9 additions & 0 deletions packages/tpetra/core/test/CrsMatrix/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,15 @@ if (
)
endif()

TRIBITS_ADD_EXECUTABLE_AND_TEST(
CrsMatrix_getLocalDiagCopy
SOURCES
CrsMatrix_getLocalDiagCopy.cpp
${TEUCHOS_STD_UNIT_TEST_MAIN}
COMM mpi
STANDARD_PASS_OUTPUT
)

SET(TIMING_INSTALLS "")

INSTALL(TARGETS ${TIMING_INSTALLS}
Expand Down
112 changes: 112 additions & 0 deletions packages/tpetra/core/test/CrsMatrix/CrsMatrix_getLocalDiagCopy.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// @HEADER
// *****************************************************************************
// Tpetra: Templated Linear Algebra Services Package
//
// Copyright 2008 NTESS and the Tpetra contributors.
// SPDX-License-Identifier: BSD-3-Clause
// *****************************************************************************
// @HEADER

#include <Teuchos_UnitTestHarness.hpp>
#include <TpetraCore_ETIHelperMacros.h>

#include <Tpetra_Core.hpp>
#include <Tpetra_Map.hpp>
#include <Tpetra_Vector.hpp>
#include <Tpetra_CrsMatrix.hpp>

namespace { // (anonymous)

template <typename Scalar, typename LO, typename GO, typename Node, int Tag>
void getLocalDiagCopyTest(Teuchos::FancyOStream& out, bool& success) {
using Teuchos::RCP;

using map_type = Tpetra::Map<LO, GO, Node>;
using vec_type = Tpetra::Vector<Scalar, LO, GO, Node>;
using crs_matrix_type = Tpetra::CrsMatrix<Scalar, LO, GO, Node>;
using row_matrix_type = Tpetra::RowMatrix<Scalar, LO, GO, Node>;

using STS = Teuchos::ScalarTraits<Scalar>;
using MT = typename STS::magnitudeType;
const Scalar SC_ONE = STS::one();

using LOT = Teuchos::OrdinalTraits<LO>;
const LO LO_INVALID = LOT::invalid();
const LO LO_ONE = LOT::one();
const GO GO_ONE = Teuchos::OrdinalTraits<GO>::one();

int lclSuccess = 0;
int gblSuccess = 0;

RCP<const Teuchos::Comm<int> > comm = Tpetra::getDefaultComm ();
const size_t numProc = comm->getSize();
const size_t myProc = comm->getRank();

// create a Map
RCP<const map_type> map = Tpetra::createContigMapWithNode<LO,GO,Node> (LO_INVALID,
LO_ONE + LO_ONE,
comm);

// Create a matrix with at most 3 entries per row
RCP<crs_matrix_type> matrix = Teuchos::rcp (new crs_matrix_type (map, 3));
const Scalar rankAsScalar = static_cast<Scalar>(static_cast<MT>(comm->getRank()));

Teuchos::Array<Scalar> vals = {{SC_ONE, rankAsScalar + SC_ONE, SC_ONE}};
for(size_t lclRowIdx = 0; lclRowIdx < 2; ++lclRowIdx) {
const GO gblRowIdx = Teuchos::as<GO>(2*myProc + lclRowIdx);
Teuchos::Array<GO> cols = {{gblRowIdx - GO_ONE, gblRowIdx, gblRowIdx + GO_ONE}};

if((myProc == 0) && (lclRowIdx == 0)) { // First row of the matrix
matrix->insertGlobalValues(gblRowIdx, cols(1, 2), vals(1, 2));
} else if((myProc == numProc - 1) && (lclRowIdx == 1)) { // Last row of the matrix
matrix->insertGlobalValues(gblRowIdx, cols(0, 2), vals(0, 2));
} else {
matrix->insertGlobalValues(gblRowIdx, cols(), vals());
}
}

matrix->fillComplete();

// Make sure that all processes got this far.
{
lclSuccess = success ? 1 : 0;
gblSuccess = 0;
Teuchos::reduceAll<int, int> (*comm, Teuchos::REDUCE_MIN, lclSuccess, Teuchos::outArg (gblSuccess));
success = success && (gblSuccess == 1);
TEST_EQUALITY_CONST( gblSuccess, 1 );
}

RCP<vec_type> diag = Teuchos::rcp(new vec_type(map));
diag->putScalar(-1);

if constexpr (Tag == 0) {
matrix->resumeFill();
}
matrix->getLocalDiagCopy(*diag);
}

// Unit test of getLocalDiagCopy
TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL( CrsMatrix, getLocalDiagCopy, Scalar, LO, GO, Node )
{
getLocalDiagCopyTest<Scalar, LO, GO, Node, 1>(out, success);
}

// Unit test of getLocalDiagCopy
TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL( CrsMatrix, getLocalDiagCopyFillActive, Scalar, LO, GO, Node )
{
getLocalDiagCopyTest<Scalar, LO, GO, Node, 0>(out, success);
}

//
// INSTANTIATIONS
//

#define UNIT_TEST_GROUP( SCALAR, LO, GO, NODE ) \
TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( CrsMatrix, getLocalDiagCopy, SCALAR, LO, GO, NODE ) \
TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( CrsMatrix, getLocalDiagCopyFillActive, SCALAR, LO, GO, NODE )

TPETRA_ETI_MANGLING_TYPEDEFS()

TPETRA_INSTANTIATE_SLGN( UNIT_TEST_GROUP )

} // namespace (anonymous)

0 comments on commit f2e80bc

Please sign in to comment.