Merge pull request #1789 from LLNL/task/rhornung/clang-format-conflicts
Fix clang-format conflicts in unchecked policy branch
MrBurmark authored Feb 18, 2025
2 parents a6acb96 + 3c71080 commit df10eff
Showing 61 changed files with 7,800 additions and 1,757 deletions.
201 changes: 165 additions & 36 deletions docs/sphinx/user_guide/feature/policies.rst

Large diffs are not rendered by default.

55 changes: 55 additions & 0 deletions include/RAJA/pattern/launch/launch_core.hpp
@@ -690,6 +690,21 @@ RAJA_HOST_DEVICE RAJA_INLINE void loop(CONTEXT const& ctx,
body);
}

RAJA_SUPPRESS_HD_WARN
template<typename POLICY_LIST,
typename CONTEXT,
typename SEGMENT,
typename BODY>
RAJA_HOST_DEVICE RAJA_INLINE void loop_icount(CONTEXT const& ctx,
SEGMENT const& segment0,
SEGMENT const& segment1,
BODY const& body)
{

LoopICountExecute<loop_policy<POLICY_LIST>, SEGMENT>::exec(ctx, segment0,
segment1, body);
}
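
For context, here is a usage sketch of the new two-segment loop_icount overload (not part of the diff). It assumes the overload is exposed as RAJA::expt::loop_icount, matching the namespace closed at the end of this file's hunks; that the host sequential backend provides a corresponding two-segment LoopICountExecute specialization; and that the body receives the segment values followed by their zero-based iteration counts.

#include "RAJA/RAJA.hpp"

using launch_pol = RAJA::LaunchPolicy<RAJA::seq_launch_t>;
using loop_pol   = RAJA::LoopPolicy<RAJA::seq_exec>;

void loop_icount_example()
{
  RAJA::TypedRangeSegment<int> seg0(0, 4);
  RAJA::TypedRangeSegment<int> seg1(0, 3);

  RAJA::launch<launch_pol>(
      RAJA::LaunchParams(RAJA::Teams(1), RAJA::Threads(1)),
      [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) {
        // Assumed body signature: segment values, then iteration counts.
        RAJA::expt::loop_icount<loop_pol>(
            ctx, seg0, seg1,
            [&](int i, int j, int icount, int jcount) {
              (void)i; (void)j; (void)icount; (void)jcount;
            });
      });
}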

RAJA_SUPPRESS_HD_WARN
template<typename POLICY_LIST,
typename CONTEXT,
@@ -796,6 +811,46 @@ RAJA_HOST_DEVICE RAJA_INLINE void tile_tcount(CONTEXT const& ctx,
ctx, tile_size0, tile_size1, segment0, segment1, body);
}

template<typename POLICY_LIST,
typename CONTEXT,
typename TILE_T,
typename SEGMENT,
typename BODY>
RAJA_HOST_DEVICE RAJA_INLINE void tile(CONTEXT const& ctx,
TILE_T tile_size0,
TILE_T tile_size1,
TILE_T tile_size2,
SEGMENT const& segment0,
SEGMENT const& segment1,
SEGMENT const& segment2,
BODY const& body)
{

TileExecute<loop_policy<POLICY_LIST>, SEGMENT>::exec(
ctx, tile_size0, tile_size1, tile_size2, segment0, segment1, segment2,
body);
}

template<typename POLICY_LIST,
typename CONTEXT,
typename TILE_T,
typename SEGMENT,
typename BODY>
RAJA_HOST_DEVICE RAJA_INLINE void tile_tcount(CONTEXT const& ctx,
TILE_T tile_size0,
TILE_T tile_size1,
TILE_T tile_size2,
SEGMENT const& segment0,
SEGMENT const& segment1,
SEGMENT const& segment2,
BODY const& body)
{

TileTCountExecute<loop_policy<POLICY_LIST>, SEGMENT>::exec(
ctx, tile_size0, tile_size1, tile_size2, segment0, segment1, segment2,
body);
}

} // namespace expt

} // namespace RAJA
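
For context, a usage sketch (not part of the diff) of the new three-segment tile overload added above. Assumptions: the overload is exposed as RAJA::expt::tile per the namespace closed above, a three-segment TileExecute specialization exists for the chosen backend, and the body receives one sub-segment per input segment; the tile_tcount form would additionally pass the three tile indices. The LaunchParams/Teams/Threads spellings follow recent RAJA releases.

#include "RAJA/RAJA.hpp"

using launch_pol = RAJA::LaunchPolicy<RAJA::seq_launch_t>;
using loop_pol   = RAJA::LoopPolicy<RAJA::seq_exec>;

void tile3_example()
{
  RAJA::TypedRangeSegment<int> seg0(0, 8), seg1(0, 8), seg2(0, 8);

  RAJA::launch<launch_pol>(
      RAJA::LaunchParams(RAJA::Teams(1), RAJA::Threads(1)),
      [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) {
        RAJA::expt::tile<loop_pol>(
            ctx, 4, 4, 4, seg0, seg1, seg2,
            [&](auto tile0, auto tile1, auto tile2) {
              // Each tileN is a slice of segN with at most 4 elements;
              // iterate it with a nested loop (single-segment form shown).
              RAJA::loop<loop_pol>(ctx, tile0, [&](int i) { (void)i; });
              (void)tile1; (void)tile2;
            });
      });
}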
59 changes: 59 additions & 0 deletions include/RAJA/policy/cuda/kernel/For.hpp
@@ -29,6 +29,65 @@ namespace RAJA
namespace internal
{

/*
* Executor for work sharing inside CudaKernel.
* Mapping without checking from IndexMapper to indices
* Assigns the loop index to offset ArgumentId
* Meets all sync requirements
*/
template<typename Data,
camp::idx_t ArgumentId,
typename IndexMapper,
kernel_sync_requirement sync,
typename... EnclosedStmts,
typename Types>
struct CudaStatementExecutor<
Data,
statement::For<
ArgumentId,
RAJA::policy::cuda::
cuda_indexer<iteration_mapping::DirectUnchecked, sync, IndexMapper>,
EnclosedStmts...>,
Types>
{

using stmt_list_t = StatementList<EnclosedStmts...>;

// Set the argument type for this loop
using NewTypes = setSegmentTypeFromData<Types, ArgumentId, Data>;

using enclosed_stmts_t =
CudaStatementListExecutor<Data, stmt_list_t, NewTypes>;

using diff_t = segment_diff_type<ArgumentId, Data>;

using DimensionCalculator = RAJA::internal::KernelDimensionCalculator<
RAJA::policy::cuda::
cuda_indexer<iteration_mapping::DirectUnchecked, sync, IndexMapper>>;

static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
{
const diff_t i = IndexMapper::template index<diff_t>();

// Assign the index to the argument
data.template assign_offset<ArgumentId>(i);

// execute enclosed statements
enclosed_stmts_t::exec(data, thread_active);
}

static inline LaunchDims calculateDimensions(Data const& data)
{
const diff_t len = segment_length<ArgumentId>(data);

LaunchDims dims = DimensionCalculator::get_dimensions(len);

LaunchDims enclosed_dims = enclosed_stmts_t::calculateDimensions(data);

return combine(dims, enclosed_dims);
}
};
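
To make the new mapping concrete, here is a simplified illustration (not the library source) of how DirectUnchecked differs from the existing Direct mapping: the bounds test that masks surplus threads is omitted, so the launch extent must match the segment length exactly.

__device__ void for_direct(int len, int* out)
{
  const int i = threadIdx.x;     // what IndexMapper::index() provides
  if (i < len) { out[i] = i; }   // Direct: surplus threads are masked out
}

__device__ void for_direct_unchecked(int len, int* out)
{
  (void)len;                     // DirectUnchecked: no bounds test at all;
  const int i = threadIdx.x;     // the caller guarantees the launch shape
  out[i] = i;                    // matches the segment length exactly
}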

/*
* Executor for work sharing inside CudaKernel.
* Mapping directly from IndexMapper to indices
61 changes: 61 additions & 0 deletions include/RAJA/policy/cuda/kernel/ForICount.hpp
@@ -29,6 +29,67 @@ namespace RAJA
namespace internal
{

/*
* Executor for work sharing inside CudaKernel.
* Provides a direct unchecked mapping.
* Assigns the loop index to offset ArgumentId
* Assigns the loop index to param ParamId
* Meets all sync requirements
*/
template<typename Data,
camp::idx_t ArgumentId,
typename ParamId,
typename IndexMapper,
kernel_sync_requirement sync,
typename... EnclosedStmts,
typename Types>
struct CudaStatementExecutor<
Data,
statement::ForICount<
ArgumentId,
ParamId,
RAJA::policy::cuda::
cuda_indexer<iteration_mapping::DirectUnchecked, sync, IndexMapper>,
EnclosedStmts...>,
Types>
: CudaStatementExecutor<
Data,
statement::For<ArgumentId,
RAJA::policy::cuda::cuda_indexer<
iteration_mapping::DirectUnchecked,
sync,
IndexMapper>,
EnclosedStmts...>,
Types>
{

using Base = CudaStatementExecutor<
Data,
statement::For<
ArgumentId,
RAJA::policy::cuda::cuda_indexer<iteration_mapping::DirectUnchecked,
sync,
IndexMapper>,
EnclosedStmts...>,
Types>;

using typename Base::diff_t;
using typename Base::enclosed_stmts_t;

static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
{
// grid stride loop
const diff_t i = IndexMapper::template index<diff_t>();

// Assign the index to the argument and param
data.template assign_offset<ArgumentId>(i);
data.template assign_param<ParamId>(i);

// execute enclosed statements
enclosed_stmts_t::exec(data, thread_active);
}
};
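
A hypothetical usage sketch of this executor through RAJA::kernel_param (not part of the diff). The policy alias cuda_thread_x_direct_unchecked is an assumed spelling; the documented names are in the policies.rst changes in this PR. With kernel_param and the default Lambda<0>, the body receives the segment value followed by the param.

#include "RAJA/RAJA.hpp"

using Pol = RAJA::KernelPolicy<
    RAJA::statement::CudaKernel<
        RAJA::statement::ForICount<0, RAJA::statement::Param<0>,
            RAJA::cuda_thread_x_direct_unchecked,  // assumed alias
            RAJA::statement::Lambda<0>>>>;

void for_icount_example(int* out, int N)
{
  // No bounds test is performed, so the launch extent in x must equal N
  // exactly (and N must fit within a single thread block here).
  RAJA::kernel_param<Pol>(
      RAJA::make_tuple(RAJA::TypedRangeSegment<int>(0, N)),
      RAJA::make_tuple(int(0)),                    // ParamId 0: loop count
      [=] RAJA_DEVICE (int idx, int count) {
        out[idx] = count;  // for a 0-based range, count equals idx
      });
}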

/*
* Executor for work sharing inside CudaKernel.
* Provides a direct mapping.
86 changes: 86 additions & 0 deletions include/RAJA/policy/cuda/kernel/Tile.hpp
@@ -42,6 +42,92 @@ namespace RAJA
namespace internal
{

/*!
* A specialized RAJA::kernel cuda_impl executor for statement::Tile
* Assigns the tile segment to segment ArgumentId
* Meets all sync requirements
*/
template<typename Data,
camp::idx_t ArgumentId,
camp::idx_t chunk_size,
typename IndexMapper,
kernel_sync_requirement sync,
typename... EnclosedStmts,
typename Types>
struct CudaStatementExecutor<
Data,
statement::Tile<
ArgumentId,
RAJA::tile_fixed<chunk_size>,
RAJA::policy::cuda::
cuda_indexer<iteration_mapping::DirectUnchecked, sync, IndexMapper>,
EnclosedStmts...>,
Types>
{

using stmt_list_t = StatementList<EnclosedStmts...>;

using enclosed_stmts_t = CudaStatementListExecutor<Data, stmt_list_t, Types>;

using diff_t = segment_diff_type<ArgumentId, Data>;

using DimensionCalculator = KernelDimensionCalculator<
RAJA::policy::cuda::
cuda_indexer<iteration_mapping::DirectUnchecked, sync, IndexMapper>>;

static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
{
// Get the segment referenced by this Tile statement
auto& segment = camp::get<ArgumentId>(data.segment_tuple);

using segment_t = camp::decay<decltype(segment)>;

// compute trip count
const diff_t i =
IndexMapper::template index<diff_t>() * static_cast<diff_t>(chunk_size);

// Keep copy of original segment, so we can restore it
segment_t orig_segment = segment;

// Assign our new tiled segment
segment = orig_segment.slice(i, static_cast<diff_t>(chunk_size));

// execute enclosed statements
enclosed_stmts_t::exec(data, thread_active);

// Set range back to original values
segment = orig_segment;
}

static inline LaunchDims calculateDimensions(Data const& data)
{
// Compute how many chunks
const diff_t full_len = segment_length<ArgumentId>(data);
const diff_t len =
RAJA_DIVIDE_CEILING_INT(full_len, static_cast<diff_t>(chunk_size));

LaunchDims dims = DimensionCalculator::get_dimensions(len);

// privatize data, so we can mess with the segments
using data_t = camp::decay<Data>;
data_t private_data = data;

// Get original segment
auto& segment = camp::get<ArgumentId>(private_data.segment_tuple);

// restrict to first tile
segment = segment.slice(0, static_cast<diff_t>(chunk_size));

// NOTE: We do not detect improper uses of direct_unchecked policies under
// tiling. This happens when using a direct unchecked policy on a tiled
// range that is not evenly divisible by chunk_size.
LaunchDims enclosed_dims =
enclosed_stmts_t::calculateDimensions(private_data);

return combine(dims, enclosed_dims);
}
};
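
Because the NOTE above means misuse is silent, a host-side sanity check before launching can be useful. A minimal sketch, assuming the caller knows the segment length and the chunk_size baked into the policy:

#include <cassert>

inline void check_unchecked_tiling(long long len, long long chunk_size)
{
  // Every tile produced by slice(i, chunk_size) must contain exactly
  // chunk_size elements for a nested DirectUnchecked mapping to be valid.
  assert(len % chunk_size == 0 &&
         "DirectUnchecked under Tile requires len to be a multiple of chunk_size");
}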

/*!
* A specialized RAJA::kernel cuda_impl executor for statement::Tile
* Assigns the tile segment to segment ArgumentId
76 changes: 76 additions & 0 deletions include/RAJA/policy/cuda/kernel/TileTCount.hpp
@@ -42,6 +42,82 @@ namespace RAJA
namespace internal
{

/*!
* A specialized RAJA::kernel cuda_impl executor for statement::TileTCount
* Assigns the tile segment to segment ArgumentId
* Assigns the tile index to param ParamId
* Meets all sync requirements
*/
template<typename Data,
camp::idx_t ArgumentId,
typename ParamId,
camp::idx_t chunk_size,
typename IndexMapper,
kernel_sync_requirement sync,
typename... EnclosedStmts,
typename Types>
struct CudaStatementExecutor<
Data,
statement::TileTCount<
ArgumentId,
ParamId,
RAJA::tile_fixed<chunk_size>,
RAJA::policy::cuda::
cuda_indexer<iteration_mapping::DirectUnchecked, sync, IndexMapper>,
EnclosedStmts...>,
Types>
: public CudaStatementExecutor<
Data,
statement::Tile<ArgumentId,
RAJA::tile_fixed<chunk_size>,
RAJA::policy::cuda::cuda_indexer<
iteration_mapping::DirectUnchecked,
sync,
IndexMapper>,
EnclosedStmts...>,
Types>
{

using Base = CudaStatementExecutor<
Data,
statement::Tile<
ArgumentId,
RAJA::tile_fixed<chunk_size>,
RAJA::policy::cuda::cuda_indexer<iteration_mapping::DirectUnchecked,
sync,
IndexMapper>,
EnclosedStmts...>,
Types>;

using typename Base::diff_t;
using typename Base::enclosed_stmts_t;

static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
{
// Get the segment referenced by this Tile statement
auto& segment = camp::get<ArgumentId>(data.segment_tuple);

using segment_t = camp::decay<decltype(segment)>;

// compute trip count
const diff_t t = IndexMapper::template index<diff_t>();
const diff_t i = t * static_cast<diff_t>(chunk_size);

// Keep copy of original segment, so we can restore it
segment_t orig_segment = segment;

// Assign our new tiled segment
segment = orig_segment.slice(i, static_cast<diff_t>(chunk_size));
data.template assign_param<ParamId>(t);

// execute enclosed statements
enclosed_stmts_t::exec(data, thread_active);

// Set range back to original values
segment = orig_segment;
}
};
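
A hypothetical end-to-end sketch of this TileTCount executor via RAJA::kernel_param (not part of the diff). The block-level alias cuda_block_x_direct_unchecked is an assumed spelling (see the policies.rst changes for the documented names); RAJA::tile_fixed and RAJA::cuda_thread_x_direct are existing policies.

#include "RAJA/RAJA.hpp"

constexpr int TILE = 128;

using Pol = RAJA::KernelPolicy<
    RAJA::statement::CudaKernel<
        RAJA::statement::TileTCount<0, RAJA::statement::Param<0>,
            RAJA::tile_fixed<TILE>,
            RAJA::cuda_block_x_direct_unchecked,   // assumed alias
            RAJA::statement::For<0, RAJA::cuda_thread_x_direct,
                RAJA::statement::Lambda<0>>>>>;

void tile_tcount_example(int* out, int N)
{
  // N is assumed to be a multiple of TILE so every tile is full, which is
  // what an unchecked block-level mapping requires.
  RAJA::kernel_param<Pol>(
      RAJA::make_tuple(RAJA::TypedRangeSegment<int>(0, N)),
      RAJA::make_tuple(int(0)),                    // ParamId 0: tile index
      [=] RAJA_DEVICE (int idx, int tile_num) {
        out[idx] = tile_num;                       // tile that idx fell in
      });
}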

/*!
* A specialized RAJA::kernel cuda_impl executor for statement::TileTCount
* Assigns the tile segment to segment ArgumentId
(Diffs for the remaining changed files are not shown.)
