
Commit 8c3b93a

committed Mar 5, 2025
clang-tidy
1 parent 938435d commit 8c3b93a

File tree

.clang-tidy
src/encoder/target_encoder.cc
src/encoder/target_encoder.cu
src/models/tree/build_tree.cc
src/models/tree/build_tree.cu
src/models/tree/build_tree.h

6 files changed: +73 -67 lines changed

‎.clang-tidy

+1

@@ -15,6 +15,7 @@ Checks: >
   -misc-include-cleaner,
   -misc-use-internal-linkage,
   modernize-*,
+  -modernize-use-trailing-return-type,
   hicpp-*,
   -hicpp-signed-bitwise,
   readability-*,
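For context, modernize-use-trailing-return-type rewrites conventional signatures into trailing-return-type form; prefixing it with '-' opts out of that single check while keeping the rest of modernize-*. A minimal illustration of the two spellings (hypothetical function, not from this repository):

// Conventional signature, left alone now that the check is disabled:
int Add(int a, int b) { return a + b; }

// Equivalent trailing-return-type spelling the check would otherwise enforce:
// auto Add(int a, int b) -> int { return a + b; }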

‎src/encoder/target_encoder.cc

+1 -1

@@ -29,7 +29,7 @@ std::vector<std::unordered_map<T, int>> create_categories_map(
   const legate::Rect<1>& row_pointers_shape)
 {
   std::vector<std::unordered_map<T, int>> categories_map;
-  for (int feature_idx = row_pointers_shape.lo[0]; feature_idx <= row_pointers_shape.hi[0] - 1;
+  for (auto feature_idx = row_pointers_shape.lo[0]; feature_idx <= row_pointers_shape.hi[0] - 1;
       feature_idx++) {
    auto feature_start = row_pointers[feature_idx];
    auto feature_end = row_pointers[feature_idx + 1];
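The only change here is the loop index type: the bounds come from a legate::Rect<1>, whose coordinates are 64-bit, so deducing the index with auto avoids the narrowing to int that clang-tidy flags. A standalone sketch of the same pattern, assuming plain int64_t row pointers and illustrative names:

#include <cstdint>
#include <vector>

// Walk CSR-style feature ranges; the index type follows the 64-bit bounds
// instead of being hard-coded as int, which clang-tidy would flag.
inline void VisitFeatures(const std::vector<std::int64_t>& row_pointers)
{
  const auto num_features = static_cast<std::int64_t>(row_pointers.size()) - 1;
  for (auto feature_idx = std::int64_t{0}; feature_idx < num_features; feature_idx++) {
    auto feature_start = row_pointers[feature_idx];      // first category of this feature
    auto feature_end   = row_pointers[feature_idx + 1];  // one past the last category
    static_cast<void>(feature_start);                    // placeholder for per-feature work
    static_cast<void>(feature_end);
  }
}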

‎src/encoder/target_encoder.cu

+20 -13

@@ -18,21 +18,22 @@
 #include <vector>
 #include <unordered_map>
 #include <random>
+#include <tcb/span.hpp>
 #include "../cpp_utils/cpp_utils.cuh"

 namespace legateboost {

 template <typename T>
 struct CategoriesMap {
-  legate::AccessorRO<T, 1> categories;
-  legate::AccessorRO<int64_t, 1> row_pointers;
+  tcb::span<const T> categories;
+  tcb::span<const int64_t> row_pointers;
   __device__ int64_t GetIndex(T x, int64_t feature_idx) const
   {
-    auto begin = row_pointers[feature_idx];
-    auto end = row_pointers[feature_idx + 1];
-    auto result = thrust::find(thrust::seq, categories.ptr(0) + begin, categories.ptr(0) + end, x);
-    if (result == categories.ptr(0) + end) { return -1; }
-    return result - categories.ptr(0);
+    const auto* begin = categories.begin() + row_pointers[feature_idx];
+    const auto* end = categories.begin() + row_pointers[feature_idx + 1];
+    const auto* result = thrust::find(thrust::seq, begin, end, x);
+    if (result == end) { return -1; }
+    return result - categories.begin();
   }
 };

@@ -53,7 +54,9 @@ struct target_encoder_mean_fn {
     auto [row_pointers, row_pointers_shape, row_pointers_accessor] =
       GetInputStore<int64_t, 1>(context.input(3).data());

-    CategoriesMap<T> categories_map{categories_accessor, row_pointers_accessor};
+    const CategoriesMap<T> categories_map{
+      {categories_accessor.ptr(0), categories_shape.volume()},
+      {row_pointers_accessor.ptr(0), row_pointers_shape.volume()}};
     auto cv_fold = context.scalars().at(0).value<int64_t>();
     auto do_cv = context.scalars().at(1).value<bool>();

@@ -68,7 +71,7 @@
     auto means =
       context.reduction(0).data().reduce_accessor<legate::SumReduction<double>, true, 3>();

-    auto stream = context.get_task_stream();
+    auto* stream = context.get_task_stream();
     auto thrust_alloc = ThrustAllocator(legate::Memory::GPU_FB_MEM);
     auto policy = DEFAULT_POLICY(thrust_alloc).on(stream);
     thrust::for_each_n(

@@ -109,7 +112,9 @@ struct target_encoder_variance_fn {
     auto [row_pointers, row_pointers_shape, row_pointers_accessor] =
       GetInputStore<int64_t, 1>(context.input(3).data());

-    CategoriesMap<T> categories_map{categories_accessor, row_pointers_accessor};
+    const CategoriesMap<T> categories_map{
+      {categories_accessor.ptr(0), categories_shape.volume()},
+      {row_pointers_accessor.ptr(0), row_pointers_shape.volume()}};

     // Sum and count per each category and output
     auto mean = context.input(4).data();

@@ -137,7 +142,7 @@ struct target_encoder_variance_fn {
     auto y_variance =
       context.reduction(1).data().reduce_accessor<legate::SumReduction<double>, true, 1, true>();

-    auto stream = context.get_task_stream();
+    auto* stream = context.get_task_stream();
     auto thrust_alloc = ThrustAllocator(legate::Memory::GPU_FB_MEM);
     auto policy = DEFAULT_POLICY(thrust_alloc).on(stream);
     thrust::for_each_n(

@@ -200,8 +205,10 @@ struct target_encoder_encode_fn {
       cv_indices = cv_indices_accessor;
     }

-    CategoriesMap<T> categories_map{categories_accessor, row_pointers_accessor};
-    auto stream = context.get_task_stream();
+    const CategoriesMap<T> categories_map{
+      {categories_accessor.ptr(0), categories_shape.volume()},
+      {row_pointers_accessor.ptr(0), row_pointers_shape.volume()}};
+    auto* stream = context.get_task_stream();
     auto thrust_alloc = ThrustAllocator(legate::Memory::GPU_FB_MEM);
     auto policy = DEFAULT_POLICY(thrust_alloc).on(stream);
     thrust::for_each_n(policy,
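Throughout this file the CategoriesMap members change from Legate accessors to tcb::span views built from accessor.ptr(0) and the store's shape.volume(); the lookup itself is unchanged: a linear find inside the feature's slice of a CSR-style categories array. Below is a host-side analogue of GetIndex using the standard library instead of tcb::span and thrust (illustrative names, not part of the commit):

#include <algorithm>
#include <cstdint>
#include <vector>

// Host-side analogue of CategoriesMap::GetIndex: categories for every feature are
// packed into one array, and row_pointers[f] .. row_pointers[f + 1] bounds feature f.
template <typename T>
std::int64_t GetCategoryIndex(const std::vector<T>& categories,
                              const std::vector<std::int64_t>& row_pointers,
                              T x,
                              std::int64_t feature_idx)
{
  auto begin  = categories.begin() + row_pointers[feature_idx];
  auto end    = categories.begin() + row_pointers[feature_idx + 1];
  auto result = std::find(begin, end, x);
  if (result == end) { return -1; }  // category not seen for this feature
  return result - categories.begin();
}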

‎src/models/tree/build_tree.cc

+8 -7

@@ -178,8 +178,8 @@ auto SelectSplitSamples(legate::TaskContext context,
     split_proposals[end - 1] = std::numeric_limits<T>::infinity();
   }

-  return SparseSplitProposals<T>(
-    split_proposals, row_pointers, num_features, split_proposals_tmp.size());
+  return SparseSplitProposals<T>({split_proposals.ptr(0), split_proposals_tmp.size()},
+                                 {row_pointers.ptr(0), narrow<std::size_t>(num_features + 1)});
 }

 template <typename T>

@@ -198,8 +198,9 @@ struct TreeBuilder {
   {
     sorted_positions = legate::create_buffer<std::tuple<int32_t, int32_t>>(num_rows);
     for (auto i = 0; i < num_rows; ++i) { sorted_positions[i] = {0, i}; }
-    const std::size_t max_bytes      = 1000000000;  // 1 GB
-    const std::size_t bytes_per_node = num_outputs * split_proposals.histogram_size * sizeof(GPair);
+    const std::size_t max_bytes = 1000000000;  // 1 GB
+    const std::size_t bytes_per_node =
+      num_outputs * split_proposals.HistogramSize() * sizeof(GPair);
     const std::size_t max_histogram_nodes = std::max(1UL, max_bytes / bytes_per_node);
     int depth = 0;
     while (BinaryTree::LevelEnd(depth + 1) <= max_histogram_nodes && depth <= max_depth) {

@@ -208,7 +209,7 @@
     histogram = Histogram<GPair>(BinaryTree::LevelBegin(0),
                                  BinaryTree::LevelEnd(depth),
                                  num_outputs,
-                                 split_proposals.histogram_size);
+                                 split_proposals.HistogramSize());
     max_batch_size = max_histogram_nodes;
   }
   template <typename TYPE>

@@ -243,7 +244,7 @@
       context,
       // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
       tcb::span<double>(reinterpret_cast<double*>(histogram.Ptr(batch.node_idx_begin)),
-                        batch.NodesInBatch() * num_outputs * split_proposals.histogram_size * 2));
+                        batch.NodesInBatch() * num_outputs * split_proposals.HistogramSize() * 2));
     this->Scan(histogram, batch, tree);
   }

@@ -383,7 +384,7 @@

     histogram.Destroy();
     histogram = Histogram<GPair>(
-      batch.node_idx_begin, batch.node_idx_end, num_outputs, split_proposals.histogram_size);
+      batch.node_idx_begin, batch.node_idx_end, num_outputs, split_proposals.HistogramSize());
     return histogram;
   }
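These hunks replace the stored histogram_size field with the derived HistogramSize(); the surrounding batching logic caps cached histograms at roughly 1 GB by computing the per-node footprint and then finding the deepest complete tree level that fits. A standalone sketch of that sizing calculation, assuming GPair is a gradient/hessian pair of doubles and using an illustrative LevelEnd definition:

#include <algorithm>
#include <cstddef>

// Nodes contained in a complete binary tree of `depth` full levels (illustrative).
inline std::size_t LevelEnd(int depth) { return (std::size_t{1} << depth) - 1; }

// Largest number of tree nodes whose histograms can be cached within max_bytes.
// Assumes num_outputs and histogram_size are non-zero.
inline std::size_t MaxCachedHistogramNodes(std::size_t num_outputs,
                                           std::size_t histogram_size,  // total bins across features
                                           int max_depth)
{
  const std::size_t max_bytes      = 1000000000;          // 1 GB budget, as in the commit
  const std::size_t gpair_bytes    = 2 * sizeof(double);  // assumed gradient + hessian pair
  const std::size_t bytes_per_node = num_outputs * histogram_size * gpair_bytes;
  const std::size_t max_nodes      = std::max<std::size_t>(1, max_bytes / bytes_per_node);

  int depth = 0;
  while (LevelEnd(depth + 1) <= max_nodes && depth <= max_depth) { depth++; }
  return LevelEnd(depth);  // all nodes up to this level fit in the budget
}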

‎src/models/tree/build_tree.cu

+19 -16

@@ -508,16 +508,16 @@ struct HistogramKernel {
   // Find feature groups
   // This is a bin packing problem
   // We want to pack as many features as possible into a group
-  std::vector<int> split_proposal_row_pointers(split_proposals.num_features + 1);
+  std::vector<int> split_proposal_row_pointers(split_proposals.row_pointers.size());
   CHECK_CUDA(cudaMemcpyAsync(split_proposal_row_pointers.data(),
-                             split_proposals.row_pointers.ptr(0),
-                             (split_proposals.num_features + 1) * sizeof(int),
+                             split_proposals.row_pointers.data(),
+                             split_proposals.row_pointers.size() * sizeof(int),
                              cudaMemcpyDeviceToHost,
                              stream));
   CHECK_CUDA(cudaStreamSynchronize(stream));
   std::vector<int> feature_groups({0});
   int current_bins_in_group = 0;
-  for (int i = 0; i < split_proposals.num_features; i++) {
+  for (int i = 0; i < split_proposals.NumFeatures(); i++) {
     int const bins_in_feature =
       split_proposal_row_pointers[i + 1] - split_proposal_row_pointers[i];
     EXPECT(bins_in_feature <= kMaxSharedBins, "Too many bins in a feature");

@@ -527,9 +527,10 @@
     }
     current_bins_in_group += bins_in_feature;
   }
-  feature_groups.push_back(split_proposals.num_features);
+  feature_groups.push_back(split_proposals.NumFeatures());
   num_groups = narrow<int>(feature_groups.size() - 1);
-  EXPECT(num_groups * kMaxSharedBins >= split_proposals.histogram_size, "Too few feature groups");
+  EXPECT(num_groups * kMaxSharedBins >= split_proposals.HistogramSize(),
+         "Too few feature groups");
   this->feature_groups = legate::create_buffer<int>(num_groups + 1);
   CHECK_CUDA(cudaMemcpyAsync(this->feature_groups.ptr(0),
                              feature_groups.data(),

@@ -551,9 +552,9 @@
                   int64_t seed,
                   cudaStream_t stream)
  {
-   if (batch.InstancesInBatch() == 0) return;
+   if (batch.InstancesInBatch() == 0) { return; }

-   int const average_features_per_group = split_proposals.num_features / num_groups;
+   int const average_features_per_group = split_proposals.NumFeatures() / num_groups;
    std::size_t const average_elements_per_group =
      batch.InstancesInBatch() * average_features_per_group;
    auto min_blocks = (average_elements_per_group + kItemsPerTile - 1) / kItemsPerTile;

@@ -709,7 +710,7 @@ __global__ void __launch_bounds__(BLOCK_THREADS)
   double thread_best_gain = 0;
   int thread_best_bin_idx = -1;

-  for (int bin_idx = narrow_cast<int>(threadIdx.x); bin_idx < split_proposals.histogram_size;
+  for (int bin_idx = narrow_cast<int>(threadIdx.x); bin_idx < split_proposals.HistogramSize();
       bin_idx += BLOCK_THREADS) {
    // Check if this feature is in the feature set
    if (optional_feature_set.has_value() &&

@@ -952,10 +953,11 @@ auto SelectSplitSamples(legate::TaskContext context,
   row_samples.destroy();
   draft_proposals.destroy();
   out_keys.destroy();
-  return SparseSplitProposals<T>(split_proposals, row_pointers, num_features, n_unique);
+  return SparseSplitProposals<T>({split_proposals.ptr(0), narrow<std::size_t>(n_unique)},
+                                 {row_pointers.ptr(0), narrow<std::size_t>(num_features + 1)});
 }

-// Can't put a device l1_regularization in constructor so make this a function
+// Can't put a lambda in constructor so make this a function
 void FillPositions(const legate::Buffer<cuda::std::tuple<int32_t, int32_t>>& sorted_positions,
                    std::size_t num_rows,
                    cudaStream_t stream)

@@ -993,8 +995,9 @@ struct TreeBuilder {
   // User a fixed reasonable upper bound on memory usage
   // CAUTION: all workers MUST have the same max_batch_size
   // Therefore we don't try to calculate this based on available memory
-  const std::size_t max_bytes      = 1000000000;  // 1 GB
-  const std::size_t bytes_per_node = num_outputs * split_proposals.histogram_size * sizeof(GPair);
+  const std::size_t max_bytes = 1000000000;  // 1 GB
+  const std::size_t bytes_per_node =
+    num_outputs * split_proposals.HistogramSize() * sizeof(GPair);
   const std::size_t max_histogram_nodes = std::max(1UL, max_bytes / bytes_per_node);
   int depth = 0;
   while (BinaryTree::LevelEnd(depth + 1) <= max_histogram_nodes && depth <= max_depth) {

@@ -1003,7 +1006,7 @@
     cached_histogram = Histogram<IntegerGPair>(BinaryTree::LevelBegin(0),
                                                BinaryTree::LevelEnd(depth),
                                                num_outputs,
-                                               split_proposals.histogram_size,
+                                               split_proposals.HistogramSize(),
                                                stream);
     max_batch_size = max_histogram_nodes;
   }

@@ -1100,7 +1103,7 @@
       context,
       // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
       tcb::span<ReduceT>(reinterpret_cast<ReduceT*>(histogram.Ptr(batch.node_idx_begin)),
-                         batch.NodesInBatch() * num_outputs * split_proposals.histogram_size * 2),
+                         batch.NodesInBatch() * num_outputs * split_proposals.HistogramSize() * 2),
      stream);

     const int kScanBlockThreads = 256;

@@ -1189,7 +1192,7 @@
     cached_histogram = Histogram<IntegerGPair>(batch.node_idx_begin,
                                                batch.node_idx_end,
                                                num_outputs,
-                                               split_proposals.histogram_size,
+                                               split_proposals.HistogramSize(),
                                                stream);
     return cached_histogram;
   }
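The grouping code now sizes its host copy from row_pointers.size() and calls NumFeatures() and HistogramSize(), but the underlying greedy bin packing is unchanged: consecutive features are appended to a group until adding the next one would exceed kMaxSharedBins. A host-only sketch of that packing over CSR row pointers (an illustrative reconstruction; the group-closing branch falls between the hunks shown above):

#include <cassert>
#include <vector>

// Greedily pack consecutive features into groups of at most max_shared_bins bins.
// row_pointers has num_features + 1 entries; feature i spans
// [row_pointers[i], row_pointers[i + 1]) bins. Returns group boundaries as feature
// indices, so group g covers features [groups[g], groups[g + 1]).
std::vector<int> PackFeatureGroups(const std::vector<int>& row_pointers, int max_shared_bins)
{
  const int num_features = static_cast<int>(row_pointers.size()) - 1;
  std::vector<int> feature_groups{0};
  int current_bins_in_group = 0;
  for (int i = 0; i < num_features; i++) {
    const int bins_in_feature = row_pointers[i + 1] - row_pointers[i];
    assert(bins_in_feature <= max_shared_bins);  // a single feature must fit in one group
    if (current_bins_in_group + bins_in_feature > max_shared_bins) {
      feature_groups.push_back(i);  // close the current group before feature i
      current_bins_in_group = 0;
    }
    current_bins_in_group += bins_in_feature;
  }
  feature_groups.push_back(num_features);  // close the final group
  return feature_groups;
}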

‎src/models/tree/build_tree.h

+24 -30

@@ -155,56 +155,50 @@ __host__ __device__ inline auto CalculateLeafValue(double G,
 template <typename T>
 class SparseSplitProposals {
  public:
-  legate::Buffer<T, 1> split_proposals;
-  legate::Buffer<int32_t, 1> row_pointers;
-  int32_t num_features;
-  std::size_t histogram_size;
+  tcb::span<T> split_proposals;
+  tcb::span<int32_t> row_pointers;
   // The rightmost split proposal for each feature must be +inf
-  SparseSplitProposals(const legate::Buffer<T, 1>& split_proposals,
-                       const legate::Buffer<int32_t, 1>& row_pointers,
-                       int32_t num_features,
-                       std::size_t histogram_size)
-    : split_proposals(split_proposals),
-      row_pointers(row_pointers),
-      num_features(num_features),
-      histogram_size(histogram_size)
+  SparseSplitProposals(const tcb::span<T>& split_proposals, const tcb::span<int32_t> row_pointers)
+    : split_proposals(split_proposals), row_pointers(row_pointers)
   {
   }

+  [[nodiscard]] __host__ __device__ std::size_t HistogramSize() const
+  {
+    return split_proposals.size();
+  }
+  [[nodiscard]] __host__ __device__ std::size_t NumFeatures() const
+  {
+    return row_pointers.size() - 1;
+  }
   // Returns the bin index for a given feature and value
   // If the value is not in the split proposals, -1 is returned
 #ifdef __CUDACC__
   __device__ auto FindBin(T x, int feature) const -> int
   {
-    auto feature_row_begin = row_pointers[feature];
-    auto feature_row_end = row_pointers[feature + 1];
-    auto* ptr = thrust::lower_bound(thrust::seq,
-                                    split_proposals.ptr(0) + feature_row_begin,
-                                    split_proposals.ptr(0) + feature_row_end,
-                                    x);
-    EXPECT_DEVICE(ptr != split_proposals.ptr(0) + feature_row_end,
-                  "Value not found in split proposals");
-    return ptr - split_proposals.ptr(0);
+    const auto begin = split_proposals.begin() + row_pointers[feature];
+    const auto end = split_proposals.begin() + row_pointers[feature + 1];
+    const auto* result = thrust::lower_bound(thrust::seq, begin, end, x);
+    EXPECT_DEVICE(result != end, "Value not found in split proposals");
+    return result - split_proposals.begin();
   }
 #else
   [[nodiscard]] auto FindBin(T x, int feature) const -> int
   {
-    auto feature_row_begin = legate::coord_t{row_pointers[feature]};
-    auto feature_row_end = legate::coord_t{row_pointers[feature + 1]};
-    auto* ptr = std::lower_bound(
-      split_proposals.ptr(feature_row_begin), split_proposals.ptr(feature_row_end), x);
-    EXPECT(ptr != split_proposals.ptr(feature_row_end), "Value not found in split proposals");
-    return ptr - split_proposals.ptr(0);
+    const auto begin = split_proposals.begin() + row_pointers[feature];
+    const auto end = split_proposals.begin() + row_pointers[feature + 1];
+    const auto* result = std::lower_bound(begin, end, x);
+    EXPECT(result != end, "Value not found in split proposals");
+    return result - split_proposals.begin();
   }
 #endif

 #ifdef __CUDACC__
   __host__ __device__ auto FindFeature(int bin_idx) const -> int
   {
     // Binary search for the feature
-    return thrust::upper_bound(
-             thrust::seq, row_pointers.ptr(0), row_pointers.ptr(num_features), bin_idx) -
-           row_pointers.ptr(0) - 1;
+    return thrust::upper_bound(thrust::seq, row_pointers.begin(), row_pointers.end(), bin_idx) -
+           row_pointers.begin() - 1;
   }
 #endif

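After this refactor SparseSplitProposals keeps only a CSR-style pair of views: split_proposals concatenates all bin boundaries, row_pointers (NumFeatures() + 1 entries) marks each feature's slice, and HistogramSize() is simply the total bin count. A host-only sketch of the two lookups over that layout, using standard containers instead of tcb::span (illustrative, not the library's API):

#include <algorithm>
#include <vector>

// CSR-style split proposals: feature f owns bins [row_pointers[f], row_pointers[f + 1]).
struct SparseSplitProposalsSketch {
  std::vector<double> split_proposals;  // concatenated, sorted per feature; last entry per feature is +inf
  std::vector<int> row_pointers;        // size = num_features + 1

  int FindBin(double x, int feature) const
  {
    auto begin = split_proposals.begin() + row_pointers[feature];
    auto end   = split_proposals.begin() + row_pointers[feature + 1];
    auto it    = std::lower_bound(begin, end, x);  // first proposal >= x; +inf sentinel guarantees a hit
    return static_cast<int>(it - split_proposals.begin());
  }

  int FindFeature(int bin_idx) const
  {
    // The last row pointer <= bin_idx identifies the owning feature.
    auto it = std::upper_bound(row_pointers.begin(), row_pointers.end(), bin_idx);
    return static_cast<int>(it - row_pointers.begin()) - 1;
  }
};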

0 commit comments
