Commit a229e78

malfet authored and pytorchmergebot committed

[BE] Enforce sign-compare (pytorch#96723)
A number of OSS PRs were reverted because of new signed-unsigned comparison warnings, which are treated as errors in some internal builds. It is not clear how those selective rules are applied, but this PR removes `-Wno-sign-compare` from the PyTorch codebase.

The only tricky part of this PR is making sure that non-ASCII character detection works for both signed and unsigned chars, here: https://github.com/pytorch/pytorch/blob/6e3d51b08ac108b27d892ab1be85eeb593ec1f0c/torch/csrc/jit/serialization/python_print.cpp#L926

Several files are excluded from sign-compare checking when flash attention is used, due to a violation in CUTLASS that is to be fixed by NVIDIA/cutlass#869. Sign-compare violations in the caffe2 codebase are deliberately left untouched.

Pull Request resolved: pytorch#96723
Approved by: https://github.com/albanD
1 parent 96c745d commit a229e78
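
The python_print.cpp fix mentioned above hinges on a portability detail: whether plain `char` is signed is implementation-defined, so a test like `c >= 0x80` only detects non-ASCII bytes when `char` is unsigned, and `c < 0` only works when it is signed. A minimal sketch of a check that behaves the same either way (an illustration of the idea, not the actual python_print.cpp code):

// Sketch only -- not the code from python_print.cpp. Casting through unsigned char
// makes the comparison well-defined whether char is signed or unsigned.
#include <string>

static bool has_non_ascii(const std::string& s) {
  for (const char c : s) {
    if (static_cast<unsigned char>(c) >= 0x80) {  // bytes 0x80..0xFF are non-ASCII
      return true;
    }
  }
  return false;
}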

File tree

78 files changed: +194 -188 lines


aten/src/ATen/cuda/jiterator_impl.h (+1 -1)

@@ -39,7 +39,7 @@ namespace native {
 
 c10::SmallVector<std::string> get_extra_args_typenames(const c10::SmallVector<at::Scalar>& extra_args) {
   c10::SmallVector<std::string> args_typenames(extra_args.size());
-  for (auto i = 0; i < extra_args.size(); ++i) {
+  for (const auto i : c10::irange(extra_args.size())) {
     args_typenames[i] = at::cuda::jit::typeName(extra_args[i].type());
   }
   return args_typenames;
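
Most of the loop rewrites in this commit follow the pattern of this first hunk: `c10::irange(n)` yields indices whose type matches `n`, so iterating up to a `size_t`-returning `size()` no longer pits a signed counter against an unsigned bound. A minimal sketch, assuming a PyTorch build where `<c10/util/irange.h>` is on the include path:

// Sketch: replacing a signed counter with c10::irange to silence -Wsign-compare.
#include <c10/util/irange.h>
#include <cstdio>
#include <vector>

void print_sizes(const std::vector<std::vector<int>>& vs) {
  // Before: for (int i = 0; i < vs.size(); ++i)  -> warns, int vs. size_t.
  // After: i is deduced as size_t because vs.size() returns size_t.
  for (const auto i : c10::irange(vs.size())) {
    std::printf("element %zu has %zu entries\n", i, vs[i].size());
  }
}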

aten/src/ATen/native/cuda/DepthwiseConv3d.cu (+1 -1)

@@ -333,7 +333,7 @@ void conv_depthwise_shape_check(
   if (grad_output.defined()) {
     auto expected_output_size = conv_output_size(input.sizes(), weight.sizes(),
                                                  padding, stride, dilation);
-    TORCH_CHECK(grad_output.dim() == expected_output_size.size(),
+    TORCH_CHECK(static_cast<size_t>(grad_output.dim()) == expected_output_size.size(),
                 "Expect grad_output to be ",
                 expected_output_size.size(), "D, got ",
                 grad_output.dim(), "D.");
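
Where a loop rewrite does not apply, the fix casts one operand explicitly, as in the `TORCH_CHECK` above; `grad_output.dim()` is never negative, so casting it to `size_t` is safe. The cast direction matters, because converting a negative signed value to unsigned wraps around, which is exactly what the implicit conversion in a mixed comparison already does. A standalone illustration of the trap that `-Wsign-compare` flags (hypothetical values, not PyTorch code):

// The classic sign-compare trap: -1 converts to a huge unsigned value.
#include <cstdio>

int main() {
  int dim = -1;         // a signed quantity that can be negative
  unsigned long n = 3;  // an unsigned size
  // Mathematically -1 < 3, but dim is converted to unsigned long first,
  // becomes a huge value, and the comparison yields false.
  std::printf("naive:   %s\n", (dim < n) ? "true" : "false");
  // Handling the sign explicitly restores the intended meaning.
  bool less = (dim < 0) || (static_cast<unsigned long>(dim) < n);
  std::printf("correct: %s\n", less ? "true" : "false");
  return 0;
}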

aten/src/ATen/native/cuda/ForeachBinaryOpScalarList.cu (+2 -2)

@@ -132,7 +132,7 @@ FOREACH_BINARY_OP_SCALARLIST(all_types_complex_half_bfloat16, pow, power_functor
 // In the case of subtraction, we dont allow scalar to be boolean following the torch.sub logic
 void foreach_tensor_sub_scalarlist_kernel_cuda_(TensorList tensors, at::ArrayRef<Scalar> scalars) {
   check_foreach_api_restrictions(tensors, scalars);
-  for (int i = 0; i < tensors.size(); i++) {
+  for (const auto i: c10::irange(tensors.size())) {
     sub_check(tensors[i], scalars[i]);
   }
 
@@ -147,7 +147,7 @@ void foreach_tensor_sub_scalarlist_kernel_cuda_(TensorList tensors, at::ArrayRef
 
 std::vector<Tensor> foreach_tensor_sub_scalarlist_kernel_cuda(TensorList tensors, at::ArrayRef<Scalar> scalars) {
   check_foreach_api_restrictions(tensors, scalars);
-  for (int i = 0; i < tensors.size(); i++) {
+  for (const auto i: c10::irange(tensors.size())) {
     sub_check(tensors[i], scalars[i]);
   }

aten/src/ATen/native/cuda/IndexKernel.cu (+1 -1)

@@ -53,7 +53,7 @@ static void launch_kernel(int64_t N, const func_t& f) {
 template <typename func_t>
 void gpu_index_kernel(TensorIteratorBase& iter, IntArrayRef index_size, IntArrayRef index_stride, const func_t& f) {
   int num_indices = index_size.size();
-  AT_ASSERT(num_indices == index_stride.size());
+  AT_ASSERT(static_cast<size_t>(num_indices) == index_stride.size());
   AT_ASSERT(num_indices == iter.ntensors() - 2);
 
   if (iter.numel() == 0) {

aten/src/ATen/native/cuda/LossCTC.cu (+2 -2)

@@ -226,8 +226,8 @@ std::tuple<Tensor, Tensor> ctc_loss_gpu_template(const Tensor& log_probs, const
   int64_t batch_size = log_probs.size(1);
   int64_t num_labels = log_probs.size(2);
   TORCH_CHECK((0 <= BLANK) && (BLANK < num_labels), "blank must be in label range");
-  TORCH_CHECK(input_lengths.size() == batch_size, "input_lengths must be of size batch_size");
-  TORCH_CHECK(target_lengths.size() == batch_size, "target_lengths must be of size batch_size");
+  TORCH_CHECK(input_lengths.size() == static_cast<size_t>(batch_size), "input_lengths must be of size batch_size");
+  TORCH_CHECK(target_lengths.size() == static_cast<size_t>(batch_size), "target_lengths must be of size batch_size");
 
   int64_t tg_target_stride;

aten/src/ATen/native/cuda/Shape.cu (+1 -1)

@@ -174,7 +174,7 @@ void parallel_cat(const Tensor &out, const MaterializedITensorListRef& inputs, i
   // Now we loop
   int batchCounter = 0;
   int64_t offset = 0;
-  for (int i = 0; i < inputs.size() ; i += batch_size) {
+  for (unsigned i = 0; i < inputs.size() ; i += batch_size) {
     for (batchCounter = 0;
          batchCounter < batch_size &&
          (i+batchCounter) < inputs.size();
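
This loop advances by `batch_size` rather than by one, so `c10::irange`, which steps by one as used elsewhere in this commit, is not a drop-in replacement; making the counter unsigned to match `inputs.size()` is the lighter-weight fix. A generic sketch of the same shape (illustrative names, not the Shape.cu code):

// Sketch: a strided loop where the counter type simply matches size().
#include <cstddef>
#include <vector>

void process_in_batches(const std::vector<int>& inputs, std::size_t batch_size) {
  for (std::size_t i = 0; i < inputs.size(); i += batch_size) {
    // handle inputs[i] .. inputs[min(i + batch_size, inputs.size()) - 1]
  }
}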

aten/src/ATen/native/cuda/SpectralOps.cu (+1 -1)

@@ -44,7 +44,7 @@ struct HermitianSymmetryOffsetCalculator {
     }
 
     mirror_dim_ = 0;
-    for (int64_t i = 0; i < dim.size(); ++i) {
+    for (const auto i: c10::irange(dim.size())) {
      mirror_dim_ |= (uint32_t{1} << dim[i]);
    }
  }

aten/src/ATen/native/cuda/SummaryOps.cu (+1 -1)

@@ -258,7 +258,7 @@ bool CUDA_tensor_histogram(
     memType = CUDAHistogramMemoryType::SHARED;
   } else if (
       nbins < THRESH_NUMBER_BINS_FOR_GLOBAL_MEM &&
-      multiBlockMem < (maxGlobalMem / 2)) {
+      multiBlockMem < static_cast<size_t>(maxGlobalMem / 2)) {
     // check against half of free mem to be extra safe
     // due to cached allocator, we may anyway have slightly more free mem
     memType = CUDAHistogramMemoryType::MULTI_BLOCK;

aten/src/ATen/native/cuda/TensorModeKernel.cu (+2 -2)

@@ -141,7 +141,7 @@ void calculate_mode(
   // to calculate the mode for --> we do this by manually doing the stride
   // calculations to get an offset
   scalar_t* data = self.data_ptr<scalar_t>();
-  for (int64_t i = 0; i < position.size(); i++) {
+  for (int64_t i = 0; i < static_cast<int64_t>(position.size()); i++) {
     data += position[i] * ensure_nonempty_stride(self, i);
   }
 
@@ -159,7 +159,7 @@ void calculate_mode(
   scalar_t* values_data = values.data_ptr<scalar_t>();
   int64_t* indices_data = indices.data_ptr<int64_t>();
 
-  for (int64_t i = 0; i < position.size(); i++) {
+  for (int64_t i = 0; i < static_cast<int64_t>(position.size()); i++) {
     int64_t pos = position[i];
     values_data += ensure_nonempty_stride(values, i) * pos;
     indices_data += ensure_nonempty_stride(indices, i) * pos;

aten/src/ATen/native/cuda/layer_norm_kernel.cu (+3 -3)

@@ -796,7 +796,7 @@ void LayerNormKernelImplInternal(
   constexpr int num_vec_elems = vec_size;
   constexpr int alignment = num_vec_elems * sizeof(T);
   if ((std::is_same<T, float>::value || std::is_same<T, at::Half>::value || std::is_same<T, at::BFloat16>::value) &&
-      N <= 1ULL << std::numeric_limits<float>::digits && N % num_vec_elems == 0 &&
+      N <= static_cast<int64_t>(1ULL << std::numeric_limits<float>::digits) && N % num_vec_elems == 0 &&
       can_vectorize(X_data, alignment) && can_vectorize(Y_data, alignment)) {
     launch_vectorized_layer_norm_kernel(static_cast<int>(N), M, eps, X_data, gamma_data, beta_data, Y_data, mean_data, rstd_data);
   } else {
@@ -1356,10 +1356,10 @@ std::tuple<Tensor, Tensor, Tensor> layer_norm_cuda(
   const size_t axis = input.dim() - normalized_shape.size();
 
   std::vector<int64_t> stat_shape;
-  for (size_t idx = 0; idx < axis; ++idx) {
+  for (const auto idx: c10::irange(axis)) {
     stat_shape.push_back(input_shape[idx]);
   }
-  for (size_t idx = axis; idx < input.dim(); ++idx) {
+  for (const auto C10_UNUSED idx: c10::irange(axis, input.dim())) {
     stat_shape.push_back(1);
   }
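
The second hunk also marks the loop variable `C10_UNUSED`: the body only pushes a constant and never reads `idx`, so the range-for rewrite would otherwise risk trading a sign-compare warning for an unused-variable one. A sketch of the same idea using the standard `[[maybe_unused]]` attribute (plain C++ apart from `c10::irange`; C10_UNUSED plays the equivalent role inside PyTorch):

// Sketch: a counting-only range-for with the unused index annotated.
#include <c10/util/irange.h>
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<std::int64_t> trailing_ones(std::size_t axis, std::size_t ndim) {
  std::vector<std::int64_t> shape;
  // idx never appears in the body; the loop only counts how many 1s to append.
  for ([[maybe_unused]] const auto idx : c10::irange(axis, ndim)) {
    shape.push_back(1);
  }
  return shape;
}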

aten/src/ATen/native/cudnn/Conv_v8.cpp (+1 -1)

@@ -373,7 +373,7 @@ void generate_and_filter_plans(const cudnnHandle_t handle, cudnn_frontend::Opera
   if (remove_invalid) {
     cudnn_frontend::executionPlans_t new_valid_plans;
     for (auto &plan : valid_plans) {
-      if (plan.getWorkspaceSize() <= max_workspace_size) {
+      if (static_cast<size_t>(plan.getWorkspaceSize()) <= max_workspace_size) {
         new_valid_plans.emplace_back(std::move(plan));
       }
     }

aten/src/ATen/native/mps/operations/Copy.mm (+1 -1)

@@ -35,7 +35,7 @@ size_t compute_strided_size(const at::Tensor& t) {
 }
 
 bool is_strided_contiguous(const at::Tensor& t) {
-  return compute_strided_size(t) == t.numel();
+  return compute_strided_size(t) == static_cast<size_t>(t.numel());
 }
 
 // Copy sourceBuffer into destBuffer, casting sourceBuffer to src.scalar_type().

aten/src/ATen/native/mps/operations/Indexing.mm (+4 -4)

@@ -156,11 +156,11 @@ static void validateInputData(const TensorIteratorBase& iter,
                               bool accumulate) {
   using namespace mps;
 
-  int64_t num_indices = index_size.size();
+  const auto num_indices = index_size.size();
   TORCH_CHECK(num_indices <= 16, "Current limit allows up to 16 indices to be used in MPS indexing kernels");
 
   AT_ASSERT(num_indices == index_stride.size());
-  AT_ASSERT(num_indices == iter.ntensors() - 2);
+  AT_ASSERT(static_cast<int>(num_indices) == iter.ntensors() - 2);
   const Tensor& inputTensor = iter.tensor(1);
 
   if (accumulate) {
@@ -589,8 +589,8 @@ Tensor index_select_mps(const Tensor& self, int64_t dim, const Tensor& index) {
   std::vector<int64_t> shape_data(num_input_dims);
 
   // Calculate new shape
-  for (auto i : c10::irange(num_input_dims)) {
-    if (i == dim) {
+  for (const auto i : c10::irange(num_input_dims)) {
+    if (i == static_cast<decltype(i)>(dim)) {
       shape_data[i] = num_indices;
     } else {
       shape_data[i] = input_shape[i];
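
The `static_cast<decltype(i)>(dim)` idiom recurs in the MPS files: instead of hard-coding `size_t` or `int`, the signed operand is cast to whatever index type the range-for deduced, so the comparison stays consistent even if the type of the range changes later. A small sketch of the same idea (illustrative names, not the Indexing.mm code):

// Sketch: comparing a deduced unsigned index against a signed dim argument.
#include <c10/util/irange.h>
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<std::int64_t> mark_dim(std::size_t ndim, std::int64_t dim) {
  // dim is assumed to be already wrapped into [0, ndim).
  std::vector<std::int64_t> out(ndim, 0);
  for (const auto i : c10::irange(ndim)) {     // i is deduced as std::size_t
    if (i == static_cast<decltype(i)>(dim)) {  // cast tracks i's deduced type
      out[i] = 1;
    }
  }
  return out;
}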

aten/src/ATen/native/mps/operations/Normalization.mm (+7 -7)

@@ -1000,21 +1000,21 @@ string get_mem_string(c10::MemoryFormat memory_format) {
 
   NSMutableArray<NSNumber*>* gamma_axes = [NSMutableArray<NSNumber*> arrayWithCapacity:num_channel_dims];
 
-  for (int i = 0; i < num_channel_dims; i++)
-    gamma_axes[i] = [NSNumber numberWithInt:i];
+  for (const auto i : c10::irange(num_channel_dims))
+    gamma_axes[i] = [NSNumber numberWithInt:static_cast<int>(i)];
 
   // Axes along which to reduce to get "batch norm" gradient
   // This will be applied on shape [1, M, -1]
   NSMutableArray<NSNumber*>* bn_axes = [NSMutableArray<NSNumber*> arrayWithCapacity:num_normalized_dims];
-  for (int i = 0; i < num_normalized_dims; i++)
-    bn_axes[i] = [NSNumber numberWithInt:(1 + 1 + i)];
+  for (const auto i : c10::irange(num_normalized_dims))
+    bn_axes[i] = [NSNumber numberWithInt:static_cast<int>(1 + 1 + i)];
 
   // Shape of input to do "batch norm" backward
   // This is [1, M, -1]
   NSMutableArray<NSNumber*>* bn_shape = [NSMutableArray<NSNumber*> arrayWithCapacity:(num_normalized_dims + 2)];
   bn_shape[0] = [NSNumber numberWithInt:1];
   bn_shape[1] = [NSNumber numberWithInt:M];
-  for (int i = 0; i < num_normalized_dims; i++)
+  for (const auto i : c10::irange(num_normalized_dims))
     bn_shape[i + 2] = input_shape[i + num_channel_dims];
 
   // Shape of mean to do "batch norm" backward
@@ -1023,7 +1023,7 @@ string get_mem_string(c10::MemoryFormat memory_format) {
       [NSMutableArray<NSNumber*> arrayWithCapacity:(num_normalized_dims + 2)];
   bn_mean_shape[0] = [NSNumber numberWithInt:1];
   bn_mean_shape[1] = [NSNumber numberWithInt:M];
-  for (int i = 0; i < num_normalized_dims; i++)
+  for (const auto i : c10::irange(num_normalized_dims))
     bn_mean_shape[i + 2] = [NSNumber numberWithInt:1];
 
   // Shape of gamma to multiply with "batch norm" backward
@@ -1032,7 +1032,7 @@ string get_mem_string(c10::MemoryFormat memory_format) {
       [NSMutableArray<NSNumber*> arrayWithCapacity:(num_normalized_dims + 2)];
   bn_gamma_shape[0] = [NSNumber numberWithInt:1];
   bn_gamma_shape[1] = [NSNumber numberWithInt:1];
-  for (int i = 0; i < num_normalized_dims; i++)
+  for (const auto i : c10::irange(num_normalized_dims))
     bn_gamma_shape[i + 2] = input_shape[i + num_channel_dims];
 
   string key = "layer_norm_backward_mps:" + std::to_string(has_weight) + ":" +

aten/src/ATen/native/mps/operations/ReduceOps.mm (+7 -5)

@@ -136,8 +136,9 @@ void reduction_out_mps(const Tensor& input_t,
     IntArrayRef dim = opt_dim.value();
     for (const auto dim_val : dim) {
       auto wrap_dim = maybe_wrap_dim(dim_val, input_shape.size());
-      TORCH_CHECK(wrap_dim < (input_shape.size() == 0 ? input_t.numel() : input_shape.size()),
-                  func_name + ": reduction dim must be in the range of input shape")
+      TORCH_CHECK(
+          wrap_dim < static_cast<decltype(wrap_dim)>(input_shape.size() == 0 ? input_t.numel() : input_shape.size()),
+          func_name + ": reduction dim must be in the range of input shape")
     }
   }
 
@@ -395,7 +396,8 @@ void impl_func_norm_mps(const Tensor& input_tensor,
 
   for (const auto dim_val : dim) {
     auto wrap_dim = maybe_wrap_dim(dim_val, input_shape.size());
-    TORCH_CHECK(wrap_dim < input_shape.size(), "norm_out_mps: reduction dim must be in the range of input shape")
+    TORCH_CHECK(wrap_dim < static_cast<decltype(wrap_dim)>(input_shape.size()),
+                "norm_out_mps: reduction dim must be in the range of input shape")
   }
 
   auto cache_ = MPSGraphCache::getInstance();
@@ -663,8 +665,8 @@ Tensor std_var_common_impl_mps(const Tensor& input_t,
     string errMessage = (stdVarType == STANDARD_DEVIATION) ? "std_mps" : "var_mps";
     errMessage += ": reduction dim must be in the range of input shape";
     for (const auto dim : dim_value) {
-      auto wrap_dim = maybe_wrap_dim(dim, input_shape.size());
-      TORCH_CHECK(wrap_dim < input_shape.size(), errMessage.c_str())
+      auto wrap_dim = maybe_wrap_dim(dim, num_input_dims);
+      TORCH_CHECK(wrap_dim < static_cast<decltype(wrap_dim)>(input_shape.size()), errMessage.c_str())
     }
   }

aten/src/ATen/native/mps/operations/Repeat.mm (+1 -1)

@@ -207,7 +207,7 @@ void computeRepeatIndices(index_t* repeat_ptr,
   [computeEncoder setBytes:&size length:sizeof(size) atIndex:3];
   MTLSize gridSize = MTLSizeMake(size, 1, 1);
   NSUInteger threadsPerThreadgroup_ = pipelineState.maxTotalThreadsPerThreadgroup;
-  if (threadsPerThreadgroup_ > size) {
+  if (threadsPerThreadgroup_ > static_cast<NSUInteger>(size)) {
     threadsPerThreadgroup_ = size;
   }
   MTLSize threadsPerThreadgroup = MTLSizeMake(threadsPerThreadgroup_, 1, 1);

aten/src/ATen/native/mps/operations/RnnOps.mm (+1 -1)

@@ -17,7 +17,7 @@
 std::vector<long long> getTensorShape(MPSGraphTensor* mpsTensor) {
   std::vector<long long> output_dimensions = {};
   auto dims = mpsTensor.shape;
-  for (int i = 0; i < [dims count]; i++) {
+  for (NSUInteger i = 0; i < [dims count]; i++) {
     output_dimensions.push_back([dims[i] intValue]);
   }
   return output_dimensions;

aten/src/ATen/native/mps/operations/Unique.mm (+1 -1)

@@ -97,7 +97,7 @@
   if (dimOpt.has_value() && [shape count] != 1) {
     NSMutableArray* axes = [[NSMutableArray alloc] initWithCapacity:[shape count] - 1];
     for (const auto axis : c10::irange([shape count])) {
-      if (axis != dim) {
+      if (static_cast<decltype(dim)>(axis) != dim) {
         [axes addObject:[NSNumber numberWithUnsignedInteger:axis]];
       }
     }
