Commit a229e78

malfet authored and pytorchmergebot committed

[BE] Enforce sign-compare (pytorch#96723)
A number of OSS PRs were reverted because of new signed-unsigned comparison warnings, which are treated as errors in some internal builds. It is not clear how those selective rules are applied, but this PR removes `-Wno-sign-compare` from the PyTorch codebase.

The only tricky part of this PR is making sure that non-ASCII character detection works for both signed and unsigned chars, here: https://github.com/pytorch/pytorch/blob/6e3d51b08ac108b27d892ab1be85eeb593ec1f0c/torch/csrc/jit/serialization/python_print.cpp#L926

Several files are excluded from sign-compare checking when flash attention is used, due to a violation in CUTLASS that is to be fixed by NVIDIA/cutlass#869. Sign-compare violations in the caffe2 codebase are deliberately left untouched.

Pull Request resolved: pytorch#96723
Approved by: https://github.com/albanD
1 parent 96c745d commit a229e78
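
The python_print.cpp fix mentioned above hinges on a portability detail: whether plain `char` is signed is implementation-defined, so a test like `c >= 0x80` only detects non-ASCII bytes when `char` is unsigned, and `c < 0` only works when it is signed. A minimal sketch of a check that behaves the same either way (an illustration of the idea, not the actual python_print.cpp code):

// Sketch only -- not the code from python_print.cpp. Casting through unsigned char
// makes the comparison well-defined whether char is signed or unsigned.
#include <string>

static bool has_non_ascii(const std::string& s) {
  for (const char c : s) {
    if (static_cast<unsigned char>(c) >= 0x80) {  // bytes 0x80..0xFF are non-ASCII
      return true;
    }
  }
  return false;
}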

File tree

78 files changed: +194 -188 lines


aten/src/ATen/cuda/jiterator_impl.h (+1 -1)

@@ -39,7 +39,7 @@ namespace native {
 
 c10::SmallVector<std::string> get_extra_args_typenames(const c10::SmallVector<at::Scalar>& extra_args) {
   c10::SmallVector<std::string> args_typenames(extra_args.size());
-  for (auto i = 0; i < extra_args.size(); ++i) {
+  for (const auto i : c10::irange(extra_args.size())) {
     args_typenames[i] = at::cuda::jit::typeName(extra_args[i].type());
   }
   return args_typenames;
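
Most of the loop rewrites in this commit follow the pattern of this first hunk: `c10::irange(n)` yields indices whose type matches `n`, so iterating up to a `size_t`-returning `size()` no longer pits a signed counter against an unsigned bound. A minimal sketch, assuming a PyTorch build where `<c10/util/irange.h>` is on the include path:

// Sketch: replacing a signed counter with c10::irange to silence -Wsign-compare.
#include <c10/util/irange.h>
#include <cstdio>
#include <vector>

void print_sizes(const std::vector<std::vector<int>>& vs) {
  // Before: for (int i = 0; i < vs.size(); ++i)  -> warns, int vs. size_t.
  // After: i is deduced as size_t because vs.size() returns size_t.
  for (const auto i : c10::irange(vs.size())) {
    std::printf("element %zu has %zu entries\n", i, vs[i].size());
  }
}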

aten/src/ATen/native/cuda/DepthwiseConv3d.cu (+1 -1)

@@ -333,7 +333,7 @@ void conv_depthwise_shape_check(
   if (grad_output.defined()) {
     auto expected_output_size = conv_output_size(input.sizes(), weight.sizes(),
                                                  padding, stride, dilation);
-    TORCH_CHECK(grad_output.dim() == expected_output_size.size(),
+    TORCH_CHECK(static_cast<size_t>(grad_output.dim()) == expected_output_size.size(),
                 "Expect grad_output to be ",
                 expected_output_size.size(), "D, got ",
                 grad_output.dim(), "D.");
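
Where a loop rewrite does not apply, the fix casts one operand explicitly, as in the `TORCH_CHECK` above; `grad_output.dim()` is never negative, so casting it to `size_t` is safe. The cast direction matters, because converting a negative signed value to unsigned wraps around, which is exactly what the implicit conversion in a mixed comparison already does. A standalone illustration of the trap that `-Wsign-compare` flags (hypothetical values, not PyTorch code):

// The classic sign-compare trap: -1 converts to a huge unsigned value.
#include <cstdio>

int main() {
  int dim = -1;         // a signed quantity that can be negative
  unsigned long n = 3;  // an unsigned size
  // Mathematically -1 < 3, but dim is converted to unsigned long first,
  // becomes a huge value, and the comparison yields false.
  std::printf("naive:   %s\n", (dim < n) ? "true" : "false");
  // Handling the sign explicitly restores the intended meaning.
  bool less = (dim < 0) || (static_cast<unsigned long>(dim) < n);
  std::printf("correct: %s\n", less ? "true" : "false");
  return 0;
}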

aten/src/ATen/native/cuda/ForeachBinaryOpScalarList.cu (+2 -2)

@@ -132,7 +132,7 @@ FOREACH_BINARY_OP_SCALARLIST(all_types_complex_half_bfloat16, pow, power_functor
 // In the case of subtraction, we dont allow scalar to be boolean following the torch.sub logic
 void foreach_tensor_sub_scalarlist_kernel_cuda_(TensorList tensors, at::ArrayRef<Scalar> scalars) {
   check_foreach_api_restrictions(tensors, scalars);
-  for (int i = 0; i < tensors.size(); i++) {
+  for (const auto i: c10::irange(tensors.size())) {
     sub_check(tensors[i], scalars[i]);
   }
 
@@ -147,7 +147,7 @@ void foreach_tensor_sub_scalarlist_kernel_cuda_(TensorList tensors, at::ArrayRef
 
 std::vector<Tensor> foreach_tensor_sub_scalarlist_kernel_cuda(TensorList tensors, at::ArrayRef<Scalar> scalars) {
   check_foreach_api_restrictions(tensors, scalars);
-  for (int i = 0; i < tensors.size(); i++) {
+  for (const auto i: c10::irange(tensors.size())) {
     sub_check(tensors[i], scalars[i]);
   }

aten/src/ATen/native/cuda/IndexKernel.cu (+1 -1)

@@ -53,7 +53,7 @@ static void launch_kernel(int64_t N, const func_t& f) {
 template <typename func_t>
 void gpu_index_kernel(TensorIteratorBase& iter, IntArrayRef index_size, IntArrayRef index_stride, const func_t& f) {
   int num_indices = index_size.size();
-  AT_ASSERT(num_indices == index_stride.size());
+  AT_ASSERT(static_cast<size_t>(num_indices) == index_stride.size());
   AT_ASSERT(num_indices == iter.ntensors() - 2);
 
   if (iter.numel() == 0) {

aten/src/ATen/native/cuda/LossCTC.cu (+2 -2)

@@ -226,8 +226,8 @@ std::tuple<Tensor, Tensor> ctc_loss_gpu_template(const Tensor& log_probs, const
   int64_t batch_size = log_probs.size(1);
   int64_t num_labels = log_probs.size(2);
   TORCH_CHECK((0 <= BLANK) && (BLANK < num_labels), "blank must be in label range");
-  TORCH_CHECK(input_lengths.size() == batch_size, "input_lengths must be of size batch_size");
-  TORCH_CHECK(target_lengths.size() == batch_size, "target_lengths must be of size batch_size");
+  TORCH_CHECK(input_lengths.size() == static_cast<size_t>(batch_size), "input_lengths must be of size batch_size");
+  TORCH_CHECK(target_lengths.size() == static_cast<size_t>(batch_size), "target_lengths must be of size batch_size");
 
   int64_t tg_target_stride;

aten/src/ATen/native/cuda/Shape.cu (+1 -1)

@@ -174,7 +174,7 @@ void parallel_cat(const Tensor &out, const MaterializedITensorListRef& inputs, i
   // Now we loop
   int batchCounter = 0;
   int64_t offset = 0;
-  for (int i = 0; i < inputs.size() ; i += batch_size) {
+  for (unsigned i = 0; i < inputs.size() ; i += batch_size) {
     for (batchCounter = 0;
          batchCounter < batch_size &&
          (i+batchCounter) < inputs.size();
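
This loop advances by `batch_size` rather than by one, so `c10::irange`, which steps by one as used elsewhere in this commit, is not a drop-in replacement; making the counter unsigned to match `inputs.size()` is the lighter-weight fix. A generic sketch of the same shape (illustrative names, not the Shape.cu code):

// Sketch: a strided loop where the counter type simply matches size().
#include <cstddef>
#include <vector>

void process_in_batches(const std::vector<int>& inputs, std::size_t batch_size) {
  for (std::size_t i = 0; i < inputs.size(); i += batch_size) {
    // handle inputs[i] .. inputs[min(i + batch_size, inputs.size()) - 1]
  }
}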

aten/src/ATen/native/cuda/SpectralOps.cu (+1 -1)

@@ -44,7 +44,7 @@ struct HermitianSymmetryOffsetCalculator {
     }
 
     mirror_dim_ = 0;
-    for (int64_t i = 0; i < dim.size(); ++i) {
+    for (const auto i: c10::irange(dim.size())) {
      mirror_dim_ |= (uint32_t{1} << dim[i]);
    }
  }

aten/src/ATen/native/cuda/SummaryOps.cu (+1 -1)

@@ -258,7 +258,7 @@ bool CUDA_tensor_histogram(
     memType = CUDAHistogramMemoryType::SHARED;
   } else if (
       nbins < THRESH_NUMBER_BINS_FOR_GLOBAL_MEM &&
-      multiBlockMem < (maxGlobalMem / 2)) {
+      multiBlockMem < static_cast<size_t>(maxGlobalMem / 2)) {
     // check against half of free mem to be extra safe
     // due to cached allocator, we may anyway have slightly more free mem
     memType = CUDAHistogramMemoryType::MULTI_BLOCK;

aten/src/ATen/native/cuda/TensorModeKernel.cu (+2 -2)

@@ -141,7 +141,7 @@ void calculate_mode(
   // to calculate the mode for --> we do this by manually doing the stride
   // calculations to get an offset
   scalar_t* data = self.data_ptr<scalar_t>();
-  for (int64_t i = 0; i < position.size(); i++) {
+  for (int64_t i = 0; i < static_cast<int64_t>(position.size()); i++) {
     data += position[i] * ensure_nonempty_stride(self, i);
   }
 
@@ -159,7 +159,7 @@ void calculate_mode(
   scalar_t* values_data = values.data_ptr<scalar_t>();
   int64_t* indices_data = indices.data_ptr<int64_t>();
 
-  for (int64_t i = 0; i < position.size(); i++) {
+  for (int64_t i = 0; i < static_cast<int64_t>(position.size()); i++) {
     int64_t pos = position[i];
     values_data += ensure_nonempty_stride(values, i) * pos;
     indices_data += ensure_nonempty_stride(indices, i) * pos;

aten/src/ATen/native/cuda/layer_norm_kernel.cu (+3 -3)

@@ -796,7 +796,7 @@ void LayerNormKernelImplInternal(
   constexpr int num_vec_elems = vec_size;
   constexpr int alignment = num_vec_elems * sizeof(T);
   if ((std::is_same<T, float>::value || std::is_same<T, at::Half>::value || std::is_same<T, at::BFloat16>::value) &&
-      N <= 1ULL << std::numeric_limits<float>::digits && N % num_vec_elems == 0 &&
+      N <= static_cast<int64_t>(1ULL << std::numeric_limits<float>::digits) && N % num_vec_elems == 0 &&
       can_vectorize(X_data, alignment) && can_vectorize(Y_data, alignment)) {
     launch_vectorized_layer_norm_kernel(static_cast<int>(N), M, eps, X_data, gamma_data, beta_data, Y_data, mean_data, rstd_data);
   } else {
@@ -1356,10 +1356,10 @@ std::tuple<Tensor, Tensor, Tensor> layer_norm_cuda(
   const size_t axis = input.dim() - normalized_shape.size();
 
   std::vector<int64_t> stat_shape;
-  for (size_t idx = 0; idx < axis; ++idx) {
+  for (const auto idx: c10::irange(axis)) {
     stat_shape.push_back(input_shape[idx]);
   }
-  for (size_t idx = axis; idx < input.dim(); ++idx) {
+  for (const auto C10_UNUSED idx: c10::irange(axis, input.dim())) {
     stat_shape.push_back(1);
   }
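
The second hunk also marks the loop variable `C10_UNUSED`: the body only pushes a constant and never reads `idx`, so the range-for rewrite would otherwise risk trading a sign-compare warning for an unused-variable one. A sketch of the same idea using the standard `[[maybe_unused]]` attribute (plain C++ apart from `c10::irange`; C10_UNUSED plays the equivalent role inside PyTorch):

// Sketch: a counting-only range-for with the unused index annotated.
#include <c10/util/irange.h>
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<std::int64_t> trailing_ones(std::size_t axis, std::size_t ndim) {
  std::vector<std::int64_t> shape;
  // idx never appears in the body; the loop only counts how many 1s to append.
  for ([[maybe_unused]] const auto idx : c10::irange(axis, ndim)) {
    shape.push_back(1);
  }
  return shape;
}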

aten/src/ATen/native/cudnn/Conv_v8.cpp (+1 -1)

@@ -373,7 +373,7 @@ void generate_and_filter_plans(const cudnnHandle_t handle, cudnn_frontend::Opera
   if (remove_invalid) {
     cudnn_frontend::executionPlans_t new_valid_plans;
     for (auto &plan : valid_plans) {
-      if (plan.getWorkspaceSize() <= max_workspace_size) {
+      if (static_cast<size_t>(plan.getWorkspaceSize()) <= max_workspace_size) {
         new_valid_plans.emplace_back(std::move(plan));
       }
     }

aten/src/ATen/native/mps/operations/Copy.mm (+1 -1)

@@ -35,7 +35,7 @@ size_t compute_strided_size(const at::Tensor& t) {
 }
 
 bool is_strided_contiguous(const at::Tensor& t) {
-  return compute_strided_size(t) == t.numel();
+  return compute_strided_size(t) == static_cast<size_t>(t.numel());
 }
 
 // Copy sourceBuffer into destBuffer, casting sourceBuffer to src.scalar_type().

aten/src/ATen/native/mps/operations/Indexing.mm (+4 -4)

@@ -156,11 +156,11 @@ static void validateInputData(const TensorIteratorBase& iter,
                               bool accumulate) {
   using namespace mps;
 
-  int64_t num_indices = index_size.size();
+  const auto num_indices = index_size.size();
   TORCH_CHECK(num_indices <= 16, "Current limit allows up to 16 indices to be used in MPS indexing kernels");
 
   AT_ASSERT(num_indices == index_stride.size());
-  AT_ASSERT(num_indices == iter.ntensors() - 2);
+  AT_ASSERT(static_cast<int>(num_indices) == iter.ntensors() - 2);
   const Tensor& inputTensor = iter.tensor(1);
 
   if (accumulate) {
@@ -589,8 +589,8 @@ Tensor index_select_mps(const Tensor& self, int64_t dim, const Tensor& index) {
   std::vector<int64_t> shape_data(num_input_dims);
 
   // Calculate new shape
-  for (auto i : c10::irange(num_input_dims)) {
-    if (i == dim) {
+  for (const auto i : c10::irange(num_input_dims)) {
+    if (i == static_cast<decltype(i)>(dim)) {
       shape_data[i] = num_indices;
     } else {
       shape_data[i] = input_shape[i];
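
The `static_cast<decltype(i)>(dim)` idiom recurs in the MPS files: instead of hard-coding `size_t` or `int`, the signed operand is cast to whatever index type the range-for deduced, so the comparison stays consistent even if the type of the range changes later. A small sketch of the same idea (illustrative names, not the Indexing.mm code):

// Sketch: comparing a deduced unsigned index against a signed dim argument.
#include <c10/util/irange.h>
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<std::int64_t> mark_dim(std::size_t ndim, std::int64_t dim) {
  // dim is assumed to be already wrapped into [0, ndim).
  std::vector<std::int64_t> out(ndim, 0);
  for (const auto i : c10::irange(ndim)) {     // i is deduced as std::size_t
    if (i == static_cast<decltype(i)>(dim)) {  // cast tracks i's deduced type
      out[i] = 1;
    }
  }
  return out;
}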

aten/src/ATen/native/mps/operations/Normalization.mm (+7 -7)

@@ -1000,21 +1000,21 @@ string get_mem_string(c10::MemoryFormat memory_format) {
 
   NSMutableArray<NSNumber*>* gamma_axes = [NSMutableArray<NSNumber*> arrayWithCapacity:num_channel_dims];
 
-  for (int i = 0; i < num_channel_dims; i++)
-    gamma_axes[i] = [NSNumber numberWithInt:i];
+  for (const auto i : c10::irange(num_channel_dims))
+    gamma_axes[i] = [NSNumber numberWithInt:static_cast<int>(i)];
 
   // Axes along which to reduce to get "batch norm" gradient
   // This will be applied on shape [1, M, -1]
   NSMutableArray<NSNumber*>* bn_axes = [NSMutableArray<NSNumber*> arrayWithCapacity:num_normalized_dims];
-  for (int i = 0; i < num_normalized_dims; i++)
-    bn_axes[i] = [NSNumber numberWithInt:(1 + 1 + i)];
+  for (const auto i : c10::irange(num_normalized_dims))
+    bn_axes[i] = [NSNumber numberWithInt:static_cast<int>(1 + 1 + i)];
 
   // Shape of input to do "batch norm" backward
   // This is [1, M, -1]
   NSMutableArray<NSNumber*>* bn_shape = [NSMutableArray<NSNumber*> arrayWithCapacity:(num_normalized_dims + 2)];
   bn_shape[0] = [NSNumber numberWithInt:1];
   bn_shape[1] = [NSNumber numberWithInt:M];
-  for (int i = 0; i < num_normalized_dims; i++)
+  for (const auto i : c10::irange(num_normalized_dims))
     bn_shape[i + 2] = input_shape[i + num_channel_dims];
 
   // Shape of mean to do "batch norm" backward
@@ -1023,7 +1023,7 @@ string get_mem_string(c10::MemoryFormat memory_format) {
       [NSMutableArray<NSNumber*> arrayWithCapacity:(num_normalized_dims + 2)];
   bn_mean_shape[0] = [NSNumber numberWithInt:1];
   bn_mean_shape[1] = [NSNumber numberWithInt:M];
-  for (int i = 0; i < num_normalized_dims; i++)
+  for (const auto i : c10::irange(num_normalized_dims))
     bn_mean_shape[i + 2] = [NSNumber numberWithInt:1];
 
   // Shape of gamma to multiply with "batch norm" backward
@@ -1032,7 +1032,7 @@ string get_mem_string(c10::MemoryFormat memory_format) {
       [NSMutableArray<NSNumber*> arrayWithCapacity:(num_normalized_dims + 2)];
   bn_gamma_shape[0] = [NSNumber numberWithInt:1];
   bn_gamma_shape[1] = [NSNumber numberWithInt:1];
-  for (int i = 0; i < num_normalized_dims; i++)
+  for (const auto i : c10::irange(num_normalized_dims))
     bn_gamma_shape[i + 2] = input_shape[i + num_channel_dims];
 
   string key = "layer_norm_backward_mps:" + std::to_string(has_weight) + ":" +

aten/src/ATen/native/mps/operations/ReduceOps.mm (+7 -5)

@@ -136,8 +136,9 @@ void reduction_out_mps(const Tensor& input_t,
     IntArrayRef dim = opt_dim.value();
     for (const auto dim_val : dim) {
       auto wrap_dim = maybe_wrap_dim(dim_val, input_shape.size());
-      TORCH_CHECK(wrap_dim < (input_shape.size() == 0 ? input_t.numel() : input_shape.size()),
-                  func_name + ": reduction dim must be in the range of input shape")
+      TORCH_CHECK(
+          wrap_dim < static_cast<decltype(wrap_dim)>(input_shape.size() == 0 ? input_t.numel() : input_shape.size()),
+          func_name + ": reduction dim must be in the range of input shape")
     }
   }
 
@@ -395,7 +396,8 @@ void impl_func_norm_mps(const Tensor& input_tensor,
 
   for (const auto dim_val : dim) {
     auto wrap_dim = maybe_wrap_dim(dim_val, input_shape.size());
-    TORCH_CHECK(wrap_dim < input_shape.size(), "norm_out_mps: reduction dim must be in the range of input shape")
+    TORCH_CHECK(wrap_dim < static_cast<decltype(wrap_dim)>(input_shape.size()),
+                "norm_out_mps: reduction dim must be in the range of input shape")
   }
 
   auto cache_ = MPSGraphCache::getInstance();
@@ -663,8 +665,8 @@ Tensor std_var_common_impl_mps(const Tensor& input_t,
     string errMessage = (stdVarType == STANDARD_DEVIATION) ? "std_mps" : "var_mps";
     errMessage += ": reduction dim must be in the range of input shape";
     for (const auto dim : dim_value) {
-      auto wrap_dim = maybe_wrap_dim(dim, input_shape.size());
-      TORCH_CHECK(wrap_dim < input_shape.size(), errMessage.c_str())
+      auto wrap_dim = maybe_wrap_dim(dim, num_input_dims);
+      TORCH_CHECK(wrap_dim < static_cast<decltype(wrap_dim)>(input_shape.size()), errMessage.c_str())
     }
   }

aten/src/ATen/native/mps/operations/Repeat.mm (+1 -1)

@@ -207,7 +207,7 @@ void computeRepeatIndices(index_t* repeat_ptr,
   [computeEncoder setBytes:&size length:sizeof(size) atIndex:3];
   MTLSize gridSize = MTLSizeMake(size, 1, 1);
   NSUInteger threadsPerThreadgroup_ = pipelineState.maxTotalThreadsPerThreadgroup;
-  if (threadsPerThreadgroup_ > size) {
+  if (threadsPerThreadgroup_ > static_cast<NSUInteger>(size)) {
     threadsPerThreadgroup_ = size;
   }
   MTLSize threadsPerThreadgroup = MTLSizeMake(threadsPerThreadgroup_, 1, 1);

aten/src/ATen/native/mps/operations/RnnOps.mm (+1 -1)

@@ -17,7 +17,7 @@
 std::vector<long long> getTensorShape(MPSGraphTensor* mpsTensor) {
   std::vector<long long> output_dimensions = {};
   auto dims = mpsTensor.shape;
-  for (int i = 0; i < [dims count]; i++) {
+  for (NSUInteger i = 0; i < [dims count]; i++) {
     output_dimensions.push_back([dims[i] intValue]);
   }
   return output_dimensions;

aten/src/ATen/native/mps/operations/Unique.mm (+1 -1)

@@ -97,7 +97,7 @@
   if (dimOpt.has_value() && [shape count] != 1) {
     NSMutableArray* axes = [[NSMutableArray alloc] initWithCapacity:[shape count] - 1];
     for (const auto axis : c10::irange([shape count])) {
-      if (axis != dim) {
+      if (static_cast<decltype(dim)>(axis) != dim) {
         [axes addObject:[NSNumber numberWithUnsignedInteger:axis]];
       }
     }
