Commit 9aa897b

cyyever authored and pytorchmergebot committed
Remove unnecessary tensor clone (pytorch#148159)
Fixes #ISSUE_NUMBER

Pull Request resolved: pytorch#148159
Approved by: https://github.com/Skylion007
1 parent 1d7397a commit 9aa897b

11 files changed (+20 −19 lines)
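Every hunk below removes a clone() that either duplicated work .to() was about to do anyway or wrapped a tensor in a no-op autograd::Variable construction. The core idiom: t.clone().to(...) can copy the data twice (once for the clone, once more if dtype or device changes), while a single .to(...) with /*copy=*/true guarantees exactly one fresh copy. A minimal sketch of the equivalence, assuming an ATen build (the tensor names are illustrative, not from the patch):

#include <ATen/ATen.h>

int main() {
  at::Tensor t = at::rand({4, 4});

  // Before-style: clone() deep-copies, then to() may copy again when
  // the dtype changes -- up to two allocations for one result.
  at::Tensor a = t.clone().to(at::kDouble);

  // After-style: one call, one copy. copy=true forces a fresh tensor
  // even when dtype and device already match.
  at::Tensor b = t.to(at::kDouble, /*non_blocking=*/false, /*copy=*/true);
  return 0;
}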

aten/src/ATen/functorch/BatchRulesScatterOps.cpp (+6 −2)

@@ -1155,7 +1155,9 @@ std::tuple<Tensor, std::optional<int64_t>> index_fill_int_scalar_batch_rule_impl
     return std::make_tuple(self_, 0);
   }
 
-  self_ = self_bdim.has_value() ? self_ : self_.clone();
+  if (!self_bdim.has_value()) {
+    self_ = self_.clone();
+  }
 
   return index_fill_batch_rule_helper(batch_size, self_logical_rank, index_logical_rank, self_, dim, index_, value);
 }
@@ -1209,7 +1211,9 @@ std::tuple<Tensor, std::optional<int64_t>> index_fill_int_tensor_batch_rule_impl
     return std::make_tuple(self_, 0);
   }
 
-  self_ = self_bdim.has_value() ? self_ : self_.clone();
+  if (!self_bdim.has_value()) {
+    self_ = self_.clone();
+  }
 
   // calling .item() on value is safe here because value is guaranteed to not be a batched tensor.
   return index_fill_batch_rule_helper(batch_size, self_logical_rank, index_logical_rank, self_, dim, index_, value.item());
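A note on these two hunks: the old ternary never saved a clone on the batched path, since it assigned self_ back to itself; the rewrite drops that redundant self-assignment (a handle copy and refcount round-trip, not a data copy) and clones only on the branch that needs it. A hedged sketch with an illustrative tensor x and flag has_bdim:

#include <ATen/ATen.h>

void demo(at::Tensor x, bool has_bdim) {
  // Before: when has_bdim is true this still runs x = x, copy-assigning
  // the Tensor handle (an atomic refcount bump); the data is never cloned.
  x = has_bdim ? x : x.clone();

  // After: the no-op assignment disappears; clone() runs only when needed.
  if (!has_bdim) {
    x = x.clone();
  }
}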

aten/src/ATen/native/mps/operations/Inverse.mm (+1 −1)

@@ -19,7 +19,7 @@
   TORCH_WARN_ONCE(
       "torch.linalg_inv_ex.inverse is supported by MPS on MacOS 13+, please upgrade. Falling back to CPU.");
   auto cpu_info = at::empty({0}, kInt, std::nullopt, kCPU, std::nullopt, std::nullopt);
-  auto cpu_result = result.clone().to("cpu");
+  auto cpu_result = result.to("cpu");
   at::linalg_inv_ex_out(cpu_result, cpu_info, A.to("cpu"));
   info.copy_(cpu_info);
   result.copy_(cpu_result);
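Here the clone was pure overhead: result lives on the MPS device, so result.to("cpu") already materializes a brand-new CPU tensor (a cross-device transfer is always a copy), and the prior clone() merely added an extra device-side allocation. A small sketch of the behavior this relies on, assuming an MPS-enabled build; the names are illustrative:

#include <ATen/ATen.h>

int main() {
  at::Tensor result = at::rand({3, 3}, at::device(at::kMPS));
  // Cross-device .to() always allocates fresh CPU storage, so there is
  // no aliasing to defend against with a clone().
  at::Tensor cpu_result = result.to("cpu");
  // Writing into cpu_result (e.g. via linalg_inv_ex_out) cannot touch
  // the MPS tensor; results flow back through result.copy_(cpu_result).
  return 0;
}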

aten/src/ATen/native/sparse/SparseCsrTensorMath.cpp (+1 −1)

@@ -344,7 +344,7 @@ Tensor sparse_mask_sparse_compressed(
   }
 
   if (!mask.numel() || !mask._nnz()) {
-    return mask.clone().to(self.device(), self.scalar_type());
+    return mask.to(self.device(), self.scalar_type(), /*non_blocking=*/false, /*copy=*/true);
   }
 
   if (self.layout() == kStrided) {

aten/src/ATen/native/sparse/SparseTensor.cpp (+1 −1)

@@ -795,7 +795,7 @@ SparseTensor sparse_mask(const Tensor& t, const SparseTensor& mask) {
 
   if (t.layout() == at::kSparse) {
     if (!t._nnz()) {
-      auto res = mask.clone().to(t.device(), t.scalar_type());
+      auto res = mask.to(t.device(), t.scalar_type(), /*non_blocking=*/false, /*copy=*/true);
       res._values().zero_();
       return res;
     }
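In both sparse hunks the target device and dtype can already match the mask's, and a bare .to(device, dtype) is then a no-op that returns the very same tensor, which the callee would go on to mutate (res._values().zero_()). Passing /*copy=*/true preserves the old clone().to() guarantee of a fresh tensor while performing at most one copy instead of up to two. A sketch of the aliasing difference, with illustrative tensors:

#include <ATen/ATen.h>

int main() {
  at::Tensor mask = at::rand({8});

  // Same device and dtype: a bare to() returns the original tensor.
  at::Tensor same = mask.to(mask.device(), mask.scalar_type());
  // same.is_alias_of(mask) -> true

  // copy=true forces a fresh tensor even in that no-op case, so a
  // follow-up in-place op cannot clobber the caller's mask.
  at::Tensor fresh = mask.to(mask.device(), mask.scalar_type(),
                             /*non_blocking=*/false, /*copy=*/true);
  // fresh.is_alias_of(mask) -> false
  return 0;
}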

torch/csrc/api/include/torch/nn/module.h (+2 −2)

@@ -689,11 +689,11 @@ void Module::to_impl(Ts&&... ts) {
   }
   // Then move every parameter to the new dtype/device.
   for (auto& parameter : named_parameters(/*recurse=*/false)) {
-    parameter->set_data(autograd::Variable(*parameter).to(ts...));
+    parameter->set_data(parameter->to(ts...));
   }
   // Then move every buffer to the new dtype/device.
   for (auto& buffer : named_buffers(/*recurse=*/false)) {
-    buffer->set_data(autograd::Variable(*buffer).to(ts...));
+    buffer->set_data(buffer->to(ts...));
   }
 }
 
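This hunk, and the python_ir.cpp, argument_spec.h, and graph_executor.cpp hunks below, all delete the same no-op: since the Variable/Tensor merge, torch::autograd::Variable is just a type alias for at::Tensor, so autograd::Variable(t) copy-constructs nothing but a second handle to the same TensorImpl. A sketch of why the two spellings are interchangeable:

#include <torch/torch.h>

int main() {
  // torch/csrc/autograd/variable.h declares (paraphrased):
  //   using Variable = at::Tensor;
  at::Tensor p = torch::rand({2});

  // Before-style: an extra handle copy (refcount bump), then the call.
  auto old_way = torch::autograd::Variable(p).to(torch::kHalf);

  // After-style: the same member call on the tensor directly.
  auto new_way = p.to(torch::kHalf);
  return 0;
}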

torch/csrc/distributed/c10d/reducer.cpp (+3 −3)

@@ -2315,14 +2315,14 @@ void verify_params_across_processes(
     }
   }
 
-  auto metadata_dev = metadata.clone().to(params[0].device());
-  std::vector<at::Tensor> vec{metadata_dev};
+  metadata = metadata.to(params[0].device());
+  std::vector<at::Tensor> vec{metadata};
   process_group->broadcast(vec)->wait();
 
   // Technically, process 0 doesn't need to double-check metadata, because it
   // was the source. But no harm keeping work aligned.
   auto control = at::empty({static_cast<long>(i)}, options);
-  control.copy_(metadata_dev, /*non_blocking=*/false);
+  control.copy_(metadata, /*non_blocking=*/false);
   auto control_accessor = control.accessor<int64_t, 1>();
   i = 0;
   for (const auto p : c10::irange(params.size())) {
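One subtlety in this hunk: when params[0] is on the same device as metadata, .to() returns metadata itself, so the broadcast now writes into that tensor rather than into a private clone. That appears safe here because metadata is a function-local tensor that is only read (via control) after the broadcast completes, but it is the one change in this commit that alters which buffer a collective mutates.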

torch/csrc/jit/python/python_ir.cpp (+2 −4)

@@ -756,8 +756,7 @@ void initPythonIRBindings(PyObject* module_) {
           [](Node& n, const char* name, const at::Tensor& v) {
             return n.t_(
                 Symbol::attr(name),
-                autograd::Variable(v.view(std::vector<int64_t>{}))
-                    .set_requires_grad(false));
+                v.view(std::vector<int64_t>{}).set_requires_grad(false));
           })
       .def(
           "z",
@@ -782,8 +781,7 @@
           "zs_",
           [](Node& n, const char* name, TensorsAttr::ValueType v) {
             for (auto& i : v) {
-              i = autograd::Variable(i.view(std::vector<int64_t>{}))
-                      .set_requires_grad(false);
+              i = i.view(std::vector<int64_t>{}).set_requires_grad(false);
             }
             return n.ts_(Symbol::attr(name), std::move(v));
           })

torch/csrc/jit/runtime/argument_spec.h (+1 −1)

@@ -101,7 +101,7 @@ struct ArgumentSpec {
     const at::Tensor* t = reinterpret_cast<const at::Tensor*>(&input);
     arg.defined_ = t->defined();
     if (arg.defined_) {
-      arg.requires_grad_ = with_grad && autograd::Variable(*t).requires_grad();
+      arg.requires_grad_ = with_grad && t->requires_grad();
       arg.dim_ = t->dim();
       at::Device device = t->device();
       arg.dev_type_ =

torch/csrc/jit/runtime/graph_executor.cpp (+1 −2)

@@ -322,9 +322,8 @@ struct DifferentiableGraphBackward : public autograd::Node {
   }
 
   void addOutputForTensor(const at::Tensor& tensor) {
-    auto v = Variable(tensor);
     add_next_edge(
-        v.defined() ? torch::autograd::impl::gradient_edge(v)
+        tensor.defined() ? torch::autograd::impl::gradient_edge(tensor)
                     : autograd::Edge{});
   }
   void addOutputForIValue(const IValue& value) {

torch/csrc/jit/tensorexpr/kernel.cpp (+1 −1)

@@ -1448,7 +1448,7 @@ void TensorExprKernel::bindConstant(const torch::jit::Value* v) {
         ToDtype(scalar_type));
 
     if (!const_tensor.is_contiguous()) {
-      const_tensor = const_tensor.clone().contiguous();
+      const_tensor = const_tensor.clone(at::MemoryFormat::Contiguous);
       unpacked_constant_tensors_.push_back(const_tensor);
     }
 
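clone() defaults to MemoryFormat::Preserve, so the old clone().contiguous() copied a non-contiguous constant twice: once keeping its strided layout, then again into contiguous storage. Passing the format to clone() folds that into a single copy. A sketch with an illustrative non-contiguous tensor:

#include <ATen/ATen.h>

int main() {
  // A transposed view is non-contiguous.
  at::Tensor t = at::rand({4, 8}).t();

  // Before-style: two copies -- clone() preserves the strided layout,
  // then contiguous() copies again into contiguous storage.
  at::Tensor two_copies = t.clone().contiguous();

  // After-style: one copy, straight into contiguous storage.
  at::Tensor one_copy = t.clone(at::MemoryFormat::Contiguous);
  return 0;
}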

torch/testing/_internal/common_utils.py (+1 −1)

@@ -2297,7 +2297,7 @@ def to_gpu(obj, type_map=None):
         assert obj.is_leaf
         t = type_map.get(obj.dtype, obj.dtype)
         with torch.no_grad():
-            res = obj.clone().to(dtype=t, device="cuda")
+            res = obj.to(dtype=t, device="cuda", copy=True)
         res.requires_grad = obj.requires_grad
         return res
     elif torch.is_storage(obj):
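This last hunk is the Python spelling of the same copy=True idiom used in the sparse files: obj.to(dtype=t, device="cuda", copy=True) always returns a fresh tensor, even if obj were already a CUDA tensor of the target dtype, so the res.requires_grad assignment that follows can never flip the flag on the caller's tensor.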
