From b358f33527dc56e8c498035a78090417c3e04127 Mon Sep 17 00:00:00 2001 From: Hua-yuxiu Date: Wed, 8 Jan 2025 11:18:35 +0800 Subject: [PATCH 1/2] add npu implementation for assign_score_withk_backward --- .../pytorch/npu/assign_score_withk_npu.cpp | 34 +++- tests/test_ops/test_assign_score_withk.py | 147 +++++++++--------- 2 files changed, 100 insertions(+), 81 deletions(-) diff --git a/mmcv/ops/csrc/pytorch/npu/assign_score_withk_npu.cpp b/mmcv/ops/csrc/pytorch/npu/assign_score_withk_npu.cpp index 104b0d26c3..2464edb93b 100644 --- a/mmcv/ops/csrc/pytorch/npu/assign_score_withk_npu.cpp +++ b/mmcv/ops/csrc/pytorch/npu/assign_score_withk_npu.cpp @@ -5,12 +5,13 @@ using namespace std; void assign_score_withk_forward_npu(int B, int N0, int N1, int M, int K, int O, int aggregate, const Tensor& points, - const Tensor& centers, const Tensor& scores, + const Tensor& centers, + const Tensor& scores, const Tensor& knn_idx, Tensor& output) { - at::Tensor points_trans = points.permute({0, 3, 1, 2}); - at::Tensor centers_trans = centers.permute({0, 3, 1, 2}); - EXEC_NPU_CMD(aclnnAssignScoreWithk, points_trans, centers_trans, scores, - knn_idx, B, N0, N1, M, K, O, aggregate, output); + at::Tensor points_trans = points.permute({0, 3, 1, 2}); + at::Tensor centers_trans = centers.permute({0, 3, 1, 2}); + + EXEC_NPU_CMD(aclnnAssignScoreWithk, points_trans, centers_trans, scores, knn_idx, B, N0, N1, M, K, O, aggregate, output); } void assign_score_withk_forward_impl(int B, int N0, int N1, int M, int K, int O, @@ -19,5 +20,24 @@ void assign_score_withk_forward_impl(int B, int N0, int N1, int M, int K, int O, const Tensor& scores, const Tensor& knn_idx, Tensor& output); -REGISTER_NPU_IMPL(assign_score_withk_forward_impl, - assign_score_withk_forward_npu); +REGISTER_NPU_IMPL(assign_score_withk_forward_impl, assign_score_withk_forward_npu); + + +void assign_score_withk_backward_npu( + int B, int N0, int N1, int M, int K, int O, int aggregate, + const Tensor& grad_out, const Tensor& points, 
const Tensor& centers, + const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points, + Tensor& grad_centers, Tensor& grad_scores) { + + at::Tensor grad_out_trans = grad_out.permute({0, 2, 3, 1}); + + EXEC_NPU_CMD(aclnnAssignScoreWithkGrad, grad_out_trans, points, centers, scores, knn_idx, B, N0, N1, M, K, O, aggregate, grad_scores, grad_points, grad_centers); +} + +void assign_score_withk_backward_impl( + int B, int N0, int N1, int M, int K, int O, int aggregate, + const Tensor& grad_out, const Tensor& points, const Tensor& centers, + const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points, + Tensor& grad_centers, Tensor& grad_scores); + +REGISTER_NPU_IMPL(assign_score_withk_backward_impl, assign_score_withk_backward_npu); diff --git a/tests/test_ops/test_assign_score_withk.py b/tests/test_ops/test_assign_score_withk.py index bd27144545..d778121c74 100644 --- a/tests/test_ops/test_assign_score_withk.py +++ b/tests/test_ops/test_assign_score_withk.py @@ -126,77 +126,76 @@ def test_paconv_assign_scores(device): assert torch.allclose(output.detach().cpu(), expected_output, atol=1e-6) # test backward - if device == 'cuda': - loss = output.sum() - loss.backward() - expected_scores_grad = torch.tensor([[[[0.04288036, -0.18217683], - [-0.78873926, 0.7485497], - [-0.6866992, 0.05346543], - [0.04288036, -0.18217683]], - [[-1.1407862, 0.13533896], - [-0.06964391, -0.22948086], - [-1.1407862, 0.13533896], - [-0.06964391, -0.22948086]]], - [[[-0.3363995, -2.212181], - [-1.1589496, -2.7724311], - [-0.9387654, -1.3163853], - [-1.4385346, -1.0614843]], - [[-0.5048497, 1.4143617], - [-0.47332114, 0.6017133], - [-0.30974793, 1.1995442], - [-0.5048497, - 1.4143617]]]]).float() - expected_points_grad = torch.tensor( - [[[[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[0.15585709, 0.15585709, 0.15585709, 0.15585709], - [1.1893613, 1.1893613, 1.1893613, 1.1893613]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[1.6530733, 1.6530733, 
1.6530733, 1.6530733], - [1.8130021, 1.8130021, 1.8130021, 1.8130021]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[0.58863074, 0.58863074, 0.58863074, 0.58863074], - [1.3727596, 1.3727596, 1.3727596, 1.3727596]], - [[0.28462553, 0.28462553, 0.28462553, 0.28462553], - [0.8378516, 0.8378516, 0.8378516, 0.8378516]]], - [[[0.13817799, 0.13817799, 0.13817799, 0.13817799], - [0.34856772, 0.34856772, 0.34856772, 0.34856772]], - [[0.7405102, 0.7405102, 0.7405102, 0.7405102], - [0.06438422, 0.06438422, 0.06438422, 0.06438422]], - [[0.8491963, 0.8491963, 0.8491963, 0.8491963], - [1.1301711, 1.1301711, 1.1301711, 1.1301711]], - [[0.6887394, 0.6887394, 0.6887394, 0.6887394], - [0.22089851, 0.22089851, 0.22089851, 0.22089851]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[0.605832, 0.605832, 0.605832, 0.605832], - [0.92364264, 0.92364264, 0.92364264, 0.92364264]], - [[0.23089725, 0.23089725, 0.23089725, 0.23089725], - [0.5568468, 0.5568468, 0.5568468, 0.5568468]]]]).float() - expected_centers_grad = torch.tensor( - [[[[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[-1.0493311, -1.0493311, -1.0493311, -1.0493311], - [-2.0301602, -2.0301602, -2.0301602, -2.0301602]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[-1.6328557, -1.6328557, -1.6328557, -1.6328557], - [-3.1828144, -3.1828144, -3.1828144, -3.1828144]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]]], - [[[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[-1.5429721, -1.5429721, -1.5429721, -1.5429721], - [-1.6100934, -1.6100934, -1.6100934, -1.6100934]], - [[-1.7103812, -1.7103812, -1.7103812, -1.7103812], - [-1.6344175, -1.6344175, -1.6344175, -1.6344175]]]]).float() - assert torch.allclose( 
- scores.grad.detach().cpu(), expected_scores_grad, atol=1e-6) - assert torch.allclose( - points.grad.detach().cpu(), expected_points_grad, atol=1e-6) - assert torch.allclose( - centers.grad.detach().cpu(), expected_centers_grad, atol=1e-6) + loss = output.sum() + loss.backward() + expected_scores_grad = torch.tensor([[[[0.04288036, -0.18217683], + [-0.78873926, 0.7485497], + [-0.6866992, 0.05346543], + [0.04288036, -0.18217683]], + [[-1.1407862, 0.13533896], + [-0.06964391, -0.22948086], + [-1.1407862, 0.13533896], + [-0.06964391, -0.22948086]]], + [[[-0.3363995, -2.212181], + [-1.1589496, -2.7724311], + [-0.9387654, -1.3163853], + [-1.4385346, -1.0614843]], + [[-0.5048497, 1.4143617], + [-0.47332114, 0.6017133], + [-0.30974793, 1.1995442], + [-0.5048497, + 1.4143617]]]]).float() + expected_points_grad = torch.tensor( + [[[[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[0.15585709, 0.15585709, 0.15585709, 0.15585709], + [1.1893613, 1.1893613, 1.1893613, 1.1893613]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[1.6530733, 1.6530733, 1.6530733, 1.6530733], + [1.8130021, 1.8130021, 1.8130021, 1.8130021]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[0.58863074, 0.58863074, 0.58863074, 0.58863074], + [1.3727596, 1.3727596, 1.3727596, 1.3727596]], + [[0.28462553, 0.28462553, 0.28462553, 0.28462553], + [0.8378516, 0.8378516, 0.8378516, 0.8378516]]], + [[[0.13817799, 0.13817799, 0.13817799, 0.13817799], + [0.34856772, 0.34856772, 0.34856772, 0.34856772]], + [[0.7405102, 0.7405102, 0.7405102, 0.7405102], + [0.06438422, 0.06438422, 0.06438422, 0.06438422]], + [[0.8491963, 0.8491963, 0.8491963, 0.8491963], + [1.1301711, 1.1301711, 1.1301711, 1.1301711]], + [[0.6887394, 0.6887394, 0.6887394, 0.6887394], + [0.22089851, 0.22089851, 0.22089851, 0.22089851]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[0.605832, 0.605832, 0.605832, 0.605832], + [0.92364264, 0.92364264, 0.92364264, 0.92364264]], + 
[[0.23089725, 0.23089725, 0.23089725, 0.23089725], + [0.5568468, 0.5568468, 0.5568468, 0.5568468]]]]).float() + expected_centers_grad = torch.tensor( + [[[[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[-1.0493311, -1.0493311, -1.0493311, -1.0493311], + [-2.0301602, -2.0301602, -2.0301602, -2.0301602]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[-1.6328557, -1.6328557, -1.6328557, -1.6328557], + [-3.1828144, -3.1828144, -3.1828144, -3.1828144]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]]], + [[[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[-1.5429721, -1.5429721, -1.5429721, -1.5429721], + [-1.6100934, -1.6100934, -1.6100934, -1.6100934]], + [[-1.7103812, -1.7103812, -1.7103812, -1.7103812], + [-1.6344175, -1.6344175, -1.6344175, -1.6344175]]]]).float() + assert torch.allclose( + scores.grad.detach().cpu(), expected_scores_grad, atol=1e-6) + assert torch.allclose( + points.grad.detach().cpu(), expected_points_grad, atol=1e-6) + assert torch.allclose( + centers.grad.detach().cpu(), expected_centers_grad, atol=1e-6) From 20da5424f452699fd1176b1f547d3b2d272418fb Mon Sep 17 00:00:00 2001 From: Hua-yuxiu Date: Wed, 8 Jan 2025 11:24:39 +0800 Subject: [PATCH 2/2] add npu implementation for assign_score_withk_backward --- mmcv/ops/csrc/pytorch/npu/pixel_group_npu.cpp | 2 +- tests/test_ops/test_assign_score_withk.py | 59 +++++++++---------- 2 files changed, 30 insertions(+), 31 deletions(-) diff --git a/mmcv/ops/csrc/pytorch/npu/pixel_group_npu.cpp b/mmcv/ops/csrc/pytorch/npu/pixel_group_npu.cpp index eb937d50fa..5b92305d16 100644 --- a/mmcv/ops/csrc/pytorch/npu/pixel_group_npu.cpp +++ b/mmcv/ops/csrc/pytorch/npu/pixel_group_npu.cpp @@ -52,4 +52,4 @@ vector> 
pixel_group_impl(Tensor score, Tensor mask, Tensor embeddi Tensor kernel_label, Tensor kernel_contour, int kernel_region_num, float distance_threshold); -REGISTER_NPU_IMPL(pixel_group_impl, pixel_group_npu); \ No newline at end of file +REGISTER_NPU_IMPL(pixel_group_impl, pixel_group_npu); diff --git a/tests/test_ops/test_assign_score_withk.py b/tests/test_ops/test_assign_score_withk.py index d778121c74..8c53cea2c7 100644 --- a/tests/test_ops/test_assign_score_withk.py +++ b/tests/test_ops/test_assign_score_withk.py @@ -129,70 +129,69 @@ def test_paconv_assign_scores(device): loss = output.sum() loss.backward() expected_scores_grad = torch.tensor([[[[0.04288036, -0.18217683], - [-0.78873926, 0.7485497], - [-0.6866992, 0.05346543], - [0.04288036, -0.18217683]], + [-0.78873926, 0.7485497], + [-0.6866992, 0.05346543], + [0.04288036, -0.18217683]], [[-1.1407862, 0.13533896], - [-0.06964391, -0.22948086], - [-1.1407862, 0.13533896], - [-0.06964391, -0.22948086]]], - [[[-0.3363995, -2.212181], - [-1.1589496, -2.7724311], - [-0.9387654, -1.3163853], - [-1.4385346, -1.0614843]], + [-0.06964391, -0.22948086], + [-1.1407862, 0.13533896], + [-0.06964391, -0.22948086]]], + [[[-0.3363995, -2.212181], + [-1.1589496, -2.7724311], + [-0.9387654, -1.3163853], + [-1.4385346, -1.0614843]], [[-0.5048497, 1.4143617], - [-0.47332114, 0.6017133], - [-0.30974793, 1.1995442], - [-0.5048497, - 1.4143617]]]]).float() + [-0.47332114, 0.6017133], + [-0.30974793, 1.1995442], + [-0.5048497, 1.4143617]]]]).float() expected_points_grad = torch.tensor( [[[[0., 0., 0., 0.], [0., 0., 0., 0.]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], [[0.15585709, 0.15585709, 0.15585709, 0.15585709], - [1.1893613, 1.1893613, 1.1893613, 1.1893613]], + [1.1893613, 1.1893613, 1.1893613, 1.1893613]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], [[1.6530733, 1.6530733, 1.6530733, 1.6530733], - [1.8130021, 1.8130021, 1.8130021, 1.8130021]], + [1.8130021, 1.8130021, 1.8130021, 1.8130021]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], 
[[0.58863074, 0.58863074, 0.58863074, 0.58863074], - [1.3727596, 1.3727596, 1.3727596, 1.3727596]], + [1.3727596, 1.3727596, 1.3727596, 1.3727596]], [[0.28462553, 0.28462553, 0.28462553, 0.28462553], - [0.8378516, 0.8378516, 0.8378516, 0.8378516]]], - [[[0.13817799, 0.13817799, 0.13817799, 0.13817799], - [0.34856772, 0.34856772, 0.34856772, 0.34856772]], + [0.8378516, 0.8378516, 0.8378516, 0.8378516]]], + [[[0.13817799, 0.13817799, 0.13817799, 0.13817799], + [0.34856772, 0.34856772, 0.34856772, 0.34856772]], [[0.7405102, 0.7405102, 0.7405102, 0.7405102], - [0.06438422, 0.06438422, 0.06438422, 0.06438422]], + [0.06438422, 0.06438422, 0.06438422, 0.06438422]], [[0.8491963, 0.8491963, 0.8491963, 0.8491963], - [1.1301711, 1.1301711, 1.1301711, 1.1301711]], + [1.1301711, 1.1301711, 1.1301711, 1.1301711]], [[0.6887394, 0.6887394, 0.6887394, 0.6887394], - [0.22089851, 0.22089851, 0.22089851, 0.22089851]], + [0.22089851, 0.22089851, 0.22089851, 0.22089851]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], [[0.605832, 0.605832, 0.605832, 0.605832], - [0.92364264, 0.92364264, 0.92364264, 0.92364264]], + [0.92364264, 0.92364264, 0.92364264, 0.92364264]], [[0.23089725, 0.23089725, 0.23089725, 0.23089725], - [0.5568468, 0.5568468, 0.5568468, 0.5568468]]]]).float() + [0.5568468, 0.5568468, 0.5568468, 0.5568468]]]]).float() expected_centers_grad = torch.tensor( [[[[0., 0., 0., 0.], [0., 0., 0., 0.]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], [[-1.0493311, -1.0493311, -1.0493311, -1.0493311], - [-2.0301602, -2.0301602, -2.0301602, -2.0301602]], + [-2.0301602, -2.0301602, -2.0301602, -2.0301602]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], [[-1.6328557, -1.6328557, -1.6328557, -1.6328557], - [-3.1828144, -3.1828144, -3.1828144, -3.1828144]], + [-3.1828144, -3.1828144, -3.1828144, -3.1828144]], [[0., 0., 0., 0.], [0., 0., 0., 0.]]], - [[[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[[0., 0., 0., 
0.], [0., 0., 0., 0.]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], [[-1.5429721, -1.5429721, -1.5429721, -1.5429721], - [-1.6100934, -1.6100934, -1.6100934, -1.6100934]], + [-1.6100934, -1.6100934, -1.6100934, -1.6100934]], [[-1.7103812, -1.7103812, -1.7103812, -1.7103812], - [-1.6344175, -1.6344175, -1.6344175, -1.6344175]]]]).float() + [-1.6344175, -1.6344175, -1.6344175, -1.6344175]]]]).float() assert torch.allclose( scores.grad.detach().cpu(), expected_scores_grad, atol=1e-6) assert torch.allclose(