Skip to content

Commit ea1236c

Browse files
BIGWangYuDong authored and ZwwWayne committed
[Refactor] Refactor Sparse RCNN and QueryInst
1 parent 1b7560c commit ea1236c

35 files changed

+1262
-928
lines changed

configs/queryinst/queryinst_r50_fpn_1x_coco.py

+32-15
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,13 @@
66
num_proposals = 100
77
model = dict(
88
type='QueryInst',
9+
data_preprocessor=dict(
10+
type='DetDataPreprocessor',
11+
mean=[123.675, 116.28, 103.53],
12+
std=[58.395, 57.12, 57.375],
13+
bgr_to_rgb=True,
14+
pad_mask=True,
15+
pad_size_divisor=32),
916
backbone=dict(
1017
type='ResNet',
1118
depth=50,
@@ -111,10 +118,11 @@
111118
dict(
112119
assigner=dict(
113120
type='HungarianAssigner',
114-
cls_cost=dict(type='FocalLossCost', weight=2.0),
115-
reg_cost=dict(type='BBoxL1Cost', weight=5.0),
116-
iou_cost=dict(type='IoUCost', iou_mode='giou',
117-
weight=2.0)),
121+
match_costs=[
122+
dict(type='FocalLossCost', weight=2.0),
123+
dict(type='BBoxL1Cost', weight=5.0, box_format='xyxy'),
124+
dict(type='IoUCost', iou_mode='giou', weight=2.0)
125+
]),
118126
sampler=dict(type='PseudoSampler'),
119127
pos_weight=1,
120128
mask_size=28,
@@ -124,15 +132,24 @@
124132
rpn=None, rcnn=dict(max_per_img=num_proposals, mask_thr_binary=0.5)))
125133

126134
# optimizer
127-
optimizer = dict(
128-
_delete_=True,
129-
type='AdamW',
130-
lr=0.0001,
131-
weight_decay=0.0001,
135+
optim_wrapper = dict(
136+
type='OptimWrapper',
137+
optimizer=dict(
138+
_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0001),
132139
paramwise_cfg=dict(
133-
custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)}))
134-
optimizer_config = dict(
135-
_delete_=True, grad_clip=dict(max_norm=0.1, norm_type=2))
136-
# learning policy
137-
lr_config = dict(policy='step', step=[8, 11], warmup_iters=1000)
138-
runner = dict(type='EpochBasedRunner', max_epochs=12)
140+
custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)}),
141+
clip_grad=dict(max_norm=0.1, norm_type=2))
142+
143+
# learning rate
144+
param_scheduler = [
145+
dict(
146+
type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
147+
end=1000),
148+
dict(
149+
type='MultiStepLR',
150+
begin=0,
151+
end=12,
152+
by_epoch=True,
153+
milestones=[8, 11],
154+
gamma=0.1)
155+
]

configs/queryinst/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py

+30-37
Original file line number | Diff line number | Diff line change
@@ -6,49 +6,42 @@
66
_delete_=True,
77
rpn=None,
88
rcnn=dict(max_per_img=num_proposals, mask_thr_binary=0.5)))
9-
img_norm_cfg = dict(
10-
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
119

1210
# augmentation strategy originates from DETR.
1311
train_pipeline = [
14-
dict(type='LoadImageFromFile'),
12+
dict(
13+
type='LoadImageFromFile',
14+
file_client_args={{_base_.file_client_args}}),
1515
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
16-
dict(type='RandomFlip', flip_ratio=0.5),
16+
dict(type='RandomFlip', prob=0.5),
1717
dict(
18-
type='AutoAugment',
19-
policies=[[
18+
type='RandomChoice',
19+
transforms=[[
2020
dict(
21-
type='Resize',
22-
img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
23-
(608, 1333), (640, 1333), (672, 1333), (704, 1333),
24-
(736, 1333), (768, 1333), (800, 1333)],
25-
multiscale_mode='value',
21+
type='RandomChoiceResize',
22+
scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
23+
(608, 1333), (640, 1333), (672, 1333), (704, 1333),
24+
(736, 1333), (768, 1333), (800, 1333)],
2625
keep_ratio=True)
2726
],
28-
[
29-
dict(
30-
type='Resize',
31-
img_scale=[(400, 1333), (500, 1333), (600, 1333)],
32-
multiscale_mode='value',
33-
keep_ratio=True),
34-
dict(
35-
type='RandomCrop',
36-
crop_type='absolute_range',
37-
crop_size=(384, 600),
38-
allow_negative_crop=True),
39-
dict(
40-
type='Resize',
41-
img_scale=[(480, 1333), (512, 1333), (544, 1333),
42-
(576, 1333), (608, 1333), (640, 1333),
43-
(672, 1333), (704, 1333), (736, 1333),
44-
(768, 1333), (800, 1333)],
45-
multiscale_mode='value',
46-
override=True,
47-
keep_ratio=True)
48-
]]),
49-
dict(type='Normalize', **img_norm_cfg),
50-
dict(type='Pad', size_divisor=32),
51-
dict(type='DefaultFormatBundle'),
52-
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'])
27+
[
28+
dict(
29+
type='RandomChoiceResize',
30+
scales=[(400, 1333), (500, 1333), (600, 1333)],
31+
keep_ratio=True),
32+
dict(
33+
type='RandomCrop',
34+
crop_type='absolute_range',
35+
crop_size=(384, 600),
36+
allow_negative_crop=True),
37+
dict(
38+
type='RandomChoiceResize',
39+
scales=[(480, 1333), (512, 1333), (544, 1333),
40+
(576, 1333), (608, 1333), (640, 1333),
41+
(672, 1333), (704, 1333), (736, 1333),
42+
(768, 1333), (800, 1333)],
43+
keep_ratio=True)
44+
]]),
45+
dict(type='PackDetInputs')
5346
]
54-
data = dict(train=dict(pipeline=train_pipeline))
47+
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,34 @@
11
_base_ = './queryinst_r50_fpn_1x_coco.py'
22

3-
img_norm_cfg = dict(
4-
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
5-
min_values = (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
63
train_pipeline = [
7-
dict(type='LoadImageFromFile'),
4+
dict(
5+
type='LoadImageFromFile',
6+
file_client_args={{_base_.file_client_args}}),
87
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
98
dict(
10-
type='Resize',
11-
img_scale=[(1333, value) for value in min_values],
12-
multiscale_mode='value',
9+
type='RandomChoiceResize',
10+
scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
11+
(608, 1333), (640, 1333), (672, 1333), (704, 1333),
12+
(736, 1333), (768, 1333), (800, 1333)],
1313
keep_ratio=True),
14-
dict(type='RandomFlip', flip_ratio=0.5),
15-
dict(type='Normalize', **img_norm_cfg),
16-
dict(type='Pad', size_divisor=32),
17-
dict(type='DefaultFormatBundle'),
18-
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'])
14+
dict(type='RandomFlip', prob=0.5),
15+
dict(type='PackDetInputs')
1916
]
2017

21-
data = dict(train=dict(pipeline=train_pipeline))
22-
lr_config = dict(policy='step', step=[27, 33])
23-
runner = dict(type='EpochBasedRunner', max_epochs=36)
18+
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
19+
20+
# learning policy
21+
max_epochs = 36
22+
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs)
23+
24+
param_scheduler = [
25+
dict(
26+
type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
27+
dict(
28+
type='MultiStepLR',
29+
begin=0,
30+
end=max_epochs,
31+
by_epoch=True,
32+
milestones=[27, 33],
33+
gamma=0.1)
34+
]

configs/sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco.py

+15-10
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,12 @@
66
num_proposals = 100
77
model = dict(
88
type='SparseRCNN',
9+
data_preprocessor=dict(
10+
type='DetDataPreprocessor',
11+
mean=[123.675, 116.28, 103.53],
12+
std=[58.395, 57.12, 57.375],
13+
bgr_to_rgb=True,
14+
pad_size_divisor=32),
915
backbone=dict(
1016
type='ResNet',
1117
depth=50,
@@ -78,19 +84,18 @@
7884
dict(
7985
assigner=dict(
8086
type='HungarianAssigner',
81-
# TODO update
82-
cls_cost=dict(type='FocalLossCost', weight=2.0),
83-
reg_cost=dict(type='BBoxL1Cost', weight=5.0),
84-
iou_cost=dict(type='IoUCost', iou_mode='giou',
85-
weight=2.0)),
87+
match_costs=[
88+
dict(type='FocalLossCost', weight=2.0),
89+
dict(type='BBoxL1Cost', weight=5.0, box_format='xyxy'),
90+
dict(type='IoUCost', iou_mode='giou', weight=2.0)
91+
]),
8692
sampler=dict(type='PseudoSampler'),
8793
pos_weight=1) for _ in range(num_stages)
8894
]),
8995
test_cfg=dict(rpn=None, rcnn=dict(max_per_img=num_proposals)))
9096

9197
# optimizer
92-
optimizer = dict(_delete_=True, type='AdamW', lr=0.000025, weight_decay=0.0001)
93-
optimizer_config = dict(_delete_=True, grad_clip=dict(max_norm=1, norm_type=2))
94-
# learning policy
95-
lr_config = dict(policy='step', step=[8, 11])
96-
runner = dict(type='EpochBasedRunner', max_epochs=12)
98+
optim_wrapper = dict(
99+
optimizer=dict(
100+
_delete_=True, type='AdamW', lr=0.000025, weight_decay=0.0001),
101+
clip_grad=dict(max_norm=1, norm_type=2))

configs/sparse_rcnn/sparse_rcnn_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py

+30-37
Original file line number | Diff line number | Diff line change
@@ -4,49 +4,42 @@
44
rpn_head=dict(num_proposals=num_proposals),
55
test_cfg=dict(
66
_delete_=True, rpn=None, rcnn=dict(max_per_img=num_proposals)))
7-
img_norm_cfg = dict(
8-
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
97

108
# augmentation strategy originates from DETR.
119
train_pipeline = [
12-
dict(type='LoadImageFromFile'),
10+
dict(
11+
type='LoadImageFromFile',
12+
file_client_args={{_base_.file_client_args}}),
1313
dict(type='LoadAnnotations', with_bbox=True),
14-
dict(type='RandomFlip', flip_ratio=0.5),
14+
dict(type='RandomFlip', prob=0.5),
1515
dict(
16-
type='AutoAugment',
17-
policies=[[
16+
type='RandomChoice',
17+
transforms=[[
1818
dict(
19-
type='Resize',
20-
img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
21-
(608, 1333), (640, 1333), (672, 1333), (704, 1333),
22-
(736, 1333), (768, 1333), (800, 1333)],
23-
multiscale_mode='value',
19+
type='RandomChoiceResize',
20+
scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
21+
(608, 1333), (640, 1333), (672, 1333), (704, 1333),
22+
(736, 1333), (768, 1333), (800, 1333)],
2423
keep_ratio=True)
2524
],
26-
[
27-
dict(
28-
type='Resize',
29-
img_scale=[(400, 1333), (500, 1333), (600, 1333)],
30-
multiscale_mode='value',
31-
keep_ratio=True),
32-
dict(
33-
type='RandomCrop',
34-
crop_type='absolute_range',
35-
crop_size=(384, 600),
36-
allow_negative_crop=True),
37-
dict(
38-
type='Resize',
39-
img_scale=[(480, 1333), (512, 1333), (544, 1333),
40-
(576, 1333), (608, 1333), (640, 1333),
41-
(672, 1333), (704, 1333), (736, 1333),
42-
(768, 1333), (800, 1333)],
43-
multiscale_mode='value',
44-
override=True,
45-
keep_ratio=True)
46-
]]),
47-
dict(type='Normalize', **img_norm_cfg),
48-
dict(type='Pad', size_divisor=32),
49-
dict(type='DefaultFormatBundle'),
50-
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
25+
[
26+
dict(
27+
type='RandomChoiceResize',
28+
scales=[(400, 1333), (500, 1333), (600, 1333)],
29+
keep_ratio=True),
30+
dict(
31+
type='RandomCrop',
32+
crop_type='absolute_range',
33+
crop_size=(384, 600),
34+
allow_negative_crop=True),
35+
dict(
36+
type='RandomChoiceResize',
37+
scales=[(480, 1333), (512, 1333), (544, 1333),
38+
(576, 1333), (608, 1333), (640, 1333),
39+
(672, 1333), (704, 1333), (736, 1333),
40+
(768, 1333), (800, 1333)],
41+
keep_ratio=True)
42+
]]),
43+
dict(type='PackDetInputs')
5144
]
52-
data = dict(train=dict(pipeline=train_pipeline))
45+
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,34 @@
11
_base_ = './sparse_rcnn_r50_fpn_1x_coco.py'
22

3-
img_norm_cfg = dict(
4-
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
5-
min_values = (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
63
train_pipeline = [
7-
dict(type='LoadImageFromFile'),
4+
dict(
5+
type='LoadImageFromFile',
6+
file_client_args={{_base_.file_client_args}}),
87
dict(type='LoadAnnotations', with_bbox=True),
98
dict(
10-
type='Resize',
11-
img_scale=[(1333, value) for value in min_values],
12-
multiscale_mode='value',
9+
type='RandomChoiceResize',
10+
scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
11+
(608, 1333), (640, 1333), (672, 1333), (704, 1333),
12+
(736, 1333), (768, 1333), (800, 1333)],
1313
keep_ratio=True),
14-
dict(type='RandomFlip', flip_ratio=0.5),
15-
dict(type='Normalize', **img_norm_cfg),
16-
dict(type='Pad', size_divisor=32),
17-
dict(type='DefaultFormatBundle'),
18-
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
14+
dict(type='RandomFlip', prob=0.5),
15+
dict(type='PackDetInputs')
1916
]
2017

21-
data = dict(train=dict(pipeline=train_pipeline))
22-
lr_config = dict(policy='step', step=[27, 33])
23-
runner = dict(type='EpochBasedRunner', max_epochs=36)
18+
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
19+
20+
# learning policy
21+
max_epochs = 36
22+
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs)
23+
24+
param_scheduler = [
25+
dict(
26+
type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
27+
dict(
28+
type='MultiStepLR',
29+
begin=0,
30+
end=max_epochs,
31+
by_epoch=True,
32+
milestones=[27, 33],
33+
gamma=0.1)
34+
]

mmdet/models/dense_heads/base_dense_head.py

+6-3
Original file line number | Diff line number | Diff line change
@@ -265,10 +265,13 @@ def predict_by_feat(self,
265265

266266
for img_id in range(len(batch_img_metas)):
267267
img_meta = batch_img_metas[img_id]
268-
cls_score_list = select_single_mlvl(cls_scores, img_id)
269-
bbox_pred_list = select_single_mlvl(bbox_preds, img_id)
268+
cls_score_list = select_single_mlvl(
269+
cls_scores, img_id, detach=True)
270+
bbox_pred_list = select_single_mlvl(
271+
bbox_preds, img_id, detach=True)
270272
if with_score_factors:
271-
score_factor_list = select_single_mlvl(score_factors, img_id)
273+
score_factor_list = select_single_mlvl(
274+
score_factors, img_id, detach=True)
272275
else:
273276
score_factor_list = [None for _ in range(num_levels)]
274277

0 commit comments

Comments
 (0)