@@ -1,9 +1,18 @@
-_base_ = '../_base_/default_runtime.py'
-
+_base_ = [
+    '../_base_/datasets/coco_instance.py', '../_base_/default_runtime.py'
+]
+img_norm_cfg = dict(
+    mean=[123.68, 116.78, 103.94], std=[58.40, 57.12, 57.38], to_rgb=True)
 # model settings
-img_size = 550
+input_size = 550
 model = dict(
     type='YOLACT',
+    data_preprocessor=dict(
+        type='DetDataPreprocessor',
+        mean=img_norm_cfg['mean'],
+        std=img_norm_cfg['std'],
+        bgr_to_rgb=img_norm_cfg['to_rgb'],
+        pad_mask=True),
     backbone=dict(
         type='ResNet',
         depth=50,
@@ -56,11 +65,8 @@
         num_protos=32,
         num_classes=80,
         max_masks_to_train=100,
-        loss_mask_weight=6.125),
-    segm_head=dict(
-        type='YOLACTSegmHead',
-        num_classes=80,
-        in_channels=256,
+        loss_mask_weight=6.125,
+        with_seg_branch=True,
         loss_segm=dict(
             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
     # training and testing settings
@@ -72,6 +78,7 @@
             min_pos_iou=0.,
             ignore_iof_thr=-1,
             gt_max_assign_all=False),
+        sampler=dict(type='PseudoSampler'),  # YOLACT should use PseudoSampler
         # smoothl1_beta=1.,
         allowed_border=-1,
         pos_weight=-1,
@@ -81,16 +88,16 @@
         nms_pre=1000,
         min_bbox_size=0,
         score_thr=0.05,
+        mask_thr=0.5,
         iou_thr=0.5,
         top_k=200,
-        max_per_img=100))
+        max_per_img=100,
+        mask_thr_binary=0.5))
 # dataset settings
-dataset_type = 'CocoDataset'
-data_root = 'data/coco/'
-img_norm_cfg = dict(
-    mean=[123.68, 116.78, 103.94], std=[58.40, 57.12, 57.38], to_rgb=True)
 train_pipeline = [
-    dict(type='LoadImageFromFile'),
+    dict(
+        type='LoadImageFromFile',
+        file_client_args={{_base_.file_client_args}}),
     dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
     dict(type='FilterAnnotations', min_gt_bbox_wh=(4.0, 4.0)),
     dict(
@@ -102,62 +109,61 @@
         type='MinIoURandomCrop',
         min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
         min_crop_size=0.3),
-    dict(type='Resize', img_scale=(img_size, img_size), keep_ratio=False),
-    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='Resize', scale=(input_size, input_size), keep_ratio=False),
+    dict(type='RandomFlip', prob=0.5),
     dict(
         type='PhotoMetricDistortion',
         brightness_delta=32,
         contrast_range=(0.5, 1.5),
         saturation_range=(0.5, 1.5),
         hue_delta=18),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
+    dict(type='PackDetInputs')
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
+    dict(type='Resize', scale=(input_size, input_size), keep_ratio=False),
+    dict(
+        type='PackDetInputs',
+        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                   'scale_factor'))
+]
+train_dataloader = dict(
+    batch_size=8,
+    num_workers=4,
+    batch_sampler=None,
+    dataset=dict(pipeline=train_pipeline))
+val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+max_epochs = 55
+# training schedule for 55e
+train_cfg = dict(
+    type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+
+# learning rate
+param_scheduler = [
+    dict(type='LinearLR', start_factor=0.1, by_epoch=False, begin=0, end=500),
     dict(
-        type='MultiScaleFlipAug',
-        img_scale=(img_size, img_size),
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=False),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
+        type='MultiStepLR',
+        begin=0,
+        end=max_epochs,
+        by_epoch=True,
+        milestones=[20, 42, 49, 52],
+        gamma=0.1)
 ]
-data = dict(
-    samples_per_gpu=8,
-    workers_per_gpu=4,
-    train=dict(
-        type=dataset_type,
-        ann_file=data_root + 'annotations/instances_train2017.json',
-        img_prefix=data_root + 'train2017/',
-        pipeline=train_pipeline),
-    val=dict(
-        type=dataset_type,
-        ann_file=data_root + 'annotations/instances_val2017.json',
-        img_prefix=data_root + 'val2017/',
-        pipeline=test_pipeline),
-    test=dict(
-        type=dataset_type,
-        ann_file=data_root + 'annotations/instances_val2017.json',
-        img_prefix=data_root + 'val2017/',
-        pipeline=test_pipeline))
+
 # optimizer
-optimizer = dict(type='SGD', lr=1e-3, momentum=0.9, weight_decay=5e-4)
-optimizer_config = dict()
-# learning policy
-lr_config = dict(
-    policy='step',
-    warmup='linear',
-    warmup_iters=500,
-    warmup_ratio=0.1,
-    step=[20, 42, 49, 52])
-runner = dict(type='EpochBasedRunner', max_epochs=55)
-cudnn_benchmark = True
-evaluation = dict(metric=['bbox', 'segm'])
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=dict(type='SGD', lr=1e-3, momentum=0.9, weight_decay=5e-4))
+
+custom_hooks = [
+    dict(type='CheckInvalidLossHook', interval=50, priority='VERY_LOW')
+]
+
+env_cfg = dict(cudnn_benchmark=True)
 
 # NOTE: `auto_scale_lr` is for automatically scaling LR,
 # USER SHOULD NOT CHANGE ITS VALUES.
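A quick way to sanity-check a migrated config like this is to load it with MMEngine and print a few of the resolved fields. The snippet below is only a sketch: the config path is an assumed placeholder, and it presumes an MMDetection 3.x environment in which the `_base_` files (including the `file_client_args` they define) resolve correctly.

from mmengine.config import Config

# Assumed path -- point this at the migrated YOLACT config in your checkout.
cfg = Config.fromfile('configs/yolact/yolact_r50_1xb8-55e_coco.py')

# Spot-check a few values introduced by this migration.
print(cfg.model.data_preprocessor['type'])      # expected: 'DetDataPreprocessor'
print(cfg.train_dataloader['batch_size'])       # expected: 8
print(cfg.train_cfg['max_epochs'])              # expected: 55
print([t['type'] for t in cfg.train_pipeline])  # last entry should be 'PackDetInputs'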
|
|