-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathdetector.py
238 lines (221 loc) · 9.4 KB
/
detector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
from torch import device, load, Tensor
from torch import nn
from torchvision.models.detection import MaskRCNN
from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork, LastLevelMaxPool
from torchvision.models.feature_extraction import create_feature_extractor
from torchvision.models import convnext_base, ConvNeXt_Base_Weights, convnext_tiny, ConvNeXt_Tiny_Weights, convnext_small, ConvNeXt_Small_Weights, convnext_large, ConvNeXt_Large_Weights
import numpy as np
from PIL import Image
class BackboneWithFPN(nn.Module):
    """
    Chain a feature-extracting backbone with a Feature Pyramid Network.

    The wrapped backbone is called first and whatever it returns is handed
    straight to a ``torchvision.ops.FeaturePyramidNetwork``. The backbone is
    used as given; this class does not modify or re-trace it.

    Parameters
    ----------
    backbone (nn.Module): Module producing the feature maps consumed by the FPN.
    in_channels_list (List[int]): Number of channels of each feature map
        returned by ``backbone``, in the order the maps are returned.
    out_channels (int): Number of channels of every FPN output map.
    extra_blocks (callable, optional): Default None.
        Extra FPN block; a new ``LastLevelMaxPool`` is created when omitted.
    norm_layer (callable, optional): Default None.
        Normalization layer forwarded to the FPN.

    Attributes
    ----------
    body : nn.Module
        The backbone passed to the constructor.
    fpn : FeaturePyramidNetwork
        The pyramid network applied to the output of ``body``.
    out_channels : int
        The number of channels in the FPN.
    """

    def __init__(
        self,
        backbone,
        in_channels_list,
        out_channels,
        extra_blocks=None,
        norm_layer=None,
    ):
        super().__init__()

        self.body = backbone
        self.fpn = FeaturePyramidNetwork(
            in_channels_list=in_channels_list,
            out_channels=out_channels,
            # Fall back to a max-pool level when the caller supplies nothing.
            extra_blocks=LastLevelMaxPool() if extra_blocks is None else extra_blocks,
            norm_layer=norm_layer,
        )
        self.out_channels = out_channels

    def forward(self, x):
        """Run ``x`` through the backbone, then through the FPN."""
        features = self.body(x)
        return self.fpn(features)
def convnext_fpn_backbone(
    backbone_name,
    trainable_layers,
    extra_blocks=None,
    norm_layer=None,
    feature_dict=None,
    out_channels=256,
):
    """
    Returns an FPN-extended backbone network using a feature extractor
    based on models developed in the article 'A ConvNet for the 2020s'.
    For detailed information about the feature extractor ConvNeXt, read the article.
    https://arxiv.org/abs/2201.03545

    Parameters
    ----------
    backbone_name : str
        ConvNeXt architecture. Possible values are 'convnext_tiny', 'convnext_small',
        'convnext_base' or 'convnext_large'.
    trainable_layers : int
        Number of trainable (not frozen) children of the ``features`` module,
        counted from the last one ("7") down to the first one ("0").
        Valid values are between 0 and 8, with 8 meaning all backbone layers
        are trainable.
    extra_blocks (ExtraFPNBlock or None): default a ``LastLevelMaxPool`` is used.
        If provided, extra operations will be performed. It is expected to take
        the fpn features, the original features and the names of the original
        features as input, and returns a new list of feature maps and their
        corresponding names.
    norm_layer (callable, optional): Default None.
        Module specifying the normalization layer to use. It is recommended to use
        the default value. For details visit:
        (https://github.com/facebookresearch/maskrcnn-benchmark/issues/267)
    feature_dict : dictionary, optional
        Maps names of children of the ConvNeXt ``features`` module to the
        names of the returned feature maps. Defaults to
        ``{'1': '0', '3': '1', '5': '2', '7': '3'}``.
        NOTE(review): the FPN input channels are fixed per architecture for
        this default selection; a custom mapping that picks nodes with other
        channel counts will presumably not match - confirm before using one.
    out_channels (int): defaults to 256.
        Number of channels in the FPN.

    Returns
    -------
    BackboneWithFPN : torch.nn.Module
        Returns a specified ConvNeXt backbone with FPN on top.
        Freezes the specified number of layers in the backbone.

    Raises
    ------
    ValueError
        If ``backbone_name`` is not supported or ``trainable_layers`` is
        outside the range [0, 8].
    """
    # name -> (model builder, pretrained weights, channels of the four returned maps).
    # Only architectures this module can actually build are listed, so the
    # error message below never advertises a name that would be rejected.
    supported = {
        'convnext_tiny': (convnext_tiny, ConvNeXt_Tiny_Weights.DEFAULT, [96, 192, 384, 768]),
        'convnext_small': (convnext_small, ConvNeXt_Small_Weights.DEFAULT, [96, 192, 384, 768]),
        'convnext_base': (convnext_base, ConvNeXt_Base_Weights.DEFAULT, [128, 256, 512, 1024]),
        'convnext_large': (convnext_large, ConvNeXt_Large_Weights.DEFAULT, [192, 384, 768, 1536]),
    }

    # Validate both arguments before the (potentially slow) model construction.
    if backbone_name not in supported:
        raise ValueError(f"Backbone names should be in the {list(supported)}, got {backbone_name}")
    if trainable_layers < 0 or trainable_layers > 8:
        raise ValueError(f"Trainable layers should be in the range [0,8], got {trainable_layers}")

    # A fresh dict per call avoids sharing one mutable default between calls.
    if feature_dict is None:
        feature_dict = {'1': '0', '3': '1', '5': '2', '7': '3'}

    build_model, weights, in_channels_list = supported[backbone_name]
    backbone = create_feature_extractor(build_model(weights=weights).features, feature_dict)

    # select layers that wont be frozen: the last `trainable_layers` children.
    # "0" is included so that 8 really unfreezes every child, as documented.
    layers_to_train = set(["7", "6", "5", "4", "3", "2", "1", "0"][:trainable_layers])
    for name, parameter in backbone.named_parameters():
        # The first dotted component of a parameter name is its top-level child.
        if name.split(".", 1)[0] not in layers_to_train:
            parameter.requires_grad_(False)

    if extra_blocks is None:
        extra_blocks = LastLevelMaxPool()

    return BackboneWithFPN(
        backbone, in_channels_list, out_channels,
        extra_blocks=extra_blocks, norm_layer=norm_layer
    )
def get_model(model_path, backbone="convnext_base",
              layers=5, num_classes=2, max_size=512, min_size=512):
    """
    Build a MaskRCNN with a ConvNeXt-FPN backbone and load trained weights.

    Parameters
    ----------
    model_path : (str)
        Path of the checkpoint file. The loaded object must contain the
        model weights under the key ``"model_state_dict"``.
    backbone (str, optional): Defaults to "convnext_base".
        Name of the backbone architecture that was used in training.
    layers (int, optional): Defaults to 5.
        Number of trainable (not frozen) backbone layers, forwarded to
        ``convnext_fpn_backbone``. Valid values are between 0 and 8.
        Just for backbone initialization.
    num_classes (int, optional): Defaults to 2.
        Number of classes for prediction.
        Just for MaskRCNN model initialization.
    max_size (int, optional): Defaults to 512.
        Max image size given input for model.
        Just for MaskRCNN model initialization.
    min_size (int, optional): Defaults to 512.
        Min image size given input for model.
        Just for MaskRCNN model initialization.

    Returns
    -------
    model: torchvision.model
        MaskRCNN model with the checkpoint weights loaded, in eval mode.
    """
    fpn_backbone = convnext_fpn_backbone(backbone, layers)
    model = MaskRCNN(
        fpn_backbone,
        num_classes=num_classes,
        max_size=max_size,
        min_size=min_size,
    )

    # Tensors are mapped to the CPU regardless of where they were saved from.
    # NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
    checkpoint = load(model_path, map_location=device('cpu'))
    model.load_state_dict(checkpoint["model_state_dict"])

    model.eval()
    return model
def get_prediction(model, img_path, confidence, CLASS_NAMES, mask_th=0.5):
    """
    It reads the image from the given file path and
    returns the output predicted by the model.

    Parameters
    ----------
    model : pytorch model
        The model to generate the predictions. It is called with a list
        holding one image tensor and must return a list whose first item
        is a dict with the keys 'scores', 'masks', 'boxes' and 'labels'.
    img_path : str
        The file path with the image to be predicted.
    confidence : float
        Enter a value between 0 and 1 to access reliable results.
        A value close to 1 is the result that the model predicted
        with higher confidence. If no detection scores above this value,
        it is lowered in steps of 0.1 until at least one does.
    CLASS_NAMES : list
        Used to return class names based on a list of label names.
    mask_th (float, optional): Defaults to 0.5.
        Enter a value between 0 and 1 to threshold the probability
        map generated by the model.

    Returns
    ----------
    pred_boxes : list
        One ``[(b[1], b[0]), (b[3], b[2])]`` entry per kept detection, where
        ``b`` is the box row predicted by the model.
    masks : np.ndarray of bool
        Thresholded masks according to the given threshold value, with the
        detection index as first axis.
    pred_class : list
        Name of predicted classes.

    All three are empty when the model returns no detection at all.
    """
    # Function-scope import: gradients are never needed for prediction.
    from torch import no_grad

    # The context manager closes the file; convert() returns a loaded copy.
    with Image.open(img_path) as img_file:
        rgb = img_file.convert('RGB')
    # HWC uint8 -> CHW float tensor.
    # NOTE(review): values stay in 0-255 while torchvision detection models
    # document a 0-1 input range; left as is because it has to match how the
    # checkpoint was trained - confirm.
    img = Tensor(np.array(rgb, dtype=np.uint8).transpose((2, 0, 1)))
    img = img.to("cpu")

    with no_grad():
        pred = model([img])[0]

    pred_score = list(pred['scores'].detach().cpu().numpy())

    # Number of leading detections to keep: everything up to the last one
    # whose score exceeds the (possibly lowered) confidence.
    keep = 0
    if pred_score:  # without detections there is nothing to lower the bar for
        while True:
            passing = [i for i, score in enumerate(pred_score) if score > confidence]
            if passing:
                break
            confidence -= 0.1
        keep = passing[-1] + 1

    # Drop only the channel axis so a single detection keeps its leading axis.
    masks = (pred['masks'] > mask_th).squeeze(1).detach().cpu().numpy()[:keep]
    boxes = pred['boxes'].detach().cpu().numpy()[:keep]
    labels = pred['labels'].cpu().numpy()[:keep]

    pred_boxes = [[(b[1], b[0]), (b[3], b[2])] for b in boxes]
    pred_class = [CLASS_NAMES[i] for i in labels]
    return pred_boxes, masks, pred_class