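"""
Viola-Jones object detection.

A fixed-size detector window is slid over an image pyramid built from the input
image, and a Haar-feature cascade loaded from a JSON file decides for every window
whether it contains the object (e.g. a face). Overlapping raw detections are grouped
into single objects and drawn onto the original image.
"""
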
from typing import List, Tuple
from math import sqrt
import json
import numpy as np
from PIL import Image, ImageDraw


def show_image(img: np.ndarray):
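    """Display an image given as a numpy array using PIL."""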
img = Image.fromarray(img.astype(np.uint8))
img.show()


def rgb2gray(img: np.ndarray) -> np.ndarray:
"""
Convert an RGB image to grayscale.
Algorithm source: https://en.wikipedia.org/wiki/Grayscale#Converting_color_to_grayscale
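    For example, a pure red pixel (255, 0, 0) maps to int(0.299 * 255) = 76.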
"""
# If the image is already grayscale, return it as is
if len(img.shape) == 2:
return img
return np.dot(img[..., :3], [0.299, 0.587, 0.114]).astype(np.uint8)


def draw_boxes_around_detections(
    img: Image.Image, detections: List[Tuple[Tuple[int, int], int]]
) -> Image.Image:
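    """
    Draw a square outline for every detection ((y, x) top-left corner, size) and
    return the annotated image.
    """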
# Convert image to RGB if it is grayscale so that the boxes drawn have a color
if img.mode == "L":
img = img.convert("RGB")
draw = ImageDraw.Draw(img)
for detection in detections:
# Swap x and y because PIL wants the first entry to be x while we have that as y
left_top = (detection[0][1], detection[0][0])
right_bottom = (
left_top[0] + detection[1],
left_top[1] + detection[1],
)
draw.rectangle((left_top, right_bottom), outline="blue")
return img


def load_cascade(path: str) -> dict:
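    """Load a trained Haar cascade from a JSON file and return its 'cascade' entry."""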
with open(path) as json_file:
return json.load(json_file)["cascade"]


def scale_nearest_neighbor(img: np.ndarray, scale: float) -> np.ndarray:
"""
Scale an image by nearest neighbor interpolation.
    A scale larger than 1 means that the image is scaled down (i.e. made smaller).
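    For example, scale=2.0 turns a 100x80 image into a 50x40 image, with every output
    pixel copied from the nearest source pixel.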
"""
new_img = np.zeros(
(int(img.shape[0] / scale), int(img.shape[1] / scale)), dtype=np.uint8
)
for i in range(new_img.shape[0]):
for j in range(new_img.shape[1]):
            # Clamp the indices with min() so the nearest-neighbor lookup stays inside the original image
            new_img[i, j] = img[
                min(img.shape[0] - 1, int(i * scale)), min(img.shape[1] - 1, int(j * scale))
            ]
return new_img


def integral_image(img: np.ndarray, square: bool = False) -> np.ndarray:
"""
In an integral image each pixel is the sum of all pixels in the original image
that are 'left and above' the pixel.
Original Integral
+-------- +----------
| 1 2 3 . | 1 3 6 .
| 4 5 6 . | 5 12 21 .
| . . . . | . . . . .
Algorithm source: https://github.com/scikit-image/scikit-image/blob/main/skimage/transform/integral.py
Table source: https://github.com/Simon-Hohberg/Viola-Jones/blob/master/violajones/IntegralImage.py
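    When square=True the pixel values are squared before summation, producing the
    squared integral image that detect_objects uses for the window variance.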
"""
img = img.astype(np.int64)
    if square:
        img = img ** 2
return np.cumsum(np.cumsum(img, axis=0), axis=1)


def calc_rectangle_feature(
    integral_img: np.ndarray,
    left_top: Tuple[int, int],
    right_bottom: Tuple[int, int],
    weight: int = 1,
) -> int:
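    """
    Sum the pixels inside the rectangle spanned by left_top and right_bottom (both
    inclusive, in (y, x) coordinates) using four lookups in the integral image:
    ii[bottom, right] - ii[top - 1, right] - ii[bottom, left - 1] + ii[top - 1, left - 1].
    The result is scaled by the given weight.
    """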
feature = integral_img[right_bottom[0], right_bottom[1]]
    # The checks below avoid out-of-bounds lookups when the rectangle touches the top or left border
if left_top[0] != 0:
if left_top[1] != 0:
feature += integral_img[left_top[0] - 1, left_top[1] - 1]
feature -= integral_img[left_top[0] - 1, right_bottom[1]]
if left_top[1] != 0:
feature -= integral_img[right_bottom[0], left_top[1] - 1]
return weight * feature


def detect_objects(
    gray_img: np.ndarray,
    cascade: dict,
    detector_size: int = 24,
    scale_factor: float = 1.25,
    delta: float = 1.5,
) -> List[Tuple[Tuple[int, int], int]]:
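    """
    Slide a detector_size x detector_size window over an image pyramid built from
    gray_img and evaluate the Haar-feature cascade on every window. The image (rather
    than the detector) is repeatedly shrunk by scale_factor; delta controls the step
    of the sliding window. Every detection is returned as ((y, x) of the top-left
    corner in the original image, window size in original-image pixels).
    """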
step_size = int(delta * scale_factor)
# Will contain all detected faces.
detections = []
current_scale = 1.0
# Loop over the scales in the image pyramid. Please note that in the original implementation as
# proposed by Viola and Jones, the detector is scaled instead of the image.
while (
gray_img.shape[0] / current_scale >= detector_size
and gray_img.shape[1] / current_scale >= detector_size
):
current_image = scale_nearest_neighbor(gray_img, current_scale)
current_integral_image = integral_image(current_image)
current_squared_integral_image = integral_image(current_image, True)
# Loop over the image sub windows (i = height, j = width)
for i in range(0, current_image.shape[0] - detector_size + 1, step_size):
for j in range(0, current_image.shape[1] - detector_size + 1, step_size):
                # The window sum and squared sum are computed from the integral images; this
                # formulation also maps well to a hardware implementation.
regular_sum = calc_rectangle_feature(
current_integral_image,
(i, j),
(i + detector_size - 2, j + detector_size - 2),
)
squared_sum = calc_rectangle_feature(
current_squared_integral_image,
(i, j),
(i + detector_size - 2, j + detector_size - 2),
)
                variance = squared_sum * (detector_size ** 2) - regular_sum ** 2
                # 'variance' is the true window variance scaled by (detector_size^2)^2, so
                # std = sqrt(variance) = detector_size^2 * true standard deviation. The extra
                # detector_size^2 factor is kept because the feature thresholds below are
                # applied to un-normalized window sums.
                std = int(sqrt(variance))
window_left_top = np.array([i, j])
detected_face = True
# Loop over the detection cascade
for layer in cascade:
layer_sum = 0
# Loop over the features in the current layer of the cascade
for feature in layer["features"]:
feature_sum = 0
for rectangles in feature["rectangles"]:
feature_sum += calc_rectangle_feature(
current_integral_image,
window_left_top + rectangles["left_top"],
window_left_top + rectangles["right_bottom"],
rectangles["weight"],
)
# Apply variance normalization as explained in the original paper by
# multiplying with the standard deviation of the window.
if feature_sum >= feature["threshold"] * std:
layer_sum += feature["pass_value"]
else:
layer_sum += feature["fail_value"]
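                    # Reject the window as soon as one stage's sum falls below its (scaled) threshold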
if layer_sum < 0.4 * layer["threshold"]:
detected_face = False
break
                if detected_face:
detections.append(
(
(int(i * current_scale), int(j * current_scale)),
int(detector_size * current_scale),
)
)
current_scale *= scale_factor
return detections


def overlap_ratio(
left_top_1: Tuple[int, int], size_1: int, left_top_2: Tuple[int, int], size_2: int
) -> float:
"""
Calculate the overlap ratio between two rectangles. The overlap ratio is equal to the
    intersection area divided by the union area.
All coordinates are in the form of (y, x) with the left top corner being (0, 0). All input
values should consist of integers.
Algorithm source: https://stackoverflow.com/questions/9324339/how-much-do-two-rectangles-overlap/9325084
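
    For example, two 10x10 squares with top-left corners (0, 0) and (5, 5) intersect
    in a 5x5 = 25 pixel area; the union is 100 + 100 - 25 = 175, so the ratio is
    25 / 175 ≈ 0.143.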
"""
intersection_area = max(
0,
min(left_top_1[1] + size_1, left_top_2[1] + size_2)
- max(left_top_1[1], left_top_2[1]),
) * max(
0,
min(left_top_1[0] + size_1, left_top_2[0] + size_2)
- max(left_top_1[0], left_top_2[0]),
)
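    # If one square fully contains the other, treat them as fully overlapping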
if intersection_area >= size_1 ** 2 or intersection_area >= size_2 ** 2:
return 1
union_area = size_1 ** 2 + size_2 ** 2 - intersection_area
return intersection_area / union_area


def average_detections(
detections: List[Tuple[Tuple[int, int], int]]
) -> Tuple[Tuple[int, int], int]:
"""
Calculate the average detection of a list of detections.
The average detection is calculated by taking the average of the left top corner of the
detections and the average size of the detections.
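    For example, averaging ((10, 20), 24) and ((14, 24), 28) gives ((12, 22), 26).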
"""
left_top = tuple(
np.array(
[
np.mean([detection[0][0] for detection in detections]),
np.mean([detection[0][1] for detection in detections]),
]
).astype(int)
)
size = int(np.mean([detection[1] for detection in detections]))
return left_top, size


def group_objects(
detections: List[Tuple[Tuple[int, int], int]],
overlap_threshold: float = 0.4,
min_neighbors: int = 3,
) -> List[Tuple[Tuple[int, int], int]]:
"""
Group detections that are close to each other. The overlap threshold is used to determine if two
detections are close enough to each other to be grouped together as one object.
    An overlap threshold of 1.0 means that two detections have to be exactly the same to be grouped,
a threshold of 0.5 means that the ratio of intersection area to union area has to be at least
0.5. 0.0 means that they do not have to overlap in order to be grouped together.
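    A detection is only kept (and averaged with its neighbors) if at least min_neighbors
    other detections overlap it by more than the threshold; isolated detections are discarded.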
"""
already_combined_detections = []
final_detections = []
for i in range(len(detections)):
overlapping_rectangles = []
for j in range(len(detections)):
if i != j and i not in already_combined_detections:
if (
overlap_ratio(
detections[i][0],
detections[i][1],
detections[j][0],
detections[j][1],
)
> overlap_threshold
):
overlapping_rectangles.append(j)
if len(overlapping_rectangles) >= min_neighbors:
already_combined_detections.extend(overlapping_rectangles)
already_combined_detections.append(i)
detections_to_average = [detections[k] for k in overlapping_rectangles]
detections_to_average.append(detections[i])
final_detections.append(average_detections(detections_to_average))
return final_detections


if __name__ == "__main__":
image_path = "images/man.jpeg"
cascade_path = "cascades/haarcascade_frontalface_default.json"
cascade = load_cascade(cascade_path)
original_image = Image.open(image_path)
image_array = np.array(original_image)
    # Factor by which the image is shrunk at every level of the image pyramid
scale_factor = 1.2
# Size of the detector in pixels.
detector_size = 24
gray_image = rgb2gray(image_array)
detections = detect_objects(gray_image, cascade, detector_size, scale_factor, 1.5)
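    # Merge overlapping raw detections into single objects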
detections = group_objects(detections)
image_with_detections = draw_boxes_around_detections(original_image, detections)
# Show the image with the detected faces.
image_with_detections.show()
# Save the image with the detected faces.
image_with_detections.save("-result.".join(image_path.rsplit(".", 1)))