import cv2
import numpy as np

def hook(frame_data, _):
    frame = frame_data['original']  # raw frame received from the pipeline
    model_output = frame_data['inference_output']  # inference results for this frame
    if len(model_output) > 0:
        yolo_input_shape = (640, 640, 3)  # h, w, c
        boxes, scores, class_ids = postprocess_yolo_world(frame.shape, yolo_input_shape, model_output)
        class_labels = [yolo_classes[int(class_id)] for class_id in class_ids]
        for i in range(len(boxes)):
            draw_bbox(frame, boxes[i], class_labels[i], scores[i], color_palette[int(class_ids[i])])

    frame_data['modified'] = frame  # store the annotated frame back into frame_data

#################################################
# Util functions to make the hook more readable #
#################################################
yolo_classes = ['hard hat', 'gloves', 'protective boot', 'reflective vest', 'person']
color_palette = np.random.uniform(0, 255, size=(len(yolo_classes), 3))

def draw_bbox(image, box, label='', score=None, color=(255, 0, 255), txt_color=(255, 255, 255)):
    lw = max(round(sum(image.shape) / 2 * 0.003), 2)  # line width scaled to the image size
    p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
    cv2.rectangle(image, p1, p2, color, thickness=lw, lineType=cv2.LINE_AA)
    if label:
        tf = max(lw - 1, 1)  # font thickness
        w, h = cv2.getTextSize(str(label), 0, fontScale=lw / 3, thickness=tf)[0]  # text width, height
        outside = p1[1] - h >= 3  # True if the label fits above the box
        p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
        cv2.rectangle(image, p1, p2, color, -1, cv2.LINE_AA)  # filled label background
        if score is not None:
            cv2.putText(image, f'{label} - {score}', (p1[0], p1[1] - 2 if outside else p1[1] + h + 2),
                        0, lw / 3, txt_color, thickness=tf, lineType=cv2.LINE_AA)
        else:
            cv2.putText(image, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2),
                        0, lw / 3, txt_color, thickness=tf, lineType=cv2.LINE_AA)

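# Illustrative sketch (not part of the original hook): a hedged example of calling
# draw_bbox directly on a blank canvas. The coordinates, label and score below are
# made-up values; the lines are left commented out so the module behaves exactly
# as before when it is loaded.
# demo_canvas = np.zeros((480, 640, 3), dtype=np.uint8)
# draw_bbox(demo_canvas, [50, 60, 200, 220], label='person', score=0.87, color=(0, 255, 0))
# cv2.imwrite('draw_bbox_demo.png', demo_canvas)
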
def postprocess_yolo_world(original_frame_shape, resized_img_shape, output):
    original_height, original_width, _ = original_frame_shape
    resized_height, resized_width, _ = resized_img_shape

    boxes = np.array(output['boxes'][0])
    classes = np.array(output['labels'][0])
    scores = np.array(output['scores'][0])

    # Filter out entries with negative class indexes or scores
    neg_indexes_classes = np.where(classes < 0)[0]
    neg_indexes_scores = np.where(scores < 0)[0]
    neg_indexes = np.concatenate((neg_indexes_classes, neg_indexes_scores))

    mask = np.ones(classes.shape, dtype=bool)
    mask[neg_indexes] = False

    boxes = boxes[mask]
    classes = classes[mask]
    scores = scores[mask]

    # Arrays to accumulate the results
    result_boxes = []
    result_classes = []
    result_scores = []

    # Calculate the scaling factor for the bounding box coordinates
    if original_height > original_width:
        scale_factor = original_height / resized_height
    else:
        scale_factor = original_width / resized_width

    # Rescale the output boxes to the original frame size
    for i, score in enumerate(scores):
        if score < 0.05:  # apply confidence threshold
            continue
        if not score < 1:
            continue  # remove bad predictions that return a score of 1.0

        x1, y1, x2, y2 = boxes[i][0], boxes[i][1], boxes[i][2], boxes[i][3]

        # Calculate the scaled coordinates of the bounding box.
        # The original image was padded to be square before resizing.
        if original_height > original_width:
            # the padding was added on the width
            pad = (resized_width - original_width / scale_factor) // 2
            x1 = int((x1 - pad) * scale_factor)
            y1 = int(y1 * scale_factor)
            x2 = int((x2 - pad) * scale_factor)
            y2 = int(y2 * scale_factor)
        else:
            # the padding was added on the height
            pad = (resized_height - original_height / scale_factor) // 2
            x1 = int(x1 * scale_factor)
            y1 = int((y1 - pad) * scale_factor)
            x2 = int(x2 * scale_factor)
            y2 = int((y2 - pad) * scale_factor)

        result_classes.append(classes[i])
        result_scores.append(score)
        result_boxes.append([x1, y1, x2, y2])

    return result_boxes, result_scores, result_classes
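
# ---------------------------------------------------------------------------
# Local sanity check: an illustrative sketch, not part of the hook pipeline.
# The dict layout below simply mirrors what the code above reads, i.e. the
# 'original' / 'inference_output' / 'modified' keys of frame_data and the
# 'boxes' / 'labels' / 'scores' keys of the model output (each wrapped in a
# batch dimension). All numeric values are made-up placeholders.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    dummy_frame = np.zeros((720, 1280, 3), dtype=np.uint8)
    dummy_output = {
        'boxes': [[[100.0, 150.0, 300.0, 400.0]]],  # xyxy in the 640x640 letterboxed space
        'labels': [[4]],                            # index into yolo_classes ('person')
        'scores': [[0.82]],
    }
    frame_data = {'original': dummy_frame, 'inference_output': dummy_output}
    hook(frame_data, None)
    print('Modified frame shape:', frame_data['modified'].shape)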