dlunion caffe: training R-Net and O-Net with hard examples



As mentioned in the previous post, the dlunion author's Caffe version of MTCNN is trained without hard examples, so here we add hard examples when training R-Net and O-Net: the candidate windows that the already-trained P-Net produces on the WIDER FACE training images, labeled as positive, negative, or part samples according to their IoU with the ground-truth boxes.

Generating the R-Net training data

Run:

python create_hard1.py
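
create_hard1.py appends to ./24/pos_24.txt, ./24/neg_24.txt and ./24/part_24.txt and writes image crops under ./24/positive, ./24/negative and ./24/part; if these directories are not already in place from the earlier data-generation step, they need to be created first. A minimal sketch, assuming the paths used in the script:

import os

# output directories used by create_hard1.py
for d in ["./24/positive", "./24/negative", "./24/part"]:
    if not os.path.exists(d):
        os.makedirs(d)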

The code of create_hard1.py is as follows:

import sys
import tools
import caffe
import cv2
import numpy as np
import os
from utils import *

deploy = '../12/det1.prototxt'
caffemodel = '../12/det1.caffemodel'
net_12 = caffe.Net(deploy, caffemodel, caffe.TEST)

def view_bar(num, total):
    rate = float(num) / total
    rate_num = int(rate * 100)
    r = '\r[%s%s]%d%% (%d/%d)' % ("#" * rate_num, " " * (100 - rate_num), rate_num, num, total)
    sys.stdout.write(r)
    sys.stdout.flush()

def detectFace(img_path, threshold):
    img = cv2.imread(img_path)
    caffe_img = img.copy() - 128
    origin_h, origin_w, ch = caffe_img.shape
    scales = tools.calculateScales(img)
    out = []
    for scale in scales:
        hs = int(origin_h * scale)
        ws = int(origin_w * scale)
        scale_img = cv2.resize(caffe_img, (ws, hs))
        scale_img = np.swapaxes(scale_img, 0, 2)
        net_12.blobs['data'].reshape(1, 3, ws, hs)
        net_12.blobs['data'].data[...] = scale_img
        caffe.set_device(0)
        caffe.set_mode_gpu()
        out_ = net_12.forward()
        out.append(out_)
    image_num = len(scales)
    rectangles = []
    for i in range(image_num):
        cls_prob = out[i]['prob1'][0][1]
        roi = out[i]['conv4-2'][0]
        out_h, out_w = cls_prob.shape
        out_side = max(out_h, out_w)
        rectangle = tools.detect_face_12net(cls_prob, roi, out_side, 1 / scales[i], origin_w, origin_h, threshold[0])
        rectangles.extend(rectangle)
    return rectangles

anno_file = './wider_face_train.txt'
im_dir = "./WIDER_train/images/"
neg_save_dir = "./24/negative"
pos_save_dir = "./24/positive"
part_save_dir = "./24/part"
image_size = 24

f1 = open('./24/pos_24.txt', 'a')
f2 = open('./24/neg_24.txt', 'a')
f3 = open('./24/part_24.txt', 'a')

threshold = [0.6, 0.6, 0.7]

with open(anno_file, 'r') as f:
    annotations = f.readlines()
num = len(annotations)
print("%d pics in total" % num)

p_idx = 0  # positive
n_idx = 0  # negative
d_idx = 0  # dont care
image_idx = 0

for annotation in annotations:
    annotation = annotation.strip().split(' ')
    bbox = list(map(float, annotation[1:]))
    gts = np.array(bbox, dtype=np.float32).reshape(-1, 4)
    img_path = im_dir + annotation[0] + '.jpg'
    rectangles = detectFace(img_path, threshold)
    img = cv2.imread(img_path)
    image_idx += 1
    view_bar(image_idx, num)
    for box in rectangles:
        x_left, y_top, x_right, y_bottom, _ = box
        crop_w = x_right - x_left + 1
        crop_h = y_bottom - y_top + 1
        # ignore box that is too small or beyond image border
        if crop_w < image_size or crop_h < image_size:
            continue
        # compute intersection over union(IoU) between current box and all gt boxes
        Iou = IoU(box, gts)
        cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1]
        resized_im = cv2.resize(cropped_im, (image_size, image_size), interpolation=cv2.INTER_LINEAR)
        # save negative images and write label
        if np.max(Iou) < 0.3:
            # Iou with all gts must below 0.3
            save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
            f2.write("%s/negative/%s" % (image_size, n_idx) + ' 0\n')
            cv2.imwrite(save_file, resized_im)
            n_idx += 1
        else:
            # find gt_box with the highest iou
            idx = np.argmax(Iou)
            assigned_gt = gts[idx]
            x1, y1, x2, y2 = assigned_gt
            # compute bbox reg label
            offset_x1 = (x1 - x_left) / float(crop_w)
            offset_y1 = (y1 - y_top) / float(crop_h)
            offset_x2 = (x2 - x_right) / float(crop_w)
            offset_y2 = (y2 - y_bottom) / float(crop_h)
            # save positive and part-face images and write labels
            if np.max(Iou) >= 0.65:
                save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                f1.write("%s/positive/%s" % (image_size, p_idx) + ' 1 %.2f %.2f %.2f %.2f\n' % (offset_x1, offset_y1, offset_x2, offset_y2))
                cv2.imwrite(save_file, resized_im)
                p_idx += 1
            elif np.max(Iou) >= 0.4:
                save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                f3.write("%s/part/%s" % (image_size, d_idx) + ' -1 %.2f %.2f %.2f %.2f\n' % (offset_x1, offset_y1, offset_x2, offset_y2))
                cv2.imwrite(save_file, resized_im)
                d_idx += 1

f1.close()
f2.close()
f3.close()
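
Note that create_hard1.py also relies on from utils import * for the IoU helper that compares one candidate box against all ground-truth boxes; utils.py itself is not listed in this post. A minimal sketch of such a vectorized IoU, matching how it is called above (the actual utils.py may differ):

import numpy as np

def IoU(box, gts):
    # box: [x1, y1, x2, y2, score] from detectFace; gts: (N, 4) ground-truth array
    box_area = (box[2] - box[0] + 1) * (box[3] - box[1] + 1)
    gt_areas = (gts[:, 2] - gts[:, 0] + 1) * (gts[:, 3] - gts[:, 1] + 1)
    xx1 = np.maximum(box[0], gts[:, 0])
    yy1 = np.maximum(box[1], gts[:, 1])
    xx2 = np.minimum(box[2], gts[:, 2])
    yy2 = np.minimum(box[3], gts[:, 3])
    w = np.maximum(0, xx2 - xx1 + 1)
    h = np.maximum(0, yy2 - yy1 + 1)
    inter = w * h
    # one IoU value per ground-truth box, so np.max / np.argmax work as used above
    return inter / (box_area + gt_areas - inter)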

create_hard1.py imports tools.py; the code of that script is as follows:

import sys
from operator import itemgetter
import numpy as np
import cv2

'''
Function:
    calculate Intersection over Union
Input:
    rect_1: 1st rectangle
    rect_2: 2nd rectangle
Output:
    IoU
'''
def IoU(rect_1, rect_2):
    x11 = rect_1[0]    # first rectangle top left x
    y11 = rect_1[1]    # first rectangle top left y
    x12 = rect_1[2]    # first rectangle bottom right x
    y12 = rect_1[3]    # first rectangle bottom right y
    x21 = rect_2[0]    # second rectangle top left x
    y21 = rect_2[1]    # second rectangle top left y
    x22 = rect_2[2]    # second rectangle bottom right x
    y22 = rect_2[3]    # second rectangle bottom right y
    x_overlap = max(0, min(x12, x22) - max(x11, x21))
    y_overlap = max(0, min(y12, y22) - max(y11, y21))
    intersection = x_overlap * y_overlap
    union = (x12 - x11) * (y12 - y11) + (x22 - x21) * (y22 - y21) - intersection
    if union == 0:
        return 0
    else:
        return float(intersection) / union

'''
Function:
    calculate Intersection over Minimum area
Input:
    rect_1: 1st rectangle
    rect_2: 2nd rectangle
Output:
    IoM
'''
def IoM(rect_1, rect_2):
    x11 = rect_1[0]    # first rectangle top left x
    y11 = rect_1[1]    # first rectangle top left y
    x12 = rect_1[2]    # first rectangle bottom right x
    y12 = rect_1[3]    # first rectangle bottom right y
    x21 = rect_2[0]    # second rectangle top left x
    y21 = rect_2[1]    # second rectangle top left y
    x22 = rect_2[2]    # second rectangle bottom right x
    y22 = rect_2[3]    # second rectangle bottom right y
    x_overlap = max(0, min(x12, x22) - max(x11, x21))
    y_overlap = max(0, min(y12, y22) - max(y11, y21))
    intersection = x_overlap * y_overlap
    rect1_area = (y12 - y11) * (x12 - x11)
    rect2_area = (y22 - y21) * (x22 - x21)
    min_area = min(rect1_area, rect2_area)
    return float(intersection) / min_area

'''
Function:
    apply NMS (non-maximum suppression) on ROIs in the same scale
Input:
    rectangles: rectangles[i][0:4] is the position, rectangles[i][4] is the score
Output:
    rectangles: same format as input
'''
def NMS(rectangles, threshold, type):
    # sort candidates by score (descending) so higher-scoring boxes suppress lower ones
    rectangles = sorted(rectangles, key=itemgetter(4), reverse=True)
    result_rectangles = rectangles
    number_of_rects = len(result_rectangles)
    cur_rect = 0
    while cur_rect < number_of_rects:
        rects_to_compare = number_of_rects - cur_rect - 1
        cur_rect_to_compare = cur_rect + 1
        while rects_to_compare > 0:
            score = 0
            if type == 'iou':
                score = IoU(result_rectangles[cur_rect], result_rectangles[cur_rect_to_compare])
            else:
                score = IoM(result_rectangles[cur_rect], result_rectangles[cur_rect_to_compare])
            if score >= threshold:
                del result_rectangles[cur_rect_to_compare]  # delete the rectangle
                number_of_rects -= 1
            else:
                cur_rect_to_compare += 1  # skip to next rectangle
            rects_to_compare -= 1
        cur_rect += 1  # finished comparing for current rectangle
    return result_rectangles

def detect_face_12net(cls_prob, roi, out_side, scale, width, height, threshold):
    in_side = 2 * out_side + 11
    stride = 0
    if out_side != 1:
        stride = float(in_side - 12) / (out_side - 1)
    boundingBox = []
    for (x, y), prob in np.ndenumerate(cls_prob):
        if prob >= threshold:
            original_x1 = int((stride * x + 1) * scale)
            original_y1 = int((stride * y + 1) * scale)
            original_w = int((12.0 - 1) * scale)
            original_h = int((12.0 - 1) * scale)
            original_x2 = original_x1 + original_w
            original_y2 = original_y1 + original_h
            rect = []
            x1 = int(round(max(0, original_x1 + original_w * roi[0][x][y])))
            y1 = int(round(max(0, original_y1 + original_h * roi[1][x][y])))
            x2 = int(round(min(width, original_x2 + original_w * roi[2][x][y])))
            y2 = int(round(min(height, original_y2 + original_h * roi[3][x][y])))
            if x2 > x1 and y2 > y1:
                rect = [x1, y1, x2, y2, prob]
                boundingBox.append(rect)
    return NMS(boundingBox, 0.5, 'iou')

def filter_face_24net(cls_prob, roi, rectangles, width, height, threshold):
    boundingBox = []
    rect_num = len(rectangles)
    for i in range(rect_num):
        if cls_prob[i][1] > threshold:
            original_w = rectangles[i][2] - rectangles[i][0] + 1
            original_h = rectangles[i][3] - rectangles[i][1] + 1
            x1 = int(round(max(0, rectangles[i][0] + original_w * roi[i][0])))
            y1 = int(round(max(0, rectangles[i][1] + original_h * roi[i][1])))
            x2 = int(round(min(width, rectangles[i][2] + original_w * roi[i][2])))
            y2 = int(round(min(height, rectangles[i][3] + original_h * roi[i][3])))
            if x2 > x1 and y2 > y1:
                rect = [x1, y1, x2, y2, cls_prob[i][1]]
                boundingBox.append(rect)
    return NMS(boundingBox, 0.7, 'iou')

def filter_face_48net(cls_prob, roi, pts, rectangles, width, height, threshold):
    boundingBox = []
    rect_num = len(rectangles)
    for i in range(rect_num):
        if cls_prob[i][1] > threshold:
            rect = [rectangles[i][0], rectangles[i][1], rectangles[i][2], rectangles[i][3], cls_prob[i][1],
                    roi[i][0], roi[i][1], roi[i][2], roi[i][3],
                    pts[i][0], pts[i][5], pts[i][1], pts[i][6], pts[i][2], pts[i][7],
                    pts[i][3], pts[i][8], pts[i][4], pts[i][9]]
            boundingBox.append(rect)
    rectangles = NMS(boundingBox, 0.7, 'iom')
    rect = []
    for rectangle in rectangles:
        roi_w = rectangle[2] - rectangle[0] + 1
        roi_h = rectangle[3] - rectangle[1] + 1
        x1 = round(max(0, rectangle[0] + rectangle[5] * roi_w))
        y1 = round(max(0, rectangle[1] + rectangle[6] * roi_h))
        x2 = round(min(width, rectangle[2] + rectangle[7] * roi_w))
        y2 = round(min(height, rectangle[3] + rectangle[8] * roi_h))
        pt0 = rectangle[9] * roi_w + rectangle[0] - 1
        pt1 = rectangle[10] * roi_h + rectangle[1] - 1
        pt2 = rectangle[11] * roi_w + rectangle[0] - 1
        pt3 = rectangle[12] * roi_h + rectangle[1] - 1
        pt4 = rectangle[13] * roi_w + rectangle[0] - 1
        pt5 = rectangle[14] * roi_h + rectangle[1] - 1
        pt6 = rectangle[15] * roi_w + rectangle[0] - 1
        pt7 = rectangle[16] * roi_h + rectangle[1] - 1
        pt8 = rectangle[17] * roi_w + rectangle[0] - 1
        pt9 = rectangle[18] * roi_h + rectangle[1] - 1
        score = rectangle[4]
        rect_ = np.round([x1, y1, x2, y2, pt0, pt1, pt2, pt3, pt4, pt5, pt6, pt7, pt8, pt9]).astype(int)
        rect_ = np.append(rect_, score)
        rect.append(rect_)
    return rect

def calculateScales(img):
    caffe_img = img.copy()
    h, w, ch = caffe_img.shape
    pr_scale = 1000.0 / max(h, w)
    w = int(w * pr_scale)
    h = int(h * pr_scale)
    scales = []
    factor = 0.7937
    factor_count = 0
    minl = min(h, w)
    while minl >= 12:
        scales.append(pr_scale * pow(factor, factor_count))
        minl *= factor
        factor_count += 1
    return scales
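
As a quick illustration of the image pyramid that detectFace builds through tools.calculateScales, the function can be called on a dummy image; the 1024x768 size below is just a hypothetical example:

import numpy as np
import tools

# hypothetical 768x1024 image; only the shape matters here
img = np.zeros((768, 1024, 3), dtype=np.uint8)
scales = tools.calculateScales(img)
print(scales[:3])
# roughly [0.977, 0.775, 0.615]: the longer side is first normalized to 1000 px,
# then each scale is the previous one times 0.7937, stopping once the shorter
# side would drop below the 12 px P-Net input size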

The code for generating the O-Net hard examples is similar; you only need to change the 12-net settings in create_hard1.py to their 24-net counterparts (a sketch of the affected constants follows). Once the hard examples have been generated, the data preprocessing and training steps are the same as those used for P-Net; see the previous post for details.
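
The O-Net script itself is not listed in this post, so the constants below are only an assumption of what would change on the output side, following the same naming pattern as the 24x24 data (48x48 crops written under a ./48 directory); the detection part would likewise point at the model trained for the previous stage:

# assumed constants for the O-Net (48x48) hard-example script -- not from the post
neg_save_dir = "./48/negative"
pos_save_dir = "./48/positive"
part_save_dir = "./48/part"
image_size = 48
f1 = open('./48/pos_48.txt', 'a')
f2 = open('./48/neg_48.txt', 'a')
f3 = open('./48/part_48.txt', 'a')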

