论文:YOLO5Face: Why Reinventing a Face Detector
GitHub源码:deepcam-cn/yolov5-face(https://github.com/deepcam-cn/yolov5-face),论文地址:https://arxiv.org/abs/2105.12931
环境配置:
Windows10
torch 1.10.0+cu113
torchaudio 0.10.1+cu113
torchvision 0.11.0+cu113
其他的包按需install,版本比较灵活
训练:
训练过程参考下面博主的博客https://blog.csdn.net/weixin_44649780/article/details/123341719
在训练时可以减少epoch,但epoch要大于20,否则会报错(无法找到results.txt)。
经调试,发现可以修改train.py中第330行左右的如下代码(例如调小其中 `epoch > 20` 的阈值),使epoch小于20时也能运行成功。
# DDP process 0 or single-GPU
if rank in [-1, 0] and epoch > 20:
# mAP
Detect:
训练结束后可运行detect_face.py检测人脸(运行前注意修改weights为自己的训练结果),源码中为检测单张图片,修改detect_one,改为摄像头实时检测,代码如下:
# Modified from detect_one for real-time webcam detection: takes an in-memory
# frame instead of an image path and returns the annotated frame.
def detect(model, image, device, img_size=800, conf_thres=0.3, iou_thres=0.5,
           save_path='result.jpg', verbose=True):
    """Run face detection on a single frame and return the annotated frame.

    Args:
        model: Loaded detection model; must expose ``model.stride`` and return
            the raw predictions as the first element of its output.
        image: Input frame as a BGR ``numpy`` array (e.g. from
            ``cv2.VideoCapture.read``).
        device: ``torch.device`` on which inference runs.
        img_size: Target size of the longest image side before letterboxing.
        conf_thres: Confidence threshold passed to ``non_max_suppression_face``.
        iou_thres: IoU threshold passed to ``non_max_suppression_face``.
        save_path: Where to write the annotated frame. Pass ``None`` (or an
            empty string) to skip the disk write, which is advisable in a
            real-time loop. The default keeps the original behaviour.
        verbose: When true, print the network input and frame shapes.

    Returns:
        The value returned by ``show_results`` for the last detection, or the
        input frame itself when nothing was detected.

    Raises:
        ValueError: If ``image`` is ``None``.
    """
    if image is None:
        raise ValueError('detect() received an empty frame (image is None)')

    orgimg = image  # BGR
    img0 = copy.deepcopy(orgimg)  # resized copy; the original frame keeps its size
    h0, w0 = orgimg.shape[:2]  # original height, width
    r = img_size / max(h0, w0)  # scale factor that brings the longest side to img_size
    if r != 1:
        # INTER_AREA when shrinking, INTER_LINEAR when enlarging
        interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR
        img0 = cv2.resize(img0, (int(w0 * r), int(h0 * r)), interpolation=interp)

    imgsz = check_img_size(img_size, s=model.stride.max())  # validate size against model stride
    img = letterbox(img0, new_shape=imgsz)[0]

    # BGR -> RGB and HWC -> CHW; copy() makes the array contiguous for torch.from_numpy
    img = img[:, :, ::-1].transpose(2, 0, 1).copy()

    img = torch.from_numpy(img).to(device)
    img = img.float()
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)  # add batch dimension

    # Inference followed by NMS
    pred = model(img)[0]
    pred = non_max_suppression_face(pred, conf_thres, iou_thres)

    if verbose:
        print('img.shape: ', img.shape)
        print('orgimg.shape: ', orgimg.shape)

    # Normalisation gains (w, h repeated) are the same for every detection,
    # so build them once instead of once per prediction.
    frame_shape = orgimg.shape
    gn = torch.tensor(frame_shape)[[1, 0, 1, 0]].to(device)  # boxes: whwh
    gn_lks = torch.tensor(frame_shape)[[1, 0, 1, 0, 1, 0, 1, 0, 1, 0]].to(device)  # 5 landmarks: (w, h) x 5

    for det in pred:  # detections per image
        if not len(det):
            continue

        # Rescale boxes and landmarks from network input size to the frame size
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], frame_shape).round()
        det[:, 5:15] = scale_coords_landmarks(img.shape[2:], det[:, 5:15], frame_shape).round()

        for j in range(det.size()[0]):
            xywh = (xyxy2xywh(det[j, :4].view(1, 4)) / gn).view(-1).tolist()
            conf = det[j, 4].cpu().numpy()
            landmarks = (det[j, 5:15].view(1, 10) / gn_lks).view(-1).tolist()
            class_num = det[j, 15].cpu().numpy()
            # show_results presumably draws the box/landmarks onto the frame -- confirm in its definition
            orgimg = show_results(orgimg, xywh, conf, landmarks, class_num)

    if save_path:
        cv2.imwrite(save_path, orgimg)
    return orgimg
if __name__ == '__main__':
    # Script entry point: load the trained weights, then run face detection on
    # the default webcam until 'q' or Esc is pressed.
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='runs/train/exp/weights/last.pt', help='model.pt path(s)')
    opt = parser.parse_args()
    print(opt)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = load_model(opt.weights, device)

    # Real-time webcam detection. VideoCapture(0) already opens device 0, so
    # no separate cap.open(0) call is needed.
    cap = cv2.VideoCapture(0)
    try:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                print("error")
                break
            frame = detect(model, frame, device)
            cv2.imshow('detection', frame)
            # Quit on 'q' or Esc (key code 27)
            if cv2.waitKey(1) in [ord('q'), 27]:
                break
    finally:
        # Always free the camera and close the preview window, even when the
        # loop exits through an exception or Ctrl+C.
        cap.release()
        # destroyWindow() requires a window name; destroyAllWindows() takes none.
        cv2.destroyAllWindows()
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)