# Source: Jiale/proj_deepface/realtime.py (291 lines, 9.9 KiB, Python)

from deepface import DeepFace
import cv2
import time
import os
import sys
import numpy as np
import re
import pandas as pd
from deepface.modules.streaming import (
build_demography_models, build_facial_recognition_model,
search_identity, highlight_facial_areas,
grab_facial_areas, extract_facial_areas,
perform_facial_recognition, perform_demography_analysis,
)
from deepface.modules import recognition, verification, detection, representation
from deepface.models.FacialRecognition import FacialRecognition
from deepface.modules import preprocessing
from deepface.commons.logger import Logger
import pickle
# Module-level logger; realtime() uses it to report which recognition model was built.
logger = Logger(module="commons.realtime")
def own_faceRecognition(
    img,
    faces_coordinates,
    ml_faces,
    df,  # representations loaded from the datastore file
    model,
    distance_metric='cosine',
    target_threshold=None
):
    """
    Label every detected face in a frame with its closest database identity.

    For each face, an embedding is computed with ``model`` and compared against
    every stored embedding in ``df``. The closest entry is used as the label
    when its distance is strictly below ``target_threshold``; otherwise the
    face is labelled ``'unknown'``. The label and a bounding box are drawn
    directly onto ``img``.

    Args:
        img (np.ndarray): frame to annotate. It is modified in place.
        faces_coordinates (list): ``(x, y, w, h)`` tuples, one per face.
        ml_faces (list): model-ready face crops, expected to be index-aligned
            with ``faces_coordinates``.
        df (pd.DataFrame): database entries with an ``identity`` column (file
            path of the source image) and an ``embedding`` column (vector, or
            ``None`` when no embedding is available for that entry).
        model: facial recognition model exposing ``find_embeddings``.
        distance_metric (str): metric name passed to
            ``verification.find_distance`` (default is cosine).
        target_threshold (float | None): maximum distance accepted as a match.
            When ``None`` the default threshold for the model and metric is
            looked up via ``verification.find_threshold``.

    Returns:
        The same ``img`` object, annotated.

    Raises:
        ValueError: if no threshold is given and the model name cannot be
            determined, or if a stored embedding has a different length than
            the computed one.
    """
    if target_threshold is None:
        # The original code read an undefined `model_name` here.
        # NOTE(review): relies on DeepFace's FacialRecognition exposing
        # `model_name` -- confirm for the installed version.
        resolved_model_name = getattr(model, "model_name", None)
        if resolved_model_name is None:
            raise ValueError(
                "target_threshold was not given and the model does not expose "
                "a model_name to look up the default threshold"
            )
        target_threshold = verification.find_threshold(
            resolved_model_name, distance_metric
        )

    # Nothing to annotate: skip touching the database frame at all.
    if len(faces_coordinates) == 0:
        return img

    # Pull the columns out once instead of iterating DataFrame rows per face.
    identities = df["identity"].tolist()
    embeddings = df["embedding"].tolist()

    for (x, y, w, h), ml_face in zip(faces_coordinates, ml_faces):
        target_representation = model.find_embeddings(
            preprocessing.normalize_input(img=ml_face, normalization='base')
        )

        # Track only the nearest database entry; that is all the label needs.
        best_identity = None
        best_distance = float('inf')
        for identity, source_representation in zip(identities, embeddings):
            if source_representation is None:
                continue
            if len(target_representation) != len(source_representation):
                raise ValueError('wrong len of embedding ')
            distance = verification.find_distance(
                source_representation, target_representation, distance_metric
            )
            if distance < best_distance:
                best_identity, best_distance = identity, distance

        if best_identity is not None and best_distance < target_threshold:
            color = (255, 0, 0)
            # Use the file name without directory or extension as the label.
            target_label = (
                re.split(r'[\\\/]', best_identity)[-1].split('.')[0]
                + f" {best_distance:.4f}"
            )
        else:
            color = (0, 0, 255)
            target_label = 'unknown'

        cv2.putText(
            img,
            target_label,
            (x, y - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.75,
            color,
            1,
        )
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 3)
    return img
def own_grab_facial_areas(
    img: np.ndarray, detector_backend: str, target_size, threshold: int = 130
):
    """
    Find facial areas in the given image and return their boxes and crops.

    Args:
        img (np.ndarray): image itself
        detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
            'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv).
        target_size (tuple): input shape of the facial recognition model.
        threshold (int): threshold for facial area width, discard smaller ones

    Returns:
        A tuple ``(faces_coordinates, faces)``:
            faces_coordinates (list): tuples with x, y, w and h coordinates
            faces (list): the detected face images, in the same order

        Both lists only contain faces wider than ``threshold`` so that
        ``faces[i]`` always belongs to ``faces_coordinates[i]``. Two empty
        lists are returned when detection fails.
    """
    try:
        face_objs = detection.extract_faces(
            img_path=img,
            target_size=target_size,
            detector_backend=detector_backend,
            enforce_detection=True,
            align=True,
            expand_percentage=0,
            grayscale=False,
            human_readable=False,
        )
    except Exception:  # best-effort: e.g. no face detected in this frame
        return [], []

    # Apply the size filter once so coordinates and crops stay index-aligned.
    # Previously only the coordinates were filtered.
    kept_faces = [
        face_obj for face_obj in face_objs
        if face_obj["facial_area"]["w"] > threshold
    ]
    faces_coordinates = [
        (
            face_obj["facial_area"]["x"],
            face_obj["facial_area"]["y"],
            face_obj["facial_area"]["w"],
            face_obj["facial_area"]["h"],
        )
        for face_obj in kept_faces
    ]
    faces = [face_obj["face"] for face_obj in kept_faces]
    return faces_coordinates, faces
# The time and frame thresholds were removed so that detection runs as close to real time as possible
def realtime(
    db_path: str,
    model_name="VGG-Face",
    detector_backend="opencv",
    distance_metric="cosine",
    enable_face_analysis=True,
    source=0,
):
    """
    Run real time face recognition on a video stream.

    Every frame is processed: faces are detected, matched against the
    database and the annotated frame is shown in a window until the stream
    ends or ``q`` is pressed.

    Args:
        db_path (string): Path to the folder containing image files. All detected faces
            in the database will be considered in the decision-making process.
        model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
            OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace and GhostFaceNet (default is VGG-Face).
        detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
            'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv).
        distance_metric (string): Metric for measuring similarity. Options: 'cosine',
            'euclidean', 'euclidean_l2' (default is cosine).
        enable_face_analysis (bool): Flag passed to ``build_demography_models``
            (default is True). The demography overlay itself is currently disabled.
        source (Any): The source for the video stream (default is 0, which represents the
            default camera).

    Returns:
        None

    Raises:
        ValueError: if the datastore for ``db_path`` contains no entries.
    """
    # initialize models
    build_demography_models(enable_face_analysis=enable_face_analysis)
    model: FacialRecognition = DeepFace.build_model(model_name=model_name)
    logger.info(f'{model_name} is built')
    target_size = model.input_shape
    target_threshold = verification.find_threshold(model_name, distance_metric)
    print(f"target threshold of model is: {target_threshold:.6f}")

    # call a dummy find function for db_path once to create embeddings before
    # starting webcam; this keeps the datastore file up to date
    _ = search_identity(
        detected_face=np.zeros([224, 224, 3]),
        db_path=db_path,
        detector_backend=detector_backend,
        distance_metric=distance_metric,
        model_name=model_name,
    )

    # Datastore file expected inside db_path after the search above.
    file_name = f"ds_{model_name}_{detector_backend}_v2.pkl"
    file_name = file_name.replace("-", "").lower()
    datastore_path = os.path.join(db_path, file_name)
    with open(datastore_path, "rb") as f:
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # db_path at folders whose datastore file you trust.
        db_representations = pickle.load(f)
    if len(db_representations) == 0:
        raise ValueError('no face in database')
    print(f"{len(db_representations)} faces representataion in {file_name}.")

    # The database does not change while streaming: build the frame once
    # instead of once per video frame.
    db_frame = pd.DataFrame(db_representations)

    cap = cv2.VideoCapture(source)  # webcam
    try:
        while True:
            has_frame, img = cap.read()
            if not has_frame:
                break

            # coordinates and model-ready crops of the faces in this frame
            faces_coordinates, ml_faces = own_grab_facial_areas(
                img=img, detector_backend=detector_backend, target_size=target_size
            )

            # facial recognition analysis (draws boxes and labels on img)
            img = own_faceRecognition(
                img=img,
                faces_coordinates=faces_coordinates,
                ml_faces=ml_faces,
                df=db_frame,
                model=model,
                distance_metric=distance_metric,
                target_threshold=target_threshold,
            )

            cv2.imshow("img", img)
            if cv2.waitKey(1) & 0xFF == ord("q"):  # press q to quit
                break
    finally:
        # kill open cv things even when a frame fails to process
        cap.release()
        cv2.destroyAllWindows()
# find individual from dataset
"""
Deepface is a hybrid face recognition package. It currently wraps many state-of-the-art face recognition models:
model_name Declared LFW Score
VGG-Face 98.9%
Facenet 99.2%
Facenet512 99.6% drop
OpenFace 92.9%
DeepID 97.4%
Dlib 99.3%
SFace 99.5%
ArcFace 99.5% threshold adjusted to 1.00 **
GhostFaceNet 99.7% **
Human-beings 97.5%
The default configuration uses VGG-Face model.
"""
"""
backends = {
"opencv": OpenCv.OpenCvClient,
"mtcnn": MtCnn.MtCnnClient,
"ssd": Ssd.SsdClient,
"dlib": Dlib.DlibClient,
"retinaface": RetinaFace.RetinaFaceClient,
"mediapipe": MediaPipe.MediaPipeClient,
"yolov8": Yolo.YoloClient,
"yunet": YuNet.YuNetClient,
"fastmtcnn": FastMtCnn.FastMtCnnClient,
}
"""
"""
distance_metric = ['euclidean_l2','cosine','euclidean']
author: Euclidean L2 form seems to be more stable than cosine and regular Euclidean distance based on experiments.
"""
if __name__ == "__main__":
    # Command line entry point: realtime.py <model_name> <detector_backend>
    import argparse

    parser = argparse.ArgumentParser(
        description="Run real time face recognition against the ./face database."
    )
    parser.add_argument(
        "model_name",
        help="face recognition model, e.g. VGG-Face, Facenet, ArcFace",
    )
    parser.add_argument(
        "detector_backend",
        choices=[
            'opencv', 'mtcnn', 'ssd', 'retinaface',
            'mediapipe', 'yolov8', 'yunet', 'fastmtcnn',
        ],
        help="face detector backend",
    )
    # Extra trailing arguments were silently ignored before; keep tolerating them.
    cli_args, _unused_args = parser.parse_known_args()

    # Keep these module-level names: they were globals in the original script.
    model_name = cli_args.model_name
    detector_backend = cli_args.detector_backend

    realtime(
        db_path='./face',
        model_name=model_name,
        detector_backend=detector_backend,
        distance_metric="euclidean_l2",
        enable_face_analysis=False,
        source=0,  # video stream source
    )