from deepface import DeepFace
import cv2
import time
import os
import sys
import numpy as np
import re
import pandas as pd
import pickle

from deepface.modules.streaming import (
    build_demography_models, build_facial_recognition_model,
    search_identity, highlight_facial_areas,
    grab_facial_areas, extract_facial_areas,
    perform_facial_recognition, perform_demography_analysis,
)

from deepface.modules import recognition, verification, detection, representation
from deepface.models.FacialRecognition import FacialRecognition
from deepface.modules import preprocessing
from deepface.commons.logger import Logger

logger = Logger(module="commons.realtime")


def own_faceRecognition(
    img,
    faces_coordinates,
    ml_faces,
    df,  # df loaded from the datastore file
    model,
    model_name='VGG-Face',  # only used to derive a threshold when target_threshold is None
    distance_metric='cosine',
    target_threshold=None,
):
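    """
    Annotate each detected face in img with the closest identity from df.

    Args:
        img (np.ndarray): frame to draw on (modified in place).
        faces_coordinates (list): (x, y, w, h) tuples for each detected face.
        ml_faces (list): preprocessed face crops, index-aligned with faces_coordinates.
        df (pd.DataFrame): datastore rows with 'identity' and 'embedding' columns.
        model (FacialRecognition): built facial recognition model.
        model_name (str): model name, used to derive a threshold when target_threshold is None.
        distance_metric (str): 'cosine', 'euclidean' or 'euclidean_l2'.
        target_threshold (float): matches with a distance below this value are accepted.
    Returns:
        img (np.ndarray): annotated frame.
    """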
    target_threshold = target_threshold or verification.find_threshold(model_name, distance_metric)

    for idx, (x, y, w, h) in enumerate(faces_coordinates):
        ml_face = ml_faces[idx]
        target_representation = model.find_embeddings(
            preprocessing.normalize_input(img=ml_face, normalization='base')
        )
        # target_embedding_obj = representation.represent(
        #     img_path=ml_face,
        #     model_name=model_name,
        #     enforce_detection=True,
        #     detector_backend="skip",
        #     align=False,
        #     normalization='base',
        # )
        # target_representation = target_embedding_obj[0]["embedding"]

        distances = []
        result_df = df.copy()
        for _, instance in df.iterrows():
            source_representation = instance['embedding']
            if source_representation is None:
                distances.append(float('inf'))
                continue

            assert len(target_representation) == len(source_representation), 'embedding length mismatch'
            distance = verification.find_distance(source_representation, target_representation, distance_metric)
            distances.append(distance)

        # result_df['threshold'] = target_threshold
        result_df['distance'] = distances
        result_df = result_df.drop(columns=['embedding'])
        result_df = result_df[result_df['distance'] < target_threshold]
        color = (255, 0, 0)
        if result_df.shape[0] <= 0:
            target_label = 'unknown'
            color = (0, 0, 255)
        else:
            result_df = result_df.sort_values(by=["distance"], ascending=True).reset_index(drop=True)
            target_label, min_dist = result_df.loc[0, "identity"], result_df.loc[0, "distance"]
            target_label = re.split(r'[\\\/]', target_label)[-1].split('.')[0] + f" {min_dist:.4f}"
        cv2.putText(
            img,
            target_label,
            (x, y - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.75,
            color,
            1,
        )
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 3)

    return img


def own_grab_facial_areas(
    img: np.ndarray, detector_backend: str, target_size, threshold: int = 130
):
    """
    Find facial area coordinates in the given image

    Args:
        img (np.ndarray): image itself
        detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
            'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv).
        target_size (tuple): input shape of the facial recognition model.
        threshold (int): minimum facial area width in pixels; smaller detections are discarded.
    Returns:
        faces_coordinates (list): list of (x, y, w, h) tuples.
        faces (list): preprocessed face crops, index-aligned with faces_coordinates.
    """
    try:
        face_objs = detection.extract_faces(
            img_path=img,
            target_size=target_size,
            detector_backend=detector_backend,
            enforce_detection=True,
            align=True,
            expand_percentage=0,
            grayscale=False,
            human_readable=False,
        )
        # filter coordinates and face crops together so both lists stay index-aligned
        results = [
            (
                (
                    face_obj["facial_area"]["x"],
                    face_obj["facial_area"]["y"],
                    face_obj["facial_area"]["w"],
                    face_obj["facial_area"]["h"],
                ),
                face_obj["face"],
            )
            for face_obj in face_objs
            if face_obj["facial_area"]["w"] > threshold
        ]
        faces_coordinates = [coordinates for coordinates, _ in results]
        faces = [face for _, face in results]

        return faces_coordinates, faces
    except Exception:  # to avoid exception if no face detected
        return [], []


# time and frame thresholds removed so that detection runs as close to real time as possible
def realtime(
    db_path: str,
    model_name="VGG-Face",
    detector_backend="opencv",
    distance_metric="cosine",
    enable_face_analysis=True,
    source=0,
):
    """
    Run real time face recognition and facial attribute analysis

    Args:
        db_path (string): Path to the folder containing image files. All detected faces
            in the database will be considered in the decision-making process.

        model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
            OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace and GhostFaceNet (default is VGG-Face).

        detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
            'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv).

        distance_metric (string): Metric for measuring similarity. Options: 'cosine',
            'euclidean', 'euclidean_l2' (default is cosine).

        enable_face_analysis (bool): Flag to enable face analysis (default is True).

        source (Any): The source for the video stream (default is 0, which represents the
            default camera).
    Returns:
        None
    """
    # initialize models
    build_demography_models(enable_face_analysis=enable_face_analysis)  # build facial attribute analysis models
    model: FacialRecognition = DeepFace.build_model(model_name=model_name)
    logger.info(f'{model_name} is built')
    target_size = model.input_shape
    target_threshold = verification.find_threshold(model_name, distance_metric)
    print(f"target threshold of model is: {target_threshold:.6f}")

    # call a dummy find function for db_path once to create embeddings before starting webcam
    _ = search_identity(  # ensures the datastore embeddings are up to date
        detected_face=np.zeros([224, 224, 3]),
        db_path=db_path,
        detector_backend=detector_backend,
        distance_metric=distance_metric,
        model_name=model_name,
    )
    file_name = f"ds_{model_name}_{detector_backend}_v2.pkl"
    file_name = file_name.replace("-", "").lower()
    datastore_path = os.path.join(db_path, file_name)
    with open(datastore_path, "rb") as f:
        db_representations = pickle.load(f)
    assert len(db_representations) > 0, 'no face in database'
    print(f"{len(db_representations)} face representations in {file_name}.")
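    # db_representations is a list of dicts; each record is expected to carry at least
    # the 'identity' and 'embedding' fields used below (other fields may be present
    # depending on the DeepFace version that produced the datastore).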

    cap = cv2.VideoCapture(source)  # webcam
    while True:
        has_frame, img = cap.read()
        if not has_frame:
            break

        # we are adding some figures into img such as identified facial image, age, gender
        # that is why, we need raw image itself to make analysis
        raw_img = img.copy()

        # get facial area coordinates and the matching face crops
        faces_coordinates, ml_faces = own_grab_facial_areas(
            img=img, detector_backend=detector_backend, target_size=target_size
        )

        # # draw bounding boxes in place on raw_img
        # img = highlight_facial_areas(img=raw_img, faces_coordinates=faces_coordinates)

        # age, gender and emotion analysis
        # img = perform_demography_analysis(
        #     enable_face_analysis=enable_face_analysis,
        #     img=raw_img,
        #     faces_coordinates=faces_coordinates,
        #     detected_faces=detected_faces,
        # )

        # facial recognition analysis
        img = own_faceRecognition(
            img=img,
            faces_coordinates=faces_coordinates,
            ml_faces=ml_faces,
            df=pd.DataFrame(db_representations),
            model=model,
            model_name=model_name,
            distance_metric=distance_metric,
            target_threshold=target_threshold,
        )
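        # Note: embeddings for every detected face are recomputed on each frame;
        # caching recognized identities across frames would be a possible optimization.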

        cv2.imshow("img", img)

        if cv2.waitKey(1) & 0xFF == ord("q"):  # press q to quit
            break

    # kill open cv things
    cap.release()
    cv2.destroyAllWindows()


# find individual from dataset


"""
Deepface is a hybrid face recognition package. It currently wraps many state-of-the-art face recognition models:
    model_name      Declared LFW Score
    VGG-Face        98.9%
    Facenet         99.2%
    Facenet512      99.6%   drop
    OpenFace        92.9%
    DeepID          97.4%
    Dlib            99.3%
    SFace           99.5%
    ArcFace         99.5%   threshold adjusted to 1.00 **
    GhostFaceNet    99.7%   **
    Human-beings    97.5%

The default configuration uses the VGG-Face model.
"""
"""
backends = {
    "opencv": OpenCv.OpenCvClient,
    "mtcnn": MtCnn.MtCnnClient,
    "ssd": Ssd.SsdClient,
    "dlib": Dlib.DlibClient,
    "retinaface": RetinaFace.RetinaFaceClient,
    "mediapipe": MediaPipe.MediaPipeClient,
    "yolov8": Yolo.YoloClient,
    "yunet": YuNet.YuNetClient,
    "fastmtcnn": FastMtCnn.FastMtCnnClient,
}
"""
"""
distance_metric = ['euclidean_l2', 'cosine', 'euclidean']
author: Euclidean L2 form seems to be more stable than cosine and regular Euclidean distance based on experiments.
"""
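# Note: with 'euclidean_l2', verification.find_distance l2-normalizes both embeddings
# before computing the Euclidean distance (in current DeepFace versions), which is why
# it tends to behave more consistently across models than the raw Euclidean metric.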
if __name__ == "__main__":
    model_name = sys.argv[1]
    detector_backend = sys.argv[2]
    assert detector_backend in ['opencv', 'mtcnn', 'ssd', 'retinaface', 'mediapipe', 'yolov8', 'yunet', 'fastmtcnn']
    realtime(
        db_path='./face',
        model_name=model_name,
        detector_backend=detector_backend,
        distance_metric="euclidean_l2",
        enable_face_analysis=False,
        source=0,  # video stream source
    )
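# Example invocation (the script filename below is a placeholder for this file):
#   python realtime_demo.py ArcFace retinaface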