from deepface import DeepFace
import cv2
import time
import os
import sys
import numpy as np
import re
import pandas as pd
import pickle

from deepface.modules.streaming import (
    build_demography_models, build_facial_recognition_model,
    search_identity, highlight_facial_areas,
    grab_facial_areas, extract_facial_areas,
    perform_facial_recognition, perform_demography_analysis,
)

from deepface.modules import recognition, verification, detection, representation
from deepface.models.FacialRecognition import FacialRecognition
from deepface.modules import preprocessing
from deepface.commons.logger import Logger

logger = Logger(module="commons.realtime")


def own_faceRecognition(
    img,
    faces_coordinates,
    ml_faces,
    df,  # df loaded from the datastore file
    model,
    model_name='VGG-Face',  # only used to derive a threshold when target_threshold is None
    distance_metric='cosine',
    target_threshold=None,
):
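    """
    Annotate each detected face in img with the closest identity from df.

    Args:
        img (np.ndarray): frame to draw on (modified in place).
        faces_coordinates (list): (x, y, w, h) tuples for each detected face.
        ml_faces (list): preprocessed face crops, index-aligned with faces_coordinates.
        df (pd.DataFrame): datastore rows with 'identity' and 'embedding' columns.
        model (FacialRecognition): built facial recognition model.
        model_name (str): model name, used to derive a threshold when target_threshold is None.
        distance_metric (str): 'cosine', 'euclidean' or 'euclidean_l2'.
        target_threshold (float): matches with a distance below this value are accepted.
    Returns:
        img (np.ndarray): annotated frame.
    """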
    target_threshold = target_threshold or verification.find_threshold(model_name, distance_metric)

    for idx, (x, y, w, h) in enumerate(faces_coordinates):
        ml_face = ml_faces[idx]
        target_representation = model.find_embeddings(
            preprocessing.normalize_input(img=ml_face, normalization='base')
        )
        # target_embedding_obj = representation.represent(
        #     img_path=ml_face,
        #     model_name=model_name,
        #     enforce_detection=True,
        #     detector_backend="skip",
        #     align=False,
        #     normalization='base',
        # )
        # target_representation = target_embedding_obj[0]["embedding"]

        distances = []
        result_df = df.copy()
        for _, instance in df.iterrows():
            source_representation = instance['embedding']
            if source_representation is None:
                distances.append(float('inf'))
                continue

            assert len(target_representation) == len(source_representation), 'embedding length mismatch'
            distance = verification.find_distance(source_representation, target_representation, distance_metric)
            distances.append(distance)

        # result_df['threshold'] = target_threshold
        result_df['distance'] = distances
        result_df = result_df.drop(columns=['embedding'])
        result_df = result_df[result_df['distance'] < target_threshold]
        color = (255, 0, 0)
        if result_df.shape[0] <= 0:
            target_label = 'unknown'
            color = (0, 0, 255)
        else:
            result_df = result_df.sort_values(by=["distance"], ascending=True).reset_index(drop=True)
            target_label, min_dist = result_df.loc[0, "identity"], result_df.loc[0, "distance"]
            target_label = re.split(r'[\\\/]', target_label)[-1].split('.')[0] + f" {min_dist:.4f}"
        cv2.putText(
            img,
            target_label,
            (x, y - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.75,
            color,
            1,
        )
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 3)

    return img


def own_grab_facial_areas(
    img: np.ndarray, detector_backend: str, target_size, threshold: int = 130
):
    """
    Find facial area coordinates in the given image

    Args:
        img (np.ndarray): image itself
        detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
            'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv).
        target_size (tuple): input shape of the facial recognition model.
        threshold (int): minimum facial area width in pixels; smaller detections are discarded.
    Returns:
        faces_coordinates (list): list of (x, y, w, h) tuples.
        faces (list): preprocessed face crops, index-aligned with faces_coordinates.
    """
    try:
        face_objs = detection.extract_faces(
            img_path=img,
            target_size=target_size,
            detector_backend=detector_backend,
            enforce_detection=True,
            align=True,
            expand_percentage=0,
            grayscale=False,
            human_readable=False,
        )
        # filter coordinates and face crops together so both lists stay index-aligned
        results = [
            (
                (
                    face_obj["facial_area"]["x"],
                    face_obj["facial_area"]["y"],
                    face_obj["facial_area"]["w"],
                    face_obj["facial_area"]["h"],
                ),
                face_obj["face"],
            )
            for face_obj in face_objs
            if face_obj["facial_area"]["w"] > threshold
        ]
        faces_coordinates = [coordinates for coordinates, _ in results]
        faces = [face for _, face in results]

        return faces_coordinates, faces
    except Exception:  # to avoid exception if no face detected
        return [], []


# time and frame thresholds removed so that detection runs as close to real time as possible
def realtime(
    db_path: str,
    model_name="VGG-Face",
    detector_backend="opencv",
    distance_metric="cosine",
    enable_face_analysis=True,
    source=0,
):
    """
    Run real time face recognition and facial attribute analysis

    Args:
        db_path (string): Path to the folder containing image files. All detected faces
            in the database will be considered in the decision-making process.

        model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
            OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace and GhostFaceNet (default is VGG-Face).

        detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
            'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv).

        distance_metric (string): Metric for measuring similarity. Options: 'cosine',
            'euclidean', 'euclidean_l2' (default is cosine).

        enable_face_analysis (bool): Flag to enable face analysis (default is True).

        source (Any): The source for the video stream (default is 0, which represents the
            default camera).
    Returns:
        None
    """
    # initialize models
    build_demography_models(enable_face_analysis=enable_face_analysis)  # build facial attribute analysis models
    model: FacialRecognition = DeepFace.build_model(model_name=model_name)
    logger.info(f'{model_name} is built')
    target_size = model.input_shape
    target_threshold = verification.find_threshold(model_name, distance_metric)
    print(f"target threshold of model is: {target_threshold:.6f}")

    # call a dummy find function for db_path once to create embeddings before starting webcam
    _ = search_identity(  # ensures the datastore embeddings are up to date
        detected_face=np.zeros([224, 224, 3]),
        db_path=db_path,
        detector_backend=detector_backend,
        distance_metric=distance_metric,
        model_name=model_name,
    )
    file_name = f"ds_{model_name}_{detector_backend}_v2.pkl"
    file_name = file_name.replace("-", "").lower()
    datastore_path = os.path.join(db_path, file_name)
    with open(datastore_path, "rb") as f:
        db_representations = pickle.load(f)
    assert len(db_representations) > 0, 'no face in database'
    print(f"{len(db_representations)} face representations in {file_name}.")
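    # db_representations is a list of dicts; each record is expected to carry at least
    # the 'identity' and 'embedding' fields used below (other fields may be present
    # depending on the DeepFace version that produced the datastore).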

    cap = cv2.VideoCapture(source)  # webcam
    while True:
        has_frame, img = cap.read()
        if not has_frame:
            break

        # we are adding some figures into img such as identified facial image, age, gender
        # that is why, we need raw image itself to make analysis
        raw_img = img.copy()

        # get facial area coordinates and the matching face crops
        faces_coordinates, ml_faces = own_grab_facial_areas(
            img=img, detector_backend=detector_backend, target_size=target_size
        )

        # # draw bounding boxes in place on raw_img
        # img = highlight_facial_areas(img=raw_img, faces_coordinates=faces_coordinates)

        # age, gender and emotion analysis
        # img = perform_demography_analysis(
        #     enable_face_analysis=enable_face_analysis,
        #     img=raw_img,
        #     faces_coordinates=faces_coordinates,
        #     detected_faces=detected_faces,
        # )

        # facial recognition analysis
        img = own_faceRecognition(
            img=img,
            faces_coordinates=faces_coordinates,
            ml_faces=ml_faces,
            df=pd.DataFrame(db_representations),
            model=model,
            model_name=model_name,
            distance_metric=distance_metric,
            target_threshold=target_threshold,
        )
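        # Note: embeddings for every detected face are recomputed on each frame;
        # caching recognized identities across frames would be a possible optimization.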

        cv2.imshow("img", img)

        if cv2.waitKey(1) & 0xFF == ord("q"):  # press q to quit
            break

    # kill open cv things
    cap.release()
    cv2.destroyAllWindows()


# find individual from dataset


"""
Deepface is a hybrid face recognition package. It currently wraps many state-of-the-art face recognition models:
    model_name      Declared LFW Score
    VGG-Face        98.9%
    Facenet         99.2%
    Facenet512      99.6%   drop
    OpenFace        92.9%
    DeepID          97.4%
    Dlib            99.3%
    SFace           99.5%
    ArcFace         99.5%   threshold adjusted to 1.00 **
    GhostFaceNet    99.7%   **
    Human-beings    97.5%

The default configuration uses the VGG-Face model.
"""
"""
backends = {
    "opencv": OpenCv.OpenCvClient,
    "mtcnn": MtCnn.MtCnnClient,
    "ssd": Ssd.SsdClient,
    "dlib": Dlib.DlibClient,
    "retinaface": RetinaFace.RetinaFaceClient,
    "mediapipe": MediaPipe.MediaPipeClient,
    "yolov8": Yolo.YoloClient,
    "yunet": YuNet.YuNetClient,
    "fastmtcnn": FastMtCnn.FastMtCnnClient,
}
"""
"""
distance_metric = ['euclidean_l2', 'cosine', 'euclidean']
author: Euclidean L2 form seems to be more stable than cosine and regular Euclidean distance based on experiments.
"""
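# Note: with 'euclidean_l2', verification.find_distance l2-normalizes both embeddings
# before computing the Euclidean distance (in current DeepFace versions), which is why
# it tends to behave more consistently across models than the raw Euclidean metric.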
if __name__ == "__main__":
    model_name = sys.argv[1]
    detector_backend = sys.argv[2]
    assert detector_backend in ['opencv', 'mtcnn', 'ssd', 'retinaface', 'mediapipe', 'yolov8', 'yunet', 'fastmtcnn']
    realtime(
        db_path='./face',
        model_name=model_name,
        detector_backend=detector_backend,
        distance_metric="euclidean_l2",
        enable_face_analysis=False,
        source=0,  # video stream source
    )
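# Example invocation (the script filename below is a placeholder for this file):
#   python realtime_demo.py ArcFace retinaface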