# smart-interactive-display/Assets/StreamingAssets/MergeFace/AgeGenderPrediction.py
#
# 126 lines
# 3.8 KiB
# Python
# Raw Normal View History
#
# 2024-06-21 01:20:01 -07:00
from Facenet.models.mtcnn import MTCNN
from AgeNet.models import Model
import torch
from torchvision import transforms as T
from PIL import Image
import numpy as np
import cv2
import matplotlib.pyplot as plt
import os
import argparse
# Decade-wide age buckets: index i covers ages 10 * i + 1 through 10 * (i + 1).
age_range_list = ["1-10", "11-20", "21-30", "31-40", "41-50", "51-60", "61-70", "71-80", "81-90", "91-100"]


class AgeEstimator:
    """Detect a face in a frame and estimate its gender output and age bucket.

    Face detection is delegated to ``MTCNN`` and the age/gender prediction to
    ``Model``; both are instantiated in ``__init__``.
    """

    def __init__(self, face_size=224, weights=None, device='cpu', tpx=500):
        """Create the face detector and the age/gender model.

        Args:
            face_size: An int (used for both dimensions) or a size pair that
                is passed to ``T.Resize`` for every face crop.
            weights: Optional path to a state dict for ``Model``. When falsy,
                the model keeps whatever parameters ``Model()`` starts with.
            device: ``'cuda'`` or ``'gpu'`` select CUDA when it is available;
                any other string selects the CPU. Non-string values are
                stored and used as given.
            tpx: Divisor that scales the crop padding with the image size
                (padding is ``longest_side * 5 / tpx``, at least 10 pixels).
        """
        self.thickness_per_pixels = tpx
        if isinstance(face_size, int):
            self.face_size = (face_size, face_size)
        else:
            self.face_size = face_size

        self.device = self._resolve_device(device)

        self.facenet_model = MTCNN(device=self.device)
        self.model = Model().to(self.device)
        self.model.eval()
        if weights:
            # NOTE: torch.load unpickles the file; only load trusted weights.
            self.model.load_state_dict(torch.load(weights, map_location=torch.device('cpu')))

    @staticmethod
    def _resolve_device(device):
        """Translate a device name into a ``torch.device``, defaulting to CPU."""
        if not isinstance(device, str):
            return device
        if device in ('cuda', 'gpu') and torch.cuda.is_available():
            # 'gpu' is accepted as an alias, but torch only knows 'cuda'.
            return torch.device('cuda')
        return torch.device('cpu')

    @staticmethod
    def _age_to_range(age):
        """Return the ``age_range_list`` label whose bucket contains *age*.

        Ages below the first bucket or above the last one are clamped to the
        nearest bucket so that an extreme prediction still yields a label.
        """
        index = (int(age) - 1) // 10
        index = min(max(index, 0), len(age_range_list) - 1)
        return age_range_list[index]

    def transform(self, image):
        """Resize a PIL face crop to ``self.face_size`` and return it as a
        tensor normalized with mean 0.5 and std 0.5 per channel."""
        return T.Compose(
            [
                T.Resize(self.face_size),
                T.ToTensor(),
                T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
            ]
        )(image)

    @staticmethod
    def preprocess_image(image, face_size=(224, 224)):
        """Return *image* resized to *face_size* with ``T.Resize``."""
        return T.Resize(face_size)(image)

    @staticmethod
    def padding_face(box, padding=20):
        """Grow a ``[left, top, right, bottom]`` box by *padding* on each side.

        Coordinates are converted to plain ``int`` first: subtracting from an
        unsigned NumPy scalar can wrap around instead of going negative.
        The result is not clamped, so it may extend past the image.
        """
        left, top, right, bottom = (int(coord) for coord in box[:4])
        return [
            left - padding,
            top - padding,
            right + padding,
            bottom + padding,
        ]

    def predict_from_frame(self, frame, min_prob=0.9):
        """Estimate gender and age bucket for the first confident face.

        Only the first detection whose probability exceeds *min_prob* is
        evaluated; any other faces in the frame are ignored.

        Args:
            frame: Array accepted by ``PIL.Image.fromarray``. The caller in
                this file passes an RGB frame.
            min_prob: Detection probability a face must exceed to be used.

        Returns:
            ``[gender, age_range]`` where ``gender`` is the model's gender
            output rounded to two decimals (its meaning is defined by
            ``Model``, which is not visible here) and ``age_range`` is a label
            from ``age_range_list``. ``None`` when no face qualifies or when
            detection/inference fails.
        """
        image = Image.fromarray(frame)
        longest_side = max(image.size)
        try:
            bboxes, probs = self.facenet_model.detect(image)
            if bboxes is None:
                # The detector found no face at all.
                return None
            bboxes = bboxes[probs > min_prob]
            if len(bboxes) == 0:
                return None

            box = np.clip(bboxes[0], 0, None).astype(np.int64)
            padding = int(max(longest_side * 5 / self.thickness_per_pixels, 10))
            face = image.crop(self.padding_face(box, padding))
            batch = self.transform(face).unsqueeze(0).to(self.device)

            # Inference only: skip building the autograd graph.
            with torch.no_grad():
                genders, ages = self.model(batch)

            age = torch.round(ages[0]).long().item()
            gender = round(genders[0].item(), 2)
            return [gender, self._age_to_range(age)]
        except Exception:
            # Best effort per frame: a failed prediction is reported as
            # "no result" rather than aborting the caller's loop.
            return None
# Estimators keyed by their configuration, so the detector and model weights
# are loaded once instead of on every frame.
_ESTIMATOR_CACHE = {}


def Prediction(frame, weights="weights.pt", face_size=64, device='cuda'):
    """Predict gender and age bucket for a single BGR frame.

    Args:
        frame: Image array in OpenCV's BGR channel order, or ``None`` when the
            capture failed.
        weights: Path to the model weights handed to ``AgeEstimator``.
        face_size: Face crop size handed to ``AgeEstimator``.
        device: Device name handed to ``AgeEstimator``.

    Returns:
        Whatever ``AgeEstimator.predict_from_frame`` returns for the frame
        (a ``[gender, age_range]`` list, or ``None`` when there is no usable
        face), or ``None`` when *frame* itself is ``None``.
    """
    if frame is None:
        return None

    cache_key = (str(weights), repr(face_size), str(device))
    estimator = _ESTIMATOR_CACHE.get(cache_key)
    if estimator is None:
        estimator = AgeEstimator(weights=weights, face_size=face_size, device=device)
        _ESTIMATOR_CACHE[cache_key] = estimator

    # OpenCV delivers BGR; the estimator expects RGB.
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    return estimator.predict_from_frame(rgb_frame)