126 lines
3.8 KiB
Python
126 lines
3.8 KiB
Python
|
from Facenet.models.mtcnn import MTCNN
|
||
|
from AgeNet.models import Model
|
||
|
import torch
|
||
|
from torchvision import transforms as T
|
||
|
from PIL import Image
|
||
|
import numpy as np
|
||
|
import cv2
|
||
|
import matplotlib.pyplot as plt
|
||
|
import os
|
||
|
import argparse
|
||
|
|
||
|
|
||
|
# Labels for the ten-year age buckets "1-10", "11-20", ..., "91-100";
# the entry at index i covers ages 10 * i + 1 through 10 * i + 10.
age_range_list = [f"{low}-{low + 9}" for low in range(1, 100, 10)]
|
||
|
|
||
|
|
||
|
class AgeEstimator:
    """Detect faces in an image and estimate a gender score and age range.

    Faces are located with the MTCNN detector, cropped with some padding,
    resized and normalised, and then passed through the age/gender ``Model``.
    """

    def __init__(self, face_size=224, weights=None, device='cpu', tpx=500):
        """Build the face detector and the age/gender network.

        Args:
            face_size: Size the face crops are resized to. An int is expanded
                to a square ``(face_size, face_size)``; any other value is
                handed to ``T.Resize`` unchanged.
            weights: Optional path to a saved state dict for the age/gender
                network. When falsy, the network keeps its initial parameters.
            device: ``'cuda'`` or ``'gpu'`` selects CUDA when it is available;
                every other string selects the CPU. Non-string values are
                used as given.
            tpx: Divisor that controls how much padding is added around each
                detected face relative to the largest image dimension.
        """
        self.thickness_per_pixels = tpx

        if isinstance(face_size, int):
            self.face_size = (face_size, face_size)
        else:
            self.face_size = face_size

        self.device = self._resolve_device(device)

        self.facenet_model = MTCNN(device=self.device)

        self.model = Model().to(self.device)
        self.model.eval()
        if weights:
            # NOTE: torch.load unpickles the file, so only load weights from
            # a trusted source.
            state_dict = torch.load(weights, map_location=torch.device('cpu'))
            self.model.load_state_dict(state_dict)

    @staticmethod
    def _resolve_device(device):
        """Translate the ``device`` constructor argument into the device to use.

        ``'gpu'`` is accepted as an alias of ``'cuda'``; ``torch.device`` has
        no ``'gpu'`` type, so the alias must be mapped before constructing it.
        """
        if not isinstance(device, str):
            return device
        if device in ('cuda', 'gpu') and torch.cuda.is_available():
            return torch.device('cuda')
        return torch.device('cpu')

    @staticmethod
    def _age_bucket_index(age, bucket_count):
        """Return the index of the ten-year bucket that contains ``age``.

        Buckets are 1-10, 11-20, and so on. Ages below the first bucket or
        above the last one are clamped to the nearest bucket.
        """
        return min(max((int(age) - 1) // 10, 0), bucket_count - 1)

    def transform(self, image):
        """Resize ``image`` to ``self.face_size`` and return it as a tensor.

        The tensor is normalised with mean 0.5 and std 0.5 on three channels.
        """
        pipeline = T.Compose(
            [
                T.Resize(self.face_size),
                T.ToTensor(),
                T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
            ]
        )
        return pipeline(image)

    @staticmethod
    def preprocess_image(image, face_size=(224, 224)):
        """Return ``image`` resized to ``face_size`` with ``T.Resize``."""
        return T.Resize(face_size)(image)

    @staticmethod
    def padding_face(box, padding=20):
        """Grow ``box`` (``[left, top, right, bottom]``) by ``padding`` per side."""
        return [
            box[0] - padding,
            box[1] - padding,
            box[2] + padding,
            box[3] + padding,
        ]

    def predict_from_frame(self, frame, min_prob=0.9):
        """Estimate gender and age range for a face found in ``frame``.

        Args:
            frame: Image array accepted by ``PIL.Image.fromarray``.
            min_prob: Detections whose probability is not greater than this
                value are ignored.

        Returns:
            ``[gender, age_range]`` for the first retained detection, where
            ``gender`` is the network's gender output rounded to two decimals
            and ``age_range`` is an entry of ``age_range_list``. ``None`` when
            no face is retained or when the prediction fails.
        """
        image = Image.fromarray(frame)
        image_shape = np.asarray(image).shape

        try:
            bboxes, prob = self.facenet_model.detect(image)
            # NOTE(review): presumably the detector reports "no face" as
            # None -- confirm against the MTCNN implementation.
            if bboxes is None:
                return None
            bboxes = bboxes[prob > min_prob]
            if len(bboxes) == 0:
                return None

            # The padding depends only on the image size, so compute it once.
            padding = max(image_shape) * 5 / self.thickness_per_pixels
            padding = int(max(padding, 10))

            face_images = []
            for box in bboxes:
                # Use plain Python ints: unsigned NumPy values can wrap
                # around when the padding is subtracted from a coordinate
                # close to the top or left edge.
                box = [int(value) for value in np.clip(box, 0, np.inf)]
                face = image.crop(self.padding_face(box, padding))
                face_images.append(self.transform(face))

            batch = torch.stack(face_images, dim=0).to(self.device)

            # Inference only: do not record gradients.
            with torch.no_grad():
                genders, ages = self.model(batch)
            ages = torch.round(ages).long()

            # NOTE(review): the original returned from inside its per-face
            # loop, i.e. only the first face is reported -- confirm this is
            # the intended behaviour for frames with several faces.
            gender = round(genders[0].item(), 2)
            bucket = self._age_bucket_index(ages[0].item(), len(age_range_list))
            return [gender, age_range_list[bucket]]

        except Exception:
            # Best effort: callers get None on failure, but the cause is
            # logged instead of being silently discarded.
            import logging

            logging.getLogger(__name__).exception(
                "Age/gender prediction failed"
            )
            return None
|
||
|
|
||
|
|
||
|
# Estimators already built by Prediction(), keyed by their configuration, so
# the detector and the network are not rebuilt (and the weights not re-read
# from disk) for every frame.
_ESTIMATOR_CACHE = {}


def Prediction(frame, weights="weights.pt", face_size=64, device='cuda'):
    """Predict gender and age range for a face in a BGR frame.

    Args:
        frame: Image array in BGR channel order (the OpenCV convention); it
            is converted to RGB before prediction.
        weights: Path to the age/gender network weights.
        face_size: Size the face crops are resized to.
        device: Device name passed on to ``AgeEstimator``.

    Returns:
        The result of ``AgeEstimator.predict_from_frame`` for the converted
        frame: ``[gender, age_range]``, or ``None`` when nothing is predicted.

    The estimator for a given ``(weights, face_size, device)`` combination is
    built on first use and then reused by later calls.
    """
    cache_key = (str(weights), str(face_size), str(device))
    model = _ESTIMATOR_CACHE.get(cache_key)
    if model is None:
        model = AgeEstimator(weights=weights, face_size=face_size, device=device)
        _ESTIMATOR_CACHE[cache_key] = model

    # OpenCV uses BGR by default; the estimator expects RGB.
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    return model.predict_from_frame(rgb_frame)
|