from Facenet.models.mtcnn import MTCNN
from AgeNet.models import Model
import torch
from torchvision import transforms as T
from PIL import Image
import numpy as np
import cv2
import matplotlib.pyplot as plt
import os
import argparse


# Labels of the ten consecutive 10-year age bins ("1-10" ... "91-100"),
# ordered from youngest to oldest; position k covers ages 10*k + 1 .. 10*k + 10.
age_range_list = [f"{start}-{start + 9}" for start in range(1, 100, 10)]


class AgeEstimator():
    """Detect a face in an image and estimate its gender score and age range.

    Face detection is delegated to ``MTCNN``; the cropped face is resized,
    normalised and passed to ``Model``, which returns a ``(genders, ages)``
    pair of tensors.
    """

    def __init__(self, face_size=224, weights=None, device='cpu', tpx=500):
        """Build the face detector and the age/gender model.

        Args:
            face_size: Target size of the face crop fed to the model. An int
                is expanded to a square ``(face_size, face_size)``; any other
                value is stored as given and handed to ``T.Resize``.
            weights: Optional path to a state dict for ``Model``. When falsy
                the model keeps its freshly constructed parameters.
            device: ``'cuda'`` or ``'gpu'`` selects the GPU when CUDA is
                available; any other string selects the CPU. A non-string
                value (e.g. a ``torch.device``) is used unchanged.
            tpx: Divisor used when deriving the crop padding from the image
                size (padding = longest side * 5 / tpx, at least 10 pixels).
        """
        self.thickness_per_pixels = tpx

        if isinstance(face_size, int):
            self.face_size = (face_size, face_size)
        else:
            self.face_size = face_size

        # Set device
        self.device = device
        if isinstance(device, str):
            wants_gpu = device in ('cuda', 'gpu')
            if wants_gpu and torch.cuda.is_available():
                # 'gpu' is accepted as an alias, but torch only knows the
                # device type 'cuda'; torch.device('gpu') would raise.
                self.device = torch.device('cuda')
            else:
                self.device = torch.device('cpu')

        self.facenet_model = MTCNN(device=self.device)

        self.model = Model().to(self.device)
        self.model.eval()
        if weights:
            # NOTE(security): torch.load unpickles the file, so only load
            # weight files from a trusted source.
            self.model.load_state_dict(torch.load(weights, map_location=torch.device('cpu')))

    def transform(self, image):
        """Resize ``image`` to ``self.face_size`` and return a normalised tensor.

        Normalisation uses mean 0.5 and std 0.5 on each of three channels.
        """
        return T.Compose(
            [
                T.Resize(self.face_size),
                T.ToTensor(),
                T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
            ]
        )(image)

    @staticmethod
    def preprocess_image(image, face_size=(224, 224)):
        """Return ``image`` resized to ``face_size`` with ``T.Resize``."""
        return T.Resize(face_size)(image)

    @staticmethod
    def padding_face(box, padding=20):
        """Grow a ``(left, top, right, bottom)`` box by ``padding`` per side.

        The coordinates are converted to plain Python numbers first so that
        subtracting the padding from an unsigned NumPy value cannot wrap
        around; the left/top results may therefore be negative.

        Returns:
            A list ``[left, top, right, bottom]`` of the enlarged box.
        """
        coords = np.asarray(box).tolist()
        return [
            coords[0] - padding,
            coords[1] - padding,
            coords[2] + padding,
            coords[3] + padding
        ]

    @staticmethod
    def _age_bin_index(age, num_bins=10):
        """Map an age in years to the index of its 10-year bin.

        Bins are 1-10, 11-20, ...; ages below the first bin or above the
        last one are clamped to the nearest bin.
        """
        index = (int(age) - 1) // 10
        return min(max(index, 0), num_bins - 1)

    def predict_from_frame(self, frame, min_prob=0.9):
        """Predict gender score and age range for the first detected face.

        Args:
            frame: Image array accepted by ``PIL.Image.fromarray``
                (``Prediction`` in this file passes an RGB frame).
            min_prob: Detections whose probability is not above this value
                are discarded.

        Returns:
            ``[gender, age_range]`` for the first remaining detection, where
            ``gender`` is the model's first output rounded to two decimals
            (its exact meaning is defined by ``Model`` - presumably a gender
            score; confirm against AgeNet) and ``age_range`` is a label from
            ``age_range_list``. ``None`` when no face passes the threshold or
            when detection/inference fails.
        """
        image = Image.fromarray(frame)
        longest_side = max(np.array(image).shape)

        try:
            bboxes, prob = self.facenet_model.detect(image)
            if bboxes is None:
                # The detector reported no faces at all.
                return None

            bboxes = bboxes[prob > min_prob]
            if len(bboxes) == 0:
                return None

            # Only the first detection is ever reported, so only that face
            # is cropped and run through the model.
            # A signed dtype keeps the padded coordinates from wrapping.
            box = np.clip(bboxes[0], 0, np.inf).astype(np.int64)

            padding = longest_side * 5 / self.thickness_per_pixels
            padding = int(max(padding, 10))
            face = image.crop(self.padding_face(box, padding))

            batch = self.transform(face).unsqueeze(0).to(self.device)

            # Inference only: no autograd graph is needed.
            with torch.no_grad():
                genders, ages = self.model(batch)

            age = torch.round(ages).long()[0].item()
            gender = round(genders[0].item(), 2)
            age_range = age_range_list[
                self._age_bin_index(age, len(age_range_list))
            ]

            return [gender, age_range]

        except Exception:
            # Best effort: any detector/model failure is reported to the
            # caller as "no prediction".
            return None


# AgeEstimator instances keyed by their construction arguments, so the
# detector and the model weights are loaded once instead of on every frame.
_ESTIMATOR_CACHE = {}


def Prediction(frame, weights="weights.pt", face_size=64, device='cuda'):
    """Predict gender score and age range for a single BGR frame.

    Args:
        frame: Image in OpenCV's BGR channel order (e.g. a frame read from
            ``cv2.VideoCapture``). ``None`` yields ``None``.
        weights: Path to the model weights passed to ``AgeEstimator``.
        face_size: Face crop size passed to ``AgeEstimator``.
        device: Device name passed to ``AgeEstimator``.

    Returns:
        Whatever ``AgeEstimator.predict_from_frame`` returns for the frame:
        ``[gender, age_range]`` or ``None`` when there is no prediction.
    """
    if frame is None:
        # Nothing to analyse (e.g. a failed camera read).
        return None

    # Reuse the estimator built for the same settings; constructing it loads
    # the face detector and the weights file, which is far too slow per frame.
    cache_key = (str(weights), repr(face_size), str(device))
    model = _ESTIMATOR_CACHE.get(cache_key)
    if model is None:
        model = AgeEstimator(weights=weights, face_size=face_size, device=device)
        _ESTIMATOR_CACHE[cache_key] = model

    # Convert the frame to RGB format (OpenCV uses BGR by default)
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Predict age and gender from the frame
    return model.predict_from_frame(rgb_frame)