# smart-interactive-display/Assets/StreamingAssets/MergeFace/demo.py
#
# 324 lines
# 12 KiB
# Python
# Raw Normal View History
#
# 2024-06-24 20:41:35 -07:00
import cv2
import numpy as np
from lib import *
class MainProgram:
    """Webcam loop that tracks people and reports their state to Unity over UDP.

    Every frame is passed through a person detector and a tracker. One track
    is picked as the "focus" and stepped through a small state machine
    (ready -> snapshot saved -> age/gender predicted -> merged image
    generated). The state is published through ``send_data_unity``.
    """

    # Crop of the focused person; input of the prediction and face-merge steps.
    _SNAPSHOT_PATH = "./image/output.jpg"
    # Result of the face merge, written by ``generate_image``.
    _MERGED_PATH = "./image/merge_face.png"
    # Number of consecutive frames the focus track may be missing before it is dropped.
    _FOCUS_LOST_FRAMES = 20

    def __init__(self, face_model_path, model_path, reid_weights, tracker_type="deepocsort",
                 udp_ip="192.168.1.122"):
        """Load the models, open the UDP link and fetch the image catalogue.

        Args:
            face_model_path: Weights file passed to ``YOLO`` for the face model.
            model_path: Weights file passed to ``YOLO`` for the person model.
            reid_weights: Re-identification weights handed to ``create_tracker``.
            tracker_type: Tracker name handed to ``get_tracker_config`` and
                ``create_tracker``.
            udp_ip: Address passed to ``UdpComms`` as ``udpIP``. Defaults to
                the previously hard-coded value.
        """
        self.face_model_path = face_model_path
        self.model_path = model_path
        self.face_model = YOLO(face_model_path)
        self.person_model = YOLO(model_path)
        self.reid_weights = reid_weights
        self.tracker_conf = get_tracker_config(tracker_type)
        self.sock = U.UdpComms(udpIP=udp_ip, portTX=8000, portRX=8001, enableRX=True, suppressWarnings=True)
        self.tracker = create_tracker(
            tracker_type=tracker_type,
            tracker_config=self.tracker_conf,
            reid_weights=reid_weights,
            device='0',
            half=False,
            per_class=False
        )
        self.send_data_unity: dict = self._initial_unity_state()
        self.focus_id = None
        self.frame_count_remove_idx = 0
        # The catalogue of candidate images is read once at start-up.
        sa = gspread.service_account("key.json")
        sh = sa.open("TestData")
        wks = sh.worksheet("Sheet1")
        self.all_record = wks.get_all_records()
        # NOTE(review): this API key is committed in source; it should be
        # rotated and loaded from configuration instead.
        self.client = NovitaClient("48cc2b16-286f-49c8-9581-f409b68359c4")
        self.ready_success = False
        self.show_success = False
        self.check_save, self.check_generate = False, False
        self.forward_face = Face_detection.FaceDetection()

    @staticmethod
    def _initial_unity_state() -> dict:
        """Return a fresh copy of the idle state dictionary sent to Unity."""
        return {
            "PassBy": False,
            "Engage": False,
            "Ready": False,
            "Gender": None,
            "AgeMin": None,
            "AgeMax": None,
            "GenerateImageSuccess": False,
            "Description": ""
        }

    @staticmethod
    def _remove_if_exists(path) -> None:
        """Delete ``path``; a file that is already gone is not an error."""
        try:
            os.remove(path)
        except FileNotFoundError:
            pass

    def convertFrame(self, frame) -> str:
        """Return ``frame`` resized to width 512, JPEG-encoded at quality 90, as base64 text."""
        encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 90]
        resized = imutils.resize(frame, width=512)
        _ok, encoded_frame = cv2.imencode('.jpg', resized, encode_param)
        return base64.b64encode(encoded_frame.tobytes()).decode('utf-8')

    def get_image(self):
        """Pick a random catalogue record and return ``(image_url, description)``.

        Raises:
            IndexError: If the catalogue is empty.
        """
        record = random.choice(self.all_record)
        return record["Image link"], record["Note"]

    def generate_image(self):
        """Merge the saved snapshot into a random catalogue image and publish the result.

        Writes the merged picture to ``_MERGED_PATH`` and updates
        ``send_data_unity`` with the description, the success flag and the
        path stored under ``"StreamingData"``.
        """
        image_url, des = self.get_image()
        res = self.client.merge_face(
            image=image_url,
            face_image=self._SNAPSHOT_PATH,
        )
        base64_to_image(res.image_file).save(self._MERGED_PATH)
        self.send_data_unity["Description"] = des
        self.send_data_unity["GenerateImageSuccess"] = True
        self.send_data_unity["StreamingData"] = "./Assets/StreamingAssets/MergeFace/image/merge_face.png"

    def predict_age_and_gender(self):
        """Fill ``Gender``/``AgeMin``/``AgeMax`` from the saved snapshot.

        The prediction is run once per call. When the snapshot cannot be read
        or the prediction is falsy, all three fields are set to ``None``.
        """
        image_predict = cv2.imread(self._SNAPSHOT_PATH)
        # cv2.imread returns None for an unreadable file; skip the model then.
        prediction = AgeGenderPrediction.Prediction(image_predict) if image_predict is not None else None
        if prediction:
            # prediction[1] is split on "-" into the lower and upper age bound.
            age_bounds = prediction[1].split("-")
            age_min, age_max = int(age_bounds[0]), int(age_bounds[1])
            self.send_data_unity["Gender"] = prediction[0]
            self.send_data_unity["AgeMin"] = age_min
            self.send_data_unity["AgeMax"] = age_max
        else:
            self.send_data_unity["Gender"] = None
            self.send_data_unity["AgeMin"] = None
            self.send_data_unity["AgeMax"] = None

    def get_face_bbox(self, frame):
        """Run the face model on ``frame`` and return one ``[x1, y1, x2, y2, conf]`` row per detection."""
        outs = self.face_model(frame)
        results = sv.Detections.from_ultralytics(outs[0])
        bbox = results.xyxy.astype(np.int_)
        conf = results.confidence.astype(np.float32)
        return np.concatenate((bbox, conf[:, np.newaxis]), axis=1)

    def get_person_bbox(self, frame):
        """Detect people in ``frame`` and return rows of ``[x1, y1, x2, y2, conf, cls]``.

        The person model is restricted to class 0 with a confidence threshold
        of 0.7. An array of shape ``(0, 6)`` is returned when nothing is found.
        """
        outs = self.person_model(frame, classes=[0], conf=0.7)
        detected = outs[0].boxes
        boxes = detected.xyxy.tolist()
        if not boxes:
            return np.empty((0, 6))
        classes = detected.cls.tolist()
        confidences = detected.conf.tolist()
        return np.array([box + [conf, cls] for box, conf, cls in zip(boxes, confidences, classes)])

    def check_engage(self, x1, x2) -> bool:
        """Return True when the horizontal span ``[x1, x2]`` overlaps the red zone."""
        zone_left, zone_right = self.red_zone_width
        # bool() keeps the result a plain Python bool even for numpy integers.
        return bool(x1 <= zone_right and x2 >= zone_left)

    def cropped_image(self, frame, x1, y1, x2, y2):
        """Return the ``frame[y1:y2, x1:x2]`` region."""
        return frame[y1: y2, x1: x2]

    def calculate_dis_to_cp(self, cx, cy, face_cx, face_cy) -> float:
        """Return the Euclidean distance between ``(cx, cy)`` and ``(face_cx, face_cy)``."""
        return math.hypot(face_cx - cx, face_cy - cy)

    def check_ready(self, x1, y1, x2, y2, frame):
        """Run the forward-face detector on the person crop.

        The crop is passed together with the face-zone centre point and the
        crop's top-left offset ``(x1, y1)``; the detector's result is returned
        unchanged.
        """
        person_frame = self.cropped_image(frame, x1, y1, x2, y2)
        return self.forward_face.detect_face(person_frame, self.face_zone_center_point[0],
                                             self.face_zone_center_point[1], x1, y1)

    def _handle_unity_message(self):
        """Apply a pending "Begin"/"End" message read from the socket."""
        received_data = self.sock.ReadReceivedData()
        if received_data == "Begin":
            self.ready_success = True
        elif received_data == "End":
            self.ready_success = False
            self.check_save = False
            self.check_generate = False
            # The files may not exist yet when "End" arrives early; a missing
            # file must not abort the reset of the state below.
            self._remove_if_exists(self._SNAPSHOT_PATH)
            self._remove_if_exists(self._MERGED_PATH)
            self.send_data_unity = self._initial_unity_state()

    def _advance_focus_pipeline(self, frame, frame_to_crop, x1, y1, x2, y2):
        """Perform at most one step of the focus pipeline for the focused box."""
        if not self.ready_success:
            self.send_data_unity["Ready"] = bool(self.check_ready(x1, y1, x2, y2, frame_to_crop))
        elif not self.check_save:
            cv2.imwrite(self._SNAPSHOT_PATH, self.cropped_image(frame, x1, y1, x2, y2))
            self.check_save = True
        elif not self.check_generate:
            # Prediction comes first; the merge only starts once a gender is set.
            if str(self.send_data_unity["Gender"]) == "None":
                self.predict_age_and_gender()
            else:
                self.generate_image()
                self.check_generate = True
        elif self.show_success:
            self.check_save = False
            self.check_generate = False

    def _publish_presence(self, track_list, engage):
        """Update the ``PassBy``/``Engage``/``Ready`` flags from this frame's tracks."""
        if track_list:
            self.send_data_unity["PassBy"] = True
            self.send_data_unity["Engage"] = engage
        else:
            self.send_data_unity["Engage"] = False
            self.send_data_unity["PassBy"] = False
            self.send_data_unity["Ready"] = False

    def _expire_focus(self, track_list):
        """Drop the focus once it has been absent for ``_FOCUS_LOST_FRAMES`` frames."""
        if self.focus_id in track_list:
            self.frame_count_remove_idx = 0
        elif self.frame_count_remove_idx == self._FOCUS_LOST_FRAMES:
            self.frame_count_remove_idx = 0
            self.focus_id = None
        else:
            self.frame_count_remove_idx += 1

    def person_process(self, frame):
        """Detect and track people in ``frame`` and update the Unity state.

        NOTE(review): the source this was restored from had lost its
        indentation. The pipeline step is assumed to run only for the focused
        track (inside the original ``else`` branch) -- confirm against the
        repository.
        """
        person_detections = self.get_person_bbox(frame)
        tracked_objects = self.tracker.update(person_detections, frame)
        track_list = []
        # check_ready works on a copy so that nothing it does to the crop can
        # end up in the snapshot, which is taken from ``frame`` itself.
        frame_to_crop = frame.copy()
        engage = False
        for track in tracked_objects.astype(int):
            # Leading columns: box corners and track id; the rest is unused here.
            x1, y1, x2, y2, track_id = track[:5]
            track_list.append(track_id)
            if not engage:
                engage = self.check_engage(x1, x2)
            # Compare with None explicitly so a track id of 0 can hold the focus.
            if self.focus_id is None:
                if self.check_ready(x1, y1, x2, y2, frame_to_crop):
                    self.focus_id = track_id
                continue
            if track_id != self.focus_id:
                continue
            self._handle_unity_message()
            self._advance_focus_pipeline(frame, frame_to_crop, x1, y1, x2, y2)
        self._publish_presence(track_list, engage)
        self._expire_focus(track_list)

    def _update_zones(self, cap):
        """Derive the frame size and the red/face zones from the capture properties."""
        self.frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        center_x, center_y = int(self.frame_width / 2), int(self.frame_height / 2)
        self.center_point = (center_x, center_y)
        self.red_zone_width = (center_x - 250, center_x + 250)
        self.red_zone_height = (center_y - 50, self.frame_height)
        self.face_zone_width = (center_x - 100, center_x + 100)
        self.face_zone_height = (center_y - 200, center_y)
        self.face_zone_center_point = (
            int((self.face_zone_width[1] - self.face_zone_width[0]) / 2) + self.face_zone_width[0],
            int((self.face_zone_height[1] - self.face_zone_height[0]) / 2) + self.face_zone_height[0])

    def __call__(self):
        """Run the capture loop on camera 0 until "q" is pressed or the capture closes.

        Each frame is mirrored, processed, sent to Unity and shown in a
        preview window. The capture and the window are released even when the
        loop exits through an exception.
        """
        cap = cv2.VideoCapture(0)
        try:
            while cap.isOpened():
                self._update_zones(cap)
                ret, frame = cap.read()
                # Check the grab result before touching the frame: on failure
                # there is no image to flip.
                if not ret:
                    continue
                frame = cv2.flip(frame, 1)
                frame_to_handle = frame.copy()
                self.frame_to_show = frame.copy()
                # A failure in one frame is reported and must not stop the stream.
                try:
                    self.person_process(frame_to_handle)
                except Exception as e:
                    print(e)
                # Stream the face-zone preview until a merged image is available.
                if not self.send_data_unity["GenerateImageSuccess"]:
                    face_zone = self.cropped_image(frame,
                                                   self.face_zone_width[0],
                                                   self.face_zone_height[0],
                                                   self.face_zone_width[1],
                                                   self.face_zone_height[1])
                    self.send_data_unity["StreamingData"] = self.convertFrame(face_zone)
                self.sock.SendData(self.send_data_unity)
                cv2.rectangle(self.frame_to_show, (self.face_zone_width[0], self.face_zone_height[0]),
                              (self.face_zone_width[1], self.face_zone_height[1]),
                              (0, 255, 255), 2)
                cv2.circle(self.frame_to_show, self.face_zone_center_point, 5, (255, 255, 0), -1)
                cv2.imshow("Output", self.frame_to_show)
                if cv2.waitKey(1) & 0xFF == ord("q"):
                    break
        finally:
            cap.release()
            cv2.destroyAllWindows()
if __name__ == "__main__":
    # Script entry point: build the pipeline from the weight files expected
    # in the working directory and start the capture loop.
    print("Starting python...")
    run_main_program = MainProgram(
        "face_detect.pt",                # face detector weights
        "yolov8n.pt",                    # person detector weights
        Path('osnet_x0_25_msmt17.pt'),   # re-identification weights for the tracker
        "deepocsort",                    # tracker type
    )
    run_main_program()