diff --git a/Process saliency map/YUVFileLoader.py b/Process saliency map/YUVFileLoader.py
new file mode 100644
index 0000000000000000000000000000000000000000..6b25d4d6290fa4fda69b85a3f572e275e83e5915
--- /dev/null
+++ b/Process saliency map/YUVFileLoader.py
@@ -0,0 +1,89 @@
+import os
+import cv2
+import numpy as np
+
+class YUVFileLoader:
+    @staticmethod
+    def load_yuv_file(file_path, width, height, is_10bit=False):
+        if is_10bit:
+            return YUVFileLoader.load_yuv_file_10bit(file_path, width, height)
+        else:
+            return YUVFileLoader.load_yuv_file_8bit(file_path, width, height)
+
+    @staticmethod
+    def load_yuv_file_8bit(file_path, width, height):
+        with open(file_path, 'rb') as f:
+            # Calculate the size of each frame
+            frame_size = width * height * 3 // 2  # YUV 420 format
+
+            # Calculate the number of frames in the file
+            num_frames = os.path.getsize(file_path) // frame_size
+            print(f"Number of frames in the file: {num_frames}")
+
+            # Create an empty numpy array to store the video frames for 3 channels
+            video = np.empty((num_frames, height, width, 3), dtype=np.uint8)
+
+            # Read each frame from the file and store it in the numpy array
+            for i in range(num_frames):
+                # Read the Y component of the frame
+                Y = np.frombuffer(f.read(width * height), dtype=np.uint8).reshape((height, width))
+
+                # Read the U and V components of the frame
+                U = np.frombuffer(f.read(width // 2 * height // 2), dtype=np.uint8).reshape((height // 2, width // 2))
+                V = np.frombuffer(f.read(width // 2 * height // 2), dtype=np.uint8).reshape((height // 2, width // 2))
+
+                # Upsample the U and V components to match the size of the Y component
+                U = cv2.resize(U, (width, height), interpolation=cv2.INTER_LINEAR)
+                V = cv2.resize(V, (width, height), interpolation=cv2.INTER_LINEAR)
+
+                # Combine the Y, U, and V components to form the frame
+                frame = np.dstack((Y, U, V))
+
+                # Store the frame in the numpy array
+                video[i] = frame
+            return video
+
+    @staticmethod
+    def load_yuv_file_10bit(file_path, width, height):
+        # Open the YUV file
+        with open(file_path, 'rb') as f:
+            frame_size = (width * height * 3 // 2) * 2  # 10-bit YUV 4:2:0 format
+            num_frames = os.path.getsize(file_path) // frame_size
+            print(f"Number of frames in the file: {num_frames}")
+            video = np.empty((num_frames, height, width, 3), dtype=np.uint8)
+            idx = 0
+            while True:
+                # Read Y plane
+                y_plane = np.frombuffer(f.read(width * height * 2), dtype=np.uint16)
+                if y_plane.size < width * height:
+                    print(f"Number of frames read: {idx}")
+                    return video
+                y_plane = y_plane.reshape((height, width))
+
+                # Read U and V planes
+                u_plane = np.frombuffer(f.read(width * height // 2), dtype=np.uint16)
+                if u_plane.size < width * height // 4:
+                    print(f"Number of frames read: {idx}")
+                    return video
+                u_plane = u_plane.reshape((height // 2, width // 2))
+
+                v_plane = np.frombuffer(f.read(width * height // 2), dtype=np.uint16)
+                if v_plane.size < width * height // 4:
+                    print(f"Number of frames read: {idx}")
+                    return video
+                v_plane = v_plane.reshape((height // 2, width // 2))
+
+                # Convert 10-bit to 8-bit
+                y_plane = (y_plane >> 2).astype(np.uint8)  # Adjust bit-shift for 10-bit to 8-bit conversion
+                u_plane = (u_plane >> 2).astype(np.uint8)
+                v_plane = (v_plane >> 2).astype(np.uint8)
+
+                # Upsample U and V planes
+                u_plane_upsampled = cv2.resize(u_plane, (width, height), interpolation=cv2.INTER_LINEAR)
+                v_plane_upsampled = cv2.resize(v_plane, (width, height), interpolation=cv2.INTER_LINEAR)
+
+                # Merge Y, U, V planes into one YUV frame
+                yuv_frame = np.dstack((y_plane, u_plane_upsampled, v_plane_upsampled))
+                video[idx] = yuv_frame
+
+                idx += 1
\ No newline at end of file
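
A minimal usage sketch for reviewers (not part of the diff). The file name, resolution, and bit depth below are placeholders, and it assumes the "Process saliency map" directory is on sys.path so YUVFileLoader can be imported directly; the conversion to BGR uses OpenCV's standard cv2.COLOR_YUV2BGR code on the returned YUV array.

import cv2
from YUVFileLoader import YUVFileLoader

# Hypothetical 10-bit 4:2:0 sequence at 1920x1080 (placeholder path and size).
frames = YUVFileLoader.load_yuv_file("example_1080p_10bit.yuv", 1920, 1080, is_10bit=True)
print(frames.shape)  # (num_frames, 1080, 1920, 3), dtype=uint8, channels in Y/U/V order

# The loader returns per-pixel YUV; convert the first frame to BGR for inspection.
bgr = cv2.cvtColor(frames[0], cv2.COLOR_YUV2BGR)
cv2.imwrite("frame0.png", bgr)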