문장 설명

이 시리즈는 Github에서malin9402가 제공한 코드에 대한 설명을 목적으로 하고 있으며, 이 글은 YOLOv3 프로젝트의 데이터set에 대한 설명을 드리겠습니다.py 파일에 대한 설명입니다.이 파일에는 데이터 집합을 만드는 데 사용되는 Dataset 클래스가 하나밖에 없기 때문에 본고는 코드에 대한 설명을 코드 옆에 직접 씁니다.
Github의 코드만 실행하고 싶으면 YOLOv3 코드에 대한 설명을 참고하십시오.
전체 코드

import os
import cv2
import random
import numpy as np
import tensorflow as tf
import core.utils as utils
from core.config import cfg

class Dataset(object):
    """implement Dataset here"""
    def __init__(self, dataset_type):
        self.annot_path  = cfg.TRAIN.ANNOT_PATH if dataset_type == 'train' else cfg.TEST.ANNOT_PATH  #   （  ）     
        self.input_sizes = cfg.TRAIN.INPUT_SIZE if dataset_type == 'train' else cfg.TEST.INPUT_SIZE  #   （  ）     
        self.batch_size  = cfg.TRAIN.BATCH_SIZE if dataset_type == 'train' else cfg.TEST.BATCH_SIZE  #   （  ）     
        self.data_aug    = cfg.TRAIN.DATA_AUG   if dataset_type == 'train' else cfg.TEST.DATA_AUG  #      （  ）           

        self.train_input_sizes = cfg.TRAIN.INPUT_SIZE  #        
        self.strides = np.array(cfg.YOLO.STRIDES)  #    feature map                   
        self.classes = utils.read_class_names(cfg.YOLO.CLASSES)  #      
        self.num_classes = len(self.classes)  #      
        self.anchors = np.array(utils.get_anchors(cfg.YOLO.ANCHORS)) #          
        self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE  #     （feature map）       
        self.max_bbox_per_scale = 150  #     （feature map）         （          ）

        self.annotations = self.load_annotations(dataset_type)  #     （  ）   
        self.num_samples = len(self.annotations)  #     
        self.num_batchs = int(np.ceil(self.num_samples / self.batch_size))  #       batch
        self.batch_count = 0  #   


    def load_annotations(self, dataset_type):
        with open(self.annot_path, 'r') as f:
            txt = f.readlines()
            annotations = [line.strip() for line in txt if len(line.strip().split()[1:]) != 0]
        np.random.shuffle(annotations)
        return annotations

    def __iter__(self):
        return self

    def __next__(self):

        with tf.device('/cpu:0'):
            self.train_input_size = random.choice(self.train_input_sizes)  #            ，        ；            ，               
            self.train_output_sizes = self.train_input_size // self.strides  #     （     feature map）   

            #           
            batch_image = np.zeros((self.batch_size, self.train_input_size, self.train_input_size, 3), dtype=np.float32)

            #           （     4，             52，26，13，      3    ，     80 ）
            batch_label_sbbox = np.zeros((self.batch_size, self.train_output_sizes[0], self.train_output_sizes[0],
                                          self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32)  #             ，shape   [4, 52, 52, 3, 85]
            batch_label_mbbox = np.zeros((self.batch_size, self.train_output_sizes[1], self.train_output_sizes[1],
                                          self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32)  #             ，shape   [4, 26, 26, 3, 85]
            batch_label_lbbox = np.zeros((self.batch_size, self.train_output_sizes[2], self.train_output_sizes[2],
                                          self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32)  #             ，shape   [4, 13, 13, 3, 85]

            batch_sbboxes = np.zeros((self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32)
            batch_mbboxes = np.zeros((self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32)
            batch_lbboxes = np.zeros((self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32)

            num = 0  #            （4   ）        
            if self.batch_count < self.num_batchs:  #       batch         batch   
                while num < self.batch_size:  #       （4   ）     
                    index = self.batch_count * self.batch_size + num  #           （            ）
                    if index >= self.num_samples: index -= self.num_samples  #               ，  index   ，    repeat   
                    annotation = self.annotations[index]  #     index       （      shuffle   ，           index.jpg）
                    image, bboxes = self.parse_annotation(annotation)  #           ->        ->                
                    label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes = self.preprocess_true_boxes(bboxes)  #                         

                    batch_image[num, :, :, :] = image  #               
                    batch_label_sbbox[num, :, :, :, :] = label_sbbox  #                                
                    batch_label_mbbox[num, :, :, :, :] = label_mbbox  #                                
                    batch_label_lbbox[num, :, :, :, :] = label_lbbox  #                                
                    batch_sbboxes[num, :, :] = sbboxes  #                                
                    batch_mbboxes[num, :, :] = mbboxes  #                                
                    batch_lbboxes[num, :, :] = lbboxes  #                                
                    num += 1
                self.batch_count += 1
                batch_smaller_target = batch_label_sbbox, batch_sbboxes  #       
                batch_medium_target  = batch_label_mbbox, batch_mbboxes  #       
                batch_larger_target  = batch_label_lbbox, batch_lbboxes  #       

                return batch_image, (batch_smaller_target, batch_medium_target, batch_larger_target)
            else:  #       batch       batch    
                self.batch_count = 0  #     
                np.random.shuffle(self.annotations)  #       
                raise StopIteration

    def random_horizontal_flip(self, image, bboxes):  #         

        if random.random() < 0.5:  #              0.5
            _, w, _ = image.shape
            image = image[:, ::-1, :]
            bboxes[:, [0,2]] = w - bboxes[:, [2,0]]

        return image, bboxes

    def random_crop(self, image, bboxes):  #       

        if random.random() < 0.5:  #              0.5
            h, w, _ = image.shape  #       
            #         [xmin, ymin]      [xmin, ymin]   [xmax, ymax]      [xmax, ymax]
            #    max_bbox               
            max_bbox = np.concatenate([np.min(bboxes[:, 0:2], axis=0), np.max(bboxes[:, 2:4], axis=0)], axis=-1)

            max_l_trans = max_bbox[0]  #             
            max_u_trans = max_bbox[1]  #             
            max_r_trans = w - max_bbox[2]  #             
            max_d_trans = h - max_bbox[3]  #             

            crop_xmin = max(0, int(max_bbox[0] - random.uniform(0, max_l_trans)))  #                    
            crop_ymin = max(0, int(max_bbox[1] - random.uniform(0, max_u_trans)))  #                    
            crop_xmax = max(w, int(max_bbox[2] + random.uniform(0, max_r_trans)))  #                    
            crop_ymax = max(h, int(max_bbox[3] + random.uniform(0, max_d_trans)))  #                    

            image = image[crop_ymin : crop_ymax, crop_xmin : crop_xmax]  #       

            bboxes[:, [0, 2]] = bboxes[:, [0, 2]] - crop_xmin  #                    
            bboxes[:, [1, 3]] = bboxes[:, [1, 3]] - crop_ymin  #                    

        return image, bboxes

    def random_translate(self, image, bboxes):  #       

        if random.random() < 0.5:  #              0.5
            h, w, _ = image.shape  #       
            #         [xmin, ymin]      [xmin, ymin]   [xmax, ymax]      [xmax, ymax]
            #    max_bbox               
            max_bbox = np.concatenate([np.min(bboxes[:, 0:2], axis=0), np.max(bboxes[:, 2:4], axis=0)], axis=-1)

            max_l_trans = max_bbox[0]  #             
            max_u_trans = max_bbox[1]  #             
            max_r_trans = w - max_bbox[2]  #             
            max_d_trans = h - max_bbox[3]  #             

            tx = random.uniform(-(max_l_trans - 1), (max_r_trans - 1))  #        
            ty = random.uniform(-(max_u_trans - 1), (max_d_trans - 1))  #        

            M = np.array([[1, 0, tx], [0, 1, ty]])
            image = cv2.warpAffine(image, M, (w, h))  #     

            bboxes[:, [0, 2]] = bboxes[:, [0, 2]] + tx  #                    
            bboxes[:, [1, 3]] = bboxes[:, [1, 3]] + ty  #                    

        return image, bboxes

    def parse_annotation(self, annotation):

        line = annotation.split()  #        ' '              
        image_path = line[0]  #       
        if not os.path.exists(image_path):  #             
            raise KeyError("%s does not exist ... " %image_path)
        image = cv2.imread(image_path)  #           
        #   map                     ，         （       text.py     ）
        bboxes = np.array([list(map(int, box.split(','))) for box in line[1:]])

        if self.data_aug:  #              
            image, bboxes = self.random_horizontal_flip(np.copy(image), np.copy(bboxes))  #         
            image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))  #       
            image, bboxes = self.random_translate(np.copy(image), np.copy(bboxes))  #       

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # BGR --> RGB
        #                
        image, bboxes = utils.image_preporcess(np.copy(image), [self.train_input_size, self.train_input_size], np.copy(bboxes))
        return image, bboxes
    
    #          IOU  
    def bbox_iou(self, boxes1, boxes2):

        boxes1 = np.array(boxes1)  #            
        boxes2 = np.array(boxes2)  #            

        boxes1_area = boxes1[..., 2] * boxes1[..., 3]  #          
        boxes2_area = boxes2[..., 2] * boxes2[..., 3]  #          

        boxes1 = np.concatenate([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
                                boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)  #             +     
        boxes2 = np.concatenate([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
                                boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)  #             +     

        left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])  #      ，left_up=[xmin2, ymin2]
        right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])  #      ，right_down=[xmax1, ymax1]

        inter_section = np.maximum(right_down - left_up, 0.0)  #     
        inter_area = inter_section[..., 0] * inter_section[..., 1]  #     
        union_area = boxes1_area + boxes2_area - inter_area  #     

        return inter_area / union_area

    def preprocess_true_boxes(self, bboxes):

        # label   3   ，        [   feature map   ,    feature map   ,    feature map        , 5 +     ]
        label = [np.zeros((self.train_output_sizes[i], self.train_output_sizes[i], self.anchor_per_scale,
                           5 + self.num_classes)) for i in range(3)]
        # bboxes_xywh   3   ，        [               ,     4    ]
        bboxes_xywh = [np.zeros((self.max_bbox_per_scale, 4)) for _ in range(3)]
        bbox_count = np.zeros((3,))  #                      

        for bbox in bboxes:  #                     
            bbox_coor = bbox[:4]  #           （    +    ）
            bbox_class_ind = bbox[4]  #              （     ）

            onehot = np.zeros(self.num_classes, dtype=np.float)  #             
            onehot[bbox_class_ind] = 1.0  #                      1
            # uniform_distribution        [   ,]    ，             
            uniform_distribution = np.full(self.num_classes, 1.0 / self.num_classes)
            deta = 0.01
            smooth_onehot = onehot * (1 - deta) + deta * uniform_distribution  #     

            #   [xmin, ymin, xmax, ymax]     [     ,      ,  ,  ]
            bbox_xywh = np.concatenate([(bbox_coor[2:] + bbox_coor[:2]) * 0.5, bbox_coor[2:] - bbox_coor[:2]], axis=-1)
            #            feature map  （              ）
            bbox_xywh_scaled = 1.0 * bbox_xywh[np.newaxis, :] / self.strides[:, np.newaxis]  # bbox_xywh shape: [1, 4]；strides shape: [3, ]; bbox_xywh_scaled shape: [1, 3, 4]

            iou = []
            exist_positive = False  # False              
            for i in range(3):  # 3           
                anchors_xywh = np.zeros((self.anchor_per_scale, 4))  #         ，shape = [3, 4]，3              ，4                 
                anchors_xywh[:, 0:2] = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32) + 0.5  #               （              ）
                anchors_xywh[:, 2:4] = self.anchors[i]  #                

                iou_scale = self.bbox_iou(bbox_xywh_scaled[i][np.newaxis, :], anchors_xywh)  #         （    ）     IOU  
                iou.append(iou_scale)
                iou_mask = iou_scale > 0.3  # IOU     0.3             1

                if np.any(iou_mask):  #                
                    xind, yind = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32)  #             

                    #      (yind, xind)                
                    label[i][yind, xind, iou_mask, :] = 0
                    label[i][yind, xind, iou_mask, 0:4] = bbox_xywh  #                         
                    label[i][yind, xind, iou_mask, 4:5] = 1.0  # 1            
                    label[i][yind, xind, iou_mask, 5:] = smooth_onehot  #                       

                    bbox_ind = int(bbox_count[i] % self.max_bbox_per_scale)  #                
                    bboxes_xywh[i][bbox_ind, :4] = bbox_xywh  #                     
                    bbox_count[i] += 1

                    exist_positive = True  # True             

            if not exist_positive:  #                  
                best_anchor_ind = np.argmax(np.array(iou).reshape(-1), axis=-1)  #   IOU          
                best_detect = int(best_anchor_ind / self.anchor_per_scale)  #      
                best_anchor = int(best_anchor_ind % self.anchor_per_scale)  #            
                xind, yind = np.floor(bbox_xywh_scaled[best_detect, 0:2]).astype(np.int32)  #             

                label[best_detect][yind, xind, best_anchor, :] = 0
                label[best_detect][yind, xind, best_anchor, 0:4] = bbox_xywh  #                    
                label[best_detect][yind, xind, best_anchor, 4:5] = 1.0  # 1            
                label[best_detect][yind, xind, best_anchor, 5:] = smooth_onehot  #                  

                bbox_ind = int(bbox_count[best_detect] % self.max_bbox_per_scale)  #                
                bboxes_xywh[best_detect][bbox_ind, :4] = bbox_xywh  #                     
                bbox_count[best_detect] += 1
        label_sbbox, label_mbbox, label_lbbox = label  #            （    、        ）           
        sbboxes, mbboxes, lbboxes = bboxes_xywh  #                             
        return label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes

    def __len__(self):
        return self.num_batchs
이 내용에 흥미가 있습니까?
현재 기사가 여러분의 문제를 해결하지 못하는 경우 AI 엔진은 머신러닝 분석(스마트 모델이 방금 만들어져 부정확한 경우가 있을 수 있음)을 통해 가장 유사한 기사를 추천합니다:
GPU가 있는 M1 MacBook Air에 TensorFlow 설치(메탈)
이제 TensorFlow v2.5에서 Apple의 tensorflow-metal PluggableDevice를 활용하여 Metal을 사용하여 Mac GPU에서 직접 가속화된 교육을 받을 수 있습니다. 자세히 알아보기...
텍스트를 자유롭게 공유하거나 복사할 수 있습니다.하지만 이 문서의 URL은 참조 URL로 남겨 두십시오.
CC BY-SA 2.5, CC BY-SA 3.0 및 CC BY-SA 4.0에 따라 라이센스가 부여됩니다.
Tensorflow2.0 YOLOv3 구현(6): dataset.py

문서 목록

문장 설명

전체 코드

좋은 웹페이지 즐겨찾기