딥러닝 | RPN 네트워크 분석

32244 단어 DL
감사https://blog.csdn.net/jiongnima/article/details/79781792
 
1) 논문을 직접 읽기:https://arxiv.org/abs/1506.01497
2) Faster R-CNN은 선험지식이 많아 논문 읽기가 어렵다고 생각하는 독자는 필자의 블로그를 참고할 필요가 있다.
인스턴스 분할 모델Mask R-CNN 상세 정보: R-CNN, Fast R-CNN, Faster R-CNN에서 Mask R-CNN으로
3) Faster R-CNN을 소개하는 위의 기사도 볼 수 있는데 필자는 괜찮다고 생각한다.
Faster R-CNN 텍스트 읽기
3. (매우 중요) 이 글에서 해설하는 Faster R-CNN 코드는 TensorFlow 버전이며, 주소는 https://github.com/kevinjliang/tf-Faster-RCNN 입니다. 다만 상당수의 인터페이스는 여전히 Girshick의 py-faster-rcnn 버전을 그대로 사용하고 있으며, 주요 모듈의 구현도 마찬가지입니다. 따라서 먼저 해당 코드를 내려받아 전체 코드 구조를 어느 정도 파악해 두어야 이 글 전체를 이해할 수 있습니다.
다음은 핵심 내용입니다.
우선, faster_rcnn_resnet50ish.py 파일에서 훈련 시 데이터 층이 출력하는 것은 다음과 같다.
# Train data
self.x['TRAIN'] = tf.placeholder(tf.float32, [1, None, None, 3]) #  
self.im_dims['TRAIN'] = tf.placeholder(tf.int32, [None, 2]) #     [height, width]
self.gt_boxes['TRAIN'] = tf.placeholder(tf.int32, [None, 5]) #   
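즉, 훈련 시에는 이미지, 이미지 크기 [height, width], 그리고 ground truth box 세 가지가 입력되며, 이들은 이후 anchor의 라벨과 회귀 목표값을 만드는 데 사용된다. (원문의 이 문장은 추출 과정에서 손상되어 위 코드에 근거해 복원한 것이다.)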

그리고 faster_rcnn_networks.py 파일에서 rpn 클래스를 볼 수 있는데, 필자의 방식대로 주석을 단 원본 코드를 먼저 붙인다.
# -*- coding: utf-8 -*-

"""

Created on Fri Dec 30 16:14:48 2016


@author: Kevin Liang


Faster R-CNN detection and classification networks.


Contains the Region Proposal Network (RPN), ROI proposal layer, and the RCNN.


TODO: -Split off these three networks into their own files OR add to Layers

"""


import sys


sys.path.append('../')


from Lib.TensorBase.tensorbase.base import Layers


from Lib.faster_rcnn_config import cfg

from Lib.loss_functions import rpn_cls_loss, rpn_bbox_loss, fast_rcnn_cls_loss, fast_rcnn_bbox_loss

from Lib.roi_pool import roi_pool

from Lib.rpn_softmax import rpn_softmax

from Networks.anchor_target_layer import anchor_target_layer

from Networks.proposal_layer import proposal_layer

from Networks.proposal_target_layer import proposal_target_layer


import tensorflow as tf



class rpn:
    '''
    Region Proposal Network (RPN): From the convolutional feature maps
    (TensorBase Layers object) of the last layer, generate bounding boxes
    relative to anchor boxes and give an "objectness" score to each

    In evaluation mode (eval_mode==True), gt_boxes should be None.

    The whole graph is built once, in the constructor, by `_network()`.
    The `get_*` methods only return tensors created there; the
    `get_*_loss` methods add loss ops on top of them.
    '''

    def __init__(self, featureMaps, gt_boxes, im_dims, _feat_stride, eval_mode):
        '''
        Store the inputs and build the RPN graph.

        featureMaps:  input the RPN layers are stacked on (handed to `Layers`)
        gt_boxes:     ground truth boxes; must be None when eval_mode is True
        im_dims:      image dimensions, forwarded to anchor_target_layer
        _feat_stride: stride between feature-map cells in image pixels,
                      forwarded to anchor_target_layer
        eval_mode:    True for evaluation (no targets are built), False for
                      training
        '''
        self.featureMaps = featureMaps
        self.gt_boxes = gt_boxes
        self.im_dims = im_dims
        self._feat_stride = _feat_stride
        # Anchor scales come from the config; three aspect ratios are applied
        # to each scale (see `_num_anchors` in `_network`).
        self.anchor_scales = cfg.RPN_ANCHOR_SCALES
        self.eval_mode = eval_mode

        self._network()

    def _network(self):
        '''
        Build the RPN: shared conv layers, the objectness ("cls") head, the
        box-regression ("bbox") head and, in training mode only, the anchor
        targets.
        '''
        # There shouldn't be any gt_boxes if in evaluation mode
        if self.eval_mode is True:
            assert self.gt_boxes is None, \
                'Evaluation mode should not have ground truth boxes (or else what are you detecting for?)'

        # Anchors per feature-map cell: one per (scale, aspect ratio) pair,
        # with 3 aspect ratios per scale.
        _num_anchors = len(self.anchor_scales)*3

        rpn_layers = Layers(self.featureMaps)

        # NOTE(review): the nesting of the 'cls' / 'target' / 'bbox' scopes
        # inside 'rpn' was reconstructed from a paste that had lost its
        # indentation -- confirm against the upstream file, because it
        # determines the variable names.
        with tf.variable_scope('rpn'):
            # Spatial windowing: conv layers configured by
            # cfg.RPN_FILTER_SIZES / cfg.RPN_OUTPUT_CHANNELS
            for i in range(len(cfg.RPN_OUTPUT_CHANNELS)):
                rpn_layers.conv2d(filter_size=cfg.RPN_FILTER_SIZES[i], output_channels=cfg.RPN_OUTPUT_CHANNELS[i])

            features = rpn_layers.get_output()

            with tf.variable_scope('cls'):
                # Box-classification layer (objectness): a 1x1 conv with
                # 2 outputs per anchor and no activation (raw logits).
                self.rpn_bbox_cls_layers = Layers(features)
                self.rpn_bbox_cls_layers.conv2d(filter_size=1, output_channels=_num_anchors*2, activation_fn=None)

            with tf.variable_scope('target'):
                # Only calculate targets in train mode. No ground truth boxes in evaluation mode
                if self.eval_mode is False:
                    # Anchor Target Layer (anchors and deltas)
                    rpn_cls_score = self.rpn_bbox_cls_layers.get_output()
                    self.rpn_labels, self.rpn_bbox_targets, self.rpn_bbox_inside_weights, self.rpn_bbox_outside_weights = \
                        anchor_target_layer(rpn_cls_score=rpn_cls_score, gt_boxes=self.gt_boxes, im_dims=self.im_dims,
                                            _feat_stride=self._feat_stride, anchor_scales=self.anchor_scales)

            with tf.variable_scope('bbox'):
                # Bounding-Box regression layer (bounding box predictions):
                # a 1x1 conv with 4 outputs per anchor and no activation.
                self.rpn_bbox_pred_layers = Layers(features)
                self.rpn_bbox_pred_layers.conv2d(filter_size=1, output_channels=_num_anchors*4, activation_fn=None)

    # Get functions
    def get_rpn_cls_score(self):
        '''Return the output of the objectness head (2 channels per anchor).'''
        return self.rpn_bbox_cls_layers.get_output()

    def get_rpn_labels(self):
        '''Return the per-anchor labels from anchor_target_layer (training only).'''
        assert self.eval_mode is False, 'No RPN labels without ground truth boxes'
        return self.rpn_labels

    def get_rpn_bbox_pred(self):
        '''Return the output of the box-regression head (4 channels per anchor).'''
        return self.rpn_bbox_pred_layers.get_output()

    def get_rpn_bbox_targets(self):
        '''Return the per-anchor regression targets (training only).'''
        assert self.eval_mode is False, 'No RPN bounding box targets without ground truth boxes'
        return self.rpn_bbox_targets

    def get_rpn_bbox_inside_weights(self):
        '''Return the inside weights from anchor_target_layer (training only).'''
        assert self.eval_mode is False, 'No RPN inside weights without ground truth boxes'
        return self.rpn_bbox_inside_weights

    def get_rpn_bbox_outside_weights(self):
        '''Return the outside weights from anchor_target_layer (training only).'''
        assert self.eval_mode is False, 'No RPN outside weights without ground truth boxes'
        return self.rpn_bbox_outside_weights

    # Loss functions
    def get_rpn_cls_loss(self):
        '''Return the RPN classification loss, rpn_cls_loss(scores, labels).'''
        assert self.eval_mode is False, 'No RPN cls loss without ground truth boxes'
        rpn_cls_score = self.get_rpn_cls_score()
        rpn_labels = self.get_rpn_labels()
        return rpn_cls_loss(rpn_cls_score, rpn_labels)

    def get_rpn_bbox_loss(self):
        '''Return the RPN box-regression loss built from the predictions,
        targets and the inside/outside weights.'''
        assert self.eval_mode is False, 'No RPN bbox loss without ground truth boxes'
        rpn_bbox_pred = self.get_rpn_bbox_pred()
        rpn_bbox_targets = self.get_rpn_bbox_targets()
        rpn_bbox_inside_weights = self.get_rpn_bbox_inside_weights()
        rpn_bbox_outside_weights = self.get_rpn_bbox_outside_weights()
        return rpn_bbox_loss(rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights)

보다시피 rpn 클래스는 훈련 시 주로 두 가지 기능을 한다. 첫 번째는 get_rpn_cls_loss로 RPN의 분류 loss를 계산하는 것이고, 두 번째는 get_rpn_bbox_loss로 RPN의 anchor 경계 상자 회귀 loss를 계산하는 것이다. 그렇다면 이 두 loss를 계산할 때 가장 어려운 점은 ground truth를 어떻게 얻느냐이다. 이 ground truth는 anchor_target_layer 함수를 통해 얻어지므로 먼저 이 함수로 들어가 보자. 관례대로 원본 코드를 먼저 싣는다.
# -*- coding: utf-8 -*-

"""

Created on Sun Jan 1 16:11:17 2017


@author: Kevin Liang (modifications)


Anchor Target Layer: Creates all the anchors in the final convolutional feature

map, assigns anchors to ground truth boxes, and applies labels of "objectness"


Adapted from the official Faster R-CNN repo:

https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/rpn/anchor_target_layer.py

"""


# --------------------------------------------------------

# Faster R-CNN

# Copyright (c) 2015 Microsoft

# Licensed under The MIT License [see LICENSE for details]

# Written by Ross Girshick and Sean Bell

# --------------------------------------------------------


import sys

sys.path.append('../')


import numpy as np

import numpy.random as npr

import tensorflow as tf


from Lib.bbox_overlaps import bbox_overlaps

from Lib.bbox_transform import bbox_transform

from Lib.faster_rcnn_config import cfg

from Lib.generate_anchors import generate_anchors


#       anchor   ground truth(  /  ,     )

def anchor_target_layer(rpn_cls_score, gt_boxes, im_dims, _feat_stride, anchor_scales):
    '''
    Make Python version of _anchor_target_layer_py below Tensorflow compatible

    Wraps the NumPy implementation in `tf.py_func` so it can sit inside the
    graph, then names the four outputs.

    Returns (rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
    rpn_bbox_outside_weights). The labels are cast to int32; the other three
    stay float32.
    '''
    # py_func needs one output dtype for each value returned by the Python
    # function; the labels are produced as float32 there and cast below.
    rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = \
        tf.py_func(_anchor_target_layer_py, [rpn_cls_score, gt_boxes, im_dims, _feat_stride, anchor_scales],
                   [tf.float32, tf.float32, tf.float32, tf.float32])

    # Convert to named tensors
    rpn_labels = tf.convert_to_tensor(tf.cast(rpn_labels, tf.int32), name='rpn_labels')
    rpn_bbox_targets = tf.convert_to_tensor(rpn_bbox_targets, name='rpn_bbox_targets')
    rpn_bbox_inside_weights = tf.convert_to_tensor(rpn_bbox_inside_weights, name='rpn_bbox_inside_weights')
    rpn_bbox_outside_weights = tf.convert_to_tensor(rpn_bbox_outside_weights, name='rpn_bbox_outside_weights')

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights



def _anchor_target_layer_py(rpn_cls_score, gt_boxes, im_dims, _feat_stride, anchor_scales):
    """
    Python version

    Assign anchors to ground-truth targets. Produces anchor classification
    labels and bounding-box regression targets.

    # Algorithm:
    #
    # for each (H, W) location i
    #   generate 9 anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the 9 anchors
    # filter out-of-image anchors
    # measure GT overlap

    Parameters
    ----------
    rpn_cls_score : array; only its shape is used. shape[0] must be 1 and
        shape[1:3] is taken as (height, width) of the feature map.
    gt_boxes : (G, 5) array; columns 0-3 are used as box corners
        (x1, y1, x2, y2). The fifth column is not read here.
    im_dims : array whose first row is [height, width] of the image.
    _feat_stride : pixel stride between neighbouring feature-map cells.
    anchor_scales : scales handed to generate_anchors.

    Returns
    -------
    rpn_labels : float32, shape (1, 1, A*height, width); 1 = positive,
        0 = negative, -1 = ignored.
    rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights :
        float32, each of shape (1, A*4, height, width).
    """
    im_dims = im_dims[0]  # [height, width] of the single image in the batch
    _anchors = generate_anchors(scales=np.array(anchor_scales))  # base anchors, one row (x1, y1, x2, y2) each
    _num_anchors = _anchors.shape[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # Only minibatch of 1 supported
    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    # 1. Generate proposals from bbox deltas and shifted anchors
    # One (x, y, x, y) shift per feature-map cell, in image pixels.
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()  # (height*width, 4)

    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    all_anchors = (_anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # anchors inside the image: indices of anchors fully within the borders
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_dims[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_dims[0] + _allowed_border)    # height
    )[0]

    # keep only inside anchors; everything below works on this subset until
    # the final _unmap calls.
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt), shape (len(anchors), len(gt_boxes))
    # np.float64 replaces the `np.float` alias, which was removed in NumPy 1.24
    # (it was the same type: Python float == float64).
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # For each anchor: index of, and overlap with, its best-matching gt box.
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # For each gt box: the best overlap any anchor achieves with it ...
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # ... and every anchor that attains that best overlap (ties included).
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1  # surplus positives become "dont care"

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1  # surplus negatives become "dont care"

    # bbox_targets: The deltas (relative to anchors) that Faster R-CNN should
    # try to predict at each anchor
    # TODO: This "weights" business might be deprecated. Requires investigation
    # Targets are computed for every inside anchor against its best-matching
    # gt box. (The original pre-filled a zeros array that was immediately
    # overwritten; that dead assignment is removed.)
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # Inside weights are non-zero only for positive anchors.
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        # Split the total weight between positives and negatives according
        # to RPN_POSITIVE_WEIGHT.
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors: anchors outside the image get
    # label -1 and zero targets / weights.
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

    # labels -> (1, 1, A*height, width)
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets -> (1, A*4, height, width)
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)

    # bbox_inside_weights -> (1, A*4, height, width)
    rpn_bbox_inside_weights = bbox_inside_weights.reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)

    # bbox_outside_weights -> (1, A*4, height, width)
    rpn_bbox_outside_weights = bbox_outside_weights.reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights



def _unmap(data, count, inds, fill=0): #_unmap        anchor          anchor

""" Unmap a subset of item (data) back to the original set of items (of

size count) """

if len(data.shape) == 1:

ret = np.empty((count, ), dtype=np.float32)

ret.fill(fill)

ret[inds] = data

else:

ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)

ret.fill(fill)

ret[inds, :] = data

return ret


def _compute_targets(ex_rois, gt_rois):
    """Compute bounding-box regression targets for an image.

    ex_rois: (N, 4) anchor boxes.
    gt_rois: (N, 5) ground truth rows, one per anchor; only the first four
             columns (the box corners) are passed on to bbox_transform.

    Returns the bbox_transform result as float32.
    """
    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 5

    # copy=False avoids a second allocation when the result is already float32
    return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False)

anchor_target_layer 함수는 주로 _anchor_target_layer_py 함수를 호출한 뒤 그 출력을 tensor로 변환한다. 이제 _anchor_target_layer_py 함수를 자세히 분석해 보자. 이 함수는 먼저 generate_anchors 함수로 9개의 기준 anchor를 생성한 다음, 공유 특징 맵 위의 각 슬라이딩 위치에 대해 원본 이미지에서 대응하는 위치에 anchor를 생성한다. 이것이 all_anchors이다. 이어서 이미지 경계를 벗어나는 anchor를 모두 제외하여 anchors를 얻으며, 이후의 연산은 모두 이미지 내부의 anchors만을 대상으로 한다. 그런 다음 bbox_overlaps 함수로 경계 안쪽의 모든 anchor와 ground truth box 사이의 IoU 값을 계산한다. 이어 IoU가 0.3에서 0.7 사이인 anchor는 배제하고(labels의 해당 값을 -1로 둔다), 훈련에 알맞은 수의 전경 anchor와 배경 anchor를 남긴다. 그리고 _compute_targets 함수로 모든 anchor에 대응하는 좌표 변환 값 (tx, ty, tw, th)을 계산하여 bbox_targets 배열에 저장한다. 이어서 bbox_inside_weights와 bbox_outside_weights를 계산하는데, 이 두 배열은 anchor 경계 상자 회귀를 훈련할 때 중요한 역할을 한다. 마지막으로 _unmap 함수를 통해 이미지 내부 anchor에 대한 결과를 전체 anchor로 다시 매핑한다.
독자 여러분은 위의 해설을 처음 보면 혼란스러울 수 있으니 조급해하지 말자. anchor_target_layer는 주로 두 가지를 얻기 위한 것이다. 첫 번째는 이미지 한 장에서 생성된 각 anchor의 라벨이다. 훈련 시 일정 수의 양성 샘플(전경)과 일정 수의 음성 샘플(배경)에 라벨을 부여하고, 나머지는 모두 -1로 설정하여 훈련에서 무시한다. 두 번째는 모든 anchor의 경계 상자 회귀 목표값이다. 경계 상자 회귀 loss를 계산할 때는 전경 anchor만 기여하는데, 이는 bbox_inside_weights와 bbox_outside_weights로 구현된다. 전경이 아닌 anchor의 bbox_inside_weights는 모두 0이므로 회귀 loss에 기여하지 않는다. (코드상 배경 anchor의 bbox_outside_weights에는 negative_weights가 들어가지만, inside weight가 0이어서 결과에는 영향을 주지 않는다.)
anchor_target_layer 함수에는 비교적 중요한 함수가 몇 개 있다. 첫 번째는 generate_anchors로, 이 함수의 주된 역할은 3가지 종횡비와 3가지 스케일(면적)을 조합한 9개의 anchor를 생성하는 것이다. 소스 코드와 주석은 다음과 같다.
# -*- coding: utf-8 -*-

"""

Created on Sun Jan 1 16:11:17 2017


@author: Kevin Liang (modifications)


generate_anchors and supporting functions: generate reference windows (anchors)

for Faster R-CNN. Specifically, it creates a set of k (default of 9) relative

coordinates. These references will be added on to all positions of the final

convolutional feature maps.


Adapted from the official Faster R-CNN repo:

https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/rpn/generate_anchors.py


Note: the produced anchors have indices off by 1 of what the comments claim.

Probably due to MATLAB being 1-indexed, while Python is 0-indexed.

"""


# --------------------------------------------------------

# Faster R-CNN

# Copyright (c) 2015 Microsoft

# Licensed under The MIT License [see LICENSE for details]

# Written by Ross Girshick and Sean Bell

# --------------------------------------------------------


import numpy as np


# Verify that we compute the same anchors as Shaoqing's matlab implementation:

#

# >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat

# >> anchors

#

# anchors =

#

# -83 -39 100 56

# -175 -87 192 104

# -359 -183 376 200

# -55 -55 72 72

# -119 -119 136 136

# -247 -247 264 264

# -35 -79 52 96

# -79 -167 96 184

# -167 -343 184 360


#array([[ -83., -39., 100., 56.],

# [-175., -87., 192., 104.],

# [-359., -183., 376., 200.],

# [ -55., -55., 72., 72.],

# [-119., -119., 136., 136.],

# [-247., -247., 264., 264.],

# [ -35., -79., 52., 96.],

# [ -79., -167., 96., 184.],

# [-167., -343., 184., 360.]])


def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
                     scales=2**np.arange(3, 6)):
    """
    Generate anchor (reference) windows by enumerating aspect ratios X
    scales wrt a reference (0, 0, 15, 15) window.

    base_size: side length of the square reference window.
    ratios:    aspect ratios to enumerate (the default list is only read,
               never mutated).
    scales:    multipliers applied to the width and height of each ratio
               anchor.

    Returns an array with one (x1, y1, x2, y2) row per (ratio, scale) pair,
    ordered ratio-major; with the defaults that is 9 rows.
    """
    # Reference window; with base_size=16 this is [0, 0, 15, 15].
    base_anchor = np.array([1, 1, base_size, base_size]) - 1
    # One anchor per aspect ratio, all centred on the reference window.
    ratio_anchors = _ratio_enum(base_anchor, ratios)
    # Expand every ratio anchor to all scales and stack the results.
    anchors = np.vstack([_scale_enum(ratio_anchor, scales)
                         for ratio_anchor in ratio_anchors])
    return anchors


def _whctrs(anchor):#  anchor           ,  anchor        

"""

Return width, height, x center, and y center for an anchor (window).

"""


w = anchor[2] - anchor[0] + 1

h = anchor[3] - anchor[1] + 1

x_ctr = anchor[0] + 0.5 * (w - 1)

y_ctr = anchor[1] + 0.5 * (h - 1)

return w, h, x_ctr, y_ctr


def _mkanchors(ws, hs, x_ctr, y_ctr):# anchor         window,            

"""

Given a vector of widths (ws) and heights (hs) around a center

(x_ctr, y_ctr), output a set of anchors (windows).

"""


ws = ws[:, np.newaxis] #shape: [3,1]

hs = hs[:, np.newaxis] #shape: [3,1]

anchors = np.hstack((x_ctr - 0.5 * (ws - 1),

y_ctr - 0.5 * (hs - 1),

x_ctr + 0.5 * (ws - 1),

y_ctr + 0.5 * (hs - 1)))

return anchors #shape [3,4],    anchor,              


def _ratio_enum(anchor, ratios):
    """
    Enumerate a set of anchors for each aspect ratio wrt an anchor.

    The new anchors share the center of `anchor`; their areas are only
    approximately equal because width and height are rounded.
    """
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    size = w * h
    # For the 16x16 reference window and ratios [0.5, 1, 2] this gives
    # size_ratios = [512, 256, 128], ws = [23, 16, 11], hs = [12, 16, 22].
    size_ratios = size / ratios
    ws = np.round(np.sqrt(size_ratios))
    hs = np.round(ws * ratios)
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors


def _scale_enum(anchor, scales):
    """
    Enumerate a set of anchors for each scale wrt an anchor.

    Width and height of `anchor` are multiplied by every entry of `scales`;
    the center stays the same.
    """
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    ws = w * scales
    hs = h * scales
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors


if __name__ == '__main__':
    # Manual check: time one call, print the anchors, then drop into an
    # interactive IPython shell for inspection.
    import time
    t = time.time()
    a = generate_anchors()
    print(time.time() - t)
    print(a)
    from IPython import embed
    embed()

위 코드의 주요 원리는 다음과 같다. 가장 먼저 기준 anchor 하나를 생성한다. 그리고 이 기준 anchor로부터 종횡비는 서로 다르고 면적은 (반올림 오차를 제외하면) 같은 anchor 3개를 생성한다. 마지막으로 각 종횡비에 대해 서로 다른 세 가지 스케일의 anchor를 생성하여 최종적으로 9개의 anchor를 얻는다. 자세한 내용은 코드의 주석을 참고하라.
두 번째로 중요한 함수는 bbox_overlaps 함수로, 모든 anchor와 모든 ground truth box 사이의 IoU 값을 계산한다. 코드는 다음과 같다.
# -*- coding: utf-8 -*-

"""

Created on Sun Jan 1 20:25:19 2017


@author: Kevin Liang (modification)


Calculates bounding box overlaps between N bounding boxes, and K query boxes

(anchors) and return a matrix of overlap proportions


Written in Cython for optimization.

"""

# --------------------------------------------------------

# Fast R-CNN

# Copyright (c) 2015 Microsoft

# Licensed under The MIT License [see LICENSE for details]

# Written by Sergey Karayev

# --------------------------------------------------------


cimport cython

import numpy as np

cimport numpy as np


# float64 is what the removed `np.float` alias (NumPy >= 1.24) stood for;
# np.float64_t is the matching C-level typedef.
DTYPE = np.float64
ctypedef np.float64_t DTYPE_t


def bbox_overlaps(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """
    Compute the intersection-over-union between every box and every query
    box. Corners are treated as inclusive pixel coordinates, hence the `+ 1`
    in every width / height. Only columns 0-3 (x1, y1, x2, y2) of either
    input are read.

    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    cdef unsigned int N = boxes.shape[0]
    cdef unsigned int K = query_boxes.shape[0]
    cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
    cdef DTYPE_t iw, ih, box_area
    cdef DTYPE_t ua
    cdef unsigned int k, n
    for k in range(K):
        # Area of the query box, computed once per column.
        box_area = (
            (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
            (query_boxes[k, 3] - query_boxes[k, 1] + 1)
        )
        for n in range(N):
            # Width of the intersection; non-positive means no overlap and
            # the entry keeps its initial 0.
            iw = (
                min(boxes[n, 2], query_boxes[k, 2]) -
                max(boxes[n, 0], query_boxes[k, 0]) + 1
            )
            if iw > 0:
                # Height of the intersection.
                ih = (
                    min(boxes[n, 3], query_boxes[k, 3]) -
                    max(boxes[n, 1], query_boxes[k, 1]) + 1
                )
                if ih > 0:
                    # Union area = area(box) + area(query) - intersection.
                    ua = float(
                        (boxes[n, 2] - boxes[n, 0] + 1) *
                        (boxes[n, 3] - boxes[n, 1] + 1) +
                        box_area - iw * ih
                    )
                    overlaps[n, k] = iw * ih / ua
    return overlaps

세 번째로 중요한 부분은 anchor의 좌표 변환 값을 계산할 때 사용하는 bbox_transform 함수이다. 좌표 변환을 계산할 때 anchor의 표현 형식을 중심 좌표와 너비·높이로 바꾼다는 점에 주의하라. 함수 코드와 주석은 다음과 같다.
# -*- coding: utf-8 -*-

"""

Created on Sun Jan 1 21:18:58 2017


@author: Kevin Liang (modifications)


bbox_transform and its inverse operation

"""


# --------------------------------------------------------

# Fast R-CNN

# Copyright (c) 2015 Microsoft

# Licensed under The MIT License [see LICENSE for details]

# Written by Ross Girshick

# --------------------------------------------------------


import numpy as np


def bbox_transform(ex_rois, gt_rois):
    '''
    Receives two sets of bounding boxes, denoted by two opposite corners
    (x1,y1,x2,y2), and returns the target deltas that Faster R-CNN should aim
    for.

    ex_rois: (N, 4) example boxes (the anchors).
    gt_rois: (N, 4) ground truth boxes, row i paired with ex_rois row i.

    Returns an (N, 4) array with columns (dx, dy, dw, dh): center offsets
    normalised by the example box size, and log ratios of widths / heights.
    '''
    # Example boxes as width / height / center.
    ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
    ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights

    # Ground truth boxes in the same representation.
    gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
    gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights

    targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = np.log(gt_widths / ex_widths)
    targets_dh = np.log(gt_heights / ex_heights)

    # vstack gives (4, N); the transpose makes it one row per box: (N, 4).
    targets = np.vstack(
        (targets_dx, targets_dy, targets_dw, targets_dh)).transpose()
    return targets

여기까지 anchor_target_layer의 해설이 끝났다. 이 함수는 RPN 소스 코드에서 가장 중요한 함수 중 하나로, 모든 anchor에 대응하는 라벨과 경계 상자 회귀 목표값을 반환해 주므로 loss를 계산하기가 쉬워진다. RPN의 loss를 계산하는 함수의 코드와 주석은 다음과 같다.
#!/usr/bin/env python3

# -*- coding: utf-8 -*-

"""

Created on Tue Jan 17 15:05:05 2017


@author: Kevin Liang


Loss functions

"""


from .faster_rcnn_config import cfg


import tensorflow as tf



def rpn_cls_loss(rpn_cls_score,rpn_labels):
    '''
    Calculate the Region Proposal Network classifier loss. Measures how well
    the RPN is able to propose regions by the performance of its "objectness"
    classifier.

    Standard cross-entropy loss on logits

    rpn_cls_score: 4-D score tensor whose last axis holds 2 values per
                   anchor (it is split as [2, channels // 2] below).
    rpn_labels:    integer labels, one per anchor; entries equal to -1 are
                   ignored.

    Returns the mean sparse softmax cross-entropy over the kept anchors.
    '''
    with tf.variable_scope('rpn_cls_loss'):
        # input shape dimensions
        shape = tf.shape(rpn_cls_score)

        # Stack all classification scores into 2D matrix: move channels
        # first, split them into (2, channels//2 * dim1), then bring the
        # size-2 axis last so each row is one anchor's pair of logits.
        rpn_cls_score = tf.transpose(rpn_cls_score,[0,3,1,2])
        rpn_cls_score = tf.reshape(rpn_cls_score,[shape[0],2,shape[3]//2*shape[1],shape[2]])
        rpn_cls_score = tf.transpose(rpn_cls_score,[0,2,3,1])
        rpn_cls_score = tf.reshape(rpn_cls_score,[-1,2])

        # Stack labels into a flat vector, one entry per row of scores
        rpn_labels = tf.reshape(rpn_labels,[-1])

        # Ignore label=-1 (Neither object nor background: IoU between 0.3 and 0.7)
        # The indices are computed once from the unfiltered labels and used
        # to gather both the scores and the labels, so the two stay aligned.
        keep = tf.where(tf.not_equal(rpn_labels,-1))
        rpn_cls_score = tf.reshape(tf.gather(rpn_cls_score,keep),[-1,2])
        rpn_labels = tf.reshape(tf.gather(rpn_labels,keep),[-1])

        # Cross entropy error
        rpn_cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=rpn_cls_score, labels=rpn_labels))

    return rpn_cross_entropy



def rpn_bbox_loss(rpn_bbox_pred, rpn_bbox_targets, rpn_inside_weights, rpn_outside_weights):
    '''
    Calculate the Region Proposal Network bounding box loss. Measures how well
    the RPN is able to propose regions by the performance of its localization.

    lam/N_reg * sum_i(p_i^* * L_reg(t_i,t_i^*))

    lam: classification vs bbox loss balance parameter
    N_reg: Number of anchor locations (~2500)
    p_i^*: ground truth label for anchor (loss only for positive anchors)
    L_reg: smoothL1 loss
    t_i: Parameterized prediction of bounding box
    t_i^*: Parameterized ground truth of closest bounding box

    The targets and both weight tensors are transposed with [0,2,3,1]
    before being combined element-wise with rpn_bbox_pred.
    '''
    with tf.variable_scope('rpn_bbox_loss'):
        # Transposing: move axis 1 of the targets / weights to the end so
        # they line up with the prediction tensor
        rpn_bbox_targets = tf.transpose(rpn_bbox_targets, [0,2,3,1])
        rpn_inside_weights = tf.transpose(rpn_inside_weights, [0,2,3,1])
        rpn_outside_weights = tf.transpose(rpn_outside_weights, [0,2,3,1])

        # How far off was the prediction?
        # Multiplying by the inside weights zeroes the difference wherever
        # the inside weight is 0.
        diff = tf.multiply(rpn_inside_weights, rpn_bbox_pred - rpn_bbox_targets)
        diff_sL1 = smoothL1(diff, 3.0)

        # Only count loss for positive anchors. Make sure it's a sum.
        # The outside weights scale each term before the sum.
        rpn_bbox_reg = tf.reduce_sum(tf.multiply(rpn_outside_weights, diff_sL1))

        # Constant for weighting bounding box loss with classification loss
        rpn_bbox_reg = cfg.TRAIN.RPN_BBOX_LAMBDA * rpn_bbox_reg

    return rpn_bbox_reg

위 함수에서 보듯이 rpn_cls_loss를 계산할 때는 label 값이 -1인 항목을 제외한다. 즉, 이미지 경계 안에 있으면서 ground truth box와의 최대 IoU가 0.7 이상이거나 0.3 미만인 anchor만 남긴다. rpn_bbox_loss를 계산할 때는 처음부터 rpn_inside_weights를 곱하므로 전경 anchor의 bbox loss만 계산된다. 전경이 아닌 나머지 anchor의 rpn_inside_weights는 모두 0이기 때문이다.
지금까지 Faster R-CNN의 RPN 코드는 거의 막바지에 다다랐다.RPN 코드의 가장 정교한 부분은 다음과 같은 두 가지라고 생각합니다.
1) H×W×9개의 anchor를 생성하는 방법: 먼저 종횡비와 면적이 서로 다른 9개의 anchor를 생성한 다음, 특징 맵의 각 슬라이딩 위치마다 이 9개의 anchor를 배치한다.
2) 모든 anchor의 라벨(전경/배경)과 경계 상자 변환 값을 계산하는 방법: 우선 모든 anchor에 대해 ground truth box와의 IoU 값을 계산하고, IoU가 0.3~0.7인 anchor는 배제한다. 0.3 미만은 배경 anchor, 0.7 이상은 전경 anchor이다. 경계 상자 변환 값은 각 anchor와 IoU가 가장 큰 ground truth box에 대응하는 tx, ty, tw, th 네 개의 값이다.
참조:https://blog.csdn.net/jiongnima/article/details/79781792

좋은 웹페이지 즐겨찾기