딥러닝 | RPN 네트워크 분석
32244 단어 DL
1) 논문을 직접 읽기:https://arxiv.org/abs/1506.01497
2) Faster R-CNN은 선험지식이 많아 논문 읽기가 어렵다고 생각하는 독자는 필자의 블로그를 참고할 필요가 있다.
인스턴스 분할 모델Mask R-CNN 상세 정보: R-CNN, Fast R-CNN, Faster R-CNN에서 Mask R-CNN으로
3) Faster R-CNN을 소개하는 위의 기사도 볼 수 있는데 필자는 괜찮다고 생각한다.
Faster R-CNN 텍스트 읽기
3. (매우 중요) 이 글에서 해석하는 Faster R-CNN 코드는 TensorFlow 버전이며, 링크 주소는 https://github.com/kevinjliang/tf-Faster-RCNN 입니다. 다만 상당수의 인터페이스는 여전히 Girshick의 py-faster-rcnn 버전을 그대로 사용하고 있으며, 주요 모듈의 구현도 마찬가지입니다. 그러므로 먼저 해당 코드를 내려받아 전체 코드 구조를 어느 정도 파악해 두어야 이 글 전체를 이해할 수 있습니다.
이제 본론으로 들어갑니다.
우선, faster_rcnn_resnet50ish.py 파일에서 훈련 시 데이터 층이 출력하는 것은 다음과 같습니다.
# Train data
self.x['TRAIN'] = tf.placeholder(tf.float32, [1, None, None, 3]) #
self.im_dims['TRAIN'] = tf.placeholder(tf.int32, [None, 2]) # [height, width]
self.gt_boxes['TRAIN'] = tf.placeholder(tf.int32, [None, 5]) #
, 。 , anchor 。 , , , 。
그다음 faster_rcnn_networks.py 파일에서 rpn 클래스를 볼 수 있습니다. 필자의 습관대로 주석을 단 원본 코드를 먼저 붙입니다.
# -*- coding: utf-8 -*-
"""
Created on Fri Dec 30 16:14:48 2016
@author: Kevin Liang
Faster R-CNN detection and classification networks.
Contains the Region Proposal Network (RPN), ROI proposal layer, and the RCNN.
TODO: -Split off these three networks into their own files OR add to Layers
"""
import sys
sys.path.append('../')
from Lib.TensorBase.tensorbase.base import Layers
from Lib.faster_rcnn_config import cfg
from Lib.loss_functions import rpn_cls_loss, rpn_bbox_loss, fast_rcnn_cls_loss, fast_rcnn_bbox_loss
from Lib.roi_pool import roi_pool
from Lib.rpn_softmax import rpn_softmax
from Networks.anchor_target_layer import anchor_target_layer
from Networks.proposal_layer import proposal_layer
from Networks.proposal_target_layer import proposal_target_layer
import tensorflow as tf
class rpn:
'''
Region Proposal Network (RPN): From the convolutional feature maps
(TensorBase Layers object) of the last layer, generate bounding boxes
relative to anchor boxes and give an "objectness" score to each
In evaluation mode (eval_mode==True), gt_boxes should be None.
'''
# NOTE(review): this listing has lost its indentation, so the nesting of the
# tf.variable_scope blocks in _network cannot be read off the text -- confirm
# against the upstream repository before relying on variable names.
def __init__(self, featureMaps, gt_boxes, im_dims, _feat_stride, eval_mode):
self.featureMaps = featureMaps # input feature maps; wrapped in Layers(...) by _network
self.gt_boxes = gt_boxes # ground-truth boxes; must be None in evaluation mode (presumably shape [None, 5] -- confirm)
self.im_dims = im_dims # image dimensions, forwarded unchanged to anchor_target_layer (presumably shape [None, 2] -- confirm)
self._feat_stride = _feat_stride # forwarded unchanged to anchor_target_layer
self.anchor_scales = cfg.RPN_ANCHOR_SCALES # anchor scales read from the config (the article cites [8, 16, 32])
self.eval_mode = eval_mode # True: evaluation (no targets are built); False: training
self._network() # build the RPN graph immediately on construction
def _network(self):
# There shouldn't be any gt_boxes if in evaluation mode
if self.eval_mode is True: # evaluation mode: no ground truth may be supplied
assert self.gt_boxes is None, \
'Evaluation mode should not have ground truth boxes (or else what are you detecting for?)'
_num_anchors = len(self.anchor_scales)*3 # three anchors per scale (the literal 3 is presumably the number of aspect ratios -- confirm)
rpn_layers = Layers(self.featureMaps) # Layers wrapper on which the shared RPN convs are stacked
with tf.variable_scope('rpn'):
# Spatial windowing
for i in range(len(cfg.RPN_OUTPUT_CHANNELS)):# one conv per configured entry (the article cites a 3x3 conv with 512 channels)
rpn_layers.conv2d(filter_size=cfg.RPN_FILTER_SIZES[i], output_channels=cfg.RPN_OUTPUT_CHANNELS[i])
features = rpn_layers.get_output()
with tf.variable_scope('cls'):
# Box-classification layer (objectness)
self.rpn_bbox_cls_layers = Layers(features) # 1x1 conv below emits 2 scores per anchor
self.rpn_bbox_cls_layers.conv2d(filter_size=1, output_channels=_num_anchors*2, activation_fn=None)
with tf.variable_scope('target'): # anchor targets (training only)
# Only calculate targets in train mode. No ground truth boxes in evaluation mode
if self.eval_mode is False:
# Anchor Target Layer (anchors and deltas)
rpn_cls_score = self.rpn_bbox_cls_layers.get_output()
self.rpn_labels, self.rpn_bbox_targets, self.rpn_bbox_inside_weights, self.rpn_bbox_outside_weights = \
anchor_target_layer(rpn_cls_score=rpn_cls_score, gt_boxes=self.gt_boxes, im_dims=self.im_dims,
_feat_stride=self._feat_stride, anchor_scales=self.anchor_scales)
with tf.variable_scope('bbox'): # 1x1 conv below emits 4 regression outputs per anchor
# Bounding-Box regression layer (bounding box predictions)
self.rpn_bbox_pred_layers = Layers(features)
self.rpn_bbox_pred_layers.conv2d(filter_size=1, output_channels=_num_anchors*4, activation_fn=None)
# Get functions
def get_rpn_cls_score(self): # output of the 'cls' conv (per-anchor objectness scores)
return self.rpn_bbox_cls_layers.get_output()
def get_rpn_labels(self): # per-anchor labels produced by anchor_target_layer (training only)
assert self.eval_mode is False, 'No RPN labels without ground truth boxes'
return self.rpn_labels
def get_rpn_bbox_pred(self): # output of the 'bbox' conv (per-anchor box regression predictions)
return self.rpn_bbox_pred_layers.get_output()
def get_rpn_bbox_targets(self): # per-anchor regression targets produced by anchor_target_layer (training only)
assert self.eval_mode is False, 'No RPN bounding box targets without ground truth boxes'
return self.rpn_bbox_targets
def get_rpn_bbox_inside_weights(self): # inside weights produced by anchor_target_layer (training only)
assert self.eval_mode is False, 'No RPN inside weights without ground truth boxes'
return self.rpn_bbox_inside_weights
def get_rpn_bbox_outside_weights(self): # outside weights produced by anchor_target_layer (training only)
assert self.eval_mode is False, 'No RPN outside weights without ground truth boxes'
return self.rpn_bbox_outside_weights
# Loss functions
def get_rpn_cls_loss(self): # RPN classification loss from the scores and the anchor labels
assert self.eval_mode is False, 'No RPN cls loss without ground truth boxes'
rpn_cls_score = self.get_rpn_cls_score()
rpn_labels = self.get_rpn_labels()
return rpn_cls_loss(rpn_cls_score, rpn_labels)
def get_rpn_bbox_loss(self): # RPN box regression loss, weighted by the inside/outside weights
assert self.eval_mode is False, 'No RPN bbox loss without ground truth boxes'
rpn_bbox_pred = self.get_rpn_bbox_pred()
rpn_bbox_targets = self.get_rpn_bbox_targets()
rpn_bbox_inside_weights = self.get_rpn_bbox_inside_weights()
rpn_bbox_outside_weights = self.get_rpn_bbox_outside_weights()
return rpn_bbox_loss(rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights)
위에서 볼 수 있듯이 rpn 클래스는 훈련 시 주로 두 가지 기능을 합니다. 첫 번째는 get_rpn_cls_loss로 RPN 네트워크의 분류 loss를 계산하는 것이고, 두 번째는 get_rpn_bbox_loss로 RPN 네트워크의 anchor 경계 상자 회귀 loss를 계산하는 것입니다. 이 두 loss를 계산할 때 가장 어려운 점은 ground truth를 어떻게 얻느냐입니다. 이 ground truth는 anchor_target_layer 함수를 통해 얻으므로 먼저 이 함수를 살펴보겠습니다. 관례대로 원본 코드를 먼저 제시합니다.
# -*- coding: utf-8 -*-
"""
Created on Sun Jan 1 16:11:17 2017
@author: Kevin Liang (modifications)
Anchor Target Layer: Creates all the anchors in the final convolutional feature
map, assigns anchors to ground truth boxes, and applies labels of "objectness"
Adapted from the official Faster R-CNN repo:
https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/rpn/anchor_target_layer.py
"""
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Sean Bell
# --------------------------------------------------------
import sys
sys.path.append('../')
import numpy as np
import numpy.random as npr
import tensorflow as tf
from Lib.bbox_overlaps import bbox_overlaps
from Lib.bbox_transform import bbox_transform
from Lib.faster_rcnn_config import cfg
from Lib.generate_anchors import generate_anchors
# anchor ground truth( / , )
def anchor_target_layer(rpn_cls_score, gt_boxes, im_dims, _feat_stride, anchor_scales):
    '''
    TensorFlow wrapper around the NumPy routine _anchor_target_layer_py.

    The NumPy routine is run through tf.py_func with four float32 outputs.
    The labels are then cast to int32 and every output is given a fixed
    tensor name.

    Returns:
        (rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights) as tensors.
    '''
    py_inputs = [rpn_cls_score, gt_boxes, im_dims, _feat_stride, anchor_scales]
    py_output_types = [tf.float32, tf.float32, tf.float32, tf.float32]
    labels, targets, inside_w, outside_w = tf.py_func(
        _anchor_target_layer_py, py_inputs, py_output_types)

    # py_func hands the labels back as float32; downstream code wants int32.
    labels = tf.cast(labels, tf.int32)

    return (
        tf.convert_to_tensor(labels, name = 'rpn_labels'),
        tf.convert_to_tensor(targets, name = 'rpn_bbox_targets'),
        tf.convert_to_tensor(inside_w, name = 'rpn_bbox_inside_weights'),
        tf.convert_to_tensor(outside_w, name = 'rpn_bbox_outside_weights'),
    )
def _anchor_target_layer_py(rpn_cls_score, gt_boxes, im_dims, _feat_stride, anchor_scales):
    """
    Assign anchors to ground-truth targets (NumPy implementation).

    Produces per-anchor classification labels, bounding-box regression
    targets and the two weight arrays used by the RPN bbox loss.

    Steps:
      1. tile the base anchors over every (H, W) cell of the score map
      2. keep only anchors that lie fully inside the image
      3. measure overlap with the ground-truth boxes and label the anchors
         (1 = positive, 0 = negative, -1 = don't care)
      4. subsample positives / negatives to the configured batch size
      5. compute regression targets and loss weights
      6. scatter the results back onto the full anchor set and reshape

    Args:
        rpn_cls_score: array whose shape[0] must be 1 and whose shape[1:3]
            is read as (height, width) of the feature map.
        gt_boxes: (G, 5) array; columns 0-3 are used as box corners
            (the fifth column is presumably the class id -- confirm).
        im_dims: array whose first row is [height, width] of the image.
        _feat_stride: pixel stride between neighbouring feature-map cells.
        anchor_scales: sequence of scales handed to generate_anchors.

    Returns:
        (rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights): float32 arrays of shape
        (1, 1, A*H, W), (1, 4*A, H, W), (1, 4*A, H, W) and (1, 4*A, H, W),
        where A is the number of base anchors.

    Raises:
        AssertionError: if the batch size is not 1, or if
            cfg.TRAIN.RPN_POSITIVE_WEIGHT is non-negative but not in (0, 1).
    """
    im_dims = im_dims[0]  # [height, width] of the single image in the batch
    _anchors = generate_anchors(scales=np.array(anchor_scales))  # base anchors, (A, 4)
    _num_anchors = _anchors.shape[0]

    # allow boxes to sit over the edge by a small amount (0 = not at all)
    _allowed_border = 0

    # Only minibatch of 1 supported
    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # feature-map size; the full anchor set has height * width * A entries
    height, width = rpn_cls_score.shape[1:3]

    # 1. Tile the base anchors over the feature map
    shift_x = np.arange(0, width) * _feat_stride    # (width,)
    shift_y = np.arange(0, height) * _feat_stride   # (height,)
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # both (height, width)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()  # (height*width, 4)

    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    all_anchors = (_anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # 2. Indices of anchors that lie fully inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_dims[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_dims[0] + _allowed_border)    # height
    )[0]

    # keep only inside anchors; everything below works on this subset
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # 3. Overlaps between the anchors and the gt boxes, (len(anchors), len(gt_boxes)).
    # np.float64 replaces the np.float alias, which was removed in NumPy 1.24.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # for every anchor: index of, and overlap with, its best gt box
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # for every gt box: the best overlap any anchor achieves with it
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # row indices of every anchor that ties for the best overlap of some gt box
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # 4. Subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1  # surplus positives become "don't care"

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1  # surplus negatives become "don't care"

    # 5. bbox_targets: The deltas (relative to anchors) that Faster R-CNN should
    # try to predict at each anchor, computed against each anchor's best gt box.
    # TODO: This "weights" business might be deprecated. Requires investigation
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # inside weights are non-zero for positive anchors only
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        # explicit split of the total weight between positives and negatives
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # 6. Map up to the original set of anchors; anchors outside the image
    # get label -1 and zero targets / weights.
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

    # labels -> (1, 1, A * height, width)
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets / weights -> (1, A * 4, height, width)
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    rpn_bbox_inside_weights = bbox_inside_weights.reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    rpn_bbox_outside_weights = bbox_outside_weights.reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
def _unmap(data, count, inds, fill=0):
    """Scatter a subset of items back into an array covering the full set.

    Args:
        data: values for the selected items; the first axis matches `inds`.
        count: size of the full set (first axis of the result).
        inds: positions inside the full set that `data` belongs to.
        fill: value written to every position not listed in `inds`.

    Returns:
        float32 array of shape (count,) + data.shape[1:].
    """
    full_shape = (count, ) + tuple(data.shape[1:])
    restored = np.full(full_shape, fill, dtype=np.float32)
    if len(data.shape) == 1:
        restored[inds] = data
    else:
        restored[inds, :] = data
    return restored
def _compute_targets(ex_rois, gt_rois):
    """Return float32 regression targets mapping each anchor onto its gt box.

    Args:
        ex_rois: (N, 4) anchor boxes.
        gt_rois: (N, 5) ground-truth rows, one per anchor; only the first
            four columns are passed on to bbox_transform.
    """
    # The shape checks run in the same order as before, one at a time.
    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 5

    gt_corners = gt_rois[:, :4]
    deltas = bbox_transform(ex_rois, gt_corners)
    return deltas.astype(np.float32, copy=False)
anchor_target_layer 함수는 주로 _anchor_target_layer_py 함수를 호출하고 그 출력을 tensor로 변환합니다. 이제 _anchor_target_layer_py 함수를 자세히 분석해 보겠습니다. 이 함수는 먼저 generate_anchors 함수로 9개의 기준 anchor를 생성한 다음, 공유 특징 맵의 각 슬라이딩 위치를 원본 이미지상의 위치에 대응시켜 위치마다 anchor를 생성합니다. 이것이 all_anchors입니다. 이어서 이미지 경계를 벗어나는 anchor를 모두 제외하여 anchors를 얻으며, 이후의 연산은 모두 이미지 내부의 anchors만을 대상으로 합니다. 그리고 bbox_overlaps 함수로 경계 내의 모든 anchor와 ground truth box 사이의 IoU 값을 계산합니다. 이어 IoU가 0.3과 0.7 사이인 anchor는 제외하고(labels의 해당 값을 -1로 둠), 훈련에 알맞은 수의 전경 anchor와 배경 anchor를 남깁니다. 그다음 _compute_targets 함수로 모든 anchor에 대응하는 좌표 변환값(tx, ty, tw, th)을 계산하여 bbox_targets 배열에 저장합니다. 또한 bbox_inside_weights와 bbox_outside_weights를 계산하는데, 이 두 배열은 anchor 경계 상자 회귀를 훈련할 때 중요한 역할을 합니다. 마지막으로 _unmap 함수를 통해 이미지 내부 anchor에 대한 결과를 전체 anchor 집합으로 다시 매핑합니다.
독자 여러분이 위의 설명을 처음 보면 혼란스러울 수 있지만 조급해할 필요는 없습니다. anchor_target_layer는 주로 두 가지를 얻기 위한 것입니다. 첫 번째는 한 장의 이미지에서 생성된 각 anchor의 레이블입니다. 훈련 시 일정 수의 양성 샘플(전경)과 일정 수의 음성 샘플(배경)에만 레이블을 부여하고, 나머지는 모두 -1로 두어 훈련에서 무시합니다. 두 번째는 모든 anchor의 경계 상자 회귀 목표값입니다. 경계 상자 회귀 loss를 계산할 때는 전경 anchor만 기여하는데, 이는 bbox_inside_weights와 bbox_outside_weights로 구현됩니다. 전경도 배경도 아닌(레이블이 -1인) anchor에 대응하는 bbox_inside_weights와 bbox_outside_weights는 모두 0입니다.
anchor_target_layer 함수에는 비교적 중요한 함수가 몇 가지 있는데, 첫 번째는 generate_anchors입니다. 이 함수의 주된 역할은 세 가지 종횡비와 세 가지 면적(스케일)을 조합한 9개의 anchor를 생성하는 것입니다. 소스 코드와 주석은 다음과 같습니다.
# -*- coding: utf-8 -*-
"""
Created on Sun Jan 1 16:11:17 2017
@author: Kevin Liang (modifications)
generate_anchors and supporting functions: generate reference windows (anchors)
for Faster R-CNN. Specifically, it creates a set of k (default of 9) relative
coordinates. These references will be added on to all positions of the final
convolutional feature maps.
Adapted from the official Faster R-CNN repo:
https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/rpn/generate_anchors.py
Note: the produced anchors have indices off by 1 of what the comments claim.
Probably due to MATLAB being 1-indexed, while Python is 0-indexed.
"""
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Sean Bell
# --------------------------------------------------------
import numpy as np
# Verify that we compute the same anchors as Shaoqing's matlab implementation:
#
# >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
# >> anchors
#
# anchors =
#
# -83 -39 100 56
# -175 -87 192 104
# -359 -183 376 200
# -55 -55 72 72
# -119 -119 136 136
# -247 -247 264 264
# -35 -79 52 96
# -79 -167 96 184
# -167 -343 184 360
#array([[ -83., -39., 100., 56.],
# [-175., -87., 192., 104.],
# [-359., -183., 376., 200.],
# [ -55., -55., 72., 72.],
# [-119., -119., 136., 136.],
# [-247., -247., 264., 264.],
# [ -35., -79., 52., 96.],
# [ -79., -167., 96., 184.],
# [-167., -343., 184., 360.]])
def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
                     scales=2**np.arange(3, 6)):
    """
    Build the reference anchor windows: every aspect ratio combined with
    every scale, all derived from a (0, 0, base_size - 1, base_size - 1)
    window.

    Returns one row (x1, y1, x2, y2) per ratio/scale pair, grouped by ratio,
    i.e. an array of shape (len(ratios) * len(scales), 4).
    """
    # Reference window; [0, 0, 15, 15] for the default base_size of 16.
    reference = np.array([1, 1, base_size, base_size]) - 1

    # One anchor per aspect ratio, then each of them blown up to every scale.
    per_ratio = _ratio_enum(reference, ratios)
    scaled_groups = []
    for ratio_anchor in per_ratio:
        scaled_groups.append(_scale_enum(ratio_anchor, scales))
    return np.vstack(scaled_groups)
def _whctrs(anchor):
    """
    Convert a window given as (x1, y1, x2, y2) into
    (width, height, x center, y center), treating both corners as inclusive.
    """
    x1, y1, x2, y2 = anchor[0], anchor[1], anchor[2], anchor[3]
    width = x2 - x1 + 1
    height = y2 - y1 + 1
    center_x = x1 + 0.5 * (width - 1)
    center_y = y1 + 0.5 * (height - 1)
    return width, height, center_x, center_y
def _mkanchors(ws, hs, x_ctr, y_ctr):
    """
    Build one window per (width, height) pair, all sharing the center
    (x_ctr, y_ctr).

    ws and hs are 1-D vectors of equal length n; the result has shape (n, 4)
    with columns (x1, y1, x2, y2).
    """
    # Column vectors, so that hstack lays the four coordinates side by side.
    half_w = 0.5 * (ws[:, None] - 1)
    half_h = 0.5 * (hs[:, None] - 1)
    corners = (
        x_ctr - half_w,
        y_ctr - half_h,
        x_ctr + half_w,
        y_ctr + half_h,
    )
    return np.hstack(corners)
def _ratio_enum(anchor, ratios):
    """
    Produce one window per aspect ratio, each centered on `anchor` and
    covering roughly the same area as `anchor` (widths and heights are
    rounded to whole numbers).
    """
    base_w, base_h, center_x, center_y = _whctrs(anchor)
    area = base_w * base_h
    # With h = ratio * w and w * h = area, w = sqrt(area / ratio).
    widths = np.round(np.sqrt(area / ratios))
    heights = np.round(widths * ratios)
    return _mkanchors(widths, heights, center_x, center_y)
def _scale_enum(anchor, scales):
    """
    Produce one window per scale by multiplying the width and height of
    `anchor` by each scale while keeping its center fixed.
    """
    base_w, base_h, center_x, center_y = _whctrs(anchor)
    return _mkanchors(base_w * scales, base_h * scales, center_x, center_y)
# Manual check: time one call to generate_anchors(), print the elapsed seconds
# and the anchors, then drop into an interactive IPython shell.
if __name__ == '__main__':
import time
t = time.time()
a = generate_anchors()
print(time.time() - t)
print(a)
# IPython is needed only for this interactive inspection step.
from IPython import embed; embed()
위 코드의 주요 원리는 다음과 같습니다. 가장 먼저 하나의 기준 anchor를 생성합니다. 그리고 이 기준 anchor로부터 종횡비는 서로 다르고 면적은 거의 같은 anchor 세 개를 생성합니다. 마지막으로 각 종횡비에 대해 서로 다른 세 가지 스케일의 anchor를 생성하여 최종적으로 9개의 anchor를 얻습니다. 자세한 내용은 코드의 주석을 참고하십시오.
두 번째 중요한 함수는 bbox_overlaps 함수입니다. 이 함수는 모든 anchor와 모든 ground truth box 사이의 IoU 값을 계산합니다. 코드는 다음과 같습니다.
# -*- coding: utf-8 -*-
"""
Created on Sun Jan 1 20:25:19 2017
@author: Kevin Liang (modification)
Calculates bounding box overlaps between N bounding boxes, and K query boxes
(anchors) and return a matrix of overlap proportions
Written in Cython for optimization.
"""
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Sergey Karayev
# --------------------------------------------------------
cimport cython
import numpy as np
cimport numpy as np
# np.float was only an alias of the builtin float (i.e. float64) and was
# removed in NumPy 1.24; float64 / float64_t name the same C double explicitly.
DTYPE = np.float64
ctypedef np.float64_t DTYPE_t

def bbox_overlaps(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """
    Compute the intersection-over-union between every box and every query box.

    Boxes are (x1, y1, x2, y2) with inclusive corners, hence the "+ 1" in
    every width / height. Only columns 0-3 of either input are read.

    Parameters
    ----------
    boxes: (N, >=4) ndarray of float64
    query_boxes: (K, >=4) ndarray of float64

    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes;
        entries for non-intersecting pairs stay 0.
    """
    cdef unsigned int N = boxes.shape[0]
    cdef unsigned int K = query_boxes.shape[0]
    cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
    cdef DTYPE_t iw, ih, box_area
    cdef DTYPE_t ua
    cdef unsigned int k, n
    for k in range(K):
        # area of the query box, hoisted out of the inner loop
        box_area = (
            (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
            (query_boxes[k, 3] - query_boxes[k, 1] + 1)
        )
        for n in range(N):
            # width of the intersection; <= 0 means no horizontal overlap
            iw = (
                min(boxes[n, 2], query_boxes[k, 2]) -
                max(boxes[n, 0], query_boxes[k, 0]) + 1
            )
            if iw > 0:
                # height of the intersection; <= 0 means no vertical overlap
                ih = (
                    min(boxes[n, 3], query_boxes[k, 3]) -
                    max(boxes[n, 1], query_boxes[k, 1]) + 1
                )
                if ih > 0:
                    # union area = area(box) + area(query) - intersection
                    ua = float(
                        (boxes[n, 2] - boxes[n, 0] + 1) *
                        (boxes[n, 3] - boxes[n, 1] + 1) +
                        box_area - iw * ih
                    )
                    overlaps[n, k] = iw * ih / ua
    return overlaps
세 번째 중요한 부분은 anchor의 좌표 변환값을 계산할 때 사용하는 bbox_transform 함수입니다. 좌표 변환값을 계산할 때는 anchor의 표현 형식을 중심 좌표와 너비·높이로 바꾼다는 점에 주의하십시오. 함수 코드와 주석은 다음과 같습니다.
# -*- coding: utf-8 -*-
"""
Created on Sun Jan 1 21:18:58 2017
@author: Kevin Liang (modifications)
bbox_transform and its inverse operation
"""
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
import numpy as np
def bbox_transform(ex_rois, gt_rois):
    '''
    Compute the regression deltas that map each example box onto its
    ground-truth box.

    Both inputs hold one box per row as opposite corners (x1, y1, x2, y2).
    Each box is first rewritten as width / height / center, then

        dx = (gt_cx - ex_cx) / ex_w        dy = (gt_cy - ex_cy) / ex_h
        dw = log(gt_w / ex_w)              dh = log(gt_h / ex_h)

    Returns an array of shape (N, 4) with columns (dx, dy, dw, dh).
    '''
    def _center_form(rois):
        # Inclusive corners, hence the + 1.0; the center is x1 + 0.5 * width.
        widths = rois[:, 2] - rois[:, 0] + 1.0
        heights = rois[:, 3] - rois[:, 1] + 1.0
        return widths, heights, rois[:, 0] + 0.5 * widths, rois[:, 1] + 0.5 * heights

    ex_w, ex_h, ex_cx, ex_cy = _center_form(ex_rois)
    gt_w, gt_h, gt_cx, gt_cy = _center_form(gt_rois)

    deltas = (
        (gt_cx - ex_cx) / ex_w,
        (gt_cy - ex_cy) / ex_h,
        np.log(gt_w / ex_w),
        np.log(gt_h / ex_h),
    )
    # vstack gives (4, N); transpose to one row per box.
    return np.vstack(deltas).transpose()
여기까지 anchor_target_layer의 해석이 끝났습니다. 이 함수는 RPN 원본 코드에서 가장 중요한 함수 중 하나로, 모든 anchor에 대응하는 레이블과 경계 상자 회귀 목표값을 돌려주므로 loss를 계산하기가 쉬워집니다. RPN의 loss를 계산하는 함수의 코드와 주석은 다음과 같습니다.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 17 15:05:05 2017
@author: Kevin Liang
Loss functions
"""
from .faster_rcnn_config import cfg
import tensorflow as tf
def rpn_cls_loss(rpn_cls_score,rpn_labels):
    '''
    Region Proposal Network classification ("objectness") loss.

    The score tensor is rearranged into a two-column matrix of logits, the
    labels are flattened, every entry labelled -1 (don't care) is dropped,
    and the mean sparse softmax cross-entropy of the rest is returned.
    '''
    with tf.variable_scope('rpn_cls_loss'):
        # Dynamic dimensions of the incoming score tensor.
        dims = tf.shape(rpn_cls_score)
        dim0, dim1, dim2, dim3 = dims[0], dims[1], dims[2], dims[3]

        # Stack all classification scores into a 2D matrix of shape [-1, 2].
        logits = tf.transpose(rpn_cls_score,[0,3,1,2])
        logits = tf.reshape(logits,[dim0,2,dim3//2*dim1,dim2])
        logits = tf.transpose(logits,[0,2,3,1])
        logits = tf.reshape(logits,[-1,2])

        # Flatten the labels so they line up with the rows of `logits`.
        flat_labels = tf.reshape(rpn_labels,[-1])

        # Ignore label=-1 (Neither object nor background: IoU between 0.3 and 0.7).
        # The same index set selects both the scores and the labels.
        keep = tf.where(tf.not_equal(flat_labels,-1))
        logits = tf.reshape(tf.gather(logits,keep),[-1,2])
        flat_labels = tf.reshape(tf.gather(flat_labels,keep),[-1])

        # Cross entropy error loss, averaged over the kept anchors.
        per_anchor = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=flat_labels)
        return tf.reduce_mean(per_anchor)
def rpn_bbox_loss(rpn_bbox_pred, rpn_bbox_targets, rpn_inside_weights, rpn_outside_weights):
    '''
    Region Proposal Network bounding-box regression loss.

        lam/N_reg * sum_i(p_i^* * L_reg(t_i,t_i^*))

    lam:   classification vs bbox loss balance (cfg.TRAIN.RPN_BBOX_LAMBDA)
    p_i^*: ground truth label for anchor (loss only for positive anchors)
    L_reg: smoothL1 loss
    t_i:   parameterized prediction of the bounding box
    t_i^*: parameterized ground truth of the closest bounding box

    Targets and both weight tensors are transposed with [0,2,3,1] before
    they are combined with the predictions.
    '''
    with tf.variable_scope('rpn_bbox_loss'):
        # Move axis 1 of the targets and weights to the last position.
        perm = [0,2,3,1]
        targets = tf.transpose(rpn_bbox_targets, perm)
        inside_w = tf.transpose(rpn_inside_weights, perm)
        outside_w = tf.transpose(rpn_outside_weights, perm)

        # How far off was the prediction? The inside weights zero the error
        # wherever they are zero.
        weighted_error = tf.multiply(inside_w, rpn_bbox_pred - targets)

        # Smooth-L1 of the error (second argument presumably sigma -- confirm).
        smooth_error = smoothL1(weighted_error, 3.0)

        # Only count loss for positive anchors. Make sure it's a sum.
        total = tf.reduce_sum(tf.multiply(outside_w, smooth_error))

        # Constant for weighting bounding box loss with classification loss.
        return cfg.TRAIN.RPN_BBOX_LAMBDA * total
위 함수에서 보듯이 rpn_cls_loss를 계산할 때는 label 값이 -1인 항목을 제외합니다. 즉, 이미지 경계 내에 있으면서 ground truth box와의 최대 IoU가 0.7 이상이거나 0.3 미만인 anchor만 남깁니다. rpn_bbox_loss를 계산할 때는 처음에 rpn_inside_weights를 곱하는 것에서 알 수 있듯이 전경 anchor의 bbox loss만 계산합니다. 전경이 아닌 나머지 anchor에 대응하는 rpn_inside_weights는 모두 0이기 때문입니다.
지금까지 Faster R-CNN의 RPN 코드는 거의 막바지에 다다랐다.RPN 코드의 가장 정교한 부분은 다음과 같은 두 가지라고 생각합니다.
1) H×W×9개의 anchor를 생성하는 방법: 먼저 종횡비와 면적이 서로 다른 9개의 anchor를 생성한 다음, 특징 맵의 각 슬라이딩 위치마다 이 9개의 anchor를 생성합니다.
2) 모든 anchor의 레이블(전경/배경)과 경계 상자 변환값을 어떻게 계산하는가? 먼저 모든 anchor에 대해 ground truth box와의 IoU 값을 계산하고, IoU가 0.3~0.7인 anchor는 제외합니다. 0.3 미만은 배경 anchor, 0.7 이상은 전경 anchor입니다. 경계 상자 변환값은 각 anchor와 IoU가 가장 큰 ground truth box에 대해 계산한 tx, ty, tw, th 네 개의 값입니다.
참조:https://blog.csdn.net/jiongnima/article/details/79781792
이 내용에 흥미가 있습니까?
현재 기사가 여러분의 문제를 해결하지 못하는 경우 AI 엔진은 머신러닝 분석(스마트 모델이 방금 만들어져 부정확한 경우가 있을 수 있음)을 통해 가장 유사한 기사를 추천합니다:
[ML/DL] Train/Test/Valid Dataset모델을 학습 시킬 때 사용할 데이터는 train/test/valid(혹은 Dev) 으로 분리해서 학습에 사용된다. 이렇게 분리하는 이유와 각 데이터 셋의 용도와 데이터를 분리할 때 사용할 수 있는 모듈들을 알아보자~...
텍스트를 자유롭게 공유하거나 복사할 수 있습니다.하지만 이 문서의 URL은 참조 URL로 남겨 두십시오.
CC BY-SA 2.5, CC BY-SA 3.0 및 CC BY-SA 4.0에 따라 라이센스가 부여됩니다.