Dev #22

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file removed config.pickle
Binary file removed keras_frcnn/.mobil.swp
38 changes: 19 additions & 19 deletions keras_frcnn/resnet.py
@@ -8,18 +8,19 @@
from __future__ import absolute_import

from keras.layers import Input, Add, Dense, Activation, Flatten, Convolution2D, MaxPooling2D, ZeroPadding2D, \
AveragePooling2D, TimeDistributed
AveragePooling2D, TimeDistributed, BatchNormalization

from keras import backend as K

import os
from keras_frcnn.RoiPoolingConv import RoiPoolingConv
from keras_frcnn.FixedBatchNormalization import FixedBatchNormalization


def get_weight_path():
if K.image_dim_ordering() == 'th':
return 'resnet50_weights_th_dim_ordering_th_kernels_notop.h5'
else:
return 'resnet50_weights_tf_dim_ordering_tf_kernels.h5'
return os.path.join("pretrain", "resnet50_weights_tf_dim_ordering_tf_kernels.h5")

def get_img_output_length(width, height):
def get_output_length(input_length):
@@ -47,15 +48,15 @@ def identity_block(input_tensor, kernel_size, filters, stage, block, trainable=T
bn_name_base = 'bn' + str(stage) + block + '_branch'

x = Convolution2D(nb_filter1, (1, 1), name=conv_name_base + '2a', trainable=trainable)(input_tensor)
x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
x = Activation('relu')(x)

x = Convolution2D(nb_filter2, (kernel_size, kernel_size), padding='same', name=conv_name_base + '2b', trainable=trainable)(x)
x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
x = Activation('relu')(x)

x = Convolution2D(nb_filter3, (1, 1), name=conv_name_base + '2c', trainable=trainable)(x)
x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)

x = Add()([x, input_tensor])
x = Activation('relu')(x)
@@ -76,15 +77,15 @@ def identity_block_td(input_tensor, kernel_size, filters, stage, block, trainabl
bn_name_base = 'bn' + str(stage) + block + '_branch'

x = TimeDistributed(Convolution2D(nb_filter1, (1, 1), trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '2a')(input_tensor)
x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2a')(x)
x = TimeDistributed(BatchNormalization(axis=bn_axis), name=bn_name_base + '2a')(x)
x = Activation('relu')(x)

x = TimeDistributed(Convolution2D(nb_filter2, (kernel_size, kernel_size), trainable=trainable, kernel_initializer='normal',padding='same'), name=conv_name_base + '2b')(x)
x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2b')(x)
x = TimeDistributed(BatchNormalization(axis=bn_axis), name=bn_name_base + '2b')(x)
x = Activation('relu')(x)

x = TimeDistributed(Convolution2D(nb_filter3, (1, 1), trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '2c')(x)
x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2c')(x)
x = TimeDistributed(BatchNormalization(axis=bn_axis), name=bn_name_base + '2c')(x)

x = Add()([x, input_tensor])
x = Activation('relu')(x)
@@ -103,18 +104,18 @@ def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2),
bn_name_base = 'bn' + str(stage) + block + '_branch'

x = Convolution2D(nb_filter1, (1, 1), strides=strides, name=conv_name_base + '2a', trainable=trainable)(input_tensor)
x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
x = Activation('relu')(x)

x = Convolution2D(nb_filter2, (kernel_size, kernel_size), padding='same', name=conv_name_base + '2b', trainable=trainable)(x)
x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
x = Activation('relu')(x)

x = Convolution2D(nb_filter3, (1, 1), name=conv_name_base + '2c', trainable=trainable)(x)
x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)

shortcut = Convolution2D(nb_filter3, (1, 1), strides=strides, name=conv_name_base + '1', trainable=trainable)(input_tensor)
shortcut = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '1')(shortcut)
shortcut = BatchNormalization(axis=bn_axis, name=bn_name_base + '1')(shortcut)

x = Add()([x, shortcut])
x = Activation('relu')(x)
@@ -135,18 +136,18 @@ def conv_block_td(input_tensor, kernel_size, filters, stage, block, input_shape,
bn_name_base = 'bn' + str(stage) + block + '_branch'

x = TimeDistributed(Convolution2D(nb_filter1, (1, 1), strides=strides, trainable=trainable, kernel_initializer='normal'), input_shape=input_shape, name=conv_name_base + '2a')(input_tensor)
x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2a')(x)
x = TimeDistributed(BatchNormalization(axis=bn_axis), name=bn_name_base + '2a')(x)
x = Activation('relu')(x)

x = TimeDistributed(Convolution2D(nb_filter2, (kernel_size, kernel_size), padding='same', trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '2b')(x)
x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2b')(x)
x = TimeDistributed(BatchNormalization(axis=bn_axis), name=bn_name_base + '2b')(x)
x = Activation('relu')(x)

x = TimeDistributed(Convolution2D(nb_filter3, (1, 1), kernel_initializer='normal'), name=conv_name_base + '2c', trainable=trainable)(x)
x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2c')(x)
x = TimeDistributed(BatchNormalization(axis=bn_axis), name=bn_name_base + '2c')(x)

shortcut = TimeDistributed(Convolution2D(nb_filter3, (1, 1), strides=strides, trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '1')(input_tensor)
shortcut = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '1')(shortcut)
shortcut = TimeDistributed(BatchNormalization(axis=bn_axis), name=bn_name_base + '1')(shortcut)

x = Add()([x, shortcut])
x = Activation('relu')(x)
@@ -176,7 +177,7 @@ def nn_base(input_tensor=None, trainable=False):
x = ZeroPadding2D((3, 3))(img_input)

x = Convolution2D(64, (7, 7), strides=(2, 2), name='conv1', trainable = trainable)(x)
x = FixedBatchNormalization(axis=bn_axis, name='bn_conv1')(x)
x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
x = Activation('relu')(x)
x = MaxPooling2D((3, 3), strides=(2, 2))(x)

@@ -244,4 +245,3 @@ def classifier(base_layers, input_rois, num_rois, nb_classes = 21, trainable=Fal
# note: no regression target for bg class
out_regr = TimeDistributed(Dense(4 * (nb_classes-1), activation='linear', kernel_initializer='zero'), name='dense_regress_{}'.format(nb_classes))(out)
return [out_class, out_regr]
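Note on the resnet.py change: every FixedBatchNormalization layer is swapped for Keras's stock BatchNormalization with the same axis and name, and the pretrained weight file is now looked up under a pretrain/ directory. The custom FixedBatchNormalization in keras-frcnn keeps the pretrained batch-norm statistics frozen, so with the stock layer the moving mean and variance will additionally be updated during training. A minimal sketch of the repeated conv + BN + ReLU branch pattern after this change (the helper name and the default bn_axis are illustrative, not part of the commit):

from keras.layers import Activation, BatchNormalization, Convolution2D


def conv_bn_relu(x, nb_filter, conv_name, bn_name, bn_axis=3, trainable=True):
    """Sketch of the branch pattern used throughout resnet.py after this change:
    convolution, then stock BatchNormalization (was FixedBatchNormalization), then ReLU."""
    x = Convolution2D(nb_filter, (1, 1), name=conv_name, trainable=trainable)(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name)(x)
    return Activation('relu')(x)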

100 changes: 100 additions & 0 deletions parser.py
@@ -0,0 +1,100 @@
import os
import cv2
import xml.etree.ElementTree as ET
import numpy as np


def get_data(input_path):
    all_imgs = []

    classes_count = {}

    class_mapping = {}

    visualise = False

    data_paths = [os.path.join(input_path, s) for s in ['VOC2007']]  # add VOC2012 here if you use it
    print("data path:", data_paths)

    print('Parsing annotation files')
    data_path = input_path

    annot_path = os.path.join(data_path, 'Annotations')
    imgs_path = os.path.join(data_path, 'JPEGImages')
    imgsets_path_trainval = os.path.join(data_path, 'ImageSets', 'Main', 'trainval.txt')
    imgsets_path_test = os.path.join(data_path, 'ImageSets', 'Main', 'test.txt')

    trainval_files = []
    test_files = []
    try:
        with open(imgsets_path_trainval) as f:
            for line in f:
                trainval_files.append(line.strip() + '.jpg')
    except Exception as e:
        print(e)
    try:
        with open(imgsets_path_test) as f:
            for line in f:
                test_files.append(line.strip() + '.jpg')
    except Exception as e:
        if data_path[-7:] == 'VOC2012':
            # this is expected: most PASCAL VOC distributions don't ship a test.txt file
            pass
        else:
            print(e)

    annots = [os.path.join(annot_path, s) for s in os.listdir(annot_path)]
    idx = 0
    for annot in annots:
        try:
            idx += 1
            et = ET.parse(annot)
            element = et.getroot()
            element_objs = element.findall('object')
            element_filename = element.find('filename').text
            element_width = int(element.find('size').find('width').text)
            element_height = int(element.find('size').find('height').text)

            if len(element_objs) > 0:
                annotation_data = {'filepath': os.path.join(imgs_path, element_filename), 'width': element_width,
                                   'height': element_height, 'bboxes': []}

                if element_filename in trainval_files:
                    annotation_data['imageset'] = 'trainval'
                elif element_filename in test_files:
                    annotation_data['imageset'] = 'test'
                else:
                    annotation_data['imageset'] = 'trainval'

            for element_obj in element_objs:
                class_name = element_obj.find('name').text
                if class_name not in classes_count:
                    classes_count[class_name] = 1
                else:
                    classes_count[class_name] += 1

                if class_name not in class_mapping:
                    class_mapping[class_name] = len(class_mapping)

                obj_bbox = element_obj.find('bndbox')
                x1 = int(round(float(obj_bbox.find('xmin').text)))
                y1 = int(round(float(obj_bbox.find('ymin').text)))
                x2 = int(round(float(obj_bbox.find('xmax').text)))
                y2 = int(round(float(obj_bbox.find('ymax').text)))
                difficulty = 1  # do not read the 'difficult' flag; treat every annotation the same
                annotation_data['bboxes'].append(
                    {'class': class_name, 'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'difficult': difficulty})
            all_imgs.append(annotation_data)

            if visualise:
                img = cv2.imread(annotation_data['filepath'])
                for bbox in annotation_data['bboxes']:
                    cv2.rectangle(img, (bbox['x1'], bbox['y1']), (bbox['x2'], bbox['y2']), (0, 0, 255))
                cv2.imshow('img', img)
                cv2.waitKey(0)

        except Exception as e:
            print(e)
            continue
    return all_imgs, classes_count, class_mapping
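A hypothetical usage sketch for the new parser (the VOCdevkit path and the summary prints are illustrative, not part of the commit); get_data expects the directory that directly contains Annotations/, JPEGImages/ and ImageSets/Main/:

from parser import get_data  # the parser.py added in this PR

# assumed layout: VOCdevkit/VOC2007/{Annotations, JPEGImages, ImageSets/Main}
all_imgs, classes_count, class_mapping = get_data('VOCdevkit/VOC2007')

print('parsed {} annotated images'.format(len(all_imgs)))
print('boxes per class:', classes_count)
print('class -> index:', class_mapping)

# split according to the imageset assigned while parsing
train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval']
test_imgs = [s for s in all_imgs if s['imageset'] == 'test']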
68 changes: 25 additions & 43 deletions test_frcnn.py
@@ -13,6 +13,14 @@
from keras_frcnn import roi_helpers
from keras.applications.mobilenet import preprocess_input

# for GPU settings..
if 'tensorflow' == K.backend():
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
config2 = tf.ConfigProto()
config2.gpu_options.allow_growth = True
set_session(tf.Session(config=config2))

sys.setrecursionlimit(40000)

parser = OptionParser()
@@ -101,36 +109,11 @@ def format_img_channels(img, C):
img = np.expand_dims(img, axis=0)
return img

def format_img(img):
img_min_side = 600.0
(height,width,_) = img.shape

if width <= height:
f = img_min_side/width
new_height = int(f * height)
new_width = 600
else:
f = img_min_side/height
new_width = int(f * width)
new_height = 600
# Zero-center by mean pixel, and preprocess image
img = cv2.resize(img,(new_width,new_height),interpolation = cv2.INTER_CUBIC)
# x_img = np.transpose(img,(2,0,1)).astype(np.float32)
x_img = img[:,:, (2, 1, 0)] # BGR -> RGB
x_img = x_img.astype(np.float32)
x_img[:, :, 0] -= C.img_channel_mean[0]
x_img[:, :, 1] -= C.img_channel_mean[1]
x_img[:, :, 2] -= C.img_channel_mean[2]
x_img /= C.img_scaling_factor

x_img = np.transpose(x_img, (2, 0, 1))
x_img = np.expand_dims(x_img, axis=0)
if K.backend == 'tf':
x_img = np.transpose(x_img, (0, 2, 3, 1))

# img = np.expand_dims(x_img, axis=0)
# img -= 127.5
return x_img
def format_img(img, C):
""" formats an image for model prediction based on config """
img, ratio = format_img_size(img, C)
img = format_img_channels(img, C)
return img, ratio

# Method to transform the coordinates of the bounding box to its original size
def get_real_coordinates(ratio, x1, y1, x2, y2):
@@ -216,7 +199,7 @@ def get_real_coordinates(ratio, x1, y1, x2, y2):
img = cv2.imread(filepath)

# preprocess image
X = format_img(img)
X, ratio = format_img(img, C)
img_scaled = (np.transpose(X[0,:,:,:],(1,2,0)) + 127.5).astype('uint8')
if K.image_dim_ordering() == 'tf':
X = np.transpose(X, (0, 2, 3, 1))
@@ -271,23 +254,22 @@ def get_real_coordinates(ratio, x1, y1, x2, y2):
print(len(bboxes[key]))
bbox = np.array(bboxes[key])

new_boxes, new_probs = roi_helpers.non_max_suppression_fast(bbox, np.array(probs[key]), overlapThresh = 0.5)
new_boxes, new_probs = roi_helpers.non_max_suppression_fast(bbox, np.array(probs[key]), overlap_thresh = 0.5)
for jk in range(new_boxes.shape[0]):
(x1,y1,x2,y2) = new_boxes[jk,:]
cv2.rectangle(img_scaled,(x1,y1),(x2,y2),class_to_color[key],1)
(x1, y1, x2, y2) = new_boxes[jk,:]
(real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

cv2.rectangle(img,(real_x1, real_y1), (real_x2, real_y2), (int(class_to_color[key][0]), int(class_to_color[key][1]), int(class_to_color[key][2])),2)

textLabel = '{}:{}'.format(key,int(100*new_probs[jk]))
if key not in all_dets:
all_dets[key] = 100*new_probs[jk]
else:
all_dets[key] = max(all_dets[key],100*new_probs[jk])
textLabel = '{}: {}'.format(key,int(100*new_probs[jk]))
all_dets.append((key,100*new_probs[jk]))

(retval,baseLine) = cv2.getTextSize(textLabel,cv2.FONT_HERSHEY_COMPLEX,1,1)
textOrg = (x1,y1+20)
textOrg = (real_x1, real_y1-0)

cv2.rectangle(img_scaled,(textOrg[0] - 5,textOrg[1]+baseLine - 5),(textOrg[0]+retval[0] + 5,textOrg[1]-retval[1] - 5),(0,0,0),2)
cv2.rectangle(img_scaled,(textOrg[0] - 5,textOrg[1]+baseLine - 5),(textOrg[0]+retval[0] + 5,textOrg[1]-retval[1] - 5),(255,255,255),-1)
cv2.putText(img_scaled,textLabel,textOrg,cv2.FONT_HERSHEY_SIMPLEX,1,(0,0,0),1)
cv2.rectangle(img, (textOrg[0] - 5, textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (0, 0, 0), 2)
cv2.rectangle(img, (textOrg[0] - 5,textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (255, 255, 255), -1)
cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)

print('Elapsed time = {}'.format(time.time() - st))
print(all_dets)
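Note on the test_frcnn.py change: the hand-rolled format_img is replaced by a thin wrapper around the existing format_img_size and format_img_channels helpers, so the returned ratio can be passed to get_real_coordinates and the detections drawn on the original image rather than the network-sized copy. format_img_size is not shown in this diff; the sketch below reconstructs what it is assumed to do, based on how format_img and get_real_coordinates use it and assuming the config object exposes im_size:

import cv2


def format_img_size(img, C):
    """Assumed behaviour of the existing helper: resize the image so that its shorter
    side equals C.im_size and return the resized image plus the scaling ratio."""
    img_min_side = float(C.im_size)
    (height, width, _) = img.shape

    if width <= height:
        ratio = img_min_side / width
        new_height = int(ratio * height)
        new_width = int(img_min_side)
    else:
        ratio = img_min_side / height
        new_width = int(ratio * width)
        new_height = int(img_min_side)
    img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
    return img, ratio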
24 changes: 18 additions & 6 deletions train_frcnn.py
@@ -6,6 +6,7 @@
import numpy as np
from optparse import OptionParser
import pickle
import os

from keras import backend as K
from keras.optimizers import Adam, SGD, RMSprop
@@ -16,6 +17,7 @@
import keras_frcnn.roi_helpers as roi_helpers
from keras.utils import generic_utils

# for GPU settings..
if 'tensorflow' == K.backend():
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
@@ -46,6 +48,8 @@
parser.add_option("--opt", dest="optimizers", help="set the optimizer to use", default="SGD")
parser.add_option("--elen", dest="epoch_length", help="set the epoch length. def=1000", default=1000)
parser.add_option("--load", dest="load", help="What model to load", default=None)
parser.add_option("--dataset", dest="dataset", help="name of the dataset", default="voc")
parser.add_option("--lr", dest="lr", help="learn rate", default=1e-3)
(options, args) = parser.parse_args()

if not options.train_path: # if filename is not given
@@ -64,7 +68,13 @@
C.use_vertical_flips = bool(options.vertical_flips)
C.rot_90 = bool(options.rot_90)

C.model_path = options.output_weight_path
# mkdir to save models.
if not os.path.isdir("models"):
os.mkdir("models")
if not os.path.isdir("models/"+options.network):
os.mkdir(os.path.join("models", options.network))
C.model_path = os.path.join("models", options.network, options.dataset+".hdf5")

C.num_rois = int(options.num_rois)

# we will use resnet. may change to others
@@ -156,17 +166,19 @@
print('loading weights from {}'.format(C.base_net_weights))
model_rpn.load_weights(C.base_net_weights, by_name=True)
model_classifier.load_weights(C.base_net_weights, by_name=True)
print("loaded basenet weights!")
except:
print('Could not load pretrained model weights. Weights can be found in the keras application folder \
https://github.com/fchollet/keras/tree/master/keras/applications')

# optimizer setup
lr = float(options.lr)
if options.optimizers == "SGD":
optimizer = SGD(lr=1e-2, decay=0.0005, momentum=0.9)
optimizer_classifier = SGD(lr=1e-3, decay=0.0005, momentum=0.9)
optimizer = SGD(lr=lr, decay=0.0005, momentum=0.9)
optimizer_classifier = SGD(lr=lr, decay=0.0005, momentum=0.9)
else:
optimizer = Adam(lr=1e-6, clipnorm=0.001)
optimizer_classifier = Adam(lr=1e-56, clipnorm=0.001)
optimizer = Adam(lr=lr)
optimizer_classifier = Adam(lr=lr)

# may use this to resume from rpn models or previous training. specify either rpn or frcnn model to load
if options.load is not None:
@@ -177,7 +189,7 @@
print("loading RPN weights from ", options.rpn_weight_path)
model_rpn.load_weights(options.rpn_weight_path, by_name=True)
else:
print("no previous model was loaded")
print("no previous RPN model was loaded")

# compile the model AFTER loading weights!
model_rpn.compile(optimizer=optimizer, loss=[losses.rpn_loss_cls(num_anchors), losses.rpn_loss_regr(num_anchors)])
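Note on the train_frcnn.py change: the output weights now go to a per-network, per-dataset path under models/, and both optimizers take their learning rate from the new --lr option instead of hard-coded values. A sketch of how the pieces fit together (the network name, dataset name and command line below are illustrative, not taken from the diff):

import os

# how C.model_path is now derived (values assumed for illustration)
network = "resnet50"   # options.network
dataset = "voc"        # options.dataset, from the new --dataset flag
if not os.path.isdir(os.path.join("models", network)):
    os.makedirs(os.path.join("models", network))
model_path = os.path.join("models", network, dataset + ".hdf5")  # e.g. models/resnet50/voc.hdf5

# example invocation (flags other than --dataset and --lr are assumed from the existing script):
#   python train_frcnn.py -p /path/to/VOC2007 --network resnet50 --dataset voc \
#       --opt SGD --lr 1e-3 --elen 1000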