Dev #22

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file removed config.pickle
Binary file removed keras_frcnn/.mobil.swp
38 changes: 19 additions & 19 deletions keras_frcnn/resnet.py
@@ -8,18 +8,19 @@
from __future__ import absolute_import

from keras.layers import Input, Add, Dense, Activation, Flatten, Convolution2D, MaxPooling2D, ZeroPadding2D, \
AveragePooling2D, TimeDistributed
AveragePooling2D, TimeDistributed, BatchNormalization

from keras import backend as K

import os
from keras_frcnn.RoiPoolingConv import RoiPoolingConv
from keras_frcnn.FixedBatchNormalization import FixedBatchNormalization


def get_weight_path():
if K.image_dim_ordering() == 'th':
return 'resnet50_weights_th_dim_ordering_th_kernels_notop.h5'
else:
return 'resnet50_weights_tf_dim_ordering_tf_kernels.h5'
return os.path.join("pretrain", "resnet50_weights_tf_dim_ordering_tf_kernels.h5")

def get_img_output_length(width, height):
def get_output_length(input_length):
@@ -47,15 +48,15 @@ def identity_block(input_tensor, kernel_size, filters, stage, block, trainable=T
bn_name_base = 'bn' + str(stage) + block + '_branch'

x = Convolution2D(nb_filter1, (1, 1), name=conv_name_base + '2a', trainable=trainable)(input_tensor)
x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
x = Activation('relu')(x)

x = Convolution2D(nb_filter2, (kernel_size, kernel_size), padding='same', name=conv_name_base + '2b', trainable=trainable)(x)
x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
x = Activation('relu')(x)

x = Convolution2D(nb_filter3, (1, 1), name=conv_name_base + '2c', trainable=trainable)(x)
x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)

x = Add()([x, input_tensor])
x = Activation('relu')(x)
@@ -76,15 +77,15 @@ def identity_block_td(input_tensor, kernel_size, filters, stage, block, trainabl
bn_name_base = 'bn' + str(stage) + block + '_branch'

x = TimeDistributed(Convolution2D(nb_filter1, (1, 1), trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '2a')(input_tensor)
x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2a')(x)
x = TimeDistributed(BatchNormalization(axis=bn_axis), name=bn_name_base + '2a')(x)
x = Activation('relu')(x)

x = TimeDistributed(Convolution2D(nb_filter2, (kernel_size, kernel_size), trainable=trainable, kernel_initializer='normal',padding='same'), name=conv_name_base + '2b')(x)
x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2b')(x)
x = TimeDistributed(BatchNormalization(axis=bn_axis), name=bn_name_base + '2b')(x)
x = Activation('relu')(x)

x = TimeDistributed(Convolution2D(nb_filter3, (1, 1), trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '2c')(x)
x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2c')(x)
x = TimeDistributed(BatchNormalization(axis=bn_axis), name=bn_name_base + '2c')(x)

x = Add()([x, input_tensor])
x = Activation('relu')(x)
@@ -103,18 +104,18 @@ def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2),
bn_name_base = 'bn' + str(stage) + block + '_branch'

x = Convolution2D(nb_filter1, (1, 1), strides=strides, name=conv_name_base + '2a', trainable=trainable)(input_tensor)
x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
x = Activation('relu')(x)

x = Convolution2D(nb_filter2, (kernel_size, kernel_size), padding='same', name=conv_name_base + '2b', trainable=trainable)(x)
x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
x = Activation('relu')(x)

x = Convolution2D(nb_filter3, (1, 1), name=conv_name_base + '2c', trainable=trainable)(x)
x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)

shortcut = Convolution2D(nb_filter3, (1, 1), strides=strides, name=conv_name_base + '1', trainable=trainable)(input_tensor)
shortcut = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '1')(shortcut)
shortcut = BatchNormalization(axis=bn_axis, name=bn_name_base + '1')(shortcut)

x = Add()([x, shortcut])
x = Activation('relu')(x)
@@ -135,18 +136,18 @@ def conv_block_td(input_tensor, kernel_size, filters, stage, block, input_shape,
bn_name_base = 'bn' + str(stage) + block + '_branch'

x = TimeDistributed(Convolution2D(nb_filter1, (1, 1), strides=strides, trainable=trainable, kernel_initializer='normal'), input_shape=input_shape, name=conv_name_base + '2a')(input_tensor)
x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2a')(x)
x = TimeDistributed(BatchNormalization(axis=bn_axis), name=bn_name_base + '2a')(x)
x = Activation('relu')(x)

x = TimeDistributed(Convolution2D(nb_filter2, (kernel_size, kernel_size), padding='same', trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '2b')(x)
x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2b')(x)
x = TimeDistributed(BatchNormalization(axis=bn_axis), name=bn_name_base + '2b')(x)
x = Activation('relu')(x)

x = TimeDistributed(Convolution2D(nb_filter3, (1, 1), kernel_initializer='normal'), name=conv_name_base + '2c', trainable=trainable)(x)
x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2c')(x)
x = TimeDistributed(BatchNormalization(axis=bn_axis), name=bn_name_base + '2c')(x)

shortcut = TimeDistributed(Convolution2D(nb_filter3, (1, 1), strides=strides, trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '1')(input_tensor)
shortcut = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '1')(shortcut)
shortcut = TimeDistributed(BatchNormalization(axis=bn_axis), name=bn_name_base + '1')(shortcut)

x = Add()([x, shortcut])
x = Activation('relu')(x)
@@ -176,7 +177,7 @@ def nn_base(input_tensor=None, trainable=False):
x = ZeroPadding2D((3, 3))(img_input)

x = Convolution2D(64, (7, 7), strides=(2, 2), name='conv1', trainable = trainable)(x)
x = FixedBatchNormalization(axis=bn_axis, name='bn_conv1')(x)
x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
x = Activation('relu')(x)
x = MaxPooling2D((3, 3), strides=(2, 2))(x)

@@ -244,4 +245,3 @@ def classifier(base_layers, input_rois, num_rois, nb_classes = 21, trainable=Fal
# note: no regression target for bg class
out_regr = TimeDistributed(Dense(4 * (nb_classes-1), activation='linear', kernel_initializer='zero'), name='dense_regress_{}'.format(nb_classes))(out)
return [out_class, out_regr]
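Note on the resnet.py change: every FixedBatchNormalization layer is swapped for Keras's stock BatchNormalization with the same axis and name, and the pretrained weight file is now looked up under a pretrain/ directory. The custom FixedBatchNormalization in keras-frcnn keeps the pretrained batch-norm statistics frozen, so with the stock layer the moving mean and variance will additionally be updated during training. A minimal sketch of the repeated conv + BN + ReLU branch pattern after this change (the helper name and the default bn_axis are illustrative, not part of the commit):

from keras.layers import Activation, BatchNormalization, Convolution2D


def conv_bn_relu(x, nb_filter, conv_name, bn_name, bn_axis=3, trainable=True):
    """Sketch of the branch pattern used throughout resnet.py after this change:
    convolution, then stock BatchNormalization (was FixedBatchNormalization), then ReLU."""
    x = Convolution2D(nb_filter, (1, 1), name=conv_name, trainable=trainable)(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name)(x)
    return Activation('relu')(x)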

100 changes: 100 additions & 0 deletions parser.py
@@ -0,0 +1,100 @@
import os
import cv2
import xml.etree.ElementTree as ET
import numpy as np


def get_data(input_path):
    all_imgs = []

    classes_count = {}

    class_mapping = {}

    visualise = False

    data_paths = [os.path.join(input_path, s) for s in ['VOC2007']]  # add VOC2012 here if you use it
    print("data path:", data_paths)

    print('Parsing annotation files')
    data_path = input_path

    annot_path = os.path.join(data_path, 'Annotations')
    imgs_path = os.path.join(data_path, 'JPEGImages')
    imgsets_path_trainval = os.path.join(data_path, 'ImageSets', 'Main', 'trainval.txt')
    imgsets_path_test = os.path.join(data_path, 'ImageSets', 'Main', 'test.txt')

    trainval_files = []
    test_files = []
    try:
        with open(imgsets_path_trainval) as f:
            for line in f:
                trainval_files.append(line.strip() + '.jpg')
    except Exception as e:
        print(e)
    try:
        with open(imgsets_path_test) as f:
            for line in f:
                test_files.append(line.strip() + '.jpg')
    except Exception as e:
        if data_path[-7:] == 'VOC2012':
            # this is expected: most PASCAL VOC distributions don't ship a test.txt file
            pass
        else:
            print(e)

    annots = [os.path.join(annot_path, s) for s in os.listdir(annot_path)]
    idx = 0
    for annot in annots:
        try:
            idx += 1
            et = ET.parse(annot)
            element = et.getroot()
            element_objs = element.findall('object')
            element_filename = element.find('filename').text
            element_width = int(element.find('size').find('width').text)
            element_height = int(element.find('size').find('height').text)

            if len(element_objs) > 0:
                annotation_data = {'filepath': os.path.join(imgs_path, element_filename), 'width': element_width,
                                   'height': element_height, 'bboxes': []}

                if element_filename in trainval_files:
                    annotation_data['imageset'] = 'trainval'
                elif element_filename in test_files:
                    annotation_data['imageset'] = 'test'
                else:
                    annotation_data['imageset'] = 'trainval'

            for element_obj in element_objs:
                class_name = element_obj.find('name').text
                if class_name not in classes_count:
                    classes_count[class_name] = 1
                else:
                    classes_count[class_name] += 1

                if class_name not in class_mapping:
                    class_mapping[class_name] = len(class_mapping)

                obj_bbox = element_obj.find('bndbox')
                x1 = int(round(float(obj_bbox.find('xmin').text)))
                y1 = int(round(float(obj_bbox.find('ymin').text)))
                x2 = int(round(float(obj_bbox.find('xmax').text)))
                y2 = int(round(float(obj_bbox.find('ymax').text)))
                difficulty = 1  # do not read the 'difficult' flag; treat every annotation the same
                annotation_data['bboxes'].append(
                    {'class': class_name, 'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'difficult': difficulty})
            all_imgs.append(annotation_data)

            if visualise:
                img = cv2.imread(annotation_data['filepath'])
                for bbox in annotation_data['bboxes']:
                    cv2.rectangle(img, (bbox['x1'], bbox['y1']), (bbox['x2'], bbox['y2']), (0, 0, 255))
                cv2.imshow('img', img)
                cv2.waitKey(0)

        except Exception as e:
            print(e)
            continue
    return all_imgs, classes_count, class_mapping
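A hypothetical usage sketch for the new parser (the VOCdevkit path and the summary prints are illustrative, not part of the commit); get_data expects the directory that directly contains Annotations/, JPEGImages/ and ImageSets/Main/:

from parser import get_data  # the parser.py added in this PR

# assumed layout: VOCdevkit/VOC2007/{Annotations, JPEGImages, ImageSets/Main}
all_imgs, classes_count, class_mapping = get_data('VOCdevkit/VOC2007')

print('parsed {} annotated images'.format(len(all_imgs)))
print('boxes per class:', classes_count)
print('class -> index:', class_mapping)

# split according to the imageset assigned while parsing
train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval']
test_imgs = [s for s in all_imgs if s['imageset'] == 'test']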
68 changes: 25 additions & 43 deletions test_frcnn.py
@@ -13,6 +13,14 @@
from keras_frcnn import roi_helpers
from keras.applications.mobilenet import preprocess_input

# for GPU settings..
if 'tensorflow' == K.backend():
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
config2 = tf.ConfigProto()
config2.gpu_options.allow_growth = True
set_session(tf.Session(config=config2))

sys.setrecursionlimit(40000)

parser = OptionParser()
@@ -101,36 +109,11 @@ def format_img_channels(img, C):
img = np.expand_dims(img, axis=0)
return img

def format_img(img):
img_min_side = 600.0
(height,width,_) = img.shape

if width <= height:
f = img_min_side/width
new_height = int(f * height)
new_width = 600
else:
f = img_min_side/height
new_width = int(f * width)
new_height = 600
# Zero-center by mean pixel, and preprocess image
img = cv2.resize(img,(new_width,new_height),interpolation = cv2.INTER_CUBIC)
# x_img = np.transpose(img,(2,0,1)).astype(np.float32)
x_img = img[:,:, (2, 1, 0)] # BGR -> RGB
x_img = x_img.astype(np.float32)
x_img[:, :, 0] -= C.img_channel_mean[0]
x_img[:, :, 1] -= C.img_channel_mean[1]
x_img[:, :, 2] -= C.img_channel_mean[2]
x_img /= C.img_scaling_factor

x_img = np.transpose(x_img, (2, 0, 1))
x_img = np.expand_dims(x_img, axis=0)
if K.backend == 'tf':
x_img = np.transpose(x_img, (0, 2, 3, 1))

# img = np.expand_dims(x_img, axis=0)
# img -= 127.5
return x_img
def format_img(img, C):
""" formats an image for model prediction based on config """
img, ratio = format_img_size(img, C)
img = format_img_channels(img, C)
return img, ratio

# Method to transform the coordinates of the bounding box to its original size
def get_real_coordinates(ratio, x1, y1, x2, y2):
@@ -216,7 +199,7 @@ def get_real_coordinates(ratio, x1, y1, x2, y2):
img = cv2.imread(filepath)

# preprocess image
X = format_img(img)
X, ratio = format_img(img, C)
img_scaled = (np.transpose(X[0,:,:,:],(1,2,0)) + 127.5).astype('uint8')
if K.image_dim_ordering() == 'tf':
X = np.transpose(X, (0, 2, 3, 1))
@@ -271,23 +254,22 @@ def get_real_coordinates(ratio, x1, y1, x2, y2):
print(len(bboxes[key]))
bbox = np.array(bboxes[key])

new_boxes, new_probs = roi_helpers.non_max_suppression_fast(bbox, np.array(probs[key]), overlapThresh = 0.5)
new_boxes, new_probs = roi_helpers.non_max_suppression_fast(bbox, np.array(probs[key]), overlap_thresh = 0.5)
for jk in range(new_boxes.shape[0]):
(x1,y1,x2,y2) = new_boxes[jk,:]
cv2.rectangle(img_scaled,(x1,y1),(x2,y2),class_to_color[key],1)
(x1, y1, x2, y2) = new_boxes[jk,:]
(real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

cv2.rectangle(img,(real_x1, real_y1), (real_x2, real_y2), (int(class_to_color[key][0]), int(class_to_color[key][1]), int(class_to_color[key][2])),2)

textLabel = '{}:{}'.format(key,int(100*new_probs[jk]))
if key not in all_dets:
all_dets[key] = 100*new_probs[jk]
else:
all_dets[key] = max(all_dets[key],100*new_probs[jk])
textLabel = '{}: {}'.format(key,int(100*new_probs[jk]))
all_dets.append((key,100*new_probs[jk]))

(retval,baseLine) = cv2.getTextSize(textLabel,cv2.FONT_HERSHEY_COMPLEX,1,1)
textOrg = (x1,y1+20)
textOrg = (real_x1, real_y1-0)

cv2.rectangle(img_scaled,(textOrg[0] - 5,textOrg[1]+baseLine - 5),(textOrg[0]+retval[0] + 5,textOrg[1]-retval[1] - 5),(0,0,0),2)
cv2.rectangle(img_scaled,(textOrg[0] - 5,textOrg[1]+baseLine - 5),(textOrg[0]+retval[0] + 5,textOrg[1]-retval[1] - 5),(255,255,255),-1)
cv2.putText(img_scaled,textLabel,textOrg,cv2.FONT_HERSHEY_SIMPLEX,1,(0,0,0),1)
cv2.rectangle(img, (textOrg[0] - 5, textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (0, 0, 0), 2)
cv2.rectangle(img, (textOrg[0] - 5,textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (255, 255, 255), -1)
cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)

print('Elapsed time = {}'.format(time.time() - st))
print(all_dets)
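Note on the test_frcnn.py change: the hand-rolled format_img is replaced by a thin wrapper around the existing format_img_size and format_img_channels helpers, so the returned ratio can be passed to get_real_coordinates and the detections drawn on the original image rather than the network-sized copy. format_img_size is not shown in this diff; the sketch below reconstructs what it is assumed to do, based on how format_img and get_real_coordinates use it and assuming the config object exposes im_size:

import cv2


def format_img_size(img, C):
    """Assumed behaviour of the existing helper: resize the image so that its shorter
    side equals C.im_size and return the resized image plus the scaling ratio."""
    img_min_side = float(C.im_size)
    (height, width, _) = img.shape

    if width <= height:
        ratio = img_min_side / width
        new_height = int(ratio * height)
        new_width = int(img_min_side)
    else:
        ratio = img_min_side / height
        new_width = int(ratio * width)
        new_height = int(img_min_side)
    img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
    return img, ratio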
24 changes: 18 additions & 6 deletions train_frcnn.py
@@ -6,6 +6,7 @@
import numpy as np
from optparse import OptionParser
import pickle
import os

from keras import backend as K
from keras.optimizers import Adam, SGD, RMSprop
@@ -16,6 +17,7 @@
import keras_frcnn.roi_helpers as roi_helpers
from keras.utils import generic_utils

# for GPU settings..
if 'tensorflow' == K.backend():
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
@@ -46,6 +48,8 @@
parser.add_option("--opt", dest="optimizers", help="set the optimizer to use", default="SGD")
parser.add_option("--elen", dest="epoch_length", help="set the epoch length. def=1000", default=1000)
parser.add_option("--load", dest="load", help="What model to load", default=None)
parser.add_option("--dataset", dest="dataset", help="name of the dataset", default="voc")
parser.add_option("--lr", dest="lr", help="learn rate", default=1e-3)
(options, args) = parser.parse_args()

if not options.train_path: # if filename is not given
@@ -64,7 +68,13 @@
C.use_vertical_flips = bool(options.vertical_flips)
C.rot_90 = bool(options.rot_90)

C.model_path = options.output_weight_path
# mkdir to save models.
if not os.path.isdir("models"):
os.mkdir("models")
if not os.path.isdir("models/"+options.network):
os.mkdir(os.path.join("models", options.network))
C.model_path = os.path.join("models", options.network, options.dataset+".hdf5")

C.num_rois = int(options.num_rois)

# we will use resnet. may change to others
@@ -156,17 +166,19 @@
print('loading weights from {}'.format(C.base_net_weights))
model_rpn.load_weights(C.base_net_weights, by_name=True)
model_classifier.load_weights(C.base_net_weights, by_name=True)
print("loaded basenet weights!")
except:
print('Could not load pretrained model weights. Weights can be found in the keras application folder \
https://github.com/fchollet/keras/tree/master/keras/applications')

# optimizer setup
lr = float(options.lr)
if options.optimizers == "SGD":
optimizer = SGD(lr=1e-2, decay=0.0005, momentum=0.9)
optimizer_classifier = SGD(lr=1e-3, decay=0.0005, momentum=0.9)
optimizer = SGD(lr=lr, decay=0.0005, momentum=0.9)
optimizer_classifier = SGD(lr=lr, decay=0.0005, momentum=0.9)
else:
optimizer = Adam(lr=1e-6, clipnorm=0.001)
optimizer_classifier = Adam(lr=1e-56, clipnorm=0.001)
optimizer = Adam(lr=lr)
optimizer_classifier = Adam(lr=lr)

# may use this to resume from rpn models or previous training. specify either rpn or frcnn model to load
if options.load is not None:
@@ -177,7 +189,7 @@
print("loading RPN weights from ", options.rpn_weight_path)
model_rpn.load_weights(options.rpn_weight_path, by_name=True)
else:
print("no previous model was loaded")
print("no previous RPN model was loaded")

# compile the model AFTER loading weights!
model_rpn.compile(optimizer=optimizer, loss=[losses.rpn_loss_cls(num_anchors), losses.rpn_loss_regr(num_anchors)])
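Note on the train_frcnn.py change: the output weights now go to a per-network, per-dataset path under models/, and both optimizers take their learning rate from the new --lr option instead of hard-coded values. A sketch of how the pieces fit together (the network name, dataset name and command line below are illustrative, not taken from the diff):

import os

# how C.model_path is now derived (values assumed for illustration)
network = "resnet50"   # options.network
dataset = "voc"        # options.dataset, from the new --dataset flag
if not os.path.isdir(os.path.join("models", network)):
    os.makedirs(os.path.join("models", network))
model_path = os.path.join("models", network, dataset + ".hdf5")  # e.g. models/resnet50/voc.hdf5

# example invocation (flags other than --dataset and --lr are assumed from the existing script):
#   python train_frcnn.py -p /path/to/VOC2007 --network resnet50 --dataset voc \
#       --opt SGD --lr 1e-3 --elen 1000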