Commit 141ef1c

Author: Philippe Rémy
Merge pull request #42 from philipperemy/paper
Implementation closer to the paper
2 parents (0ae28a0 + 12a1fc8), commit 141ef1c

File tree: 8 files changed (+47, -100 lines)

README.md
Lines changed: 1 addition & 1 deletion

@@ -95,7 +95,7 @@ model.fit(x, y) # Keras model.
 
 ### Arguments
 
-`tcn.TCN(nb_filters=64, kernel_size=2, nb_stacks=1, dilations=[1, 2, 4, 8, 16, 32], activation='norm_relu', padding='causal', use_skip_connections=True, dropout_rate=0.0, return_sequences=True, name='tcn')`
+`TCN(nb_filters=64, kernel_size=2, nb_stacks=1, dilations=[1, 2, 4, 8, 16, 32], activation='norm_relu', padding='causal', use_skip_connections=True, dropout_rate=0.0, return_sequences=True, name='tcn')`
 
 - `nb_filters`: Integer. The number of filters to use in the convolutional layers.
 - `kernel_size`: Integer. The size of the kernel to use in each convolutional layer.
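For context, this is roughly how the layer documented above is used after the interface change: `TCN(...)` is instantiated first and then called on a Keras tensor. A minimal sketch only; the input shape and the `Dense` head are illustrative assumptions, not part of this diff. Note that the `activation` argument is removed from `TCN` by this commit (see tcn/tcn.py below), so it is omitted here even though the README signature string still lists it.

```python
from keras.layers import Dense
from keras.models import Input, Model
from tcn import TCN

# Assumed toy setup: sequences of 100 timesteps with a single feature each.
inputs = Input(shape=(100, 1))
x = TCN(nb_filters=64,
        kernel_size=2,
        nb_stacks=1,
        dilations=[1, 2, 4, 8, 16, 32],
        padding='causal',
        use_skip_connections=True,
        dropout_rate=0.0,
        return_sequences=False)(inputs)  # build the layer, then apply it to the tensor
outputs = Dense(1, activation='linear')(x)
model = Model(inputs, outputs)
model.compile(optimizer='adam', loss='mse')
```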

setup.py
Lines changed: 1 addition & 1 deletion

@@ -2,7 +2,7 @@
 
 setup(
     name='keras-tcn',
-    version='2.3.6',
+    version='2.5.6',
     description='Keras TCN',
     author='Philippe Remy',
     license='MIT',

tasks/adding_problem/main.py
Lines changed: 3 additions & 4 deletions

@@ -1,7 +1,7 @@
 import keras
-from utils import data_generator
 
 from tcn import compiled_tcn
+from utils import data_generator
 
 x_train, y_train = data_generator(n=200000, seq_length=600)
 x_test, y_test = data_generator(n=40000, seq_length=600)
@@ -22,9 +22,8 @@ def run_task():
                          nb_filters=24,
                          kernel_size=8,
                          dilations=[2 ** i for i in range(9)],
-                         nb_stacks=2,
+                         nb_stacks=1,
                          max_len=x_train.shape[1],
-                         activation='norm_relu',
                          use_skip_connections=True,
                          regression=True,
                          dropout_rate=0)
@@ -39,7 +38,7 @@ def run_task():
     model.summary()
 
     model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=500,
-              callbacks=[psv], batch_size=128)
+              callbacks=[psv], batch_size=256)
 
 
 if __name__ == '__main__':
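A quick sanity check on the `nb_stacks=2` → `nb_stacks=1` change above: with the refactored residual block (two dilated convolutions per block, see tcn/tcn.py below), a single stack still has a receptive field far beyond the 600-step sequences used here. The estimate below is a rough sketch under the usual stacked-dilated-convolution formula, which is an assumption of this note rather than code from the repository.

```python
# Rough receptive-field estimate for a TCN whose residual blocks each apply
# `convs_per_block` dilated causal convolutions, repeated over `nb_stacks` stacks.
def approx_receptive_field(kernel_size, dilations, nb_stacks, convs_per_block=2):
    # Each convolution with dilation d extends the receptive field by (kernel_size - 1) * d.
    return 1 + convs_per_block * nb_stacks * (kernel_size - 1) * sum(dilations)

dilations = [2 ** i for i in range(9)]  # [1, 2, 4, ..., 256], as in the task above
print(approx_receptive_field(kernel_size=8, dilations=dilations, nb_stacks=1))  # 7155
print(approx_receptive_field(kernel_size=8, dilations=dilations, nb_stacks=2))  # 14309
# Both comfortably exceed seq_length=600, so one stack is enough for this task.
```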

tasks/copy_memory/main.py
Lines changed: 2 additions & 3 deletions

@@ -1,7 +1,7 @@
 import keras
-from utils import data_generator
 
 from tcn import compiled_tcn
+from utils import data_generator
 
 x_train, y_train = data_generator(601, 10, 30000)
 x_test, y_test = data_generator(601, 10, 6000)
@@ -25,9 +25,8 @@ def run_task():
                          nb_filters=10,
                          kernel_size=8,
                          dilations=[2 ** i for i in range(9)],
-                         nb_stacks=2,
+                         nb_stacks=1,
                          max_len=x_train[0:1].shape[1],
-                         activation='norm_relu',
                          use_skip_connections=True,
                          return_sequences=True)
 

tasks/mnist_pixel/main.py
Lines changed: 3 additions & 4 deletions

@@ -9,12 +9,11 @@ def run_task():
     model = compiled_tcn(return_sequences=False,
                          num_feat=1,
                          num_classes=10,
-                         nb_filters=25,
-                         kernel_size=7,
+                         nb_filters=20,
+                         kernel_size=6,
                          dilations=[2 ** i for i in range(9)],
-                         nb_stacks=2,
+                         nb_stacks=1,
                          max_len=x_train[0:1].shape[1],
-                         activation='norm_relu',
                          use_skip_connections=True)
 
     print(f'x_train.shape = {x_train.shape}')
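For readers unfamiliar with this task, the `num_feat=1` and `max_len=x_train[0:1].shape[1]` settings above correspond to the usual pixel-MNIST setup, where each 28x28 image is fed to the TCN as a sequence of 784 single-channel pixels. The repository loads its data elsewhere (not shown in this diff); the snippet below is only a hedged sketch of the standard preparation, not the repository's code.

```python
import numpy as np
from keras.datasets import mnist

# Standard "sequential MNIST" preparation: flatten each 28x28 image into a
# sequence of 784 timesteps with one feature per step, scaled to [0, 1].
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(-1, 784, 1).astype('float32') / 255.0
x_test = x_test.reshape(-1, 784, 1).astype('float32') / 255.0
y_train = y_train.reshape(-1, 1)  # sparse labels, shape (num_samples, 1)
y_test = y_test.reshape(-1, 1)

print(x_train.shape)  # (60000, 784, 1) -> max_len=784, num_feat=1
```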

tasks/receptive-field/main.py
Lines changed: 0 additions & 1 deletion

@@ -15,7 +15,6 @@ def run_task(sequence_length=8):
                          dilations=[1, 2, 4, 8, 16, 32],
                          nb_stacks=6,
                          max_len=x_train[0:1].shape[1],
-                         activation='norm_relu',
                          use_skip_connections=False)
 
     print(f'x_train.shape = {x_train.shape}')

tcn/__init__.py
Lines changed: 1 addition & 1 deletion

@@ -1,3 +1,3 @@
 from tcn.tcn import TCN, compiled_tcn
 
-__version__ = '2.3.5'
+__version__ = '2.5.6'

tcn/tcn.py
Lines changed: 36 additions & 85 deletions

@@ -1,87 +1,44 @@
+from typing import List, Tuple
+
 import keras.backend as K
 import keras.layers
 from keras import optimizers
 from keras.engine.topology import Layer
-from keras.layers import Activation, Lambda
+from keras.layers import Activation, Lambda, BatchNormalization
 from keras.layers import Conv1D, SpatialDropout1D
 from keras.layers import Convolution1D, Dense
 from keras.models import Input, Model
-from typing import List, Tuple
-
-
-def channel_normalization(x):
-    # type: (Layer) -> Layer
-    """ Normalize a layer to the maximum activation
-
-    This keeps a layers values between zero and one.
-    It helps with relu's unbounded activation
-
-    Args:
-        x: The layer to normalize
-
-    Returns:
-        A maximal normalized layer
-    """
-    max_values = K.max(K.abs(x), 2, keepdims=True) + 1e-5
-    out = x / max_values
-    return out
 
 
-def wave_net_activation(x):
-    # type: (Layer) -> Layer
-    """This method defines the activation used for WaveNet
-
-    described in https://deepmind.com/blog/wavenet-generative-model-raw-audio/
-
-    Args:
-        x: The layer we want to apply the activation to
-
-    Returns:
-        A new layer with the wavenet activation applied
-    """
-    tanh_out = Activation('tanh')(x)
-    sigm_out = Activation('sigmoid')(x)
-    return keras.layers.multiply([tanh_out, sigm_out])
-
-
-def residual_block(x, s, i, c, activation, nb_filters, kernel_size, padding, dropout_rate=0, name=''):
-    # type: (Layer, int, int, int, str, int, int, str, float, str) -> Tuple[Layer, Layer]
+def residual_block(x, dilation_rate, nb_filters, kernel_size, padding, dropout_rate=0):
+    # type: (Layer, int, int, int, str, float) -> Tuple[Layer, Layer]
     """Defines the residual block for the WaveNet TCN
 
     Args:
         x: The previous layer in the model
-        s: The stack index i.e. which stack in the overall TCN
-        i: The dilation power of 2 we are using for this residual block
-        c: The dilation name to make it unique. In case we have same dilation twice: [1, 1, 2, 4].
-        activation: The name of the type of activation to use
+        dilation_rate: The dilation power of 2 we are using for this residual block
         nb_filters: The number of convolutional filters to use in this block
         kernel_size: The size of the convolutional kernel
         padding: The padding used in the convolutional layers, 'same' or 'causal'.
         dropout_rate: Float between 0 and 1. Fraction of the input units to drop.
-        name: Name of the model. Useful when having multiple TCN.
 
     Returns:
         A tuple where the first element is the residual model layer, and the second
        is the skip connection.
    """
+    prev_x = x
+    for k in range(2):
+        x = Conv1D(filters=nb_filters,
+                   kernel_size=kernel_size,
+                   dilation_rate=dilation_rate,
+                   padding=padding)(x)
+        # x = BatchNormalization()(x)  # TODO should be WeightNorm here.
+        x = Activation('relu')(x)
+        x = SpatialDropout1D(rate=dropout_rate)(x)
 
-    original_x = x
-    conv = Conv1D(filters=nb_filters, kernel_size=kernel_size,
-                  dilation_rate=i, padding=padding,
-                  name=name + '_d_%s_conv_%d-%d_tanh_s%d' % (padding, i, c, s))(x)
-    if activation == 'norm_relu':
-        x = Activation('relu')(conv)
-        x = Lambda(channel_normalization)(x)
-    elif activation == 'wavenet':
-        x = wave_net_activation(conv)
-    else:
-        x = Activation(activation)(conv)
-
-    x = SpatialDropout1D(dropout_rate, name=name + '_spatial_dropout1d_%d-%d_s%d_%f' % (i, c, s, dropout_rate))(x)
-
-    # 1x1 conv.
+    # 1x1 conv to match the shapes (channel dimension).
     x = Convolution1D(nb_filters, 1, padding='same')(x)
-    res_x = keras.layers.add([original_x, x])
+    res_x = keras.layers.add([prev_x, x])
     return res_x, x
 
 
@@ -109,7 +66,6 @@ class TCN:
         kernel_size: The size of the kernel to use in each convolutional layer.
         dilations: The list of the dilations. Example is: [1, 2, 4, 8, 16, 32, 64].
         nb_stacks : The number of stacks of residual blocks to use.
-        activation: The activations to use (norm_relu, wavenet, relu...).
         padding: The padding to use in the convolutional layers, 'causal' or 'same'.
         use_skip_connections: Boolean. If we want to add skip connections from input to each residual block.
         return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence.
@@ -125,7 +81,6 @@ def __init__(self,
                  kernel_size=2,
                  nb_stacks=1,
                  dilations=[1, 2, 4, 8, 16, 32],
-                 activation='norm_relu',
                  padding='causal',
                  use_skip_connections=True,
                  dropout_rate=0.0,
@@ -135,7 +90,6 @@ def __init__(self,
         self.return_sequences = return_sequences
         self.dropout_rate = dropout_rate
         self.use_skip_connections = use_skip_connections
-        self.activation = activation
         self.dilations = dilations
         self.nb_stacks = nb_stacks
         self.kernel_size = kernel_size
@@ -147,27 +101,29 @@ def __init__(self,
 
         if not isinstance(nb_filters, int):
             print('An interface change occurred after the version 2.1.2.')
-            print('Before: tcn.TCN(i, return_sequences=False, ...)')
-            print('Now should be: tcn.TCN(return_sequences=False, ...)(i)')
-            print('Second solution is to pip install keras-tcn==2.1.2 to downgrade.')
+            print('Before: tcn.TCN(x, return_sequences=False, ...)')
+            print('Now should be: tcn.TCN(return_sequences=False, ...)(x)')
+            print('The alternative is to downgrade to 2.1.2 (pip install keras-tcn==2.1.2).')
             raise Exception()
 
     def __call__(self, inputs):
         x = inputs
-        x = Convolution1D(self.nb_filters, 1, padding=self.padding, name=self.name + '_initial_conv')(x)
+        # 1D FCN.
+        x = Convolution1D(self.nb_filters, 1, padding=self.padding)(x)
         skip_connections = []
         for s in range(self.nb_stacks):
-            for i, d in enumerate(self.dilations):
-                x, skip_out = residual_block(x, s, d, i, self.activation, self.nb_filters,
-                                             self.kernel_size, self.padding, self.dropout_rate, name=self.name)
+            for d in self.dilations:
+                x, skip_out = residual_block(x,
+                                             dilation_rate=d,
+                                             nb_filters=self.nb_filters,
+                                             kernel_size=self.kernel_size,
+                                             padding=self.padding,
+                                             dropout_rate=self.dropout_rate)
                 skip_connections.append(skip_out)
         if self.use_skip_connections:
             x = keras.layers.add(skip_connections)
-            x = Activation('relu')(x)
-
         if not self.return_sequences:
-            output_slice_index = -1
-            x = Lambda(lambda tt: tt[:, output_slice_index, :])(x)
+            x = Lambda(lambda tt: tt[:, -1, :])(x)
         return x
 
 
@@ -178,7 +134,6 @@ def compiled_tcn(num_feat,  # type: int
                  dilations,  # type: List[int]
                  nb_stacks,  # type: int
                  max_len,  # type: int
-                 activation='norm_relu',  # type: str
                  padding='causal',  # type: str
                  use_skip_connections=True,  # type: bool
                  return_sequences=True,
@@ -197,7 +152,6 @@ def compiled_tcn(num_feat,  # type: int
         dilations: The list of the dilations. Example is: [1, 2, 4, 8, 16, 32, 64].
         nb_stacks : The number of stacks of residual blocks to use.
         max_len: The maximum sequence length, use None if the sequence length is dynamic.
-        activation: The activations to use.
         padding: The padding to use in the convolutional layers.
         use_skip_connections: Boolean. If we want to add skip connections from input to each residual block.
         return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence.
@@ -213,8 +167,8 @@ def compiled_tcn(num_feat,  # type: int
 
     input_layer = Input(shape=(max_len, num_feat))
 
-    x = TCN(nb_filters, kernel_size, nb_stacks, dilations, activation,
-            padding, use_skip_connections, dropout_rate, return_sequences, name)(input_layer)
+    x = TCN(nb_filters, kernel_size, nb_stacks, dilations, padding,
+            use_skip_connections, dropout_rate, return_sequences, name)(input_layer)
 
     print('x.shape=', x.shape)
 
@@ -223,13 +177,11 @@ def compiled_tcn(num_feat,  # type: int
         x = Dense(num_classes)(x)
         x = Activation('softmax')(x)
         output_layer = x
-        print(f'model.x = {input_layer.shape}')
-        print(f'model.y = {output_layer.shape}')
         model = Model(input_layer, output_layer)
 
         # https://github.com/keras-team/keras/pull/11373
         # It's now in Keras@master but still not available with pip.
-        # TODO To remove later.
+        # TODO remove later.
         def accuracy(y_true, y_pred):
             # reshape in case it's in shape (num_samples, 1) instead of (num_samples,)
             if K.ndim(y_true) == K.ndim(y_pred):
@@ -241,16 +193,15 @@ def accuracy(y_true, y_pred):
 
         adam = optimizers.Adam(lr=0.002, clipnorm=1.)
         model.compile(adam, loss='sparse_categorical_crossentropy', metrics=[accuracy])
-        print('Adam with norm clipping.')
     else:
         # regression
        x = Dense(1)(x)
        x = Activation('linear')(x)
        output_layer = x
-        print(f'model.x = {input_layer.shape}')
-        print(f'model.y = {output_layer.shape}')
        model = Model(input_layer, output_layer)
        adam = optimizers.Adam(lr=0.002, clipnorm=1.)
        model.compile(adam, loss='mean_squared_error')
-
+    print(f'model.x = {input_layer.shape}')
+    print(f'model.y = {output_layer.shape}')
+    print('Adam with norm clipping.')
     return model
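To see the refactored interface end to end, here is a minimal usage sketch of `compiled_tcn` after this commit (the `activation` argument is gone). The random regression data and the hyperparameter values are illustrative assumptions, not taken from the repository's tasks; `num_classes` is only a placeholder since the regression branch ignores it.

```python
import numpy as np
from tcn import compiled_tcn

# Toy regression data: 1000 sequences of 100 timesteps, one feature per step.
x = np.random.randn(1000, 100, 1)
y = np.random.randn(1000, 1)

model = compiled_tcn(num_feat=1,
                     num_classes=1,  # unused in the regression branch
                     nb_filters=24,
                     kernel_size=8,
                     dilations=[1, 2, 4, 8, 16, 32],
                     nb_stacks=1,
                     max_len=100,
                     use_skip_connections=True,
                     return_sequences=False,
                     regression=True,
                     dropout_rate=0.0)
model.summary()
model.fit(x, y, epochs=1, batch_size=32)
```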
