diff --git a/README.md b/README.md
index d594385..a2f1705 100644
--- a/README.md
+++ b/README.md
@@ -96,7 +96,7 @@ model.fit(x, y) # Keras model.
 
 ### Arguments
 
-`TCN(nb_filters=64, kernel_size=2, nb_stacks=1, dilations=[1, 2, 4, 8, 16, 32], padding='causal', use_skip_connections=True, dropout_rate=0.0, return_sequences=True, name='tcn')`
+`TCN(nb_filters=64, kernel_size=2, nb_stacks=1, dilations=[1, 2, 4, 8, 16, 32], padding='causal', use_skip_connections=True, dropout_rate=0.0, return_sequences=True, activation='linear', name='tcn')`
 
 - `nb_filters`: Integer. The number of filters to use in the convolutional layers. Would be similar to `units` for LSTM.
 - `kernel_size`: Integer. The size of the kernel to use in each convolutional layer.
@@ -106,6 +106,7 @@ model.fit(x, y) # Keras model.
 - `use_skip_connections`: Boolean. If we want to add skip connections from input to each residual block.
 - `return_sequences`: Boolean. Whether to return the last output in the output sequence, or the full sequence.
 - `dropout_rate`: Float between 0 and 1. Fraction of the input units to drop.
+- `activation`: The activation used in the residual blocks, o = Activation(x + F(x)).
 - `name`: Name of the model. Useful when having multiple TCN.
 
 ### Input shape
diff --git a/setup.py b/setup.py
index 736ae98..47d4427 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name='keras-tcn',
-    version='2.6.7',
+    version='2.7.0',
     description='Keras TCN',
     author='Philippe Remy',
     license='MIT',
diff --git a/tcn/tcn.py b/tcn/tcn.py
index 8503a43..cfb5913 100644
--- a/tcn/tcn.py
+++ b/tcn/tcn.py
@@ -10,8 +10,8 @@ from keras.models import Input, Model
 
 
-def residual_block(x, dilation_rate, nb_filters, kernel_size, padding, dropout_rate=0):
-    # type: (Layer, int, int, int, str, float) -> Tuple[Layer, Layer]
+def residual_block(x, dilation_rate, nb_filters, kernel_size, padding, activation='relu', dropout_rate=0):
+    # type: (Layer, int, int, int, str, str, float) -> Tuple[Layer, Layer]
     """Defines the residual block for the WaveNet TCN
 
     Args:
         x: The previous layer in the model
@@ -20,8 +20,8 @@ def residual_block(x, dilation_rate, nb_filters, kernel_size, padding, dropout_r
         nb_filters: The number of convolutional filters to use in this block
         kernel_size: The size of the convolutional kernel
         padding: The padding used in the convolutional layers, 'same' or 'causal'.
+        activation: The final activation used in o = Activation(x + F(x)).
         dropout_rate: Float between 0 and 1. Fraction of the input units to drop.
-
     Returns:
         A tuple where the first element is the residual model layer, and the second
         is the skip connection.
@@ -39,6 +39,7 @@ def residual_block(x, dilation_rate, nb_filters, kernel_size, padding, dropout_r
     # 1x1 conv to match the shapes (channel dimension).
     prev_x = Conv1D(nb_filters, 1, padding='same')(prev_x)
     res_x = keras.layers.add([prev_x, x])
+    res_x = Activation(activation)(res_x)
     return res_x, x
 
 
@@ -69,6 +70,7 @@ class TCN:
         padding: The padding to use in the convolutional layers, 'causal' or 'same'.
         use_skip_connections: Boolean. If we want to add skip connections from input to each residual block.
         return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence.
+        activation: The activation used in the residual blocks, o = Activation(x + F(x)).
         dropout_rate: Float between 0 and 1. Fraction of the input units to drop.
         name: Name of the model. Useful when having multiple TCN.
     """
@@ -85,6 +87,7 @@ def __init__(self,
                  use_skip_connections=True,
                  dropout_rate=0.0,
                  return_sequences=False,
+                 activation='linear',
                  name='tcn'):
         self.name = name
         self.return_sequences = return_sequences
@@ -94,6 +97,7 @@ def __init__(self,
         self.nb_stacks = nb_stacks
         self.kernel_size = kernel_size
         self.nb_filters = nb_filters
+        self.activation = activation
         self.padding = padding
 
         if padding != 'causal' and padding != 'same':
@@ -118,6 +122,7 @@ def __call__(self, inputs):
                                              nb_filters=self.nb_filters,
                                              kernel_size=self.kernel_size,
                                              padding=self.padding,
+                                             activation=self.activation,
                                              dropout_rate=self.dropout_rate)
                 skip_connections.append(skip_out)
         if self.use_skip_connections:
@@ -140,6 +145,7 @@ def compiled_tcn(num_feat,  # type: int
                  regression=False,  # type: bool
                  dropout_rate=0.05,  # type: float
                  name='tcn',  # type: str,
+                 activation='linear',  # type: str
                  opt='adam',
                  lr=0.002):
     # type: (...) -> keras.Model
@@ -159,6 +165,7 @@ def compiled_tcn(num_feat,  # type: int
         return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence.
         regression: Whether the output should be continuous or discrete.
         dropout_rate: Float between 0 and 1. Fraction of the input units to drop.
+        activation: The activation used in the residual blocks, o = Activation(x + F(x)).
         name: Name of the model. Useful when having multiple TCN.
         opt: Optimizer name.
         lr: Learning rate.
@@ -171,7 +178,8 @@ def compiled_tcn(num_feat,  # type: int
     input_layer = Input(shape=(max_len, num_feat))
 
     x = TCN(nb_filters, kernel_size, nb_stacks, dilations, padding,
-            use_skip_connections, dropout_rate, return_sequences, name)(input_layer)
+            use_skip_connections, dropout_rate, return_sequences,
+            activation, name)(input_layer)
 
     print('x.shape=', x.shape)
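
For reviewers who want to exercise the new argument end to end, here is a minimal sketch. It assumes the `from tcn import TCN` import path this repo exposes; the shapes, hyper-parameters, and the final `Dense` head are illustrative and not part of this diff:

```python
import numpy as np
from keras.layers import Dense
from keras.models import Input, Model

from tcn import TCN  # assumes this repo's package layout

batch_size, timesteps, input_dim = 32, 100, 1
x = np.random.rand(batch_size, timesteps, input_dim)
y = np.random.rand(batch_size, 1)

i = Input(shape=(timesteps, input_dim))
# New in 2.7.0: each residual block now ends with
#   o = Activation(activation)(x + F(x))
# The default activation='linear' is the identity on the residual sum,
# so existing models keep their previous behaviour unless overridden.
o = TCN(nb_filters=64, kernel_size=2, dilations=[1, 2, 4, 8],
        activation='relu', return_sequences=False)(i)
o = Dense(1)(o)

model = Model(inputs=[i], outputs=[o])
model.compile(optimizer='adam', loss='mse')
model.fit(x, y)  # Keras model.
```

Note the two defaults in the diff differ: `residual_block` defaults to `activation='relu'`, while the `TCN` layer and `compiled_tcn` pass `activation='linear'`, preserving backward compatibility at the public API.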