 from tensorflow.keras.layers import Dense, Lambda, dot, Activation, concatenate
+from tensorflow.keras.layers import Layer


-def attention_3d_block(hidden_states):
-    """
-    Many-to-one attention mechanism for Keras.
-    @param hidden_states: 3D tensor with shape (batch_size, time_steps, input_dim).
-    @return: 2D tensor with shape (batch_size, 128)
-    @author: felixhao28.
-    """
-    hidden_size = int(hidden_states.shape[2])
-    # Inside dense layer
-    # hidden_states dot W => score_first_part
-    # (batch_size, time_steps, hidden_size) dot (hidden_size, hidden_size) => (batch_size, time_steps, hidden_size)
-    # W is the trainable weight matrix of attention Luong's multiplicative style score
-    score_first_part = Dense(hidden_size, use_bias=False, name='attention_score_vec')(hidden_states)
-    # score_first_part dot last_hidden_state => attention_weights
-    # (batch_size, time_steps, hidden_size) dot (batch_size, hidden_size) => (batch_size, time_steps)
-    h_t = Lambda(lambda x: x[:, -1, :], output_shape=(hidden_size,), name='last_hidden_state')(hidden_states)
-    score = dot([score_first_part, h_t], [2, 1], name='attention_score')
-    attention_weights = Activation('softmax', name='attention_weight')(score)
-    # (batch_size, time_steps, hidden_size) dot (batch_size, time_steps) => (batch_size, hidden_size)
-    context_vector = dot([hidden_states, attention_weights], [1, 1], name='context_vector')
-    pre_activation = concatenate([context_vector, h_t], name='attention_output')
-    attention_vector = Dense(128, use_bias=False, activation='tanh', name='attention_vector')(pre_activation)
-    return attention_vector
+class Attention(Layer):
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+    def __call__(self, hidden_states):
+        """
+        Many-to-one attention mechanism for Keras.
+        @param hidden_states: 3D tensor with shape (batch_size, time_steps, input_dim).
+        @return: 2D tensor with shape (batch_size, 128)
+        @author: felixhao28.
+        """
+        hidden_size = int(hidden_states.shape[2])
+        # Inside dense layer
+        # hidden_states dot W => score_first_part
+        # (batch_size, time_steps, hidden_size) dot (hidden_size, hidden_size) => (batch_size, time_steps, hidden_size)
+        # W is the trainable weight matrix of attention (Luong's multiplicative style score)
+        score_first_part = Dense(hidden_size, use_bias=False, name='attention_score_vec')(hidden_states)
+        # score_first_part dot last_hidden_state => attention_weights
+        # (batch_size, time_steps, hidden_size) dot (batch_size, hidden_size) => (batch_size, time_steps)
+        h_t = Lambda(lambda x: x[:, -1, :], output_shape=(hidden_size,), name='last_hidden_state')(hidden_states)
+        score = dot([score_first_part, h_t], [2, 1], name='attention_score')
+        attention_weights = Activation('softmax', name='attention_weight')(score)
+        # (batch_size, time_steps, hidden_size) dot (batch_size, time_steps) => (batch_size, hidden_size)
+        context_vector = dot([hidden_states, attention_weights], [1, 1], name='context_vector')
+        pre_activation = concatenate([context_vector, h_t], name='attention_output')
+        attention_vector = Dense(128, use_bias=False, activation='tanh', name='attention_vector')(pre_activation)
+        return attention_vector
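
Not part of the diff, but for context: a minimal sketch of how the new Attention block might be wired into a functional-API model. The input shape (10 time steps, 20 features), the LSTM width, and the binary-classification head are illustrative assumptions, not taken from the repository.

from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.models import Model

inputs = Input(shape=(10, 20))                             # (batch_size, time_steps, input_dim) -- assumed shape
hidden_states = LSTM(32, return_sequences=True)(inputs)    # all hidden states: (batch_size, 10, 32)
attention_vector = Attention()(hidden_states)              # many-to-one attention output: (batch_size, 128)
outputs = Dense(1, activation='sigmoid')(attention_vector) # assumed binary-classification head
model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam', loss='binary_crossentropy')

Because __call__ (rather than call) is overridden, the block assembles its Dense/Lambda/dot sublayers with the functional API at the moment it is applied, so it is intended to be used on symbolic Keras tensors exactly as above.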