changed activation function to gelu

Setra Solofoniaina 2021-04-02 10:15:54 +03:00
parent 00fcc2633b
commit 42efbf86b6


@@ -35,7 +35,7 @@ class BERT(nn.Module):
         # multi-layers transformer blocks, deep network
         #self.transformer_blocks = nn.ModuleList(
         #    [TransformerBlock(hidden, attn_heads, hidden * 4, dropout) for _ in range(n_layers)])
-        encoder_layers = nn.TransformerEncoderLayer(hidden, attn_heads, self.feed_forward_hidden, dropout)
+        encoder_layers = nn.TransformerEncoderLayer(hidden, attn_heads, self.feed_forward_hidden, dropout, activation="gelu")
         self.transformer_encoder = nn.TransformerEncoder(encoder_layers, n_layers)
 
     def _generate_square_subsequent_mask(self, sz):
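
For context, a minimal standalone sketch of the pattern this commit changes (not taken from the repository; the hyperparameter values below are assumed placeholders, while the real BERT class receives them as constructor arguments):

import torch
import torch.nn as nn

# Assumed hyperparameters for illustration only.
hidden, attn_heads, n_layers, dropout = 768, 12, 12, 0.1
feed_forward_hidden = hidden * 4

# activation="gelu" replaces the default "relu", matching the activation used in the original BERT.
encoder_layer = nn.TransformerEncoderLayer(
    d_model=hidden,
    nhead=attn_heads,
    dim_feedforward=feed_forward_hidden,
    dropout=dropout,
    activation="gelu",
)
transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=n_layers)

# Quick shape check: the default input layout is (sequence_length, batch_size, hidden).
x = torch.randn(16, 2, hidden)
print(transformer_encoder(x).shape)  # torch.Size([16, 2, 768])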