Skip to content

Commit

Permalink
still use layernorm for everything
Browse files — browse the repository at this point in the history
  • Loading branch information
www committed Aug 13, 2021
1 parent c68ea16 commit 546114c
Showing 1 changed file with 3 additions and 9 deletions.
12 changes: 3 additions & 9 deletions src/model.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -296,8 +296,6 @@ def __init__(self, config, layer_id):
self.ln2 = nn.LayerNorm(config.n_embd)

if config.model_type == 'RWKV':
- self.ln1 = FixedNorm(config.n_embd)
- self.ln2 = FixedNorm(config.n_embd)
self.attn = RWKV_TimeMix(config, layer_id)
self.mlp = RWKV_ChannelMix(config, layer_id)
elif config.model_type == 'MHA_rotary':
Expand All @@ -323,11 +321,7 @@ def __init__(self, config):

self.blocks = nn.Sequential(*[Block(config, i) for i in range(config.n_layer)])

- if config.model_type == 'RWKV':
- self.ln_f = FixedNorm(config.n_embd)
- else:
- self.ln_f = nn.LayerNorm(config.n_embd)
-
+ self.ln_f = nn.LayerNorm(config.n_embd)
self.head = nn.Linear(config.n_embd, config.vocab_size, bias=False)

self.ctx_len = config.ctx_len
Expand All @@ -347,9 +341,9 @@ def __init__(self, config):
elif 'blocks.' in k:
block_id = int(k.split('.')[1])
if 'receptance.weight' in k:
- ww[k] *= 0.2 # 0.2 ~ 0.5 gives similar results
+ ww[k] *= 0 # 0 works the best
elif 'attn.key.weight' in k:
- ww[k] *= 0.2 # 0.2 ~ 0.5 gives similar results
+ ww[k] *= 0 # 0 works the best
elif 'attn.output.weight' in k:
ww[k] *= 1 / pow(1+block_id, 0.5) # 0.5 ~ 0.7 gives similar results
elif 'mlp.weight.weight' in k:
Expand Down

0 comments on commit 546114c

Please sign in to comment.