Skip to content

Commit

Permalink
Update n-grams (#993)
Browse files Browse the repository at this point in the history
  • Loading branch information
vulcandth authored Feb 24, 2025
1 parent 280fee5 commit 462a132
Show file tree
Hide file tree
Showing 3 changed files with 449 additions and 170 deletions.
128 changes: 64 additions & 64 deletions charmap.asm
Original file line number Diff line number Diff line change
Expand Up @@ -244,73 +244,73 @@ DEF NGRAMS_VAR_START EQU $4f
; N-gram text-compression charmap (RGBDS).
; Each `charmap "<string>", $xx` line binds a multi-character string to the
; single byte value $xx, so the assembler emits one byte for the whole n-gram.
;
; NOTE(review): this span is a rendered diff whose +/- markers were stripped.
; Pre-commit and post-commit entries are interleaved, which is why the same
; string shows up twice with different codes (e.g. "ou" -> $0b and "ou" -> $0a
; directly below) and why code values repeat. Do not read it as one
; consistent table; separate the two revisions before relying on any value.
;
; `newcharmap default, compressing`: per the RGBDS manual, `newcharmap name,
; base` creates charmap `name` as a copy of `base` and switches to it.
; NOTE(review): confirm that `compressing` is defined earlier in charmap.asm.
newcharmap default, compressing

charmap "e ", $09
charmap " t", $0a
charmap "ou", $0b
charmap "ou", $0a
charmap "th", $0b
charmap "in", $0c
charmap "th", $0d
charmap "he", $0e
charmap "t ", $0f
charmap "er", $10
charmap "t ", $0d
charmap "er", $0e
charmap "s ", $0f
charmap "an", $10
charmap "on", $11
charmap "re", $12
charmap "s ", $13
charmap "at", $14
charmap "an", $15
charmap "to", $16
charmap "ha", $17
charmap "ng", $18
charmap "it", $19
charmap "is", $1a
charmap "ea", $1b
charmap "ve", $1c
charmap "ar", $1d
charmap "to ", $12
charmap "d ", $13
charmap "ea", $14
charmap "y ", $15
charmap "en", $16
charmap "or", $17
charmap "at", $18
charmap ", ", $19
charmap "ll", $1a
charmap "I ", $1b
charmap "ar", $1c
charmap "it", $1d
charmap "st", $1e
charmap "le", $1f
charmap "or", $20
charmap "te", $21
charmap "as", $22
charmap "yo", $23
charmap "y ", $24
charmap "r ", $25
charmap " b", $26
charmap "en", $27
charmap "me", $28
charmap "e t", $29
charmap ", ", $2a
charmap "es", $2b
charmap "e you", $2c
charmap "se", $2d
charmap "ne", $2e
charmap " h", $2f
charmap "I ", $30
charmap "our", $31
charmap "You", $32
charmap "nd", $33
charmap "ow", $34
charmap " c", $35
charmap " wa", $36
charmap "ome", $37
charmap "are", $38
charmap "The", $39
charmap "t's", $3a
charmap "ut", $3b
charmap "nt", $3c
charmap "the", $3d
charmap "you", $3e
charmap "ing", $3f
charmap "hat", $40
charmap "and", $41
charmap "for", $42
charmap "all", $43
charmap "here", $44
charmap "that", $45
charmap "have", $46
charmap "rain", $47
charmap "this", $48
charmap "ight", $49
charmap "with", $4a
charmap "ould", $4b
charmap "attle", $4c
charmap "ow", $1f
charmap "ha", $20
charmap "a ", $21
charmap "om", $22
charmap "le", $23
charmap "of ", $24
charmap "se", $25
charmap "re", $26
charmap "to", $27
charmap "'s ", $28
charmap "Th", $29
charmap "is", $2a
charmap "ra", $2b
charmap "ch", $2c
charmap "I'm ", $2d
charmap "o ", $2e
charmap "gh", $2f
charmap "es", $30
charmap "wa", $31
charmap "e.", $32
charmap "oo", $33
charmap "ck", $34
charmap "r ", $35
charmap "l ", $36
charmap "be", $37
charmap "li", $38
charmap "ed", $39
charmap "us", $3a
charmap "ti", $3b
charmap " you", $3c
charmap "ing ", $3d
charmap "the ", $3e
charmap "you", $3f
charmap "ing", $40
charmap "is ", $41
charmap "the", $42
charmap "You ", $43
charmap "er ", $44
charmap "with", $45
charmap "batt", $46
charmap "for", $47
charmap "ve ", $48
charmap "ed ", $49
charmap "It's ", $4a
charmap "that ", $4b
charmap " you ", $4c

; The highest code assigned in this hunk is $4c; codes $4d-$50 are not
; assigned here.
; NOTE(review): presumably NGRAMS_END is an exclusive upper bound and $4d-$50
; cover entries defined outside this hunk (the hunk header shows
; NGRAMS_VAR_START EQU $4f) - confirm against the full charmap.asm.
DEF NGRAMS_END EQU $51

Expand Down
212 changes: 106 additions & 106 deletions data/text/ngrams.asm
Original file line number Diff line number Diff line change
@@ -1,73 +1,73 @@
; NgramStrings: lookup table with one `dr` entry per n-gram, each naming the
; local label that holds that n-gram's text.
;
; `table_width`, `dr` and `assert_table_length` are project macros that are
; not defined in this view.
; NOTE(review): presumably `table_width 1` declares one-byte entries, `dr`
; emits a one-byte reference to the label, and the entry order must match the
; charmap code order starting at NGRAMS_START - confirm against the macro
; definitions.
;
; NOTE(review): this span is a rendered diff whose +/- markers were stripped.
; Pre-commit and post-commit entries are interleaved, so labels such as `.in`
; and `.th` appear more than once and the visible entry count does not
; correspond to either revision of the table.
NgramStrings:
table_width 1
dr .e_
dr ._t
dr .ou
dr .in
dr .th
dr .he
dr .in
dr .t_
dr .er
dr .on
dr .re
dr .s_
dr .at
dr .an
dr .to
dr .ha
dr .ng
dr .it
dr .is
dr .on
dr .to_
dr .d_
dr .ea
dr .ve
dr .ar
dr .st
dr .le
dr .or
dr .te
dr .as
dr .yo
dr .y_
dr .r_
dr ._b
dr .en
dr .me
dr .e_t
dr .comma_
dr .es
dr .e_you
dr .se
dr .ne
dr ._h
dr .or
dr .at
dr .$_
dr .ll
dr .I_
dr .our
dr .You
dr .nd
dr .ar
dr .it
dr .st
dr .ow
dr ._c
dr ._wa
dr .ome
dr .are
dr .The
dr .t__s
dr .ut
dr .nt
dr .the
dr .ha
dr .a_
dr .om
dr .le
dr .of_
dr .se
dr .re
dr .to
dr .#s_
dr .Th
dr .is
dr .ra
dr .ch
dr .I#m_
dr .o_
dr .gh
dr .es
dr .wa
dr .e@
dr .oo
dr .ck
dr .r_
dr .l_
dr .be
dr .li
dr .ed
dr .us
dr .ti
dr ._you
dr .ing_
dr .the_
dr .you
dr .ing
dr .hat
dr .and
dr .for
dr .all
dr .here
dr .that
dr .have
dr .rain
dr .this
dr .ight
dr .is_
dr .the
dr .You_
dr .er_
dr .with
dr .ould
dr .attle
dr .batt
dr .for
dr .ve_
dr .ed_
dr .It#s_
dr .that_
dr ._you_
dr .Poke
dr .Pokemon
; Checks the number of entries emitted so far against the size of the
; fixed-string range (NGRAMS_VAR_START - NGRAMS_START).
; NOTE(review): the exact check is defined by the `assert_table_length` macro
; elsewhere in the project.
assert_table_length NGRAMS_VAR_START - NGRAMS_START
Expand All @@ -80,72 +80,72 @@ NgramStrings:
; Data referenced by the NgramStrings table. This hunk starts after a
; collapsed region of the diff, so earlier entries are not visible here.
;
; `.Rival` and `.Trendy` each store a 16-bit value (`dw`) taken from a symbol
; defined elsewhere.
; NOTE(review): presumably these are addresses of RAM name buffers that are
; substituted at runtime - confirm.
;
; Every other local label holds the literal text of one n-gram via the
; project macro `rawchar`; each string ends in "@".
; NOTE(review): "@" looks like the string terminator and `rawchar` presumably
; emits the characters without applying the n-gram charmap - confirm against
; the macro definition.
;
; Label naming as seen below: `_` stands for a space, `#` for an apostrophe,
; `$` or `comma` for a comma, and `@` for a period.
;
; NOTE(review): this span is a rendered diff whose +/- markers were stripped.
; Pre-commit and post-commit lines are interleaved, so several local labels
; (e.g. `.in`, `.th`, `.on`) are defined twice here; in a real source file
; duplicate labels would not assemble.
.Rival: dw wRivalName
.Trendy: dw wTrendyPhrase
.e_: rawchar "e @"
._t: rawchar " t@"
.ou: rawchar "ou@"
.in: rawchar "in@"
.th: rawchar "th@"
.he: rawchar "he@"
.in: rawchar "in@"
.t_: rawchar "t @"
.er: rawchar "er@"
.on: rawchar "on@"
.re: rawchar "re@"
.s_: rawchar "s @"
.at: rawchar "at@"
.an: rawchar "an@"
.to: rawchar "to@"
.ha: rawchar "ha@"
.ng: rawchar "ng@"
.it: rawchar "it@"
.is: rawchar "is@"
.on: rawchar "on@"
.to_: rawchar "to @"
.d_: rawchar "d @"
.ea: rawchar "ea@"
.ve: rawchar "ve@"
.ar: rawchar "ar@"
.st: rawchar "st@"
.le: rawchar "le@"
.or: rawchar "or@"
.te: rawchar "te@"
.as: rawchar "as@"
.yo: rawchar "yo@"
.y_: rawchar "y @"
.r_: rawchar "r @"
._b: rawchar " b@"
.en: rawchar "en@"
.me: rawchar "me@"
.e_t: rawchar "e t@"
.comma_: rawchar ", @"
.es: rawchar "es@"
.e_you: rawchar "e you@"
.se: rawchar "se@"
.ne: rawchar "ne@"
._h: rawchar " h@"
.or: rawchar "or@"
.at: rawchar "at@"
.$_: rawchar ", @"
.ll: rawchar "ll@"
.I_: rawchar "I @"
.our: rawchar "our@"
.You: rawchar "You@"
.nd: rawchar "nd@"
.ar: rawchar "ar@"
.it: rawchar "it@"
.st: rawchar "st@"
.ow: rawchar "ow@"
._c: rawchar " c@"
._wa: rawchar " wa@"
.ome: rawchar "ome@"
.are: rawchar "are@"
.The: rawchar "The@"
.t__s: rawchar "t's@"
.ut: rawchar "ut@"
.nt: rawchar "nt@"
.the: rawchar "the@"
.ha: rawchar "ha@"
.a_: rawchar "a @"
.om: rawchar "om@"
.le: rawchar "le@"
.of_: rawchar "of @"
.se: rawchar "se@"
.re: rawchar "re@"
.to: rawchar "to@"
.#s_: rawchar "'s @"
.Th: rawchar "Th@"
.is: rawchar "is@"
.ra: rawchar "ra@"
.ch: rawchar "ch@"
.I#m_: rawchar "I'm @"
.o_: rawchar "o @"
.gh: rawchar "gh@"
.es: rawchar "es@"
.wa: rawchar "wa@"
.e@: rawchar "e.@"
.oo: rawchar "oo@"
.ck: rawchar "ck@"
.r_: rawchar "r @"
.l_: rawchar "l @"
.be: rawchar "be@"
.li: rawchar "li@"
.ed: rawchar "ed@"
.us: rawchar "us@"
.ti: rawchar "ti@"
._you: rawchar " you@"
.ing_: rawchar "ing @"
.the_: rawchar "the @"
.you: rawchar "you@"
.ing: rawchar "ing@"
.hat: rawchar "hat@"
.and: rawchar "and@"
.for: rawchar "for@"
.all: rawchar "all@"
.here: rawchar "here@"
.that: rawchar "that@"
.have: rawchar "have@"
.rain: rawchar "rain@"
.this: rawchar "this@"
.ight: rawchar "ight@"
.is_: rawchar "is @"
.the: rawchar "the@"
.You_: rawchar "You @"
.er_: rawchar "er @"
.with: rawchar "with@"
.ould: rawchar "ould@"
.attle: rawchar "attle@"
.batt: rawchar "batt@"
.for: rawchar "for@"
.ve_: rawchar "ve @"
.ed_: rawchar "ed @"
.It#s_: rawchar "It's @"
.that_: rawchar "that @"
._you_: rawchar " you @"
.Poke: rawchar "Poké@"
.Pokemon: rawchar "Pokémon@"
Loading

0 comments on commit 462a132

Please sign in to comment.