Skip to content

Commit

Permalink
Add simple English ngrams module
Browse files Browse the repository at this point in the history
  • Loading branch information
hoelzro committed Mar 6, 2016
1 parent 10331ff commit 9b570d8
Showing 1 changed file with 120 additions and 0 deletions.
120 changes: 120 additions & 0 deletions Tutor/EnglishNGrams.elm
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
module Tutor.EnglishNGrams where

-- Letters from https://en.wikipedia.org/wiki/Letter_frequency#Relative_frequencies_of_letters_in_the_English_language
-- Bigrams from https://en.wikipedia.org/wiki/Bigram#Bigram_frequency_in_the_English_language
-- Trigrams and Tetragrams from http://www.cryptograms.org/letter-frequencies.php#Most_common_trigrams_.28in_order.29

-- Single-letter frequencies for English text, one (frequency, letter) pair
-- per letter of the alphabet. The figures look like percentages (they sum to
-- roughly 100); confirm against the Wikipedia table cited above.
--
-- Entries are kept in descending order of frequency, like the other tables in
-- this module, so that taking a prefix of the list yields the most common
-- letters.
monograms : List (Float, String)
monograms =
  [ (12.702, "e")
  , (9.056, "t")
  , (8.167, "a")
  , (7.507, "o")
  , (6.966, "i")
  , (6.749, "n")
  , (6.327, "s")
  , (6.094, "h")
  , (5.987, "r")
  , (4.253, "d")
  , (4.025, "l")
  , (2.782, "c")
  , (2.758, "u")
  , (2.406, "m")
  , (2.361, "w")
  , (2.228, "f")
  , (2.015, "g")
  , (1.974, "y")
  , (1.929, "p")
  , (1.492, "b")
  , (0.978, "v")
  , (0.772, "k")
  , (0.153, "j")
  , (0.150, "x")
  , (0.095, "q")
  , (0.074, "z")
  ]

-- Two-letter sequence frequencies for English text as (frequency, bigram)
-- pairs, most common first. Values are presumably percentages taken from the
-- Wikipedia bigram table cited at the top of the module.
bigrams : List (Float, String)
bigrams =
  [ (1.52, "th")
  , (1.28, "he")
  , (0.94, "in")
  , (0.94, "er")
  , (0.82, "an")
  , (0.68, "re")
  , (0.63, "nd")
  , (0.59, "at")
  , (0.57, "on")
  , (0.56, "nt")
  , (0.56, "ha")
  , (0.56, "es")
  , (0.55, "st")
  , (0.55, "en")
  , (0.53, "ed")
  , (0.52, "to")
  , (0.50, "it")
  , (0.50, "ou")
  , (0.47, "ea")
  , (0.46, "hi")
  , (0.46, "is")
  , (0.43, "or")
  , (0.34, "ti")
  , (0.33, "as")
  , (0.27, "te")
  , (0.19, "et")
  , (0.18, "ng")
  , (0.16, "of")
  , (0.09, "al")
  , (0.09, "de")
  , (0.08, "se")
  , (0.08, "le")
  , (0.06, "sa")
  , (0.05, "si")
  , (0.04, "ar")
  , (0.04, "ve")
  , (0.04, "ra")
  , (0.02, "ld")
  , (0.02, "ur")
  ]

-- Three-letter sequence frequencies for English text as (frequency, trigram)
-- pairs, most common first. Source: the cryptograms.org table cited at the
-- top of the module.
trigrams : List (Float, String)
trigrams =
  [ (3.508232, "the")
  , (1.593878, "and")
  , (1.147042, "ing")
  , (0.822444, "her")
  , (0.650715, "hat")
  , (0.596748, "his")
  , (0.593593, "tha")
  , (0.560594, "ere")
  , (0.555372, "for")
  , (0.530771, "ent")
  , (0.506454, "ion")
  , (0.461099, "ter")
  , (0.460487, "was")
  , (0.437213, "you")
  , (0.431250, "ith")
  , (0.430732, "ver")
  , (0.422758, "all")
  , (0.397290, "wit")
  , (0.394796, "thi")
  , (0.378058, "tio")
  ]

-- Four-letter sequence frequencies for English text as
-- (frequency, tetragram) pairs, most common first. Source: the
-- cryptograms.org table cited at the top of the module.
tetragrams : List (Float, String)
tetragrams =
  [ (0.761242, "that")
  , (0.604501, "ther")
  , (0.573866, "with")
  , (0.551919, "tion")
  , (0.374549, "here")
  , (0.369920, "ould")
  , (0.309440, "ight")
  , (0.290544, "have")
  , (0.284292, "hich")
  , (0.283826, "whic")
  , (0.276333, "this")
  , (0.270413, "thin")
  , (0.262421, "they")
  , (0.262386, "atio")
  , (0.260695, "ever")
  , (0.258580, "from")
  , (0.253447, "ough")
  , (0.231089, "were")
  , (0.229944, "hing")
  , (0.223347, "ment")
  ]

-- Every frequency table in this module paired with its n-gram length:
-- (n, table of (frequency, n-gram) pairs).
ngrams =
  [ (1, monograms)
  , (2, bigrams)
  , (3, trigrams)
  , (4, tetragrams)
  ]

0 comments on commit 9b570d8

Please sign in to comment.