Skip to content

Commit

Permalink
Add and update languages.
Browse files Browse the repository at this point in the history
  • Loading branch information
jzohrab committed Jun 11, 2024
1 parent 50e0ccb commit 612aced
Show file tree
Hide file tree
Showing 29 changed files with 598 additions and 173 deletions.
29 changes: 29 additions & 0 deletions ainu/definition.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Ainu language definition: lookup dictionaries followed by the text-parsing
# settings (substitutions, sentence splitting, word characters).
name: Ainu

# Lookup sources. Each entry says what it is for (terms or sentences), how it
# is shown (embedded or popup), its URL, and whether it is active.
# '###' in a URL is presumably the slot for the looked-up text - confirm
# against the consumer of this file.
dictionaries:
  - for: terms
    type: embedded
    url: https://en.wiktionary.org/wiki/####Ainu
    active: true

  - for: terms
    type: popup
    url: https://ainugo.nam.go.jp/search/word?word=###&typeDict=on&person=&matCd=&minLineCd=&maxLineCd=
    active: true

  - for: terms
    type: popup
    url: https://ainu.ninjal.ac.jp/topic/dictionary/en/?lute=###
    active: true

  - for: terms
    type: popup
    url: https://tatoeba.org/en/sentences/search?from=ain&query=###&to=
    active: true

  # Quoted because a plain scalar may not start with '#'.
  - for: sentences
    type: embedded
    url: '###'
    active: true

show_romanization: true
right_to_left: false
parser_type: spacedel

# Appears to be a '|'-separated list of from=to pairs.
# NOTE(review): 'シ=シ' reads as an identity pair, and the direction of the
# kana pairs is mixed (e.g. 'ㇰ=ク' vs 'ム=ㇺ'). The text may have been
# normalised in transit - verify these pairs against the upstream file.
character_substitutions: ´='|`='|’='|‘='|...=…|..=‥|ㇰ=ク|シ=シ|ㇲ=ス|ㇳ=ト|ㇴ=ヌ|ㇵ=ハ|ㇶ=ヒ|ㇷ=フ|ㇸ=ヘ|ㇹ=ホ|ム=ㇺ|ラ=ㇻ|リ=ㇼ|ル=ㇽ|レ=ㇾ|ロ=ㇿ|ク=ク|プ=プ

# Characters listed as sentence terminators (ASCII and CJK/full-width forms).
split_sentences: .!?。?!

# Single-quoted so the backslashes in the \uXXXX ranges stay literal in YAML.
split_sentence_exceptions: '[\u3040-\u309F]|[\u30A0-\u30FF]|[\u31F0-\u31FF]|[一-龥]|ㇷ゚|セ゚|ツ゚|ト゚'

# Latin letters plus the same kana/kanji ranges and combining forms as above.
word_chars: a-zA-Z\u3040-\u309F\u30A0-\u30FF\u31F0-\u31FF一-龥ㇷ゚セ゚ツ゚ト゚
42 changes: 42 additions & 0 deletions ainu/story_1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# title: 織田ステノさんの民話(ア) 目つきの悪い犬

001

アコㇿ ミチ アン アコㇿ ハポ アン
a=kor mici an a=kor hapo an
002

シネナンネ ワ
sinen _an=ne wa
003

アコㇿ ミチ ウタㇻ トゥラ オカアニケ
a=kor mici utar tura oka=an __hike
004

エキㇺネアン マ
ekimne=an _wa
005

カムイ エネ ユク エネ アントモッ ワ
kamuy _hene yuk _hene an=tomot wa
006

アルプネウタリ アンレス カネ オカアン
a=rupneutari an=resu kane oka=an
007

アウタリ ウタㇻ カ ポロンノ オカ
a=utari utar ka poronno oka
008

ピㇼカ メノコポ カ ポロンノ オカ コㇿカ
pirka menokopo ka poronno oka korka
009

「シネン ネ エヤイモニコン ナ
“sinen ne e=yaymonikor_ na
010

ピㇼカ ポンメノコ シネン トゥラ アン」
pirka ponmenoko sinen tura _yan”
2 changes: 1 addition & 1 deletion albanian/definition.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,5 @@ right_to_left: false
parser_type: spacedel
character_substitutions: ´='|`='|’='|‘='|...=…|..=‥
split_sentences: .!?
split_sentence_exceptions: d.m.th.|khs.|mb.|ndajf.|pj.|sh.|vj.|Mr.|Mrs.|Dr.|[A-Z].|Vd.|Vds.
split_sentence_exceptions: '[A-Z].|etj.|të.|Nr.|në.|Dr.|p.sh.|dhe.|për.|me.|red.|p.e.s.|e.s.|d.m.th.'
word_chars: a-zA-ZÀ-ÖØ-öø-ȳáéíóúÁÉÍÓÚñÑ
2 changes: 1 addition & 1 deletion amharic/definition.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,5 @@ right_to_left: false
parser_type: spacedel
character_substitutions: ´='|`='|’='|‘='|...=…|..=‥
split_sentences: .!¡?።፧፠፨
split_sentence_exceptions: ዓ.ም.|እ.ኤ.አ.|p.|ዓክልበ.|ግ.|እ.አ.አ.|ኪ.ሜ.|ክ.በ.|ም.|እ.ኤ.ኣ.|ዓ.ዓ.|ኤ.አ.|አ.|ኤ.|እ.|አ.አ.|ዓ.|ወዘተ.|አ.ም.|መ.
split_sentence_exceptions: '[A-Z].|[\u1200-\u1359\u1369-\u137C].|ዓ.ም.|እ.ኤ.አ.|ዓክልበ.|እ.አ.አ.|ኪ.ሜ.|ክ.በ.|እ.ኤ.ኣ.|ዓ.ዓ.|ኤ.አ.|አ.አ.|ወዘተ.|አ.ም.|ምዕ.'
word_chars: \u1200-\u1359\u1369-\u137C
32 changes: 8 additions & 24 deletions armenian/definition.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,46 +4,30 @@ dictionaries:
type: embedded
url: https://en.wiktionary.org/wiki/####Armenian
active: true
- for: terms
type: embedded
url: https://hy.wiktionary.org/wiki/###
active: false
- for: terms
type: popup
url: https://tatoeba.org/en/sentences/search?from=hye&query=###&to=
active: false
- for: terms
type: embedded
url: http://www.nayiri.com/search?l=en&dt=HY_EN&r=0&query=###
active: true
- for: terms
type: embedded
url: http://www.nayiri.com/search?dt=HY_HY&r=0&l=en&query=###
active: true
- for: terms
type: embedded
url: https://calfa.fr/search?query=###
active: false
- for: terms
type: embedded
url: https://www.verbix.com/webverbix/go.php?&D1=135&T1=###
active: true
- for: sentences
type: popup
url: https://translate.google.com/?sl=hy&tl=en&text=###&op=translate
active: true
- for: sentences
type: popup
url: https://translate.yandex.com/?source_lang=hy&target_lang=en&text=###
active: false
- for: sentences
- for: terms
type: embedded
url: http://www.nayiri.com/search?dt=HY_HY&r=0&l=en&query=###
active: true
- for: terms
type: embedded
url: https://www.bing.com/translator/?from=hy&to=en&text=###
active: false
url: https://www.verbix.com/webverbix/go.php?&D1=135&T1=###
active: true
show_romanization: false
right_to_left: false
parser_type: spacedel
character_substitutions: ´='|`='|’='|‘='|...=…|..=‥
split_sentences: ։՜՛՞
split_sentence_exceptions: արլ.|արմ.|բն.|բրբ.|գլխ.|դ.|դդ.|հզ.|հզվդ.|հնց.|հս.|հվ.|մ.թ.|մ.թ.ա.|մլն.|մլրդ.|պ-ն|պատմ.|տ-ն
split_sentence_exceptions: '[A-Z]|[\u0531-\u0556\u0561-\u0588].|մ.թ.ա.|դր.|Հ.Գ.|Ն.Ս.Օ.Տ.Տ.|ս.թ.|Ս.թ.|են.|համար.|մասին.|հետ.'
word_chars: \u0531-\u0556\u0561-\u0588
29 changes: 29 additions & 0 deletions bosnian/definition.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Bosnian language definition: lookup dictionaries followed by the
# text-parsing settings (substitutions, sentence splitting, word characters).
name: Bosnian

# Lookup sources. Each entry says what it is for (terms or sentences), how it
# is shown (embedded or popup), its URL, and whether it is active.
# '###' in a URL is presumably the slot for the looked-up text - confirm
# against the consumer of this file.
dictionaries:
  - for: terms
    type: embedded
    url: https://www.rjecnik.ba/prevod/engleski/###.html
    active: true

  - for: terms
    type: embedded
    url: https://en.wiktionary.org/wiki/####Serbo-Croatian
    active: true

  - for: terms
    type: popup
    url: https://glosbe.com/bs/en/###
    active: true

  - for: sentences
    type: popup
    url: https://translate.google.com/?sl=bs&tl=en&text=###&op=translate
    active: true

  - for: terms
    type: embedded
    url: https://bs.wiktionary.org/wiki/###
    active: true

show_romanization: true
right_to_left: false
parser_type: spacedel

# Appears to be a '|'-separated list of from=to pairs.
character_substitutions: ´='|`='|’='|‘='|...=…|..=‥

split_sentences: .!?

# Single-quoted so the backslashes in the \uXXXX escapes stay literal in YAML.
# The second alternative must read '\u0400-\u04FF' (the Cyrillic range also
# used in word_chars below). Without the leading backslash the class turns
# into the literals 'u', '0', '4' plus a range running from '0' to U+04FF.
split_sentence_exceptions: '[A-Z].|[\u0400-\u04FF].|bh.|d.o.o.|prof.|d.d.|tj.|tzv.|v.d.|Bh.|npr.|dr.|Beograda.|br.|a.d.|dipl.|p.n.e.|god.'

# Latin (including accented letters) plus Cyrillic U+0400-U+04FF.
word_chars: a-zA-ZÀ-ÖØ-öø-ȳáéíóúÁÉÍÓÚñÑ\u0400-\u04ff
7 changes: 7 additions & 0 deletions bosnian/story_1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# title: Kada je Bog stvorio sve

Ko nas je stvorio? Biblija, riječ Božja, govori nam kako je nastao ljudski rod. Davno, davno, Bog je stvorio prvog čovjeka i nazvao ga je Adam. Bog je stvorio Adama od zemaljske prašine. Kada je Bog udahnuo život u Adama, on je oživjeo. Našao se tada u predivnom vrtu zvanom Eden.

Prije nego što je Bog stvorio Adama, stvorio je prelijep svijet pun čudesnih stvari. Korak po korak, Bog je dodavao brježuljkaste i prerijske predjele, mirisno cvijeće i visoko drveće, svijetle pernate ptice i zujave pčele, okretne kitove i klizave puževe. Ustvari, Bog je sve tamo stvorio – sve.

Na samom početku, prije nego što je Bog stvorio bilo šta, nije postojalo ništa sem Boga. Bez ljudi ili mjesta ili stvari. Bez svjetla i bez tame. Bez gore i bez dolje. Bez juče i bez sutra. Postojao je samo Bog koji nije imao početak. A onda je počeo sa djelanjem!
40 changes: 23 additions & 17 deletions english/definition.yaml
Original file line number Diff line number Diff line change
@@ -1,19 +1,25 @@
name: English
dictionaries:
- for: terms
type: embedded
url: https://en.thefreedictionary.com/###
- for: terms
type: popup
url: https://www.collinsdictionary.com/dictionary/english/###
- for: sentences
type: popup
url: https://www.deepl.com/translator#en/en/###
# show_romanization:
# right_to_left:

# parser_type: defaults to space delimited.
# character_substitutions:
# split_sentences:
# split_sentence_exceptions:
# word_chars:
- for: terms
type: embedded
url: https://simple.wiktionary.org/wiki/###
active: true
- for: terms
type: popup
url: https://www.collinsdictionary.com/dictionary/english/###
active: true
- for: sentences
type: popup
url: https://www.deepl.com/translator#en/en/###
active: true
- for: terms
type: popup
url: https://conjugator.reverso.net/conjugation-english-verb-###.html
active: true
show_romanization: false
right_to_left: false
parser_type: spacedel
character_substitutions: ´='|`='|’='|‘='|...=…|..=‥
split_sentences: .!?
split_sentence_exceptions: Mr.|Mrs.|Dr.|[A-Z].|Vd.|Vds.|U.S.|St.|No.|pp.|Jr.|p.m.|a.m.|Inc.|Gov.|Rep.|Ms.|Sen.|in.|Co.
word_chars: a-zA-ZÀ-ÖØ-öø-ȳáéíóúÁÉÍÓÚñÑ
41 changes: 27 additions & 14 deletions french/definition.yaml
Original file line number Diff line number Diff line change
@@ -1,16 +1,29 @@
name: French
dictionaries:
- for: terms
type: embedded
url: https://fr.thefreedictionary.com/###
- for: sentences
type: popup
url: https://www.deepl.com/translator#fr/en/###
# show_romanization:
# right_to_left:

# parser_type: defaults to space delimited.
# character_substitutions:
# split_sentences:
# split_sentence_exceptions:
# word_chars:
- for: terms
type: embedded
url: https://www.wordreference.com/fren/###
active: true
- for: terms
type: embedded
url: https://en.wiktionary.org/wiki/####French
active: true
- for: sentences
type: popup
url: https://www.deepl.com/translator#fr/en/###
active: true
- for: terms
type: popup
url: https://www.larousse.fr/dictionnaires/francais/###
active: true
- for: terms
type: popup
url: https://conjugator.reverso.net/conjugation-french-verb-###.html
active: true
show_romanization: false
right_to_left: false
parser_type: spacedel
character_substitutions: ´='|`='|’='|‘='|...=…|..=‥
split_sentences: .!?
split_sentence_exceptions: '[A-Z].|etc.|sens.|fin.|St.|sûr.|sol.|nom.|point.|Dr.|bout.|dos.|haut.|pp.|vol.|av.'
word_chars: a-zA-ZÀ-ÖØ-öø-ȳáéíóúÁÉÍÓÚñÑ
43 changes: 26 additions & 17 deletions german/definition.yaml
Original file line number Diff line number Diff line change
@@ -1,20 +1,29 @@
name: German
dictionaries:
- for: terms
type: embedded
url: https://de.thefreedictionary.com/###
- for: terms
type: embedded
url: https://www.wordreference.com/deen/###
- for: sentences
type: popup
url: https://www.deepl.com/translator#de/en/###
- for: terms
type: embedded
url: https://www.dict.cc/?s=###
active: true
- for: terms
type: embedded
url: https://en.wiktionary.org/wiki/####German
active: true
- for: sentences
type: popup
url: https://www.deepl.com/translator#de/en/###
active: true
- for: terms
type: popup
url: https://www.duden.de/suchen/dudenonline/###
active: true
- for: terms
type: popup
url: https://conjugator.reverso.net/conjugation-german-verb-###.html
active: true
show_romanization: false
right_to_left: false
parser_type: spacedel
character_substitutions: ´='|`='|’='|‘='|...=…|..=‥
split_sentences: .!?
split_sentence_exceptions: '[A-Z].|Dr.|St.|bzw.|Mio.|Co.|ca.|Mrd.|u.a.|Prof.|Nr.|Hrsg.|Chr.|II.|III.|z.B.|usw.|usf.|d.h.|e.V.'
word_chars: a-zA-ZÀ-ÖØ-öø-ȳáéíóúÁÉÍÓÚñÑ\u200C\u200D
# show_romanization:
# right_to_left:

# parser_type: defaults to space delimited.
# character_substitutions:
# split_sentences:
# split_sentence_exceptions:
# word_chars:
38 changes: 24 additions & 14 deletions greek/definition.yaml
Original file line number Diff line number Diff line change
@@ -1,19 +1,29 @@
name: Greek
dictionaries:
- for: terms
type: embedded
url: https://www.wordreference.com/gren/###
- for: terms
type: embedded
url: https://en.wiktionary.org/wiki/###
- for: sentences
type: popup
url: https://www.deepl.com/translator#el/en/###
- for: terms
type: embedded
url: https://www.wordreference.com/gren/###
active: true
- for: terms
type: embedded
url: https://en.wiktionary.org/wiki/####Greek
active: true
- for: sentences
type: popup
url: https://www.deepl.com/translator#el/en/###
active: true
- for: terms
type: embedded
url: https://www.greek-language.gr/greekLang/modern_greek/tools/lexica/search.html?sin=all&lq=###
active: true
- for: terms
type: embedded
url: https://cooljugator.com/gr/###
active: true
show_romanization: true
# right_to_left:

# parser_type: defaults to space delimited.
# character_substitutions:
right_to_left: false
parser_type: spacedel
character_substitutions: ´='|`='|’='|‘='|...=…|..=‥
split_sentences: .!?;
split_sentence_exceptions: Mr.|Mrs.|Dr.|[A-Z].|κτλ.|κλπ.|π.χ.|λ.χ.|κ.ά|δηλ.|Κος.|Κ.|Κα.|μ.Χ.|ΥΓ.|μ.μ.|π.μ.|σελ.|κεφ.|βλ.|αι.
split_sentence_exceptions: '[A-Z].|[Α-Ω].|κτλ.|κλπ.|π.χ.|λ.χ.|κ.ά|δηλ.|Κος.|Κ.|Κα.|μ.Χ.|ΥΓ.|μ.μ.|π.μ.|σελ.|κεφ.|βλ.|αι.|Ε.Ε.|Δ.Σ.|Α.Ε.|Γ.Σ.|π.Χ.|τ.χλμ.|τ.μ.|κ.λπ.'
word_chars: α-ωΑ-ΩάόήέώύίΊΏΈΉΌΆΎϊΪϋΫΐΰ
34 changes: 20 additions & 14 deletions hindi/definition.yaml
Original file line number Diff line number Diff line change
@@ -1,19 +1,25 @@
name: Hindi
dictionaries:
- for: terms
type: embedded
url: https://www.boltidictionary.com/en/search?s=###
- for: terms
type: popup
url: https://translate.google.com/?sl=hi&tl=en&text=###
- for: sentences
type: embedded
url: https://www.bing.com/translator/?from=hi&to=en&text=###
- for: terms
type: embedded
url: https://www.boltidictionary.com/en/search?s=###
active: true
- for: terms
type: embedded
url: https://en.wiktionary.org/wiki/####Hindi
active: true
- for: sentences
type: popup
url: https://translate.google.com/?sl=hi&tl=en&text=###
active: true
- for: terms
type: embedded
url: https://verbix.com/webverbix/go.php?&D1=47&T1=###
active: true
show_romanization: true
# right_to_left:

# parser_type: defaults to space delimited.
# character_substitutions:
right_to_left: false
parser_type: spacedel
character_substitutions: ´='|`='|’='|‘='|...=…|..=‥
split_sentences: .?!|।॥
split_sentence_exceptions: Mr.|Mrs.|Dr.|[A-Z].|Vd.|Vds.
split_sentence_exceptions: '[A-Z].|[\u0900-\u0963].|[\u0966-\u097F].|[\u200C\u200D].|है.|ए.|हैं.|ई.|ओ.|हूं.|था.|चाहिए.|म.प्र.|होगा.|थी.|स.|ए.एस.आई.|उ.प्र.|न.|ए.टी.एम.|जाएगा.|प.|हो.|ए.के.|ई.पू.|सल्ल.|मी.|सी.|ए.एस.|एम.|इ.|डी.|रजि.|पू.|टी.'
word_chars: a-zA-Z\u0900-\u0963\u0966-\u097F\u200C\u200D
Loading

0 comments on commit 612aced

Please sign in to comment.