Skip to content

Commit 070cfc5

Browse files
committed
update segments and tests
1 parent cc582fb commit 070cfc5

File tree

5 files changed

+42
-15
lines changed

5 files changed

+42
-15
lines changed

bibliography/bibliography.bib

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2874,10 +2874,10 @@ @book{nikolaeva2014tundranenets
28742874
@book{svantesson2005mongolian,
28752875
author = {J. O. Svantesson},
28762876
date-added = {2019-04-17 11:38:04 +0300},
2877-
date-modified = {2019-04-17 11:38:04 +0300},
2877+
date-modified = {2021-11-24 12:53:20 +0100},
28782878
publisher = {OUP Oxford},
28792879
series = {The {Phonology} of the {World}'s {Languages}},
2880-
title = {The {Phonology} of {Mongolian}},
2880+
title = {The Phonology of {Mongolian}},
28812881
year = {2005}}
28822882

28832883
@book{burenhult2005jahai,

data/SegBo database - Metadata.csv

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@ InventoryID,Glottocode,Dialect,LanguageName,BibTexKey,Filename,Contributor,Metad
77
6,musa1266,,Aisi,daniels2015sogeram,,Einav Levanon,,
88
7,akaj1239,,Aka-Jeru,abbi2013greatandamanese,,NA,,
99
8,aleu1260,,Aleut,bergsland1997aleut,,Elad Eisen,,
10-
9,chal1275,alqo1238,Alqosh,coghill2004alqosh,,KurdistanDB,,
10+
9,chal1275,alqo1238,Alqosh,coghill2004alqosh,coghill2004alqosh.pdf,KurdistanDB,,
1111
10,amap1240,,Ama (Papua New Guinea),aarsjo1999ama,,Einav Levanon,,
1212
13,wara1294,,Anta-Komnzo-Wára-Wérè-Kémä,dohler2016komnzo,,Einav Levanon,,
1313
14,lish1245,arbe1239,Arbel,khan1999arbel,,KurdistanDB,,
14-
15,stan1288,riop1234,Argentinian Spanish,honsa1965argentinianspanish,honsa1965argentinianspanish,Elad Eisen,glottocode is for Standard Spanish,
14+
15,stan1288,riop1234,Argentinian Spanish,honsa1965argentinianspanish,honsa1965argentinianspanish.pdf,Elad Eisen,glottocode is for Standard Spanish,
1515
16,asil1242,,Asilulu,collins2003asilulu,,Einav Levanon,,
1616
17,awiy1238,,Awiyaana,Bees1965,auy_SPA1979_phon.pdf,Steven Moran,,57
1717
18,bafu1246,,Bafut,tamanji2009bafut,,Elad Eisen,,
@@ -103,7 +103,7 @@ InventoryID,Glottocode,Dialect,LanguageName,BibTexKey,Filename,Contributor,Metad
103103
111,khva1239,,Khwarshi-Inkhoqwari,khalilova2009khwarshi,,NA,,
104104
112,nort3142,,Anatolian Arabic,jastrow2006anatolianarabic,,KurdistanDB,,
105105
113,kipu1237,,Kiput,blust2003short,,Einav Levanon,,
106-
114,doro1266,koki1244,Koki,bradshaw2012koki,bradshaw2012koki,Einav Levanon,,
106+
114,doro1266,koki1244,Koki,bradshaw2012koki,bradshaw2012koki.pdf,Einav Levanon,,
107107
115,kome1238,,Komering,abdurrahman1979komering,,Einav Levanon,,
108108
116,komi1269,,Komi-Permyak,majshev1940komi_permjak,,Dmitry Nikolaev,,
109109
117,komi1277,,Komi-Yazva,lytkin1961komi_yazva,,Dmitry Nikolaev,,
@@ -281,7 +281,7 @@ InventoryID,Glottocode,Dialect,LanguageName,BibTexKey,Filename,Contributor,Metad
281281
301,belh1239,,Belhare,bickel2003belhare,,Eitan Grossman,,
282282
302,bond1245,,Bondo (Remo),fernandez1968remo,,Eitan Grossman,,
283283
303,czec1258,,Czech,dankovicova1997czech,,Eitan Grossman,,
284-
304,chra1242,,Chrau,thomas1971chrau,,Eitan Grossman,,
284+
304,chra1242,,Chrau,thomas1971chrau,thomas1971chrau.pdf,Eitan Grossman,,
285285
305,stan1290,,French,Sten1963,fra_SPA1979_phon.pdf,Eitan Grossman,,162
286286
306,friu1240,,Friulian,miotti2002friulian,,Eitan Grossman,,
287287
307,guja1253,,Gujari (Gojri),losey2002gujari,,Eitan Grossman,,
@@ -457,7 +457,7 @@ InventoryID,Glottocode,Dialect,LanguageName,BibTexKey,Filename,Contributor,Metad
457457
483,tsou1248,,Tsou,tsuchida1975tsouic,,Einav Levanon,,
458458
484,tuam1242,,Tuamotuan,kuki1970tuamotuan,,Einav Levanon,,
459459
485,tugu1245,,Tugun,hinton1991tugun,,Einav Levanon,,
460-
486,tuka1248,,Tukang Besi North,donohue1999tukangbesi,,Einav Levanon,,
460+
486,tuka1248,,Tukang Besi North,donohue1999tukangbesi,donohue1999tukangbesi.pdf,Einav Levanon,,
461461
487,unua1237,,Unua,pearce2015unua,,Einav Levanon,,
462462
488,urak1238,,Urak Lawoi',steinhauer2008urak,,Einav Levanon,,
463463
489,vinm1237,,Neve'ei,musgrave2007neveei,,Einav Levanon,,
@@ -485,7 +485,7 @@ InventoryID,Glottocode,Dialect,LanguageName,BibTexKey,Filename,Contributor,Metad
485485
511,berb1259,,Berbice Dutch,kouwenberg1994berbicedutch,,David Ginebra,,
486486
512,huam1248,,Huallaga (Huanuco) Quechua,weber1983huallagaquechua,,David Ginebra,,
487487
513,siam1242,,Siamou,1627_Haas_sif,Haas_sif.pdf,Elad Eisen,,1627
488-
514,sumb1240,,Sumbwa,1639_Kahigi_suw,Kahigi_suw.PDF,Elad Eisen,,1639
488+
514,sumb1240,,Sumbwa,1639_Kahigi_suw,Kahigi_suw.pdf,Elad Eisen,,1639
489489
515,amha1245,,Amharic,amh_hayward1992,amh_hayward1992.pdf,Elad Eisen,,2156
490490
516,awng1244,,Awngi,1542_awngi_2010,Hetzron1969.png,Elad Eisen,,234
491491
517,chum1261,,Chumburung,Hartell1993,ncu_Hartell1993_phon.pdf,Elad Eisen,,686

data/SegBo database - Phonemes.csv

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ InventoryID,BorrowingLanguageGlottocode,BorrowedSound,SourceLanguageGlottocode,O
3434
9,chal1275,v,nort2641,mostly,,,,
3535
9,chal1275,zˤ,unknown,mostly,,,,
3636
9,chal1275,ʒ,nort2641,no,phonologization of allophone,,a native allophone of /ʃ/,
37-
9,chal1275,ʕ̞,stan1318,mostly,,,,
37+
9,chal1275,ʕ,stan1318,mostly,,,,
3838
10,amap1240,h,unknown,no,other distributional change,no,in native phonology /h/ is rare and appears only before /a/,
3939
13,wara1294,b,"stan1293, hiri1237",yes,new phoneme,no,appears also in ideophones. voiced stops are pre-nasalised in native phonology,
4040
13,wara1294,d,"stan1293, hiri1237",yes,new phoneme,no,voiced stops are pre-nasalised in native phonology,
@@ -856,7 +856,7 @@ an unstressed high front vowel or a front glide.",
856856
284,nucl1301,ʔ,unknown,unknown,to be checked,to be checked,"(loan,transitional)",
857857
285,assa1263,dz,"stan1293, hind1269",yes,new phoneme,affricate,[dz] and also [ ] are used exclusively in loan words from English and Hindi and do not belong to the ‘native’ sound system of Assamese.,
858858
285,assa1263,f,"stan1293, hind1269",yes,new phoneme,no,"Sometimes, consonants apparently not native to Assamese but used in loan words from English and Hindi also surface, like [f] and [S] among others, in words such as fan and shwal. This is also optional and borrowed words may assimilate to the native system in such a way that fan and shwal may be realized as [p En] and [sAl] respectively.",
859-
285,assa1263,ɹ̤,sans1269,yes,new phoneme,no,a Sanskrit remnant and occurs only in a select few words in the careful speech of educated people,
859+
285,assa1263,ɹ̤,sans1269,yes,new phoneme,no,a Sanskrit remnant and occurs only in a select few words in the careful speech of educated people (denoted as [rʰ] and in an example as /pɔɹʰ/),
860860
285,assa1263,ʃ,"stan1293, hind1269",yes,new phoneme,postalveolar,"Sometimes, consonants apparently not native to Assamese but used in loan words from English and Hindi also surface, like [f] and [S] among others, in words such as fan and shwal. This is also optional and borrowed words may assimilate to the native system in such a way that fan and shwal may be realized as [p En] and [sAl] respectively.",
861861
285,assa1263,t̠ʃ,"stan1293, hind1269",yes,new phoneme,affricate,[dz] and also [ ] are used exclusively in loan words from English and Hindi and do not belong to the ‘native’ sound system of Assamese.,
862862
286,kwaz1243,b,port1283,yes,new phoneme,voicing,,
@@ -910,8 +910,8 @@ an unstressed high front vowel or a front glide.",
910910
303,czec1258,oː,unknown,yes,new phoneme,no,should probably find another source,
911911
304,chra1242,ɓ,viet1252,mostly,new phoneme,implosive,"some clusters are also only in loanwords. related languages had implosives, so chrau probably did originally too, but then lost the contrast until it was reintroduced from vietnamese",
912912
304,chra1242,ɗ,viet1252,mostly,new phoneme,implosive,"some clusters are also only in loanwords. related languages had implosives, so chrau probably did originally too, but then lost the contrast until it was reintroduced from vietnamese",
913-
304,chra1242,ɨə̯,viet1252,mostly,new phoneme,no,has to be checked,
914-
304,chra1242,uə̯,viet1252,mostly,new phoneme,no,has to be checked,
913+
304,chra1242,ua,viet1252,mostly,new phoneme,no,has to be checked,
914+
304,chra1242,ɯə,viet1252,mostly,new phoneme,no,has to be checked,
915915
305,stan1290,ŋ,stan1293,yes,new phoneme,no,,
916916
306,friu1240,dz,"ital1282, vene1258",yes,new phoneme,no,,
917917
306,friu1240,ts,"ital1282, lati1261, slov1268, stan1295",yes,new phoneme,no,,
@@ -947,7 +947,7 @@ an unstressed high front vowel or a front glide.",
947947
317,halh1238,pʲʰ,unknown,mostly,,no,,
948948
317,halh1238,tʲ,mand1415,no,other distributional change,no,Word-initially only in loans. Maybe other source languages as well,
949949
317,halh1238,ʊa,mand1415,mostly,unknown,to be checked,diphthong,
950-
317,halh1238,ʊai,mand1415,mostly,unknown,to be checked,triphthong,
950+
317,halh1238,uai,mand1415,mostly,unknown,to be checked,triphthong,
951951
317,halh1238,w,unknown,no,other distributional change,no,Word-initially only in loans,
952952
318,caml1239,dz,nepa1254,yes,new phoneme,to be checked,,
953953
318,caml1239,d̤z̤,nepa1254,mostly,other distributional change,to be checked,,
@@ -1449,7 +1449,7 @@ There exists a native bisyllabic vocal sequence [u.a], but the borrowed one is m
14491449
486,tuka1248,b,"ambo1250, indo1316",yes,new phoneme,no,,
14501450
486,tuka1248,d,"ambo1250, indo1316",yes,new phoneme,no,,
14511451
486,tuka1248,d̠ʒ,"ambo1250, indo1316",yes,new phoneme,affricate,sometimes as [d] or [z].,
1452-
486,tuka1248,n̺d̺z̺,"ambo1250, indo1316",yes,new phoneme,affricate,suspest loan.,
1452+
486,tuka1248,n̠d̠ʒ,"ambo1250, indo1316",yes,new phoneme,affricate,suspected loan.,
14531453
486,tuka1248,t̠ʃ,"ambo1250, indo1316",yes,new phoneme,affricate,sometimes as [s].,
14541454
487,unua1237,l,bisl1239,mostly,new phoneme,lateral,Appears in names and in one allegedly native word.,
14551455
488,urak1238,e,sout2746,no,other distributional change,no,appears contrastively in native words only in closed final syllables.,

tests/README.Rmd

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,3 +146,14 @@ lang_mappings <- lang_mappings %>%
146146
expect_equal(nrow(lang_mappings %>% filter(SourceLanguageGlottocode == BorrowingLanguageGlottocode)), 0)
147147
```
148148

149+
Check whether the segments in SegBo are also reported in [PHOIBLE](https://phoible.org).
150+
151+
```{r, warning=FALSE, message=FALSE}
152+
phoible <- read_csv('https://raw.githubusercontent.com/phoible/dev/master/data/phoible.csv')
153+
phoible_segments <- phoible %>% select(Phoneme) %>% distinct()
154+
segbo_phonemes <- phonemes %>% select(BorrowedSound) %>% distinct()
155+
segbo_phonemes[which(!(segbo_phonemes$BorrowedSound %in% phoible_segments$Phoneme)),]
156+
```
157+
158+
At the current time, the only SegBo segment not found in PHOIBLE is the rhotic /ɹ̤/ reported by Mahanta (2012) in Assamese (ID 285, assa1263); it is under investigation (it is reported as an aspirated rhotic from Sanskrit).
159+

tests/README.md

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Data checks for SegBo data tables
22
================
33
Steven Moran
4-
22 November, 2021
4+
24 November, 2021
55

66
Load libraries.
77

@@ -278,3 +278,19 @@ lang_mappings <- lang_mappings %>%
278278

279279
expect_equal(nrow(lang_mappings %>% filter(SourceLanguageGlottocode == BorrowingLanguageGlottocode)), 0)
280280
```
281+
282+
Check whether the segments in SegBo are also reported in
283+
[PHOIBLE](https://phoible.org).
284+
285+
``` r
286+
phoible <- read_csv('https://raw.githubusercontent.com/phoible/dev/master/data/phoible.csv')
287+
phoible_segments <- phoible %>% select(Phoneme) %>% distinct()
288+
segbo_phonemes <- phonemes %>% select(BorrowedSound) %>% distinct()
289+
segbo_phonemes[which(!(segbo_phonemes$BorrowedSound %in% phoible_segments$Phoneme)),]
290+
```
291+
292+
## [1] "ɹ̤"
293+
294+
At the current time, this rhotic segment reported by Mahanta (2012) in
295+
Assamese (ID 285, assa1263) is under investigation (it is reported as an
296+
aspirated rhotic from Sanskrit).

0 commit comments

Comments
 (0)