-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathTriton_ForamSynonyms.R
94 lines (78 loc) · 4.04 KB
/
Triton_ForamSynonyms.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
## function to update names based on lookup tables
library("openxlsx")
## input - species names (can be binomial or just species name)
## output - corrected version of that name if known
foram.syns <- read.xlsx("Data/PFdata.xlsx", sheet = "foramslookup")
forams.macro <- read.xlsx("Data/PFdata.xlsx", sheet = "MacroperforateSpp")[,1]
forams.micro <- read.xlsx("Data/PFdata.xlsx", sheet = "MicroperforateSpp")[, 1]
compare <- function (forams.sample, micro = FALSE, st.age = NA, en.age = NA, age.check = FALSE, PF.ages = foram.ages, forams.lookup = foram.syns) {
# st.age - sample age(s), or oldest age
# en.age - youngest age (where applicable)
# load lookup tables
forams.lookup <- forams.lookup[,1:3]
lookup.table <- function(lookup) {
# create a version of the lookup table which only has the species names
species.nm <- lookup
tmp <- species.nm[!duplicated(species.nm$AcceptedName),]
tmp$Synonym <- tmp$AcceptedName
species.nm <- rbind(species.nm, tmp)
# remove the first name (genus)
species.nm$Synonym <- gsub("^[^ ]* ", "", species.nm$Synonym)
# remove duplicates from mispelled genus
species.nm <- species.nm[!duplicated(species.nm[,1:2]),]
# where there are still duplicates, set these as "Unsure"
spp.dup <- species.nm$Synonym[duplicated(species.nm$Synonym) & species.nm$AcceptedName != "Unsure"]
species.nm$AcceptedName[species.nm$Synonym %in% unique(spp.dup)] <- "Unsure"
# create versions of the lookup table / species list with abbreviated genus names
abb.nm <- lookup
abb.nm <- rbind(abb.nm, tmp)
# remove the first name (genus)
abb.nm$Synonym <- gsub("^(.)[^ ]* ", "\\1\\. ", abb.nm$Synonym)
# remove duplicates from mispelled genus, where the accepted name is the same
abb.nm <- abb.nm[!duplicated(abb.nm[,1:2]),]
# where there are still duplicates, set these as "Unsure"
abb.dup <- abb.nm$Synonym[duplicated(abb.nm$Synonym) & abb.nm$AcceptedName != "Unsure"]
abb.nm$AcceptedName[abb.nm$Synonym %in% unique(abb.dup)] <- "Unsure"
# merge these three dataframes
return(rbind(lookup, species.nm, abb.nm))
}
full.lookup <- lookup.table(forams.lookup)
# if not micro
if (!micro) {
full.lookup$AcceptedName[full.lookup$Micro == "Yes"] <- "Micro"
}
# create the compare function
comp.func <- function(forams.sample, lookup) {
forams.sample <- as.character(forams.sample) ## make sure it is not a factor
species <- lookup$AcceptedName[match(forams.sample, lookup$AcceptedName)]
if (is.na(species)) {
species <- lookup$AcceptedName[match(forams.sample, lookup$Synonym)]
if (is.na(species))
species <- "unknown"
}
return(species)
}
forams.sample <- gsub("\\s", " ", forams.sample)
species.list <- sapply(forams.sample, comp.func, full.lookup, USE.NAMES = FALSE)
# add in a check for "Unsure" based on ages of everything else
if ((!is.na(st.age[1]) | age.check) & any(species.list == "Unsure")) {
# calculate max / min ages
if (is.na(st.age[1])) {
# if these aren't already specified, then base it on the species list
st.age <- max(PF.ages$Start[PF.ages$Species.name %in% species.list])
en.age <- min(PF.ages$End[PF.ages$Species.name %in% species.list])
}
beg.age <- max(st.age, en.age, na.rm = TRUE)
end.age <- min(st.age, en.age, na.rm = TRUE)
age.spp <- PF.ages$Species.name[PF.ages$End <= beg.age & PF.ages$Start >= end.age]
age.forams.lookup <- forams.lookup[forams.lookup$AcceptedName %in% age.spp,]
# recalulate the lookup table, so that Unsure is only specified within the ages
age.full.lookup <- lookup.table(age.forams.lookup)
# if not micro
if (!micro) {
age.full.lookup$AcceptedName[age.full.lookup$Micro == "Yes"] <- "Micro"
}
species.list[species.list == "Unsure"] <- ifelse(sapply(forams.sample[species.list == "Unsure"], comp.func, age.full.lookup, USE.NAMES = FALSE) == "unknown", "Unsure", sapply(forams.sample[species.list == "Unsure"], comp.func, age.full.lookup, USE.NAMES = FALSE))
}
return(species.list)
}