This code calculates indicator S3 of NorInAliS.
Load data from the Alien Species List 2018:
- The “fab” data are available from Sandvik et al. (2020).
- The “path” data are available from Sandvik et al. (2022).
fab <- read.csv2(url("https://datadryad.org/stash/downloads/file_stream/359484"),
as.is=T)
path <- read.csv2(url("https://datadryad.org/stash/downloads/file_stream/1823673"),
as.is=T)
Translate NA
s into “unknown”:
for (i in 5:7) {
path[which(is.na(path[, i])), i] <- "unknown"
}
Restrict data to alien species that are reproducing unaidedly in mainland Norway:
fab <- fab[which(fab$Status == "reproducing" & fab$Mainl),]
Restrict data to current pathways of secondary spread:
path <- path[!path$Introd & path$Time == "current" & path$Name %in% fab$Name, ]
Define an auxiliary function:
# Combines text strings
"%+%" <- function(string1, string2) paste0(string1, string2)
Define indicator-specific functions:
asFreq <- function(x) {
# translates a frequency interval into a numerical value
# Note that input is in events per decade,
# whereas output is in events per year!
sapply(x, function(z) switch(z,
"<1"=0.07, "1-8"=0.5, "9-19"=1.4, ">19"=4.6, NA))
}
asAbund <- function(x) {
# translates an abundance interval into a numerical value
sapply(x, function(z) switch(z,
"1"=1, "2-10"=5, "11-100"=50, "101-1000"=500, ">1000"=5000, NA))
}
rincr <- function(n, min, max, r=TRUE) {
# generates random numbers according to an increasing
# triangular probability distribution
min + sqrt( if(r) runif(n) else 0.5) * (max - min)
}
rdecr <- function(n, min, max, r=TRUE) {
# generates random numbers according to a decreasing
# triangular probability distribution
max - sqrt(1 - if(r) runif(n) else 0.5) * (max - min)
}
assignFrequencies <- function(n, pathways, pw, r=TRUE) {
# assigns unknown frequencies according to the distribution of known frequencies,
# separately for main pathway categories
freq <- table(
pathways$Freq[which(pathways$Freq != "unknown" & pathways$Cat == pw)]
)[c("<1", "1-8", "9-19", ">19")]
if (any(is.na(freq))) {
freq[which(is.na(freq))] <- 0
}
freq <- freq / sum(freq)
for (i in 4:2) {
freq[i] <- sum(freq[1:i])
}
seq <- if (r) runif(n) else (1:n - 0.5) / n
return(c("<1", "1-8", "9-19", ">19")[sapply(seq, function(x) which(freq > x)[1])])
}
S3a <- function(pathways) {
# estimates S3(a) according to Equation ¤1
return(length(unique(
(pathways$Name %+% pathways$Subcat)[!pathways$Introd & pathways$Time == "current"]
)))
}
S3b <- function(pathways, nsim=0, CL=c(0.025, 0.25, 0.5, 0.75, 0.975)) {
# estimates S3(b) according to Equation ¤2
pathways <- pathways[which(!pathways$Introd & pathways$Time == "current"),]
results <- matrix(0, nrow(pathways), max(1, nsim))
freq <- matrix(pathways$Freq, nrow(pathways), max(1, nsim))
for (pw in unique(pathways$Cat)) {
w <- which(pathways$Freq == "unknown" & pathways$Cat == pw)
if (length(w)) {
freq[w,] <- assignFrequencies(length(w) * max(1, nsim),
pathways, pw, r = nsim >= 1)
}
}
W <- freq == "<1"
results[W] <- rincr(sum(W), 0.01, 0.1, r = nsim >= 1)
W <- freq == "1-8"
results[W] <- if (nsim >= 1) runif(sum(W), 0.10, 0.9) else 0.5
W <- freq == "9-19"
results[W] <- if (nsim >= 1) runif(sum(W), 0.90, 1.9) else 1.4
W <- freq == ">19"
results[W] <- rdecr(sum(W), 1.90, 10.0, r = nsim >= 1)
if (nsim >=1 ) {
freq <- c(mean(apply(results, 2, sum)),
sd(apply(results, 2, sum)),
quantile(apply(results, 2, sum), CL))
names(freq) <- c("Average", "St.Dev.", (CL * 100) %+% "%CL")
return(freq)
} else {
return(sum(asFreq(freq)))
}
}
S3c <- function(pathways) {
# estimates S3(c) according to Equation ¤3
# (Note: this is a minimum implementation of indicator definition S3c!
# It illustrates the way S3c would be estimated, but it currently ignores
# uncertainty and simply assumes that the (utterly few) abundances reported
# are in fact representative of the unknown abundances, which is unlikely.)
freq <- asFreq(pathways$Freq[!pathways$Introd & pathways$Time == "current"])
w <- which(is.na(freq))
if (length(w)) {
freq[w] <- mean(freq[-w])
}
abund <- asAbund(pathways$Abund[!pathways$Introd & pathways$Time == "current"])
w <- which(is.na(abund))
if (length(w)) {
abund[w] <- mean(abund[-w])
}
return(sum(freq * abund))
}
Summarise available data on frequency and abundance:
{
N <- length(which(!path$Introd & path$Time == "current"))
cat("Distribution of frequencies of secondary spread events (per decade):\n")
print(round(table(path$Freq )[c(1, 3, 4, 2, 5)] / N, 4))
cat("\n\nDistribution of abundances (individuals) per secondary spread event:\n")
print(round(table(path$Abund)[c(4, 3, 2, 1, 5)] / N, 4))
}
## Distribution of frequencies of secondary spread events (per decade):
##
## <1 1-8 9-19 >19 unknown
## 0.0701 0.0473 0.2668 0.4375 0.1784
##
##
## Distribution of abundances (individuals) per secondary spread event:
##
## 2-10 11-100 101-1000 >1000 unknown
## 0.0091 0.0061 0.0061 0.0168 0.9619
Show how frequencies of introduction vary between main pathway categories:
summary(lm(asFreq(Freq) ~ Cat, data=path, subset=which(Freq != "unknown")))
##
## Call:
## lm(formula = asFreq(Freq) ~ Cat, data = path, subset = which(Freq !=
## "unknown"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.356 -1.542 1.174 1.787 1.787
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.9416 0.1640 17.933 <2e-16 ***
## Catrelease 1.6584 1.8118 0.915 0.3604
## Catstowaway 0.4846 0.2581 1.878 0.0609 .
## Catunaided -0.1283 0.1914 -0.670 0.5028
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.804 on 535 degrees of freedom
## Multiple R-squared: 0.01555, Adjusted R-squared: 0.01003
## F-statistic: 2.818 on 3 and 535 DF, p-value: 0.03851
Tabulate frequencies separately for each main pathway category:
for (pw in unique(path$Cat)) {
cat("\n\n" %+% pw %+% ":\n")
print(table(path$Freq[which(path$Cat == pw)]))
}
##
##
## unaided:
##
## <1 >19 1-8 9-19 unknown
## 32 168 24 111 73
##
##
## contaminant:
##
## <1 >19 1-8 9-19 unknown
## 9 64 7 41 22
##
##
## stowaway:
##
## <1 >19 9-19 unknown
## 5 54 23 20
##
##
## release:
##
## >19 unknown
## 1 1
##
##
## corridor:
##
## unknown
## 1
This shows that “corridor” has only 1 occurrence with unknown frequencies. Therefore, corridors are excluded from S3b below.
Estimate the three indicator definitions for the current Alien Species List:
S3b_results <- S3b(path[path$Cat != "corridor",], nsim=100000)
{
cat("\nIndicator value for S3(a): " %+% S3a(path), "\n")
cat("\nIndicator value for S3(b):\n"); print(round(S3b_results))
cat("\nIndicator value for S3(c): " %+% round(S3c(path)), "\n\n")
}
##
## Indicator value for S3(a): 655
##
## Indicator value for S3(b):
## Average St.Dev. 2.5%CL 25%CL 50%CL 75%CL 97.5%CL
## 1927 41 1848 1899 1927 1955 2008
##
## Indicator value for S3(c): 4419637
Disaggregate indicator S3(b) for main pathway categories:
S3b_disaggr <- list()
for (pw in c("release", "contaminant", "stowaway", "unaided")) {
cat("\n\n" %+% pw %+% ":\n")
S3b_disaggr[[pw]] <- S3b(path[which(path$Cat == pw),], nsim=100000)
print(S3b_disaggr[[pw]])
}
##
##
## release:
## Average St.Dev. 2.5%CL 25%CL 50%CL 75%CL 97.5%CL
## 9 3 5 7 9 11 15
##
##
## contaminant:
## Average St.Dev. 2.5%CL 25%CL 50%CL 75%CL 97.5%CL
## 421 19 384 408 420 433 458
##
##
## stowaway:
## Average St.Dev. 2.5%CL 25%CL 50%CL 75%CL 97.5%CL
## 349 17 316 338 349 361 384
##
##
## unaided:
## Average St.Dev. 2.5%CL 25%CL 50%CL 75%CL 97.5%CL
## 1148 32 1087 1126 1148 1169 1210