Skip to content

Latest commit

 

History

History
297 lines (247 loc) · 9.54 KB

S3.md

File metadata and controls

297 lines (247 loc) · 9.54 KB

Introduction

This code calculates indicator S3 of NorInAliS.

Preparations

Load data from the Alien Species List 2018:

fab  <- read.csv2(url("https://datadryad.org/stash/downloads/file_stream/359484"),
                  as.is=T)
path <- read.csv2(url("https://datadryad.org/stash/downloads/file_stream/1823673"),
                  as.is=T)

Translate NAs into “unknown”:

for (i in 5:7) {
  path[which(is.na(path[, i])), i] <- "unknown"
}

Restrict data to alien species that are reproducing unaidedly in mainland Norway:

fab <- fab[which(fab$Status == "reproducing" & fab$Mainl),]

Restrict data to current pathways of secondary spread:

path <- path[!path$Introd & path$Time == "current" & path$Name %in% fab$Name, ]

Define an auxiliary function:

# Combines text strings
"%+%" <- function(string1, string2) paste0(string1, string2)

Functions

Define indicator-specific functions:

asFreq  <- function(x) {
  # translates a frequency interval into a numerical value
  # Note that input is in events per decade,
  #  whereas output is in events per year!
  sapply(x, function(z) switch(z,
    "<1"=0.07, "1-8"=0.5, "9-19"=1.4, ">19"=4.6, NA))
}


asAbund <- function(x) {
  # translates an abundance interval into a numerical value
  sapply(x, function(z) switch(z,
    "1"=1, "2-10"=5, "11-100"=50, "101-1000"=500, ">1000"=5000, NA))
}


rincr <- function(n, min, max, r=TRUE) {
  # generates random numbers according to an increasing
  # triangular probability distribution
  min + sqrt(    if(r) runif(n) else 0.5) * (max - min)
}


rdecr <- function(n, min, max, r=TRUE) {
  # generates random numbers according to a decreasing
  # triangular probability distribution
  max - sqrt(1 - if(r) runif(n) else 0.5) * (max - min)
}


assignFrequencies <- function(n, pathways, pw, r=TRUE) {
  # assigns unknown frequencies according to the distribution of known frequencies,
  # separately for main pathway categories
  freq <- table(
    pathways$Freq[which(pathways$Freq != "unknown" & pathways$Cat == pw)]
  )[c("<1", "1-8", "9-19", ">19")]
  if (any(is.na(freq))) {
    freq[which(is.na(freq))] <- 0
  }
  freq <- freq / sum(freq)
  for (i in 4:2) {
    freq[i] <- sum(freq[1:i])
  }
  seq <- if (r) runif(n) else (1:n - 0.5) / n
  return(c("<1", "1-8", "9-19", ">19")[sapply(seq, function(x) which(freq > x)[1])])
}


S3a <- function(pathways) {
  # estimates S3(a) according to Equation ¤1
  return(length(unique(
    (pathways$Name %+% pathways$Subcat)[!pathways$Introd & pathways$Time == "current"]
  )))
}


S3b <- function(pathways, nsim=0, CL=c(0.025, 0.25, 0.5, 0.75, 0.975)) {
  # estimates S3(b) according to Equation ¤2
  pathways <- pathways[which(!pathways$Introd & pathways$Time == "current"),]
  results <- matrix(0, nrow(pathways), max(1, nsim))
  freq <- matrix(pathways$Freq, nrow(pathways), max(1, nsim))
  for (pw in unique(pathways$Cat)) {
    w <- which(pathways$Freq == "unknown" & pathways$Cat == pw)
    if (length(w)) {
      freq[w,] <- assignFrequencies(length(w) * max(1, nsim),
                                    pathways, pw, r = nsim >= 1)
    }
  }
  W <- freq == "<1"
  results[W] <-                rincr(sum(W), 0.01,  0.1, r = nsim >= 1)
  W <- freq == "1-8"
  results[W] <- if (nsim >= 1) runif(sum(W), 0.10,  0.9) else 0.5
  W <- freq == "9-19"
  results[W] <- if (nsim >= 1) runif(sum(W), 0.90,  1.9) else 1.4
  W <- freq == ">19"
  results[W] <-                rdecr(sum(W), 1.90, 10.0, r = nsim >= 1)
  if (nsim >=1 ) {
    freq <- c(mean(apply(results, 2, sum)),
              sd(apply(results, 2, sum)),
              quantile(apply(results, 2, sum), CL))
    names(freq) <- c("Average", "St.Dev.", (CL * 100) %+% "%CL")
    return(freq)
  } else {
    return(sum(asFreq(freq)))
  }
}


S3c <- function(pathways) {
  # estimates S3(c) according to Equation ¤3
  # (Note: this is a minimum implementation of indicator definition S3c!
  #  It illustrates the way S3c would be estimated, but it currently ignores
  #  uncertainty and simply assumes that the (utterly few) abundances reported
  #  are in fact representative of the unknown abundances, which is unlikely.)
  freq <- asFreq(pathways$Freq[!pathways$Introd & pathways$Time == "current"])
  w <- which(is.na(freq))
  if (length(w)) {
    freq[w] <- mean(freq[-w])
  }
  abund <- asAbund(pathways$Abund[!pathways$Introd & pathways$Time == "current"])
  w <- which(is.na(abund))
  if (length(w)) {
    abund[w] <- mean(abund[-w])
  }
  return(sum(freq * abund))
}

Calculations

Summarise available data on frequency and abundance:

{
  N <- length(which(!path$Introd & path$Time == "current"))
  cat("Distribution of frequencies of secondary spread events (per decade):\n")
  print(round(table(path$Freq )[c(1, 3, 4, 2, 5)] / N, 4))
  cat("\n\nDistribution of abundances (individuals) per secondary spread event:\n")
  print(round(table(path$Abund)[c(4, 3, 2, 1, 5)] / N, 4))
}

## Distribution of frequencies of secondary spread events (per decade):
## 
##      <1     1-8    9-19     >19 unknown 
##  0.0701  0.0473  0.2668  0.4375  0.1784 
## 
## 
## Distribution of abundances (individuals) per secondary spread event:
## 
##     2-10   11-100 101-1000    >1000  unknown 
##   0.0091   0.0061   0.0061   0.0168   0.9619

Show how frequencies of introduction vary between main pathway categories:

summary(lm(asFreq(Freq) ~ Cat, data=path, subset=which(Freq != "unknown")))

## 
## Call:
## lm(formula = asFreq(Freq) ~ Cat, data = path, subset = which(Freq != 
##     "unknown"))
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -3.356 -1.542  1.174  1.787  1.787 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.9416     0.1640  17.933   <2e-16 ***
## Catrelease    1.6584     1.8118   0.915   0.3604    
## Catstowaway   0.4846     0.2581   1.878   0.0609 .  
## Catunaided   -0.1283     0.1914  -0.670   0.5028    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.804 on 535 degrees of freedom
## Multiple R-squared:  0.01555,    Adjusted R-squared:  0.01003 
## F-statistic: 2.818 on 3 and 535 DF,  p-value: 0.03851

Tabulate frequencies separately for each main pathway category:

for (pw in unique(path$Cat)) {
  cat("\n\n" %+% pw %+% ":\n")
  print(table(path$Freq[which(path$Cat == pw)]))
}

## 
## 
## unaided:
## 
##      <1     >19     1-8    9-19 unknown 
##      32     168      24     111      73 
## 
## 
## contaminant:
## 
##      <1     >19     1-8    9-19 unknown 
##       9      64       7      41      22 
## 
## 
## stowaway:
## 
##      <1     >19    9-19 unknown 
##       5      54      23      20 
## 
## 
## release:
## 
##     >19 unknown 
##       1       1 
## 
## 
## corridor:
## 
## unknown 
##       1

This shows that “corridor” has only 1 occurrence with unknown frequencies. Therefore, corridors are excluded from S3b below.

Estimate the three indicator definitions for the current Alien Species List:

S3b_results <- S3b(path[path$Cat != "corridor",], nsim=100000)

{
  cat("\nIndicator value for S3(a): " %+%             S3a(path),    "\n")
  cat("\nIndicator value for S3(b):\n");  print(round(S3b_results))
  cat("\nIndicator value for S3(c): " %+%       round(S3c(path)), "\n\n")
}

## 
## Indicator value for S3(a): 655 
## 
## Indicator value for S3(b):
## Average St.Dev.  2.5%CL   25%CL   50%CL   75%CL 97.5%CL 
##    1927      41    1848    1899    1927    1955    2008 
## 
## Indicator value for S3(c): 4419637

Disaggregate indicator S3(b) for main pathway categories:

S3b_disaggr <- list()
for (pw in c("release", "contaminant", "stowaway", "unaided")) {
  cat("\n\n" %+% pw %+% ":\n")
  S3b_disaggr[[pw]] <- S3b(path[which(path$Cat == pw),], nsim=100000)
  print(S3b_disaggr[[pw]])
}

## 
## 
## release:
## Average St.Dev.  2.5%CL   25%CL   50%CL   75%CL 97.5%CL 
##       9       3       5       7       9      11      15 
## 
## 
## contaminant:
## Average St.Dev.  2.5%CL   25%CL   50%CL   75%CL 97.5%CL 
##     421      19     384     408     420     433     458 
## 
## 
## stowaway:
## Average St.Dev.  2.5%CL   25%CL   50%CL   75%CL 97.5%CL 
##     349      17     316     338     349     361     384 
## 
## 
## unaided:
## Average St.Dev.  2.5%CL   25%CL   50%CL   75%CL 97.5%CL 
##    1148      32    1087    1126    1148    1169    1210