knitr::opts_chunk$set(
    echo = FALSE,
    comment = NA,
    quiet = TRUE,
    progress = FALSE,
    tidy = FALSE,
    cache = FALSE,
    message = FALSE,
    error = FALSE, # FALSE: do not preserve errors. Always stop execution.
    warning = TRUE
)
options(width = 110)

r settings$title

Data Files and Settings

Benthic Ecosystem Quality Index 2 -- BEQI2-package version r packageVersion("BEQI2") (r packageDescription("BEQI2", fields = "Date"))

Selection of benthos records

# keep only records within the period of interest
month <- as.integer(format(d_beqi$DATE, format = "%m"))
sel <- (month >= settings$months[1]) & 
       (month <= settings$months[2])
d_beqi <- d_beqi[sel, ]
# convert taxa in 'd_beqi' to preferred names
d_beqi$TAXON_NEW <- rename(x = d_beqi$TAXON, from = d_twn$taxonname, to = d_twn$taxon)

# handle azoic (or 'azoisch' (NL)) samples (no species available)
sel <- isAzoic(d_beqi$TAXON)
d_beqi$TAXON_NEW[sel] <- d_beqi$TAXON[sel]

# add taxon level and taxon group
index <- match(x = d_beqi$TAXON_NEW, table = d_twn$taxon)
d_beqi$taxonlevel <- d_twn$taxonlevel[index]
d_beqi$taxongroup <- d_twn$taxongroup[index]
# BEQI2 has been designed for endofauna, therefore remove 
# epifaunal species and insecta

# create black-list of taxon groups to be removed
blacklist <- as.data.frame(
    matrix(data = c(
        "ARACH",    "Arachnida",
        "CRDEC",    "Crustacea - Decapoda",
        "CRMYS",    "Crustacea - Mysida",
        "IDCHI",    "Insecta/Diptera - Chironomidae",
        "IDREM",    "Insecta/Diptera - remaining",
        "IDSIM",    "Insecta/Diptera - Simuliidae",
        "INCOL",    "Insecta - Coleoptera",
        "INEPH",    "Insecta - Ephemeroptera",
        "INHET",    "Insecta - Heteroptera",
        "INLEP",    "Insecta - Lepidoptera",
        "INODO",    "Insecta - Odonata",
        "INREM",    "Insecta - remaining",
        "INTRI",    "Insecta - Trichoptera"
    ), ncol = 2, byrow = TRUE, 
    dimnames = list(NULL, c("taxongroup", "description"))),
    stringsAsFactors = FALSE
)

# merge black-list with BEQI2 data
d_beqi <- merge(x = d_beqi, y = blacklist, by = "taxongroup", 
                all.x= TRUE, all.y = FALSE, sort = FALSE)

# select all black-list species
blacklisted <- !is.na(d_beqi$description)

if (any(blacklisted)) {
    d <- ddply(
        .data = d_beqi[blacklisted, ], 
        .variables= "description", 
        .fun = function(x) {
            data.frame(frequency = nrow(x))
        }
    )
    names(d)[1] <- "taxon group"

    # remove black-list species from BEQI2 data
    d_beqi <- d_beqi[!blacklisted, ]
}
print(xtable(d), type = "html", include.rownames = FALSE)
# remove CHAR code 'REST'
n <- nrow(d_beqi)
d_beqi <- subset(x = d_beqi, subset = is.na(CHAR) | (toupper(CHAR) != "REST"))
# add year
d_beqi$YEAR <- as.integer(format(d_beqi$DATE, format = "%Y"))

Conversion of species names

inconvertible <- which(is.na(d_beqi$TAXON_NEW))

The following r length(inconvertible) taxon names in the BEQI file are inconvertible. These names are not in the Taxa Water management the Netherlands (TWN) list, which is based on the WORMS list, and will be removed:

d <- ddply(
    .data = d_beqi[inconvertible, ],
    .variables = "TAXON", 
    .fun = function(x) {
        data.frame(
            frequency = nrow(x), 
            similar_names_TWN = toString(
                agrep(
                    pattern = x$TAXON[1], 
                    x = d_twn$taxon, 
                    value = TRUE, 
                    ignore.case = TRUE
                )
            )
        )
    }
)
print(xtable(d), type = "html", include.rownames = FALSE)
d_beqi <- d_beqi[-inconvertible, ]
d_beqi$TAXON <- d_beqi$TAXON_NEW
d_beqi$TAXON_NEW <- NULL

The first column gives the taxon name as found in the BEQI input file (r basename(settings$files$beqi2)), the second column gives the number of occurrences of this name, and the third column gives taxon names (if any) in the TWN (WORMS) list that are most similar to the one in the BEQI input file. This column may be useful to discover and correct typing errors or slightly different spelling. Please report inconvertible taxa names of Dutch benthos data to the TAXA-TWN (WORMS) list manager of Rijkswaterstaat (myra.swarte@rws.nl).

Species sensitivity values



isMissing <- is.na(match(x = tolower(d_beqi$TAXON), table = tolower(d_sens$TAXON)))
d <- ddply(
    .data = d_beqi[isMissing, ], 
    .variables = "TAXON", 
    .fun = function(x) {
        data.frame(n = nrow(x))
    }
)
if (nrow(d) > 0L) {
    cat("\nSpecies sensitivity values for the following taxa are missing:\n",
        "(where n is the number of records)\n")
    print(xtable(d), type = "html")
}

Waterbody-ecotopes and sample areas

The following ecotopes have been selected:

d <- ddply(
    .data = d_beqi[, c("OBJECTID", "ECOTOPE")], 
    .variables = c("OBJECTID", "ECOTOPE"), 
    .fun =function(x) {
        data.frame(Nr.records = nrow(x))
    }
)
print(xtable(d), type = "html", include.rownames = FALSE)

Available total sample area for the available sample sizes (m²)

d <- unique(
    d_beqi[, c("OBJECTID", "ECOTOPE", "YEAR", "SAMPLEID", "AREA"), drop = FALSE]
)
d <- subset(x = d, select = -SAMPLEID)
d <- dcast(
    data = d, 
    formula= ... ~ AREA, 
    fun.aggregate = sum, 
    value.var = "AREA"
)
d$total <- rowSums(d[, -(1:3), drop = FALSE])
print(xtable(d), type = "html")

Number of samples per sample area

d <- unique(
    d_beqi[, c("OBJECTID", "ECOTOPE", "YEAR", "SAMPLEID", "AREA"), drop = FALSE]
)
d <- subset(x = d, select = -SAMPLEID)
d <- dcast(
    data = d, 
    formula= ... ~ AREA, 
    fun.aggregate = length, 
    value.var = "AREA"
)
d$total <- as.integer(rowSums(d[, -(1:3), drop = FALSE]))
print(xtable(d), type = "html")

Data pooling



Indicator calculation

groupingVars <- c("OBJECTID", "ECOTOPE", "YEAR", "POOLRUN", "POOLID")

Total abundance

The total abundance of individuals (N) in the data pool. This indicator is provided for general information on the sample, quality control and optional manual correction for sample size (e.g. by means of Margalef d).

Species richness

Species richness (S) is defined as the number of taxa (lowest identification level possible) per sampling unit (data pool or box core sample).

Shannon Index

Shannon Index (H')is given by (Shannon, 1948, p.393). In the BEQI2 tool, the logarithm to the base 2 is taken to estimate H'.

indicators <- function(x) {

    # total abundance
    N <- sum(abundance(taxon = x$TAXON, count = x$VALUE))

    # species richness 
    S <- speciesRichness(taxon = x$TAXON, count = x$VALUE)

    # Shannon's entropy
    H <- entropy(taxon = x$TAXON, count = x$VALUE)

    # number of sampling units in the pool
    nSamplesInPool <- length(unique(x$ID))

    # total area of the pool
    poolArea <- sum(unique(x[, c("ID", "AREA")])$AREA)

    # return results
    data.frame(nSamplesInPool, poolArea, N, S, H)
}

# compute indicators
d_ind <- ddply(.data = d_beqi, .variables = groupingVars, .fun = indicators)

AMBI: Marine Biotic Coefficient

Borja et al. (2000) introduced the Biotic Coefficient (AMBI). It is a weighted linear combination of species sensitivity classes. The table below gives an overview of the total number of identified taxa in the samples within each species sensitivity class.

print(xtable(t(as.matrix(table(d_sens$AMBI)))), type = "html", 
      include.rownames = FALSE)
# merge sensitivity classes with BEQI-input data
index <- match(x = d_beqi$TAXON, table = d_sens$TAXON)
d_beqi$AMBI <- d_sens$AMBI[index]

# estimate frequency distribution of sensitivity classes
d <- ddply(
    .data = d_beqi, 
    .variables = groupingVars,
    .fun = function(x) {
        x$AMBI[x$VALUE == 0L] <- "azoic"
        ddply(
            .data = x, 
            .variables = "AMBI", 
            .fun = function(x) {
                data.frame(n = sum(x$VALUE, na.rm = TRUE))
            }
        )
    }
)

The average percentage of the total abundance without an AMBI classification is given below:

tmp <- ddply(
    .data = d, 
    .variables = c("OBJECTID", "ECOTOPE", "YEAR"),
    .fun = function(x) {
        index <- which(is.na(x$AMBI))
        if (length(index) == 0L) {
            p <- 0
        } else {
            p <- 100 * sum(x$n[index])/sum(x$n)
        }
        data.frame(missing = p)
    }
)
print(xtable(tmp), type = "html")

Note that in r sum(tmp$missing > 20) cases, more than 20% of the total abundance does not have an AMBI classification.

To estimate the biotic coefficients, r sum(is.na(d$AMBI)) taxa with unknown species sensitivity classes will be excluded from analysis.

# compute biotic coefficients
d <- d[!is.na(d$AMBI), ]
d <- ddply(
    .data = d, 
    .variables = groupingVars,
    .fun = function(x) {
        if (all(x$n == 0L)) {
            # handle samples that are fully azoic
            biotic_coefficient <- NA_real_
        } else {
            # handle samples that contain azoic subsamples, 
            # but are not azoic as a whole. Azoic subsamples 
            # have to be removed prior to further analysis.
            x <- x[x$n != 0L, , drop = FALSE]

            # compute biotic coefficients
            f <- x$n/sum(x$n)
            w <- c(I = 0, II = 1.5, III = 3, IV = 4.5, V = 6)
            w <- w[as.character(x$AMBI)] # alignment
            biotic_coefficient <- sum(w * f)
        }
        data.frame(AMBI = biotic_coefficient)
    }
)

d_ind <- merge(x = d_ind, y = d, all = TRUE)

Indicator percentile values

Percentiles for each indicator are given below. In addition, the number of samples (n) used to estimate these percentiles is provided. The percentiles have been calculated for the period r paste(range(d_beqi$YEAR), collapse = "-") and months r paste(range(settings$months), collapse = "-").

d <- ddply(
    .data = d_ind,
    .variables = c("OBJECTID", "ECOTOPE"),
    .fun = function(x) {
        x <- x[, !(names(x) %in% c("OBJECTID", "ECOTOPE", "YEAR", "POOLRUN", 
                                   "poolArea", "POOLID", "nSamplesInPool"))]
        n <- sapply(X = x, FUN = function(x){sum(!is.na(x))})
        x <- ldply(
            .data = x, 
            .fun =  quantile, 
            probs = c(0, 1, 5, 25, 50, 75, 95, 99, 100) / 100, 
            na.rm = TRUE
        )
        names(x)[names(x) == ".id"] <- "indicator"
        x$n <- n
        x
    }
)
print(xtable(d), type = "html")

Indicator Ecological Quality Ratios

The following Ecological Quality Ratios (EQRs) are calculated:

Species richness:

EQR(S) = ( Sass - Sbad ) / ( Sref - Sbad )

Shannon's Index:

EQR(H') = ( H'ass - H'bad ) / ( H'ref - H'bad )

AMBI:

EQR(AMBI) = ( AMBIbad - AMBIass ) / ( AMBIbad - AMBIref )

BEQI2:

EQR(BEQI2) = ⅓ × EQR(S) + ⅓ × EQR(H') + ⅓ × EQR(AMBI)


The 'bad' and 'ref' values can be found in the table below. This table is a copy of r sQuote(basename(settings$files$ecotopes)) as specified in the settings file.

print(xtable(d_erf), type = "html", include.rownames = FALSE)
# add prefixes
sel <- !(names(d_erf) %in% c("OBJECTID", "ECOTOPE", "RELAREA"))
names(d_erf)[sel] <- paste("erf", names(d_erf)[sel], sep = "_")
d_ind <- merge(x = d_ind, y = d_erf, all.x = TRUE, all.y = FALSE)
d_ind$S_EQR <- eqr(x = d_ind$S, bad = d_ind$erf_SBAD, good = d_ind$erf_SREF)
d_ind$H_EQR <- eqr(x = d_ind$H, bad = d_ind$erf_HBAD, good = d_ind$erf_HREF)

# estimate EQR of AMBI
d_ind$AMBI_EQR <- eqr(x = d_ind$AMBI, bad = d_ind$erf_AMBIBAD, 
                      good = d_ind$erf_AMBIREF)

# estimate BEQI2-EQR
d_ind$BEQI2_EQR <- rowMeans(d_ind[, c("S_EQR", "H_EQR", "AMBI_EQR")])

Results

The results are averaged to waterbody-ecotope-year-period combinations. The table below lists all results aggregated by OBJECTID, ECOTOPE and YEAR.

# aggregate by pool run
v <- c("OBJECTID", "ECOTOPE", "YEAR")
w <- c(v, "nSamplesInPool", "poolArea", "RELAREA",
       "N", "S", "H", "AMBI", 
       "S_EQR", "H_EQR", "AMBI_EQR",
       "BEQI2_EQR"
)

# aggregate over pools runs and pooled samples
d <- ddply(
    .data = d_ind[, w], 
    .variables = v,
    .fun = function(x) {
        colMeans(x[, !(names(x) %in% v)])
    }
)

print(xtable(d), type = "html")
write.csv(x = d, file = settings$files$out_ecotope, row.names = FALSE)


The table below lists all results aggregated (ecotope area-weighted) by OBJECTID and YEAR.

# aggregate by pool run
v <- c("OBJECTID", "YEAR")
w <- c(v, "RELAREA", "BEQI2_EQR")
d <- ddply(
    .data = d[, w], 
    .variables = v,
    .fun = function(x) {
        x <- x[, !(names(x) %in% v)]
        # weighted averaging (safe version, also works when RELAREA doesn't sum to 1)
        x <- colSums(x * x$RELAREA) / sum(x$RELAREA)
        x[names(x) != "RELAREA"]
    }
)
print(xtable(d), type = "html")
write.csv(x = d, file = settings$files$out_objectid, row.names = FALSE)

References

Borja, A., J. Franco and V. Pérez, 2000. A Marine Biotic Index to Establish the Ecological Quality of Soft-Bottom Benthos Within European Estuarine and Coastal Environments. Marine Pollution Bulletin 40:1100-1114

Gittenberger A. and W. van Loon, 2013. Sensitivities of marine macrozoobenthos to environmental pressures in the Netherlands. Nederlandse Faunistische Mededelingen, 41 (2013) 79-112.

Shannon, C. E., 1948. A Mathematical Theory of Communication. Bell System Technical Journal 27: 379–423.

van Loon, W.M.G.M. and A.J. Verschoor, 2012. Benthic Ecosystem Quality Index 2: Application to Dutch Marine Benthos Data from the Period 1990 - 2010, Report RWS Waterdienst.

van Loon, W.M.G.M., A.J. Verschoor and A. Gittenberger, 2012. Benthic Ecosystem Quality Index 2: Design and Calibration of the BEQI-2 WFD Metric for Marine Benthos in Transitional Waters, Report RWS Waterdienst.

Acknowledgements

Angel Borja (AZTI-TECHNALIA, Spain), is kindly acknowledged for the permission to use the standard AMBI species list (ambi.azti.es).

Session information

sessionInfo()


Try the BEQI2 package in your browser

Any scripts or data that you put into this service are public.

BEQI2 documentation built on May 2, 2019, 8:19 a.m.