In BENMMI: Benthic Multi-Metric Index

Multidimensional scaling (MDS)

to_log("INFO", "Entering section 'Multidimensional scaling'...")

The plot below is non-metric multidimensional scaling plot of the Bray-Curtis dissimilarity (distance). This plot can be used to identify outliers in the data.

The Bray-Curtis dissimilarity is given by:

D_ij= Σ_kabs(n_ik - n_jk) / Σ_k(n_ik + n_jk)

where n_ik and n_jk is the abundance of species k at locations i and j respectively.

# create table in wide format: 
# - left: sample identifiers
# - right: abundance per taxon
d <- d_mmi %>%
    select(  OBJECTID, HABITAT, DATE, POOL_RUN, POOL_ID, TAXON, VALUE) %>%
    mutate(DATE = format(DATE)) %>%
    group_by(OBJECTID, HABITAT, DATE, POOL_RUN, POOL_ID, TAXON) %>%
    summarise(VALUE = sum(VALUE)) %>%
    spread(key = TAXON, value = VALUE, fill = 0L)

# select abundances only
N <- as.matrix(d[, -(1:5)])

# create Bray-Curtis distance matrix
n <- nrow(N)
D <- matrix(data = 0, nrow = n, ncol = n)
for (i in 1:(n-1)) {
    n1 <- N[i, ]
    for (j in (i+1):n) {
        n2 <- N[j, ]
        D[i, j] <- bray_curtis(n1, n2)
        D[j, i] <- D[i, j]
    }
}

# apply sammon MDS
S <- MASS::sammon(D, trace = FALSE, tol = 1.0e-6)

# extract MDS configuration
V <- S$points %>%
    as.data.frame %>%
    set_names(c("V1", "V2"))

# add object identifiers
V$OBJECTID <- d$OBJECTID
V$HABITAT <- d$HABITAT
V$DATE <- d$DATE
V$MARKER <- 1:nrow(V)
V$MEDIAN_BC <- apply(X = D, MARGIN = 1, FUN = median)
if (isTRUE(settings$pooling)) {
    V$POOL_RUN <- d$POOL_RUN
    V$POOL_ID <- d$POOL_ID
} else {
    V$SAMPLEID <- d_mmi$SAMPLEID[d$POOL_ID %>% match(d_mmi$ID)]
}

Multidimensional scaling (MDS) presses the m × m dimensional distance matrix D into a 2 dimensional space given that the distortion of the original distances in D is minimized, where m is the number of sampling sites. See Sammon (1969) for details.

The figure below gives the multidimensional scaling representation of matrix D. Potential outliers are indicated by text markers.

# detect potential outliers (two rounds of convex hull)
d <- V %>% select(V1, V2, MARKER)
marker_pot_outlier <- d$MARKER[chull(x = d$V1, y = d$V2)]
d <- d %>% filter(!(MARKER %in% marker_pot_outlier))
marker_pot_outlier <- c(marker_pot_outlier, d$MARKER[chull(x = d$V1, y = d$V2)])
V$POT_OUTLIER <- V$MARKER %in% marker_pot_outlier

# create look-up-table for figure caption
V <- V %>% 
    mutate(id_chr = paste(OBJECTID, HABITAT, sep = "-")) %>% 
    mutate(id_num = id_chr %>% match(sort(unique(id_chr)))) %>%
    mutate(id_num = factor(x = id_num, levels = sort(unique(id_num)), ordered = TRUE))

lut <- V %>% 
    select(id_num, id_chr) %>% 
    distinct %>%
    arrange(id_num)

wzxhzdk:3

Multidimensional scaling plot. The meaning of the numbers in the legend is: `r paste(lut$id_num, lut$id_chr, sep = " = ", collapse = "; ")`

The table below can be used to identify potential outliers by matching the text markers in the figure above with those in column MARKER below. The MDS-coordinates are given by: (V1, V2); and MEDIAN_BC is the median value of the Bray-Curtis dissimilarity. The Bray-Curtis dissimilarity is given on a scale ranging from 0 (= identical) to 1 (= very different from other samples) and can be used to identify potential ouliers.

# filter potential outliers
V <- V %>%
    filter(POT_OUTLIER)

# create table with information on potential outliers
if (isTRUE(settings$pooling)) {
    V %>%
        select(MARKER, V1 = round(V1, 2), V2 = round(V2, 2), 
               AREA_CODE = id_num, OBJECTID, HABITAT, DATE, 
               POOL_RUN, POOL_ID, MEDIAN_BC) %>%
        mutate(DATE = format(DATE)) %>%
        xtable %>% 
        print(type = "html")
} else {
    V %>%
        select(MARKER, V1, V2, 
               OBJECTID, HABITAT, DATE,
               SAMPLEID, MEDIAN_BC) %>%
        mutate(
            V1 = round(V1, 2),
            V2 = round(V2, 2),
            DATE = format(DATE)) %>%
        xtable %>% 
        print(type = "html", include.rownames = FALSE)
}