### HEADER #####################################################################
##' @title Combine several dissimilarity distance matrices
##'
##' @name PRE_FATE.speciesDistanceCombine
##'
##' @author Maya Guéguen
##'
##' @description This script is designed to create a distance matrix between
##' species, combining several dissimilarity distance matrices.
##'
##' @param list.mat.dist a \code{list} of matrices containing dissimilarity
##' distance values between each pair of species.
##' @param opt.min.noMat (\emph{optional}) default \code{length(list.mat.dist)}. \cr
##' An \code{integer} corresponding to the minimal number of distance matrices for
##' which each species should have values
##' @param opt.normal (\emph{optional}) default \code{TRUE}. \cr
##' If \code{TRUE}, all given distance matrices will be normalized
##' (see \href{PRE_FATE.speciesDistanceCombine#details}{\code{Details}})
##' @param opt.weights (\emph{optional}) default \code{NULL}. \cr
##' A \code{vector} of \code{double} (between \code{0} and \code{1})
##' corresponding to the weights for each distance matrix provided in
##' \code{list.mat.dist}. They must sum up to \code{1}.
##'
##' @details
##'
##' This function allows to obtain a \strong{distance matrix between species},
##' based on several dissimilarity distance matrices \strong{combined}
##' according to the following formula :
##'
##' \deqn{\text{mat.DIST} = \Sigma (\text{wei.i} * \text{mat.DIST}_{i})}
##'
##' If \code{opt.normal = TRUE}, two \emph{normalization} steps are applied
##' to each distance matrix before combining them :
##'
##' \enumerate{
##' \item a \strong{non-paranormal (npn)} transformation
##' (\code{\link[huge]{huge.npn}} function) to obtain Gaussian distributions
##' for all dissimilarity matrices used
##' \item a \strong{range} normalization to bring the values back between
##' \code{0} and \code{1} :
##'
##' \deqn{\text{mat.DIST}_{i} = \frac{\text{mat.DIST}_{i} -
##' min(\text{mat.DIST}_{i})}{max(\text{mat.DIST}_{i}) -
##' min(\text{mat.DIST}_{i})}}
##' }
##'
##'
##' @return A \code{matrix} containing the weighted (or not) combination of
##' the different transformed (or not) distance matrices given. \cr \cr
##'
##' The information for the combination of all distances is written in
##' \file{PRE_FATE_DOMINANT_speciesDistance.csv} file.
##'
##' @keywords functional distance
##'
##' @seealso \code{\link[huge]{huge.npn}}
##'
##' @examples
##'
##' ## Load example data
##' Champsaur_PFG = .loadData('Champsaur_PFG', 'RData')
##'
##' ## Species traits
##' tab.traits = Champsaur_PFG$sp.traits
##' tab.traits = tab.traits[, c('species', 'GROUP', 'MATURITY', 'LONGEVITY'
##' , 'HEIGHT', 'DISPERSAL', 'LIGHT', 'NITROGEN')]
##' str(tab.traits)
##'
##' ## Species niche overlap (dissimilarity distances)
##' DIST.overlap = Champsaur_PFG$mat.overlap
##' DIST.overlap[1:5, 1:5]
##'
##' ## Species functional distances (dissimilarity)
##' DIST.traits = PRE_FATE.speciesDistanceTraits(mat.traits = tab.traits
##' , opt.maxPercent.NA = 0.05
##' , opt.maxPercent.similarSpecies = 0.3
##' , opt.min.sd = 0.3)
##' DIST.traits$Chamaephyte[1:5, 1:5]
##'
##' ## Combine distances ---------------------------------------------------------
##' list.DIST = list(DIST.overlap, DIST.traits$Chamaephyte)
##' sp.DIST.n = PRE_FATE.speciesDistanceCombine(list.mat.dist = list.DIST
##' , opt.weights = c(0.2, 0.8))
##' sp.DIST.un = PRE_FATE.speciesDistanceCombine(list.mat.dist = list.DIST
##' , opt.norm = FALSE
##' , opt.weights = c(0.2, 0.8))
##' str(sp.DIST.n)
##'
##'
##'
##' \dontrun{
##' require(corrplot)
##' list.DIST = list(DIST.overlap, DIST.traits$Chamaephyte
##' , sp.DIST.un, sp.DIST.n)
##' names(list.DIST) = c('overlap', 'traits', 'un-normed', 'normed')
##'
##' par(mfrow = c(2, 2))
##' for (li in 1:length(list.DIST))
##' {
##' tmp = list.DIST[[li]]
##' tmp = tmp[colnames(sp.DIST.n), colnames(sp.DIST.n)]
##' corrplot(tmp, method = 'shade'
##' , type = 'lower', cl.lim = c(0, 1)
##' , is.corr = FALSE, title = names(list.DIST)[li])
##' }
##'
##' require(foreach); require(ggplot2); require(ggdendro)
##' hc = hclust(as.dist(sp.DIST.n))
##' pp = ggdendrogram(hc, rotate = TRUE) +
##' labs(title = 'Hierarchical clustering based on species distances')
##' plot(pp)
##' }
##'
##'
##' @export
##'
##' @importFrom utils combn
##' @importFrom huge huge.npn
##'
## END OF HEADER ###############################################################
PRE_FATE.speciesDistanceCombine = function(list.mat.dist
, opt.min.noMat = length(list.mat.dist)
, opt.normal = TRUE
, opt.weights = NULL
){
#############################################################################
## Check existence of parameters
if (missing(list.mat.dist) || is.null(list.mat.dist))
{
stop("No data given!\n (missing `list.mat.dist` information)")
}
## CHECK parameter list.mat.dist
if(!.testParam_notInClass(list.mat.dist, "list"))
{
if (length(list.mat.dist) > 0)
{
for (i in 1:length(list.mat.dist))
{
if (!.testParam_notInClass(list.mat.dist[[i]], c("dist", "niolap", "matrix"), FALSE))
{
list.mat.dist[[i]] = as.matrix(list.mat.dist[[i]])
if (ncol(list.mat.dist[[i]]) != nrow(list.mat.dist[[i]]))
{
stop(paste0("Wrong dimension(s) of data!\n `list.mat.dist[[",
i,
"]]` does not have the same number of rows (",
nrow(list.mat.dist[[i]]),
") and columns (",
ncol(list.mat.dist[[i]]),
")"
))
}
} else {
stop(paste0("Wrong type of data!\n `list.mat.dist[["
, i
, "]]` must be a dissimilarity object "
, "(`dist`, `niolap`, `matrix`)"))
}
}
} else
{
stop("Wrong dimension(s) of data!\n `list.mat.dist` must be of length > 0")
}
if(!is.null(names(list.mat.dist)))
{
names_mat = names(list.mat.dist)
} else {
names_mat = paste0("DIST", 1:length(list.mat.dist))
}
} else
{
stop(paste0("Wrong type of data!\n `list.mat.dist` must be a list of "
, "dissimilarity objects (`dist`, `niolap`, `matrix`)"))
}
## CHECK parameter opt
if (!.testParam_notDef(opt.weights))
{
if (length(opt.weights) != length(list.mat.dist))
{
stop(paste0("Wrong type of data!\n `opt.weights` must contain "
, length(list.mat.dist), " values summing up to 1"))
}
.testParam_notBetween.m("opt.weights", opt.weights, 0, 1)
if (sum(opt.weights) != 1)
{
stop(paste0("Wrong type of data!\n `opt.weights` must contain "
, length(list.mat.dist), " values summing up to 1"))
}
} else
{
opt.weights = rep(1, length(list.mat.dist))
}
cat("\n\n #------------------------------------------------------------#")
cat("\n # PRE_FATE.speciesDistanceCombine")
cat("\n #------------------------------------------------------------# \n")
#############################################################################
### PREPARATION OF DATA
#############################################################################
{
cat("\n ---------- INFORMATION : AVAILABLE \n")
no_mat = length(list.mat.dist)
no_species = sapply(list.mat.dist, nrow)
names_species = lapply(list.mat.dist, rownames)
## Check for correspondence :
cat("\n Number of distance matrices : ", no_mat)
cat("\n Number of species for each distance matrix : ", no_species)
cat("\n Comparison of matrix' dimensions : \n")
combi = combn(1:no_mat, 2)
inti_all = table(unlist(names_species))
inti_all = names(inti_all)[which(inti_all >= opt.min.noMat)]
cat("\n Number of species with values for at least", opt.min.noMat, " matrices : ", length(inti_all))
cat("\n")
# inti_all = unique(unlist(names_species))
# for(x in 1:ncol(combi)){
# inti = intersect(names_species[[combi[1, x]]], names_species[[combi[2, x]]])
# inti_all = intersect(inti_all, inti)
# cat("\n> Matrices ", names_mat[combi[1, x]], "-", names_mat[combi[2, x]], ": "
# , length(inti), "species in common")
# }
# cat("\n Number of species with values for all matrices : ", length(inti_all))
# cat("\n")
names_species.toKeep = inti_all
}
# Keep only species present in all distance matrices
mat.dist = lapply(list.mat.dist, function(x) {
x[which(rownames(x) %in% names_species.toKeep), which(colnames(x) %in% names_species.toKeep)]
})
#############################################################################
### COMBINE ALL DISTANCE MATRICES
#############################################################################
if (opt.normal)
{
mat.dist = lapply(mat.dist, function(mat) {
x = mat[lower.tri(mat, diag = FALSE)]
x = matrix(x, ncol = 1)
## Non-paranormal transformation
y = huge.npn(x)
## Normalization by maximum
z = (y - min(y)) / (max(y) - min(y))
## Result
res = mat
res[lower.tri(res, diag = FALSE)] = z
return(as.matrix(as.dist(res)))
})
}
## Fill missing species whith NA if there is any
mat.dist = lapply(mat.dist, function(x) {
if (ncol(x) < length(names_species.toKeep)) {
tmp.col = colnames(x)
tmp.row = rownames(x)
miss = setdiff(names_species.toKeep, colnames(x))
x = cbind(x, matrix(rep(NA, length(miss) * nrow(x)), ncol = length(miss)))
x = rbind(x, matrix(rep(NA, length(miss) * ncol(x)), nrow = length(miss)))
colnames(x) = c(tmp.col, miss)
rownames(x) = c(tmp.row, miss)
}
return(x)
})
## Multiply each distance matrix by its weight
mat.species.DIST = lapply(1:no_mat, function(x) {
tmp = as.matrix(mat.dist[[x]])
wei = opt.weights[x]
return(wei * tmp)
})
## Calculate each pairwise weight (not taking into account dist with NA)
mat.wei = lapply(mat.species.DIST, function(x) ifelse(is.na(x[]), 0, 1))
mat.wei = lapply(1:length(opt.weights), function(x) opt.weights[x] * mat.wei[[x]])
mat.wei = Reduce('+', mat.wei)
## Summing weighted distance matrices
## and divide by weights pairwise corrected
res = Reduce('+', lapply(mat.species.DIST, function(x) replace(x, is.na(x), 0)))
res = res*NA^!Reduce(`+`, lapply(mat.species.DIST, function(x) !is.na(x)))
mat.species.DIST = res / mat.wei
#############################################################################
cat("\n> Done!\n")
## SAVE results
toSave = as.matrix(mat.species.DIST)
rownames(toSave) = colnames(toSave)
write.csv(toSave
, file = paste0("PRE_FATE_DOMINANT_speciesDistance.csv")
, row.names = TRUE)
message(paste0("\n The output file \n"
, " > PRE_FATE_DOMINANT_speciesDistance.csv \n"
, "has been successfully created !\n"))
return(mat.species.DIST)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.