weimea: Weighted Mean Analysis

#' Testing the relationship species attributes and sample attributes by fourth corner method
#' 
#' Function calculating fourth corner analysis between sample attributes (environmental variables) and species attributes (traits) and testing it by row based, column based or max permutation test.
#' 
#' @name test_fourth
#' @param env Vector, matrix or data frame with sample attributes (environemntal variables).
#' @param com Matrix of data frame with species composition data.
#' @param traits Vector, matrix or data frame with species attributes (traits)
#' @param cwm An object of the class \code{cwm}.
#' @param perm Number of permutations.
#' @param test Vector of character values. Which test should be conducted? Partial match to \code{'rowbased'} row-based permutation test (called Model 2 in the Legendre et al. 1997 and Dray & Legendre 2008), \code{'colbased'} for column-based permutation test (Model 4), and \code{'max'} for max test (selecting the higher from \code{rowbased} and \code{colbased} result, Model 6). \code{'all'} includes all three tests. See \code{Details}.
#' @param adjustP Logical, default FALSE. Should be the P-values adjusted? If \code{adjustP = TRUE}, the last column in the results is adjusted by method selected in \code{p.adjust.method}.
#' @param p.adjust.method A string indicating the method of P-value adjustement, see \code{\link{p.adjust.methods}} for possible choices.
#' @param parallel NULL (default) or integer number. Number of cores for parallel calculation of modified permutation test. Maximum number of cores should correspond to number of available cores on the processor.
#' @param x,object object of the class \code{"cwm"}, generated by function \code{cwm}.
#' @param digits number of digits reported by \code{print} method on object of \code{"cwm"} class (default is 3).
#' @param missing.summary Logical; should be the summary of values missing in \code{env}, \code{cwm} and \code{traits} be printed along to the result of \code{test_cwm} analysis? Default is \code{TRUE}.
#' @param eps.Pvalue Values of P below this threshold will be printed as \code{< [eps]} in the output.
#' @param signif.stars Logical; if TRUE, P-values are additionally encoded visually as 'significance stars' in order to help scanning of long coefficient tables. It defaults to the show.signif.stars slot of \code{\link{options}}.
#' @param alpha,line,cex.lab,par.mar,box.col,box.lwd Graphical parameters for \code{plot} function.
#' @param ... Other arguments for \code{print}, \code{summary} or \code{coef} functions (not implemented yet).

#' @export
#' @examples 
#' data (vltava)
#' vltava.env <- vltava$env[,c('pH', 'COVERE32')]
#' vltava.com <- vltava$herbs$spe
#' vltava.traits <- vltava$herbs$traits
#' fc <- test_fourth (env = vltava.env, com = vltava.com, traits = vltava.traits)
#' @details
#' Environmental variables, species composition and traits can be provided in two ways: either each object separately, using arguments \code{env}, \code{com} and \code{traits}, or using only \code{env} and \code{cwm}; in the later case, the function \code{extract} is used to extract the \code{com} and \code{traits} from the \code{cwm} object.
#' Specific issue related to weighted mean is the case of missing species attributes. In current implementation, species with missing species attributes are removed from sample x species matrix prior to permutation of species attributes among species. 
#' @return  Function \code{test_fourth} returns the list of the class \code{"testFOURTH"} (with \code{print} and \code{summary} methods), which contains the following items:
#' \itemize{
#'  \item \code{call} Function call.
#'  \item \code{out} Data frame with results of calculation; first two columns contain the original fourth corner metric (\code{r_fc}) and its Chessel transformation (\code{r_ch}); following columns contain some of the following results of permutation tests: \code{P.row}, \code{P.col} and \code{P.max}. If \code{adjustP = TRUE}, the last column contains P values adjusted by \code{p.adjust} function.
#'  \item \code{miss} List with missing values of \code{env}, \code{traits} or \code{cwm} (the last provided for compatibility with \code{test_cwm} function and refers to samples which, after removing missing \code{traits} values, do not contain any species.)
#'  \item \code{param} Setting of input parameters (\code{perm} and \code{test}).
#'  }
#' @seealso \code{\link{cwm}} \code{\link{test_cwm}}

#' @export
test_fourth <- function (env, com = NULL, traits = NULL, cwm = NULL, perm = 499, test = "max", parallel = NULL, p.adjust.method = p.adjust.methods, adjustP = FALSE)
{
  CALL <- match.call ()
  TEST <- c('none', 'rowbased', 'colbased', 'max', 'all')
  test <- match.arg (test, TEST, several.ok = T)
  if ('none' %in% test) test <- 'none'
  if ('all' %in% test) test <- c('rowbased', 'colbased', 'max')
  env <- as.data.frame (as.matrix (env))
  if ( all (!is.null(cwm)&!is.null (com)&!is.null (traits)) | !is.null (cwm)&!is.null (com) | !is.null(cwm)&!is.null(traits)) stop ('Species composition and traits need to be provided using either com and traits argument or cwm argument; combining both is not possible!') 
  if (is.null (com) & is.null (traits)) {
    if (is.null (cwm)) stop ('You need to provide species composition data and traits, either using com and traits arguments, or cwm!') else
    {
      traits <- extract (cwm, 'traits')
      com <- extract (cwm, 'com')
    }
  }
  traits <- as.data.frame (traits) # makes sure that even if the entry object is vector it can still calculate 
  env <- as.data.frame (env)
  if (ncol (as.matrix (com)) != nrow (as.matrix (traits))) stop ("The number of species in 'com' does not fit the number of species in 'traits'!")
  if (nrow (as.matrix (com)) != nrow (as.matrix (env))) stop ("The number of samples in 'com' does not fit the number of samples in 'env'!")
  vars.names <- expand.grid (traits = colnames (traits), env = colnames (env), stringsAsFactors = FALSE)
  if (is.null (parallel)) {
      res <- apply (vars.names, 1, FUN = function (var12) test_fourthCpp (e = env[, var12['env'], drop = TRUE], L = as.matrix (com), t = traits[, var12['traits'], drop = TRUE], test = test, perm = perm))
    }
    else
    {
      cl <- parallel::makeCluster (parallel)
      parallel::clusterExport (cl, varlist = c('vars.names', 'com', 'traits', 'env', 'test', 'perm'), envir = environment ())
      parallel::clusterEvalQ (cl, eval (call ('library', 'weimea')))
      res <- parallel::parApply (cl, vars.names, 1, FUN = function (var12) test_fourthCpp (e = as.matrix (env[, var12['env'], drop = F]), L = as.matrix (com), t = as.matrix (traits[, var12['traits'], drop = F]), test = test, perm = perm))
    }
    names (res) <- apply (vars.names, 1, FUN = function (x) paste (x[1], x[2], sep = ' / '))

  res.miss <- lapply (res, FUN = function (x) x[(length (x)-2):length (x)])
  res.coef <- lapply (res, FUN = function (x) x[1:(length (x)-3)])
  res.coef.df <- do.call (rbind.data.frame, res.coef)  # originally res.out
  test.choice <- c('rowbased', 'colbased', 'max')
  P.test <- c ('P_row', 'P_col', 'P_max')[test.choice %in% test]
  res.out <- cbind (res.coef.df [,!substr (colnames (res.coef.df), 1, 2) %in% 'P_'], res.coef.df[, P.test, drop = F])  # deletes all P values and adds only those specificed in "test" argument
  if (adjustP && test != 'none') {
    res.out <- cbind (res.out, p.adjust (res.out[, ncol (res.out)], method = p.adjust.method))
    colnames (res.out)[ncol (res.out)] <- paste (colnames (res.out)[ncol (res.out)-1], 'adj', sep = '_')
  }
  result <- list (call = CALL, out = res.out, miss = res.miss, param = list (perm = perm, test = test, p.adjust.method = p.adjust.methods, adjustP = adjustP), var.names = list (env = colnames (env), traits = colnames (traits)), orig.data = list (env = env, com = com, traits = traits))
  class (result) <- 'testFOURTH'
  return (result)
}

#' @rdname test_fourth
#' @export
print.testFOURTH <- function (x, digits = max(3, getOption("digits") - 3), missing.summary = FALSE, eps.Pvalue = 0.001, signif.stars = getOption("show.signif.stars"),...)
{
  cs.ind <- c(1, 2)
  tst.ind <- 0
  df.ind <- c(3, 4)
  cat("\nCall:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"),
      "\n\n", sep = "")
  printCoefmat2 (x$out, digits = digits, cs.ind = cs.ind, tst.ind = tst.ind, df.ind = df.ind, eps.Pvalue = 0.001, signif.stars = signif.stars,...)
  if (missing.summary) printMissum (x$miss)
}

# This function is already in test_cwm, can be deleted here:
# printMissum <- function (x){
#   miss <- lapply (x, FUN = function (a) lapply (a, length))
#   miss <- as.matrix (do.call (rbind.data.frame, miss))
#   colnames (miss) <- c('env', 'traits', 'samples')
#   if (sum (miss) == 0) cat("\nThere are no missing env, traits or sample values.\n") else
#   {
#     cat("\nCount of missing values:\n")
#     print.default (miss)
#   }
#   invisible (miss)
#   }

#' @rdname test_fourth
#' @export
coef.testFOURTH <- function (object, ...)
  return (object$out)

#' @rdname test_fourth
#' @export
plot.testFOURTH <- function (x, alpha = 0.05, line = NA, cex.lab = 1.5, par.mar = c(0.5, 0.5, 0.5, 0.5), box.col = c('blue', 'red'), box.lwd = 2, ...){
  # function modelled according to plot.testCWM - keep the same arguments
  opar <- par (c('mfcol', 'mar', 'mgp', 'oma', 'xpd'))  # include all graphical par set globally
  
  trait.names <- x$var.names$traits
  env.names <- x$var.names$env
  TRA <- x$orig.data$traits
  ENV <- x$orig.data$env
  COM <- x$orig.data$com
  
  no_traits <- length (trait.names)
  no_env <- length (env.names)
  
  par (oma = c(3, 3, 1, 1))  # outer margin
  par (mgp = c(2, 0.3, 0))  # where title, axis labels and axis line should be plotted
  layout_matrix <- matrix (seq (1, length (trait.names)*length (env.names)), nrow = no_traits, byrow = FALSE)
  layout (layout_matrix)
  par (mar = par.mar, xpd = NA)
  
  test <- x$out[,max (which (substr (colnames (x$out), 1, 2) %in% 'P_'))]
  
  traits.env <- expand.grid (traits = trait.names, env = env.names)
  
  left_label <- vector ('logical', length = no_traits*no_env)
  left_label[1:nrow(layout_matrix)] <- TRUE
    
  bottom_label <- vector ('logical', length = no_traits*no_env)
  bottom_label [seq (nrow(layout_matrix), no_traits*no_env, by = nrow(layout_matrix))] <- TRUE
  
  traits.env <- cbind (traits.env, left_label = left_label, bottom_label = bottom_label)
  if (!is.null (test))
     traits.env <- cbind (traits.env, test = test, r = x$out$r_fc) 
  
  for (i in seq (1, nrow (traits.env)))
  {
    te <- traits.env[i,]
    TRA0 <- TRA[[te$traits]] 
    ENV0 <- ENV[,te$env]  
    COM0 <- COM[!is.na (ENV0), !is.na (TRA0)]  # remove species which have missing spe or sam attributes
    TRA0 <- TRA0[!is.na (TRA0)]
    ENV0 <- ENV0[!is.na (ENV0)]
    
    TRA_COM <- as.vector (TRA0 * t(as.numeric (COM0>0)))
    ENV_COM <- ENV0 * (as.numeric (COM0>0))
    COM_COM <- as.vector (as.matrix (COM0))
    MER_COM <- as.data.frame (cbind (TRA_COM, ENV_COM, COM_COM))[COM_COM > 0,] # merged together and removed zero abundances
    COM_scaled <- vegan::decostand (MER_COM$COM_COM, 'range')*1 + 0.5
    plot (TRA_COM ~ ENV_COM, data = MER_COM, pch = 16, ann = F, axes = F, cex = COM_scaled)
    axis (1, labels = ifelse (te$bottom_label, TRUE, FALSE), tck = 0.05)
    axis (2, labels = ifelse (te$left_label, TRUE, FALSE), tck = 0.05)
    box (col = 'grey')
    title (xlab = ifelse (te$bottom_label, as.character (te$env), ''), ylab = ifelse (te$left_label, as.character (te$traits), ''), line = line, cex.lab = cex.lab)
    if (!is.null (test)) {
      bc <- ifelse (as.numeric (te$r) < 0, box.col[1], box.col[2])
      if (as.numeric (te$test) <= alpha){
        box (col = bc, lwd = box.lwd)
      } 
    }
  }
  par (opar)
  invisible ()
}