#' @name rsp.cor
#' @title Species correlations
#' @aliases rsp_cor_species
#' @description Functions for studying relationships between
#' species in respeciate data sets.
#' @description \code{\link{rsp_cor_species}} generates a by-species correlation
#' matrix of the supplied respeciate data sets.
#' @param rsp \code{respeciate} object, a \code{data.frame} of respeciate
#' profiles.
#' @param min.n \code{numeric} (default 3), the minimum number of species measurements
#' needed in a profile for the function to use it in correlation calculations.
#' Here, it should be noted that this does not guarantee the three matched
#' pairs of measurements needed to calculate a correlation coefficient because
#' not all profiles contain all species, so there may still be insufficient
#' overlap on a case-by-case basis.
#' @param cols a series of \code{numeric}, \code{character} or other class values
#' that can be translated into a color gradient, used to color valid cases when
#' generating plots and color keys, default \code{c("#80FFFF", "#FFFFFF", "#FF80FF")}
#' equivalent to \code{\link{cm.colors}} output.
#' @param na.col \code{numeric}, \code{character} or other class that can be
#' translated into a single color, used to color \code{NA}s when generating
#' plots and color keys, default grey \code{"#CFCFCF"}.
#' @param heatmap.args \code{logical} or \code{list}, heat map settings. Options
#' include \code{TRUE} (default) to generate the heat map without modification;
#' \code{FALSE} to not plot it; or a list of heat map options to alter the plot
#' default appearance. The plot, a standard heat map with the dendrograms
#' removed, is generated using \code{\link[stats]{heatmap}}, so see associated
#' documentation for valid options.
#' @param key.args \code{logical} or \code{list}, color key settings if plotting
#' the correlation matrix heat map. Options include \code{TRUE} (default) to
#' generate the key without modification; \code{FALSE} to not include the key;
#' or a list of options to alter the key appearance.
#' @param report \code{logical} or \code{character}, the required function
#' output. Options include: \code{'silent'} (default), to return the
#' correlation matrix invisibly; \code{TRUE} to return the matrix
#' (visibly); and, \code{FALSE} to not return it.
#' @return By default \code{rsp_cor_species} invisibly returns the calculated
#' correlation matrix a plots it as a heat map, but arguments including
#' \code{heatmap} and \code{report} can be used to modify function outputs.
#NOTE
# provisional list options done for heatmap.args and key.args!!!
# but will need tidying
# PLUS when done maybe for make local function so same
# can be used in other similar functions
#' @rdname rsp.cor
#' @export
# using data.table for dcast
######################
#cor
#generate correlation matrix
######################
########################
#in development
#######################
#to think about
#########################
#changed name from rsp_species_cor to rsp_cor_species
# rsp_species_cor
# then you could also have rsp_cor_profile
#speed up the correlation calculation
# currently painful using for loop
# can't use stats::cor on whole data set and include min.n option
# but there should be a way to make this a lot faster and retain that...
#formals for cor calculation control
# include cor:use, etc???
#plot handling could be tidier
# at moment plot handling is in an if/else
# with/without NAs handled differently
# but think this could be passed down into key and plot
# holding for now until key is in other plots
# and have an idea how it'll work
# (see also key)
#col key
# currently using local function rsp_col_key
# could export or more to another package but would added to depends
#plot and col key fine control
# suspecting will need better fine control of both
# re plot
# font size, heatplot options
# re col key
# font size, scale position, style/type, range
# currently holding (see plot handling and col key)
#starting thinking about above
# see output control below
#output control
# currently uses lots of settings:
# heatmap TRUE/FALSE to plot heatmap of correlation matrix
# or list of heatmap plot settings
# key TRUE/FALSE to add a col key to the heatmap
# or list of col key plot settings
# report "silent" to return the correlation matrix invisibly,
# or TRUE/FALSE to return it visibly or not at all.
# The list options are not yet done...
#aa <- rsp_profile(rsp_find_profile("ae8", by="profile_type"))
#rsp_cor_species(aa)
#rsp_cor_species(rsp_q_pm.ae8())
rsp_cor_species <- function(rsp, min.n = 3,
cols = c("#80FFFF", "#FFFFFF", "#FF80FF"),
na.col = "#CFCFCF", heatmap.args = TRUE,
key.args = TRUE, report = "silent"){
x <- rsp #quick fix for now
######################
# SPECIEUROPE data
######################
if("rsp_eu" %in% class(x)){
x <- .rsp_eu2us(x)
}
#######################
#if ref missing
.x <- rsp_dcast(x, widen="species")
#no point doing any have less than min.n values?
.test <- apply(.x, 2, function(x) length(.x[!is.na(x)]))
.x <- .x[.test > min.n]
#any point doing any with only 1 unique value??
#############################
#calculate correlations
#############################
#Could not stop bad cors using...
#.cor <- cor(.x[-1:-2], use="pairwise.complete.obs")
#sd = 0 warnings?
# maybe cases where 1 unique case, e.g. x=c(1,1,1,1,1), y=c(2,2,2,2)
# or where overlap is low <2 x=c(1,1,1,NA,NA), y=c(NA,NA,1,NA,NA)
#I'm calling this the pokemon cludge...
#got to catch 'em all...
f <- function(x, y) {
test <- !is.na(x) & !is.na(y)
x <- x[test]
y <- y[test]
if(length(x) >= min.n){
if(length(unique(x))==1 || length(unique(y))==1){
NA
} else {
cor(x,y, use="pairwise.complete.obs")
}
} else { NA }
}
.X <- .x[-1:-2]
.cor <- as.data.frame(matrix(NA, nrow=ncol(.X), ncol=ncol(.X)))
for(i in 1:ncol(.cor)) {
for(j in 1:ncol(.cor)) {
.cor[i, j] <- f(.X[, i], .X[, j]) # apply f() to each combination
}
}
row.names(.cor) <- colnames(.cor) <- colnames(.X)
#cheats to test plot output
#base r graphics is getting painful
#.cor[.cor>0.5 & .cor<1] <- 0.5
#.cor[is.na(.cor)] <- -0.5
#.cor <- .cor/2
#NB: plot is heatmap without the dendrograms
#stackover suggests it is hard going modifying
#https://stackoverflow.com/questions/29893630/r-draw-heatmap-with-clusters-but-hide-dendrogram
if((is.logical(heatmap.args) && heatmap.args) | (is.list(heatmap.args))){
.cols.max <- 300
if(max(.cor, na.rm=TRUE) < 1){
.z <- (1 - max(.cor, na.rm=TRUE))*100
.cols.max <- .cols.max - .z
}
.tmp <- .cor
.tmp[is.na(.tmp)] <- -2
cols <- c(rep(na.col, 100), colorRampPalette(cols)(200))
if(any(is.na(.cor))){
#plot when
#.cor includes nas
.hm.cols <- cols[1:.cols.max]
.k.na <- na.col
} else {
#plot when
#.cor DOES NOT includes nas
.cols.min <- 101
if(min(.cor, na.rm=TRUE) < 1){
.z <- (1 + min(.cor, na.rm=TRUE))*100
.cols.min <- .cols.min + .z
#print(.cols.min)
}
.hm.cols <- cols[.cols.min:.cols.max]
.k.na <- NA
}
.hm <- list(x = as.matrix(.tmp), Rowv = NA, Colv = NA,
col = .hm.cols, scale = "none")
if(is.list(heatmap.args)){
.hm[names(heatmap.args)] <- heatmap.args
}
do.call(heatmap, .hm)
if((is.logical(key.args) && key.args) | (is.list(key.args))){
.k <- list(key = c(-1,1), cols=cols[101:300],
#type=1,
ticks= -1:1, y=0.75, na.col = .k.na)
if(is.list(key.args)){
.k[names(key.args)] <- key.args
}
do.call(.rsp_col_key, .k)
}
}
#report handling
#output
####################
#currently aiming for
# report (invisible), plot or both
# report class data.frame
# output plot only does not mean anything...
if((is.logical(report) && report) | (is.character(report))){
if(is.character(report) && report=="silent"){
return(invisible(.cor))
} else {
return(.cor)
}
} else {
return(invisible(NULL))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.