# Utils
# Author: Walter Xie
# Accessed on 29 Nov 2016
#' @name UtilsCombine
#' @title Utils to combine data frames or matrices into the required data format
#'
#' @details
#' \code{getTriMatrix} converts pairwised comparison result
#' into a symmetric triangular matrix.
#' The pairwised comparison result is stored in a data frame.
#' Two of their columns must have pair names in the same order,
#' which determine the matrix's row names and column names.
#' The 1st data frame goes to the lower triangle of the matrix,
#' and the 2nd is in upper triangle.
#'
#' @param df The data frame containing pairwised comparison result,
#' such as correlations.
#' For example, a data frame can be
#' \tabular{rrrr}{
#' l1 \tab l2 \tab corr\tab sign\cr
#' 16S \tab 18S \tab 0.827 \tab 0.001\cr
#' 16S \tab ITS \tab 0.585 \tab 0.001\cr
#' 18S \tab ITS \tab 0.729 \tab 0.001
#' }
#' @param key Two column names containing the pair names,
#' which determine the output matrix's row names and column names.
#' It must make row and column names same.
#' @param value The column's values are going to fill in the output matrix.
#' @param na.to.0 Logical, replace all NA to 0, as default.
#' @param order.by A vector to order matrix rows and columns.
#' It must be a subset or same as their names.
#' @keywords utils
#' @export
#' @examples
#' corr.tri <- getTriMatrix(corr.df)
#'
#' @rdname UtilsCombine
getTriMatrix <- function(df, row.col=c("l1", "l2"), value="corr", na.to.0=TRUE, order.by=c()) {
if (!all(c(row.col, value) %in% colnames(df)))
stop("Invalid inputs: cannot find column names ", paste(c(row.col, value), collapse = ","), " !")
df2 <- df
df2[,row.col[1]] <- df[,row.col[2]]
df2[,row.col[2]] <- df[,row.col[1]]
df2 <- rbind(df2, df) # Duplicating/reversing data makes symmetric matrix
require(reshape2)
m <- dcast(df2, as.formula(paste(row.col, collapse ="~")), value.var = value)
rownames(m) <- m[,row.col[1]]
# rm column l1
m <- m[ , -which(colnames(m) %in% row.col)]
# order rows and columns
if (length(order.by) > 1) {
if ( !all( is.element(order.by, rownames(m)) ) )
stop("order.by must be a subset or same as row/colnames !")
m <- m[match(order.by, rownames(m)), ]
m <- m[, match(order.by, colnames(m))]
} else {
m <- m[order(rownames(m)), ]
m <- m[, order(colnames(m))]
}
if (na.to.0)
m[is.na(m)] <- 0
if (!all(rownames(m) == colnames(m)))
stop("Invaild result: key 'row.col' must make row/colnames same ! \n",
"Rownames: ", paste(rownames(m), collapse = ","), ". \n",
"Colnames: ", paste(rownames(m), collapse = ","))
return(m)
}
#' @details \code{combineTriMatrix} combines two symmetric triangular matrices into one.
#' The 1st matrix goes to the lower triangle in the combined matrix,
#' and the 2nd to upper triangle.
#'
#' @param tri.m1,tri.m2 Two symmetric triangular matrices,
#' which can be the output from \code{getTriMatrix}.
#' @keywords utils
#' @export
#' @examples
#' corr.sign.tri <- combineTriMatrix(corr.tri, sign.tri)
#'
#' @rdname UtilsCombine
combineTriMatrix <- function(tri.m1, tri.m2) {
if (!all(rownames(tri.m1) == rownames(tri.m2)) || !all(colnames(tri.m1) == colnames(tri.m2)))
stop("Invalid inputs: row or column names not same !")
tri.m1[upper.tri(tri.m1)] <- NA # lower tri
tri.m2[lower.tri(tri.m2)] <- NA # upper tri
m <- tri.m1
m[upper.tri(m)] <- tri.m2[upper.tri(tri.m2)] # Combine matrices
return(m)
}
#' @details \code{nmdsTriMatrix} makes a NMDS plot for one of
#' symmetric triangular.
#'
#' @param tri.m A symmetric triangular matrix from \code{getTriMatrix}.
#' @param text.or.point,text.size,text.repel,title,title.add.stress,...
#' See \code{\link{ggNMDSPlot}}.
#' @keywords utils
#' @export
#' @examples
#' nmds <- nmdsTriMatrix(corr.tri) + expand_limits(x = c(-1, 1), y=c(-1, 1))
#'
#' @rdname UtilsCombine
nmdsTriMatrix <- function(tri.m, text.or.point=1, text.size=5, text.repel=T,
title="NDMS plot", title.add.stress=F, ...) {
require(gg1L)
nmds <- gg1L::ggNMDSPlot(dist(tri.m), text.or.point=text.or.point, text.size=text.size,
text.repel=text.repel, title=title, title.add.stress=title.add.stress,
title.hjust=0.5, ...)
return(nmds)
}
#' @details \code{plotCombinedTriMatrix} plot a heatmap on the combined
#' symmetric triangular matrix.
#'
#' @param tri.m1.m2 The combined symmetric triangular matrix
#' from \code{combineTriMatrix}.
#' @param lower.lim,upper.lim,breaks.by To configure the legend bar.
#' @param label.digits,data.levels,high,mid,low,title,legend.title,...
#' See \code{\link{ggHeatmap}}.
#' @keywords utils
#' @export
#' @examples
#' hm <- plotCombinedTriMatrix(corr.sign.tri)
#'
#' @rdname UtilsCombine
plotCombinedTriMatrix <- function(tri.m1.m2, label.digits=3, lower.lim=NA, upper.lim=NA, breaks.by=NA,
data.levels=c(), high = "#f46d43", mid = "#ffffbf", low = "#3288bd",
title="", legend.title="Correlations", ...) {
cat("min non-zero value is ", min(tri.m1.m2[tri.m1.m2>0]), "max is ", max(tri.m1.m2), "\n.")
if (is.na(lower.lim))
lower.lim <- min(tri.m1.m2[tri.m1.m2>0])
if (is.na(upper.lim))
upper.lim <- max(tri.m1.m2)
if (is.na(breaks.by))
breaks = waiver()
else
breaks = seq(lower.lim, upper.lim, by=breaks.by)
tri.m1.m2$dataset <- rownames(tri.m1.m2)
tri.m1.m2[tri.m1.m2==0] <- NA
require(gg1L)
p.hm <- gg1L::ggHeatmap(tri.m1.m2, melt.id="dataset", title=title, legend.title=legend.title,
x.levels=data.levels, y.levels=rev(data.levels), label.digits=label.digits,
high = high, mid = mid, low = low,
midpoint = mean(c(lower.lim, upper.lim)), limit = c(lower.lim, upper.lim),
breaks = breaks, ... )
return(p.hm)
}
#' @details \code{combineTwoDF} combines two data frames or matrices with a same structure in one,
#' put all values in 2nd data frame into brackets.
#'
#' @param dfm A data frame or matrix.
#' @param dfm2 The 2nd data frame or matrix whose values are into brackets.
#' @param rm.zero Default to TRUE to remove all " (0)".
#' @param return.df,... Default to TRUE to return a data frame, otherwise a matrix.
#' @keywords utils
#' @export
#' @examples
#' df <- combineTwoDF(df, df2, stringsAsFactors=FALSE)
#'
#' @rdname UtilsCombine
combineTwoDF <- function(dfm, dfm2, rm.zero=TRUE, return.df=TRUE, ...) {
if (nrow(dfm) != nrow(dfm2) || ncol(dfm) != ncol(dfm2))
stop("Two data frames must have a same structure !")
dfm <- as.matrix(dfm)
dfm2 <- as.matrix(dfm2)
dfm.comb <- matrix( paste0(trimSpace(dfm), " (", trimSpace(dfm2), ")"),
nrow=nrow(dfm), dimnames=dimnames(dfm) )
if (rm.zero)
dfm.comb <- gsub(" \\(0\\)", "", dfm.comb)
if (return.df)
dfm.comb <- data.frame(dfm.comb, check.names=FALSE, ...)
return(dfm.comb)
}
#' @details \code{mergeBy} is an improved function of
#' \code{\link{merge}} \code{by} column(s).
#'
#' @param x,y data frames, or objects to be coerced to one.
#' @param warning.msg Logical; if TRUE as default, then print warning message
#' if any row is dropped after merge.
#' @param by,... Specifications of the column(s) used for merging,
#' and more arguments passed to \code{\link{merge}}.
#' @param rm.by Logical; if TRUE, then remove the column(s) specified
#' by \code{by} after merge. Default to FALSE.
#' But if \code{by="row.names"}, then copy the column 'Row.names' created
#' by \code{\link{merge}} into rownames before drop it.
#' @keywords utils
#' @export
#' @examples
#' df <- mergeBy(df, df2)
#'
#' @rdname UtilsCombine
mergeBy <- function(x, y, by="row.names", rm.by=FALSE, warning.msg=TRUE, ...) {
xy <- merge(x, y, by=by, ...)
if ( warning.msg && (nrow(xy) != nrow(x) || nrow(xy) != nrow(y)) )
warning("Lossing rows after merge ! nrow(xy) = ", nrow(xy),
", nrow(x) = ", nrow(x), ", nrow(y) = ", nrow(y), " !")
if (rm.by) {
# mv 'by' such as Row.names to rownames, if it is only 1 col
if (tolower(by[1]) == "row.names") {
rownames(xy) <- xy$Row.names
by="Row.names"
}
xy <- xy[,-which(colnames(xy) %in% by)]
cat("Drop column(s)", paste(by, collapse = ","), "after merge.\n")
}
return(xy)
}
#' @details \code{mergeListOfDF} merges a list of data frames into one
#' using \code{\link{mergeBy}}.
#'
#' @param df.list a list of data frames, or objects to be coerced to one.
#' @param suffixes The vector of suffixes added to distinguish colmun names
#' merged by different data frames. Its length must equal to \code{df.list}.
#' @keywords utils
#' @export
#' @examples
#' df <- mergeListOfDF(df.list, suffixes=names(df.list))
#' df <- mergeListOfDF(df.list, by="gene", suffixes=c("16S","18S","ITS"))
#'
#' @rdname UtilsCombine
mergeListOfDF <- function(df.list, by="row.names", rm.by=TRUE, suffixes=c(), ...) {
df.whole <- as.data.frame(df.list[[1]])
if (length(suffixes) > 0) {
if (length(suffixes) != length(df.list))
stop("Invalid inputs : length(suffixes) != length(df.list) !")
suffixes <- ComMA::trimSpace(suffixes, ".") # replace space to .
cm.names <- suffixes
# add suffix to 1st df cols
colnames(df.whole) <- paste(colnames(df.whole), suffixes[1], sep = ".")
} else if (is.null(names(df.list))) {
cm.names <- 1:length(df.list)
} else {
cm.names <- names(df.list)
}
cat("Load data 1 : ", cm.names[1], "\n")
if (length(df.list) > 1) { # multiple cm
for (i in 2:length(df.list)) {
df.tmp <- as.data.frame(df.list[[i]])
cat("Load data", i, ": ", cm.names[i], "\n")
if (length(suffixes) > 0)
colnames(df.tmp) <- paste(colnames(df.tmp), suffixes[i], sep = ".")
# mv Row.names col to rownames
df.whole <- ComMA::mergeBy(df.whole, df.tmp, by=by, rm.by=rm.by)
}
}
return(df.whole)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.