# mctoolsr
#######################
# DISSIMILARITY-BASED #
#######################
#' @title Calculate a dissimilarity matrix from a taxa table
#' @description The default is to calculate Bray-Curtis dissimilarities after
#' performing square-root transformations on the data. Send requests to add
#' additional metrics.
#' @param tax_table The taxa table.
#' @param method The method to use to calculate the dissimilarity metric.
#' Available methods include: 'bray_sq_trans', 'bray', 'jaccard',
#' 'sorensen'. Jaccard converts data to presence/absence before calculating
#' dissimilarity. Sorensen is the presence/absence version of bray.
#' @return A variable of class \code{dist}.
#' @concept Dissimilarity calculation and manipulation
#' @examples
#' dm = calc_dm(fruits_veggies$data_loaded, method = "bray_sq_trans")
calc_dm = function(tax_table, method = 'bray_sq_trans'){
# check for and warn about samples with no sequences
if(min(colSums(tax_table)) == 0){
warning('Some samples have no sequences. Samples with low sequence counts
should be filtered out by rarefying or another acceptable method.')
}
if(method == 'bray_sq_trans') {
# transform otu table (square root transformation)
tax_table_xform = t(sqrt(tax_table))
# create dissimilarity matrix from otu table
vegan::vegdist(tax_table_xform, method = 'bray')
} else if (method == 'bray') {
vegan::vegdist(t(tax_table), method = 'bray')
} else if (method == 'jaccard') {
vegan::vegdist(t(tax_table), method = 'jaccard', binary = TRUE)
} else if (method == 'sorensen') {
vegan::vegdist(t(tax_table), method = 'bray', binary = TRUE)
} else {
stop(paste0('Invalid dissimilarity metric specified. See documentation ',
'for allowable metrics.'))
}
}
#' @title Calculate Point Coordinates in an Ordination
#' @description Use to generate ordination axis scores to use when plotting
#' ordination.
#' @param dm Dissimilarity matrix.
#' @param ord_type The type of ordination. 'NMDS', 'PCoA', or 'constrained' are
#' the current accepted values.
#' @param metadata_map Required if 'constrained' ord_type.
#' @param constrain_factor Required if 'constrained' ord_type.
#' @return A data frame consisting of the coordinates.
#' @details Impliments nonmetric multidimensional scaling, principal coordinates
#' analysis, or constrained ordination (via \code{\link{capscale}} in
#' \code{\link{vegan}}).
#' @concept Dissimilarity calculation and manipulation
#' @examples
#' dm = calc_dm(fruits_veggies$data_loaded)
#' ord = calc_ordination(dm, "nmds")
calc_ordination = function(dm, ord_type, metadata_map, constrain_factor){
dm = as.dist(dm)
if(ord_type == 'NMDS' | ord_type == 'nmds'){
dm_mds = vegan::metaMDS(dm, k = 2)
data.frame(dm_mds$points)
}
else if(ord_type == 'PCoA' | ord_type == 'pcoa'){
pcoa = cmdscale(dm, k = 2, eig = TRUE, add = TRUE)
pct_explained = round((pcoa$eig / sum(pcoa$eig)) * 100, 1)
pcoa_pts = data.frame(pcoa$points)
colnames(pcoa_pts) = c(paste0('PC1 (', pct_explained[1], '%)'),
paste0('PC2 (', pct_explained[2], '%)'))
pcoa_pts
}
else if(ord_type == 'constrained'){
cap = vegan::capscale(formula = dm ~ metadata_map[, constrain_factor])
data.frame(vegan::scores(cap)$sites)
}
else stop('Only "NMDS", "PCoA", and "constrained" implementd so far.')
}
#' @title Generate an Ordination Plot
#' @description Used to generate a plot from the output of \code{
#' calc_ordination()}
#' @param input The input dataset as loaded by \code{load_taxa_table()}.
#' @param ordination_axes The output of \code{calc_ordination()}.
#' @param color_cat The metadata map header used to color points.
#' @param shape_cat The metadata map header used for points' shapes (optional).
#' @param hulls Whether or not to include an outline around sample categories.
#' @param ... Additional arguments passed to ggplot2's \code{geom_point()}.
#' @concept Plots
#' @examples
#' fvrar = single_rarefy(fruits_veggies, 100)
#' ord = calc_ordination(calc_dm(fvrar$data_loaded), 'nmds')
#' plot_ordination(fvrar, ord, 'Sample_type', 'Farm_type', hulls = TRUE)
plot_ordination = function(input, ordination_axes, color_cat, shape_cat,
hulls = FALSE, ...) {
if (missing(color_cat)) {
warning('No mapping category to color by.')
color_vec = rep('none', nrow(input$map_loaded))
} else
color_vec = input$map_loaded[, color_cat]
to_plot = data.frame(ordination_axes, cat = color_vec)
names(to_plot)[3] = 'cat'
headers = colnames(to_plot)
# hulls prep
if (hulls) {
.find_hulls = function(df) {
df[chull(df),]
}
hull_vals = dplyr::do_(dplyr::group_by_(to_plot, "cat"), ~ .find_hulls(.))
}
# plot w/ shape
if (!missing(shape_cat)) {
to_plot = data.frame(to_plot, cat2 = input$map_loaded[, shape_cat])
p = ggplot2::ggplot(to_plot, ggplot2::aes_string(headers[1], headers[2]))
p = p + ggplot2::geom_point(
ggplot2::aes_string(color = "cat", shape = "cat2"),
..., size = 3, alpha = 0.8
)
p = p + ggplot2::theme_bw()
p = p + ggplot2::xlab(colnames(to_plot)[1]) +
ggplot2::ylab(colnames(to_plot)[2])
p = p + ggplot2::theme(legend.title = ggplot2::element_blank())
}
# plot without shape
else{
p = ggplot2::ggplot(to_plot, ggplot2::aes_string(headers[1], headers[2]))
p = p + ggplot2::geom_point(ggplot2::aes_string(color = "cat"),
..., size = 3, alpha = 0.8)
p = p + ggplot2::theme_bw()
p = p + ggplot2::xlab(colnames(to_plot)[1]) +
ggplot2::ylab(colnames(to_plot)[2])
p = p + ggplot2::theme(legend.title = ggplot2::element_blank())
}
if (hulls) {
p = p + ggplot2::geom_polygon(
data = hull_vals,
ggplot2::aes_string(fill = "cat", color = "cat"),
alpha = 0.1
)
p = p + ggplot2::theme(legend.title = ggplot2::element_blank())
}
p
}
#' @title Generate an NMDS Plot Quickly
#' @description Used to generate a quick ordination.
#' @param dm Dissimilarity matrix.
#' @param metadata_map The metadata mapping data frame.
#' @param color_cat The metadata map header used to color points.
#' @param shape_cat [OPTIONAL] The metadata map header used for points' shapes.
#' @concept Plots
#' @examples
#' dm = calc_dm(fruits_veggies$data_loaded)
#' plot_nmds(dm, metadata_map = fruits_veggies$map_loaded, "Sample_type",
#' "Farm_type")
plot_nmds = function(dm, metadata_map = NULL, color_cat, shape_cat){
if(missing(color_cat)){
warning('No mapping category to color by.')
color_vec = rep('none', length(labels(dm)))
} else color_vec = metadata_map[, color_cat]
# format data and do NMDS
dm = as.dist(dm)
dm.mds = vegan::metaMDS(dm, k=2)
# plot w shape
if(!missing(shape_cat)){
points = data.frame(dm.mds$points, cat = color_vec,
cat2 = metadata_map[, shape_cat])
ggplot2::ggplot(points,
ggplot2::aes_string("MDS1", "MDS2", color = "cat",
shape = "cat2")) +
ggplot2::geom_point(size = 3, alpha = 0.8) +
ggplot2::theme_bw() +
ggplot2::scale_color_discrete('') +
ggplot2::scale_shape_discrete('')
}
# plot without shape
else{
points = data.frame(dm.mds$points, cat = color_vec)
ggplot2::ggplot(points,
ggplot2::aes_string("MDS1", "MDS2", color = "cat")) +
ggplot2::geom_point(size = 3, alpha = 0.8) +
ggplot2::theme_bw() +
ggplot2::scale_color_discrete('') +
ggplot2::scale_shape_discrete('')
}
}
#' @title Generate a dendrogram based on a dissimilarity matrix
#' @description Generating a dendrogram can be useful for visualizing
#' similarities and differences in community compositions among samples.
#' @param dm Dissimilarity matrix.
#' @param metadata_map The metadata mapping dataframe. Typically,
#' input$map_loaded.
#' @param labels The metadata mapping dataframe column name representing the
#' intended leaf labels.
#' @param color_by [OPTIONAL] The metadata mapping dataframe column name
#' representing the intended leaf label colors.
#' @param method The clustering method to use when creating the dendrogram.
#' @param ... Other parameters passed on to geom_text
#' @concept Plots
#' @examples
#' fvrar = single_rarefy(fruits_veggies, 100)
#' dm = calc_dm(fvrar$data_loaded)
#' plot_dendrogram(
#' dm, metadata_map = fvrar$map_loaded, labels = 'Sample_type',
#' color_by = 'Farm_type'
#' )
plot_dendrogram = function(dm, metadata_map, labels, color_by,
method = 'complete', ...) {
if (!requireNamespace("ggdendro", quietly = TRUE)) {
stop(paste0("'ggdendro' package needed for this function ",
"to work. Please install it."), call. = FALSE)
}
hc = hclust(dm, method)
ddata = ggdendro::dendro_data(hc)
map_rows = match(ddata$labels$label, row.names(metadata_map))
leaf_labels = as.character(metadata_map[map_rows, labels])
if(!missing(color_by)) {
sample_categories = metadata_map[map_rows, color_by]
ddata$leaf_labels = data.frame(ddata$labels[, 1:2], leaf_labels,
sample_categories)
} else {
ddata$leaf_labels = data.frame(ddata$labels[, 1:2], leaf_labels)
}
p = ggplot2::ggplot()
p = p + ggplot2::geom_segment(data = ddata$segments,
ggplot2::aes_string("x", "y", xend = "xend",
yend = "yend"))
if(!missing(color_by)) {
p = p + ggplot2::geom_text(data = ddata$leaf_labels,
ggplot2::aes_string("x", "y", label = "leaf_labels",
color = "sample_categories"),
...,
hjust = 0, angle = -90)
} else {
p = p + ggplot2::geom_text(data = ddata$leaf_labels,
ggplot2::aes_string("x", "y",
label = "leaf_labels"),
...,
color = 'black', hjust = 0, angle = -90)
}
p = p + ggdendro::theme_dendro()
p = p + ggplot2::scale_y_continuous(lim = c(-0.5, max(ddata$segments$y)*1.05))
p
}
# Interactive plots NOT WORKING
# # @title Generate interactive plot
# # @description Create an interactive plot with the 'clickme' package
# # @param color_by The mapping file header for the factor to color by
# plot_interactive = function(ordination_axes, data, color_by, name_by){
# to_plot = data.frame(ordination_axes, point_colors = data$map_loaded[, color_by],
# point_names = data$map_loaded[, name_by])
# clickme('points', to_plot[, 1], to_plot[, 2],
# color_groups = factor(to_plot$point_colors),
# names = to_plot$point_names)
# }
# # data = otu_data
# # color_by = 'group'
# # output_dir = outdir
# secondary_permanova = function(data, split_cat, cat_level, test_factor){
# data.tmp = filter_data(data, filter_cat = split_cat, keep_vals = cat_level)
# data.tmp = filter_data(data.tmp, filter_cat = test_factor, filter_vals = '#N/A')
# dm.tmp = calc_dm(data.tmp$data_loaded)
# results = adonis(dm.tmp ~ data.tmp$map_loaded[, test_factor])
# results
# }
#
# assess_within_category_effects = function(data, category, test_factor){
# for(i in 1:length(unique(data$map_loaded[, category]))){
# level = unique(data$map_loaded[, category])[i]
# print(paste(cat('\n'), level, cat('\n')))
# results = secondary_permanova(data, category, level, test_factor)
# print(results)
# }
# }
#' @title Convert dissimilarity matrix to 3 column format
#' @description This is useful for performing analyses on dissimilarity values.
#' @param dm Dissimilarity matrix of either class \code{dist} or class
#' \code{data.frame}.
#' @concept Dissimilarity calculation and manipulation
#' @return The dissimilarity matrix as a 3 column data frame with coulmns:
#' \code{c('x1', 'x2', 'dist')}.
#' @examples
#' dm = calc_dm(fruits_veggies$data_loaded)
#' dm_cols = convert_dm_to_3_column(dm)
convert_dm_to_3_column = function(dm){
if(class(dm) == 'data.frame'){
dmat = as.dist(dm)
}
else{
dmat = dm
}
dmat.clmns = data.frame(t(combn(unlist(labels(dmat)), 2)), as.numeric(dmat))
names(dmat.clmns) = c('x1', 'x2', 'dist')
dmat.clmns
}
.add_metadata_to_df = function(x, y, z){
stop('Deprecated - Please use "add_metadata_to_dm_clmns".')
}
#' @title Add metadata to an additional column in column formatted
#' dissimilarities dataframe
#' @description This is a useful function to quickly add sample data to a
#' data frame containing pairwise dissimilarities.
#' @param dm_clmns The dissimilarities dataframe produced using
#' \code{\link{convert_dm_to_3_column}}.
#' @param metadata_map The metadata dataframe.
#' @param cat The header string from the metadata map corresponding to the data
#' you would like to add to the dissimilarities dataframe.
#' @concept Dissimilarity calculation and manipulation
#' @return A 5 column data frame with the metadata added. The two additional
#' columns correspond to x1 and x2's metadata values
#' @examples
#' dm = calc_dm(fruits_veggies$data_loaded)
#' dm_cols = convert_dm_to_3_column(dm)
#' dm_cols_with_metadata = add_metadata_to_dm_clmns(dm_cols,
#' fruits_veggies$map_loaded, "Sample_type")
add_metadata_to_dm_clmns = function(dm_clmns, metadata_map, cat){
cat1 = metadata_map[match(dm_clmns$x1, row.names(metadata_map)), cat]
cat2 = metadata_map[match(dm_clmns$x2, row.names(metadata_map)), cat]
dm_clmns_wCat = cbind(dm_clmns, cat1, cat2)
names(dm_clmns_wCat) = c(names(dm_clmns), paste(cat, "_1", sep=''),
paste(cat, "_2", sep=''))
dm_clmns_wCat
}
# cats_equal = function(x, col1, col2){
# if(x[col1] == x[col2]){"same"} else{"different"}
# }
#' @keywords internal
# Test which order of two paired strings is the recognized order by comparing to a vector of accepted
# categories
.get_combination_category = function(x, accepted_categories){
if(paste(x, collapse='__') %in% accepted_categories) {return(paste(x, collapse='__'))}
else if(paste(rev(x), collapse='__') %in% accepted_categories) {return(paste(rev(x), collapse='__'))}
else {return("Not accepted category")}
}
#' @keywords internal
# Get the combination category of two vectors containing strings.
# This command dereplicates reverse order combinations.
.id_treatment_combination = function(col1, col2){
# get the list of unique categories
unique_levels = unique(c(as.character(col1), as.character(col2)))
# get all possible combinations
combinations = rbind(t(combn(unique_levels, 2)),
t(as.data.frame(lapply(unique_levels, FUN=rep, times=2))))
combinations = paste(combinations[,1], combinations[,2], sep='__')
# identify the combination for each pair of categories testing each order
comparison_types = apply(data.frame(col1, col2), 1, .get_combination_category,
accepted_categories = combinations)
comparison_types
}
#' @keywords internal
# convert 3 column dissimilarities back to matrix format
.convert_one_column_to_matrix = function(df){
# initialize matrix with dimensions equal to number of unique categories
uNames = unique(c(as.character(df[, 1]), as.character(df[, 2])))
mean_dists_mat = data.frame(matrix(ncol = length(uNames), nrow = length(uNames)))
names(mean_dists_mat) = uNames
row.names(mean_dists_mat) = uNames
for(i in 1:nrow(df)){
mean_dists_mat[as.character(df[i, 1]), as.character(df[i, 2])] = as.character(df[i, 3])
}
for(i in 1:nrow(mean_dists_mat)){
for(k in 1:ncol(mean_dists_mat)){
if(is.na(mean_dists_mat[i,k])){
mean_dists_mat[i,k] = mean_dists_mat[k,i]
}
}
}
mean_dists_mat[is.na(mean_dists_mat)] = 0
mean_dists_mat
}
#' @title Calculate mean dissimilarities using a metadata factor
#' @description Calculate mean dissimilarities across all levels of a given
#' factor
#'
#' @param dm Dissimilarity matrix - typically created using
#' \code{\link{calc_dm}}.
#' @param metadata_map The metadata mapping dataframe.
#' @param summarize_by_factor Category in mapping file to summarize by.
#' @param return_map Whether or not to return summarized mapping files. If true,
#' will return a list (default: FALSE).
#' @return Mean dissimilarities.
#' @concept Dissimilarity calculation and manipulation
#' @examples
#' DM = calc_dm(fruits_veggies$data_loaded)
#' MD = calc_mean_dissimilarities(DM, fruits_veggies$map_loaded, "Sample_type")
calc_mean_dissimilarities = function(dm, metadata_map, summarize_by_factor,
return_map = FALSE){
# check that dm labels match metadata sample IDs
if (!identical(labels(dm), row.names(metadata_map))) {
warning('Dissimilarity matrix labels and metadata sample IDs do not match.')
}
dm_clmns = convert_dm_to_3_column(dm)
# list sample 1 and sample 2 factor categories in new clmns
dm_clmns_wCat = add_metadata_to_dm_clmns(dm_clmns, metadata_map,
summarize_by_factor)
# only take samples in mapping file
dm_clmns_wCat = dm_clmns_wCat[!is.na(dm_clmns_wCat[, 4]) &
!is.na(dm_clmns_wCat[, 5]),]
# remove rows where distances are comparing samples from the same cat
dm_clmns_wCat_reduced = dm_clmns_wCat[dm_clmns_wCat[, 4] !=
dm_clmns_wCat[, 5], ]
# get pairwise comparison while accounting for differences in category order
tx_combo = .id_treatment_combination(col1 = dm_clmns_wCat_reduced[, 4],
col2 = dm_clmns_wCat_reduced[, 5])
dm_clmns_wCat_reduced = cbind(dm_clmns_wCat_reduced, tx_combo)
# calc mean dissimilarities
means = dplyr::summarize(dplyr::group_by(dm_clmns_wCat_reduced, tx_combo),
mean_dist = mean(dist))
# convert back to matrix format
means2 = data.frame(do.call(rbind, strsplit(as.character(means$tx_combo),
split = '__')),
mean_dist = means$mean_dist)
if(return_map){
mean_map = .summarize_map(metadata_map, summarize_by_factor)
dm_loaded = as.dist(.convert_one_column_to_matrix(means2))
map_loaded = mean_map[match(labels(dm_loaded), row.names(mean_map)), ]
list(dm_loaded = dm_loaded, map_loaded = map_loaded)
} else as.dist(.convert_one_column_to_matrix(means2))
}
#' @title Calculate pairwise PERMANOVA results
#' @description The \code{\link[vegan]{adonis}} function in the
#' \code{\link[vegan]{vegan}} package does not provide a way to calculate
#' pairwise post-hoc comparisons between factor levels. This function
#' calculates PERMANOVA results using the \code{\link[vegan]{adonis}}
#' function for all factor level pairs. Raw p values are returned, but it is
#' recommended to use the provided FDR corrected p values since the multiple
#' comparisons can raise your likelihood of false positive differences.
#' @param dm Dissimilarity matrix of class \code{dist}.
#' @param metadata_map The metadata mapping dataframe with samples matching and
#' in the same order as the ones in the provided dm.
#' @param compare_header The header in the metadata mapping dataframe with the
#' factor levels to use for the pairwise comparisons.
#' @param n_perm A vector that controls the number of permutations required.
#' @return A dataframe with R2 and P values.
#' @concept Dissimilarity calculation and manipulation
#' @examples
#' dm = calc_dm(fruits_veggies$data_loaded)
#' calc_pairwise_permanovas(dm, fruits_veggies$map_loaded, "Sample_type")
calc_pairwise_permanovas = function (dm, metadata_map, compare_header, n_perm)
{
comp_var = metadata_map[, compare_header]
comp_pairs = combn(levels(comp_var), 2)
pval = c()
R2 = c()
for (i in 1:ncol(comp_pairs)) {
pair = comp_pairs[, i]
dm_w_map = list(dm_loaded = dm, map_loaded = metadata_map)
dm_w_map$map_loaded$in_pair = comp_var %in% pair
dm_w_map_filt = mctoolsr::filter_dm(dm_w_map, filter_cat = "in_pair",
keep_vals = TRUE)
if (!missing(n_perm)) {
m = vegan::adonis(dm_w_map_filt$dm_loaded ~ dm_w_map_filt$map_loaded[, compare_header],
permutations = n_perm)
} else {
m = vegan::adonis(dm_w_map_filt$dm_loaded ~ dm_w_map_filt$map_loaded[, compare_header])
}
pval = c(pval, m$aov.tab$`Pr(>F)`[1])
R2 = c(R2, m$aov.tab$R2[1])
}
results = data.frame(t(comp_pairs), R2, pval)
results$pvalBon = pval * length(pval)
results$pvalFDR = round(pval * (length(pval) / rank(pval, ties.method = "average")),
3)
results
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.