Nothing
group.Taxa.box <- function(data, is.OTU=TRUE, rank="g",
taxa="", meta, meta.factor="",
cex.y=5, cex.x=5, cex.main=10, RAM.theme=NULL,
col.pal=NULL, main="",
file=NULL, ext=NULL, height=8, width=16) {
# check whether data is list
if ( !is.list(data) ) {
stop("Please provide input data as list, see ?RAM.input.formatting")
}
# valide rank
rank.name <- .get.rank.name(rank, plural=TRUE)
rank_name <- .get.rank.name(rank)
rank_pat <- .get.rank.pat(rank)
# get the groups
if( !length(meta.factor)==1 ) {
stop ("Error: please provide one factor")
} else {
fac.levels <- levels(factor(meta[[meta.factor]]))
}
if( length(taxa)==0 || taxa=="" ) {
stop ("Error: please provide list of taxa to plot")
}
taxa.m <- list()
total.count <- list()
# labels for each dataset
labels <- names(data)
# process each data set
# process each data set
tax.list <- list()
nocap.list <- list()
# find out what in the taxa list not present in all datasets
for (i in 1:length(data)) {
if ( is.null(data[[i]]) ) {
break
}
label <- names(data)[i]
elem <- data[[i]]
if ( is.OTU ) {
valid.OTU(elem)
sum.count <- sum(transpose.OTU(elem))
tax <- tax.abund(elem, rank=rank, drop.unclassified=TRUE,
count=TRUE)
} else {
sum.count <- sum(elem)
tax <- elem
}
if( length(which(names(tax) %in% taxa))==0) {
stop ("Error: no taxa present in the otu table")
}
tax.sel <- tax[, which(names(tax) %in% taxa), drop=FALSE]
nocap <- setdiff(taxa, names(tax.sel))
if ( length(nocap) != 0 ) {
warning(paste(length(nocap), " taxa are not in the ", label, " dataset", sep=""))
} else {
nocap <- NULL
}
tax.list[[label]] <- tax.sel
nocap.list[[label]] <- nocap
}
# taxa that not in all datasets
if ( is.null(unlist(nocap.list)) ) {
taxa <- taxa
} else {
all.nocap <- Reduce(intersect, nocap.list)
if ( length(all.nocap) != 0 ) {
warning(paste(length(all.nocap), " taxa are not in all datasets: ", paste(all.nocap, collapse=", "), sep=""))
rm <- which(taxa %in% all.nocap)
# remaining taxa list
taxa <- taxa[-rm]
} else {
taxa <- taxa
}
}
# process the tax.abund matrices
taxa.m <- list()
total.count <- list()
for (i in 1:length(tax.list)) {
label <- names(tax.list)[i]
tax <- tax.list[[i]]
if ( is.null(tax) ) { break }
df <- data.frame(matrix(vector(), nrow(tax), 0))
for ( i in taxa ) {
if ( i %in% names(tax) ) {
df[[i]] <- tax[[i]]
} else {
df[[i]] <- 0
}
rownames(df) <- rownames(tax)
tax.sel <- df
}
# relative abundance of each taxa among samples
tax.sel <- tax.sel[match(rownames(meta),
rownames(tax.sel)) , , drop=FALSE]
if ( identical(rownames(tax.sel), rownames(meta)) ) {
tax.sel.p <- vegan::decostand(tax.sel, "total", MARGIN=2)
tax.sel.p.fac <- cbind(tax.sel.p, meta[[meta.factor]])
names(tax.sel.p.fac)[ncol(tax.sel.p.fac)] <- meta.factor
# for calculate total.count
tax.sel.fac = cbind(tax.sel, meta[[meta.factor]])
names(tax.sel.fac)[ncol(tax.sel.fac)] <- meta.factor
} else {
stop(paste(label, " and metadata do not have same samples", sep=""))
}
# shorten names
df.count <- tax.sel.fac
df.ra <- tax.sel.p.fac
# reshape2::melt by sample
#if ( require("reshape2") ) {
# reshape2::melt
#} else {
# stop("package 'reshape2' is required to use this function")
#}
df.ra.m <- reshape2::melt(cbind(df.ra, sample=rownames(df.ra)),
variable.name=rank_name, value.name="RA")
names(df.ra.m)[ncol(df.ra.m)-1] <- rank_name
names(df.ra.m)[ncol(df.ra.m)] <- "RA"
df.count.m <- reshape2::melt(cbind(df.count, sample=rownames(df.count)),
variable.name=rank_name, value.name="count")
names(df.count.m)[ncol(df.count.m)-1] <- rank_name
names(df.count.m)[ncol(df.count.m)] <- "count"
#return(list(df.count.m, df.ra.m))
# bind together with appropriate label
df.ra.m <- cbind(df.ra.m, Region=label)
df.count.m <- cbind(df.count.m, Region=label)
#calculate total count of each Taxon
total= sapply(levels(df.count.m[[rank_name]]),
function(x){sum(df.count.m$count[df.count.m[[rank_name]]==x])},
USE.NAMES=F)
total.df <- data.frame(Taxon=levels(df.count.m[[rank_name]]),total)
total.df[["Region"]] <- label
names(total.df)[1] <- rank_name
# reorder levels based on order of appearance in total.count,
df.ra.m[[rank_name]] <- factor(as.character(df.ra.m[[rank_name]]),
levels=total.df[[rank_name]],
ordered=TRUE)
total.count[[label]] <- total.df
taxa.m[[label]] <- df.ra.m
}
all.taxa <- do.call("rbind", taxa.m)
all.count <- do.call("rbind", total.count)
#return(list(all.taxa=all.taxa, all.count=all.count))
# reorder by total counts
all.count <- all.count[order(all.count$total, decreasing=FALSE),]
all.count[[rank_name]] <- factor(all.count[[rank_name]],
levels=rev(levels(all.count[[rank_name]])),
ordered=TRUE)
all.taxa[[rank_name]] <- factor(all.taxa[[rank_name]],
levels=levels(all.count[[rank_name]]),
ordered=TRUE)
# plot title, default is "":
title <- main
# use actual colours
cols.needed <- length(unique(all.taxa[[meta.factor]]))
cols <- .cols(cols.needed=cols.needed, col.pal)
names(cols) <- levels(factor(all.taxa[[meta.factor]]))
# we need to use aes_string to pass CRAN check; see
# http://goo.gl/JxgZ9u
# the distribution of each taxon in each category of meta.factor
#if ( !require("scales") ) {
# stop("package 'scales' is required for this function")
#}
# if ( !require("RColorBrewer") ) {
# stop("package 'RColorBrewer' is required cex.y=5, cex.x=5, for this function")
# }
# if ( !require("grid") ) {
# stop("package 'grid' is required for this function")
# }
# make informative labels
lab.lst <- list()
for ( i in taxa ) {
lab <- list()
for ( j in 1:length(labels) ) {
sel <- which(total.count[[labels[j]]][[rank_name]]==i)
#lab[[labels[j]]] <- paste(labels[j], ":",
# total.count[[labels[j]]]$total[sel], sep="")
lab[[labels[j]]] <- round(total.count[[labels[j]]]$total[sel], digits=0)
}
#lab1 <- paste(unlist(lab), collapse="\n")
#lab.lst[[i]] <- paste(i, "\n", lab1, sep="")
lab1 <- paste(unlist(lab), collapse=",")
lab.lst[[i]] <- paste(i, "(", lab1, ")", sep="")
}
lab2 <- unlist(lab.lst)
#return(all.taxa)
bb<-list()
for ( l in levels(factor(all.taxa[, "Region"])) ) {
aa <- all.taxa[all.taxa$Region==l,]
for ( k in levels(factor(aa[, meta.factor])) ) {
cc <- aa[aa[, meta.factor]==k, ]
bymedian<- reorder(cc[, rank_name], -cc[, "RA"], median)
bb[[paste0(l,".",k)]]<-levels(bymedian)
}
}
by_median<-bb[[1]]
#return(all.taxa)
all.taxa[,rank_name] <- factor(as.character(all.taxa[, rank_name]), levels= by_median, ordered=TRUE)
#return(all.taxa)
# boxplot
p <- ggplot2::ggplot(all.taxa, aes_string(x=rank_name,
y="RA",fill=meta.factor)) +
geom_boxplot()
# facet by region
p <- p + scale_x_discrete(labels = lab2) +
facet_wrap(~Region)
# flip x & y
p <- p + scale_fill_manual(values=cols[names(cols) %in%
levels(factor(all.taxa[[meta.factor]]))])+
scale_y_continuous(labels=scales::percent_format()) +
xlab(.capitalize(.get.rank.name(rank))) +
ylab("Percentage") +
coord_flip() +
ggtitle(title)
if ( is.null(RAM.theme) ) {
p <- p + theme(legend.key.size=unit(6,"mm"),
legend.text = element_text(size=10),
axis.text.y=element_text(size=cex.y, face="bold", margin=margin(5,5,10,5,"pt")),
axis.text.x=element_text(size=cex.x, face="bold"),
#axis.ticks.y=element_line(size=1),
legend.position="right",
plot.title = element_text(face="bold", size=cex.main))
} else {
p <- p + RAM.theme
}
.valid.plot.settings(file, ext)
save <- !is.null(file)
if (save) {
.ggsave.helper(file, ext, width, height, plot=p)
} else {
p
}
}
group.Taxa.bar <- function(data, is.OTU=TRUE, rank="g", taxa="",
meta, meta.factor="", func="sum", cex.y=5,
cex.x=5, cex.main=10, bar.width=NULL,
RAM.theme=NULL, col.pal=NULL, main="",
file=NULL, ext=NULL,
height=8, width=16) {
# valide rank
rank.name <- .get.rank.name(rank, plural=TRUE)
rank_name <- .get.rank.name(rank)
rank_pat <- .get.rank.pat(rank)
# check whether data is list
if ( !is.list(data) ) {
stop("Please provide input data as list, see ?RAM.input.formatting")
}
# get the groups
.valid.factor(meta, meta.factor)
if(!length(meta.factor)==1) {
stop ("Error: please provide one factor")
}
if(length(taxa)< 1) {
stop ("Error: please provide a vector of selected taxa names")
}
# labels for each dataset
labels <- names(data)
# process each data set
tax.list <- list()
nocap.list <- list()
# find out what in the taxa list not present in all datasets
for (i in 1:length(data)) {
if ( is.null(data[[i]]) ) {
break
}
label <- names(data)[i]
elem <- data[[i]]
if ( is.OTU ) {
valid.OTU(elem)
sum.count <- sum(transpose.OTU(elem))
tax <- tax.abund(elem, rank=rank, drop.unclassified=TRUE,
count=TRUE)
} else {
sum.count <- sum(elem)
tax <- elem
}
if( length(which(names(tax) %in% taxa))==0) {
stop ("Error: no taxa present in the otu table")
}
tax.sel <- tax[, which(names(tax) %in% taxa), drop=FALSE]
nocap <- setdiff(taxa, names(tax.sel))
if ( length(nocap) != 0 ) {
warning(paste(length(nocap), " taxa are not in the ", label, " dataset", sep=""))
} else {
nocap <- NULL
}
tax.list[[label]] <- tax.sel
nocap.list[[label]] <- nocap
}
# taxa that not in all datasets
if ( is.null(unlist(nocap.list)) ) {
taxa <- taxa
} else {
all.nocap <- Reduce(intersect, nocap.list)
if ( length(all.nocap) != 0 ) {
warning(paste(length(all.nocap), " taxa are not in all datasets: ", paste(all.nocap, collapse=", "), sep=""))
rm <- which(taxa %in% all.nocap)
taxa <- taxa[-rm]
} else {
rm <- NULL
taxa <- taxa
}
# remaining taxa list
}
# process the tax.abund matrices
taxa.m <- list()
total.count <- list()
for (i in 1:length(tax.list)) {
label <- names(tax.list)[i]
tax <- tax.list[[i]]
if ( is.null(tax) ) { break }
df <- data.frame(matrix(vector(), nrow(tax), 0))
for ( i in taxa ) {
if ( i %in% names(tax) ) {
df[[i]] <- tax[[i]]
} else {
df[[i]] <- 0
}
}
rownames(df) <- rownames(tax)
tax.sel <- df
tax.sel.fac <- stats::aggregate(tax.sel, by=list(meta[[meta.factor]]), FUN=func)
tax.sel.fac.total <- stats::aggregate(tax.sel, by=list(meta[[meta.factor]]), FUN=sum)
###############################
# based on required calculation
###############################
rownames(tax.sel.fac) <- tax.sel.fac$Group.1
tax.sel.fac<-as.data.frame(tax.sel.fac[,-1, drop=FALSE])
# reshape2::melt by factor
#if (!require("reshape2")) {
# stop("package 'reshape2' is required to use this function")
#}
tax.sel.fac.m<-reshape2::melt(cbind(tax.sel.fac,
factor=rownames(tax.sel.fac)),
is.vars=c('factor'), value.name="count",
variable.name=rank_name)
names(tax.sel.fac.m)[ncol(tax.sel.fac.m)-1] <- rank_name
names(tax.sel.fac.m)[ncol(tax.sel.fac.m)] <- "count"
names(tax.sel.fac.m)[names(tax.sel.fac.m)=="factor"] <- meta.factor
# bind together with appropriate label
tax.sel.fac.m <- cbind(tax.sel.fac.m, Region=label)
####################################
#calculate total count of each Taxon
####################################
# based on required calculation
rownames(tax.sel.fac.total) <- tax.sel.fac.total$Group.1
tax.sel.fac.total<-as.data.frame(tax.sel.fac.total[,-1, drop=FALSE])
# reshape2::melt by factor
#if (!require("reshape2")) {
# stop("package 'reshape2' is required to use this function")
#}
tax.sel.fac.total.m<-reshape2::melt(cbind(tax.sel.fac.total,
factor=rownames(tax.sel.fac.total)),
is.vars=c('factor'), value.name="count",
variable.name=rank_name)
names(tax.sel.fac.total.m)[ncol(tax.sel.fac.total.m)-1] <- rank_name
names(tax.sel.fac.total.m)[ncol(tax.sel.fac.total.m)] <- "count"
names(tax.sel.fac.total.m)[names(tax.sel.fac.total.m)=="factor"] <- meta.factor
# bind together with appropriate label
tax.sel.fac.total.m <- cbind(tax.sel.fac.total.m, Region=label)
total= sapply(levels(tax.sel.fac.total.m[[rank_name]]),
function(x){sum(tax.sel.fac.total.m$count[tax.sel.fac.total.m[[rank_name]]==x])},
USE.NAMES=F)
total.df<-data.frame(Taxon=levels(tax.sel.fac.total.m[[rank_name]]),total)
names(total.df)[1] <- rank_name
total.df[["Region"]] <- label
# reorder levels based on order of appearance in total.count,
#tax.sel.fac.m[[rank_name]] <- factor(as.character(tax.sel.fac.m[[rank_name]]),
# levels=total.df[[rank_name]],
# ordered=TRUE)
# bind together with appropriate label
total.count[[label]] <- total.df
taxa.m[[label]] <- tax.sel.fac.m
}
all.taxa <- do.call("rbind", taxa.m)
all.count <- do.call("rbind", total.count)
#return(list(all.taxa, all.count))
# reorder by total counts
all.count$total <- round(all.count$total, digits=0)
#all.count <- all.count[order(all.count$total, decreasing=FALSE),]
all.count[[rank_name]] <- factor(all.count[[rank_name]],
levels=rev(levels(all.count[[rank_name]])),
ordered=TRUE)
all.taxa[[rank_name]] <- factor(all.taxa[[rank_name]],
levels=levels(all.count[[rank_name]]),
ordered=TRUE)
#return(list(all.taxa, all.count))
title = main
# use actual colours
cols.needed <- length(unique(all.taxa[[meta.factor]]))
cols <- .cols(cols.needed, col.pal)
names(cols) <- levels(factor(all.taxa[[meta.factor]]))
# we need to use aes_string to pass CRAN check; see
# http://goo.gl/JxgZ9u
# the distribution of each taxon in each category of meta.factor
#if ( !require("scales") ) {
# stop("package 'scales' is required for this function")
#}
# if ( !require("RColorBrewer") ) {
# stop("package 'RColorBrewer' is required for this function")
# }
# if ( !require("grid") ) {
# stop("package 'grid' is required for this function")
# }
# make informative labels
lab.lst <- list()
for ( i in taxa ) {
lab <- list()
for ( j in 1:length(labels) ) {
sel <- which(total.count[[labels[j]]][[rank_name]]==i)
#lab[[labels[j]]] <- paste(labels[j], ":",
# total.count[[labels[j]]]$total[sel], sep="")
#lab[[labels[j]]] <- round(total.count[[labels[j]]]$total[sel], digits=0)
# round up to next digit
lab[[labels[j]]] <- ceiling(total.count[[labels[j]]]$total[sel])
}
#lab1 <- paste(unlist(lab), collapse="\n")
#lab.lst[[i]] <- paste(i, "\n", lab1, sep="")
lab1 <- paste(unlist(lab), collapse=",")
lab.lst[[i]] <- paste(i, "(", lab1, ")", sep="")
}
lab2 <- unlist(lab.lst)
# reorder by percentage for each taxon
# first add a column with total number for each taxon
# sort by Region, meta.fac and RA
#aa<-aa[order(aa[, "Region"], aa[, meta.factor], ceiling(aa[, "RA"]), decreasing=FALSE),]
#return(all.taxa)
bb<-list()
for ( l in levels(factor(all.taxa[, "Region"])) ) {
aa <- all.taxa[all.taxa$Region==l,]
aa <- transform(aa, Tot.Count = ave(aa[, "count"], aa[, rank_name],FUN = function(x)sum(x)))
# then compute the percentage:
# aa <- transform(aa, RA = 100 * count/Tot.Count)
# because some Tot.Count == 0, RA cannot be calculated
for ( k in 1:nrow(aa)) {
if (aa[k, "Tot.Count"] == 0 ) {
aa[k, "RA"] <- 0
} else {
aa[k, "RA"] <- 100 * aa[k, "count"]/aa[k, "Tot.Count"]
}
}
aa <- aa[order(aa[, meta.factor], aa[, "RA"], decreasing=FALSE),]
bb[[l]]<-aa
#all.taxa[all.taxa$Region==l,] <- aa
}
all.taxa<-do.call("rbind", bb)
#return(all.taxa)
all.taxa[,rank_name] <- factor(as.character(all.taxa[, rank_name]), levels= unique(all.taxa[, rank_name]), ordered=TRUE)
#return(otu.p.sel.fac.m)
# barplot
if ( is.null(bar.width) ) {
p <- ggplot2::ggplot(all.taxa, aes_string(x=rank_name,
y="count",fill=meta.factor)) +
geom_bar(colour="white",stat="identity",
position="fill")
} else {
p <- ggplot2::ggplot(all.taxa, aes_string(x=rank_name,
y="count",fill=meta.factor)) +
geom_bar(width=bar.width, colour="white",stat="identity",
position="fill")
}
#return(p)
# facet by region, # flip x & y
p <- p + scale_fill_manual(values=cols[names(cols) %in%
levels(factor(all.taxa[[meta.factor]]))]) +
scale_y_continuous(labels=scales::percent_format()) +
scale_x_discrete(labels = lab2) +
facet_wrap(~Region) +
ggtitle(title) +
xlab(.capitalize(rank_name)) +
ylab("Percentage") +
coord_flip()
if ( is.null(RAM.theme) ) {
p <- p + theme(legend.key.size=unit(6,"mm"),
#axis.text=element_text(margin=margin(5,5,10,5,"pt")),
axis.text.y=element_text(size=cex.y, face="bold"),
axis.text.x=element_text(size=cex.x, face="bold"),
#axis.ticks.length=grid::unit(-0.25, "cm"),
#axis.ticks.margin=grid::unit(0.5, "cm"),
#axis.ticks.y=element_line(size=0.5),
legend.position="right",
legend.text = element_text(size=10),
plot.title = element_text(face="bold", size=cex.main),
strip.text.x = element_text(size=8, face="bold", angle=0))
} else {
p <- p + RAM.theme
}
save <- !is.null(file)
if (save) {
.valid.plot.settings(file, ext)
.ggsave.helper(file, ext, width, height, plot=p)
} else {
p
}
}
group.abund.Taxa <- function(data, is.OTU=TRUE, rank="g", taxa,
drop.unclassified=FALSE, bar.width=NULL,
meta, meta.factor="", RAM.theme=NULL,
col.pal=NULL, main="",
file=NULL, ext=NULL, height=8, width=16) {
save <- !is.null(file)
# valide rank
rank.name <- .get.rank.name(rank, plural=TRUE)
rank_name <- .get.rank.name(rank)
rank_pat <- .get.rank.pat(rank)
# get the groups
if( !length(meta.factor)==1 ||
!any(names(meta) %in% meta.factor) ) {
stop ("Error: please provide one factor")
}
if( length(taxa)==0 || taxa=="" ) {
stop ("Error: please provide list of taxa or top# of groups to plot")
}
# check whether data is list
if ( !is.list(data) ) {
stop("Please provide input data as list, see ?RAM.input.formatting")
}
# labels for each dataset
labels <- names(data)
# process each data set
tax.list <- list()
for (i in 1:length(data)) {
if ( is.null(data[[i]]) ) {
break
}
label <- names(data)[i]
elem <- data[[i]]
if ( is.OTU ) {
valid.OTU(elem)
# get the groups
tax <-.group.rank2(data=elem, meta=meta, meta.factor=meta.factor,
relative.abund=TRUE, taxa=taxa,
drop.unclassified=drop.unclassified,
rank=rank)
} else {
tax <- .group.rank2(data=elem, is.OTU=FALSE, meta=meta, meta.factor=meta.factor,
relative.abund=TRUE, taxa=taxa,
drop.unclassified=drop.unclassified,
rank=rank)
}
#print(tax)
# reshape2::melt by Sample
#if (!require("reshape2")) {
# stop("package 'reshape2' is required to use this function")
# }
if (ncol(tax) == 0 ) {
stop("selected taxa was not found")
}
tax.m <- reshape2::melt(cbind(tax, Sample=rownames(tax)),
id.vars="Sample", value.name="RA",
variable.name=rank_name)
names(tax.m)[ncol(tax.m)-1] <- rank_name
names(tax.m)[ncol(tax.m)] <- "RA"
# bind together with appropriate label
tax.m <- cbind(tax.m, Region=label)
tax.list[[label]] <- tax.m
}
all.taxa <- do.call("rbind", tax.list)
# reorder levels based on order of appearance in table
all.taxa$Sample <- ordered(all.taxa$Sample, levels=unique(all.taxa$Sample))
title <- main
# if ( !require("scales") ) {
# stop("package 'scales' is required for this function")
# }
# if ( !require("RColorBrewer") ) {
# stop("package 'RColorBrewer' is required for this function")
# }
# if ( !require("grid") ) {
# stop("package 'grid' is required for this function")
# }
# use actual colours
cols.needed <- length(unique(all.taxa[[rank_name]]))
cols <- .cols(cols.needed=cols.needed, col.pal)
names(cols) <- levels(factor(all.taxa[[rank_name]]))
# we need to use aes_string to pass CRAN check; see
# http://goo.gl/JxgZ9u
if ( is.null(bar.width) ) {
p <- ggplot2::ggplot(all.taxa, aes_string(x="Sample", y="RA",
fill=rank_name)) +
geom_bar(position="stack", stat="identity")
} else {
p <- ggplot2::ggplot(all.taxa, aes_string(x="Sample", y="RA",
fill=rank_name)) +
geom_bar(position="stack", stat="identity", width=bar.width)
}
if ( is.null(RAM.theme) ) {
p <- p + theme(legend.position="bottom",
axis.text.x=element_text(angle = 45, vjust = 1,
hjust=1),
panel.grid.major.x = element_blank())
} else {
p <- p + RAM.theme
}
#coord_flip() +
p <- p + scale_y_continuous(labels = scales::percent_format()) +
facet_wrap(~Region, scales="free_x") +
xlab(meta.factor) +
ylab("Relative Abundance") +
ggtitle(title)+
scale_fill_manual(values=cols[names(cols) %in%
levels(factor(all.taxa[[rank_name]]))],
guide=ggplot2::guide_legend(direction="horizontal", ncol=5))+
theme(legend.position="bottom")
if (save) {
.valid.plot.settings(file, ext)
.ggsave.helper(file, ext, width, height, plot=p)
} else {
p
}
}
shared.Taxa <- function(data, is.OTU=TRUE, rank="g") {
.valid.data(data=data, is.OTU=is.OTU)
# valide rank
if ( is.OTU ) {
rank.name <- .get.rank.name(rank, plural=TRUE)
rank_name <- .get.rank.name(rank)
rank_pat <- .get.rank.pat(rank)
} else {
warning("data are not otu tables, will ignore the rank provided")
rank_name <- "taxa"
rank.name <- "taxa"
rank_pat <- ""
}
# check whether data is list
if ( !is.list(data) ) {
stop("Please provide input data as list, see ?RAM.input.formatting")
}
# labels for each dataset
labels <- names(data)
# process each data set
shared.list <- list()
for (i in 1:length(data)) {
if ( is.null(data[[i]]) ) {
break
}
label <- names(data)[i]
elem <- data[[i]]
if ( is.OTU ) {
valid.OTU(elem)
total.count <- sum(transpose.OTU(elem))
tax <- tax.abund(elem, rank=rank, drop.unclassified=TRUE,
count=TRUE)
} else {
total.count <- sum(elem)
tax <- elem
}
###check for zero entries?
if(dim(tax)[2] == 1) {
stop(paste("Error: Only one taxon at the ", rank_name, " level! Please choose another taxonomic rank to compare", sep=""))
} else {
tax.pa <- decostand(tax, "pa")
}
# for readability
x <- dim(tax.pa)[1] # number of samples
y <- dim(tax.pa)[2] # number of taxa
num.tax.one.sample <- dim(tax.pa[ ,colSums(tax.pa) == 1, drop=FALSE])[2]
num.tax.mult.sample <- dim(tax.pa[ ,colSums(tax.pa) > 1, drop=FALSE])[2]
### test this
num.tax.all.sample <- dim(tax.pa[ ,colSums(tax.pa) == x, drop=FALSE])[2]
per.tax.one.sample <- num.tax.one.sample / y
per.tax.all.sample <- num.tax.all.sample / y
if( length(which(colSums(tax.pa) == x ))==0 ) {
warning("NO taxa are shared by all samples")
num.seq.shared.tax <- 0
} else {
num.seq.shared.tax <- sum(tax[ , colnames(tax) %in%
names(tax.pa)[which(colSums(tax.pa) == x)]])
}
per.seq.shared.tax <- num.seq.shared.tax / total.count
# get the taxa names for all taxs present in all samples
tax <- names(tax.pa)[which(colSums(tax.pa) == x)]
val <- list(num.tax.one.sample, num.tax.mult.sample,
num.tax.all.sample, per.tax.one.sample,
per.tax.all.sample, num.seq.shared.tax,
per.seq.shared.tax, tax)
# format numerical data?
#val <- format(round(val[1:7], 2), nsmall=2)
names(val) <- c(paste("#_of_", rank.name, "_in_1_sample", sep=""),
paste("#_of_", rank.name, "_in_>1_sample", sep=""),
paste("#_of_", rank.name, "_in_all_samples", sep=""),
paste("%_of_", rank.name, "_in_one_sample", sep=""),
paste("%_of_", rank.name, "_in_all_samples", sep=""),
paste("#_of_sequence_in_shared_", rank.name, sep=""),
paste("%_of_sequence_in_shared_", rank.name, sep=""),
paste(rank.name, "_in_all_samples", sep=""))
shared.list[[label]] <- val
}
return(shared.list)
}
.cols <- function(cols.needed, col.pal=NULL) {
if ( is.null(col.pal) ) {
if ( cols.needed <= 2 ) {
cols <- c("#E41A1C", "#377EB8")
} else if ( cols.needed <= 12 && cols.needed > 2 ) {
cols <- RColorBrewer::brewer.pal(cols.needed, "Set3")
} else {
# if palette max is 12; so if we have more than 6 entries for otu1/2, we
# need to manually construct a palette to use
#cols <- rainbow(cols.needed)
col.func <- grDevices::colorRampPalette(RColorBrewer::brewer.pal(12, "Set3"))
cols <- col.func(cols.needed)
}
} else {
cols <- col.pal
}
}
.group.rank2<-function(data, is.OTU=TRUE, meta=meta, meta.factor="",
drop.unclassified=FALSE, relative.abund=FALSE,
rank="g", func="sum", taxa=NULL){
## whether data an OTU or taxonomic abundance matrix
if ( is.OTU ) {
valid.OTU(data)
tax <- tax.abund(data, rank=rank, drop.unclassified=drop.unclassified,
count=TRUE)
} else {
tax <- data
}
# get the groups
if( !length(meta.factor)==1 ||
!any(names(meta) %in% meta.factor) ) {
stop ("Error: please provide one factor")
}
if(all(rownames(tax)==rownames(meta))) {
tax.factor <- stats::aggregate(tax, by=list(meta[[meta.factor]]), FUN=func)
rownames(tax.factor) <- tax.factor[,"Group.1"]
tax.factor <- tax.factor[,-1]
} else {
stop("Error: otu and metadata have different samples")
}
tax.factor <- tax.factor[, order(colSums(tax.factor), decreasing=TRUE)]
if(!isTRUE(relative.abund)) {
tax.factor <- tax.factor
} else {
tax.factor <- vegan::decostand(tax.factor, "total")
}
if (drop.unclassified) {
# this selects all columns NOT containing in the blacklist
# drop.unclassified using blacklist
remove.pat <- gsub(.get.rank.pat(rank), "",
paste0(.blacklist(.get.rank.pat(rank)),
"|no_taxonomy"))
tax.factor <- tax.factor[ , !grepl(remove.pat, names(tax.factor),
ignore.case=TRUE), drop=FALSE]
} else {
tax.factor<-tax.factor
}
# keep only the 'top' most abundant groups, where top is user-given
if ( is.null(taxa) ) {
tax.factor.sel<-tax.factor
} else if ( is.numeric(taxa) ) {
if ( taxa <= ncol(tax.factor) ) {
tax.factor.sel<-tax.factor[,1:taxa]
} else {
tax.factor.sel<-tax.factor
}
} else if ( is.character(taxa) && length(taxa) >= 1 ) {
if( !any(names(tax.factor) %in% taxa) ) {
stop ("Error: no taxa present in the dataset")
} else {
tax.factor.sel <- tax.factor[, which(names(tax.factor) %in% taxa), drop=FALSE]
nocap <- setdiff(taxa, names(tax.factor.sel))
if ( length(nocap) != 0 ) {
warning(paste("Taxa not in the dataset: ", paste(nocap, collapse=" ,"), sep=""))
# if a taxon not being found in dataset, add 0
for (i in nocap) {
tax.factor.sel[[i]] <- 0
}
}
}
} else {
tax.factor.sel<-tax.factor
}
return(tax.factor.sel)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.