Nothing
transpose.OTU <- function(data) {
valid.OTU(data)
# return the transpose (without the taxonomy column,
# which should be the last column)
return(as.data.frame(t(data[, -dim(data)[2]])))
}
filter.OTU <- function(data, percent=NULL, number=NULL) {
if ( class(data) != "list" ) {
stop("please provide otu tables as list; see ?RAM.input.formatting")
}
.valid.data(data, is.OTU=TRUE)
###check for zero entries?
num.data <- length(data)
labels <- names(data)
otu_sel <- list()
otu.sel <- list()
for ( i in 1:length(data) ){
otu <- data[[i]]
if (is.null(otu)) { break }
label <- names(data)[i]
valid.OTU(otu)
# both filter are NULL
if (is.null(number) & is.null(percent)) {
warning("No filter was provided, all otus will be used. Consider filtering otu by total number of sequence (e.g. 50) or maximum relative abundance (e.g. 0.01)")
sel<-1:nrow(otu)
} else if (!is.null(number) & !is.null(percent)) {
stop("Error: can only provide one type of filter by total number of sequence (e.g. 50) or maximum relative abundance (e.g. 0.01)")
} else if (!is.null(percent)) {
# select OTUs more than percent in at least on sample (otu columns)
otu.p <- cbind(decostand(otu[, -ncol(otu)],
MARGIN=2, "total"), otu$taxonomy)
names(otu.p)[ncol(otu.p)]<-"taxonomy"
sel <- rownames(otu.p)[which(apply(otu.p[, -ncol(otu.p)],
MARGIN=1, FUN=max) > percent)]
} else if (!is.null(number)) {
# select OTUs more than number sequences in total
sel<-rownames(otu)[which(rowSums(otu[, -ncol(otu)]) > number)]
} else {
warning("no filtering requirment provided, will return the original otus")
}
otu_sel[[label]] <- sel
#return(otu_sel)
if ( length(sel) == 0 ) {
warning("no otus in ", label, " met the filter requirement, original OTU returned")
sel <- ""
} else {
print(paste(length(sel), " otus in ", label,
" met the filter requirment.", sep=""))
otu.sel[[label]] <- otu[sel, ]
}
}
return(otu.sel)
}
data.subset <- function(data, meta, factors="", values="", and=TRUE){
if ( class(data) != "list" ) {
stop("please provide otu tables as list; see ?RAM.input.formatting")
}
.valid.data(data, is.OTU=TRUE)
###check for zero entries?
num.data <- length(data)
labels <- names(data)
otu_sel <- list()
for ( i in 1:length(data) ){
otu <- data[[i]]
if (is.null(otu)) { break }
label <- names(data)[i]
valid.OTU(otu)
# meta factor and value for selection
factors <- .valid.factor(meta=meta, meta.factor=factors)
factors1 <- parse(text = paste("meta$",factors,sep=""))
values1 <- paste(values, collapse="|")
if( length(factors1) > 0 ) {
if ( !and ) {
# and=FALSE; any subjects belong to the values should be kept
meta_sel <- list()
for ( i in 1:length(factors1) ) {
meta_sel[[i]] <- meta[grepl(values1, eval(factors1[i])), ]
}
meta_sel <- meta_sel[lapply(meta_sel,length)>0]
meta_sub <- unique(do.call("rbind", meta_sel))
} else {
# and=TRUE: subjects should belong to all values to be kept
arg <- vector()
for ( i in 1:length(factors1) ) {
new <- paste("grepl( \"", values1, "\", ", factors1[i], " )", sep="")
arg <- c(arg, new)
}
arg.all <- paste(arg, collapse = " & ")
sel <- eval(parse(text = arg.all))
meta_sub <- meta[sel, ]
}
}
if ( nrow(meta_sub) == 0 ) {
stop("Error: no subject met the filter requirement")
} else {
for (i in factors) {
if (is.factor(meta[[i]])) {
# drop levels
meta_sub[[i]] <- factor(meta_sub[[i]])
}
}
}
}
# match samples among OTUs and metadata
result=list()
for (i in 1:length(data)) {
otu <- data[[i]]
label <- names(data)[i]
if ( is.null(otu) ) { break }
label <- names(data)[i]
otu_sub = otu[, match(c(rownames(meta_sub),"taxonomy"), colnames(otu))]
### remove empty rows and columns and order the data.frame ###
otu_sub = otu_sub[rowSums(otu_sub[,-ncol(otu_sub)])>0, colSums(otu_sub[,-ncol(otu_sub)])>0]
otu_sub = otu_sub[order(rowSums(otu_sub[,-ncol(otu_sub)]),decreasing=TRUE),]
### Verify if the data.frame is non empty ###
if(nrow(otu_sub) == 0){
return ("Length of factor data is 0, please check if there were errors in your factors and factor.values")
}
if ( identical(rownames(meta_sub), colnames(otu_sub)[-ncol(otu_sub)]) ) {
result[[label]]<-otu_sub
} else {
stop("meta and `otu` do not have same samples")
}
}
result[["meta"]] <- meta_sub
return(result)
}
data.revamp <- function(data, is.OTU=TRUE, ranks=NULL,
stand.method=NULL, top=NULL,
mode="number") {
if ( class(data) != "list" ) {
stop("please provide otu tables as list; see ?RAM.input.formatting")
}
.valid.data(data, is.OTU=is.OTU)
###check for zero entries?
num.data <- length(data)
labels <- names(data)
data.new <- list()
for ( i in 1:length(data) ){
elem <- data[[i]]
if ( is.null(elem) ) { break }
label <- names(data)[i]
if ( is.OTU ) {
valid.OTU(elem)
if ( is.null(ranks) || ranks == "" ) {
# LCA of each otu
lca <- LCA.OTU(elem)
rownames(lca) <- paste(lca$LCA,
rownames(lca),
sep="_")
lca <- lca[, -ncol(lca)]
lca <- as.data.frame(t(lca))
lca.sort <- lca[, order(colSums(lca), decreasing=TRUE)]
data.new[[label]] <- lca.sort
} else {
num.rank <- length(ranks)
rank.list <- list()
for ( i in 1:num.rank ) {
.valid.rank(ranks[i])
rank_name <- .get.rank.name(ranks[i])
tax_rank <- tax.abund(elem, rank=ranks[i],
drop.unclassified=FALSE,
count=TRUE)
tax_rank.sort <- tax_rank[, order(colSums(tax_rank),
decreasing=TRUE)]
lab <- paste(label, rank_name, sep="_")
data.new[[lab]] <- tax_rank.sort
}
}
} else {
data.new[[label]] <- elem[, order(colSums(elem), decreasing=TRUE)]
}
}
# order by total counts or relative abund
for ( i in 1:length(data.new) ) {
elm <- data.new[[i]]
if ( is.null(elm) ) { break }
label <- names(data.new)[i]
elm.percent <- decostand(elm, "total")
ord.num <- order(colSums(elm), decreasing=TRUE)
ord.per <- order(apply(elm.percent, MARGIN=2,
FUN=max), decreasing=TRUE)
if ( !is.null(top) ) {
# check whether have so many top groups
if ( top <= ncol(elm) ) {
top <- top
} else {
warning(paste(label, " doesn't have ", top, " taxa groups, will retrun all", sep=""))
top <- ncol(elm)
}
if ( mode == "number" ) {
elm <- elm[, ord.num[1:top]]
} else if ( mode == "percent" ) {
elm <- elm[, ord.per[1:top]]
} else {
warning("seletion mode must be \"number\" or \"percent\",
the default is \"number\". Will keep the taxa with
highest total count")
elm <- elm[, ord.num[1:top]]
}
} else {
elm <- elm
}
data.new[[label]] <- elm
}
# data transformation & remove unclassified
remove.pat <- paste(.blacklist(), collapse="|")
for ( i in 1:length(data.new) ) {
elm <- data.new[[i]]
if ( is.null(elm) ) { break }
label <- names(data.new)[i]
keep <- !grepl(remove.pat, names(elm),ignore.case=TRUE)
if ( length(keep) != 0 ) {
if ( !is.null(stand.method) ) {
elm <- decostand(elm, stand.method)
elm <- elm[, keep, drop=FALSE]
} else {
elm <- elm[, keep, drop=FALSE]
}
} else {
warning("No identified groups met requirement, will retrun
all groups without filtering")
elm <- elm
}
data.new[[label]] <- elm
}
return(data.new)
}
filter.Taxa <- function(taxa, drop.unclassified=TRUE,
percent=NULL, number=NULL) {
# whether or not keep unclassified groups
remove.pat <- paste(.blacklist(), collapse="|")
if ( drop.unclassified ) {
taxa <- taxa[, !grepl(remove.pat, names(taxa))]
} else {
taxa <- taxa
}
# both filter are NULL
if (is.null(number) & is.null(percent)) {
warning("No filter was provided, all taxa will be used. Consider filtering taxonomic abundance matrix by total number of sequence (e.g. 50) or maximum relative abundance (e.g. 0.01)")
sel<-1:nrow(taxa)
}
if ( !is.null(number) & !is.null(percent) ) {
warning("should only provide one type of filter by total number of sequence (e.g. 50) or maximum relative abundance (e.g. 0.01), will filter by percent as default")
taxa.p <- decostand(taxa, MARGIN=1, "total")
sel <- which(apply(taxa.p, MARGIN=2, FUN=max) > percent)
}
if ( !is.null(percent) ) {
# select taxa more than percent in at least on sample
taxa.p <- decostand(taxa, MARGIN=1, "total")
sel <- names(taxa.p)[which(apply(taxa.p, MARGIN=2, FUN=max) > percent)]
}
if ( !is.null(number) ) {
# select taxa more than number sequences in total
sel <- names(taxa)[which(colSums(taxa) > number)]
}
if ( length(sel) == 0) {
warning("no taxa met the filter requirement, original taxa abundance matrix returned")
taxa.new <- taxa
} else {
taxa.new <- taxa[, names(taxa) %in% sel]
print(paste(length(names(taxa.new)), " taxa met the filter requirment", sep=""))
}
return(taxa.new)
}
OTU.rarefy <- function(data, sample=NULL) {
if ( class(data) != "list" ) {
stop("please provide otu tables as list; see ?RAM.input.formatting")
}
.valid.data(data, is.OTU=TRUE)
###check for zero entries?
num.data <- length(data)
labels <- names(data)
data.new <- list()
for ( i in 1:length(data) ){
otu <- data[[i]]
if ( is.null(otu) ) { break }
label <- names(data)[i]
valid.OTU(otu)
# transpose OTU
otu.t<-transpose.OTU(otu)
# OTU taxonomy
otu_tax <- otu$taxonomy
# rarefy
if (is.null(sample)) {
otu.t.rf<-rrarefy(otu.t, sample=min(apply(otu.t, 1, sum)))
} else if (is.numeric(sample)) {
if (sample <= min(apply(otu.t, 1, sum))) {
otu.t.rf<-rrarefy(otu.t, sample=sample)
} else {
warning("sampling number is larger than the minimum sequence of a sample, using minimum number instead ")
otu.t.rf<-rrarefy(otu.t, sample=sample)
}
} else {
stop("Error: what's the sampling number?")
}
otu.new <-as.data.frame(t(otu.t.rf))
otu.new$taxonomy<-otu_tax
data.new[[label]] <- otu.new
}
return(data.new)
}
combine.OTU <- function(data, meta) {
if ( class(data) != "list" ) {
stop("please provide otu tables as list; see ?RAM.input.formatting")
}
.valid.data(data, is.OTU=TRUE)
###check for zero entries?
num.data <- length(data)
labels <- names(data)
matched <-match.data(data=data, is.OTU=TRUE, meta=meta)
meta <- matched$meta
# check sampleID orders,
# make the samples IDs in otu tables match metadata
order <- rownames(meta)
otu.list <- list()
samples <- list()
for ( i in 1:(length(matched)-1) ){
otu <- matched[[i]]
if ( is.null(otu) ) { break }
label <- names(matched)[i]
valid.OTU(otu)
.valid.meta(otu1=otu, meta=meta)
# give a prefix
sampleID <- colnames(otu)[-ncol(otu)]
if ( ! identical(sampleID, order) ) {
otu <- otu[, match(c(order, "taxonomy"), colnames(otu))]
if ( ! identical(c(order, "taxonomy"), colnames(otu)) ) {
stop(paste(label, " doesn't have same samples as metadata",
sep=""))
}
}
#rownames(elm) <- paste(lab, rownames(elm), sep="_")
otu.list[[label]] <- otu
}
df <- do.call("rbind", otu.list)
return(df)
}
match.data <- function(data, is.OTU=TRUE, meta) {
samples <- list()
samples[["meta"]] <- rownames(meta)
elems <- list()
for ( i in 1:length(data) ) {
elem <- data[[i]]
if ( is.null(elem) ) { break }
label <- names(data)[i]
elems[[label]] <- elem
# otus
if ( is.OTU ) {
elem <- elem[rowSums(elem[, -ncol(elem)])>0,
colSums(elem[, -ncol(elem)]) >0]
samples.elem <- colnames(elem)[-ncol(elem)]
} else {
elem <- elem[rowSums(elem)>0, colSums(elem) >0]
samples.elem <- rownames(elem)
}
samples[[label]] <- samples.elem
}
# samples exist in all data and metadata
samples.all <- Reduce(intersect, samples)
print (paste(length(samples.all), " samples exist in otus and metadata!", sep=""))
# new metadata
sel <- which(samples[["meta"]] %in% samples.all)
meta.new <- meta[sel, ]
# new data
elems.new <- list()
for ( i in 1:length(elems) ) {
label <- names(elems)[i]
elem <- elems[[i]]
if ( is.null(elem) ) { break }
if ( is.OTU ) {
to_match <- c(rownames(meta.new), "taxonomy")
elem.new <- elem[, match(to_match,
colnames(elem))]
elem.new <- elem.new[rowSums(elem.new[,
-ncol(elem.new)])>0,
colSums(elem.new[,
-ncol(elem.new)]) >0]
#return(elem.new)
if ( identical(to_match, colnames(elem.new)) ) {
elems.new[[label]] <- elem.new
} else {
stop(paste("couldn't match samples in ", label, " and metadata, please make sure your input is otu table", sep=""))
}
} else {
to_match <- rownames(meta.new)
elem.new <- elem[match(to_match,
rownames(elem)), ]
elem.new <- elem.new[rowSums(elem.new)>0,
colSums(elem.new) >0]
if ( identical(to_match, rownames(elem.new)) ) {
elems.new[[label]] <- elem.new
} else {
stop(paste("couldn't match samples in ", label, " and metadata, please make sure your input is a taxonomy abundance table", sep=""))
}
}
}
elems.new[["meta"]] <- meta.new
return(elems.new)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.