#'@title OTU2Taxa
#'@description This function aggregates OTUs into their taxonomic characteristics,i.e.
#' genera or family level and returns a 'Taxonomic Enrichment Test' by using the hypergeometric distribution function P(x>=X), which gives the
#' probability of a particular
#' genera/family has been selected by chance. This function determines whether a selected set of genera/family shows
#' a statistically significant difference in comparison with the genera/family into the whole samples.
#' This method is used to identify selected families or genus that are
#' over-represented in a large set of OTUs. The output may have an important biological meaning since the selected genus/families
#' can be associated with
#' phenotypes. The function outputs -log(p+0.05), i.e. a higher value means an 'enriched' genus or family.
#'@param Selection list or data frame from VarSelection or dAB function
#'@param TaxonInfo data.frame with taxonomic table associated to Data input.
#'For instance, if Data comes from 16_S level, TaxoInfo should be a data.frame with 16_S
#' associated taxonomic information. Note that the first column of this table must have the OTUs ids.
#'@param tableName a character indicating the table name. For instance, if your data comes
#' from 16_S, this parameter should be '16_S'. Importantly, this argument should match with the names from the input
#' list, ExpressionSet or MultiAssayExperiment
#' object, which corresponds the Data argument of LinkData function.
#'@param AnalysisLev It is a character indicating if data should be aggregate into genera
#' or family level
#'@export OTU2Taxa
#'@return List. The function outputs a list of size two. The first element contains all the selected taxa with
#'their associated value from the hyperg distribution -log(p+0.05); whereas the second element
#' only includes taxas' counting up to 1.
#'
#'
#'
#'@author Laura M Zingatetti
#'
#'@references
#'{
#'\enumerate{
#'\item
#'\item Da Wei Huang, B. T. S., & Lempicki, R. A. (2009). Bioinformatics enrichment tools:
#' paths toward the comprehensive functional analysis of large gene lists.
#' Nucleic acids research, 37(1), 1.
#'\item Zheng, Q., & Wang, X. J. (2008). GOEAST: a web-based software toolkit for
#' Gene Ontology enrichment analysis. Nucleic acids research, 36(suppl_2), W358-W363.
#'}
#'}
#' @examples
#' {
#'data('Ruminotypes')
#'Normalization<-lapply(list(Ruminotypes$`16_S`,Ruminotypes$Archaea,Ruminotypes$`18_S`),
#'function(x){DataProcessing(x,Method='Compositional')})
#'Dataset<-Normalization
#'names(Dataset)<-c('16_S','Archaea','18_S')
#'#Running LinkData
#'Output<-LinkData(Dataset,Distance=rep('euclidean',3),
#'Scale = FALSE,Center=FALSE,nCluster = 3)
#'Select_Var<-VarSelection(Output,Data=Dataset,Crit = 'Rsquare',perc=0.9)
#'SignTaxa<-OTU2Taxa(Selection=VarTable(Select_Var),
#'TaxonInfo=Ruminotypes$Taxa_16S,tableName='16_S',AnalysisLev = 'Family')
#'Selected<-SignTaxa$TotalUp1
#' }
#'
#' @name OTU2Taxa
#' @rdname OTU2Taxa-OTU2Taxa
#' @import stats
#' @importFrom reshape2 melt
OTU2Taxa <- function(Selection, TaxonInfo, tableName, AnalysisLev = "Genus") {
if (!is.data.frame(Selection) && !is.list(Selection)) {
stop("Selection should be a list or data.frame")
}
if (length(tableName) > 1) {
stop("Please, you must to indicate only one table name")
}
if (("Genus" %in% AnalysisLev || "Family" %in% AnalysisLev) == FALSE) {
stop("You should choose an unit to data aggregation.")
}
if (is(Selection, "data.frame")) {
if (tableName %in% Selection == FALSE) {
stop("wrong table name specification. Please,
make sure that you are using a right table name")
}
SelectedVar <- Selection[Selection %in% tableName]
# number of variables from the table
ntable <- ncol(SelectedVar)
SelectedVar <- names(SelectedVar)
}
if (is(Selection, "list")) {
if (tableName %in% names(Selection) == FALSE) {
stop("wrong table name specification.
Please, make sure that you are using a right table name")
}
# changed names changed names
if (is(Selection[[which(names(Selection) %in% tableName)]], "character")) {
SelectedVar <- Selection[[which(names(Selection) %in% tableName)]]
ntable <- length(SelectedVar)
}
if (is(Selection[[which(names(Selection) %in% tableName)]], "numeric")) {
SelectedVar <- names(Selection[[which(names(Selection) %in% tableName)]])
ntable <- length(SelectedVar)
}
}
TaxonInfo <- as.data.frame(TaxonInfo)
if (AnalysisLev %in% colnames(TaxonInfo) == FALSE) {
stop("Your table should contains genus or family level")
}
TotaltoGenLev <- table(TaxonInfo[colnames(TaxonInfo) == AnalysisLev])
if (!any(suppressWarnings(is.na(as.numeric(SelectedVar))))) {
TotalSelected <- table(TaxonInfo[TaxonInfo[, 1] %in% as.numeric(SelectedVar), ][, colnames(TaxonInfo) ==
AnalysisLev])
} else {
if (any(TaxonInfo[, 1] %in% SelectedVar)) {
TotalSelected <- table(TaxonInfo[TaxonInfo[, 1] %in% SelectedVar, ][, colnames(TaxonInfo) == AnalysisLev])
} else {
TotalSelected = 0
}
}
if (is(TotalSelected, "table")) {
# hypergeometric test
Test <- c()
for (i in seq_along(TotalSelected)) {
x = TotalSelected[i]
m = TotaltoGenLev[names(TotaltoGenLev) %in% names(TotalSelected)[i]]
n = sum(TotaltoGenLev[-which(names(TotaltoGenLev) %in% names(TotalSelected)[i])])
k = sum(TotalSelected)
Test <- c(Test, 1 - phyper(x, m = m, n = n, k = k))
}
Test <- sort(Test, decreasing = TRUE)
# look for some empty genera or family
if (length(which(nchar(trimws(names(Test))) == 0)) > 0) {
names(Test)[which(nchar(trimws(names(Test))) == 0)] <- paste0("unknown_", seq(seq_along(which(nchar(trimws(names(Test))) ==
0))), AnalysisLev)
}
Test2 <- Test[names(Test) %in% names(TotalSelected[which(TotalSelected > 1)])]
Test <- (-log(Test + 0.05))
Test2 <- (-log(Test2 + 0.05))
return(list(TotalHyp = Test, TotalUp1 = Test2))
} else {
return(list(TotalHyp = c("selected variables are not annotated,please check"), TotalUp1 = c("selected variables are not annotated, please check!")))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.