#' A housekeeping OTUs finding function
#'
#' This function allows you to find housekeeping OTUs and calculate size factors for normalization.
#' @param OTU_table pool for OTUs which will be tested for differential abundance in form of OTUs x Samples
#' @param nOTUs_hkpool number of OTUs in the pool from which to search housekeeping OTUs
#' @param quant quantile of distance distribution as the network threshold
#' @keywords housekeeping OTUs
#' @keywords size factor
#' @keywords normalization
#' @export
#' @examples
hk_find <- function(OTU_table, nOTUs_hkpool=100, quant = 0.03){
# input:
# OTU table: pool for OTUs which will be tested for differential abundance (OTUs x Samples)
# nOTUs_hkpool: number of OTUs in the pool from which to search housekeeping OTUs
# quant: quantile of distance distribution as the network threshold
# output:
# housekeeping OTUs' IDs (rownames of OTU table)
# size factor used for later on normalization
samobs = apply(OTU_table, 1, function(x) sum(x != 0))
sums = apply(OTU_table, 1, sum)
otudf = data.frame(prev = samobs, sums = sums)
otudf = otudf[order(-otudf$prev, -otudf$sums),]
hk_pool = OTU_table[rownames(otudf)[1:nOTUs_hkpool],]
OTU_ID = rownames(hk_pool)
# create symmetric distance matrix between selected OTUs
ratio_var = matrix(0, nrow = nOTUs_hkpool, ncol = nOTUs_hkpool)
for (i in 1:nOTUs_hkpool){
for (j in 1:nOTUs_hkpool){
mul = (hk_pool[i,]*hk_pool[j,])
ind = unname(unlist(mul)) != 0
ratio_var[i,j] = var(unlist(log(hk_pool[i,][ind]/hk_pool[j,][ind])))
}
}
ratio_var[lower.tri(ratio_var, diag = TRUE)] <- 0
dist = ratio_var[ratio_var > 0]
nodes = data.frame(seq(1, nOTUs_hkpool, 1), OTU_ID)
colnames(nodes) = c("order", "OTU_ID")
links = matrix(0, nrow = 1, ncol = 3)
h = quantile(dist, probs = quant)
order = seq(1, nOTUs_hkpool, 1)
for (i in 1:nOTUs_hkpool){
check = ratio_var[i,]
ind = order[check < h & check > 0]
if (length(ind) > 0){
dist = check[ind]
rep = replicate(length(ind), i)
record = cbind(rep, ind, dist)
links = rbind(links, record)
}
}
links = links[-1,]
net <- graph_from_data_frame(d = links, vertices = nodes, directed = F)
list_cliques = cliques(net)
clique_size = sapply(list_cliques, length)
largest_cliques = largest_cliques(net)
## intersect of all largest cliques as housekeeping OTUs
#if (length(largest_cliques) > 1){
# intersect_cliques = largest_cliques[[1]]
# for (i in 2:length(largest_cliques)){
# intersect_cliques = intersect(intersect_cliques, largest_cliques[[i]])
# }
#}else{
# intersect_cliques = largest_cliques[[1]]
#}
#intersect_cliques = intersect_cliques[order(intersect_cliques)]
## choose the first largest clique as housekeeping OTUs
intersect_cliques = largest_cliques[[1]]
hk_ID = OTU_ID[intersect_cliques]
size_factor = colSums(hk_pool[hk_ID,])
return(list(hk_ID = hk_ID, size_factor = size_factor))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.