sm_Hsampling <- function(input_file, output_dir, iterations){
pkgs <- list("vegan", "iNEXT", "ggplot2", "ggpubr")
pkg_attach <- function(x){
library(package = x, character.only = T)
}
lapply(pkgs, pkg_attach)
data <- read.table(file = input_file, header = TRUE, comment.char = "", sep = "\t")
data <- data[,c(6:ncol(data))]
number_sequences <- apply(data, 2, function(x){which(x > 0)})
number_sequences <- unlist(lapply(number_sequences, length))
min_number_sequences <- number_sequences[which(number_sequences == min(number_sequences))]
sam_number_sequences <- names(number_sequences)[which(number_sequences == min(number_sequences))]
h_iteration <- list()
i <- 0
while (i < iterations) {
i <- i + 1
h_iteration[[i]] <- vector()
for (n in seq_len(ncol(data[, c(1:ncol(data))]))){
random_choosen <- sample(x = seq_len(nrow(data)), size = min_number_sequences, prob = data[, n] / sum(data[, n]))
random_diversity <- diversity(t(data[random_choosen, n]))
h_iteration[[i]][n] <- random_diversity
}
}
diversity_data <- cbind.data.frame(rep(colnames(data)[c(1:ncol(data))], iterations), unlist(h_iteration))
colnames(diversity_data) <- c("Sample", "Shannon")
diversity_data$Iteration <- rep(iterations, nrow(diversity_data))
h_iteration <- as.data.frame(matrix(data = unlist(h_iteration), nrow = iterations, byrow = T))
colnames(h_iteration) <- colnames(data)[c(1:ncol(data))]
h_iteration$Iterations <- rep(iterations, nrow(h_iteration))
h_ditribution_plot <-
ggplot(data = diversity_data, aes(x = Sample, y = Shannon, fill = Sample)) +
geom_boxplot() +
ggtitle(label = paste0("Number of iterations: ", iterations)) +
ylab(label = "Shannon index") +
theme_pubr() +
theme(legend.position = "none", axis.title.x = element_blank(), axis.title.y = element_text(face = "bold"), axis.text.x = element_text(angle = 90)) +
write.table(x = h_iteration, file = paste0(output_dir, "/", colnames(data[2]), "-", iterations, ".tsv"), append = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE)
ggsave(plot = h_ditribution_plot, filename = paste0(colnames(data[2]), "-", iterations, ".png"), device = "png", path = output_dir, width = 17, height = 15, units = "cm", dpi = 350)
print("Info: Shannon index was calculated using the smallest sample size")
print(paste0("Smallest sample was ", sam_number_sequences, " (", min_number_sequences, ")"))
print(paste0("Data was wrote in ", output_dir, "/", colnames(data[2]), "-", iterations, ".tsv"))
print(paste0("Chart was wrote in ", output_dir, "/", colnames(data[2]), "-", iterations, ".png"))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.