#' Extract CDR3-length profile
#'
#' @param input_file File containing the CDR3 sequences length and frequency.
#' @param output_dir A directory to write the output (by default current working directory).
#' @param numb_threads Number of threads to use (by default 4).
#' @details Two files containing the CDR3-length profile i.e. like-histogram and tab-separated file from \code{input_file}. Both files are written in \code{output_dir}.
#' @examples smm_lengthprofile(input_file = "example_file.tsv", output_dir = "/path/to/dump/directory", numb_threads = 4)
smm_lengthprofiler <- function(input_file, output_dir, numb_threads){
# By default settigns:
# Output directory is the working directory
output_dir <- getwd()
# Number of threads to used is 4
numb_threads <- c(4)
# Attach required packages
if (!require(package = "ggplot2", character.only = TRUE)){
install.packages(pkgs = "ggplot2", repos = "https://cran.rediris.es/", dependencies = T)
library(package = "ggplot2", character.only = TRUE)
}
# Load full file to R enviroment
data_obj <- read.table(file = input_file, header = TRUE, comment.char = "", sep = "\t")
# Remove surplus columns
data_col <- grep(pattern = "avg", x = colnames(data_obj))
leng_col <- grep(pattern = "length", x = colnames(data_obj))
data_fre <- list()
# Generate large naive vectors
for (i in seq_len(length(data_col))){
data_fre[[i]] <- cbind.data.frame(data_obj[,leng_col], data_obj[,data_col[i]])
data_fre[[i]] <- data_fre[[i]][which(data_fre[[i]][,2] > 0),]
data_fre[[i]] <- as.data.frame(table(data_fre[[i]][,1]))
colnames(data_fre[[i]]) <- c("CDR3_Length", "Abs_Freq")
}
names(data_fre) <- colnames(data_obj)[data_col]
# Export the files
for (i in seq_len(length(data_fre))){
leng_plot <- ggplot(data = data_fre[[i]], aes(x = as.numeric(as.character(data_fre[[i]][,1])), y = data_fre[[i]][,2])) +
geom_bar(stat = "identity") + xlab("CDR3 Length") + ylab("Frequency") + ggtitle(label = names(data_fre)[i])
write.table(x = data_fre[i], file = paste0(output_dir, "/", names(data_fre)[i], ".tsv"), append = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE)
print(paste("Frequency file from", names(data_fre)[i], "has been written in", paste0(output_dir, "/", names(data_fre[i]), ".tsv")))
ggsave(plot = leng_plot, filename = paste0(names(data_fre)[i], ".png"), path = output_dir, device = "png", width = 15, height = 20, units = "cm", dpi = 350)
print(paste("Frequency plot from", names(data_fre)[i], "has been written in", paste0(output_dir, "/", names(data_fre[i]), ".png")))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.