# R/sm_lengthsum.R

# Defines functions: smm_lengthsum

#' Sum abundance columns by CDR3 length and export tables and bar plots
#'
#' Reads a tab-separated table, sums every column whose name contains
#' "avg" over the rows that share the same value in the column whose name
#' contains "length", and then, for each summed column, writes:
#' \itemize{
#'   \item `<column>.tsv`: length, sum and relative frequency;
#'   \item `<column>_Sum.png`: bar plot of the sums;
#'   \item `<column>_RelativeFreq.png`: bar plot of the relative
#'     frequencies.
#' }
#'
#' @param input_file Path to a tab-separated file with a header row. It
#'   must contain exactly one column whose name matches "length" and at
#'   least one column whose name matches "avg".
#' @param output_dir Directory that receives the output files. Defaults
#'   to the current working directory; it is created if it does not exist.
#' @return `NULL`, invisibly. The function is called for its side effects
#'   (files written to `output_dir`, progress messages).
smm_lengthsum <- function(input_file, output_dir = getwd()) {
  # ggplot2 is required for the plots. Keep the install-on-demand
  # behaviour, but do not attach the package to the caller's search path.
  if (!requireNamespace("ggplot2", quietly = TRUE)) {
    install.packages(
      pkgs = "ggplot2",
      repos = "https://cran.rediris.es/",
      dependencies = TRUE
    )
    if (!requireNamespace("ggplot2", quietly = TRUE)) {
      stop("Package 'ggplot2' is required but could not be installed.",
           call. = FALSE)
    }
  }

  # The output directory is now honoured, so make sure it exists.
  if (!dir.exists(output_dir)) {
    dir.create(output_dir, recursive = TRUE)
  }

  # Load the full file into the R environment
  data_obj <- read.table(
    file = input_file,
    header = TRUE,
    comment.char = "",
    sep = "\t"
  )

  # Locate the data ("avg") columns and the length column
  col_names <- colnames(data_obj)
  data_col <- grep(pattern = "avg", x = col_names)
  leng_col <- grep(pattern = "length", x = col_names)

  if (length(data_col) == 0L) {
    stop("No column name in 'input_file' contains \"avg\".", call. = FALSE)
  }
  if (length(leng_col) != 1L) {
    stop(
      "Expected exactly one column name containing \"length\" in ",
      "'input_file', found ", length(leng_col), ".",
      call. = FALSE
    )
  }

  # Sum the data columns per length. drop = FALSE keeps a data frame (and
  # therefore the column names) even when a single "avg" column matches.
  data_sum <- rowsum(
    x = data_obj[, data_col, drop = FALSE],
    group = as.factor(as.character(data_obj[[leng_col]]))
  )

  # Builds one bar plot of `values` against the numeric CDR3 lengths.
  length_barplot <- function(lengths, values, title, y_label) {
    plot_df <- data.frame(cdr3_length = lengths, value = values)
    ggplot2::ggplot(
      data = plot_df,
      ggplot2::aes(x = cdr3_length, y = value)
    ) +
      ggplot2::geom_bar(stat = "identity") +
      ggplot2::ggtitle(label = title) +
      ggplot2::xlab(label = "CDR3 Length") +
      ggplot2::ylab(y_label)
  }

  # Export one table and two plots per summed column
  sample_names <- colnames(data_sum)
  for (i in seq_along(sample_names)) {
    sample_name <- sample_names[i]

    data_exp <- cbind.data.frame(rownames(data_sum), data_sum[[i]])
    colnames(data_exp) <- c("CDR3 Length", "Sum")
    data_exp$Relative_Freq <- data_exp$Sum / sum(data_exp$Sum)

    # Row names of the grouped table are the lengths as text; the plots
    # need them on a numeric axis.
    lengths_num <- as.numeric(as.character(data_exp[[1]]))

    plot_exp <- length_barplot(
      lengths_num, data_exp$Sum, sample_name, "Sum"
    )
    plot_rel <- length_barplot(
      lengths_num, data_exp$Relative_Freq, sample_name, "Relative frequency"
    )

    table_path <- file.path(output_dir, paste0(sample_name, ".tsv"))
    sum_path <- file.path(output_dir, paste0(sample_name, "_Sum.png"))
    rel_path <- file.path(
      output_dir, paste0(sample_name, "_RelativeFreq.png")
    )

    write.table(
      x = data_exp,
      file = table_path,
      append = FALSE,
      sep = "\t",
      row.names = FALSE,
      col.names = TRUE
    )
    message(
      "Frequency file from ", sample_name,
      " has been written in ", table_path
    )

    ggplot2::ggsave(
      plot = plot_exp, filename = sum_path, device = "png",
      width = 15, height = 20, units = "cm", dpi = 350
    )
    message(
      "Frequency plot from ", sample_name,
      " has been written in ", sum_path
    )

    ggplot2::ggsave(
      plot = plot_rel, filename = rel_path, device = "png",
      width = 15, height = 20, units = "cm", dpi = 350
    )
    message(
      "Relative frequency plot from ", sample_name,
      " has been written in ", rel_path
    )
  }

  invisible(NULL)
}
# manuelsmendoza/smmcdr3 documentation built on Aug. 12, 2018, 9:08 a.m.