R/autobar.R

Defines functions autobar

Documented in autobar

#' Automated barplot
#'
#' @importFrom rstatix group_by
#' @importFrom rstatix add_xy_position
#' @importFrom rstatix get_y_position
#' @importFrom rstatix tukey_hsd
#' @importFrom rstatix add_significance
#' @importFrom rstatix t_test
#' @importFrom gdata read.xls
#' @importFrom ggpubr ggbarplot
#' @importFrom ggpubr stat_pvalue_manual
#' @importFrom ggpubr facet
#' @importFrom ggpubr mean_se_
#' @importFrom ggplot2 element_rect
#' @importFrom ggplot2 theme
#' @importFrom ggplot2 scale_y_continuous
#' @importFrom ggplot2 element_text
#' @importFrom ggplot2 scale_fill_grey
#' @importFrom tidyr gather
#' @importFrom dplyr %>%
#' @importFrom dplyr filter
#' @importFrom dplyr arrange
#' @importFrom multcomp glht
#' @importFrom multcomp mcp
#' @importFrom utils read.csv
#' @importFrom utils read.table
#' @importFrom utils write.table
#' @importFrom grDevices dev.off
#' @importFrom grDevices pdf
#' @importFrom stats aov
#' @import RColorBrewer
#' @import Hmisc
#' @import ggpubr
#' @import ggplot2
#' @param directory Directory including count matrix files
#' @param input excel, csv, or txt
#' @param test The default setting is TRUE. In the case of FALSE, statical tests are not performed.
#' @export
#'
autobar <- function(directory, input = "excel", test = TRUE){
  setwd(directory)
  if(input == "excel") files <- list.files(pattern = "*.xlsx")
  if(input == "csv") files <- list.files(pattern = "*.csv")
  if(input == "txt") files <- list.files(pattern = "*.txt")
  files <- gsub("\\..+$", "", files)

  for (name in files) {
    value <- NA
    Row.names <- NA
    dir.create(name,showWarnings = F)
    if(input == "excel") data.file <- paste(name, '.xlsx', sep = '')
    if(input == "csv") data.file <- paste(name, '.csv', sep = '')
    if(input == "txt") data.file <- paste(name, '.txt', sep = '')
    print(data.file)
    if(input == "excel") data <- read.xls(data.file)
    if(input == "csv") data <- read.csv(data.file,header = T, sep=",")
    if(input == "txt") data <- read.table(data.file,header = T, sep="\t")
    collist <- gsub("\\_.+$", "", colnames(data))
    collist <- unique(collist[-1])
    rowlist <- gsub("\\_.+$", "", data[,1])
    rowlist <- unique(rowlist)
    data <- data %>% tidyr::gather(key=sample, value=value,-Row.names)
    data$sample<-gsub("\\_.+$", "", data$sample)
    data$Row.names <- as.factor(data$Row.names)
    data$sample <- as.factor(data$sample)
    data$value <- as.numeric(data$value)
    data$sample <- factor(data$sample,levels=collist,ordered=TRUE)


    if ((length(rowlist) > 81) && (length(rowlist) <= 200))
    {pdf_hsize <- 15
    pdf_wsize <- 15}
    if ((length(rowlist) > 64) && (length(rowlist) <= 81))
    {pdf_hsize <- 13.5
    pdf_wsize <- 13.5}
    if ((length(rowlist) > 49) && (length(rowlist) <= 64))
    {pdf_hsize <- 12
    pdf_wsize <- 12}
    if ((length(rowlist) > 36) && (length(rowlist) <= 49))
    {pdf_hsize <- 10.5
    pdf_wsize <- 10.5}
    if ((length(rowlist) > 25) && (length(rowlist) <= 36))
    {pdf_hsize <- 9
    pdf_wsize <- 9}
    if ((length(rowlist) > 16) && (length(rowlist) <= 25))
    {pdf_hsize <- 7.5
    pdf_wsize <- 7.5}
    if ((length(rowlist) > 12) && (length(rowlist) <= 16))
    {pdf_hsize <- 6
    pdf_wsize <- 6}
    if ((length(rowlist) > 9) && (length(rowlist) <= 12))
    {pdf_hsize <- 5
    pdf_wsize <- 6}
    if ((length(rowlist) > 6) && (length(rowlist) <= 9))
    {pdf_hsize <- 5
    pdf_wsize <- 4.5}
    if ((length(rowlist) > 4) && (length(rowlist) <= 6))
    {pdf_hsize <- 4
    pdf_wsize <- 6}
    if (length(rowlist) == 4)
    {pdf_hsize <- 4
    pdf_wsize <- 4}
    if (length(rowlist) == 3)
    {pdf_hsize <- 2
    pdf_wsize <- 6}
    if (length(rowlist) == 2)
    {pdf_hsize <- 2
    pdf_wsize <- 4}
    if (length(rowlist) == 1)
    {pdf_hsize <- 2
    pdf_wsize <- 2}
    if (length(rowlist) > 200)
    {pdf_hsize <- 30
    pdf_wsize <- 30}

    if (test == TRUE){
    df <- data.frame(matrix(rep(NA, 11), nrow=1))[numeric(0), ]
    colnames(df) <- c("Row.names", "group1", "group2", "term", "null.value","Std.Error","coefficients","t.value","p.adj","xmin", "xmax")
    if (length(collist) >= 3){
      stat.test <- data %>% group_by(Row.names)
      stat.test <- stat.test %>% tukey_hsd(value ~ sample)
      stat.test <- stat.test %>% add_significance("p.adj")
      stat.test <- stat.test %>% add_xy_position(scales = "free", step.increase = 0.2)
      for (name2 in rowlist){
        data2 <- dplyr::filter(data, Row.names == name2)
        dun <- aov(value~sample, data2)
        dunnette <- glht(model = dun, linfct=mcp(sample="Dunnett"))
        dunnette2 <- summary(dunnette)
        p.adj <- c()
        coefficients <- c()
        Std.Error <- c()
        t.value <- c()
        group1 <- c()
        group2 <- c()
        term <- c()
        null.value <- c()
        xmin <- c()
        xmax <- c()
        for (i in 1:(length(collist)-1)){
          p.adj <- c(p.adj, dunnette2[["test"]][["pvalues"]][i])
          coefficients <- c(coefficients, dunnette2[["test"]][["coefficients"]][i])
          Std.Error <- c(Std.Error, dunnette2[["test"]][["sigma"]][i])
          t.value <- c(t.value, dunnette2[["test"]][["tstat"]][i])
          group1 <- c(group1, c(collist[1]))
          group2 <- c(group2, c(collist[i+1]))
          term <- c(term, c("sample"))
          null.value <- c(null.value, 0)
          xmin <- c(xmin, c(1))
          xmax <- c(xmax, c(i+1))
        }
        df2 <- data.frame(Row.names = name2, group1 = group1, group2 = group2, term = term,
                          null.value = null.value, Std.Error = Std.Error, coefficients = coefficients,
                          t.value = t.value, p.adj = p.adj, xmin = xmin, xmax = xmax)
        df <- rbind(df, df2)
      }

    df <- df %>% arrange(Row.names)
    df <- df %>% group_by(Row.names)
    stat.test2 <- data %>% group_by(Row.names)
    stat.test2 <- stat.test2 %>% get_y_position(value ~ sample, scales = "free", step.increase = 0.15, fun = "mean_se")
    stat.test2 <- stat.test2 %>% dplyr::filter(group1 == collist[1])
    stat.test3 <- cbind(stat.test2,df[,-1:-3])
    stat.test3$Row.names <- as.factor(stat.test3$Row.names)
    stat.test3 <- stat.test3 %>% add_significance("p.adj")

    image.file <- paste0(paste0(name, "/"), paste0(name, '_TukeyHSD.pdf'))
    pdf(image.file, height = pdf_hsize, width = pdf_wsize)
    p <- ggbarplot(data,x = "sample", y = "value", scales = "free",
                   facet.by = "Row.names", fill = "sample",add = c("mean_se", "jitter"),
                   add.params = list(size=0.5), xlab = FALSE, legend = "none")
    plot(facet(p, facet.by = "Row.names", panel.labs.background = list(fill = "transparent",
                                                                       color = "transparent"),
               scales = "free", short.panel.labs = T)+ stat_pvalue_manual(stat.test,hide.ns = T, size = 2) +
           theme(axis.text.x= element_text(size = 5),axis.text.y= element_text(size = 7),
                 panel.background = element_rect(fill = "transparent", size = 0.5),
                 title = element_text(size = 7),text = element_text(size = 10)))
    dev.off()
    image.file2 <- paste0(paste0(name, "/"), paste0(name, '_dunnett.pdf'))
    pdf(image.file2, height = pdf_hsize, width = pdf_wsize)
    p <- ggbarplot(data,x = "sample", y = "value",
                           scales = "free", fill = "sample",
                           add = c("mean_se", "jitter"), facet.by = "Row.names",
                           add.params = list(size=0.5), xlab = FALSE, legend = "none")
    plot(facet(p, facet.by = "Row.names", panel.labs.background = list(fill = "transparent", color = "transparent"),
               scales = "free", short.panel.labs = T)+
           stat_pvalue_manual(stat.test3,hide.ns = T, size = 2) +
           theme(axis.text.x= element_text(size = 5), axis.text.y= element_text(size = 7),
                 panel.background = element_rect(fill = "transparent", size = 0.5),
                 title = element_text(size = 7), text = element_text(size = 10)))
    dev.off()
    test.file <- paste0(name, "/result_TukeyHSD.txt")
    write.table(stat.test[,1:10], file = test.file, row.names = F, col.names = T, quote = F, sep = "\t")
    dunnett_file <- paste0(name, "/result_dunnett.txt")
    write.table(stat.test3[,-4:-5], file = dunnett_file, row.names = F, col.names = T, quote = F, sep = "\t")
    }else{
      stat.test <- data %>% group_by(Row.names)
      stat.test <- stat.test %>% t_test(value ~ sample)
      stat.test <- stat.test %>% add_significance()
      stat.test <- stat.test %>% add_xy_position(scales = "free", step.increase = 0.2)
      image.file <- paste0(paste0(name, "/"), paste0(name, '_Welch_t-test.pdf'))
      pdf(image.file, height = pdf_hsize, width = pdf_wsize)
      p <- ggbarplot(data,x = "sample", y = "value", scales = "free",
                     facet.by = "Row.names", fill = "sample",
                     add = c("mean_se", "jitter"),
                     add.params = list(size=0.5), xlab = FALSE, legend = "none")
      plot(facet(p, facet.by = "Row.names",
                 panel.labs.background = list(fill = "transparent",
                                              color = "transparent"),
                 scales = "free", short.panel.labs = T)+
             stat_pvalue_manual(stat.test,hide.ns = T, size = 2) +
             theme(axis.text.x= element_text(size = 5),
                            axis.text.y= element_text(size = 7),
                            panel.background = element_rect(fill = "transparent", size = 0.5),
                            title = element_text(size = 7),
                            text = element_text(size = 10)))
      dev.off()
      test.file <- paste0(name, "/result_Welch_t-test.txt")
      write.table(stat.test[,1:10], file = test.file, row.names = F, col.names = T, quote = F, sep = "\t")
    }
    }else{
      image.file <- paste0(paste0(name, "/"), paste0(name, '.pdf'))
      pdf(image.file, height = pdf_hsize, width = pdf_wsize)
      p <- ggbarplot(data,x = "sample", y = "value", scales = "free",
                     facet.by = "Row.names", fill = "sample",
                     add = c("mean_se", "jitter"),
                     add.params = list(size=0.5), xlab = FALSE, legend = "none")
      plot(facet(p, facet.by = "Row.names",
                 panel.labs.background = list(fill = "transparent",
                                              color = "transparent"),
                 scales = "free", short.panel.labs = T)+
             theme(axis.text.x= element_text(size = 5),
                   axis.text.y= element_text(size = 7),
                   panel.background = element_rect(fill = "transparent", size = 0.5),
                   title = element_text(size = 7),
                   text = element_text(size = 10)))
      dev.off()
    }
    file.copy(data.file, to = paste0(name,"/"))
    file.remove(data.file)
    }
}
Kan-E/Automate documentation built on Jan. 15, 2022, 8:10 a.m.