R/runSummary_Tables.R

Defines functions runSummary_Tables

Documented in runSummary_Tables

#' Summarize simulation results into LaTeX tables
#'
#' Reads every file in \code{path} whose name ends in \code{_Table_Time.tsv}
#' or \code{_Table_Means.tsv}, merges the clustering summaries with the timing
#' summaries, and writes two LaTeX tables into \code{path}:
#' \code{Simulation_Table1.tex} (scATAC-seq scenarios) and
#' \code{Simulation_Table2.tex} (scChIP-seq scenarios).
#'
#' @param path directory containing the input \code{.tsv} files; the output
#' \code{.tex} files are written to the same directory
#' @param type which type of results to summarize. Currently not used by the
#' function body; kept so that existing calls keep working.
#'
#' @return Called for its side effect of writing the two \code{.tex} files.
#' The returned value is that of the final \code{kableExtra::save_kable} call.
#'
#' @details Clustering and timing tables are joined on \code{Data},
#' \code{Groups}, \code{Cells}, \code{Rarity}, \code{Noise}, \code{Depth},
#' \code{Dissimilarity} and \code{Method}, keeping every clustering row.
#' Both output tables are restricted to rows with \code{Clustering == 'Kmeans'},
#' \code{Groups == 1}, \code{Cells == 1}, \code{Rarity == 1} and
#' \code{Depth} in \code{c(1,3)}. Table 1 additionally keeps
#' \code{Data == 'scATACseq'}, \code{Noise} in \code{c(1,2)} and missing
#' \code{Dissimilarity}; Table 2 keeps \code{Data == 'scChIPseq'},
#' \code{Noise == 1} and \code{Dissimilarity} in \code{c(1,2)}.
#' ARI, AMI and running time are reported as \code{mean (sd)} strings.
#' An error is raised when no input file of either kind is found.
#'
#' @author Pedro L. Baldoni, \email{pedrobaldoni@gmail.com}
#'
#' @export
#'
runSummary_Tables <- function(path,type){

    # Bind the column names used via data.table non-standard evaluation so
    # that they are not flagged as undefined global variables
    Data <- Groups <- Cells <- Rarity <- Noise <- Depth <- Dissimilarity <- Clustering <- Method <- NULL
    ARI <- Mean_ARI <- SD_ARI <- AMI <- Mean_AMI <- SD_AMI <- `Time (min)` <- Mean_Time <- SD_Time <- NULL

    # Read and stack every file in `path` whose name matches `pattern`.
    # `pattern` is a regular expression (not a glob), as list.files() expects.
    read_tables <- function(pattern){
        files <- list.files(path = path,pattern = pattern,full.names = TRUE)
        if(length(files) == 0){
            stop('No files matching pattern "',pattern,'" found in "',path,'"',call. = FALSE)
        }
        data.table::rbindlist(lapply(files,data.table::fread),fill = TRUE)
    }

    # Collapse the mean/sd column pairs into 'mean (sd)' strings, by reference
    format_metrics <- function(dt){
        dt[,ARI := paste0(rd(Mean_ARI),' (',rd(SD_ARI),')')]
        dt[,AMI := paste0(rd(Mean_AMI),' (',rd(SD_AMI),')')]
        dt[,`Time (min)` := paste0(rd(Mean_Time),' (',rd(SD_Time),')')]
        dt[,c('Mean_ARI','SD_ARI','Mean_AMI','SD_AMI','Mean_Time','SD_Time') := NULL]
        dt
    }

    # Map scenario codes to printable labels, ordered as given in `to`
    recode_levels <- function(x,from,to){
        factor(plyr::mapvalues(x,from = from,to = to),levels = to)
    }

    # Creating the final table
    # Each file list is read on its own, so a mismatch in the number of
    # timing and clustering files can no longer drop or misread inputs
    time.dt <- read_tables('_Table_Time\\.tsv$')
    clustering.dt <- read_tables('_Table_Means\\.tsv$')

    dt.summary <- data.table::merge.data.table(clustering.dt,time.dt,all.x = TRUE,
                                               by = c('Data','Groups','Cells','Rarity','Noise','Depth','Dissimilarity','Method'))

    tb1 <- dt.summary[(Data == 'scATACseq' & Groups == 1 & Cells == 1 & Rarity == 1 & Noise %in% c(1,2) & Depth %in% c(1,3) & is.na(Dissimilarity) & Clustering == 'Kmeans'),]
    tb2 <- dt.summary[(Data == 'scChIPseq' & Groups == 1 & Cells == 1 & Rarity == 1 & Noise == 1 & Depth %in% c(1,3) & Dissimilarity %in% c(1,2) & Clustering == 'Kmeans'),]

    # Setting up scATAC-seq table

    tb1 <- tb1[,c('Depth','Noise','Method','Mean_ARI','SD_ARI','Mean_AMI','SD_AMI','Mean_Time','SD_Time')][order(Depth,Noise,Method),]
    tb1 <- format_metrics(tb1)
    tb1[,Depth := recode_levels(Depth,from = c(1,3),to = c('5,000','25,000'))]
    tb1[,Noise := recode_levels(Noise,from = c(1,2),to = c('0\\%','25\\%'))]

    # Setting up scChIP-seq table

    tb2 <- tb2[,c('Depth','Dissimilarity','Method','Mean_ARI','SD_ARI','Mean_AMI','SD_AMI','Mean_Time','SD_Time')][order(Depth,Dissimilarity,Method),]
    tb2 <- format_metrics(tb2)
    tb2[,Depth := recode_levels(Depth,from = c(1,3),to = c('5,000','25,000'))]
    tb2[,Dissimilarity := recode_levels(Dissimilarity,from = c(1,2),to = c('1\\%','5\\%'))]

    # Saving kable
    # escape = FALSE keeps the LaTeX markup in labels and captions verbatim

    kable1 <- kableExtra::kable(tb1,format = 'latex',booktabs = TRUE, align = c('c','c','l','r','r','r'),escape = FALSE,
                                caption = 'Performance of scATAC-seq methods on simulated scATAC-seq data under different sequencing depths (5,000 and 25,000) and different noise levels (0\\%  and 25\\%) for 3 clusters, 500 cells/cluster, and no rare cell sub populations..',label = 'project3_table1')
    kable1 <- kableExtra::collapse_rows(kable1,columns = 1:2,latex_hline = 'major',valign = 'top')

    kable2 <- kableExtra::kable(tb2,format = 'latex',booktabs = TRUE, align = c('c','c','l','r','r','r'),escape = FALSE,
                                caption = 'Performance of scATAC-seq methods on simulated scChIP-seq data under different sequencing depths (5,000 and 25,000) and cluster-to-cluster difference levels (1\\%  and 5\\%). The scenario with 5,000 reads/cell and 1\\% difference level better approximates real data \\\\citep{grosselin2019high}.',label = 'project3_table2')
    kable2 <- kableExtra::collapse_rows(kable2,columns = 1:2,latex_hline = 'major',valign = 'top')

    kableExtra::save_kable(kable1,file = file.path(path,'Simulation_Table1.tex'))
    kableExtra::save_kable(kable2,file = file.path(path,'Simulation_Table2.tex'))
}
plbaldoni/scChIPseqsim documentation built on June 11, 2020, 7:41 p.m.