R/exportResults.R
In exomePeak2: Bias Awared Peak Calling and Quantification for MeRIP-Seq

#' @title Export the (Differential) Modification Peaks/Sites and their associated Statistics
#' @param sep a \code{\link{SummarizedExomePeak}} object.
#' @param format a \code{character} for the exported format, could be a vector that contains \code{c("CSV", "BED", "RDS")}.
#'
#' \describe{
#'  \item{\strong{\code{CSV}}}{
#'  export a comma separated values (CSV) table with the genomic location and LFC statistics.
#'  }
#'
#'  \item{\strong{\code{BED}}}{
#'  export a BEDGraph file with the score column = -log2(adjusted p value).
#'  }
#'
#'  \item{\strong{\code{RDS}}}{
#'  export the RDS file of the \code{\link{SummarizedExperiment}} object.
#'  }
#' }
#'
#' @param save_dir a \code{character} for the name of the directory being saved; Default \code{= "exomePeak2_output"}.
#'
#' @param cut_off_pvalue a \code{numeric} value for the p value cutoff in the exported result; Default \code{= NULL}.
#'
#' @param cut_off_padj a \code{numeric} value for the adjusted p value cutoff in the exported result; Default \code{= 0.05}.
#'
#' @param cut_off_log2FC a \code{numeric} value for the log2 fold change cutoff of the exported result,
#' only the sites with abs(LFC) larger than this value are kept; Default \code{= 0}.
#'
#' @param min_num_of_positive a \code{numeric} value for the minimum number of reported sites.
#' If the number of remaining sites is less than this number after filtering, additional sites will be reported by the increasing order of the p value to meet this number.
#'
#' @param expected_direction a \code{character} for the expected differential modification direction, could be one in \code{c("hyper", "hypo", "both")}.
#'
#' \describe{
#'  \item{\strong{\code{hyper}}}{
#'  only report the peaks/sites with interactive LFC > 0.
#'  }
#'
#'  \item{\strong{\code{hypo}}}{
#'  only report the peaks/sites with interactive LFC < 0.
#'  }
#'
#'  \item{\strong{\code{both}}}{
#'  report the peaks/sites in both directions.
#'  }
#' }
#'
#' This argument is useful when the treated group involves the perturbation of a known writer or eraser protein; Default "both".
#'
#' @param inhibit_filter a \code{logical} of whether to remove all the filters, this option is useful when quantification on single based site annotation; Default \code{= FALSE}.
#'
#' @param table_style a \code{character} for the style of the CSV table being exported, could be one in \code{c("bed","granges")}.
#'
#' \describe{
#'  \item{\strong{\code{bed}}}{
#'  the genomic locations in the table are represented by BEDgraph style.
#'  }
#'
#'  \item{\strong{\code{granges}}}{
#'  the genomic locations in the table are represented by GRanges style.
#'  }
#' }
#'
#' @param reads_count a \code{logical} of whether to export the reads count for each sample; Default \code{ = TRUE}.
#'
#' @param GC_sizeFactors a \code{logical} of whether to export the GC content correction size factors; Default \code{ = TRUE}.
#'
#' @examples
#'
#' ### Load the example SummarizedExomPeak object
#' f1 = system.file("extdata", "sep_ex_mod.rds", package="exomePeak2")
#'
#' sep <- readRDS(f1)
#'
#' ### Save the modification peaks/sites statistics on the current directory.
#' exportResults(sep)
#'
#' @importFrom rtracklayer export
#' @import GenomicRanges
#'
#' @aliases exportResults
#'
#' @rdname exportResults-methods
#'
#' @return none, a folder will be saved on the disk.
#'
#' @export
#'

setMethod("exportResults",
          "SummarizedExomePeak",
          function(sep,
                   format = c("CSV", "BED", "RDS"),
                   table_style = c("bed", "granges"),
                   save_dir = "exomePeak2_output",
                   cut_off_pvalue = NULL,
                   cut_off_padj = 0.1,
                   cut_off_log2FC = 0,
                   min_num_of_positive = 100,
                   expected_direction = c("both", "hyper", "hypo"),
                   inhibit_filter = FALSE,
                   reads_count = TRUE,
                   GC_sizeFactors = TRUE) {

            #Check the input
            stopifnot(cut_off_pvalue <= 1 & cut_off_pvalue >= 0)
            stopifnot(cut_off_log2FC >= 0)
            stopifnot(all(format %in% c("CSV", "BED", "RDS")))
            table_style <- match.arg(table_style)
            expected_direction <- match.arg(expected_direction)

            #Create the saving directory
            if (!dir.exists(save_dir)) {
              dir.create(save_dir)
            }

            #Create the folder for the additional information
            if (!dir.exists(file.path(save_dir,"ADDInfo"))) {
              dir.create(file.path(save_dir,"ADDInfo"))
            }

            #Decide the file names
            if (!any(grepl("Diff",colnames(exomePeak2Results( sep ))))) {
              file_name <- "Mod"
            } else{
              file_name <- "DiffMod"
            }

            #Check if the users have calculated the log2FCs and their associated p-values
            if (nrow(exomePeak2Results(sep)) == 0) {
              if (any(sep$design_Treatment)) {
                sep <- glmDM(sep)
              } else{
                sep <- glmM(sep)
              }
            }

            #Generate row index for the exported modification in SummarizedExomePeak
            if (inhibit_filter) {

              #Do not filter the rows on modification
              index_keep <- T

            } else {

            if (!any(grepl("Diff",colnames(exomePeak2Results( sep ))))) {

              #Decide the filter on modification
              decision_mod <- decision_deseq2(
                Inf_RES = exomePeak2Results(sep),
                log2FC_cut = cut_off_log2FC,
                P_cut = cut_off_pvalue,
                Padj_cut = cut_off_padj,
                Min_mod = min(min_num_of_positive, nrow(exomePeak2Results(sep)))
              )

              #P.S. If no sites are reported, export all the p values that are not NA
              index_keep <-
                which(
                  (exomePeak2Results(sep)[[decision_mod$Cut_By_expected]] <= decision_mod$Cut_Val_expected) &
                    (exomePeak2Results(sep)$log2FoldChange > cut_off_log2FC)
                )

            } else {

              #Decide the filter on differential modification

              decision_dm <- decision_deseq2(
                Inf_RES = exomePeak2Results(sep),
                log2FC_cut = cut_off_log2FC,
                P_cut = cut_off_pvalue,
                Padj_cut = cut_off_padj,
                Min_mod = min_num_of_positive,
                Exp_dir = expected_direction
              )

              if (expected_direction == "both") {
                indx_es <-
                  (abs(exomePeak2Results(sep)$log2FoldChange) > cut_off_log2FC)
              } else {
                if (expected_direction == "hyper") {
                  indx_es <- (exomePeak2Results(sep)$log2FoldChange > cut_off_log2FC)
                } else {
                  indx_es <- (exomePeak2Results(sep)$log2FoldChange < -1 * cut_off_log2FC)
                }
              }

              index_keep <-
                which(exomePeak2Results(sep)[[decision_dm$Cut_By_expected]] < decision_dm$Cut_Val_expected & indx_es)


              if (length(index_keep) == min_num_of_positive) {
                warning(paste0("The number of significantly differentially modified sites < ",min_num_of_positive,", the insignificantly differentially modified sites are reported to reach the minimum site number of ",min_num_of_positive,"; to report the differential modification status on all modification sites, please set 'inhibit_filter = TRUE.'\n"))
              }
            }
            }

            #Subset the SummarizedExomePeak object according to user defined filters
            sep <- sep[grepl("peak_", rownames(sep)),][index_keep,]

            exomePeak2Results(sep) <- exomePeak2Results(sep)[grepl("peak_", rownames(sep)),][index_keep,]

            id_num <- as.numeric(gsub("^.*_", "", rownames(sep)))

            id_index <- order(id_num)

            sep <- sep[id_index,]

            exomePeak2Results(sep) <- exomePeak2Results(sep)[id_index,]

            rownames(sep) <- paste0("peak_", rep(seq_along(id_num), table(id_num[id_index])))

            rm(id_num, id_index)

            if (any(format == "RDS")) saveRDS(sep, file.path(save_dir, paste0(file_name, ".rds")))

            result_grl <- rowRanges(sep)

            result_stat <- exomePeak2Results(sep)

            #Correct the names in the resulting files
            result_gr <- unlist(result_grl)
            result_gr$gene_id <- gsub("\\.[0-9]*$", "", result_gr$gene_id)
            rownames(result_stat) = NULL

            #Generate the GRangesList file that is ready to export
            indx_split <- names(result_gr)
            names(result_gr) <- NULL
            result_gr$gene_id[is.na(result_gr$gene_id)] <- " "
            result_grl <- split(result_gr, indx_split)
            #reorder the list
            result_grl <- result_grl[order(as.numeric(gsub("^.*_", "", names(result_grl))))]
            mcols(result_grl) <- result_stat
            rm(result_gr,result_stat,indx_split)

            id_num <- as.numeric(gsub("^.*_", "", names(result_grl)))
            id_index <- order(id_num)
            renamed_id <- paste0("peak_", rep(seq_along(id_num), table(id_num[id_index])))
            result_grl <- result_grl[id_index,]
            names(result_grl) <- renamed_id

            #Generate the tables according to the style choices
                if (table_style == "granges") {
                  result_df <- as.data.frame(result_grl)
                  result_df <-
                    result_df[, colnames(result_df) != "group"]
                  colnames(result_df)[colnames(result_df) == "group_name"] = "peak_name"
                  colnames_range_info <- colnames(result_df)
                  result_df <-
                    cbind(result_df, as.data.frame(mcols(result_grl))[rep(seq_along(result_grl), elementNROWS(result_grl)),])
                  site_widths <- sum(width(result_grl[rep(seq_along(result_grl), elementNROWS(result_grl))]))

                } else {

                  scores <- -1 * log2(mcols(result_grl)$padj)
                  scores[is.na(scores)] <- 0
                  mcols(result_grl)$score <- scores

                  export(
                    object = result_grl,
                    con = file.path(save_dir, paste0(file_name, ".bed")),
                    format = "BED"
                  )

                  result_df <- read.table(file.path(save_dir, paste0(file_name, ".bed")),
                               header = F,
                               sep = "\t")

                  colnames(result_df) <- c(
                    "chr",
                    "chromStart",
                    "chromEnd",
                    "name",
                    "score",
                    "strand",
                    "thickStart",
                    "thickEnd",
                    "itemRgb",
                    "blockCount",
                    "blockSizes",
                    "blockStarts"
                  )

                  result_df$geneID <- vapply( result_grl, function(x) x$gene_id[1] , character(1))

                  colnames_range_info <- colnames(result_df)

                  mcols(result_grl) <- mcols(result_grl)[,!colnames(mcols(result_grl)) %in% "score"]

                  site_widths <- sum(width(result_grl))

                  result_df <- cbind(result_df , as.data.frame(mcols(result_grl)))

                }


            #Retrieve the index for columns
            indx_range_info <- colnames(result_df) %in% colnames_range_info
            indx_reads_count <- grepl("ReadsCount",colnames(result_df))
            indx_major_stat <- colnames(result_df) %in% c("log2FoldChange","pvalue","padj")

            #Export the estimates and statistics of all GLM design
            write.csv(
              result_df[,!(indx_reads_count|indx_major_stat|indx_range_info)],
              file = file.path(save_dir, "ADDInfo", "ADDInfo_GLM_allDesigns.csv")
            )

            #Report a matrix of RPKM
            rpkm_df <- result_df[,indx_reads_count]
            rpkm_df <- t(t((rpkm_df/(site_widths/1e3)))/colSums(rpkm_df))
            colnames(rpkm_df) <- gsub("ReadsCount","RPKM",colnames(rpkm_df))

            write.csv(
              rpkm_df,
              file = file.path(save_dir, "ADDInfo", "ADDInfo_RPKM.csv")
            )

            if (!any(grepl("Diff", colnames(exomePeak2Results(sep))))){

              result_df <- result_df[,indx_range_info|indx_reads_count|indx_major_stat]

            } else {

              result_df <- result_df[,indx_range_info|indx_reads_count|indx_major_stat|colnames(result_df) %in% c("log2fcMod.Control.MLE","log2fcMod.Treated.MLE")]

              colnames( result_df )[colnames(result_df) %in% c("log2fcMod.Control.MLE",
                                                             "log2fcMod.Treated.MLE",
                                                             "log2FoldChange")] <- c("ModLog2FC_control",
                                                                                     "ModLog2FC_treated",
                                                                                     "DiffModLog2FC")
            }

            if (any(format == "CSV")) write.csv(x = result_df,
                                                file = file.path(save_dir,
                                                                 paste0(file_name, ".csv")),
                                                row.names = FALSE
                                                )

                scores <- -1 * log2(mcols(result_grl)$padj)

                scores[is.na(scores)] <- 0

                mcols(result_grl)$score <- scores

          if (any(format == "BED")) export(
                  object = result_grl,
                  con = file.path(save_dir, paste0(file_name, ".bed")),
                  format = "BED"
                )

          if (reads_count) write.csv(x = assays(sep)[[1]],
                                     file = file.path(save_dir,"ADDInfo","ADDInfo_ReadsCount.csv")
                                     )


          if (!is.null(GCsizeFactors(sep))&
                       GC_sizeFactors) write.csv(
                                          x = assays(sep)[[2]],
                                          file = file.path(save_dir,"ADDInfo","ADDInfo_SizeFactors.csv")
                                          )

message(paste0("Result files have saved under the directory: '", file.path(save_dir)), "'.")

})
Any scripts or data that you put into this service are public.
exomePeak2 documentation built on Nov. 8, 2020, 5:27 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
exomePeak2
Bias Awared Peak Calling and Quantification for MeRIP-Seq

R/exportResults.R
In exomePeak2: Bias Awared Peak Calling and Quantification for MeRIP-Seq

Try the exomePeak2 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

exomePeak2 Bias Awared Peak Calling and Quantification for MeRIP-Seq

R/exportResults.R In exomePeak2: Bias Awared Peak Calling and Quantification for MeRIP-Seq

Try the exomePeak2 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

exomePeak2
Bias Awared Peak Calling and Quantification for MeRIP-Seq

R/exportResults.R
In exomePeak2: Bias Awared Peak Calling and Quantification for MeRIP-Seq