R/ggepitracks2.R

Defines functions epiplot_bsseq_multi epiplot_bsseq epiplot_cov epiplot_model .check_bigWigToBedGraph .check_bedtools .check_windows ggepitracks2

Documented in ggepitracks2

#' Plot Epigenetics Tracks for NGS Data using ggplot2 (version 2)
#'
#' @description Plot genome tracks for NGS data using ggplot2, resembling GBrowse/JBrowse/IGV style (ver.2 for enhanced performance). Supporting coverage data in bigWig or bedGraph format from RNA-seq, sRNA-seq, BS-seq, ChIP-seq, RIP-seq, GRO-seq, NET-seq, DNase-seq, MNase-seq, ATAC-seq, SHAPE-seq etc. Especially optimized for BS-seq data in single-base resolution. Requires `bigWigToBedGraph` and `bedtools` on Linux.
#'
#' @param locus_file BED file containing locus to plot. For each line, TAB-delimited information should contain: <chr>, <start>, <end>, <locus_name>, without header.
#' @param track_conf Configuration file containing track info. For each line, TAB-delimited information should contain: <track_name>, <track_type>, <track_bw/bg>, <y_min>, <y_max>, <color>, <group>, without header. NOTE: If you write "BS-seq" in "<track_type>", provide mC type (one of CG, CHG, CHH, All) to plot in "<track_bw/bg>" and provide detailed info in `bsseq_conf`, "<color>" is ignored in multi-mC mode of BS-seq. "<group>" is currently unused.
#' @param bsseq_conf Configuration file of BS-seq track. If there is no BS-seq track to plot, keep `NULL`. For each line, TAB-delimited information should contain: <track_name>, <CG_bw/bg>, <CHG_bw/bg>, <CHH_bw/bg>, <C_context_file>, without header. "<track_name>" here should be the same as in `track_conf`. If you choose only one mC type to plot, you can keep the column of other files as "NA", but provide all the bigWig files may be safe. "<C_context_file>" can be "NA" to ignore drawing C context track (currently unused).
#' @param gene_model BED file containing all elements of gene and/or TE model, generated by `[tinyfuncr::extractPos]`
#' @param ann_region BED file of annotation regions. If not provided, no annotation will be added. For each line, TAB-delimited information should contain: <chr>, <start>, <end>, <annotation_name>, without header. <annotation_name> could be empty but the last TAB should be kept.
#' @param ann_color Color of annotated regions, defalut "goldenrod1" (alpha 30)
#' @param out_dir Path to output directory, will be created if not exist, default "out/". Output plots in PDF format will be saved as <out_dir>/<locus_name>.pdf
#' @param height Plot height, in "mm", defalut 6*ntracks+10
#' @param width Plot width, in "mm", defalut 80
#'
#' @import magrittr
#' @import ggplot2
#' @importFrom cowplot plot_grid
#' @importFrom stringr str_split
#'
#' @author Yujie Liu
#' @export ggepitracks2
#'


######################### main IO #########################
ggepitracks2 <- function(locus_file,
                         track_conf,
                         bsseq_conf = NULL,
                         gene_model,
                         ann_region = NULL,
                         ann_color = "goldenrod1",
                         out_dir = "out",
                         height = NULL,
                         width = 80) {
  # check dependencies
  .check_windows()
  .check_bedtools()
  .check_bigWigToBedGraph()

  # make output dirs
  dir_name <- paste0(out_dir, "/")
  system(paste0("mkdir -p ", dir_name, "/tmp"))

  # read in and parse track info
  track_info <-
    read_tcsv(
      track_conf,
      header = F,
      col.names = c(
        "track_name",
        "track_type",
        "track_file",
        "y_min",
        "y_max",
        "color",
        "group"
      )
    )

  # calculate plot height according to ntracks if not provided
  if (is.null(height)) {
    height <- 6 * nrow(track_info) + 10
  }

  # read loci info
  loci <-
    read_tcsv(
      locus_file,
      header = F,
      col.names = c("chr", "start", "end", "name")
    )

  # convert each track into bedgraph format, and intersect with loci to plot
  for (track_idx in 1:nrow(track_info)) {
    track_name <-  track_info[track_idx, "track_name"]
    if (track_info[track_idx, "track_type"] == "BS-seq") {
      bsseq_info <- read_tcsv(
        bsseq_conf,
        header = F,
        col.names = c("track_name",
                      "CG",
                      "CHG",
                      "CHH",
                      "C_context_file")
      )
      track_idx_in_bsseq <-
        which(track_name == bsseq_info$track_name)
      if (track_info[track_idx, "track_file"] == "All") {
        for (ctype in c("CG", "CHG", "CHH")) {
          track_file <- bsseq_info[track_idx_in_bsseq, ctype]
          if (grepl("\\.(bw|bigWig|bigwig)$", track_file)) {
            system(
              paste0(
                "bigWigToBedGraph ",
                track_file,
                " ",
                dir_name,
                "/tmp/",
                track_name,
                ".",
                ctype,
                ".bg"
              )
            )
            system(
              paste0(
                "bedtools intersect -a ",
                locus_file,
                " -b ",
                dir_name,
                "/tmp/",
                track_name,
                ".",
                ctype,
                ".bg -wo | cut -f 4-8 >",
                dir_name,
                "/tmp/",
                track_name,
                ".",
                ctype,
                ".bg.lociInter"
              )
            )
            system(paste0(
              "rm -f ",
              dir_name,
              "/tmp/",
              track_name,
              ".",
              ctype,
              ".bg"
            ))
          } else if (grepl("\\.(bg|bdg|bedGraph|bedgraph)$", track_file)) {
            system(
              paste0(
                "bedtools intersect -a ",
                locus_file,
                " -b ",
                track_file,
                " -wo | cut -f 4-8 >",
                dir_name,
                "/tmp/",
                track_name,
                ".",
                ctype,
                ".bg.lociInter"
              )
            )
          } else {
            stop(
              "File format not supported !! Please provide bigWig (ends with .bw/.bigWig/.bigwig) or bedGraph (ends with .bg/.bdg/.bedGraph/.bedgraph) files ..."
            )
          }
        }
      } else {
        ctype <- track_info[track_idx, "track_file"]
        track_file <- bsseq_info[track_idx_in_bsseq, ctype]
        if (grepl("\\.(bw|bigWig|bigwig)$", track_file)) {
          system(
            paste0(
              "bigWigToBedGraph ",
              track_file,
              " ",
              dir_name,
              "/tmp/",
              track_name,
              ".",
              ctype,
              ".bg"
            )
          )
          system(
            paste0(
              "bedtools intersect -a ",
              locus_file,
              " -b ",
              dir_name,
              "/tmp/",
              track_name,
              ".",
              ctype,
              ".bg -wo | cut -f 4-8 >",
              dir_name,
              "/tmp/",
              track_name,
              ".",
              ctype,
              ".bg.lociInter"
            )
          )
          system(paste0(
            "rm -f ",
            dir_name,
            "/tmp/",
            track_name,
            ".",
            ctype,
            ".bg"
          ))
        }
        else if (grepl("\\.(bg|bdg|bedGraph|bedgraph)$", track_file)) {
          system(
            paste0(
              "bedtools intersect -a ",
              locus_file,
              " -b ",
              track_file,
              " -wo | cut -f 4-8 >",
              dir_name,
              "/tmp/",
              track_name,
              ".",
              ctype,
              ".bg.lociInter"
            )
          )
        } else {
          stop(
            "File format not supported !! Please provide bigWig (ends with .bw/.bigWig/.bigwig) or bedGraph (ends with .bg/.bdg/.bedGraph/.bedgraph) files ..."
          )
        }
      }
    } else {
      track_file <- track_info[track_idx, "track_file"]
      if (grepl("\\.(bw|bigWig|bigwig)$", track_file)) {
        system(
          paste0(
            "bigWigToBedGraph ",
            track_file,
            " ",
            dir_name,
            "/tmp/",
            track_name,
            ".bg"
          )
        )
        system(
          paste0(
            "bedtools intersect -a ",
            locus_file,
            " -b ",
            dir_name,
            "/tmp/",
            track_name,
            ".",
            ctype,
            ".bg -wo | cut -f 4-8 >",
            dir_name,
            "/tmp/",
            track_name,
            ".bg.lociInter"
          )
        )
        system(paste0("rm -f ",
                      dir_name,
                      "/tmp/",
                      track_name,
                      ".bg"))
      } else if (grepl("\\.(bg|bdg|bedGraph|bedgraph)$", track_file)) {
        system(
          paste0(
            "bedtools intersect -a ",
            locus_file,
            " -b ",
            track_file,
            " -wo | cut -f 4-8 >",
            dir_name,
            "/tmp/",
            track_name,
            ".bg.lociInter"
          )
        )
      } else {
        stop(
          "File format not supported !! Please provide bigWig (ends with .bw/.bigWig/.bigwig) or bedGraph (ends with .bg/.bdg/.bedGraph/.bedgraph) files ..."
        )
      }
    }
  }

  # loop against each locus and plot tracks
  for (locus_idx in 1:nrow(loci)) {
    # parse info of this locus and make granges
    chrom <- loci[locus_idx, 1]
    begin <- loci[locus_idx, 2]
    end <- loci[locus_idx, 3]
    locus_name <- loci[locus_idx, 4]

    write_tcsv(
      data.frame(chrom, as.character(begin), as.character(end)),
      file = paste0(dir_name, "/tmp/tmp_locus.bed"),
      col.names = F
    )

    # extract annotation region within this locus
    if (!is.null(ann_region)) {
      system(
        paste0(
          "bedtools intersect -a ",
          dir_name,
          "/tmp/tmp_locus.bed -b ",
          ann_region,
          " -wo | cut -f 4-7 >",
          dir_name,
          "/tmp/tmp_ann.bed"
        )
      )
      this_mark_region <-
        read_tcsv(
          paste0(dir_name, "/tmp/tmp_ann.bed"),
          header = F,
          col.names = c("chr", "start", "end", "name")
        )
      this_mark_region$start <- this_mark_region$start + 1
      this_mark_region$end <- this_mark_region$end + 1
    } else {
      this_mark_region <- NULL
    }

    # extract gene model within this locus
    system(
      paste0(
        "bedtools intersect -a ",
        dir_name,
        "/tmp/tmp_locus.bed -b ",
        gene_model,
        " -wo | cut -f 4-9 >",
        dir_name,
        "/tmp/tmp_model.bed"
      )
    )
    this_locus_model <-
      read_tcsv(paste0(dir_name, "/tmp/tmp_model.bed"), header = F)

    # plot gene model for this locus
    p_model <- epiplot_model(
      model = this_locus_model,
      start = begin,
      end = end
    )
    p_track <- list()
    p_track[[1]] <- p_model

    # loop against each track and plot for this locus
    for (track_idx in 1:nrow(track_info)) {
      track_name <-  track_info[track_idx, "track_name"]
      if (track_info[track_idx, "track_type"] == "BS-seq") {
        if (track_info[track_idx, "track_file"] != "All") {
          # read in track file in bedgraph format
          ctype <- track_info[track_idx, "track_file"]
          bgfile <- paste0(dir_name,
                           "/tmp/",
                           track_name,
                           ".",
                           ctype,
                           ".bg.lociInter")
          bg <-
            read_tcsv(
              bgfile,
              header = F,
              col.names = c("name", "chrom", "start", "end", "score")
            )
          bg_this_locus <- bg[bg$name == locus_name, c(2, 3, 4, 5)]
          bg_this_locus$start <- bg_this_locus$start + 1
          bg_this_locus$end <- bg_this_locus$end + 1

          # plot bsseq tracks
          p_track[[track_idx + 1]] <-
            epiplot_bsseq(
              bw_df = bg_this_locus,
              start = begin,
              end = end,
              y_min = track_info[track_idx, "y_min"],
              y_max = track_info[track_idx, "y_max"],
              track_name = track_info[track_idx, "track_name"],
              track_color = track_info[track_idx, "color"],
              ann_df = this_mark_region,
              ann_color = ann_color
            )
        } else {
          # read in track file in bedgraph format for each ctype into a list
          bglist_this_locus <- list()
          for (ctype in c("CG", "CHG", "CHH")) {
            bgfile <- paste0(dir_name,
                             "/tmp/",
                             track_name,
                             ".",
                             ctype,
                             ".bg.lociInter")
            bg <-
              read_tcsv(
                bgfile,
                header = F,
                col.names = c("name", "chrom", "start", "end", "score")
              )
            bg_this_locus <-
              bg[bg$name == locus_name, c(2, 3, 4, 5)]
            bg_this_locus$start <- bg_this_locus$start + 1
            bg_this_locus$end <- bg_this_locus$end + 1

            bglist_this_locus[[ctype]] <- bg_this_locus
          }

          # plot multi C type bsseq
          p_track[[track_idx + 1]] <-
            epiplot_bsseq_multi(
              CG_df = bglist_this_locus[["CG"]],
              CHG_df = bglist_this_locus[["CHG"]],
              CHH_df = bglist_this_locus[["CHH"]],
              start = begin,
              end = end,
              y_min = track_info[track_idx, "y_min"],
              y_max = track_info[track_idx, "y_max"],
              track_name = track_info[track_idx, "track_name"],
              ann_df = this_mark_region,
              ann_color = ann_color
            )
        }
      } else{
        # read in track file for non BS-seq data
        bgfile <- paste0(dir_name,
                         "/tmp/",
                         track_name,
                         ".bg.lociInter")
        bg <-
          read_tcsv(
            bgfile,
            header = F,
            col.names = c("name", "chrom", "start", "end", "score")
          )
        bg_this_locus <-
          bg[bg$name == locus_name, c(2, 3, 4, 5)]
        bg_this_locus$start <- bg_this_locus$start + 1
        bg_this_locus$end <- bg_this_locus$end + 1

        # plot coverage
        p_track[[track_idx + 1]] <-
          epiplot_cov(
            bw_df = bg_this_locus,
            start = begin,
            end = end,
            y_min = track_info[track_idx, "y_min"],
            y_max = track_info[track_idx, "y_max"],
            track_name = track_info[track_idx, "track_name"],
            track_color = track_info[track_idx, "color"],
            ann_df = this_mark_region,
            ann_color = ann_color
          )
      }
    }

    # merge plots
    p_merge <-
      cowplot::plot_grid(
        plotlist = p_track,
        nrow = nrow(track_info) + 1,
        rel_heights = c(5, rep(3, nrow(track_info))),
        align = "v"
      )
    ggsave(
      paste0(dir_name, locus_name, ".pdf"),
      p_merge,
      height = height,
      width = width,
      units = "mm"
    )

    # clean tmp files
    #system(paste0("rm -rf ", dir_name, "/tmp"))
  }
}


############################ check dependency ############################
.check_windows <- function() {
  if (Sys.info()["sysname"] == "Windows") {
    stop("Windows is not supported :(")
  }
}

.check_bedtools <- function() {
  check <- as.character(Sys.which(names = "bedtools"))[1]
  if (check == "") {
    stop("Could not locate bedtools !")
  }
}

.check_bigWigToBedGraph <- function() {
  check <- as.character(Sys.which(names = "bigWigToBedGraph"))[1]
  if (check == "") {
    stop("Could not locate bigWigToBedGraph !")
  }
}


######################### plot tracks for gene model #########################
epiplot_model <- function(model, start, end) {
  # parse model info
  tmp_model_info <-
    model[[4]] %>% stringr::str_split("_") %>% as.data.frame() %>% t() %>% as.data.frame()
  model_info <- data.frame()
  for (i in 1:nrow(tmp_model_info)) {
    tmp <-
      data.frame(
        Chr = model[i, 1],
        Start = model[i, 2] + 1,
        End = model[i, 3] + 1,
        ID = paste(tmp_model_info[i, 2:ncol(tmp_model_info)], collapse = "_"),
        Type = tmp_model_info[i, 1],
        Strand = model[i, 6]
      )
    model_info <- rbind(model_info, tmp)
  }
  model_info <- model_info[order(model_info$Start),]

  # make draw data

  # parse coord
  genes_to_plot <- unique(model_info$ID)
  model_info$Group <-
    unlist(lapply(model_info$ID, function(x)
      which(x == genes_to_plot)))
  model_info$Start <-
    unlist(lapply(model_info$Start, function(x)
      max(x, start)))
  model_info$End <-
    unlist(lapply(model_info$End, function(x)
      min(x, end)))
  model_info <- model_info[model_info$Start <= model_info$End,]

  # split body and end
  model_info_group_first <-
    model_info[!duplicated(model_info$Group),]
  model_info_des <-
    model_info[order(model_info$Start, decreasing = T),]
  model_info_group_last <-
    model_info_des[!duplicated(model_info_des$Group),]
  model_info_group_last <-
    model_info_group_last[order(model_info_group_last$Group),]
  model_info_group_length <-
    model_info_group_last$End - model_info_group_first$Start

  for (group_idx in 1:nrow(model_info_group_first)) {
    if (model_info_group_first[group_idx, 6] == "+") {
      model_info_group_first[group_idx,] <-
        model_info_group_last[group_idx,]
    }
  }
  model_info_end <- data.frame()
  for (group_idx in 1:nrow(model_info_group_first)) {
    if (model_info_group_first[group_idx, "Strand"] == "+") {
      x <- c(
        model_info_group_first[group_idx, "Start"],
        model_info_group_first[group_idx, "Start"],
        model_info_group_first[group_idx, "End"],
        model_info_group_first[group_idx, "End"] + 0.02 * model_info_group_length[group_idx],
        model_info_group_first[group_idx, "End"]
      )
    } else {
      x <- c(
        model_info_group_first[group_idx, "End"],
        model_info_group_first[group_idx, "End"],
        model_info_group_first[group_idx, "Start"],
        model_info_group_first[group_idx, "Start"] - 0.02 * model_info_group_length[group_idx],
        model_info_group_first[group_idx, "Start"]
      )
    }
    model_info_end <-
      rbind(model_info_end,
            data.frame(
              Type = rep(model_info_group_first[group_idx, "Type"], 5),
              X = x,
              Y = c(
                model_info_group_first[group_idx, "Group"] - 0.4,
                model_info_group_first[group_idx, "Group"] - 0.6,
                model_info_group_first[group_idx, "Group"] - 0.6,
                model_info_group_first[group_idx, "Group"] - 0.5,
                model_info_group_first[group_idx, "Group"] - 0.4
              ),
              Group = rep(model_info_group_first[group_idx, "Group"], 5)
            ))
  }

  # extract three context
  model_info_name <- model_info[!duplicated(model_info$Group),]
  model_info_CDS <- model_info[model_info$Type == "CDS",]
  model_info_intron <- model_info[model_info$Type == "Intron",]
  model_info_intron$Mid <-
    rowMeans(model_info_intron[c("Start", "End")])
  model_info_UTR <-
    model_info[model_info$Type == "5UTR" |
                 model_info$Type == "3UTR",]
  model_info_end_CDS <-
    model_info_end[model_info_end$Type == "CDS",]
  model_info_end_UTR <-
    model_info_end[model_info_end$Type == "5UTR" |
                     model_info_end$Type == "3UTR",]

  #### Note for ggplot2 ####
  # In ggplot2, 1 linewidth or size is about 2.141959pt
  # (https://blog.csdn.net/weixin_43250801/article/details/130049894),
  # so commonly used 0.75pt line can be set as 0.35 linewidth;
  # for GBrowse, use 0.5pt for axis is better (set linewidth as 0.233);
  # 7pt text can be set as 3.27 size (BUT wrong!),
  # however, 7pt text should be set as size=7 due to pt in unit;
  # in geom_text, size=1 means 3pt (?)

  # plot gene models
  p_model <-
    ggplot() +
    scale_x_continuous(
      limits = c(start, end),
      position = "top",
      expand = c(0, 0)
    ) +
    scale_y_continuous(limits = c(0, length(genes_to_plot)),
                       expand = c(0, 0)) +
    theme_classic() + theme(
      axis.title.x = element_blank(),
      axis.line.x = element_line(
        color = "black",
        linewidth = 0.35,
        lineend = "round"
      ),
      axis.ticks.x = element_line(
        color = "black",
        linewidth = 0.35,
        lineend = "round"
      ),
      axis.ticks.length.x = unit(2, "pt"),
      axis.text.x = element_text(color = "black", size = 5),
      axis.title.y = element_text(
        color = "black",
        size = 7,
        angle = 0,
        vjust = 0.5,
        hjust = 0.5
      ),
      axis.line.y = element_blank(),
      axis.ticks.y = element_blank(),
      axis.text.y =  element_blank(),
      panel.grid = element_blank(),
      panel.background = element_blank(),
      panel.border = element_blank(),
      plot.background = element_blank(),
      plot.margin = margin()
    ) + ylab("Genes") + geom_text(
      data = model_info_name,
      mapping = aes(x = Start,
                    y = Group - 0.2,
                    label = ID),
      color = "black",
      size = 2.33
    ) + geom_rect(
      data = model_info_CDS,
      mapping = aes(
        xmin = Start,
        ymin = Group - 0.6,
        xmax = End,
        ymax = Group - 0.4
      ),
      color = "black",
      fill = "black",
      linewidth = 0.233
    ) + geom_segment(
      data = model_info_intron,
      mapping = aes(
        x = Start,
        y = Group - 0.5,
        xend = Mid,
        yend = Group - 0.4
      ),
      color = "black",
      linewidth = 0.233
    ) + geom_segment(
      data = model_info_intron,
      mapping = aes(
        x = End,
        y = Group - 0.5,
        xend = Mid,
        yend = Group - 0.4
      ),
      color = "black",
      linewidth = 0.233
    ) + geom_rect(
      data = model_info_UTR,
      mapping = aes(
        xmin = Start,
        ymin = Group - 0.6,
        xmax = End,
        ymax = Group - 0.4
      ),
      color = "black",
      fill = "grey",
      linewidth = 0.233
    ) + geom_polygon(
      data = model_info_end_CDS,
      mapping = aes(x = X,
                    y = Y,
                    group = Group),
      color = "black",
      fill = "black",
      linewidth = 0.233
    ) + geom_polygon(
      data = model_info_end_UTR,
      mapping = aes(x = X,
                    y = Y,
                    group = Group),
      color = "black",
      fill = "grey",
      linewidth = 0.233
    )

  # return
  p_model
}


######################### plot tracks for coverage data #########################
epiplot_cov <- function(bw_df,
                        start,
                        end,
                        y_min,
                        y_max,
                        track_name,
                        track_color,
                        ann_df = NULL,
                        ann_color = "goldenrod1") {
  p <-
    ggplot() +
    scale_x_continuous(limits = c(start, end),
                       expand = c(0, 0)) +
    scale_y_continuous(
      limits = c(y_min, y_max),
      breaks = c(y_min, (y_min + y_max) / 2, y_max),
      expand = c(0, 0),
      sec.axis = dup_axis(name = NULL)
    ) +
    theme_classic() + theme(
      axis.title.x = element_blank(),
      axis.line.x = element_blank(),
      axis.ticks.x = element_blank(),
      axis.text.x = element_blank(),
      axis.title.y = element_text(
        color = "black",
        size = 7,
        angle = 0,
        vjust = 0.5,
        hjust = 0.5
      ),
      axis.line.y = element_line(
        color = "black",
        linewidth = 0.233,
        lineend = "round"
      ),
      axis.ticks.y = element_line(
        color = "black",
        linewidth = 0.233,
        lineend = "round"
      ),
      axis.ticks.length.y = unit(1, "pt"),
      axis.text.y = element_text(color = "black", size = 5),
      panel.grid = element_blank(),
      panel.background = element_blank(),
      panel.border = element_blank(),
      plot.background = element_blank(),
      plot.margin = margin()
    )  + ylab(track_name) + geom_hline(mapping = aes(yintercept = 0), linewidth = 0.233) + geom_rect(
      data = bw_df,
      mapping = aes(
        xmin = start,
        ymin = 0,
        xmax = end,
        ymax = score
      ),
      color = NA,
      fill = track_color
    )

  if (!is.null(ann_df)) {
    p <- p +
      geom_rect(
        data = ann_df,
        mapping = aes(
          xmin = start,
          ymin = y_min,
          xmax = end,
          ymax = y_max
        ),
        color = NA,
        fill = ann_color,
        alpha = 0.3
      ) + geom_text(
        data = ann_df,
        mapping = aes(
          x = mean(c(start, end)),
          y = y_max - 0.2 * (y_max - y_min),
          label = name
        ),
        color = "black",
        size = 6
      )
  }

  p
}


######################### plot tracks BS-seq data #########################
epiplot_bsseq <- function(bw_df,
                          start,
                          end,
                          y_min,
                          y_max,
                          track_name,
                          track_color,
                          ann_df = NULL,
                          ann_color = "goldenrod1") {
  p <-
    ggplot() +
    scale_x_continuous(limits = c(start, end),
                       expand = c(0, 0)) +
    scale_y_continuous(
      limits = c(y_min, y_max),
      breaks = c(-y_max,-y_max / 2, 0, y_max / 2, y_max),
      expand = c(0, 0),
      sec.axis = dup_axis(name = NULL)
    ) +
    theme_classic() + theme(
      axis.title.x = element_blank(),
      axis.line.x = element_blank(),
      axis.ticks.x = element_blank(),
      axis.text.x = element_blank(),
      axis.title.y = element_text(
        color = "black",
        size = 7,
        angle = 0,
        vjust = 0.5,
        hjust = 0.5
      ),
      axis.line.y = element_line(
        color = "black",
        linewidth = 0.233,
        lineend = "round"
      ),
      axis.ticks.y = element_line(
        color = "black",
        linewidth = 0.233,
        lineend = "round"
      ),
      axis.ticks.length.y = unit(1, "pt"),
      axis.text.y = element_text(color = "black", size = 5),
      panel.grid = element_blank(),
      panel.background = element_blank(),
      panel.border = element_blank(),
      plot.background = element_blank(),
      plot.margin = margin()
    )  + ylab(track_name) + geom_hline(mapping = aes(yintercept = 0), linewidth = 0.233) + geom_rect(
      data = bw_df,
      mapping = aes(
        xmin = start,
        xmax = end,
        ymin = 0,
        ymax = score
      ),
      color = NA,
      fill = track_color
    )

  if (!is.null(ann_df)) {
    p <- p +
      geom_rect(
        data = ann_df,
        mapping = aes(
          xmin = start,
          ymin = y_min,
          xmax = end,
          ymax = y_max
        ),
        color = NA,
        fill = ann_color,
        alpha = 0.3
      ) + geom_text(
        data = ann_df,
        mapping = aes(
          x = mean(c(start, end)),
          y = y_max - 0.2 * (y_max - y_min),
          label = name
        ),
        color = "black",
        size = 6
      )
  }

  p
}


################# plot tracks BS-seq data (multi C type) #################
epiplot_bsseq_multi <- function(CG_df,
                                CHG_df,
                                CHH_df,
                                start,
                                end,
                                y_min,
                                y_max,
                                track_name,
                                ann_df = NULL,
                                ann_color = "goldenrod1") {
  p <-
    ggplot() +
    scale_x_continuous(limits = c(start, end),
                       expand = c(0, 0)) +
    scale_y_continuous(
      limits = c(y_min, y_max),
      breaks = c(-y_max,-y_max / 2, 0, y_max / 2, y_max),
      expand = c(0, 0),
      sec.axis = dup_axis(name = NULL)
    ) +
    theme_classic() + theme(
      axis.title.x = element_blank(),
      axis.line.x = element_blank(),
      axis.ticks.x = element_blank(),
      axis.text.x = element_blank(),
      axis.title.y = element_text(
        color = "black",
        size = 7,
        angle = 0,
        vjust = 0.5,
        hjust = 0.5
      ),
      axis.line.y = element_line(
        color = "black",
        linewidth = 0.233,
        lineend = "round"
      ),
      axis.ticks.y = element_line(
        color = "black",
        linewidth = 0.233,
        lineend = "round"
      ),
      axis.ticks.length.y = unit(1, "pt"),
      axis.text.y = element_text(color = "black", size = 5),
      panel.grid = element_blank(),
      panel.background = element_blank(),
      panel.border = element_blank(),
      plot.background = element_blank(),
      plot.margin = margin()
    ) + ylab(track_name) + geom_hline(mapping = aes(yintercept = 0), linewidth = 0.233) + geom_rect(
      data = CG_df,
      mapping = aes(
        xmin = start,
        xmax = end,
        ymin = 0,
        ymax = score
      ),
      color = NA,
      fill = "#FF0000",
      alpha = 1
    ) + geom_rect(
      data = CHG_df,
      mapping = aes(
        xmin = start,
        xmax = end,
        ymin = 0,
        ymax = score
      ),
      color = NA,
      fill = "#0070C0",
      alpha = 1
    ) + geom_rect(
      data = CHH_df,
      mapping = aes(
        xmin = start,
        xmax = end,
        ymin = 0,
        ymax = score
      ),
      color = NA,
      fill = "#00B050",
      alpha = 1
    )

  if (!is.null(ann_df)) {
    p <- p +
      geom_rect(
        data = ann_df,
        mapping = aes(
          xmin = start,
          ymin = y_min,
          xmax = end,
          ymax = y_max
        ),
        color = NA,
        fill = ann_color,
        alpha = 0.3
      ) + geom_text(
        data = ann_df,
        mapping = aes(
          x = mean(c(start, end)),
          y = y_max - 0.2 * (y_max - y_min),
          label = name
        ),
        color = "black",
        size = 6
      )
  }

  p
}
liuyujie0136/tinyfuncr documentation built on Dec. 13, 2024, 8:49 a.m.