R/cnv_visualising.R

Defines functions cnv_visual

Documented in cnv_visual

#' Custom visualizing CNV
#'
#' This function is designed to custom plot CNVs, it now support to visualize CNVs by Chromosome, Interested Individual, Interested Region, Target Gene, et al.
#'
#' @param clean_cnv cnv list file, standard file was generated by 'cnv_clean' function
#' @param max_chr_length the maximum length of chromosome, default unit is 'Mb'
#' @param chr_id the number of chromosome want to plot
#' @param start_position decimal digit, default unit is 'Mb'. such as 23.2112
#' @param end_position decimal digit, default unit is 'Mb'. such as 23.2112
#' @param individual_id the ID of individual in cnv list, used to plot all chromosome for specific Individual
#' @param target_gene the
#' @param refgene if true, will plot gene above the CNV plot, need to combine with the clean_cnv, and the stardard clean_cnv file need the annotated cnv list which was generated from call_gene function, internal reference gene-list are "ARS_ens", "ARS_UCSC" and "UMD_UCSC". Or provide the reference genes list corresponding to your data
#' @param plot_title set the title of final plot
#' @param max_chr the maximum number of chromosomes to plot, it used for plot all chromosomes at once
#' @param report_id report the sample ID while plotting
#' @param pedigree pedigree list, require at least three columns, Sample_Id, Sire and Dam
#' @param show_name default value is show_name = c(0, 160). accept the vectors only, unit is Mb. for example show_name = c(11.2, 12.4, 15.3, 18.4), means only plot the genes within the given interval
#' @param width_1 number to set the width of final plot size, unit is 'cm'
#' @param height_1 number to set the height of final plot size, unit is 'cm'
#' @param species which species in the input data, unused at the moment
#' @param folder set the name of folder to save results
#' @param col_0 set color for 0 copy of CNV
#' @param col_1 set color for 1 copy of CNV
#' @param col_2 set color for 2 copy of CNV (which might be ROH)
#' @param col_3 set color for 3 copy of CNV
#' @param col_4 set color for 4 copy of CNV
#' @param col_gene set the color of Gene, only work in Gene annotated at CNV plot
#' @param gene_font_size set the size of Gene in CNV Annotation plot
#'
#' @import dplyr ggplot2 tidyr ggrepel grDevices
#'
#' @importFrom data.table fread fwrite
#' @importFrom scales unit_format
#'
#' @return
#' CNV distribution plot
#'
#' @export cnv_visual
#'

cnv_visual <- function(clean_cnv, max_chr = NULL, chr_id = NULL, species = NULL, start_position = NULL, end_position = NULL, individual_id = NULL, target_gene = NULL, max_chr_length = 160, refgene = NULL, plot_title = NULL, report_id = NULL, pedigree = NULL, show_name = c(0,160), width_1 = 13, height_1 = 10, folder = "cnv_visual", col_0 = "hotpink",  col_1 = "turquoise", col_2 = "gray", col_3 = "tomato", col_4= "deepskyblue", col_gene = "gray", gene_font_size = 2.2) {
  if(!file.exists(paste0(folder))){
    dir.create(paste0(folder))
    cat(paste0("A new folder '", folder, "' was created in working directory.\n"))
  }

  #prepare for population data
  if(typeof(clean_cnv) == "character"){
    cnv <- fread(file = clean_cnv, header = TRUE, sep ="\t")
  } else {
    cnv = clean_cnv
  }

  standard_col <- c("Sample_ID", "Chr", "Start", "End")
  standard_col_anot <- c("Sample_ID", "Chr", "CNV_Start", "CNV_End")
  if(all(standard_col %in% colnames(cnv))){
    cat("Input data passed requirment check...\n")
  } else if(all(standard_col_anot %in% colnames(cnv))){
    cnv <- cnv %>%
           rename(Start = CNV_Start, End = CNV_End)
    cat("Renamed 'CNV_Start' and 'CNV_End' as 'Start' and 'End', input data passed requirment check...\n")
  } else{
    cat("Missing mandatory columns, please conform that input file at least has four columns: 'Sample_ID', 'Chr', 'Start', 'End' \n")
  }

  #id_coord <- data.frame("Sample_ID" = sort(unique(cnv$Sample_ID))) #extract unique ID prepare coordinate
  #id_coord$Order <- seq(from = 1, to = nrow(id_coord), by = 1)
  #id_coord$x <- 160
  #id_coord$y <- (id_coord$Order-1)*5 + 1
  #cnv_coord <- merge(cnv, id_coord, all.x = TRUE, sort = FALSE) #prepare original data


  #set length of chr
  # chr_length_ars <- data.frame("Chr" = c(29:1), "Length" = c( 51.098607, 45.94015, 45.612108, 51.992305,
  #                                                               42.350435, 62.317253, 52.498615, 60.773035, 69.862954,
  #                                                               71.974595, 63.449741, 65.820629, 73.167244, 81.013979,
  #                                                               85.00778, 82.403003, 83.472345, 87.216183, 106.982474,
  #                                                               103.308737, 105.454467, 113.31977, 110.682743, 117.80634,
  #                                                               120.089316, 120.000601, 121.005158, 136.231102, 158.53411))
  # chr_length_ars <- chr_length_ars[order(chr_length_ars$Chr),]

  chr_length_ars <- cnv %>%
                    group_by(Chr) %>%
                    summarise(Length = max(End) / 1000000) %>%
                    arrange(as.numeric(Chr)) %>%
                    select(Chr, Length)

  #plot CNVs on specific chr
  plot_cnv_chr <- function(clean_cnv = NULL, chr_id = NULL, folder = folder, width_1 = width_1, height_1 = height_1){
    #2.plot for specific chromosome
    cnv <- clean_cnv
    cnv_chr <- cnv[cnv$Chr == chr_id, ]
    if(nrow(cnv_chr) == 0){
      cat("The ID of chromosome not found in the input data, please check and reset the number of Chromosome!\n")
    }
    id_coord <- data.frame("Sample_ID" = sort(unique(cnv_chr$Sample_ID))) #extract unique ID prepare coordinate
    id_coord$Order <- seq(1,nrow(id_coord),1)
    #id_coord$x <- chr_length_ars[chr_id, 2]
    max_len = chr_length_ars %>%
              filter(Chr == chr_id) %>%
              select(Length)
    id_coord$x <- max_len$Length

    id_coord$y <- (id_coord$Order-1)*5 + 1
    cnv_chr <- merge(cnv_chr, id_coord, all.x = TRUE, sort = FALSE) #prepare original data

    chr_name <- paste0("Chr", chr_id, ".png")
    id_number <- nrow(id_coord)
    chr_title <- paste0("CNV on Chr", chr_id, " with ", id_number, " Samples")
    png(res = 350, filename = paste0(folder,"/", chr_name), width = width_1, height = height_1, units = "cm")
    #add manual color for cnv number
    color_copy <- c("0" = col_0,
                    "1" = col_1,
                    "2" = col_2,
                    "3" = col_3,
                    "4" = col_4)
    chr_plot <- ggplot(cnv_chr, aes(xmin = Start/1000000, xmax = End/1000000, ymin = (Order-1)*5, ymax = (Order-1)*5 + 3)) +
      geom_rect(aes(fill = as.factor(CNV_Value))) +
      scale_fill_manual(values = color_copy) +
      #geom_text(aes(x,y, label = Sample_ID), size = 1.5, check_overlap = TRUE) +
      theme_bw() +
      theme(legend.key.size = unit(0.5,"line"),
            legend.title = element_text(size = 6),
            legend.text  = element_text(size = 6),
            legend.margin=margin(0, 0, 0, 0)) +
      scale_y_continuous(labels = NULL) +
      scale_x_continuous(breaks = seq(0, max_len$Length, by = 5), limits = c(0, max_len$Length)) +
      labs(x = "Physical Position (Mb)", y ="Individual ID", title = chr_title, fill = "Copy")
    print(chr_plot)
    dev.off()
  }

  #plot CNVs on each chr
  if(is.null(max_chr) == "FALSE") {

    if(missing(width_1) & missing(height_1)){
      width_1 = 23
      height_1 = 13.5
    }

    for(i in 1:max_chr){
      plot_cnv_chr(clean_cnv = cnv, chr_id = i, folder = folder, width_1 = width_1, height_1 = height_1)
      print(paste0("Plotting the CNVs on Chromosome ", i))
    }

    cat("Task done.\n")

  #1.plot all CNV on all chromosome on population level
  #cnv_pop <- cnv_coord
  #cnv_pop <- dplyr::filter(cnv_pop, cnv_pop$Chr >=1 & cnv_pop$Chr <= max_chr )
  #png(res = 300, filename = "1_chr_all.png", width = 4000, height = 20000)
  #popu_plot <- ggplot(cnv_pop, aes(xmin = Start/1000000, xmax = End/1000000, ymin = (Order-1)*5, ymax = (Order-1)*5 + 3)) +
  #  geom_rect(aes(fill = as.factor(CNV_Value))) +
  #  #geom_text(aes(x,y, label = Sample_ID), size = 1.5) +
  #  theme_classic() +
  #  scale_y_continuous(labels = NULL) +
  #  scale_x_continuous(breaks = seq(0, max_chr_length +10, by = 10)) +
  #  facet_wrap(~as.numeric(Chr), nrow = 10) +
  #  labs(x = "Physical Position", y = "Individual ID", title = "CNV Distribution on Population Level", fill = "CNV_Num")
  #print(popu_plot)
  #dev.off()
  #print("Task done, plot was stored in working directory.")
  }

  else if(is.null(chr_id) == "FALSE" & is.null(start_position) & is.null(refgene)){
  #2.plot for specific chromosome
  cnv_chr <- cnv[cnv$Chr == chr_id, ]
  id_coord <- data.frame("Sample_ID" = sort(unique(cnv_chr$Sample_ID))) #extract unique ID prepare coordinate
  id_coord$Order <- seq(1,nrow(id_coord),1)
  max_len = chr_length_ars %>%
            filter(Chr == chr_id) %>%
            select(Length)
  id_coord$x <- max_len$Length
  #id_coord$x <- chr_length_ars[chr_id, 2]
  id_coord$y <- (id_coord$Order-1)*5 + 1
  cnv_chr <- merge(cnv_chr, id_coord, all.x = TRUE, sort = FALSE) #prepare original data

  chr_name <- paste0("Chr", chr_id, ".png")
  id_number <- nrow(id_coord)
  chr_title <- paste0("CNV on Chr", chr_id, " with ", id_number, " Samples")
  png(res = 350, filename = paste0(folder, "/", chr_name), width = width_1, height = height_1, units = "cm")

  #add manual color for cnv number
  color_copy <- c("0" = col_0,
                  "1" = col_1,
                  "2" = col_2,
                  "3" = col_3,
                  "4" = col_4)

  chr_plot <- ggplot(cnv_chr, aes(xmin = Start/1000000, xmax = End/1000000, ymin = (Order-1)*5, ymax = (Order-1)*5 + 3)) +
    geom_rect(aes(fill = as.factor(CNV_Value))) +
    scale_fill_manual(values = color_copy) +
    #geom_text(aes(x,y, label = Sample_ID), size = 1.5, check_overlap = TRUE) +
    theme_bw() +
    theme(legend.key.size = unit(0.5,"line"),
          legend.title = element_text(size = 6),
          legend.text  = element_text(size = 6),
          legend.margin=margin(0, 0, 0, 0)) +
    scale_y_continuous(labels = NULL) +
    scale_x_continuous(breaks = seq(0, max_len$Length, by = 5), limits = c(0, max_len$Length)) +
    labs(x = "Physical Position (Mb)", y ="Individual ID", title = chr_title, fill = "Copy")
  print(chr_plot)
  dev.off()
  cat("Task done, plot was stored in working directory.\n")
  return(chr_plot)
  }

  #else if(is.null(start_position & end_position) == "FALSE")
  else if(is.null(start_position) == "FALSE" & is.null(end_position) == "FALSE" & is.null(refgene)){
    #3.zoom into specific region
    cnv_chr <- cnv[cnv$Chr == chr_id, ]
    cnv_chr_zoom <- filter(cnv_chr, Start >= start_position * 1000000 -1 & End <= end_position * 1000000 + 1)
    id_coord <- data.frame("Sample_ID" = sort(unique(cnv_chr_zoom$Sample_ID))) #extract unique ID prepare coordinate
    id_coord$Order <- seq(1,nrow(id_coord),1)
    #id_coord$x <- chr_length_ars[chr_id, 2]
    max_len = chr_length_ars %>%
      filter(Chr == chr_id) %>%
      select(Length)
    id_coord$x <- max_len$Length
    id_coord$y <- (id_coord$Order-1)*5 + 1
    cnv_chr_zoom <- merge(cnv_chr_zoom, id_coord, all.x = TRUE, sort = FALSE) #prepare original data
    cnv_chr_zoom$zoom_x <- end_position
    zoom_name <- paste0(folder, "/Chr", chr_id, "_",start_position,"-",end_position, "Mb", ".png")
    id_number <- nrow(id_coord)
    zoom_title <- paste0("Chr", chr_id, ":",start_position," - ",end_position, "Mb", " with ", id_number," Samples" ," - ", plot_title)

    #add manual color for cnv number
    color_copy <- c("0" = col_0,
                    "1" = col_1,
                    "2" = col_2,
                    "3" = col_3,
                    "4" = col_4)
    zoom_plot <- ggplot(cnv_chr_zoom, aes(xmin = Start/1000000, xmax = End/1000000, ymin = (Order-1)*5, ymax = (Order-1)*5 + 3)) +
      geom_rect(aes(fill = as.factor(CNV_Value))) +
      scale_fill_manual(values = color_copy) +
      #geom_text(aes(zoom_x, y, label = Sample_ID), size = 2.5) +
      theme_bw() +
      scale_y_continuous(labels = NULL) +
      scale_x_continuous(breaks = seq(start_position, end_position)) +
      labs(x = "Physical Position (Mb)", y ="Individual ID", title = zoom_title, fill = "Copy")
    if(is.null(report_id)){
      png(res = 350, filename = zoom_name, width = width_1, height = height_1, units = "cm")
      print(zoom_plot)
      dev.off()
      cat("Task done, plot was stored in working directory.\n")
      return(zoom_plot)
    } else {
      indiv_id <- unique(cnv_chr_zoom$Sample_ID)
      cat("Individual ID in this CNVRs as following: \n")
      print(indiv_id)
      #assign("indiv_id", value = cnv_chr_zoom, envir = .GlobalEnv)
      indiv <- cnv_chr_zoom
      #cnv_visual(clean_cnv = "clean_cnv/penncnv_clean.cnv", chr_id = 5, start_position = 93.6, end_position = 93.7, report_id = 1)
      if(!(is.null(pedigree))){
        pedb <- fread(pedigree)
        if (exists("pedb")) {
          cat("Pedigree was read in.\n")
        }
        indiv_id_ped <- merge(indiv, pedb, by.x = "Sample_ID", by.y = "Chip_ID", all.x = TRUE)
        #print(colnames(indiv_id_ped))
        color_copy <- c("0" = col_0,
                        "1" = col_1,
                        "2" = col_2,
                        "3" = col_3,
                        "4" = col_4)
        if ("Herd" %in% colnames(indiv_id_ped)){
          herd_cnv <- ggplot(indiv_id_ped, aes(x = as.factor(Herd), fill = as.factor(CNV_Value))) +
            geom_bar(show.legend = FALSE) +
            scale_fill_manual(values = color_copy) +
            theme_classic()+
            theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
            labs(x = "Name of Herd", y = "Number of CNV")
        }

        if ("Sire_Source" %in% colnames(indiv_id_ped)){
          source_cnv <- ggplot(indiv_id_ped, aes(x = Sire_Source, fill = as.factor(CNV_Value))) +
            geom_bar(show.legend = FALSE) +
            scale_fill_manual(values = color_copy) +
            theme_classic()+
            theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
            labs(x = "Bull Source", y = "Number of CNV")
        }

        if ("Sire_ID" %in% colnames(indiv_id_ped)){
          sire_cnv <- ggplot(indiv_id_ped, aes(x = Sire_ID, fill = as.factor(CNV_Value))) +
            geom_bar(show.legend = FALSE)+
            scale_fill_manual(values = color_copy) +
            theme_classic()+
            theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
            labs(x = "Sire ID", y = "Number of CNV")
        }

        source_title <- paste0(folder, "/Chr", chr_id, "_",start_position,"-",end_position, "Mb_Source", ".png")
        png(filename = paste0(source_title), res = 350, bg = "transparent", height = height_1, width = width_1, units = "cm")
        if (exists("herd_cnv") & exists("source_cnv") & exists("sire_cnv")){
          cnv_source <- plot_grid(zoom_plot, herd_cnv, source_cnv, sire_cnv, ncol = 1)
          print(cnv_source)
          dev.off()
        } else if (exists("herd_cnv") & exists("source_cnv")) {
          cnv_source <- plot_grid(zoom_plot, herd_cnv, source_cnv, sire_cnv, ncol = 1)
          print(cnv_source)
          dev.off()
        } else if (exists("herd_cnv") & exists("sire_cnv")) {
          cnv_source <- plot_grid(zoom_plot, herd_cnv, sire_cnv, ncol = 1)
          print(cnv_source)
          dev.off()
        } else if (exists("source_cnv") & exists("sire_cnv")) {
          cnv_source <- plot_grid(zoom_plot, source_cnv, sire_cnv, ncol = 1)
          print(cnv_source)
          dev.off()
        } else {
          print(sire_cnv)
          dev.off()
        }
      }
      return(indiv_id)
    }
    }
  else if (is.null(chr_id) == "FALSE" & is.null(start_position) == "FALSE" & is.null(end_position) == "FALSE" & is.null(refgene) == "FALSE") {
    cnv_chr <- cnv[cnv$Chr == chr_id, ]
    cnv_chr_zoom <- filter(cnv_chr, Start >= start_position * 1000000 -1 & End <= end_position * 1000000 + 1)
    id_coord <- data.frame("Sample_ID" = sort(unique(cnv_chr_zoom$Sample_ID))) #extract unique ID prepare coordinate
    try(id_coord$Order <- seq(1, nrow(id_coord),1), silent = FALSE)
    #id_coord$x <- chr_length_ars[chr_id, 2]
    max_len = chr_length_ars %>%
      filter(Chr == chr_id) %>%
      select(Length)
    id_coord$x <- max_len$Length

    id_coord$y <- (id_coord$Order-1)*5 + 1
    cnv_chr_zoom <- merge(cnv_chr_zoom, id_coord, all.x = TRUE, sort = FALSE) #prepare original data
    cnv_chr_zoom$zoom_x <- end_position
    cnv_chr_zoom$gene_order <- max(cnv_chr_zoom$Order) + 3

    if(!is.null(report_id)) {
      indiv_id <- cnv_chr_zoom$Sample_ID
      cat("Individual ID in this CNVRs as following: \n")
      print(indiv_id)
      }
    #gene_freq <- data.table(gene_freq) #convert it to data.table to set key
    #setkey(x = gene_freq, name2)
    #gene_coord <- group_by(cnv_chr_zoom, name2) %>% slice(1) # generate gene data
    #gene_coord$CNV_Start <- gene_coord$g_Start
    #gene_coord$Order <- gene_coord$gene_order
    #try(gene_coord$CNV_Value <- "5", silent = FALSE)

    #gene_freq <- cnv %>% group_by(name2) %>% count(name2, name = "Frequent", sort = TRUE) #gene freq
    #gene_coord_freq <- merge(gene_coord, gene_freq)
    #gene_coord_freq <- gene_coord_freq[c(gene_coord_freq$Frequent >= 5), ]

    zoom_name <- paste0("Chr", chr_id, "_",start_position,"-",end_position, "Mb", ".png")
    id_number <- nrow(id_coord)
    zoom_title <- paste0("CNV on Chromosome ", chr_id, ": ",start_position," - ",end_position, " Mb", " with ", id_number," Individual" ," - ", plot_title)
    print(zoom_title)
    #png(res = 300, filename = zoom_name, width = 3500, height = 2000)
    #add manual color for cnv number
    color_copy <- c("0" = col_0,
                    "1" = col_1,
                    "2" = col_2,
                    "3" = col_3,
                    "4" = col_4)
    zoom_plot <- ggplot() +
      geom_rect(data = cnv_chr_zoom, aes(xmin = Start/1000000, xmax = End/1000000, ymin = (Order-1)*5, ymax = (Order-1)*5 + 3, fill = as.factor(CNV_Value))) +
      #geom_rect(data = gene_coord, aes(xmin = g_Start/1000000, xmax = g_End/1000000, ymin = (Order-1)*5, ymax = (Order-1)*5 + 3), fill = "black") +
      #geom_text_repel(data = gene_coord_freq, aes(x = g_Start/1000000, y = (Order-1)*5 + 10, label = name2)) +
      #geom_hline(yintercept = (max(cnv_chr_zoom$Order) + 2)*5 - 2, linetype = "dashed") +
      #geom_text(aes(zoom_x, y, label = Sample_ID), size = 2.5) +
      scale_fill_manual(values = color_copy) +
      theme_bw() +
      theme(legend.key.size = unit(0.5,"line"),
            legend.title = element_text(size = 6),
            legend.text  = element_text(size = 6),
            legend.margin=margin(0, 0, 0, 0),
            #legend.position = "right",
            legend.position = c(0.95, 0.01),
            legend.justification='right',
            legend.direction='horizontal',
            plot.margin=unit(c(0,1,1,1), "cm"),
            axis.ticks.y = element_blank()) +
      scale_y_continuous(labels = NULL) +
      scale_x_continuous(limits = c(start_position, end_position)) +
      #labs(x = "Physical Position (Mb)", y ="Individual ID", title = zoom_title, fill = "CNV_Num")
      labs(x = "Position (Mb)", y ="CNV", fill = "Copy")

    cat("Plotting gene...\n")
    gene_plot <- HandyCNV::plot_gene(refgene = refgene, chr_id = chr_id, start = start_position, end = end_position, show_name = show_name, cnv = "yes", gene_font_size = gene_font_size, col_gene = col_gene)

    roh_gene <- plot_grid(gene_plot, zoom_plot, ncol = 1, rel_heights = c(1, 3))
    print(roh_gene)
    png(filename = paste0(folder, "/", zoom_name), width = width_1, height = height_1, units = "cm",  res = 350, bg = "transparent")
    print(roh_gene)
    dev.off()
    cat("Task done, plot was stored in working directory.\n")
    return(roh_gene)
  }

  else if (is.null(individual_id) == "FALSE"){
  #plot on individual level
    #cnv_indiv <- cnv_p[which(cnv_p$Sample_ID == "204806050057_R01C01")]
  cnv_indiv <- cnv[which(cnv$Sample_ID == individual_id),]
  chr_coord <- data.frame("Chr" = seq(1,max(as.integer(cnv_indiv$Chr)),1))
  chr_coord$x <- max(cnv$End) #adjust x for geom_text

  chr_coord$y <- (chr_coord$Chr-1)*5 + 1 #adjust y for geom_text

  cnv_indiv$Chr <- as.character(cnv_indiv$Chr)
  chr_coord$Chr <- as.character(chr_coord$Chr)
  cnv_indiv_coord <- merge(cnv_indiv, chr_coord, by = "Chr", all.x = TRUE, sort = FALSE)
  cnv_indiv_coord$Chr <- as.integer(cnv_indiv_coord$Chr)

  #4.
  indiv_name <- paste0("CNV_of_", individual_id,".png")
  indiv_title <- paste0("CNV Distribution of Individual ", individual_id)
  png(res = 350, filename = paste0(folder,"/", indiv_name), width = width_1, height = height_1, units = "cm")
  #add manual color for cnv number
  color_copy <- c("0" = col_0,
                  "1" = col_1,
                  "2" = col_2,
                  "3" = col_3,
                  "4" = col_4)
  indiv_plot <- ggplot(cnv_indiv_coord, aes(xmin = Start/1000000, xmax = End/1000000, ymin = (Chr-1)*5, ymax = (Chr-1)*5 + 3)) +
    geom_rect(aes(fill = as.factor(CNV_Value))) +
    scale_fill_manual(values = color_copy) +
    geom_text(aes(x/1000000, y, label = Chr), size = 4) + theme_bw() +
    scale_y_continuous(breaks = seq(0, max(cnv$End)/1000000, by = 10),labels = NULL) +
    scale_x_continuous(breaks = seq(0, max(cnv$End)/1000000, by = 10)) +
    labs(x = "Physical Position (Mb)", y ="Autosome", title = indiv_title,  fill= "Copy")
  print(indiv_plot)
  dev.off()

  cat("Task done, plot was stored in working directory.\n")
  return(indiv_plot)
  }

  else if(!(missing(target_gene))){
    cnv_gene <- cnv %>%
                filter(name2 == target_gene)

    id_coord <- data.frame("Sample_ID" = sort(unique(cnv_gene$Sample_ID))) #extract unique ID prepare coordinate
    id_coord$Order <- seq(1,nrow(id_coord),1)
    #id_coord$x <- chr_length_ars[chr_id, 2]
    #max_len = chr_length_ars %>%
    #  filter(Chr == chr_id) %>%
    #  select(Length)
    #id_coord$x <- max_len$Length

    id_coord$y <- (id_coord$Order-1)*5 + 1
    cnv_gene <- merge(cnv_gene, id_coord, all.x = TRUE, sort = FALSE) #prepare original data
    max_y <- max(cnv_gene$Order)

    gene_coord <- cnv_gene %>%
                  group_by(name2) %>%
                  distinct(name2, .keep_all = T) %>%
                  mutate(Order = max_y)
    title_gene <- paste0(target_gene, "-Chr", gene_coord$Chr, ":", round(gene_coord$g_Start/1000000, 2), "-", round(gene_coord$g_End/1000000, 2), "Mb ", nrow(id_coord), "Samples")

    png(res = 350, filename = paste0(folder, "/", target_gene, ".png"), width = width_1, height = height_1, units = "cm")

    #add manual color for cnv number
    color_copy <- c("0" = col_0,
                    "1" = col_1,
                    "2" = col_2,
                    "3" = col_3,
                    "4" = col_4)

    gene_cnv <- ggplot(cnv_gene, aes(xmin = Start/1000000, xmax = End/1000000, ymin = (Order-1)*5, ymax = (Order-1)*5 + 3)) +
                geom_rect(aes(fill = as.factor(CNV_Value))) +
                scale_fill_manual(values = color_copy) +
                geom_rect(data = gene_coord, aes(xmin = g_Start/1000000, xmax = g_End/1000000, ymin = (Order+2)*5 + 2, ymax = (Order+2)*5 + 7), fill = "black") +
                geom_text(data = gene_coord, aes(x = g_Start/1000000, y = (Order + 2)*5 + 11, label = name2), size = 2.5) +
                geom_hline(yintercept = (max(cnv_gene$Order) + 2)*5 - 2, linetype = "dashed") +
                #geom_text(aes(x,y, label = Sample_ID), size = 1.5, check_overlap = TRUE) +
                theme_bw() +
                theme(legend.key.size = unit(0.5,"line"),
                      legend.title = element_text(size = 6),
                      legend.text  = element_text(size = 6),
                      legend.margin=margin(0, 0, 0, 0)) +
                scale_y_continuous(labels = NULL) +
                #scale_x_continuous(breaks = seq(0, max_len$Length, by = 5), limits = c(0, max_len$Length))
                labs(x = "Physical Position (Mb)", y ="Individual ID", title = title_gene, fill = "Copy")
    print(gene_cnv)
    dev.off()
    cat("Task done, plot was stored in working directory.\n")
    return(gene_cnv)
  }


  else{
    cat("Warning: Lack of input parameters!!!\nWarning: Please check input parameters carefully!!!\n")
  }

}
JH-Zhou/HandyCNV documentation built on Dec. 18, 2021, 12:25 a.m.