#' Custom plotting gene from reference gene list
#'
#' The function is used for plotting gene from reference gene list, the standard reference gene lists was generated by 'get_refgene' function in HandyCNV.
#'
#'
#' @param refgene reference gene list. Standard input file was generated by 'get_refgene' function.
#' @param chr_id chromosome ID, should be the integer type only, such as 1 indicates Chromosome 1.
#' @param start the start physical position used in the plot, the unit is Mb.
#' @param end the end physical position used in plot, unit in Mb.
#' @param show_name the default value of show_name = c(0, 160). accept the vectors only, unit in Mb. for example show_name = c(11.2, 12.4, 15.3, 18.4), means only plot the genes within the given interval
#' @param cnv is support for both CNV and ROH, set cnv argument as TRUE will plot gene for CNV, otherwise will plot gene for ROH
#' interval 11.2-12.4 Mb and 15.3-18.4 Mb, the maximum pairs of intervals are three.
#' @param height_1 set height of gene plot.
#' @param width_1 set width of gene plot.
#' @param gene_font_size set the size of gene font
#' @param col_gene set the color of Gene, only work when the 'cnv' argument was set as 'TRUE'.
#'
#' @import ggplot2 dplyr
#' @importFrom data.table fread fwrite setkey foverlaps setDT data.table
#'
#' @return gene plot with given interval
#' @export plot_gene
#'
plot_gene <- function(refgene = NULL, chr_id, start, end, show_name = c(0,160), cnv = NULL, height_1 = 2, width_1 = 10, col_gene = "gray", gene_font_size = 2.5){
###
#read gene
#if(refgene == "ARS_ens"){
# refgene = system.file("extdata", "Demo_data/gene_annotation/ensGene_ars_210202.txt", package = "HandyCNV")
# gene <- fread(file = refgene, header = TRUE)
#} else if(refgene == "ARS_UCSC"){
# refgene = system.file("extdata", "Demo_data/gene_annotation/refGene_ars1.2.txt", package = "HandyCNV")
# gene <- fread(file = refgene, header = FALSE)
# names(gene) <- c("bin", "name", "Chr", "strand", "Start", "End",
# "cdsStart", "cdsEnd", "exonCount", "exonStarts", "exonEnds",
# "score", "name2", "cdsStartStat", "cdsEndStat", "exonFrames")
#} else if(refgene == "UMD_UCSC"){
# refgene = system.file("extdata", "Demo_data/gene_annotation/refGene_umd3.1.txt", package = "HandyCNV")
# gene <- fread(file = refgene, header = FALSE)
# names(gene) <- c("bin", "name", "Chr", "strand", "Start", "End",
# "cdsStart", "cdsEnd", "exonCount", "exonStarts", "exonEnds",
# "score", "name2", "cdsStartStat", "cdsEndStat", "exonFrames")
#}else{
if(typeof(refgene) == "character"){
gene <- fread(file = refgene, header = TRUE, sep = "\t")
} else {
gene <- refgene
}
#gene <- refgene
# names(gene) <- c("bin", "name", "Chr", "strand", "Start", "End",
# "cdsStart", "cdsEnd", "exonCount", "exonStarts", "exonEnds",
# "score", "name2", "cdsStartStat", "cdsEndStat", "exonFrames")
#}
#if(missing(gene)){
# gene <- fread(file = gene, header = T)
#} else{
# gene <- fread(gene, header = F)
# names(gene) <- c("bin", "name", "Chr", "strand", "Start", "End",
# "cdsStart", "cdsEnd", "exonCount", "exonStarts", "exonEnds",
# "score", "name2", "cdsStartStat", "cdsEndStat", "exonFrames")
#}
gene$Chr <- sub("chr", "", gene$Chr)
#extract gene list
gene_sub <- gene %>%
filter(Chr == chr_id & Start >= start * 1000000 & End <= end * 1000000) %>%
arrange(Start) %>%
mutate(y_min = case_when(length(Chr) <= 5 ~ rep(c(1, 1.1),length.out = length(Chr)),
length(Chr) > 5 & length(Chr) <= 15 ~ rep(c(1, 1.1, 1.2),length.out = length(Chr)),
length(Chr) > 15 & length(Chr) <= 25 ~ rep(c(1, 1.1, 1.2, 1.3),length.out = length(Chr)),
length(Chr) > 25 ~ rep(c(1, 1.1, 1.2, 1.3, 1.4),length.out = length(Chr))),
y_max = case_when(length(Chr) <= 5 ~ rep(c(1.1, 1.2),length.out = length(Chr)),
length(Chr) > 5 & length(Chr) <= 15 ~ rep(c(1.1, 1.2, 1.3),length.out = length(Chr)),
length(Chr) > 15 & length(Chr) <= 25 ~ rep(c(1.1, 1.2, 1.3, 1.4),length.out = length(Chr)),
length(Chr) > 25 ~ rep(c(1.1, 1.2, 1.3, 1.4, 1.5),length.out = length(Chr))))
#mutate(y_min = case_when(row_number() %% 2 == 0 ~ "1",
# row_number() %% 2 == 1 ~ "1.2"),
# y_max = case_when(row_number() %% 2 == 0 ~ "1.1",
# row_number() %% 2 == 1 ~ "1.3")) #assign y axis by even and odd row number
if(nrow(gene_sub) == 0){
print("No gene in this region")
} else{
#plot the name of genes
coord_name <- as.vector(show_name) * 1000000 #read the interval of gene want to present name
if(length(coord_name) == 2){
gene_present <- gene_sub%>%
filter(Start > coord_name[1] & End < coord_name[2])
} else if (length(coord_name) == 4){
gene_present <- gene_sub %>%
filter(Start > coord_name[1] & End < coord_name[2] | Start > coord_name[3] & End < coord_name[4])
} else{
gene_present <- gene_sub %>%
filter(Start > coord_name[1] & End < coord_name[2] | Start > coord_name[3] & End < coord_name[4] | Start > coord_name[5] & End < coord_name[6])
}
#check if plot for roh or CNV, the difference is when gene figure combine to interval reduce the distance between the middle (from top to bottom) are reduced when set 'cnv'
if(is.null(cnv)){
if(missing(height_1)){
ggplot() +
geom_rect(data = gene_present, aes(xmin = Start/1000000, xmax = End/1000000, ymin = y_min, ymax = y_max, fill = as.character(name2)), show.legend = F) +
#{if(nrow(gene_sub) < 50)geom_text_repel(aes(x = Start/1000000, y = y_max, label = name2))} +
#{if(nrow(gene_present) < 50)geom_text_repel(data = gene_present, aes(x = Start/1000000, y = y_max, label = name2))} +
geom_text_repel(data = gene_present, aes(x = Start/1000000, y = y_max, label = name2), size = gene_font_size) +
scale_x_continuous(limits = c(start, end)) +
#geom_vline(xintercept = coord_name/1000000, linetype = "dashed") +
theme_bw() +
theme(axis.text.y = element_blank(), axis.title.x = element_blank()) +
labs(y = "Gene")
} else {
title_gene <- paste0("Chr", chr_id,"_", start,"_", end, ".png")
png(filename = title_gene, width = width_1, height = height_1, res = 350, units = "cm")
ggplot() +
geom_rect(data = gene_present, aes(xmin = Start/1000000, xmax = End/1000000, ymin = y_min, ymax = y_max, fill = as.character(name2)), show.legend = F) +
#{if(nrow(gene_sub) < 50)geom_text_repel(aes(x = Start/1000000, y = y_max, label = name2))} +
#{if(nrow(gene_present) < 50)geom_text_repel(data = gene_present, aes(x = Start/1000000, y = y_max, label = name2))} +
geom_text_repel(data = gene_present, aes(x = Start/1000000, y = y_max, label = name2), size = gene_font_size) +
scale_x_continuous(limits = c(start, end)) +
#geom_vline(xintercept = coord_name/1000000, linetype = "dashed") +
theme_bw() +
theme(axis.text.y = element_blank(), axis.title.x = element_blank()) +
labs(y = "Gene")
dev.off()
}
} else {
ggplot() +
geom_rect(data = gene_present, aes(xmin = Start/1000000, xmax = End/1000000, ymin = y_min, ymax = y_max), fill = col_gene, show.legend = F) +
#{if(nrow(gene_sub) < 50)geom_text_repel(aes(x = Start/1000000, y = y_max, label = name2))} +
#{if(nrow(gene_present) < 50)geom_text_repel(data = gene_present, aes(x = Start/1000000, y = y_max, label = name2))} +
geom_text_repel(data = gene_present, aes(x = Start/1000000, y = y_max, label = name2), size = gene_font_size) +
scale_x_continuous(limits = c(start, end), labels = NULL) +
#scale_x_continuous(limits = c(start, end)) +
#geom_vline(xintercept = coord_name/1000000, linetype = "dashed") +
theme_bw() +
theme(axis.text.y = element_blank(), axis.title.x = element_blank(),
plot.margin=unit(c(1,1,0,1), "cm"),
axis.ticks.y = element_blank()) +
labs(y = "Gene")
}
}
}
#' Custom Visualizing ROH
#'
#' This function is designed to custom visualizing ROH. It support to plot ROH by Chromosome, Interested region, Single sample, Target gene, and combine ROH plot with Gene annotation plot.
#' It also support users to customize the color and size of final graph.
#'
#' @param chr_id the number of chromosome want to plot, integer type, such as '1' indicate the Chromosome 1.
#' @param chr_length set the length for the chromosome, unnecessary at the moment.
#' @param start_position decimal digit, default unit in 'Mb'. such as 23.2112
#' @param end_position decimal digit, default unit in 'Mb'. such as 23.2112
#' @param individual_id sample ID, which used to plot ROH by interested Samples.
#' @param target_region plot target region by setting given interval in 'c(value1, value2)'.
#' @param plot_title add title in the plot
#' @param width_1 number to set the width of final plot size, unit is 'cm'
#' @param height_1 number to set the height of final plot size, unit is 'cm'
#' @param clean_roh support roh results from Plink or generated by cnv_clean form CNVPartition results
#' @param max_chr the maximum number of chromosomes to plot, it used for plot all chromosomes at once
#' @param refgene if true, will plot gene above roh plot
#' @param report_id report the sample ID while plotting
#' @param pedigree pedigree list, require at least three columns, Sample_Id, Sire and Dam
#' @param show_name default value is show_name = c(0, 160). accept the vectors only, unit is Mb. for example show_name = c(11.2, 12.4, 15.3, 18.4), means only plot the genes within the given interval
#' @param col_short set color to shorter length of roh in distribution plot
#' @param col_mid set color to middle length of roh in distribution plot
#' @param col_long set color to longer length of roh in distribution plot
#' @param folder set new folder to save results
#' @param gene_font_size set the font size of gene in ROH annotation plot
#'
#' @import dplyr ggplot2 tidyr
#' @importFrom data.table fread fwrite setkey foverlaps setDT
#' @return
#' ROH distribution plot
#'
#' @export roh_visual
#'
roh_visual <- function(clean_roh, max_chr = NULL, chr_id = NULL, chr_length = NULL, start_position = NULL, end_position = NULL, individual_id = NULL, refgene = NULL, target_region = c(0, 160), plot_title = NULL, report_id = NULL, pedigree = NULL, show_name =c(0, 160), width_1 = 13, height_1 = 10, col_short = "deepskyblue", col_mid = "black", col_long = "deeppink2", folder = "roh_visual", gene_font_size = 2.2) {
if(!(dir.exists(folder))){
dir.create(folder)
print(paste0("New folder ", folder, " was created in working directory."))
}
#prepare for population data
if(typeof(clean_roh) == "character"){
roh <- fread(file = clean_roh, header = TRUE)
} else {
roh <- clean_roh
}
#check if the input is a Plink results
#convert to the standards format if it is
plink_roh_names <- c("FID", "IID", "PHE", "CHR", "SNP1", "SNP2", "POS1", "POS2",
"KB", "NSNP", "DENSITY", "PHOM", "PHET")
if(length(colnames(roh)) == length(plink_roh_names)){
cat("ROH with input file in PLINK format was detected, coverting to HandyCNV standard formats\n")
colnames(roh) <- c("FID", "Sample_ID", "PHE", "Chr", "Start_SNP", "End_SNP",
"Start", "End", "Length", "Num_SNP", "DENSITY", "PHOM", "PHET")
handycnv_name <- c("Sample_ID", "Chr", "Start", "End", "Num_SNP", "Length", "Start_SNP", "End_SNP")
roh <- roh %>% select(handycnv_name)
roh$Chr <- as.numeric(roh$Chr)
roh <- dplyr::filter(roh, roh$Chr %in% c(1:max(roh$Chr)))
} else{
cat("Preparing plot data...\n")
}
# id_coord <- data.frame("Sample_ID" = sort(unique(roh$Sample_ID))) #extract unique ID prepare coordinate
# id_coord$Order <- seq(1,nrow(id_coord),1)
# id_coord$x <- max(roh$End)
# id_coord$y <- (id_coord$Order-1)*5 + 1
# cnv_coord <- merge(roh, id_coord, all.x = TRUE, sort = FALSE) #prepare original data
# #set length of chr
# chr_length_ars <- data.frame("Chr" = c(29:1), "Length" = c( 51.098607, 45.94015, 45.612108, 51.992305,
# 42.350435, 62.317253, 52.498615, 60.773035, 69.862954,
# 71.974595, 63.449741, 65.820629, 73.167244, 81.013979,
# 85.00778, 82.403003, 83.472345, 87.216183, 106.982474,
# 103.308737, 105.454467, 113.31977, 110.682743, 117.80634,
# 120.089316, 120.000601, 121.005158, 136.231102, 158.53411))
# #names(chr_length_ars) <- c("Chr", "Length")
# chr_length_ars <- chr_length_ars[order(chr_length_ars$Chr),]
chr_length_ars <- roh %>%
group_by(Chr) %>%
summarise(Length = max(End) / 1000000) %>%
arrange(as.numeric(Chr)) %>%
select(Chr, Length)
if(is.null(max_chr) == "FALSE") {
#1.plot all CNV on all chromosome on population level
id_coord <- data.frame("Sample_ID" = sort(unique(roh$Sample_ID))) #extract unique ID prepare coordinate
id_coord$Order <- seq(1,nrow(id_coord),1)
id_coord$x <- max(roh$End)
id_coord$y <- (id_coord$Order-1)*5 + 1
cnv_coord <- merge(roh, id_coord, all.x = TRUE, sort = FALSE) #prepare original data
cnv_pop <- cnv_coord
cnv_pop <- dplyr::filter(cnv_pop, cnv_pop$Chr >=1 & cnv_pop$Chr <= max_chr)
png(res = 350, filename = paste0(folder, "/1_chr_all_roh.png"), width = width_1, height = height_1, units = "cm")
popu_plot <- ggplot(cnv_pop, aes(xmin = Start/1000000, xmax = End/1000000, ymin = (Order-1)*5, ymax = (Order-1)*5 + 3)) +
geom_rect(aes(fill = Length/1000000)) +
scale_fill_gradientn(colours = c(col_short, col_mid, col_long)) +
theme_classic() +
scale_y_continuous(labels = NULL) +
#scale_x_continuous(breaks = seq(0, max_chr_length +10, by = 10)) +
scale_x_continuous(labels = NULL) +
facet_wrap(~as.numeric(Chr), nrow = 1) +
labs(title = "ROH Distribution on Population Level", fill = "Length")
print(popu_plot)
dev.off()
cat("Task done, plot was stored in working directory.")
return(popu_plot)
}
else if(is.null(chr_id) == "FALSE" & is.null(start_position) & is.null(refgene)){
#2.plot for specific chromosome
cnv_chr <- roh[roh$Chr == chr_id, ]
id_coord <- data.frame("Sample_ID" = sort(unique(cnv_chr$Sample_ID))) #extract unique ID prepare coordinate
id_coord$Order <- seq(1,nrow(id_coord),1)
#id_coord$x <- chr_length_ars[chr_id, 2]
max_len = chr_length_ars %>%
filter(Chr == chr_id) %>%
select(Length)
id_coord$x <- max_len$Length
id_coord$y <- (id_coord$Order-1)*5 + 1
cnv_chr <- merge(cnv_chr, id_coord, all.x = TRUE, sort = FALSE) #prepare original data
chr_name <- paste0(folder, "/Chr", chr_id, "_roh.png")
id_number <- nrow(id_coord)
chr_title <- paste0("ROH on Chromosome ", chr_id, " with ", id_number, " Individuals")
png(res = 350, filename = chr_name, width = width_1, height = height_1, units = "cm")
#png(res = 300, filename = "10_chr.png", width = 3500, height = 2000)
chr_plot <- ggplot(cnv_chr, aes(xmin = Start/1000000, xmax = End/1000000, ymin = (Order-1)*5, ymax = (Order-1)*5 + 3)) +
geom_rect(aes(fill = Length/1000000), show.legend = FALSE) +
scale_fill_gradientn(colours = c(col_short, col_mid, col_long)) +
#geom_text(aes(x,y, label = Sample_ID), size = 1.5, check_overlap = TRUE) +
theme_bw() +
scale_y_continuous(labels = NULL) +
scale_x_continuous(breaks = seq(0, max_len$Length, by = 5), limits = c(0, max_len$Length)) +
labs(x = "Physical Position (Mb)", y ="Individual ID", title = chr_title, fill = "Length")
print(chr_plot)
dev.off()
cat("Task done, plot was stored in working directory.\n")
return(chr_plot)
}
#else if(is.null(start_position & end_position) == "FALSE")
else if(is.null(start_position) == "FALSE" & is.null(end_position) == "FALSE" & is.null(refgene))
{
#3.zoom into specific region
cnv_chr <- roh[roh$Chr == chr_id, ]
cnv_chr_zoom <- filter(cnv_chr, Start >= start_position * 1000000 -1 & End <= end_position * 1000000 + 1)
id_coord <- data.frame("Sample_ID" = sort(unique(cnv_chr_zoom$Sample_ID))) #extract unique ID prepare coordinate
id_coord$Order <- seq(1,nrow(id_coord),1)
#id_coord$x <- chr_length_ars[chr_id, 2]
max_len = chr_length_ars %>%
filter(Chr == chr_id) %>%
select(Length)
id_coord$x <- max_len$Length
id_coord$y <- (id_coord$Order-1)*5 + 1
cnv_chr_zoom <- merge(cnv_chr_zoom, id_coord, all.x = TRUE, sort = FALSE) #prepare original data
cnv_chr_zoom$zoom_x <- end_position
zoom_name <- paste0(folder, "/Chr", chr_id, "_",start_position,"-",end_position, "Mb", "_roh.png")
id_number <- nrow(id_coord)
zoom_title <- paste0("/ROH on Chr ", chr_id, ": ",start_position,"-",end_position, " Mb", " with ", id_number," Samples" ," - ", plot_title)
png(res = 350, filename = zoom_name, width = width_1, height = height_1, units = "cm")
zoom_plot <- ggplot(cnv_chr_zoom, aes(xmin = Start/1000000, xmax = End/1000000, ymin = (Order-1)*5, ymax = (Order-1)*5 + 3)) +
geom_rect(aes(fill = Length/1000000)) +
scale_fill_gradientn(colours = c(col_short, col_mid, col_long)) +
#geom_text(aes(zoom_x, y, label = Sample_ID), size = 2.5) +
theme_bw() +
scale_y_continuous(labels = NULL) +
scale_x_continuous(breaks = seq(start_position, end_position)) +
labs(x = "Physical Position (Mb)", y ="Individual ID", title = zoom_title, fill = "Length")
print(zoom_plot)
dev.off()
print("Task done, plot was stored in working directory.")
if(!is.null(report_id)) {
indiv_id <- unique(cnv_chr_zoom$Sample_ID)
cat("Individual ID in this CNVRs as following: \n")
print(indiv_id)
return(indiv_id)
#assign("indiv_id", value = cnv_chr_zoom, envir = .GlobalEnv)
indiv <- cnv_chr_zoom
#cnv_visual(clean_cnv = "clean_cnv/penncnv_clean.cnv", chr_id = 5, start_position = 93.6, end_position = 93.7, report_id = 1)
if(!(is.null(pedigree))){
pedb <- fread(pedigree)
if (exists("pedb")) {
cat("Pedigree was read in.")
}
indiv_id_ped <- merge(indiv, pedb, by.x = "Sample_ID", by.y = "Chip_ID", all.x = TRUE)
#print(colnames(indiv_id_ped))
if ("Herd" %in% colnames(indiv_id_ped)){
herd_cnv <- ggplot(indiv_id_ped, aes(x = as.factor(Herd), fill = as.factor(CNV_Value))) +
geom_bar() +
theme_classic()+
theme(axis.text.x = element_text(angle = 45), legend.position = "top") +
labs(x = "Name of Herd", y = "Number of ROH", fill = "ROH")
}
if ("Sire_Source" %in% colnames(indiv_id_ped)){
source_cnv <- ggplot(indiv_id_ped, aes(x = Sire_Source, fill = as.factor(CNV_Value))) +
geom_bar() +
theme_classic()+
theme(axis.text.x = element_text(angle = 45), legend.position = "top") +
labs(x = "Bull Source", y = "Number of ROH", fill = "ROH")
}
if ("Sire_ID" %in% colnames(indiv_id_ped)){
sire_cnv <- ggplot(indiv_id_ped, aes(x = Sire_ID, fill = as.factor(CNV_Value))) +
geom_bar()+
theme_classic()+
theme(axis.text.x = element_text(angle = 45), legend.position = "top") +
labs(x = "Sire ID", y = "Number of ROH", fill = "Copy of ROH")
}
png(filename = paste0(zoom_name, "source.png"), res = 350, bg = "transparent", height = height_1, width = width_1, units = "cm")
if (exists("herd_cnv") & exists("source_cnv") & exists("sire_cnv")){
cnv_source <- plot_grid(herd_cnv, source_cnv, sire_cnv, ncol = 1)
print(cnv_source)
dev.off()
} else if (exists("herd_cnv") & exists("source_cnv")) {
cnv_source <- plot_grid(herd_cnv, source_cnv, sire_cnv, ncol = 1)
print(cnv_source)
dev.off()
} else if (exists("herd_cnv") & exists("sire_cnv")) {
cnv_source <- plot_grid(herd_cnv, sire_cnv, ncol = 1)
print(cnv_source)
dev.off()
} else if (exists("source_cnv") & exists("sire_cnv")) {
cnv_source <- plot_grid(source_cnv, sire_cnv, ncol = 1)
print(cnv_source)
dev.off()
} else {
print(sire_cnv)
dev.off()
}
}
}
}
else if (is.null(start_position) == "FALSE" & is.null(end_position) == "FALSE" & is.null(refgene) == "FALSE") {
#3.zoom into specific region
coord_name <- as.vector(show_name) # add vline for plot
cnv_chr <- roh[roh$Chr == chr_id, ]
cnv_chr_zoom <- filter(cnv_chr, Start >= start_position * 1000000 -1 & End <= end_position * 1000000 + 1)
id_coord <- data.frame("Sample_ID" = sort(unique(cnv_chr_zoom$Sample_ID))) #extract unique ID prepare coordinate
id_coord$Order <- seq(1,nrow(id_coord),1)
#id_coord$x <- chr_length_ars[chr_id, 2]
max_len = chr_length_ars %>%
filter(Chr == chr_id) %>%
select(Length)
id_coord$x <- max_len$Length
id_coord$y <- (id_coord$Order-1)*5 + 1
cnv_chr_zoom <- merge(cnv_chr_zoom, id_coord, all.x = TRUE, sort = FALSE) #prepare original data
cnv_chr_zoom$zoom_x <- end_position
zoom_name <- paste0(folder, "/Chr", chr_id, "_",start_position,"-",end_position, "Mb", "_roh.png")
id_number <- nrow(id_coord)
zoom_title <- paste0("Chr", chr_id, ": ",start_position," - ",end_position, " Mb", " with ", id_number," Individual" ," - ", plot_title)
print(zoom_title)
zoom_plot <- ggplot(cnv_chr_zoom, aes(xmin = Start/1000000, xmax = End/1000000, ymin = (Order-1)*5, ymax = (Order-1)*5 + 3)) +
geom_rect(aes(fill = Length), show.legend = F) +
scale_fill_gradientn(colours = c(col_short, col_mid, col_long)) +
#geom_text(aes(zoom_x, y, label = Sample_ID), size = 2.5) +
theme_bw() +
theme(legend.position = "top") +
scale_y_continuous(labels = NULL) +
geom_vline(xintercept = coord_name, linetype = "dashed") +
scale_x_continuous(limits = c(start_position, end_position)) +
#labs(x = "Physical Position (Mb)", y ="Individual ID", title = zoom_title, fill = "Length")
labs(x = "Physical Position (Mb)", y ="Individual ID")
cat("plotting gene....\n")
gene_plot <- HandyCNV::plot_gene(refgene = refgene, chr_id = chr_id, start = start_position, end = end_position, show_name = show_name, gene_font_size = gene_font_size)
#png(res = 300, filename = zoom_name, width = 3500, height = 2000)
png(filename = zoom_name, width = width_1, height = height_1, units = "cm", res = 350)
roh_gene <- plot_grid(gene_plot, zoom_plot, ncol = 1, rel_heights = c(1, 3))
print(roh_gene)
dev.off()
cat("Task done, plot was stored in working directory.\n")
return(roh_gene)
}
else if (is.null(individual_id) == "FALSE")
{
#plot on individual level
#cnv_indiv <- cnv_p[which(cnv_p$Sample_ID == "204806050057_R01C01")]
cnv_indiv <- roh[which(roh$Sample_ID == individual_id),]
chr_coord <- data.frame("Chr" = seq(1,max(roh$Chr),1))
chr_coord$x <- max(roh$End) #adjust x for geom_text
chr_coord$y <- (chr_coord$Chr-1)*5 + 1 #adjust y for geom_text
cnv_indiv_coord <- merge(cnv_indiv, chr_coord, all.x = TRUE, sort = FALSE)
#4.
indiv_name <- paste0(folder, "/ROH_of_", individual_id,".png")
indiv_title <- paste0("ROH Distribution of Individual ", individual_id)
png(res = 350, filename = indiv_name, width = width_1, height = height_1, units = "cm")
indiv_plot <- ggplot(cnv_indiv_coord) +
geom_rect(aes(xmin = Start/1000000, xmax = End/1000000, ymin = (Chr-1)*5, ymax = (Chr-1)*5 + 3, fill = Length/1000000)) +
#scale_fill_gradient(low = col_short, high = col_long) +
scale_fill_gradientn(colours = c(col_short, col_mid, col_long)) +
geom_text(aes(x = x/1000000, y, label = Chr), size = 4) +
theme_bw() +
scale_y_continuous(breaks = seq(0, max(roh$End)/1000000, by = 10),labels = NULL) +
scale_x_continuous(breaks = seq(0, max(roh$End)/1000000, by = 10)) +
labs(x = "Physical Position (Mb)", y ="Autosome", title = indiv_title, fill= "Length")
print(indiv_plot)
dev.off()
cat("Task done, plot was stored in working directory.\n")
return(indiv_plot)
} else if (is.null(target_region) == "FALSE"){
#get input data
target_region <- as.vector(target_region)
target_g <- data.table(t(target_region))
target_g$V2 <- target_g$V2 * 1000000 #convert to bp
target_g$V3 <- target_g$V3 * 1000000 #convert to bp
names(target_g) <- c("Chr_TAR", "Start_TAR", "End_TAR")
#prepare target roh
roh <- setDT(roh)
setkey(roh, Chr, Start, End)
target_roh <- foverlaps(x = target_g, y = roh, by.x = c("Chr_TAR", "Start_TAR", "End_TAR"), type = "any")
id_coord <- data.frame("Sample_ID" = sort(unique(target_roh$Sample_ID))) #extract unique ID prepare coordinate
id_coord$Order <- seq(1,nrow(id_coord),1)
#id_coord$x <- chr_length_ars[chr_id, 2]
# max_len = chr_length_ars %>%
# filter(Chr == chr_id) %>%
# select(Length)
# id_coord$x <- max_len$Length
# id_coord$y <- (id_coord$Order-1)*5 + 1
target_roh <- merge(target_roh, id_coord, all.x = TRUE, sort = FALSE) #prepare original data
target_name <- paste0(folder, "/Chr", target_g$Chr_TAR, "_", target_region[2], "_", target_region[3], "_roh.png")
id_number <- nrow(id_coord)
chr_title <- paste0("ROH on Chr", target_g$Chr_TAR, ":", target_region[2], "-", target_region[3], " with ", id_number, " Samples")
#png(res = 350, filename = target_name, width = width_1, height = height_1, units = "cm")
#png(res = 300, filename = "10_chr.png", width = 3500, height = 2000)
png(filename = target_name, width = width_1, height = height_1, units = "cm", res = 350)
target_plot <- ggplot(target_roh, aes(xmin = Start/1000000, xmax = End/1000000, ymin = (Order-1)*5, ymax = (Order-1)*5 + 3)) +
geom_rect(aes(fill = Length/1000000), show.legend = FALSE) +
scale_fill_gradientn(colours = c(col_short, col_mid, col_long)) +
#geom_text(aes(x,y, label = Sample_ID), size = 1.5, check_overlap = TRUE) +
theme_bw() +
scale_y_continuous(labels = NULL) +
geom_vline(xintercept = target_region[2:3], linetype = "dashed", size = 0.3) +
#scale_x_continuous(breaks = seq(min(target_roh$Start)/1000000, max(target_roh$End)/1000000, by = 5), limits = c(min(target_roh$Start)/1000000, max(target_roh$End)/1000000)) +
scale_x_continuous(limits = c(min(target_roh$Start)/1000000, max(target_roh$End)/1000000)) +
labs(x = "Physical Position (Mb)", y ="Individual ID", title = chr_title, fill = "Length")
if(is.null(refgene) == "FALSE"){
gene_plot <- HandyCNV::plot_gene(refgene = refgene, chr_id = target_g$Chr_TAR, start = target_g$Start_TAR/1000000, end = target_g$End_TAR/1000000, show_name = show_name, gene_font_size = gene_font_size)
png(filename = target_name, width = width_1, height = height_1, units = "cm", res = 350)
roh_gene <- plot_grid(gene_plot, target_plot, ncol = 1, rel_heights = c(1, 3))
print(roh_gene)
dev.off()
cat("Task done, plot was stored in working directory.\n")
return(roh_gene)
} else {
#ggsave(filename = target_name, plot = target_plot, width = width_1, height = height_1, units = "cm", dpi = 350)
print(target_plot)
dev.off()
cat("Task done, plot was stored in working directory.\n")
return(target_plot)
}
}
else{
cat("Warning: Lack of input parameters!!!\n
Warning: Please check input parameters carefully!!!\n")
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.