# Visualization Part ------------------------------------------------------
#' Plot CNV signatures
#'
#' Plot CNV signature from multiple result objects from \code{VSHunter} package.
#'
#' @param Res a result \code{list} generated from CNV signature pipeline or a \code{NMFfitX} object from \code{NMF} package.
#' @param contributions if \code{TRUE}, plot contributions instead of signatures.
#' @param color user custom colors.
#' @param patient_order user custom patient order.
#' @param font_size font size.
#' @param show_title if \code{TRUE}, show title.
#' @param axis_lwd line width of axis.
#' @param title_size title size.
#' @param show_barcodes if \code{TRUE}, show barcode.
#' @param barcode_size barcode size when plot contribution.
#' @param yaxisLim limit of y axis.
#' @param ... other options pass to \code{barplot}.
#'
#' @return Nothing
#' @family CNV analysis functions
#' @export
#'
cnv_plotSignatures = function(Res = NULL,
contributions = FALSE,
color = NULL,
patient_order = NULL,
font_size = 1.2,
show_title = TRUE,
axis_lwd = 2,
title_size = 1.3,
show_barcodes = FALSE,
barcode_size = 0.5,
yaxisLim = 0.3,
...) {
# modify from https://github.com/PoisonAlien/maftools/blob/master/R/plotSignatures.R
# input can be multipe objects from different functions
# basically, we only use w and h matrix of NMF
# assuming input is a basic NMF result
if (inherits(Res, "NMFfitX1")) {
#-- Signatures
w = NMF::basis(Res)
w = apply(w, 2, function(x)
x / sum(x)) # Scale signatures (basis)
colnames(w) = paste("Signature", 1:ncol(w), sep = "_")
#-- Contributions
h = NMF::coef(Res)
h = apply(h, 2, function(x)
x / sum(x)) # Scale contributions (coefs)
rownames(h) = paste("Signature", 1:ncol(w), sep = "_")
} else if (is.list(Res)) {
if (!all(c("signatures", "contributions") %in% names(Res)))
stop("signatures and contributions elements must in input list.")
w = Res$signatures
w = apply(w, 2, function(x)
x / sum(x)) # Scale signatures (basis)
if (is.null(colnames(w))) {
colnames(w) = paste("Signature", 1:ncol(w), sep = "_")
}
h = Res$contributions
if (is.null(rownames(h))) {
rownames(h) = paste("Signature", 1:ncol(w), sep = "_")
}
}
contrib = h
if (contributions) {
contribt = t(contrib)
#calculate sd
if (!is.null(patient_order)) {
contribt = contribt[patient_order, ] #order on user-specified ordering of the genomes
} else{
contribt = contribt[order(contribt[, ncol(contribt)]), ] #order according to standard deviation
}
#contrib = t(contribt[,1:(ncol(contribt)-1)])
contrib = t(contribt[, 1:(ncol(contribt))])
cols = RColorBrewer::brewer.pal(n = 8, name = 'Set2')
if (show_barcodes) {
lo = layout(mat = matrix(data = c(1, 2), nrow = 2),
heights = c(6, 2))
par(mar = c(6, 4, 2, 1))
b = barplot(
contrib,
axes = FALSE,
horiz = FALSE,
col = cols,
border = NA,
names.arg = rep("", ncol(contrib))
)
axis(
side = 1,
at = b,
labels = colnames(contrib),
lwd = 2,
cex.axis = barcode_size,
las = 2,
line = 1,
hadj = 0.8,
font = 2,
tick = FALSE
)
axis(
side = 2,
at = seq(0, 1, 0.25),
lwd = 3,
font = 2,
las = 2,
cex.axis = 0.9
)
mtext(
text = "Signature exposures",
side = 2,
font = 2,
cex = 1,
line = 2.8,
srt = 90
) # strange srt
par(mar = rep(2, 4))
plot.new()
#par(mar = c(2, 3, 0, 0))
legend(
x = "left",
legend = rownames(contrib),
col = cols[1:nrow(contrib)],
border = NA,
bty = "n",
pch = 15,
xpd = TRUE,
ncol = 1,
cex = 1.2,
pt.cex = 1.5,
horiz = TRUE
)
} else{
lo = layout(mat = matrix(data = c(1, 2), nrow = 2),
heights = c(6, 2))
par(mar = c(3, 4, 2, 1))
b = barplot(
contrib,
axes = FALSE,
horiz = FALSE,
col = cols,
border = NA,
names.arg = rep("", ncol(contrib))
)
axis(
side = 2,
at = seq(0, 1, 0.25),
lwd = 3,
font = 2,
las = 2,
cex.axis = 0.9
)
mtext(
text = "Signature exposure",
side = 2,
font = 2,
cex = 1,
line = 2.8,
srt = 90
)
par(mar = rep(2, 4))
plot.new()
#par(mar = c(2, 3, 0, 0))
legend(
x = "left",
legend = rownames(contrib),
col = cols[1:nrow(contrib)],
border = NA,
bty = "n",
pch = 15,
xpd = TRUE,
ncol = 1,
cex = 1.2,
pt.cex = 1.5,
horiz = TRUE
)
}
} else{
plotData = as.data.frame(t(w))
nsigs = nrow(plotData)
if (is.null(color)) {
#color = c("blue","black","red","gray","green","maroon")
color = c(
'coral4',
'lightcyan4',
'deeppink3',
'lightsalmon1',
'forestgreen',
'cornflowerblue'
)
}
cnames = colnames(plotData)
# order the features
plotData = plotData[, c(
grep("bp10MB", cnames, value = TRUE),
grep("copynumber", cnames, value = TRUE),
grep("changepoint", cnames, value = TRUE),
grep("bpchrarm", cnames, value = TRUE),
grep("osCN", cnames, value = TRUE),
grep("segsize", cnames, value = TRUE)
)]
len_c1 = sum(grepl("bp10MB", cnames))
len_c2 = sum(grepl("copynumber", cnames))
len_c3 = sum(grepl("changepoint", cnames))
len_c4 = sum(grepl("bpchrarm", cnames))
len_c5 = sum(grepl("osCN", cnames))
len_c6 = sum(grepl("segsize", cnames))
len_seqc = c(len_c1,
len_c2,
len_c3,
len_c4,
len_c5,
len_c6)
colors = rep(color, times = len_seqc)
len_seq = c(0L)
for (i in 1:length(len_seqc)) {
s = len_seqc[i] * 2 + len_seq[i]
len_seq = c(len_seq, s)
}
par(
mfrow = c(nsigs, 1),
oma = c(5, 4, 0, 0) + 0.1,
mar = c(0, 0, 2.5, 0) + 0.1,
las = 1,
tcl = -.25,
font.main = 4,
xpd = NA
)
for (i in 1:nsigs) {
ae = rownames(plotData)[i]
d = as.matrix(plotData[i, ])
if (is.na(yaxisLim)) {
bh = ceiling(max(d, na.rm = TRUE) * 10) / 10 #Bar height
} else{
bh = 0.3
}
barplot(
d,
xaxt = "n",
yaxt = "n",
col = colors,
beside = TRUE,
ylim = c(-0.1, bh),
cex.main = 1,
border = NA,
font.axis = 2,
font.lab = 2,
adj = 0.25,
...
)
if (show_title) {
title(
main = ae,
cex.main = title_size,
line = 0,
adj = 0.98
)
}
#mtext(text = ae, side = 1, line = 2, font = 1, cex = 0.5, at = 0.3)
axis(
side = 2,
at = seq(0, bh, 0.1),
pos = -2,
las = 2,
lwd = axis_lwd,
hadj = 1.1,
font = 2,
cex.axis = font_size
)
#abline(h = seq(0, 0.3, 0.1),lty=2,lwd=0.3, col = 'gray70')
rect(
xleft = len_seq,
ybottom = -0.05,
xright = ncol(plotData) * 2,
ytop = -0.02,
col = color,
border = 'gray70'
)
if (i == nsigs) {
text(
labels = c(
"bp10MB",
"copynumber",
"changepoint",
"bpchrarm",
"osCN",
"segsize"
),
y = rep(-0.1, 6),
x = len_seq[2:7] - len_seqc,
cex = font_size,
font = 1.8,
font.lab = 2,
pos = 1.2,
srt = 30
)
}
}
}
}
#' Plot copy number feature distribution
#'
#' @param features a \code{list} generate from \code{cnv_derivefeatures} function.
#' @param ylab lab of y axis.
#' @param ... other options pass to \code{\link[cowplot]{plot_grid}} function of \code{cowplot} package.
#'
#' @return a ggplot object
#' @import cowplot
#' @family CNV analysis functions
#' @export
#'
cnv_plotFeatureDistribution = function(features, ylab = "", ...) {
features = lapply(features, function(x) {
x$value = as.numeric(x$value)
return(x)
})
#requireNamespace("cowplot")
#cowplot::theme_cowplot()
#ggplot2::theme_set(cowplot::theme_cowplot(font_size = 12))
p_1 = ggplot(data = features$segsize, aes(x = log10(value))) +
geom_line(stat = "density") + labs(x = "Segment size (log10 based)", y = ylab) +
theme(plot.margin = unit(c(0.05, 0.05, 0.05, 0.05), "cm")) + theme_cowplot(font_size = 12)
#p_1 = p_1 + scale_x_continuous(breaks = 10 ^c(7, 8),
# labels = scales::trans_format("log10", scales::math_format(10^.x)))
p_2 = ggplot(data = features$copynumber, aes(x = value)) +
geom_line(stat = "density") + labs(x = "Copy number", y = ylab) +
theme(plot.margin = unit(c(0.05, 0.05, 0.05, 0.05), "cm")) + theme_cowplot(font_size = 12)
p_3 = ggplot(data = features$changepoint, aes(x = value)) +
geom_line(stat = "density") + labs(x = "Copy number changepoint", y = ylab) +
theme(plot.margin = unit(c(0.05, 0.05, 0.05, 0.05), "cm")) + theme_cowplot(font_size = 12)
p_4 = ggplot(data = features$bp10MB, aes(x = value)) +
geom_bar(stat = "count") + labs(x = "Breakpoint count per 10MB", y = ylab) +
theme(plot.margin = unit(c(0.05, 0.05, 0.05, 0.05), "cm")) + theme_cowplot(font_size = 12)
p_5 = ggplot(data = features$bpchrarm, aes(x = value)) +
geom_bar(stat = "count") + labs(x = "Breakpoint count per chr arm", y = ylab) +
theme(plot.margin = unit(c(0.05, 0.05, 0.05, 0.05), "cm")) + theme_cowplot(font_size = 12)
p_6 = ggplot(data = features$osCN, aes(x = value)) +
geom_bar(stat = "count") + labs(x = "Oscilating CN chain length", y = ylab) +
theme(plot.margin = unit(c(0.05, 0.05, 0.05, 0.05), "cm")) + theme_cowplot(font_size = 12)
# p_6 = ggplot(data = features$osCN, aes(x = as.factor(value))) +
# geom_bar(stat = "count") + labs(x = "Oscilating CN chain length", y = ylab) + theme(plot.margin = unit(c(0.05, 0.05, 0.05, 0.05), "cm"))
# osCN_tab = length(table(features$osCN$value))
#
# if (osCN_tab > 15 & osCN_tab <= 20) {
# p_6 = p_6 + theme(axis.text.x = element_text(size = 9))
# } else if (osCN_tab > 20 & osCN_tab <= 30) {
# p_6 = p_6 + theme(axis.text.x = element_text(size = 7))
# } else if (osCN_tab > 30 & osCN_tab <= 40) {
# p_6 = p_6 + theme(axis.text.x = element_text(size = 4))
# } else if (osCN_tab > 40) {
# p_6 = p_6 + theme(axis.text.x = element_text(size = 3))
# }
p = cowplot::plot_grid(p_1,
p_2,
p_3,
p_4,
p_5,
p_6,
nrow = 2,
align = "hv",
...)
p
}
#' Plot mixture fit model components
#'
#' @inheritParams cnv_plotFeatureDistribution
#' @param components a \code{list} contain \code{flexmix} object of copy-number features, obtain this
#' from \code{cnv_fitMixModels} function or use pre-compiled components data which come from CNV signature paper
#' https://www.nature.com/articles/s41588-018-0179-8 (set this argument as \code{NULL}).
#' @param ... other options pass to \code{\link[cowplot]{plot_grid}} function of \code{cowplot} package.
#'
#' @return a ggplot object
#' @import cowplot
#' @export
#' @family CNV analysis functions
#'
cnv_plotMixComponents = function(features, components, ...) {
cbPalette <-
c(
RColorBrewer::brewer.pal(8, "Dark2"),
RColorBrewer::brewer.pal(9, "Set1"),
"black"
)
plotNormDensity = function(value, matrix, xlab) {
p = ggplot(data = data.frame(x = seq(
min(value), max(value), length.out = 100
)),
aes(x)) + ylab("")
for (i in 1:ncol(matrix)) {
p = p + stat_function(
fun = stats::dnorm,
n = 1000,
args = list(mean = matrix[1, i],
sd = matrix[2, i]),
color = cbPalette[i]
)
}
p = p + xlab(xlab) + theme_cowplot(font_size = 12)
p
}
plotPoisDensity = function(value, lambda, xlab, max_value = 10) {
if (is.null(max_value)) {
p = ggplot(data = data.frame(x = seq(
min(value), max(value), length.out = 100
)),
aes(x)) + ylab("")
} else {
p = ggplot(data = data.frame(x = seq(
min(value), max_value, length.out = 100
)),
aes(x)) + ylab("")
}
for (i in 1:length(lambda)) {
p = p + stat_function(
geom = "line",
n = 11,
fun = stats::dpois,
args = list(lambda = lambda[i]),
color = cbPalette[i]
)
}
p = p + xlab(xlab) + theme_cowplot(font_size = 12)
p
}
features = lapply(features, function(x) {
x$value = as.numeric(x$value)
return(x)
})
# norm distribution
comp_segsize = log10(flexmix::parameters(components[["segsize"]]))
comp_copynumber = flexmix::parameters(components[["copynumber"]])
comp_changepoint = flexmix::parameters(components[["changepoint"]])
# pois distribution
comp_bp10MB = flexmix::parameters(components[["bp10MB"]])
comp_bpchrarm = flexmix::parameters(components[["bpchrarm"]])
comp_osCN = flexmix::parameters(components[["osCN"]])
# norm plots
p_1 = plotNormDensity(log10(features[["segsize"]]$value), comp_segsize, xlab = "Segment size (log10 based)")
# p_1 = p_1 + scale_x_continuous(breaks = 10 ^c(0, 7:9),
# labels = scales::trans_format("log10", scales::math_format(10^.x)))
p_2 = plotNormDensity(features[["copynumber"]]$value, comp_copynumber, xlab = "Copy number")
p_3 = plotNormDensity(features[["changepoint"]]$value, comp_changepoint, xlab = "Copy-number changepoint")
# pois plots
p_4 = plotPoisDensity(features[["bp10MB"]]$value,
comp_bp10MB,
xlab = "Breakpoint count per 10MB",
max_value = 10)
p_5 = plotPoisDensity(features[["bpchrarm"]]$value,
comp_bpchrarm,
xlab = "Breakpoint count per arm",
max_value = 50)
p_6 = plotPoisDensity(features[["osCN"]]$value, comp_osCN, xlab = "Oscilating CN chain length")
p = cowplot::plot_grid(p_1,
p_2,
p_3,
p_4,
p_5,
p_6,
nrow = 2,
align = "hv",
...)
p
}
#' Calculate length fraction profile of copy number
#'
#' @inheritParams cnv_derivefeatures
#' @inheritParams cnv_readprofile
#' @author Shixiang Wang <w_shixiang@163.com>
#' @return a data frame
#' @export
#' @family CNV analysis functions
cnv_getLengthFraction = function(CN_data,
genome_build = c("hg19", "hg38"),
cols = c("Chromosome", "Start.bp", "End.bp", "modal_cn"),
sample_col = "sample") {
stopifnot(is.list(CN_data) | is.data.frame(CN_data))
genome_build = match.arg(genome_build)
if (inherits(CN_data, "list")) {
segTab = base::Reduce(rbind, CN_data)
segTab$sample = base::rep(x = names(CN_data),
times = sapply(CN_data, function(x)
nrow(x)))
} else {
segTab = CN_data[, c(cols, sample_col)]
if (ncol(segTab) == 5) {
colnames(segTab) = c("chromosome", "start", "end", "segVal", "sample")
} else if (ncol(segTab) == 4) {
colnames(segTab) = c("chromosome", "start", "end", "sample")
} else {
stop(
"If input is a data.frame, must have 4 necessary columns (chr, start, end, sample) and 1 optional column (segVal)."
)
}
}
# unify chromosome column
segTab$chromosome = as.character(segTab$chromosome)
segTab$chromosome = sub(
pattern = "chr",
replacement = "chr",
x = segTab$chromosome,
ignore.case = TRUE
)
if (any(!grepl("chr", segTab$chromosome))) {
segTab$chromosome[!grepl("chr", segTab$chromosome)] = paste0("chr", segTab$chromosome[!grepl("chr", segTab$chromosome)])
}
if (any(grepl("chr23", segTab$chromosome))) {
warning("'23' is not a supported chromosome, related rows will be discarded.")
segTab = segTab[!grepl("chr23", segTab$chromosome),]
}
valid_chr = c(paste0("chr", 1:22), "chrX", "chrY")
if (!all(segTab$chromosome %in% valid_chr)) {
message("Filter some invalid segments... (not as 1:22 and X, Y)")
segTab = base::subset(segTab, chromosome %in% valid_chr)
}
arm_data = getArmLocation(genome_build)
# logical operation
assign_df = data.frame(
location = vector("character", nrow(segTab)),
annotation = vector("character", nrow(segTab)),
fraction = vector("numeric", nrow(segTab)),
stringsAsFactors = FALSE
)
for (i in 1:nrow(segTab)) {
x = segTab[i,]
# locate chromosome
arm_loc = base::subset(arm_data, chrom == x$chromosome)
y = c(x$start, x$end)
if (y[2] <= arm_loc$p_end & y[1] >= arm_loc$p_start) {
location = paste0(sub("chr", "", arm_loc$chrom), "p")
annotation = "short arm"
fraction = (y[2] - y[1] + 1) / (arm_loc$p_end - arm_loc$p_start + 1)
} else if (y[2] <= arm_loc$q_end &
y[1] >= arm_loc$q_start) {
location = paste0(sub("chr", "", arm_loc$chrom), "q")
annotation = "long arm"
fraction = (y[2] - y[1] + 1) / (arm_loc$q_end - arm_loc$q_start + 1)
} else if (y[1] >= arm_loc$p_start &
y[1] <= arm_loc$p_end &
y[2] >= arm_loc$q_start & y[2] <= arm_loc$q_end) {
location = paste0(sub("chr", "", arm_loc$chrom), "pq") # across p and q arm
annotation = "across short and long arm"
fraction = 2 * ((y[2] - y[1] + 1) / arm_loc$total_size)
} else if (y[1] < arm_loc$p_end & y[2] < arm_loc$q_start) {
location = paste0(sub("chr", "", arm_loc$chrom), "p")
annotation = "short arm intersect with centromere region"
# only calculate region does not intersect
fraction = (y[2] - y[1] + 1 - (y[2] - arm_loc$p_end)) / (arm_loc$p_end - arm_loc$p_start + 1)
} else if (y[1] > arm_loc$p_end &
y[1] < arm_loc$q_start & y[2] > arm_loc$q_start) {
location = paste0(sub("chr", "", arm_loc$chrom), "q")
annotation = "long arm intersect with centromere region"
# only calculate region does not intersect
fraction = (y[2] - y[1] + 1 - (y[1] - arm_loc$q_start)) / (arm_loc$q_end - arm_loc$q_start + 1)
} else {
location = "Unknown"
annotation = "Unknow segment, locate in centromere region"
fraction = NA_real_
}
assign_df[i,] = c(location, annotation, fraction)
#assign_df = base::rbind(assign_df, c(location, annotation, percentage))
}
res = base::cbind(segTab, assign_df)
res$fraction = as.numeric(res$fraction)
res
}
#' Plot copy number distribution either by length or chromosome
#'
#' @param data a \code{data.frame}, result of \code{cnv_getLengthFraction} function.
#' @param rm_normal logical. Whether removel normal copy (i.e. "segVal" equals 2), default is \code{TRUE}.
#' @param mode either "ld" for distribution by CN length or "cd" for distribution by chromosome.
#' @param fill when \code{mode} is "cd", if \code{fill} is \code{TRUE}, plot percentage instead of count.
#' @param scale_chr logical. If `TRUE`, normalize count to per Megabase unit.
#' @inheritParams cnv_derivefeatures
#' @author Shixiang Wang <w_shixiang@163.com>
#'
#' @return a ggplot object
#' @import cowplot
#' @export
#' @family CNV analysis functions
#' @examples
#' \dontrun{
#' cnv_plotLengthSummary(data, mode = "cd")
#' }
cnv_plotDistributionProfile = function(data,
rm_normal = TRUE,
mode = c("ld", "cd"),
fill = FALSE,
scale_chr = TRUE,
genome_build = c("hg19", "hg38")) {
stopifnot(is.logical(rm_normal),
is.data.frame(data),
is.logical(fill))
mode = match.arg(mode)
genome_build = match.arg(genome_build)
# if remove normal copy number segments
if (rm_normal) {
if (!"segVal" %in% colnames(data)) {
stop("'segVal' must be provided as a column.")
}
data = base::subset(data, data[, "segVal"] != 2)
}
if (mode == "ld") {
# plot length distribution
if (!"fraction" %in% colnames(data)) {
stop("'fraction' must be provided as a column.")
}
ggplot(data, aes(x = fraction, y = ..density..)) +
geom_histogram(bins = 100) +
labs(x = "Length of SCNA\n(normalized to chromosome arms)",
y = "Percentage\n(as fraction of all SCNAs)")
} else if (mode == "cd") {
# plot chr distribution
if (!all(c("chromosome", "location") %in% colnames(data))) {
stop("'chromosome', 'location' must be provided as columns.")
}
if (is.character(data$chromosome[1])) {
data$chromosome = sub("chr", "", data$chromosome, ignore.case = TRUE)
data = subset(data, chromosome %in% as.character(1:22))
} else if (is.integer(data$chromosome[1])) {
data = subset(data, chromosome %in% 1:22)
}
# only keep chr 1 to 22
data$chromosome = factor(as.character(data$chromosome), levels = as.character(1:22))
# only keep p, q, pq
data$location = factor(sub("[0-9]*", "", data$location),
levels = c("p", "pq", "q"))
if (sum(!(data$location %in% c("p", "pq", "q"))) > 0){
message("Discarding segments which located in centromere region...")
data = subset(data, location %in% c("p", "pq", "q"))
}
if (scale_chr) {
if (genome_build == "hg19") {
data("chromsize.hg19",
package = "VSHunter",
envir = environment())
chrlen = chromsize.hg19
} else {
data("chromsize.hg38",
package = "VSHunter",
envir = environment())
chrlen = chromsize.hg38
}
p = ggplot(data, aes(x = chromosome, fill = location)) +
geom_bar() + xlab("Chromosome")
q = ggplot_build(p)$data[[1]][, c("x", "count", "fill")]
q$x = factor(as.character(q$x), levels = as.character(1:22))
q$fill = factor(q$fill, levels = c("#F8766D", "#00BA38", "#619CFF"))
chrlen$chrom = gsub(
pattern = "chr",
replacement = "",
x = chrlen$chrom
)
q = merge(q, chrlen, by.x = "x", by.y = "chrom")
q[["count"]] = 1000000 * (q[["count"]] / q[["size"]])
# data.table::setDT(data)
# data.table::setDT(chrlen)
# chrlen$chrom = gsub(pattern = "chr", replacement = "", x = chrlen$chrom)
# data = merge(data, chrlen, by.x = "chromosome", by.y = "chrom")
# data[, c(), by = chromosome]
# plot
if (!fill) {
ggplot(q, aes(x, y = count, fill = fill)) +
geom_bar(stat = "identity") +
scale_fill_discrete(name = "location",
labels = c("p", "pq", "q")) +
labs(x = "Chromosome", y = "Normalized count (per Mb)")
} else {
ggplot(q, aes(x, y = count, fill = fill)) +
geom_bar(stat = "identity", position = "fill") +
scale_fill_discrete(name = "location",
labels = c("p", "pq", "q")) +
labs(x = "Chromosome", y = "Percentage")
}
} else {
# plot
if (!fill) {
ggplot(data, aes(x = chromosome, fill = location)) +
geom_bar() + xlab("Chromosome")
} else {
ggplot(data, aes(x = chromosome, fill = location)) +
geom_bar(position = "fill") + ylab("Percentage") + xlab("Chromosome")
}
}
}
}
getArmLocation = function(genome_build = c("hg19", "hg38")) {
# get chromosome lengths and centromere locations
if (genome_build == "hg19") {
data("chromsize.hg19",
package = "VSHunter",
envir = environment())
data("centromeres.hg19",
package = "VSHunter",
envir = environment())
chrlen = chromsize.hg19
centromeres = centromeres.hg19
} else {
data("chromsize.hg38",
package = "VSHunter",
envir = environment())
data("centromeres.hg38",
package = "VSHunter",
envir = environment())
chrlen = chromsize.hg38
centromeres = centromeres.hg38
}
# only keep 1:22 and x, y
chrlen = chrlen[chrlen$chrom %in% centromeres$chrom,]
# sort
chrlen = chrlen[order(chrlen$chrom),]
centromeres = centromeres[order(centromeres$chrom),]
# compute and get results
res = data.frame(
chrom = vector(mode = "character", length = 24),
p_start = vector("integer", length = 24),
p_end = vector("integer", length = 24),
p_length = vector("integer", length = 24),
q_start = vector("integer", length = 24),
q_end = vector("integer", length = 24),
q_length = vector("integer", length = 24),
total_size = vector("integer", length = 24),
stringsAsFactors = FALSE
)
i = 1
for (chr in chrlen$chrom) {
chrom = chr
# p
p_start = 1
p_end = centromeres$left.base[centromeres$chrom == chr]
p_length = p_end - p_start + 1
# q
q_start = centromeres$right.base[centromeres$chrom == chr]
q_end = chrlen$size[chrlen$chrom == chr]
q_length = q_end - q_start + 1
total_size = chrlen$size[chrlen$chrom == chr]
res[i, 1] = as.character(chrom)
res[i, 2:8] = c(p_start,
p_end,
p_length,
q_start,
q_end,
q_length,
total_size)
i = i + 1
}
#rm(i, chrom, p_start, p_end, p_length, q_start, q_end, q_length,
# total_size)
res
}
utils::globalVariables(
c(
".x",
"aes",
"chrom",
"chromosome",
"element_text",
"geom_bar",
"geom_line",
"ggplot",
"labs",
"scale_x_continuous",
"stat_function",
"theme",
"theme_classic",
"value",
"x",
"ylab",
"temp",
"fraction",
"..density..",
"geom_histogram",
"location",
"xlab",
"unit",
"count",
"scale_fill_discrete",
"ggplot_build"
)
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.