plotCNA <- function(destination.folder, smoothed = TRUE, sample.plot, y.min,
y.max, ...) {
start.time <- Sys.time()
## Restore work directory upon exit
wd.orig <- getwd()
on.exit(setwd(wd.orig))
## Make destination folder path absolute
destination.folder <- tools::file_path_as_absolute(destination.folder)
destination.folder <- file.path(destination.folder, "CNAprofiles")
## Provide output to log
flog.appender(appender.file(file.path(destination.folder,
"CopywriteR.log")))
args.ellipsis <- unlist(list(...))
flog.info(paste0("plotCNA was run using the following commands:", "\n\n",
"plotCNA(destination.folder = \"",
dirname(destination.folder),
"\", smoothed = ", smoothed,
if (!missing(sample.plot)) {", sample.plot = sample.plot"},
if (!missing(y.min)) {paste0(", y.min = ", y.min)},
if (!missing(y.max)) {paste0(", y.max = ", y.max)},
if (!missing(...)) {paste0(", ", paste(names(args.ellipsis),
"=", args.ellipsis,
collapse = ", "))},
")"))
## Check the existence of folders and files
if (!file.exists(destination.folder)) {
stop(.wrap("The destination folder could not be found. Please change",
"the path specified in", sQuote(destination.folder)))
}
# Pre-define inputStructure variable to avoid it from raising NOTES during
# R CMD CHECK (variable is loaded from file below)
inputStructure <- NULL
# Load inputStructure variable
load(file.path(destination.folder, "input.Rdata"))
## Set variables
chromosomes <- inputStructure$chromosomes
nautosomes <- length(grep("[0-9]", chromosomes))
prefix <- inputStructure$prefix
bin.size <- inputStructure$bin.size
## Set sample.plot
if (missing(sample.plot)) {
sample.plot <- apply(inputStructure$sample.control, c(1, 2),
function(x) {
x <- paste0("log2.", basename(x))
})
all.samples <- unique(as.vector(sample.plot))
sample.plot <- rbind(data.frame(sample.plot, stringsAsFactors = FALSE),
data.frame(samples = all.samples,
controls = rep(NA, length(all.samples)),
stringsAsFactors = FALSE))
} else {
colnames(sample.plot) <- c("samples", "controls")
sample.plot[, ] <- apply(sample.plot, c(1, 2), function(x) {
if (!is.na(x)) {
x <- paste0("log2.", gsub("_properreads", "", basename(x)))
} else {
x <- as.character(x)
}
})
}
## Read data
log2.read.counts <- read.table(file = file.path(destination.folder,
"log2_read_counts.igv"),
sep = "\t", header = TRUE,
check.names = FALSE)
## Remove prefix and convert chromosome names to integers
log2.read.counts$Chromosome <- gsub(prefix, "", log2.read.counts$Chromosome)
chromosomes <- gsub(prefix, "", chromosomes)
log2.read.counts$Chromosome <- gsub("X", nautosomes + 1,
log2.read.counts$Chromosome)
chromosomes <- gsub("X", nautosomes + 1, chromosomes)
log2.read.counts$Chromosome <- gsub("Y", nautosomes + 2,
log2.read.counts$Chromosome)
chromosomes <- gsub("Y", nautosomes + 2, chromosomes)
log2.read.counts$Chromosome <- as.integer(log2.read.counts$Chromosome)
chromosomes <- sort(as.integer(chromosomes))
## Fix behaviour of DNAcopy with 'outlier' values
log2.read.counts[, 5:ncol(log2.read.counts)] <-
apply(log2.read.counts[, 5:ncol(log2.read.counts), drop = FALSE],
c(1, 2), function(x) {
if (is.na(x)) {
x <- NA
} else if (x < -5) {
x <- -5
} else if (x > 10) {
x <- 10
} else {
x <- x
}
})
## Create table with values to be plotted
if (all(na.omit(unlist(sample.plot)) %in% colnames(log2.read.counts))) {
plotting.values <-
log2.read.counts[, c("Chromosome", "Start", "End", "Feature")]
for (i in seq_len(nrow(sample.plot))) {
if (!is.na(sample.plot$controls[i])) {
plotting.values[, paste0(sample.plot$samples[i], ".vs.",
sample.plot$controls[i])] <-
log2.read.counts[, sample.plot$samples[i]] -
log2.read.counts[, sample.plot$controls[i]]
} else {
plotting.values[, paste0(sample.plot$samples[i], ".vs.none")] <-
log2.read.counts[, sample.plot$samples[i]]
}
}
} else {
unique.samples <- unique(na.omit(unlist(sample.plot)))
print(unique.samples)
print(unique.samples %in% colnames(log2.read.counts))
missing.samples <- unique.samples[!unique.samples %in% colnames(log2.read.counts)]
print(missing.samples)
stop(.wrap("The following samples refer to .bam files that have not",
"been processed by CopywriteR:",
paste(sQuote(missing.samples), collapse = ", "),
". Please re-analyze all required samples using CopywriteR",
"and run the plotCNA function again."))
}
## Apply DNAcopy
CNA.object <- CNA(plotting.values[, 5:ncol(plotting.values), drop = FALSE],
plotting.values$Chromosome,
rowMeans(plotting.values[, c("Start", "End")]),
data.type = "logratio",
sampleid = colnames(plotting.values)[5:ncol(plotting.values)])
if (smoothed) {
CNA.object <- smooth.CNA(CNA.object)
}
segment.CNA.object <- segment(CNA.object, verbose = 1, ...)
save(segment.CNA.object, file = file.path(destination.folder,
"segment.Rdata"))
## Calculate the chromosome lengths from the bin.bed file
chrom.lengths <-
scanBamHeader(inputStructure$sample.control$samples[1])[[1]]$targets
names(chrom.lengths) <- gsub(prefix, "", names(chrom.lengths))
chrom.lengths <- data.frame(Chromosome = names(chrom.lengths),
Length = chrom.lengths, row.names = NULL,
stringsAsFactors = FALSE)
suppressWarnings(chrom.lengths <- chrom.lengths[chrom.lengths$Chromosome %in% c("X", "Y") |
!is.na(as.integer(as.vector(chrom.lengths$Chromosome))), ])
chrom.lengths <- chrom.lengths[mixedorder(chrom.lengths$Chromosome), ]
chrom.lengths$Chromosome <- gsub("X", nautosomes + 1,
chrom.lengths$Chromosome)
chrom.lengths$Chromosome <- gsub("Y", nautosomes + 2,
chrom.lengths$Chromosome)
chrom.lengths[, "CumSum"] <- c(0, cumsum(as.numeric(chrom.lengths$Length))[seq_len(nrow(chrom.lengths) - 1)])
## Create plots
segment.CNA.object$output[, "start.position.chrom"] <-
chrom.lengths$CumSum[match(as.integer(segment.CNA.object$output$chrom),
chrom.lengths$Chromosome)]
segment.CNA.object$output$loc.start <- segment.CNA.object$output$loc.start + segment.CNA.object$output$start.position.chrom
segment.CNA.object$output$loc.end <- segment.CNA.object$output$loc.end + segment.CNA.object$output$start.position.chrom
segment.CNA.object$output$start.position.chrom <- NULL
segment.CNA.object$data[, "start.position.chrom"] <-
chrom.lengths$CumSum[match(as.integer(segment.CNA.object$data$chrom),
chrom.lengths$Chromosome)]
segment.CNA.object$data$maploc <- segment.CNA.object$data$maploc + segment.CNA.object$data$start.position.chrom
segment.CNA.object$data$start.position.chrom <- NULL
# Get sample names
samples <- colnames(segment.CNA.object$data)
samples <- samples[3:length(samples)]
# Create plots folder
plot.folder <- file.path(destination.folder, "plots")
dir.create(plot.folder)
setwd(plot.folder)
# Set boundaries plots
if (missing(y.min)) {
y.min <- -2
}
if (missing(y.max)) {
y.max <- 5
}
# Loop through samples using lapply
invisible(lapply(samples, function(x) {
# Select sample
select.sample <- x
current.sample <- segment.CNA.object
current.sample$data <-
current.sample$data[, c("chrom", "maploc", select.sample)]
current.sample$data <-
current.sample$data[!is.na(current.sample$data[, select.sample]), ]
current.sample$output <-
current.sample$output[current.sample$output$ID == select.sample, ]
# Print MAD-values in log file
flog.info(paste("The MAD-value for sample", select.sample, "is:",
round(madDiff(current.sample$data[, select.sample]), 3)))
# Create and set new directory
dir.create(file.path(plot.folder, select.sample))
setwd(file.path(plot.folder, select.sample))
# Loop through chromosomes using lapply
invisible(lapply(c(as.list(chromosomes), list(chromosomes)),
function(x) {
# Select chromosome
select.chrom <- x
current.sample$data <- current.sample$data[current.sample$data$chrom %in% select.chrom, ]
current.sample$output <- current.sample$output[current.sample$output$chrom %in% select.chrom, ]
# Set variables
genome.position.min <-
chrom.lengths$CumSum[match(min(select.chrom),
chrom.lengths$Chromosome)]
genome.position.max <-
chrom.lengths$CumSum[match(max(select.chrom),
chrom.lengths$Chromosome)] +
chrom.lengths$Length[match(max(select.chrom), chrom.lengths$Chromosome)]
# Plot data
if (length(x) > 1) {
name.chrom <- "all_chrom"
} else {
name.chrom <- paste0("chrom_", x)
}
pdf(file = paste0(name.chrom, ".pdf"), width = 14, height = 7)
plot(current.sample$data[, "maploc"],
current.sample$data[, select.sample], ylim = c(y.min, y.max),
pch = ".", cex = 1, xlim = c(genome.position.min,
genome.position.max),
xaxs = "i", xlab = "", ylab = "log2 value", xaxt = "n",
main = select.sample)
points(current.sample$data$maploc[current.sample$data[, select.sample] > y.max],
rep.int(y.max, sum(current.sample$data[, select.sample] > y.max)),
cex = 0.25, col = "green", pch = 2)
points(current.sample$data$maploc[current.sample$data[, select.sample] < y.min],
rep.int(y.min, sum(current.sample$data[, select.sample] < y.min)),
cex = 0.25, col = "red", pch = 6)
invisible(apply(current.sample$output, 1, function(x) {
segments(x0 = as.numeric(x["loc.start"]),
y0 = as.numeric(x["seg.mean"]),
x1 = as.numeric(x["loc.end"]),
col = "red", pch = ".", cex = 0.2)
}))
par(xpd = TRUE)
text(x = 0.96 * (genome.position.max - genome.position.min) + genome.position.min,
y = 0.075 * (y.max - y.min) + y.max,
labels = paste0("mad = ", round(madDiff(current.sample$data[, select.sample]), 3)))
text(x = 0.035 * (genome.position.max - genome.position.min) + genome.position.min,
y = 0.075 * (y.max - y.min) + y.max,
labels = paste0(bin.size / 1000, " kb bins"))
par(xpd = FALSE)
ticks <- (chrom.lengths$CumSum + chrom.lengths$Length / 2)[select.chrom]
axis(1, at = ticks, labels = select.chrom)
if (length(select.chrom) > 1) {
abline(v = chrom.lengths$CumSum[2:nrow(chrom.lengths)],
lty = "dotted")
}
dev.off()
}))
}))
flog.info(paste("Total calculation time of plotCNA was",
round(difftime(Sys.time(), start.time, units = "mins"), 2),
"minutes"))
cat("Total calculation time of CopywriteR was: ",
Sys.time() - start.time, "\n\n")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.