#'GC normalization.
#'
#'Normalize the bin count file to adjust for nonuniformity results from variation in GC content across the genome.
#'@param bin_mat The bin count files of cells. For example, one bin count file for a cell is CJA1023.varbin.20k.txt
#'@param gc The GC content table.
#'@return The normalized bin count table based on GC content.
#'@export
gc_one <- function(bin_mat, gc) {
bin_mat$gc.content <- gc$gc.content
a <- bin_mat$bincount + 1
bin_mat$ratio <- a / mean(a)
bin_mat$lowratio <- lowess.gc(bin_mat$gc.content, bin_mat$ratio)
return(bin_mat)
}
#'Segment the normalzied bin count profile for individual cell for GC content.
#'
#'Generate the segmented profile for individual cell with CBS.
#'@param bin_mat The normalized bin count files generated by gc_one.
#'@param alpha Parameter alpha defined in DNAcopy package.
#'@param nperm Parameter nperm defined in DNAcopy package.
#'@param undo.SD Parameter undo.SD defined in DNAcopy package.
#'@param min.width Parameter min.width defined in DNAcopy package.
#'@param method Default: "multiplier" to transform the ratio data to integer CN state. When genome is hg-dm hybrid, set method as "dmploidies" to use dm plodies as reference.
#'@param genome Default: "hg" (Human genome). hg-dm hybrid genome: "hgdm".
#'@param graphic Default: TRUE. Generate the copy number profile plot.
#'@return The segmented profile for individual cell
#'@export
cbs.segment_one <- function(bin_mat, alpha, nperm, undo.SD, min.width, method = "multiplier", genome = "hg", graphic = TRUE){
set.seed(25)
bin_mat$chrom.numeric <- substring(bin_mat$chrom, 4)
bin_mat$chrom.numeric[which(bin_mat$chrom == "chrX")] <- "23"
bin_mat$chrom.numeric[which(bin_mat$chrom == "chrY")] <- "24"
if (genome == "hgdm"){
bin_mat$chrom.numeric[which(bin_mat$chrom == "dm_chrX")] <- "25"
bin_mat$chrom.numeric[which(bin_mat$chrom == "dm_chr2L")] <- "26"
bin_mat$chrom.numeric[which(bin_mat$chrom == "dm_chr2R")] <- "27"
bin_mat$chrom.numeric[which(bin_mat$chrom == "dm_chr3L")] <- "28"
bin_mat$chrom.numeric[which(bin_mat$chrom == "dm_chr3R")] <- "29"
bin_mat$chrom.numeric[which(bin_mat$chrom == "dm_chr4")] <- "30"
}
bin_mat$chrom.numeric <- as.numeric(bin_mat$chrom.numeric)
CNA.object <- DNAcopy::CNA(log(bin_mat$lowratio, base=2), bin_mat$chrom.numeric,
bin_mat$chrompos, data.type="logratio")
smoothed.CNA.object <- DNAcopy::smooth.CNA(CNA.object)
segment.smoothed.CNA.object <- DNAcopy::segment(smoothed.CNA.object, alpha=alpha,
nperm=nperm, undo.splits="sdundo", undo.SD=undo.SD, min.width=2)
#A table contains segmented regions and the corresponding segment means (log)
thisShort <- segment.smoothed.CNA.object[[2]]
#A long table:each row corresponds to the segment mean for each bin
m <- matrix(data=0, nrow=nrow(bin_mat), ncol=1)
prevEnd <- 0
for (i in 1:nrow(thisShort)) {
thisStart <- prevEnd + 1
thisEnd <- prevEnd + thisShort$num.mark[i]
m[thisStart:thisEnd, 1] <- 2^thisShort$seg.mean[i]
prevEnd = thisEnd
}
cbs.long.nobad <- m[, 1]
##### NEW STUFF also check min.width=2 above
workShort <- thisShort
workShort$segnum <- 0
workShort$seg.start <- 0
workShort$seg.end <- 0
prevEnd <- 0
for (i in 1:nrow(thisShort)) {
thisStart <- prevEnd + 1
thisEnd <- prevEnd + thisShort$num.mark[i]
workShort$seg.start[i] <- thisStart
workShort$seg.end[i] <- thisEnd
workShort$segnum[i] <- i
prevEnd = thisEnd
}
### deal with short segments (appending, combining)
discardSegments <- TRUE
while (discardSegments) {
orderShort <- workShort[order(workShort$num.mark, abs(workShort$seg.mean)), ]
if (orderShort[1, "num.mark"] < min.width) {
workShort <- remove.segment(workShort, orderShort[1, "segnum"], bin_mat, undo.SD)
} else {
discardSegments <- FALSE
}
}
workShort <- sdundo.all(workShort, bin_mat, undo.SD)
thisShort <- workShort
##### END NEW STUFF
m <- matrix(data=0, nrow=nrow(bin_mat), ncol=1)
prevEnd <- 0
for (i in 1:nrow(thisShort)) {
thisStart <- prevEnd + 1
thisEnd <- prevEnd + thisShort$num.mark[i]
m[thisStart:thisEnd, 1] <- 2^thisShort$seg.mean[i]
prevEnd = thisEnd
}
bin_mat$seg.mean.LOWESS <- m[, 1]
chr <- bin_mat$chrom.numeric
chr.shift <- c(chr[-1], chr[length(chr)])
vlines <- c(1, bin_mat$abspos[which(chr != chr.shift)+1],
bin_mat$abspos[nrow(bin_mat)])
vlines.shift <- c(vlines[-1], 4*10^9)
chr.at <- vlines + (vlines.shift - vlines)/2
chr.text <- sub(".*chr", "", unique(as.character(bin_mat$chrom)))
col.chr.text <- c(rep("black", 24), rep("darkred", 6))[1:length(chr.text)]
old.par <- par(mfrow=c(2, 1))
par(mar=c(1.1,4.2,1.1,1.1))
if (graphic) {
plot(x = bin_mat$abspos, y = bin_mat$lowratio, log = "y", xaxt = "n", ylab = "Ratio", xlab = "", col = "#CCCCCC")
#axis(1, at = x.at, labels = x.labels)
lines(x = bin_mat$abspos, y = bin_mat$lowratio, col = "#CCCCCC")
points(x = bin_mat$abspos, y = bin_mat$seg.mean.LOWESS, col = "#0000AA")
lines(x = bin_mat$abspos, y = bin_mat$seg.mean.LOWESS, col = "#0000AA")
hlines = c(0.5, 1.0, 1.5, 2.0)
mtext(chr.text[1:24], at = chr.at[1:24], cex = 0.7, col = col.chr.text[1:24],las = 1)
if (genome == "hgdm"){
text(chr.at[25], max(bin_mat$lowratio), chr.text[25], col = col.chr.text[25],srt = 90,
adj = 1 ,cex=1)
mtext(chr.text[26], at = chr.at[26], cex = 0.7, col = col.chr.text[26],las = 1)
text(chr.at[27], max(bin_mat$lowratio), chr.text[27], col = col.chr.text[27],srt = 90,
adj = 1 ,cex=1)
mtext(chr.text[28], at = chr.at[28], cex = 0.7, col = col.chr.text[28],las = 1)
text(chr.at[29], max(bin_mat$lowratio), chr.text[29], col = col.chr.text[29],srt = 90,
adj = 1 ,cex=1)
mtext(chr.text[30], at = chr.at[30], cex = 0.7, col = col.chr.text[30],las = 1)
}
abline(h = hlines, col="black", lty = 3)
abline(v = vlines, col="black", lty = 2)
}
if (method == "multiplier"){
thisGrid <- seq(1.5, 5.5, by=0.05)
thisOuter <- bin_mat$seg.mean.LOWESS %o% thisGrid
thisOuterRound <- round(thisOuter)
thisOuterDiff <- (thisOuter - thisOuterRound) ^ 2
thisOuterColsums <- colSums(thisOuterDiff, na.rm = FALSE, dims = 1)
thisMultiplier <- thisGrid[which.min(thisOuterColsums)]
bin_mat$seg.quantal <- bin_mat$seg.mean.LOWESS * thisMultiplier
bin_mat$ratio.quantal <- bin_mat$lowratio * thisMultiplier
if (graphic) {
plot(x = bin_mat$abspos, y = bin_mat$ratio.quantal, log = "y", xaxt = "n", yaxt = "n", ylab = "Ratio.quantal", xlab= "", col = "#CCCCCC")
# axis(1, at = x.at, labels = x.labels)
lines(x = bin_mat$abspos, y = bin_mat$ratio.quantal, col = "#CCCCCC")
points(x = bin_mat$abspos, y = bin_mat$seg.quantal, col = "#0000AA")
lines(x = bin_mat$abspos, y = bin_mat$seg.quantal, col = "#0000AA")
hlines = c(1, 3, 4, 5, 6)
mtext(chr.text[1:24], at = chr.at[1:24], cex = 0.7, col = col.chr.text[1:24],las = 1)
if (genome == "hgdm"){
text(chr.at[25], max(bin_mat$lowratio), chr.text[25], col = col.chr.text[25],srt = 90,
adj = 1 ,cex=1)
mtext(chr.text[26], at = chr.at[26], cex = 0.7, col = col.chr.text[26],las = 1)
text(chr.at[27], max(bin_mat$lowratio), chr.text[27], col = col.chr.text[27],srt = 90,
adj = 1 ,cex=1)
mtext(chr.text[28], at = chr.at[28], cex = 0.7, col = col.chr.text[28],las = 1)
text(chr.at[29], max(bin_mat$lowratio), chr.text[29], col = col.chr.text[29],srt = 90,
adj = 1 ,cex=1)
mtext(chr.text[30], at = chr.at[30], cex = 0.7, col = col.chr.text[30],las = 1)
}
abline(h = hlines, col="black", lty = 3)
abline(v = vlines, col="black", lty = 2)
abline(h = 2, col="red", lty = 1)
axis(2, at = c(2,4,6), labels = c("2","4","6"))
}
}
if (method == "dmploidies"){
dmbinID <- which(bin_mat$chrom.numeric%in%(25:30))
median_ratio <- median(bin_mat$lowratio[dmbinID], na.rm = TRUE)
theMultiplier <- 2/median_ratio
bin_mat$seg.quantal <- bin_mat$seg.mean.LOWESS * theMultiplier
bin_mat$ratio.quantal <- bin_mat$lowratio * theMultiplier
if (genome == "hg"){
stop("method dmploidies only works when genome is hgdm!")
}
if (graphic) {
plot(x = bin_mat$abspos, y = bin_mat$ratio.quantal, log = "y", xaxt = "n",yaxt="n", ylab = "Ratio.quantal", xlab="", col = "#CCCCCC")
# axis(1, at = x.at, labels = x.labels)
lines(x = bin_mat$abspos, y = bin_mat$ratio.quantal, col = "#CCCCCC")
points(x = bin_mat$abspos, y = bin_mat$seg.quantal, col = "#0000AA")
lines(x = bin_mat$abspos, y = bin_mat$seg.quantal, col = "#0000AA")
hlines = c(1, 3, 4, 5, 6)
mtext(chr.text[1:24], at = chr.at[1:24], cex = 0.7, col = col.chr.text[1:24],las = 1)
text(chr.at[25], max(bin_mat$lowratio), chr.text[25], col = col.chr.text[25],srt = 90,
adj = 1 ,cex=1)
mtext(chr.text[26], at = chr.at[26], cex = 0.7, col = col.chr.text[26],las = 1)
text(chr.at[27], max(bin_mat$lowratio), chr.text[27], col = col.chr.text[27],srt = 90,
adj = 1 ,cex=1)
mtext(chr.text[28], at = chr.at[28], cex = 0.7, col = col.chr.text[28],las = 1)
text(chr.at[29], max(bin_mat$lowratio), chr.text[29], col = col.chr.text[29],srt = 90,
adj = 1 ,cex=1)
mtext(chr.text[30], at = chr.at[30], cex = 0.7, col = col.chr.text[30],las = 1)
abline(h = hlines, col="black", lty = 3)
abline(v = vlines, col="black", lty = 2)
abline(h = 2, col="red", lty = 1)
axis(2, at = c(2,4,6), labels = c("2","4","6"))
}
}
par(old.par)
return(bin_mat)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.