#' CN Processing Centromeres
#'
#' @param dataName
#' @param dataDir
#' @param outputDir
#' @param alpha
#' @param min.width
#' @param undo.prune
#' @param fileName
#' @param cpu
#' @param centName
#' @param makeFig
#'
#' @return
#' @export
#'
#' @examples
CNProcessingCentromeres <- function(dataName = dataName,
dataDir = dataDir,
outputDir = outputDir,
alpha = 0.01,
min.width= 5,
undo.prune = 0.05,
fileName = ".vb",
cpu = cpu,
centName = centName,
makeFig = FALSE)
{
set.seed(5471)
tryCatch(expr = { library("DNAcopy")},
error = function(e) {
source("https://bioconductor.org/biocLite.R")
biocLite("DNAcopy")},
finally = library("DNAcopy"))
library(parallel)
centr.loc <- read.delim(centName, as.is=TRUE)
if (!file.exists(outputDir))
{ dir.create(outputDir) }
if (!file.exists(paste(outputDir,"IGV_shorts", sep="/")))
{ dir.create(paste(outputDir,"IGV_shorts", sep="/")) }
if (!file.exists(paste(outputDir,"IGV_longs", sep="/")))
{ dir.create(paste(outputDir,"IGV_longs", sep="/")) }
cnBins <- NULL
cnRatio <- NULL
for(x in list.files(dataDir, pattern=fileName))
{
temp <- read.delim(paste(dataDir,x, sep="/"), header=FALSE)
sample.name <- substring(x, 1, nchar(x) - nchar(fileName))
sample.name <- sub(".bl","",sample.name)
if(is.null(cnBins))
{
cnBins <- temp[,c(1:3,7)]
cnRatio <- temp[,c(1:3,6)]
colnames(cnBins)[ncol(cnBins)] <- sample.name
colnames(cnRatio)[ncol(cnRatio)] <- sample.name
} else
if(!is.null(cnBins))
{
temp.bin <- temp$V7
temp.r <- temp$V6
cnBins <- cbind(cnBins, temp.bin)
cnRatio <- cbind(cnRatio, temp.r)
colnames(cnBins)[which(colnames(cnBins) == "temp.bin")] <- sample.name
colnames(cnRatio)[which(colnames(cnRatio) == "temp.r")] <- sample.name
}
}
colnames(cnBins)[match(c("V1", "V2", "V3"), colnames(cnBins))] <- c("chrom","chrompos","abspos")
colnames(cnRatio)[match(c("V1", "V2", "V3"), colnames(cnRatio))] <- c("chrom","chrompos","abspos")
write.table(cnBins, file = paste(outputDir, "/uber.", dataName, ".bin.txt", sep=""),
quote = FALSE, sep = "\t",row.names = FALSE)
write.table(cnRatio, file = paste(outputDir, "/uber.", dataName, ".ratio.txt", sep=""),
quote = FALSE, sep = "\t",row.names = FALSE)
fun_parallel_seg<-function(x){
sample.name <- colnames(cnRatio[,c(match(c("chrom","chrompos","abspos"), colnames(cnRatio)), x)])[4]
thisRatio <- cbind(cnBins[,c(match(c("chrom","chrompos","abspos"), colnames(cnBins)), x)],cnRatio[,x] )
colnames(thisRatio) <-c("chrom", "chrompos", "abspos", "bincount", "ratio")
thisRatio$ratio[which(thisRatio$ratio == 0)] <- 1e-3
thisRatio$chrom <- as.numeric(thisRatio$chrom)
CNA.object <- CNA(log(thisRatio$ratio, base=2), thisRatio$chrom, thisRatio$chrompos, data.type="logratio",
sampleid=sample.name)
smoothed.CNA.object <- smooth.CNA(CNA.object)
segment.smoothed.CNA.object <- segment(smoothed.CNA.object, alpha=alpha, min.width=5, undo.splits="prune", undo.prune=undo.prune)
thisShort <- segment.smoothed.CNA.object[[2]]
log.seg.mean.LOWESS <- rep(thisShort$seg.mean, thisShort$num.mark)
merge.obj <- MergeLevels(log(thisRatio$ratio, base=2),log.seg.mean.LOWESS)
#keep absolution ratio data for plots
seg.mean.LOWESS <- rep(2^thisShort$seg.mean, thisShort$num.mark)
# after mergeLevels
log.seg.mean <- merge.obj$vecMerged
seg.merge<- 2^merge.obj$vecMerged
################this step should be flexable for different datasets, default not to include this unless the samples are noisy
# mean.log.seg <- median(log.seg.mean)
# experimental.diff <- abs(log.seg.mean-log(thisRatio$ratio, base=2))
# experimental.MAD <-median(experimental.diff)
# m <- 1
#for (m in 1:length(log.seg.mean)){
#if(abs(log.seg.mean[m]-mean.log.seg)< 1.25*experimental.MAD){
#log.seg.mean[m] <- mean.log.seg}
#m <- m+1
#}
##############################end of cutoff steps
meanseg <- 2^log.seg.mean
ratiox <- meanseg/mean(meanseg)
ratiox[ratiox==0] <- 0.0001
thisRatio$seg.mean.LOWESS <- ratiox
#making the short file after mergeLevels, note thisShort is written to seg file that keep raw segmented ratios
#thisRatio <- read.delim("/volumes/lab/ruligao/CNV_BRAC/06_13_2016_ES-BRCA-A_PM320/output_regular_parameter/IGV_longs/BRCA-A01.hg19.50k.varbin.txt")
#head(thisRatio)
#sample.name <- "test"
Short <- NULL
chr <- rle(thisRatio$chrom)[[2]]
for (x in 1:length(chr)){
thisRatio.sub <- thisRatio[which(thisRatio$chrom==chr[x]), ]
seg.mean.sub <- log(rle(thisRatio.sub$seg.mean.LOWESS)[[2]], base=2)
rle.length.sub <- rle(thisRatio.sub$seg.mean.LOWESS)[[1]]
num.mark.sub <- seq(1,length(rle.length.sub),1)
loc.start.sub <-seq(1,length(rle.length.sub),1)
loc.end.sub <- seq(1,length(rle.length.sub),1)
len <-0
j <-1
for (j in 1: length(rle.length.sub)){
num.mark.sub[j] <- rle.length.sub[j]
loc.start.sub[j] <- thisRatio.sub$chrompos[len+1]
len <- num.mark.sub[j]+len
loc.end.sub[j] <- thisRatio.sub$chrompos[len]
j <- j+1
}
ID <- rep(sample.name, times=length(rle.length.sub))
chrom <- rep(chr[x], times=length(rle.length.sub))
Short.sub <- cbind(ID,chrom,loc.start.sub,loc.end.sub,num.mark.sub,seg.mean.sub)
Short <- rbind(Short, Short.sub)
x <- x+1
}
colnames(Short) <- c("ID","chrom","loc.start","loc.end","num.mark","normalized_log2_seg.mean")
if(makeFig)
{
if (!file.exists(paste(outputDir,"Figures", sep="/")))
{ dir.create(paste(outputDir,"Figures", sep="/")) }
chr <- thisRatio$chrom
chr.shift <- c(chr[-1], chr[length(chr)])
vlines <- c(1, thisRatio$abspos[which(chr != chr.shift) + 1], thisRatio$abspos[nrow(thisRatio)])
hlines <- c(0.5, 1.0, 1.5, 2.0)
chr.text <- c(1:22, "X", "Y")
vlines.shift <- c(vlines[-1], 4*10^9)
chr.at <- vlines + (vlines.shift - vlines) / 2
x.at <- c(0, 0.5, 1, 1.5, 2, 2.5, 3) * 10^9
x.labels <- c("0", "0.5", "1.0", "1.5", "2.0", "2.5", "3.0")
y.at <- c(0.005, 0.020, 0.100, 0.500, 2.000)
y.labels <- c("0.005", "0.020", "0.100", "0.500", "2.000")
print("Making figure")
jpeg(paste(outputDir,"/Figures/profile_", sample.name, ".jpg", sep=""), height=800, width=1200)
par(mar=c(5.1,4.1,4.1,4.1))
plot(x=thisRatio$abspos, y=thisRatio$ratio, log="y", main=c(paste(sample.name, ": GCnormalized+MergeLevels", sep=""),paste0("Average Read Counts Each Bin: ",round(mean(thisRatio$bincount),0))),xaxt="n", xlab="Genome Position (Gb)", yaxt="n", ylab="Copy Number Ratio", col="grey", cex=0.5)
axis(1, at=x.at, labels=x.labels)
axis(2, at=y.at, labels=y.labels)
lines(x=thisRatio$abspos, y=thisRatio$ratio, col="grey", cex=0.5)
points(x=thisRatio$abspos, y=thisRatio$seg.mean.LOWESS, col="blue",lwd=1.5, cex=0.5)
lines(x=thisRatio$abspos, y=thisRatio$seg.mean.LOWESS, col="blue", lwd=1.5,cex=0.5)
#points(x=thisRatio$abspos, y=seg.mean.LOWESS, col="blue",lwd=1.5, cex=0.5)
#lines(x=thisRatio$abspos, y=seg.merge, col="red",lwd=1.5, cex=0.5)
#points(x=thisRatio$abspos, y=seg.merge, col="red",lwd=1.5, cex=0.5)
#lines(x=thisRatio$abspos, y=seg.mean.LOWESS, col="blue",lwd=1.5, cex=0.5)
abline(h=hlines, col="red")
abline(v= centr.loc$abs, col="red", lty = 2)
abline(v=vlines, par(lty=2))
mtext(chr.text, at = chr.at)
dev.off()
}
write.table(thisRatio, sep="\t", file=paste(outputDir, "/IGV_longs/", sample.name, ".hg19.50k.varbin.txt",
sep=""), quote=FALSE, row.names=FALSE)
write.table(Short, sep="\t", file=paste(outputDir, "/IGV_shorts/", sample.name, ".hg19.50k.varbin.txt",
sep=""), quote=FALSE, row.names=FALSE)
normlized_seg<-list(thisRatio$seg.mean.LOWESS)
names(normlized_seg)<-sample.name
sub_thisShort<-list(thisShort)
names(sub_thisShort)<-sample.name
#return(normlized_seg)
results <- list(
normlized_seg=normlized_seg,
thisShort=thisShort
)
return(results)
}
mc <- getOption("mc.cores", cpu)
res<-mclapply(4:ncol(cnRatio),fun_parallel_seg,mc.cores=mc)
all_seg<-as.data.frame(sapply(res,function(x){x$normlized_seg}))
chr_info<-cnRatio[, c("chrom","chrompos","abspos")]
all_seg<-cbind(chr_info,all_seg)
all_thisShort<-lapply(res,function(x){x$thisShort})
all_thisShort<-do.call(rbind,all_thisShort)
write.table(all_seg, file = paste(outputDir, "/uber.", dataName, ".seg.txt", sep=""),
quote = FALSE, sep = "\t",row.names = FALSE)
colnames(all_thisShort) <- c("ID","chrom","loc.start","loc.end","num.mark","Raw_seg.mean")
write.table(all_thisShort, sep="\t", file=paste(outputDir, "/IGV_shorts/", dataName, ".all.hg19.50k.varbin.seg",
sep=""), quote=FALSE, row.names=FALSE, col.names=TRUE)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.