Nothing
# What does ACEcall contribute to currently available callers for CNAs? Well ...
# you can set a cutoff for absolute copy number gains or losses. A gain of 0.1
# can be "irrelevant" in a very pure tumor sample, but represents a full gain in
# a sample with only 10% tumor material! This function will output a new
# "template" data frame that contains the adjusted copynumber and segment
# information, a call, and a p-value and q-value assessing the probability that
# if the true segment mean equals the ploidy, the resulting or a more extreme
# segment mean would be found. The plot color codes the segments according to
# the calls.
ACEcall <- function(template, QDNAseqobjectsample=FALSE, cellularity=1, ploidy=2, standard, plot=TRUE, title,
pcutoff, qcutoff=-3, subcutoff=0.2, trncname=FALSE, cap=12, bottom=0, chrsubset, onlyautosomes = TRUE) {
if(QDNAseqobjectsample) {
if(missing(title)) {
pd<-Biobase::pData(template)
if(trncname==TRUE) {pd$name <- gsub("_.*","",pd$name)}
if(trncname!=FALSE&&trncname!=TRUE) {pd$name <- gsub(trncname,"",pd$name)}
title <- pd$name[QDNAseqobjectsample]
}
template <- objectsampletotemplate(template, QDNAseqobjectsample)
}
if(missing(title)) {title <- "Plot"}
segmentdata <- rle(as.vector(na.exclude(template$segments)))
if(missing(standard) || !is(standard, "numeric")) { standard <- median(rep(segmentdata$values,segmentdata$lengths)) }
adjustedcopynumbers <- ploidy + ((template$copynumbers-standard)*(cellularity*(ploidy-2)+2))/(cellularity*standard)
adjcopynumberdata <- as.vector(na.exclude(adjustedcopynumbers))
adjustedsegments <- ploidy + ((template$segments-standard)*(cellularity*(ploidy-2)+2))/(cellularity*standard)
template$chr <- gsub("chr","",template$chr,ignore.case = TRUE)
df <- template
templatena <- na.exclude(template)
bin <- templatena$bin
df$adjustedcopynumbers <- adjustedcopynumbers
df$adjustedsegments <- adjustedsegments
num_bins <- segmentdata$lengths
segment_mean <- c()
segment_SE <- c()
calls <- c()
probnorm <- c()
qprobnorm <- c()
probloss <- c()
probgain <- c()
probdloss <- c()
probamp <- c()
color <- c()
p <- c()
dl <- 1-subcutoff
l <- round(ploidy-1,digits=0)+subcutoff
sl <- round(ploidy,digits=0)-subcutoff
sg <- round(ploidy,digits=0)+subcutoff
g <- round(ploidy+1,digits=0)-subcutoff
dg <- round(ploidy+2,digits=0)-subcutoff
amp <- round(ploidy+3,digits=0)-subcutoff
counter <- 1
for (i in seq_along(segmentdata$lengths)) {
segment_mean[i] <- mean(adjcopynumberdata[seq(counter, counter+segmentdata$lengths[i]-1)])
segment_SE[i] <- sd(adjcopynumberdata[seq(counter, counter+segmentdata$lengths[i]-1)])/sqrt(segmentdata$lengths[i])
p[i] <- 2*(1-pnorm(abs(round(ploidy,digits = 0)-segment_mean[i])/segment_SE[i]))
probnorm[i] <- round(log(p[i],10),digits=3)
counter <- counter+segmentdata$lengths[i]
}
qprobnorm <- round(log(p.adjust(p, method='BH'),10),digits=3)
calls[(segment_mean<sl)] <- -0.5
color[(segment_mean<sl)] <- 'turquoise'
calls[(segment_mean<l)] <- -1
color[(segment_mean<l)] <- 'blue'
calls[(segment_mean<dl)] <- -2
color[(segment_mean<dl)] <- 'darkblue'
calls[(segment_mean>sg)] <- 0.5
color[(segment_mean>sg)] <- 'gold'
calls[(segment_mean>g)] <- 1
color[(segment_mean>g)] <- 'darkorange'
calls[(segment_mean>dg)] <- 2
color[(segment_mean>dg)] <- 'red'
calls[(segment_mean>amp)] <- 3
color[(segment_mean>amp)] <- 'purple'
calls[(segment_mean>=sl&segment_mean<=sg)] <- 0
color[(segment_mean>=sl&segment_mean<=sg)] <- 'black'
if(missing(pcutoff)){
calls[(qprobnorm>=qcutoff)] <- 0
color[(qprobnorm>=qcutoff)] <- 'black'
} else {
calls[(probnorm>=pcutoff)] <- 0
color[(probnorm>=pcutoff)] <- 'black'
}
templatena$segment_mean <- rep(segment_mean,num_bins)
templatena$segment_SE <- rep(segment_SE,num_bins)
templatena$probnorm <- rep(probnorm,num_bins)
templatena$qprobnorm <- rep(qprobnorm,num_bins)
templatena$calls <- rep(calls,num_bins)
templatena$color <- rep(color,num_bins)
df$segment_mean <- rep(NA,length(df$bin))
df$segment_mean[templatena$bin] <- templatena$segment_mean
df$segment_SE <- rep(NA,length(df$bin))
df$segment_SE[templatena$bin] <- templatena$segment_SE
df$pnorm_log10 <- rep(NA,length(df$bin))
df$pnorm_log10[templatena$bin] <- templatena$probnorm
df$qnorm_log10 <- rep(NA,length(df$bin))
df$qnorm_log10[templatena$bin] <- templatena$qprobnorm
df$calls <- rep(NA,length(df$bin))
df$calls[templatena$bin] <- templatena$calls
df$color <- rep('black',length(df$bin))
df$color[templatena$bin] <- templatena$color
if(plot==TRUE){
if(onlyautosomes==TRUE) {
rlechr <- rle(as.vector(template$chr[template$chr %in% seq(1, 22)]))
} else if (onlyautosomes==FALSE) {rlechr <- rle(as.vector(template$chr))
} else {rlechr <- rle(as.vector(template$chr[template$chr %in% seq(1, onlyautosomes)]))}
binchrend <- c()
currentbin <- 0
binchrmdl <- c()
for (i in seq_along(rlechr$values)) {
currentmiddle <- currentbin+rlechr$lengths[i]/2
currentbin <- currentbin+rlechr$lengths[i]
binchrend <- append(binchrend, currentbin)
binchrmdl <- append(binchrmdl, currentmiddle)
}
dfna <- na.exclude(df)
if(bottom=="min"){bottom<-floor(min(dfna$adjustedcopynumbers))}
if(cap=="max"){cap<-ceiling(max(dfna$adjustedcopynumbers))}
cappedcopynumbers <- dfna[(dfna$adjustedcopynumbers > cap),]
if(length(cappedcopynumbers$adjustedcopynumbers)>0) {cappedcopynumbers$adjustedcopynumbers <- cap-0.1}
cappedsegments <- dfna[(dfna$adjustedsegments > cap),]
if(length(cappedsegments$segments)>0) {cappedsegments$adjustedsegments <- cap-0.1}
toppedcopynumbers <- dfna[(dfna$adjustedcopynumbers <= bottom),]
if(length(toppedcopynumbers$adjustedcopynumbers)>0) {toppedcopynumbers$adjustedcopynumbers <- bottom+0.1}
toppedsegments <- dfna[(dfna$adjustedsegments <= bottom),]
if(length(toppedsegments$adjustedsegments)>0) {toppedsegments$adjustedsegments <- bottom+0.1}
line1 <- paste0("Cellularity: ", cellularity)
if(missing(chrsubset)){
calledplot <- ggplot2::ggplot() +
scale_y_continuous(name = "copies", limits = c(bottom,cap), breaks = seq(bottom, cap), expand=c(0,0)) +
scale_x_continuous(name = "chromosome", limits = c(0,tail(binchrend,1)), breaks = binchrmdl, labels = rlechr$values, expand = c(0,0)) +
geom_hline(yintercept = seq(0, 4), color = '#333333', size = 0.5) +
geom_hline(yintercept = seq(5, cap-1), color = 'lightgray', size = 0.5) +
geom_vline(xintercept = binchrend, color = "#666666", linetype = "dashed") +
geom_point(aes(x = bin,y = adjustedcopynumbers),data=dfna[(dfna$adjustedcopynumbers>bottom&dfna$adjustedcopynumbers<cap),], size = 0.1, color = 'gray') +
geom_point(aes(x = bin,y = adjustedcopynumbers),data=cappedcopynumbers, size = 0.5, color = 'gray', shape = 24) +
geom_point(aes(x = bin,y = adjustedcopynumbers),data=toppedcopynumbers, size = 0.5, color = 'gray', shape = 25) +
geom_point(aes(x = bin,y = adjustedsegments),data=dfna[(dfna$adjustedsegments>bottom&dfna$adjustedsegments<cap),], size = 1, color = dfna$color[(dfna$adjustedsegments>bottom&dfna$adjustedsegments<cap)]) +
geom_point(aes(x = bin,y = adjustedsegments),data=cappedsegments, size = 1, color = 'purple', shape = 24) +
geom_point(aes(x = bin,y = adjustedsegments),data=toppedsegments, size = 1, color = 'darkblue', shape = 25) +
theme_classic() + theme(
axis.line = element_line(color='black'), axis.ticks = element_line(color='black'), axis.text = element_text(color='black')) +
ggtitle(title) +
theme(plot.title = element_text(hjust = 0.5)) +
geom_rect(aes(xmin=binchrmdl[1]/10, xmax = binchrmdl[4], ymin = cap-0.9, ymax = cap), fill = 'white') +
annotate("text", x = binchrmdl[2], y = cap-0.3, label = line1)
} else {
firstchr <- range(chrsubset)[1]
lastchr <- range(chrsubset)[2]
if(firstchr==1){firstbin<-0
} else {firstbin<-binchrend[firstchr-1]+1}
dfna <- dfna[dfna$chr %in% rlechr$values[seq(firstchr, lastchr)],]
calledplot <- ggplot2::ggplot() +
scale_y_continuous(name = "copies", limits = c(bottom,cap), breaks = seq(bottom, cap), expand=c(0,0)) +
scale_x_continuous(name = "chromosome", limits = c(firstbin,binchrend[lastchr]), breaks = binchrmdl[seq(firstchr,lastchr)], labels = rlechr$values[seq(firstchr,lastchr)], expand = c(0,0)) +
geom_hline(yintercept = seq(0, 4), color = '#333333', size = 0.5) +
geom_hline(yintercept = seq(5, cap-1), color = 'lightgray', size = 0.5) +
geom_vline(xintercept = binchrend[seq(firstchr,lastchr)], color = "#666666", linetype = "dashed") +
geom_point(aes(x = bin,y = adjustedcopynumbers),data=dfna[(dfna$adjustedcopynumbers>bottom&dfna$adjustedcopynumbers<cap),], size = 0.1, color = 'gray') +
geom_point(aes(x = bin,y = adjustedcopynumbers),data=cappedcopynumbers, size = 0.5, color = 'gray', shape = 24) +
geom_point(aes(x = bin,y = adjustedcopynumbers),data=toppedcopynumbers, size = 0.5, color = 'gray', shape = 25) +
geom_point(aes(x = bin,y = adjustedsegments),data=dfna[(dfna$adjustedsegments>bottom&dfna$adjustedsegments<cap),], size = 1, color = dfna$color[(dfna$adjustedsegments>bottom&dfna$adjustedsegments<cap)]) +
geom_point(aes(x = bin,y = adjustedsegments),data=cappedsegments, size = 1, color = 'purple', shape = 24) +
geom_point(aes(x = bin,y = adjustedsegments),data=toppedsegments, size = 1, color = 'darkblue', shape = 25) +
theme_classic() + theme(
axis.line = element_line(color='black'), axis.ticks = element_line(color='black'), axis.text = element_text(color='black')) +
ggtitle(title) +
theme(plot.title = element_text(hjust = 0.5))
}
return(list(calledtemplate=df,calledplot=calledplot))
} else {return(calledtemplate=df)}
}
# Finally, the combined copy number plot is given. Segment values are converted to absolute copies as in other ACE functions.
# To keep the plots from becoming too busy, individual bins are left out. Negative log10 q-values are given on a secondary
# axis. When using altmethod="SD", keep in mind the y-axis now displays the Z-score. You will have to adjust your y-axis limits,
# to include negative numbers, for instance bottom=-3.
twosamplecompare <- function(template1, index1=FALSE, ploidy1=2, cellularity1=1, standard1, name1,
template2, index2=FALSE, ploidy2=2, cellularity2=1, standard2, name2,
equalsegments=FALSE, altmethod=FALSE, cap=12, qcap=12, bottom=0,
plot=TRUE, trncname=FALSE, legend=TRUE, chrsubset, onlyautosomes=TRUE,
showcorrelation=TRUE) {
if (missing(template2)) {template2<-template1}
if(index1) {
pd1<-Biobase::pData(template1)
if(trncname==TRUE) {pd1$name <- gsub("_.*","",pd1$name)}
if(trncname!=FALSE&&trncname!=TRUE) {pd1$name <- gsub(trncname,"",pd1$name)}
template1 <- objectsampletotemplate(template1, index1)
if(missing(name1)) {name1<-pd1$name[index1]}
} else if(missing(name1)) {name1<-"sample1"}
if(index2) {
pd2<-Biobase::pData(template2)
if(trncname==TRUE) {pd2$name <- gsub("_.*","",pd2$name)}
if(trncname!=FALSE&&trncname!=TRUE) {pd2$name <- gsub(trncname,"",pd2$name)}
template2 <- objectsampletotemplate(template2, index2)
if(missing(name2)) {name2<-pd2$name[index2]}
} else if(missing(name2)) {name2<-"sample2"}
if(nrow(template1)!=nrow(template2)){print("bins not matching")}
na1 <- apply(template1, 1, function(x) {any(is.na(x))})
na2 <- apply(template2, 1, function(x) {any(is.na(x))})
template1na <- template1[!(na1 | na2), ]
template2na <- template2[!(na1 | na2), ]
segmentdata1 <- rle(as.vector(na.exclude(template1na$segments)))
segmentdata2 <- rle(as.vector(na.exclude(template2na$segments)))
if(missing(standard1) || !is(standard1, "numeric")) { standard1 <- median(rep(segmentdata1$values,segmentdata1$lengths)) }
adjustedcopynumbers1 <- ploidy1 + ((template1na$copynumbers-standard1)*(cellularity1*(ploidy1-2)+2))/(cellularity1*standard1)
adjcnna1 <- as.vector(na.exclude(adjustedcopynumbers1))
if(missing(standard2) || !is(standard2, "numeric")) { standard2 <- median(rep(segmentdata2$values,segmentdata2$lengths)) }
adjustedcopynumbers2 <- ploidy2 + ((template2na$copynumbers-standard2)*(cellularity2*(ploidy2-2)+2))/(cellularity2*standard2)
adjcnna2 <- as.vector(na.exclude(adjustedcopynumbers2))
bin <- template1na$bin
if(altmethod==FALSE){
template1na <- template1na[, seq(1,4)]
template1na$copynumbers <- adjcnna1
}
if(altmethod==FALSE){
template2na <- template2na[, seq(1,4)]
template2na$copynumbers <- adjcnna2
}
Chromosome <- c()
Start <- c()
End <- c()
Num_Bins <- c()
Mean1 <- c()
Mean2 <- c()
SE1 <- c()
SE2 <- c()
p_value <- c()
q_value <- c()
if(equalsegments==FALSE){
segments1 <- c()
for (s in seq_along(segmentdata1$lengths)) {
segments1[s] <- sum(segmentdata1$lengths[seq(1,s)])
}
segments2 <- c()
for (t in seq_along(segmentdata2$lengths)) {
segments2[t] <- sum(segmentdata2$lengths[seq(1,t)])
}
allsegments <- sort(unique(c(segments1,segments2)))
primer <- 1
for (i in seq_along(allsegments)) {
Chromosome[i] <- as.vector(template1na$chr)[primer]
Start[i] <- as.vector(template1na$start)[primer]
End[i] <- as.vector(template1na$end)[allsegments[i]]
Num_Bins[i] <- allsegments[i]-primer+1
Mean1[i] <- mean(template1na$copynumbers[seq(primer,allsegments[i])])
if(altmethod=="MAD"){Mean1[i] <- median(template1na$copynumbers[seq(primer,allsegments[i])])}
SE1[i] <- sd(template1na$copynumbers[seq(primer,allsegments[i])])/sqrt(Num_Bins[i])
if(altmethod=="MAD"){SE1[i] <- mad(template1na$copynumbers[seq(primer,allsegments[i])])/sqrt(Num_Bins[i])}
Mean2[i] <- mean(template2na$copynumbers[seq(primer,allsegments[i])])
if(altmethod=="MAD"){Mean2[i] <- median(template2na$copynumbers[seq(primer,allsegments[i])])}
SE2[i] <- sd(template2na$copynumbers[seq(primer,allsegments[i])])/sqrt(Num_Bins[i])
if(altmethod=="MAD"){SE2[i] <- mad(template2na$copynumbers[seq(primer,allsegments[i])])/sqrt(Num_Bins[i])}
if (Num_Bins[i]>1) {p_value[i] <- t.test(template1na$copynumbers[seq(primer,allsegments[i])],
template2na$copynumbers[seq(primer,allsegments[i])])$p.value
} else {p_value[i]<-1}
primer <- allsegments[i]+1
}
if(altmethod=="SD"){
primer <- 1
ns1 <- mean(rep(Mean1,Num_Bins))
sd1 <- sd(rep(Mean1,Num_Bins))
Mean1 <- (Mean1-ns1)/sd1
SE1 <- SE1/sd1
ns2 <- mean(rep(Mean2,Num_Bins))
sd2 <- sd(rep(Mean2,Num_Bins))
Mean2 <- (Mean2-ns2)/sd2
SE2 <- SE2/sd2
for (i in seq_along(allsegments)) {
cn1 <- (template1na$copynumbers[seq(primer,allsegments[i])]-ns1)/sd1
cn2 <- (template2na$copynumbers[seq(primer,allsegments[i])]-ns2)/sd2
if (Num_Bins[i]>1) {p_value[i] <- t.test(cn1,cn2)$p.value
} else {p_value[i]<-1}
primer <- allsegments[i]+1
}
}
if(altmethod=="MAD"){
primer <- 1
ns1 <- median(rep(Mean1,Num_Bins))
mad1 <- mad(rep(Mean1,Num_Bins))
Mean1 <- (Mean1-ns1)/mad1
SE1 <- SE1/mad1
ns2 <- median(rep(Mean2,Num_Bins))
mad2 <- mad(rep(Mean2,Num_Bins))
Mean2 <- (Mean2-ns2)/mad2
SE2 <- SE2/mad2
for (i in seq_along(allsegments)) {
cn1 <- (template1na$copynumbers[seq(primer,allsegments[i])]-ns1)/mad1
cn2 <- (template2na$copynumbers[seq(primer,allsegments[i])]-ns2)/mad2
if (Num_Bins[i]>1) {p_value[i] <- wilcox.test(cn1,cn2)$p.value
} else {p_value[i]<-1}
primer <- allsegments[i]+1
}
}
} else {
bincounter <- 1
segmentcounter <- 0
if(!is(equalsegments, "numeric")){equalsegments<-20}
if(onlyautosomes==TRUE) {
rlechr <- rle(as.vector(template1$chr[template1$chr %in% seq(1,22)]))
} else if (onlyautosomes==FALSE) {rlechr <- rle(as.vector(template1$chr))
} else {rlechr <- rle(as.vector(template1$chr[template1$chr %in% seq(1,onlyautosomes)]))}
for (c in rlechr$values) {
nos <- floor(length(template1na$chr[(template1na$chr==c)])/equalsegments)
leftovers <- length(template1na$chr[(template1na$chr==c)])%%equalsegments
nobs <- rep(equalsegments,nos)
if (nos == 0) {
nos <- 1
nobs <- leftovers
leftovers <- 0
}
if (leftovers > 0) {
for (a in seq(0, leftovers-1)) {
divvy <- a%%nos+1
nobs[divvy] <- nobs[divvy] + 1
}
}
for (i in seq(1, nos)) {
if(nobs[i]) {
Chromosome[segmentcounter + i] <- as.vector(template1na$chr)[bincounter]
Start[segmentcounter + i] <- as.vector(template1na$start)[bincounter]
End[segmentcounter + i] <- as.vector(template1na$end)[bincounter+nobs[i]-1]
Num_Bins[segmentcounter + i] <- nobs[i]
Mean1[segmentcounter + i] <- mean(template1na$copynumbers[seq(bincounter, bincounter+nobs[i]-1)])
SE1[segmentcounter + i] <- sd(template1na$copynumbers[seq(bincounter, bincounter+nobs[i]-1)])/sqrt(nobs[i])
Mean2[segmentcounter + i] <- mean(template2na$copynumbers[seq(bincounter, bincounter+nobs[i]-1)])
SE2[segmentcounter + i] <- sd(template2na$copynumbers[seq(bincounter, bincounter+nobs[i]-1)])/sqrt(nobs[i])
p_value[segmentcounter + i] <- t.test(template1na$copynumbers[seq(bincounter, bincounter+nobs[i]-1)],
template2na$copynumbers[seq(bincounter, bincounter+nobs[i]-1)])$p.value
bincounter <- bincounter + nobs[i]
}
}
segmentcounter <- segmentcounter+nos
}
if(altmethod=="SD"){
ns1 <- mean(rep(Mean1,Num_Bins))
sd1 <- sd(rep(Mean1,Num_Bins))
Mean1 <- (Mean1-ns1)/sd1
SE1 <- SE1/sd1
ns2 <- mean(rep(Mean2,Num_Bins))
sd2 <- sd(rep(Mean2,Num_Bins))
Mean2 <- (Mean2-ns2)/sd2
SE2 <- SE2/sd2
for (i in seq_along(Mean1)) {
firstbin <- which(template1na$chr==Chromosome[i] & template1na$start==Start[i])
lastbin <- which(template1na$chr==Chromosome[i] & template1na$end==End[i])
cn1 <- (template1na$copynumbers[seq(firstbin, lastbin)]-ns1)/sd1
cn2 <- (template2na$copynumbers[seq(firstbin, lastbin)]-ns2)/sd2
if (Num_Bins[i]>1) {p_value[i] <- t.test(cn1,cn2)$p.value
} else {p_value[i]<-1}
}
}
if(altmethod=="MAD"){
ns1 <- median(rep(Mean1,Num_Bins))
mad1 <- mad(rep(Mean1,Num_Bins))
Mean1 <- (Mean1-ns1)/mad1
SE1 <- SE1/mad1
ns2 <- median(rep(Mean2,Num_Bins))
mad2 <- mad(rep(Mean2,Num_Bins))
Mean2 <- (Mean2-ns2)/mad2
SE2 <- SE2/mad2
for (i in seq_along(Mean1)) {
firstbin <- which(template1na$chr==Chromosome[i] & template1na$start==Start[i])
lastbin <- which(template1na$chr==Chromosome[i] & template1na$end==End[i])
cn1 <- (template1na$copynumbers[seq(firstbin,lastbin)]-ns1)/mad1
cn2 <- (template2na$copynumbers[seq(firstbin,lastbin)]-ns2)/mad2
if (Num_Bins[i]>1) {p_value[i] <- wilcox.test(cn1,cn2)$p.value
} else {p_value[i]<-1}
}
}
}
q_value <- p.adjust(p_value,method="BH")
combinedsegmentsdf <- data.frame(Chromosome,Start,End,Num_Bins,Mean1,SE1,Mean2,SE2,p_value,q_value)
correlation <- cor(rep(Mean1,Num_Bins),rep(Mean2,Num_Bins))
if (plot==TRUE) {
if(bottom=="min"){bottom<-floor(min(Mean1, Mean2))}
if(cap=="max"){cap<-ceiling(max(Mean1, Mean2))}
q_capped <- sapply(q_value, function(x) max(10^-qcap,x))
q_corrected <- -log(q_capped,10)*cap/qcap
df <- data.frame(bin=template1na$bin,chr=template1na$chr,Mean1=rep(Mean1,Num_Bins),Mean2=rep(Mean2,Num_Bins),q_value=rep(q_corrected,Num_Bins))
if(!missing(chrsubset)) {
firstchr <- range(chrsubset)[1]
lastchr <- range(chrsubset)[2]
rlechr <- rle(as.vector(template1$chr))
df <- df[df$chr %in% rlechr$values[seq(firstchr,lastchr)],]
}
subsetcorrelation <- cor(df$Mean1,df$Mean2)
if(onlyautosomes==TRUE) {
rlechr <- rle(as.vector(template1$chr[template1$chr %in% seq(1,22)]))
} else if (onlyautosomes==FALSE) {rlechr <- rle(as.vector(template1$chr))
} else {rlechr <- rle(as.vector(template1$chr[template1$chr %in% seq(1,onlyautosomes)]))}
binchrend <- c()
currentbin <- 0
binchrmdl <- c()
if(altmethod==FALSE){yname<-"copies"} else {yname <- paste0(altmethod," units")}
for (i in seq_along(rlechr$values)) {
currentmiddle <- currentbin+rlechr$lengths[i]/2
currentbin <- currentbin+rlechr$lengths[i]
binchrend <- append(binchrend, currentbin)
binchrmdl <- append(binchrmdl, currentmiddle)
}
if(missing(chrsubset)){
compareplot <- ggplot2::ggplot() +
scale_y_continuous(name = yname, limits = c(bottom,cap), breaks = seq(bottom,cap), expand=c(0,0),
sec.axis = sec_axis(~.*qcap/cap, name = "-log10(q-value)")) +
scale_x_continuous(name = "chromosome", limits = c(0,tail(binchrend,1)),
breaks = binchrmdl, labels = rlechr$values, expand = c(0,0)) +
geom_bar(aes(x=bin, y = q_value), data=df, fill='green', stat='identity') +
geom_hline(yintercept = seq(0,4), color = '#333333', size = 0.5) +
geom_hline(yintercept = seq(5,cap-1), color = 'lightgray', size = 0.5) +
geom_vline(xintercept = binchrend, color = "#666666", linetype = "dashed") +
geom_point(aes(x = bin,y = Mean1),data=df, size = 1, color = 'red') +
geom_point(aes(x = bin,y = Mean2),data=df, size = 1, color = 'blue') +
theme_classic() + theme(
axis.line = element_line(color='black'), axis.ticks = element_line(color='black'),
axis.text = element_text(color='black'))
if (legend==TRUE){
compareplot <- compareplot +
geom_rect(aes(xmin=binchrmdl[1]/10, xmax = binchrmdl[4], ymin = cap-1.2, ymax = cap), fill = 'white') +
annotate("text", x = binchrend[2], y = cap-0.2, label = name1, color = 'red') +
annotate("text", x = binchrend[2], y = cap-0.6, label = name2, color = 'blue')
if (showcorrelation==TRUE) {
compareplot <- compareplot +
annotate("text", x = binchrend[2], y = cap-1, label = paste0("r = ",round(correlation,digits=3)), color = 'black')
}
}
} else {
firstchr <- range(chrsubset)[1]
lastchr <- range(chrsubset)[2]
if(firstchr==1){firstbin<-0
} else {firstbin<-binchrend[firstchr-1]+1}
compareplot <- ggplot2::ggplot() +
scale_y_continuous(name = yname, limits = c(bottom,cap), breaks = seq(bottom,cap), expand=c(0,0),
sec.axis = sec_axis(~.*qcap/cap, name = "-log10(q-value)")) +
scale_x_continuous(name = "chromosome", limits = c(firstbin,binchrend[lastchr]),
breaks = binchrmdl[seq(firstchr,lastchr)],
labels = rlechr$values[seq(firstchr,lastchr)], expand = c(0,0)) +
geom_bar(aes(x=bin, y = q_value), data=df, fill='green', stat='identity') +
geom_hline(yintercept = seq(0,4), color = '#333333', size = 0.5) +
geom_hline(yintercept = seq(5,cap-1), color = 'lightgray', size = 0.5) +
geom_vline(xintercept = binchrend[seq(firstchr,lastchr)], color = "#666666", linetype = "dashed") +
geom_point(aes(x = bin,y = Mean1),data=df, size = 1, color = 'red') +
geom_point(aes(x = bin,y = Mean2),data=df, size = 1, color = 'blue') +
theme_classic() + theme(
axis.line = element_line(color='black'), axis.ticks = element_line(color='black'), axis.text = element_text(color='black'))
if (legend==TRUE){
compareplot <- compareplot +
geom_rect(aes(xmin=firstbin+1, xmax = firstbin+(binchrend[lastchr]-firstbin)/3.5, ymin = cap-1.2, ymax = cap), fill = 'white') +
annotate("text", x = firstbin+(binchrend[lastchr]-firstbin)/7, y = cap-0.2, label = name1, color = 'red') +
annotate("text", x = firstbin+(binchrend[lastchr]-firstbin)/7, y = cap-0.6, label = name2, color = 'blue')
if(showcorrelation==TRUE) {
compareplot <- compareplot +
annotate("text", x = firstbin+(binchrend[lastchr]-firstbin)/7, y = cap-1, label = paste0("r = ",round(subsetcorrelation,digits=3)), color = 'black')
}
}
}
if (!missing(chrsubset)) {
return(list(twosampledf=combinedsegmentsdf,
correlation=correlation,
subsetcorrelation=subsetcorrelation,
compareplot=compareplot))
} else {return(list(twosampledf=combinedsegmentsdf,
correlation=correlation,
compareplot=compareplot))}
} else if (showcorrelation==TRUE) {return(list(twosampledf=combinedsegmentsdf,
correlation=correlation))
} else {return(combinedsegmentsdf)}
}
# This code can be used to more systematically use the squaremodel function.
# squaremodelsummary() returns a list of plots starting with the matrix plots and followed by up to 7 CNPs of the best fits.
# Besides the squaremodel, it also requires the sample data, either as template dataframe or as QDNAseq-object with the appropriate sample index
# You can save the plots to a variable or directly print them (which currently is the default!).
# I figured a 2x4 summary page would be nice. You can't currently specify this with parameters,
# so if you want to adjust this you have to manually change the code below.
squaremodelsummary <- function(template,QDNAseqobjectsample=FALSE,squaremodel,samplename,printplots=TRUE,outputdir,imagetype='pdf',trncname=FALSE) {
binsize <- 0
if (QDNAseqobjectsample) {
fd <- Biobase::fData(template)
pd <- Biobase::pData(template)
binsize <- fd$end[1]/1000
} else {binsize <- template$end[1]/1000}
if (missing(outputdir)) {outputdir<-"."}
if (missing(samplename)) {
if (QDNAseqobjectsample) {
samplename<-pd$name[QDNAseqobjectsample]
if(trncname==TRUE) {samplename <- gsub("_.*","",samplename)}
if(trncname!=FALSE&&trncname!=TRUE) {samplename <- gsub(trncname,"",samplename)}
} else {samplename<-"Sample"}
}
imagefunction <- get(imagetype)
cnplots <- min(7,length(unique(squaremodel$minimadf$error)))
plots <- vector(mode = 'list', length = 8)
plots[[1]] <- squaremodel$matrixplot + ggtitle(paste0(samplename,", bin size ",binsize,
" kbp, method = ",squaremodel$method,
", penalty = ",squaremodel$penalty,
", penploidy = ",squaremodel$penploidy))
if (cnplots!=0) {
for (i in seq(1,cnplots)) {
index <- tail(which(squaremodel$minimadf$error==unique(sort(squaremodel$minimadf$error))[i]),1)
plots[[1+i]] <- singleplot(template=template,QDNAseqobjectsample=QDNAseqobjectsample,
cellularity=squaremodel$minimadf$cellularity[index],
error=squaremodel$minimadf$error[index],
ploidy=squaremodel$minimadf$ploidy[index],
title=paste0("fit ",i, ", ploidy = ",squaremodel$minimadf$ploidy[index]))
}
}
if (printplots == TRUE) {
if (imagetype == 'pdf') {
imagefunction(file.path(outputdir,paste0(samplename,".pdf")),width=10.5)
print(plots)
dev.off()
} else {
if (length(plots)==1) {
imagefunction(file.path(outputdir,paste0(samplename,".",imagetype)),width = 720, height = 480)
print(plots)
dev.off()
} else {
imagefunction(file.path(outputdir,paste0(samplename,".",imagetype)),width = 1440, height = 1920)
print(multiplot(plotlist = plots, cols=2))
dev.off()
}
}
}
return(plots)
}
# loop through all samples of your object and make squaremodel summaries for those
# since this only works on QDNAseq-objects, I use some of the info in those objects for the filenames and graphs
loopsquaremodel <- function(object,ptop=5,pbottom=1,prows=100,method='RMSE',penalty=0,penploidy=0,
outputdir,imagetype='pdf',trncname=FALSE, returnmodels = FALSE,
printplots = TRUE, printobjectsummary=TRUE) {
imagefunction <- get(imagetype)
if (missing(outputdir)) {outputdir<-"."}
if(!dir.exists(outputdir)) {dir.create(outputdir)}
fd <- Biobase::fData(object)
pd <- Biobase::pData(object)
if(trncname==TRUE) {pd$name <- gsub("_.*","",pd$name)}
if(trncname!=FALSE&&trncname!=TRUE) {pd$name <- gsub(trncname,"",pd$name)}
binsize <- fd$end[1]/1000
if (returnmodels[1] != FALSE) {sqmlist <- vector(mode = 'list', length = length(pd$name))}
matrixplots <- vector(mode = 'list', length = length(pd$name))
for (i in seq_along(pd$name)) {
model <- squaremodel(object,QDNAseqobjectsample=i,ptop=ptop,pbottom=pbottom,prows=prows,
penalty=penalty,penploidy=penploidy,method=method)
sms <- squaremodelsummary(object,model,QDNAseqobjectsample=i,outputdir=outputdir,imagetype=imagetype,trncname=trncname,printplots=printplots)
matrixplots[[2*(i-1)+1]] <- model$matrixplot + ggtitle(paste0(pd$name[i]))
matrixplots[[2*(i-1)+2]] <- sms[[2]]
if (returnmodels[1] != FALSE) {
sqmlist[[i]]$samplename <- pd$name[i]
if (returnmodels[1] != TRUE) {model <- model[name = returnmodels]}
sqmlist[[i]] <- append(sqmlist[[i]], model)
}
}
if(printobjectsummary == TRUE) {
if(imagetype == 'pdf') {
imagefunction(file.path(outputdir,paste0("objectsummary.",imagetype)),width=10.5)
print(matrixplots)
dev.off()
} else {
imagefunction(file.path(outputdir,paste0("objectsummary.",imagetype)),width=1440,height = 480*ceiling(length(matrixplots)/2))
print(multiplot(plotlist = matrixplots, cols=2))
dev.off()
}
}
if (returnmodels[1] != FALSE) {return(sqmlist)}
}
# The first function, correlationmatrix(), is straightforward: it makes a correlation matrix
# of all samples in a QDNAseq-object. The correlations are based on the correlation of segment
# values of all bins. But this analysis may be hampered by poor segmentation! Instead, you could
# just take the raw copynumber values, but there is too much noise that is relatively equal between
# samples. You would, for instance, see pretty good correlation between two samples with absolutely
# no copy numer aberrations. That is where the correlationmatrixadjusted() comes in. It either
# gives everything equal segments, or it performs pair-wise matching of segment break points for
# all pairs. It therefore calls the templatefromequalsegments() and twosamplecompare() functions,
# respectively.
correlationmatrix <- function(object, trncname=FALSE) {
samples <- Biobase::sampleNames(object)
if (trncname==TRUE) {samples <- gsub("_.*","",samples)}
if (trncname!=FALSE&&trncname!=TRUE) {samples <- gsub(trncname,"",samples)}
cormat <- cor(na.exclude(assayData(object)$segmented))
rownames(cormat) <- samples
colnames(cormat) <- samples
return(cormat)
}
correlationmatrixadjusted <- function(object, trncname=FALSE, equalsegments=FALSE, funtype = 'mean') {
samples <- Biobase::sampleNames(object)
n <- length(samples)
if (trncname==TRUE) {samples <- gsub("_.*","",samples)}
if (trncname!=FALSE&&trncname!=TRUE) {samples <- gsub(trncname,"",samples)}
if (equalsegments==FALSE) {
cormat <- matrix(nrow=n,ncol=n)
colnames(cormat) <- samples
rownames(cormat) <- samples
cormat[n,n] <- 1
for (i in seq(1, n-1)) {
cormat[i,i] <- 1
for (j in seq(i+1, n)) {
cormat[i,j] <- twosamplecompare(object,i,index2=j)$correlation
cormat[j,i] <- cormat[i,j]
}
}
} else {
size <- length(fData(object)$chromosome)
tempmat <- matrix(nrow = size, ncol = n)
for (i in seq_along(samples)) {
tempmat[,i] <- templatefromequalsegments(object,i,equalsegments=equalsegments,funtype=funtype)$segments
}
segmentdf <- na.exclude(as.data.frame(tempmat))
colnames(segmentdf) <- samples
cormat <- cor(segmentdf)
}
return(cormat)
}
# This function creates a template (used in many ACE functions) from a segment file (created by many other programs, and available
# for download from the TCGA amongst others!!!).
# The argument segmentdf can either be a data frame with segmented data or the location of a tab-delimited text-file
# It is important that chromosomes are either integers or chr followed by the number of the chromosome (e.g. chr1)
# It doesn't hurt if the file contains segmented data on X and Y chromosome, as long as they are listed after the autosomes
# The function expects the following order of columns: chromosome, start, end, number of bins, segment value
# You can use the arguments of the function to specify the index of the relevant columns, if necessary
# The argument log actually transforms log-values back to linear scaling; if set to TRUE, it converts back from natural logarithm
# Alternatively you can specify the base of the log-conversion (commonly 2)
# The standard error and standard deviation columns are "cheat" arguments. Segment files do not specify the copynumber value per bin,
# which is one of the columns that the template needs. If omitted, copynumbers will take the same values as segments, and will
# disappear behind the segment values in the copy number plot. It is possible that standard error or standard deviation data is
# available per segment. In that case, you can create "simulated" copynumber values that are taken from a normal distribution
# corresponding to the segment mean and the given standard deviation or standard error. Obviously, this is merely meant as a
# visualization aide!!!
segmentstotemplate <- function(segmentdf, chrci=1, startci=2, endci=3, binsci=4, meanci=5, seci, sdci, log=FALSE) {
if(is(segmentdf, "character")) {segmentdf <- try(read.table(segmentdf, header = TRUE, comment.char = "", sep = "\t"))}
if (inherits(segmentdf, "try-error")) {print(paste0("could not find ", segmentdf))
} else {
if(log==TRUE){log<-exp(1)}
if(log) {
segmentdf[,meanci] <- log^segmentdf[,meanci]
}
segments <- rep(segmentdf[,meanci],segmentdf[,binsci])
chr <- rep(segmentdf[,chrci],segmentdf[,binsci])
chr <- gsub("chr","",chr,ignore.case = TRUE)
if(!missing(sdci)) {
copynumbers <- c()
for (i in seq_along(segmentdf[,1])) {copynumbers <- append(copynumbers,rnorm(n=segmentdf[i,binsci],
mean=segmentdf[i,meanci],
sd=segmentdf[i,sdci]))}
} else if(!missing(seci)) {
copynumbers <- c()
for (i in seq_along(segmentdf[,1])) {copynumbers <- append(copynumbers,rnorm(n=segmentdf[i,binsci],
mean=segmentdf[i,meanci],
sd=(segmentdf[i,seci]*sqrt(segmentdf[i,binsci]))))}
} else {
copynumbers <- segments
}
bin <- seq_along(segments)
start <- c()
end <- c()
for (i in seq_along(segmentdf[,1])) {
start <- append(start,round(seq(from=segmentdf[i,startci],by=(segmentdf[i,endci]+1-segmentdf[i,startci])/segmentdf[i,binsci],
length.out=segmentdf[i,binsci])))
end <- append(end,round(seq(to=segmentdf[i,endci],by=(segmentdf[i,endci]+1-segmentdf[i,startci])/segmentdf[i,binsci],
length.out=segmentdf[i,binsci])))
}
template <- data.frame(bin,chr,start,end,copynumbers,segments)
return(template)
}
}
compresstemplate <- function(template, factor = 20, funtype = 'median'){
fun <- get(funtype)
chr <- c()
start <- c()
end <- c()
bin <- c()
copynumbers <- c()
segments <- c()
bincounter <- 1
nbcounter <- 1
rlechr <- rle(as.vector(template$chr))
for (c in seq_along(rlechr$values)) {
nob <- rlechr$lengths[c]
nnb <- floor(nob/factor)
vob <- rep(factor, nnb)
leftovers <- nob%%factor
if (leftovers > 0) {
for (a in seq(0, leftovers-1)) {
divvy <- a%%nnb+1
vob[divvy] <- vob[divvy] + 1
}
}
for (i in (vob)) {
bin[nbcounter] <- nbcounter
chr[nbcounter] <- rlechr$values[c]
start[nbcounter] <- template$start[bincounter]
end[nbcounter] <- template$end[bincounter+i-1]
copynumbers[nbcounter] <- fun(template$copynumbers[seq(bincounter, bincounter+i-1)], na.rm = TRUE)
segments[nbcounter] <- fun(template$segments[seq(bincounter, bincounter+i-1)], na.rm = TRUE)
nbcounter <- nbcounter + 1
bincounter <- bincounter + i
}
}
compressedtemplate <- data.frame(bin,chr,start,end,copynumbers,segments)
return(compressedtemplate)
}
# This function was inspired by twosamplecompare, which also has the option to "resegment" copy number data using equally
# sized segments. I figured the option to do this was useful enough to make it its own function, since the output can be used as
# input for other functions (singlemodel, squaremodel, singleplot, ACEcall)
# You obviously want equal segments if you use this function ... the argument equalsegments specifies how many bins (or probes)
# should be in this segment. Each chromosome is divided into segments of roughly this number of bins.
# The function expects an ACE template, but obviously the value of segments is not necessary. You can also feed it a data frame
# which contains the columns "bin", "chr", "start", "end", "copynumbers", with copynumbers being the value of a bin or probe.
# The columns "bin", "start", and "end" are not used, but they are integral to templates for other ACE functions.
templatefromequalsegments <- function(template, QDNAseqobjectsample = FALSE, equalsegments = 20, funtype = 'mean', chrsubset, onlyautosomes = TRUE) {
fun <- get(funtype)
if(QDNAseqobjectsample) {
template <- objectsampletotemplate(template, QDNAseqobjectsample)
} else { template$chr <- gsub("chr","",template$chr,ignore.case = TRUE)}
copynumbers <- as.vector(template$copynumbers)
if("segments" %in% colnames(template)) {
segments <- as.vector(template$segments)
} else {segments <- copynumbers}
chr <- as.vector(template$chr)
start <- as.vector(template$start)
end <- as.vector(template$end)
bin <- as.vector(template$bin)
if(onlyautosomes==TRUE) {
rlechr <- rle(as.vector(template$chr[template$chr %in% seq(1,22)]))
} else if (onlyautosomes==FALSE) {rlechr <- rle(as.vector(template$chr))
} else {rlechr <- rle(as.vector(template$chr[template$chr %in% seq(1, onlyautosomes)]))}
if(!missing(chrsubset)){
chromosomes <- rlechr$values[chrsubset]
} else {chromosomes <- rlechr$values}
for (c in chromosomes) {
bincounter <- 1
nos <- floor(sum(!is.na(copynumbers)&chr==c)/equalsegments)
leftovers <- sum(!is.na(copynumbers)&chr==c)%%equalsegments
nobs <- rep(equalsegments,nos)
if (nos == 0) {
nos <- 1
nobs <- leftovers
leftovers <- 0
}
if (leftovers > 0) {
for (a in seq(0, leftovers-1)) {
divvy <- a%%nos+1
nobs[divvy] <- nobs[divvy] + 1
}
}
for (i in seq(1, nos)) {
if(nobs[i]) {
segments[which(!is.na(copynumbers)&chr==c)[seq(bincounter, bincounter+nobs[i]-1)]] <- rep(fun(copynumbers[which(!is.na(copynumbers)&chr==c)[seq(bincounter, bincounter+nobs[i]-1)]]),nobs[i])
bincounter <- bincounter + nobs[i]
}
}
}
template <- data.frame(bin,chr,start,end,copynumbers,segments)
return(template)
}
# Another function inspired by twosamplecompare! The idea of this one is
# actually more basic. The name of the function says it all. This time there are
# two types of input for the segments and an important option. First off: you
# can give your segments as something like a GRanges object or a data frame
# obtained from getadjustedsegments, or you can use an ACE template and use that
# (similar as in twosamplecompare). Second, you now have the option to either
# only use the segments from the segmentinput, or you combine segments as in
# twosamplecompare. Another nice feature of this function when compared to
# twosamplecompare is that you do not have to have equal number of bins, since
# bins of the segmentinput are ignored: it will just use segment info with
# "Chromosome", "Start", and "End" specified in aptly named columns. Since you
# are forcing segments on a template, the template does not need to have segment
# values itself if combinesegments = FALSE. This functions returns the
# resegmented template.
forcesegmentsontemplate <- function(segmentinput, template, QDNAseqobjectsample = FALSE,
combinesegments = FALSE, funtype = 'mean') {
fun <- get(funtype)
if (QDNAseqobjectsample) {template <- objectsampletotemplate(template, QDNAseqobjectsample)}
if (is(segmentinput, "GRanges")) {
segmentdf <- data.frame(Chromosome = GenomicRanges::seqnames(segmentinput),
Start = GenomicRanges::start(segmentinput),
End = GenomicRanges::end(segmentinput))
} else if (colnames(segmentinput)[1] == "bin") {
segmentdf <- getadjustedsegments(segmentinput)
} else {
Chromosome <- segmentinput[,grep("chr", colnames(segmentinput), ignore.case = TRUE)[1]]
Start <- segmentinput[,grep("start", colnames(segmentinput), ignore.case = TRUE)[1]]
End <- segmentinput[,grep("end", colnames(segmentinput), ignore.case = TRUE)[1]]
segmentdf <- data.frame(Chromosome, Start, End)
}
template$chr <- as.vector(gsub("chr", "", template$chr,ignore.case = TRUE))
segmentdf$Chromosome <- as.vector(gsub("chr", "", segmentdf$Chromosome,ignore.case = TRUE))
if (combinesegments == TRUE) {
ownsegments <- getadjustedsegments(template)
allsegments <- rbind(segmentdf[, seq(1, 3)], ownsegments[, seq(1, 3)])
allsegments <- allsegments[order(allsegments$Chromosome, allsegments$Start, allsegments$End),]
Chromosome <- c()
Start <- c()
End <- c()
for (c in rle(as.vector(allsegments$Chromosome))$values) {
starts <- sort(unique(allsegments$Start[allsegments$Chromosome == c]))
ends <- sort(unique(allsegments$End[allsegments$Chromosome == c]))
if (length(starts != length(ends))) {
starts <- append(starts, ends[-length(ends)]+1)
ends <- append(ends, starts[-1]-1)
starts <- sort(unique(starts))
ends <- sort(unique(ends))
}
Chromosome <- append(Chromosome, rep(c, length(starts)))
Start <- append(Start, starts)
End <- append(End, ends)
}
segmentdf <- data.frame(Chromosome, Start, End)
}
template$segments <- NA
for (s in seq_along(segmentdf$Chromosome)) {
indices <- which(template$chr == segmentdf$Chromosome[s] &
template$start >= segmentdf$Start[s] &
template$end <= segmentdf$End[s])
template$segments[indices] <- fun(na.exclude(template$copynumbers[indices]))
}
template$segments[is.na(template$copynumbers)] <- NA
return(template)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.