Nothing
patchwork.CG.copynumbers = function(cn2,delta,het,hom,maxCn=8,ceiling=1,name="copynumbers_",CGfile=NULL,forcedelta=F)
{
data(ideogram,package="patchworkCG")
if (length(CGfile)==1)
{
load(CGfile)
} else { load("CG.Rdata")
}
voidchrom <- c('chrX','chrY') # may add non-integer chroms here....
tix=NULL #temporary index
int=NULL ## contains coverage estimate of each (total) copy number
ai=NULL ## contains Allelic Imbalance Ratio estimate of each copy number variant.
regions <- segm
regions$np <- round((regions$end - regions$start)/10000)
regions <- regions[(is.autosome(regions$chr)®ions$np>50)&(!is.nan(regions$ai)),] ## will use these regions
regions <- regions[!is.na(regions$ratio),]
## likely cn2 regions sit within delta/3 from cn2.
expectedAt <- cn2
tix$cn2 <- abs(regions$ratio - expectedAt) < (delta/3) ## index of likely cn2 regions
temp <- regions[tix$cn2,] ## cn2 regions
med <- weightedMedian(temp$median,temp$np) ## improved value of Log-R at cn2 (returns NULL if theres nothing there)
int$cn2 <- ifelse(!is.null(med),med,expectedAt) ## saved to int.
## likely cn1 regions sit at about cn2 - delta:
d <- delta ## the (Log-R) distance to cn1
expectedAt <- int$cn2-d ## cn1 is expected here
tix$cn1 <- abs(regions$ratio - expectedAt) < (d/3) ## index of likely cn1 regions
temp <- regions[tix$cn1,]
med <- weightedMedian(temp$median,temp$np)
int$cn1 <- ifelse(!is.null(med),med,expectedAt)
if (forcedelta) int$cn1 <- int$cn2-delta
## likely cn0 regions sit below cn1 - delta:
d <- int$cn2-int$cn1
expectedAt <- int$cn1-d
tix$cn0 <- regions$ratio < expectedAt
temp <- regions[tix$cn0,]
med <- weightedMedian(temp$median,temp$np)
int$cn0 <- ifelse(!is.null(med),med,expectedAt)
if (forcedelta) int$cn0 <- int$cn1-delta
## likely cn3 regions sit at about cn2+delta
d <- delta
expectedAt <- int$cn2+d
tix$cn3 <- abs(regions$ratio - expectedAt) < (d/3)
temp <- regions[tix$cn3,]
med <- weightedMedian(temp$median,temp$np)
int$cn3 <- ifelse(!is.null(med),med,expectedAt)
if (forcedelta) int$cn3 <- int$cn2+delta
## cn4 follows at ...
d <- delta
expectedAt <- int$cn3+d
tix$cn4 <- abs(regions$ratio - expectedAt) < (d/4)
temp <- regions[tix$cn4,]
med <- weightedMedian(temp$median,temp$np)
int$cn4 <- ifelse(!is.null(med),med,expectedAt)
if (forcedelta) int$cn4 <- int$cn3+delta
## generalized for higher cns
for (cn in 5:maxCn)
{
thisCn <- paste('cn',cn,sep='')
prevCn <- paste('cn',cn-1,sep='')
pprevCn <- paste('cn',cn-2,sep='')
d <- delta #dmin*(int[prevCn][[1]]-int[pprevCn][[1]])
expectedAt <- int[prevCn][[1]]+d
tix[[thisCn]] <- abs(regions$ratio - expectedAt) < (d/5)
temp <- regions[tix[thisCn][[1]],]
med <- weightedMedian(temp$median,temp$np)
int[thisCn] <- ifelse(!is.null(med),med,expectedAt)
if (forcedelta) int[[thisCn]] <- int[[prevCn]]+delta
}
# smooth the cn relationship ?
## at cn2, find the variant clusters (normal and CNNLOH)
ix <- (abs(regions$ratio - int$cn2) < 0.2*(int$cn3-int$cn2) ) # taking only closely-matching segments
data <- regions[ix,]
data <- data[!is.na(data$ai),] # ...with a calculated allelic imbalance.
expectedAt <- het
ix <- 2*abs(data$ai-het) < abs(data$ai-hom)
med <- weightedMedian(data$ai[ix],data$snvs[ix])
ai$cn2m1 <- ifelse (!is.null(med),med,expectedAt)
expectedAt <- hom
ix <- 2*abs(data$ai-hom) < abs(data$ai-het)
med <- weightedMedian(data$ai[ix],data$snvs[ix])
ai$cn2m0 <- ifelse (!is.null(med),med,expectedAt)
## for cn1 (and 0)
ix <- (abs(regions$ratio - int$cn1) < 0.2*(int$cn2-int$cn1) )
data <- regions[ix,]
data <- data[!is.na(data$ai),]
expectedAt <- (ai$cn2m0+ai$cn2m1)*3/5 ## Decent estimate.
med <- weightedMedian(data$ai,data$snvs) ## Average allelic imbalance weighted on snp count
ai$cn1m0 <- ifelse (!is.null(med),med,expectedAt) ## Will be NA if there was no CNNLOH
ai$cn0m0 <- NA #unimportant
## for cn3:
ix <- (abs(regions$ratio - int$cn3) < 0.2*(int$cn4-int$cn3) )
data <- regions[ix,]
data <- data[!is.na(data$ai),]
range <- ai$cn2m0-ai$cn2m1 #the distance between 2normal and CNNLOH
# get the 3(1) regions:
expectedAt <- ai$cn2m1+range/3 # this is an approx if cn3m1 is absent.
ix <- (data$ai<ai$cn2m0) & (data$ai>ai$cn2m1) # take regions with less AI than cn2m0 but more than cn2m1
med <- weightedMedian(data$ai[ix],data$snvs[ix]) # average allelic imbalance weighted on snp count
ai$cn3m1 <- ifelse (!is.null(med),med,expectedAt)
# now for cn3m0
expectedAt <- ai$cn3m1 + range; if (expectedAt>1) expectedAt <- (ai$cn2m0+1)/2
ix <- (abs(data$ai-expectedAt) < (expectedAt-ai$cn3m1)/4) # take those much closer to exp than to cn3m1
med <- weightedMedian(data$ai[ix],data$snvs[ix])
try (if (med<ai$cn2m0) med <- NULL, silent=T) ## in case of heterogeneities or strange signals, we disallow this effect.
ai$cn3m0 <- ifelse (!is.null(med),med,expectedAt)
if (is.na(ai$cn1m0)) ai$cn1m0 <- mean(ai$cn3m1,ai$cn2m0) ## If deletions were missing, place an estimate from cn3
## now for cn4
ix <- abs(regions$ratio - int$cn4) < 0.2*(int$cn4-int$cn3)
data <- regions[ix,]
data <- data[!is.na(data$ai),]
# get the 4(2) regions: they are at AI about cn2m1
expectedAt <- ai$cn2m1 # this is a good approx
ix <- data$ai<ai$cn3m1 # let all below cn3m1 in
med <- weightedMedian(data$ai[ix],data$snvs[ix])
ai$cn4m2 <- ifelse (!is.null(med),med,expectedAt)
if (ai$cn4m2>ai$cn2m1) ai$cn4m2 <- ai$cn2m1 # sanity check.
data <- data[!ix,] # coutinue with the remaining
# now 4(1) has less ai than 3(0) -> sits at about cn3m1+(cn3m1-cn2m1).
expectedAt <- ai$cn3m1+(ai$cn3m1-ai$cn2m1)
ix <- abs(data$ai-expectedAt)<abs(data$ai-ai$cn3m0) # take those closer to exp than to 3(0)
med <- weightedMedian(data$ai[ix],data$snvs[ix])
ai$cn4m1 <- ifelse (!is.null(med),med,expectedAt)
# now 4(0) is at about 3(0) + [1 - 3(0)] * [3(0)-2(0)] / [1-2(0)]
expectedAt <- ai$cn3m0 + (ceiling-ai$cn3m0)*(ai$cn3m0-ai$cn2m0)/(ceiling-ai$cn2m0)
ix <- abs(data$ai-expectedAt) < 0.2*(expectedAt-ai$cn4m2) # we take those very close to exp
med <- weightedMedian(data$ai[ix],data$snvs[ix])
try (if (med<ai$cn3m0) med <- NULL, silent=T) ## in case of heterogeneities or strange signals, we disallow this effect.
ai$cn4m0 <- ifelse (!is.null(med),med,expectedAt)
## generalization for higher copy numbers. it gets complicated.
for (cn in 5:maxCn) {
thisCn <- paste('cn',cn,sep='')
prevCn <- paste('cn',cn-1,sep='')
pprevCn <- paste('cn',cn-2,sep='')
ix <- (abs(regions$ratio - int[thisCn][[1]]) < 0.2*(int[thisCn][[1]]-int[prevCn][[1]]) )
data <- regions[ix,]
data <- data[!is.na(data$ai),]
data <- data[data$np>50,] # long regions for safety
## try to find variants, starting with LOH
# LOH such as 5(0)
m <- 0
thisVariant=paste(thisCn,'m',0,sep='')
c4m0 <- ai[paste(prevCn,'m',m,sep='')][[1]] # relative naming for clarity
c3m0 <- ai[paste(pprevCn,'m',m,sep='')][[1]]
expectedAt <- ceiling-((ceiling-c4m0)*(ceiling-max(c4m0,c3m0))/(ceiling-min(c3m0,c4m0)))
#ai[thisVariant] <- expectedAt
# we then take those close to exp (within delta[3(0),4(0)])
ix <- abs(data$ai-expectedAt) < abs(c4m0 - c3m0)
med <- weightedMedian(data$ai[ix],data$snvs[ix])
try (if (med<ai$cn4m0) med <- NULL, silent=T) ## in case of heterogeneities or strange signals, we disallow this effect.
ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
## then from balanced to less balanced
minorVariants=trunc(cn/2):1
first <- T
for (m in minorVariants) {
thisVariant=paste(thisCn,'m',m,sep='')
if (m==cn/2) {
# We have balanced variant, so rather easy.
expectedAt <- ai[paste(pprevCn,'m',m-1,sep='')][[1]] # this is a good approx, balanced at cn-2
ix <- data$ai<ai[paste(prevCn,'m',m-1,sep='')][[1]] # let all below (cn-1, mcn-1) in
med <- weightedMedian(data$ai[ix],data$snvs[ix])
ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
if (ai[thisVariant] > ai[paste(pprevCn,'m',m-1,sep='')][[1]]) ai[thisVariant] <- ai[paste(pprevCn,'m',m-1,sep='')][[1]] # sanity check.
first <- F
} else if (first) {
# its not balanced but its the most balanced of the unbalanced. something like 5(2)
expectedAt <- 0.5*( ai[paste(prevCn,'m',m,sep='')][[1]] + ai[paste(pprevCn,'m',m-1,sep='')][[1]] ) # that means between 4(2) and 3(1)
ix <- abs(data$ai-expectedAt) < ( ai[paste(prevCn,'m',m,sep='')][[1]] - ai[paste(pprevCn,'m',m-1,sep='')][[1]] ) /3 # let all "between" 4(2) and 3(1) in
med <- weightedMedian(data$ai[ix],data$snvs[ix])
ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
first <- F
} else {
# not the most balanced unbalanced variant, for example 5(1):
expectedAt <- ai[paste(thisCn,'m',minorVariants[1],sep='')][[1]] + (minorVariants[1]-m) * (ai[paste(thisCn,'m',0,sep='')][[1]] - ai[paste(thisCn,'m',minorVariants[1],sep='')][[1]]) / trunc(cn/2) # 5(2) + (which)* 5(0)-5(2) /(n)
ix <- abs(data$ai-expectedAt) < ( ai[paste(prevCn,'m',m,sep='')][[1]] - ai[paste(pprevCn,'m',m-1,sep='')][[1]] ) /3 # let all "between" 4(1) and 3(0) in
med <- weightedMedian(data$ai[ix],data$snvs[ix])
ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
}
} # done with minor variants
} # done with copy numbers
sampleInfo <- list('int'=int,'ai'=ai)
## Reassignment of regions! This removes regions$np which is used later..
## Assign total and minor copy numbers to all segments.
regions <- segm[!is.na(segm$ratio),] ## This time, work on all segments available.
regions$np <- round((regions$end - regions$start)/10000)
Cn <- rep(NA,nrow(regions)) ## Total copy number
mCn <- rep(NA,nrow(regions)) ## Minor allele copy number
fullCN <- rep(NA,nrow(regions)) ## Variant label. ('cnXmY')
intDist <- NULL ## distance to certain Log-R
imbaDist <- NULL ## distance to certain allelic imbalance
## give each segment the best matching CN and mCN
for (i in 1:nrow(regions))
{
# set total copy number
distance <- Inf
for (cn in 0:maxCn)
{
t_int <- int[paste('cn',cn,sep='')][[1]] ## get Log-R of particular cn from 'int'
t_dis <- abs(regions$ratio[i]-t_int) ## distance to that particular cn
if (t_dis < distance)
{ ## nearest so far, save.
distance <- t_dis -> intDist[i]
Cn[i] <- cn
}
}
}
# Y makes CN0 sit very low, this is a fix on non-Y Cn0
#if ((Cn[i] == 1)&(intDist[i] > (int$cn2-int$cn1))) Cn[i] <- 0 ## currently not needed.
for (i in 1:nrow(regions))
{
# set minor CN
distance <- Inf
if (Cn[i]<=1)
{
mCn[i] <- 0
}
else if (!is.na(regions$ai[i]))
{
if (!is.nan(regions$ai[i]))
{
for (m in 0:trunc(Cn[i]/2))
{
t_ai <- ai[paste('cn',Cn[i],'m',m,sep='')][[1]]
t_dis <- abs(regions$ai[i]-t_ai)
if (t_dis < distance)
{
distance <- t_dis -> imbaDist[i]
mCn[i] <- m
}
}
}
}
else
{
mCn[i] <- NA
}
fullCN[i] <- paste('cn',Cn[i],'m',mCn[i],sep='') # Full description
}
regions$Cn <- Cn
regions$mCn <- mCn
regions$fullCN <- fullCN
save(regions,int,ai,file="copynumbers.Rdata")
write.csv(regions,file='Copynumbers.csv')
meanCn <- weightedMean(regions$Cn,regions$np)
ix1 <- regions$Cn==trunc(meanCn)
ix2 <- regions$Cn==ceiling(meanCn)
xdelta <- weightedMean(regions$ratio[ix2],regions$np[ix2]) - weightedMean(regions$ratio[ix1],regions$np[ix1])
expected_delta <- 1/meanCn
tumor_percentDNA <- xdelta / expected_delta
tumor_percent <- tumor_percentDNA/(meanCn/2) / ( tumor_percentDNA/(meanCn/2) + (1-tumor_percentDNA) )
# ## Fix sex chromosomes in case of male sample.
# if (male.sample) {
# ix <- which(!is.autosome(regions$chr))
# regions$mCn[ix] <- 0
# ## Treats differently depending on sex of the reference
# if (!male2femref) {
# # X and Y (originally cn1) were compared to cn1 (matched normal or male references).
# # Unchanged copy number (=1) is expected at relative coverage 1.
# # Gains/loss are expected at xdelta (the delta of the autosomes)*2.
# rawcopychange <- (regions$median[ix]-1) / (xdelta*2)
# regions$Cn[ix] <- round(1+rawcopychange)
# regions$fullCN[ix] <- paste('cn',round(1+rawcopychange),'m0')
# } else
# {
# # X and Y (originally cn1) were compared to cn2 (female references).
# # Unchanged copy number (=1) is expected at relative coverage 1/2.
# # Gains/loss are expected at xdelta from there.
# rawcopychange <- (regions$median[ix]-1/2) / (xdelta)
# regions$Cn[ix] <- round(1+rawcopychange)
# regions$fullCN[ix] <- paste('cn',round(1+rawcopychange),'m0')
# }
# }
# ## And a small fix on female samples
# if (!male.sample) {
# ix <- which(regions$chr=='chrY')
# regions$mCn[ix] <- regions$Cn[ix] <- 0
# }
regions$meanCn <- meanCn
regions$tumor_percent <- tumor_percent
write.csv(regions,file='Copynumbers.csv')
CG_Ka_check(regions$chr,regions$start,regions$end,regions$ratio,regions$ai,
regions$Cn,regions$mCn,list(int=int,ai=ai),name=name,
xlim=c(0,2.4),ylim=c(0,1))
CG_KaChCN(regions$chr,regions$start,regions$end,regions$ratio,regions$ai,
regions$Cn,regions$mCn,depcov$chr,
depcov$begin,depcov$ratio,
(1 - mastervar$min/mastervar$max),
mastervar$chr,mastervar$begin,
name=name,xlim=c(0,2.4),ylim=c(0,1))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.