Nothing
vcConfParams <- function(
minStrandCov = 5,
maxStrandCov = 200,
minStrandAltSupport = 2,
maxStrandAltSupportControl = 0,
minStrandDelSupport = minStrandAltSupport,
maxStrandDelSupportControl = maxStrandAltSupportControl,
minStrandInsSupport = minStrandAltSupport,
maxStrandInsSupportControl = maxStrandAltSupportControl,
minStrandCovControl = 5,
maxStrandCovControl = 200,
bases = 5:8,
returnDataPoints = TRUE,
annotateWithBackground = TRUE,
mergeCalls = TRUE,
mergeAggregator = mean,
pValueAggregator = max
){
return(
list(
minStrandCov = minStrandCov,
maxStrandCov = maxStrandCov,
minStrandAltSupport = minStrandAltSupport,
maxStrandAltSupportControl = maxStrandAltSupportControl,
minStrandDelSupport = minStrandDelSupport,
maxStrandDelSupportControl = maxStrandDelSupportControl,
minStrandInsSupport = minStrandInsSupport,
maxStrandInsSupportControl = maxStrandInsSupportControl,
minStrandCovControl = minStrandCovControl,
maxStrandCovControl = maxStrandCovControl,
bases = bases,
returnDataPoints = returnDataPoints,
annotateWithBackground = annotateWithBackground,
mergeCalls = mergeCalls,
mergeAggregator = mergeAggregator,
pValueAggregator = pValueAggregator
)
)
}
callVariantsPaired <- function( data, sampledata, cl = vcConfParams() ){
refBases <- c("A", "C", "G", "T", "N")
cases = which( sampledata$Type == "Case" )
controls = which( sampledata$Type == "Control" )
stopifnot(length(cases)>0, length(controls)>0)
if( cl$annotateWithBackground ){
## Background Error Estimates
cc = sampledata$Column[controls]
controlCounts = data$Counts[1:4,cc,,,drop=F] + data$Counts[5:8,cc,,,drop=F] + data$Counts[9:12,cc,,,drop=F]
controlCoverages = data$Coverages[cc,,,drop=F]
mismatchFrequencies = colSums(colSums( controlCounts )) / colSums(controlCoverages)
controlDeletions = data$Deletions[cc,,]
deletionFrequencies = colSums(controlDeletions) / ( colSums(controlCoverages) + colSums(controlDeletions) )
if("Insertions" %in% names(data)){
controlInsertions = data$Insertions[cc,,]
insertionFrequencies = colSums(controlInsertions) / ( colSums(controlCoverages) + colSums(controlInsertions) )
}
}
names(cases) = sampledata$Sample[cases]
ret = lapply(seq(along=cases), function(i) {
caseColumn = sampledata$Column[cases[i]]
controlColumn = if( "Group" %in% colnames(sampledata) ){
sampledata$Column[ (sampledata$Group == sampledata$Group[cases[i]]) & (sampledata$Type == "Control") ]
}else{
sampledata$Column[ (sampledata$Patient == sampledata$Patient[cases[i]]) & (sampledata$Type == "Control") ]
}
if( length(controlColumn) != 1 ){
msg = paste("Number of 'Control' samples for patient", sampledata$Patient[cases[i]], "is", length(controlColumn) )
con = textConnection("tmpstr", open="w"); tmpstr=""; sink(con); print(sampledata); sink()
msg = paste(msg, "'sampledata' is:", paste(tmpstr, collapse="\n"), sep="\n")
stop(msg)
}
caseCounts = data$Counts[cl$bases,caseColumn,,]
## sum up all mismatches, also the low quality ones
controlCounts = data$Counts[1:4,controlColumn,,] + data$Counts[5:8,controlColumn,,] + data$Counts[9:12,controlColumn,,]
caseCoverage = data$Coverages[caseColumn,,]
controlCoverage = data$Coverages[controlColumn,,]
cov_filter = (caseCoverage[1,] >= cl$minStrandCov &
caseCoverage[1,] <= cl$maxStrandCov &
caseCoverage[2,] >= cl$minStrandCov &
caseCoverage[2,] <= cl$maxStrandCov &
controlCoverage[1,] >= cl$minStrandCov &
controlCoverage[1,] <= cl$maxStrandCov &
controlCoverage[2,] >= cl$minStrandCov &
controlCoverage[2,] <= cl$maxStrandCov)
count_filter = (caseCounts[,1,] >= cl$minStrandAltSupport &
caseCounts[,2,] >= cl$minStrandAltSupport &
controlCounts[,1,] <= cl$maxStrandAltSupportControl &
controlCounts[,2,] <= cl$maxStrandAltSupportControl)
combined_filter = matrix(
rep( cov_filter, dim(count_filter)[1] ),
nrow = dim(count_filter)[1], byrow=TRUE
) & count_filter
varpos = which(colSums( combined_filter ) > 0)
caseDeletions = data$Deletions[caseColumn,,]
controlDeletions = data$Deletions[controlColumn,,]
deletions_filter = (caseDeletions[1,] >= cl$minStrandDelSupport &
caseDeletions[2,] >= cl$minStrandDelSupport &
controlDeletions[1,] <= cl$maxStrandDelSupportControl &
controlDeletions[2,] <= cl$maxStrandDelSupportControl)
combined_filter_del = cov_filter & deletions_filter
delpos = which(combined_filter_del)
if("Insertions" %in% names(data)){
caseInsertions = data$Insertions[caseColumn,,]
controlInsertions = data$Insertions[controlColumn,,]
insertions_filter = (caseInsertions[1,] >= cl$minStrandInsSupport &
caseInsertions[2,] >= cl$minStrandInsSupport &
controlInsertions[1,] <= cl$maxStrandInsSupportControl &
controlInsertions[2,] <= cl$maxStrandInsSupportControl)
combined_filter_ins = cov_filter & insertions_filter
inspos = which(combined_filter_ins)
}else{
inspos <- NULL
}
if( cl$returnDataPoints ){
vars <- {
if( length(varpos) > 0 ){
idxs = lapply( varpos, function(j) which(combined_filter[,j]) )
alt_alleles = lapply(idxs, function(j) bases[j])
gr_pos = unlist(lapply( seq( along = idxs ), function( j ) rep(varpos[j], length(idxs[[j]]))))
idxs = unlist(idxs)
gr_aa = unlist(alt_alleles)
gr_mmf_case = sapply( seq(along = gr_pos), function( j ) caseCounts[idxs[j],1,gr_pos[j]] )
gr_mmr_case = sapply( seq(along = gr_pos), function( j ) caseCounts[idxs[j],2,gr_pos[j]] )
gr_cf_case = sapply( seq(along = gr_pos), function( j ) caseCoverage[1,gr_pos[j]] )
gr_cr_case = sapply( seq(along = gr_pos), function( j ) caseCoverage[2,gr_pos[j]] )
gr_mmf_cont = sapply( seq(along = gr_pos), function( j ) controlCounts[idxs[j],1,gr_pos[j]] )
gr_mmr_cont = sapply( seq(along = gr_pos), function( j ) controlCounts[idxs[j],2,gr_pos[j]] )
gr_cf_cont = sapply( seq(along = gr_pos), function( j ) controlCoverage[1,gr_pos[j]] )
gr_cr_cont = sapply( seq(along = gr_pos), function( j ) controlCoverage[2,gr_pos[j]] )
tmpRefIdx <- data$Reference[gr_pos] + 1
tmpRefIdx[tmpRefIdx == 0] <- 5
tmpRef <- refBases[tmpRefIdx]
tmp = data.frame(
Chrom = strsplit( data$h5dapplyInfo$Group, split="/")[[1]][3],
Start = gr_pos + data$h5dapplyInfo$Blockstart - 1,
End = gr_pos + data$h5dapplyInfo$Blockstart - 1,
Sample = names(cases)[i],
altAllele = gr_aa,
refAllele = tmpRef,
caseCountFwd = gr_mmf_case,
caseCountRev = gr_mmr_case,
caseCoverageFwd = gr_cf_case,
caseCoverageRev = gr_cr_case,
controlCountFwd = gr_mmf_cont,
controlCountRev = gr_mmr_cont,
controlCoverageFwd = gr_cf_cont,
controlCoverageRev = gr_cr_cont
)
rm(tmpRef)
rm(tmpRefIdx)
if( cl$annotateWithBackground ){
tmp$backgroundFrequencyFwd = mismatchFrequencies[1, gr_pos]
tmp$backgroundFrequencyRev = mismatchFrequencies[2, gr_pos]
tmp$pValueFwd = sapply( seq(along=gr_pos), function( j ) binom.test( caseCounts[idxs[j],1,gr_pos[j]], caseCoverage[1,gr_pos[j]], p = mismatchFrequencies[1,gr_pos[j]], alternative = "greater" )$p.value )
tmp$pValueRev = sapply( seq(along=gr_pos), function( j ) binom.test( caseCounts[idxs[j],2,gr_pos[j]], caseCoverage[2,gr_pos[j]], p = mismatchFrequencies[2,gr_pos[j]], alternative = "greater" )$p.value )
}
tmp
}else{
NULL
}
}
dels <- {
if( length(delpos) > 0 ){
tmpRefIdx <- data$Reference[delpos] + 1
tmpRefIdx[tmpRefIdx == 0] <- 5
tmpRef <- refBases[tmpRefIdx]
tmp = data.frame(
Chrom = strsplit( data$h5dapplyInfo$Group, split="/")[[1]][3],
Start = delpos + data$h5dapplyInfo$Blockstart - 1,
End = delpos + data$h5dapplyInfo$Blockstart - 1,
Sample = names(cases)[i],
altAllele = "-",
refAllele = tmpRef,
caseCountFwd = sapply( delpos, function(j) caseDeletions[1,j]),
caseCountRev = sapply( delpos, function(j) caseDeletions[2,j]),
caseCoverageFwd = sapply( delpos, function(j) caseCoverage[1,j]),
caseCoverageRev = sapply( delpos, function(j) caseCoverage[2,j]),
controlCountFwd = sapply( delpos, function(j) controlDeletions[1,j]),
controlCountRev = sapply( delpos, function(j) controlDeletions[2,j]),
controlCoverageFwd = sapply( delpos, function(j) controlCoverage[1,j]),
controlCoverageRev = sapply( delpos, function(j) controlCoverage[2,j])
)
rm(tmpRef)
rm(tmpRefIdx)
# Deletions don't count as coverage but the reads still span that position
tmp$caseCoverageFwd = tmp$caseCoverageFwd + tmp$caseCountFwd
tmp$caseCoverageRev = tmp$caseCoverageRev + tmp$caseCountRev
tmp$controlCoverageFwd = tmp$controlCoverageFwd + tmp$controlCountFwd
tmp$controlCoverageRev = tmp$controlCoverageRev + tmp$controlCountRev
if( cl$annotateWithBackground ){
tmp$pValueFwd = sapply( delpos, function( j ) binom.test( caseDeletions[1,j], caseCoverage[1,j] + caseDeletions[1,j], p = deletionFrequencies[1,j], alternative = "greater" )$p.value )
tmp$pValueRev = sapply( delpos, function( j ) binom.test( caseDeletions[2,j], caseCoverage[2,j] + caseDeletions[2,j], p = deletionFrequencies[2,j], alternative = "greater" )$p.value )
tmp$backgroundFrequencyFwd = deletionFrequencies[1, delpos]
tmp$backgroundFrequencyRev = deletionFrequencies[2, delpos]
}
if( cl$mergeCalls ){
sample_gr = list()
chrom = strsplit( data$h5dapplyInfo$Group, split="/")[[1]][3]
if( nrow(tmp) != 0 ){
tmp_gr = GRanges( S4Vectors::Rle( tmp$Chrom ), ranges = IRanges::IRanges( start = tmp$Start, end = tmp$End ) )
reduced_gr = IRanges::reduce(tmp_gr)
reduced_gr$Sample = names(cases)[i]
ol = IRanges::findOverlaps(reduced_gr, tmp_gr)
reduced_gr$altAllele = "-"
reduced_gr$refAllele = NA
reduced_gr$caseCountFwd = NA
reduced_gr$caseCountRev = NA
reduced_gr$caseCoverageFwd = NA
reduced_gr$caseCoverageRev = NA
reduced_gr$controlCountFwd = NA
reduced_gr$controlCountRev = NA
reduced_gr$controlCoverageFwd = NA
reduced_gr$controlCoverageRev = NA
if( cl$annotateWithBackground ){
reduced_gr$backgroundFrequencyFwd = NA
reduced_gr$backgroundFrequencyRev = NA
reduced_gr$pValueFwd = NA
reduced_gr$pValueRev = NA
}
for( id in seq( length(reduced_gr) ) ){
reduced_gr$refAllele[id] = paste0(tmp$refAllele[subjectHits(ol)[queryHits(ol)==id]], collapse="")
reduced_gr$caseCoverageFwd[id] = cl$mergeAggregator(tmp$caseCoverageFwd[subjectHits(ol)[queryHits(ol)==id]])
reduced_gr$caseCoverageRev[id] = cl$mergeAggregator(tmp$caseCoverageRev[subjectHits(ol)[queryHits(ol)==id]])
reduced_gr$caseCountFwd[id] = cl$mergeAggregator(tmp$caseCountFwd[subjectHits(ol)[queryHits(ol)==id]])
reduced_gr$caseCountRev[id] = cl$mergeAggregator(tmp$caseCountRev[subjectHits(ol)[queryHits(ol)==id]])
reduced_gr$controlCoverageFwd[id] = cl$mergeAggregator(tmp$controlCoverageFwd[subjectHits(ol)[queryHits(ol)==id]])
reduced_gr$controlCoverageRev[id] = cl$mergeAggregator(tmp$controlCoverageRev[subjectHits(ol)[queryHits(ol)==id]])
reduced_gr$controlCountFwd[id] = cl$mergeAggregator(tmp$controlCountFwd[subjectHits(ol)[queryHits(ol)==id]])
reduced_gr$controlCountRev[id] = cl$mergeAggregator(tmp$controlCountRev[subjectHits(ol)[queryHits(ol)==id]])
if( cl$annotateWithBackground ){
reduced_gr$backgroundFrequencyFwd[id] = cl$mergeAggregator(tmp$backgroundFrequencyFwd[subjectHits(ol)[queryHits(ol)==id]])
reduced_gr$backgroundFrequencyRev[id] = cl$mergeAggregator(tmp$backgroundFrequencyRev[subjectHits(ol)[queryHits(ol)==id]])
reduced_gr$pValueFwd[id] = cl$pValueAggregator(tmp$pValueFwd[subjectHits(ol)[queryHits(ol)==id]])
reduced_gr$pValueRev[id] = cl$pValueAggregator(tmp$pValueRev[subjectHits(ol)[queryHits(ol)==id]])
}
}
tmp = as.data.frame( reduced_gr )
tmp$width = NULL
tmp$strand = NULL
colnames(tmp)[1:3] = c( "Chrom", "Start", "End" )
}
}
tmp
}else{
NULL
}
}
insertions <- {
if( length(inspos) > 0 ){
tmpRefIdx <- data$Reference[inspos] + 1
tmpRefIdx[tmpRefIdx == 0] <- 5
tmpRef <- refBases[tmpRefIdx]
tmp = data.frame(
Chrom = strsplit( data$h5dapplyInfo$Group, split="/")[[1]][3],
Start = delpos + data$h5dapplyInfo$Blockstart - 1,
End = delpos + data$h5dapplyInfo$Blockstart - 1,
Sample = names(cases)[i],
altAllele = "+",
refAllele = tmpRef,
caseCountFwd = sapply( inspos, function(j) caseInsertions[1,j]),
caseCountRev = sapply( inspos, function(j) caseInsertions[2,j]),
caseCoverageFwd = sapply( inspos, function(j) caseCoverage[1,j]),
caseCoverageRev = sapply( inspos, function(j) caseCoverage[2,j]),
controlCountFwd = sapply( inspos, function(j) controlInsertions[1,j]),
controlCountRev = sapply( inspos, function(j) controlInsertions[2,j]),
controlCoverageFwd = sapply( inspos, function(j) controlCoverage[1,j]),
controlCoverageRev = sapply( inspos, function(j) controlCoverage[2,j])
)
rm(tmpRef)
rm(tmpRefIdx)
if( cl$annotateWithBackground ){
tmp$pValueFwd = sapply( inspos, function( j ) binom.test( caseInsertions[1,j], caseCoverage[1,j] + caseInsertions[1,j], p = insertionFrequencies[1,j], alternative = "greater" )$p.value )
tmp$pValueRev = sapply( inspos, function( j ) binom.test( caseInsertions[2,j], caseCoverage[2,j] + caseInsertions[2,j], p = insertionFrequencies[2,j], alternative = "greater" )$p.value )
tmp$backgroundFrequencyFwd = insertionFrequencies[1, inspos]
tmp$backgroundFrequencyRev = insertionFrequencies[2, inspos]
}
tmp
}else{
NULL
}
}
ret <- rbind( vars, dels, insertions )
ret$caseCount <- ret$caseCountFwd + ret$caseCountRev
ret$controlCount <- ret$controlCountFwd + ret$controlCountRev
ret$caseCoverage <- ret$caseCoverageFwd + ret$caseCoverageRev
ret$controlCoverage <- ret$controlCoverageFwd + ret$controlCoverageRev
return(ret)
}else{
if( length(varpos) > 0 | length(delpos) > 0 | length(inspos) > 0 ){
return( c( varpos, delpos, inspos ) + data$h5dapplyInfo$Blockstart )
}else{
return( NULL )
}
}
})
return( do.call( rbind, ret ) )
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.