Nothing
.blockify <- function( positions ){
if(length(positions) == 1){
return(c(1))
}else{
distances <- diff(positions)
blocks <- rep(0, length(positions))
blocks[1] <- 1
for( j in seq(length(distances)) ){
if( distances[j] == 1 ){
blocks[j+1] <- blocks[j]
}else{
blocks[j+1] <- blocks[j] + 1
}
}
return( blocks )
}
}
callVariantsPairedFisher <- function( data, sampledata, pValCutOff = 0.05, minCoverage = 5, mergeDels = TRUE, mergeAggregator = mean ){
blocksize <- data$h5dapplyInfo$Blockend - data$h5dapplyInfo$Blockstart + 1
cases <- which( sampledata$Type == "Case" )
controls <- which( sampledata$Type == "Control" )
stopifnot(length(cases)>0, length(controls)>0)
names(cases) <- sampledata$Sample[cases]
callDels <- "Deletions" %in% names(data)
the_bases <- c( "A", "C", "G", "T", "-", "N" )
if(callDels){
altAlleles <- the_bases[1:5]
}else{
altAlleles <- the_bases[1:4]
}
ret <- lapply(seq(along=cases), function(i) {
caseColumn = sampledata$Column[cases[i]]
controlColumn = if( "Group" %in% colnames(sampledata) ){
sampledata$Column[ (sampledata$Group == sampledata$Group[cases[i]]) & (sampledata$Type == "Control") ]
}else{
sampledata$Column[ (sampledata$Patient == sampledata$Patient[cases[i]]) & (sampledata$Type == "Control") ]
}
if( length(controlColumn) != 1 ){
msg = paste("Number of 'Control' samples for patient", sampledata$Patient[cases[i]], "is", length(controlColumn) )
con = textConnection("tmpstr", open="w"); tmpstr=""; sink(con); print(sampledata); sink()
msg = paste(msg, "'sampledata' is:", paste(tmpstr, collapse="\n"), sep="\n")
stop(msg)
}
caseCounts <- apply(data$Counts[,caseColumn,,], c(1,3), sum)
caseCounts <- caseCounts[1:4,] + caseCounts[5:8,] + caseCounts[9:12,]
controlCounts <- apply(data$Counts[,controlColumn,,], c(1,3), sum)
controlCounts <- controlCounts[1:4,] + controlCounts[5:8,] + controlCounts[9:12,]
caseCoverage <- data$Coverages[caseColumn,1,] + data$Coverages[caseColumn,2,]
controlCoverage <- data$Coverages[controlColumn,1,] + data$Coverages[controlColumn,2,]
if( callDels ){
caseDeletions <- data$Deletions[caseColumn,1,] + data$Deletions[caseColumn,2,]
controlDeletions <- data$Deletions[controlColumn,1,] + data$Deletions[controlColumn,2,]
}
refs <- data$Reference
refs[refs == -1] = 5
refs <- the_bases[refs + 1]
template <- data.frame(
Sample = names(cases)[i],
Chrom = strsplit(data$h5dapplyInfo$Group, split="/")[[1]][3],
Start = seq(length = blocksize) + data$h5dapplyInfo$Blockstart - 1,
End = seq(length = blocksize) + data$h5dapplyInfo$Blockstart - 1,
refAllele = refs
)
pvals <- lapply( seq(length = length(altAlleles)), function(altAllele){
tmp <- template
tmp$altAllele <- the_bases[altAllele]
tmp$caseCoverage <- caseCoverage
tmp$controlCoverage <- controlCoverage
if( altAllele == 5 ){
tmp$caseCount <- caseDeletions
tmp$controlCount <- controlDeletions
tmp$caseCoverage <- tmp$caseCoverage + caseDeletions
tmp$controlCoverage <- tmp$controlCoverage + controlDeletions
}else{
tmp$caseCount <- caseCounts[altAllele,]
tmp$controlCount <- controlCounts[altAllele,]
}
tmp$pValue <- sapply(
seq(length = blocksize),
function(gp) {
contingencyTable <- matrix(
c( tmp$caseCount[gp], tmp$caseCoverage[gp] - tmp$caseCount[gp], tmp$controlCount[gp], tmp$controlCoverage[gp] - tmp$controlCount[gp] ),
nrow = 2
)
ft = fisher.test( contingencyTable )
return( ft$p.value )
})
tmp <- subset(tmp, pValue <= pValCutOff & caseCoverage >= minCoverage & controlCoverage >= minCoverage )
if(nrow(tmp) == 0){
return(NULL)
}
if( callDels & mergeDels & altAllele == 5 ){
tmp$BlockID <- .blockify(tmp$Start)
blockCounts <- table(tmp$BlockID)
singleValueBlocks <- names(blockCounts)[blockCounts == 1]
tmptmp <- subset(tmp, !(BlockID %in% singleValueBlocks) )
tmp <- subset(tmp, (BlockID %in% singleValueBlocks) )
tmptmp <- lapply( split( x = tmptmp, f = tmptmp$BlockID ), function(df){
tmp_df <- data.frame( Sample = df$Sample[1], Chrom = df$Chrom[1], Start = min(df$Start), End = max(df$End), refAllele = paste(df$refAllele, collapse=""), altAllele = "-" )
for( slot in colnames(df) ){
if(!(slot %in% colnames(tmp_df))){
tmp_df[[slot]] <- mergeAggregator( df[[slot]] )
}
}
return(tmp_df)
})
tmp <- rbind( tmp, do.call(rbind, tmptmp) )
tmp$BlockID <- NULL
}
return(tmp)
}
)
return( do.call(rbind, pvals) )
})
ret <- do.call( rbind, ret )
ret$caseAF <- ret$caseCount / ret$caseCoverage
ret$controlAF <- ret$controlCount / ret$controlCoverage
return(ret)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.