R/mutation.spectrum.R

Defines functions mutationSpectrum

Documented in mutationSpectrum

bases = c("A", "C", "G", "T")

mutationSpectrum <- function( variantCalls, tallyFile, study, context = 1 ){
  stopifnot(class(variantCalls) == "data.frame")
  chromosomes = unique( variantCalls$Chrom )
  variantCalls$Prefix = ""
  variantCalls$Suffix = ""
  for(chrom in chromosomes){
    sel = variantCalls$Chrom == chrom
    minpos = max( min( variantCalls$Start[sel] ) - 10 - context, 0 )
    maxpos = max( variantCalls$End[sel] ) + 10 + context
    reference = as.integer(h5read( tallyFile, paste( study, chrom, "Reference", sep = "/" ), index = list(minpos:maxpos))) + 1 #Added explicit cast because of h5read returning raw instead of in in this context
    offset = minpos
    variantCalls$Prefix[sel] = sapply( variantCalls$Start[sel], function(pos) paste(bases[reference[ (pos - context - offset + 1):(pos - offset) ]], collapse="") )
    variantCalls$Suffix[sel] = sapply( variantCalls$End[sel], function(pos) paste(bases[reference[ (pos - offset + 2):(pos + context - offset + 1) ]], collapse="") )
    variantCalls$Context[sel] = sapply( variantCalls$Start[sel], function(pos) paste(bases[reference[ (pos - context - offset + 1):(pos - offset + context + 1) ]], collapse="") )
  }
  MutationTypes <- c( "A>C" = "T>G", "A>G" = "T>C", "A>T" = "T>A", "G>C" = "C>G", "G>A" = "C>T", "G>T" = "C>A" )
  revComp <- c( "A" = "T", "C" = "G", "G" = "C", "T" = "A" )
  variantCalls$MutationType <- paste(variantCalls$refAllele, variantCalls$altAllele, sep=">")
  variantCalls$FlipMe <- variantCalls$MutationType %in% names(MutationTypes)
  variantCalls$MutationType[variantCalls$FlipMe] <- MutationTypes[ variantCalls$MutationType[variantCalls$FlipMe] ]
  tmp <- variantCalls$Prefix[variantCalls$FlipMe]
  variantCalls$Prefix[variantCalls$FlipMe] <- revComp[ variantCalls$Suffix[variantCalls$FlipMe] ]
  variantCalls$Suffix[variantCalls$FlipMe] <- revComp[ tmp ]
  variantCalls$refAllele[variantCalls$FlipMe] <- revComp[ variantCalls$refAllele[variantCalls$FlipMe] ]
  variantCalls$altAllele[variantCalls$FlipMe] <- revComp[ variantCalls$altAllele[variantCalls$FlipMe] ]
  return( variantCalls[,c("refAllele","altAllele","Sample","Prefix","Suffix","MutationType","Context")] )
}

Try the h5vc package in your browser

Any scripts or data that you put into this service are public.

h5vc documentation built on Nov. 8, 2020, 4:56 p.m.