superFreq: Calls and tracks CNAs, SNVs and clones over multiple cancer exomes.

Documented in getCohortMutationMatrix plotCNAheatmapOverGoI plotCNAheatmapWithGoI plotCohortMutationHeatmap

#' plots a heatmap of copy numbers, highlighting genes of interest
#'
#' @param GoI character vector. Genes of interest.
#' @param Rdirectory character The Rdirectory where the superFreq run was run.
#' @param genome character. The genome the sample is aligned to. 'hg19', 'hg38' or 'mm10'
#'
#' @details This function plots a heatmap of copy numbers across the samples and genome. Genes of Interest are highlighted. Can be run after the superFreq run is completed.
#'
#'
#' @export
#'
plotCNAheatmapWithGoI = function(GoI, Rdirectory, genome, metaDataFile, excludeIndividuals=c(), excludeSamples=c(), GoIakas=c(), cpus=1) {
  superFreq:::resetMargins()
  superFreq:::plotCNAbatchHeatmap(Rdirectory=Rdirectory, metaDataFile=metaDataFile, genome=genome,
                                  excludeIndividuals=excludeIndividuals, excludeSamples=excludeSamples, cpus=cpus)
  captureRegions = loadAnyCaptureRegions(Rdirectory, metaDataFile, excludeIndividuals=excludeIndividuals, excludeSamples=excludeSamples)
  ymax = par("usr")[4]
  for ( gene in GoI ) {
    crs = captureRegions[captureRegions$region == gene]
    maxx = chrToX(seqnames(crs[1]), max(end(crs)), genome=genome)
    minx = chrToX(seqnames(crs[1]), min(start(crs)), genome=genome)
    midx = (minx+maxx)/2
    segments(midx, 0.5, minx, ymax-0.75, col=mcri('cyan', 0.5), lwd=0.5)
    text(midx, 0.25, renameGoIs(gene, GoIakas), cex=0.8, col=mcri('cyan'))
  }
}

#' plots a heatmap of copy numbers, focusing on a gene of interest
#'
#' @param GoI character vector. Genes of interest.
#' @param Rdirectory character The Rdirectory where the superFreq run was run.
#' @param genome character. The genome the sample is aligned to. 'hg19', 'hg38' or 'mm10'
#' @param padding numeric. How many basepairs next to the gene to show. default 3e6.
#'
#' @details This function plots a heatmap of copy numbers across the samples, focused at a gene of interest. Can be run after the superFreq run is completed.
#'
#'
#' @export
#'
plotCNAheatmapOverGoI = function(gene, Rdirectory, genome, metaDataFile, excludeIndividuals=c(), excludeSamples=c(), GoIakas=c(), cpus=1, padding=3e6) {
  captureRegions = loadAnyCaptureRegions(Rdirectory, metaDataFile, excludeIndividuals=excludeIndividuals, excludeSamples=excludeSamples)
  crs = captureRegions[captureRegions$region == gene]
  maxx = chrToX(seqnames(crs[1]), max(end(crs)), genome=genome)
  minx = chrToX(seqnames(crs[1]), min(start(crs)), genome=genome)
  midx = (minx+maxx)/2
  
  superFreq:::resetMargins()
  superFreq:::plotCNAbatchHeatmap(Rdirectory=Rdirectory, metaDataFile=metaDataFile, genome=genome, xlim=c(minx-padding, maxx+padding),
                                  excludeIndividuals=excludeIndividuals, excludeSamples=excludeSamples, cpus=cpus)
  ymax = par("usr")[4]
  segments(minx, 0.5, minx, ymax-0.75, col=mcri('cyan'))
  segments(maxx, 0.5, maxx, ymax-0.75, col=mcri('cyan'))
  text(midx, 0.25, renameGoIs(gene, GoIakas), cex=0.8, col=mcri('cyan'))
}



#' plots mutations in samples in Genes of Interest
#'
#' @param GoI character vector. Genes of interest for the analysis
#' @param Rdirectory character The Rdirectory where the superFreq run was run.
#'
#' @details This function plots a mutation matrix showing CNAs and point mutations in the Genes of Interest across the analysed samples in the Rdirectory. Should be run after the superFreq() run has completed.
#'
#' @export
#'
plotCohortMutationHeatmap = function(GoI, Rdirectory, metaDataFile, excludeIndividuals=c(), excludeSamples=c(), GoIakas=c(), cpus=1) {
  clusterList = superFreq:::loadClusterList(Rdirectory=Rdirectory, metaDataFile=metaDataFile, excludeIndividuals=excludeIndividuals,
                                excludeSamples=excludeSamples, cpus=cpus)
  sampleList = lapply(clusterList, names)
  qsList = superFreq:::loadQsList(Rdirectory=Rdirectory, metaDataFile=metaDataFile,
                                  excludeIndividuals=excludeIndividuals, excludeSamples=excludeSamples, cpus=cpus)
  
  #set up plot and sample/gene labels
  superFreq:::setupCohortPlot(GoI, sampleList, GoIakas)

  #fill in CNAs as background colour for each box
  colMx = superFreq:::addCNAs(GoI, clusterList, sampleList)

  #add dots for point mutations
  superFreq:::addPointMutations(GoI, qsList, sampleList, colMx=colMx)
}

#helper function
setupCohortPlot = function(GoI, sampleList, GoIakas=c()) {

  #set up scale and grid
  xScale = length(GoI)
  yScale = length(unlist(sampleList))
  xTicks = 1:xScale
  yTicks = 1:yScale

  #title
  main = 'mutation matrix'
  
  #empty plot
  plot(0, type='n', xaxt='n', xlim=c(-0.15, 1)*(xScale+0.7), ylim=c(0, 1.3)*yScale, frame=F, yaxt='n', xlab='', ylab='', main=main)

  #grid
  segments(0:xScale + 0.5, 0.5, 0:xScale + 0.5, yScale*1.15, lwd=0.5, col='grey')
  if ( yScale < 100 )
    segments(-0.03*xScale, 0:yScale + 0.5, xScale + 0.5, 0:yScale + 0.5, lwd=0.3*min(1, 30/yScale), col=rgb(0.8, 0.8, 0.8))
  if ( length(sampleList) > 1 ) {
    indSeparators = cumsum(sapply(sampleList, length))
    indSeparators = indSeparators[-length(indSeparators)]
        #if many samples, thinner separating lines or no lines at all.
    if ( length(sampleList) < 100 )
      segments(-0.15*xScale, indSeparators + 0.5, xScale + 0.5, indSeparators + 0.5, lwd=min(1, 30/length(sampleList)), col='grey')
  }

  textCol = rgb(0.5,0.5,0.5)
  for ( ind in names(sampleList) ) {
    sys = cumsum(sapply(sampleList, length))[ind] - length(sampleList[[ind]]) + 1:length(sampleList[[ind]])
    text(-0.03*xScale, sys, sampleList[[ind]], col=textCol, adj=c(1, 0.5), cex=0.8, font=2)
  }

  #topbar
  text(xTicks, yScale+0.7, superFreq:::renameGoIs(GoI, GoIakas), adj=c(0, 0.5), srt=90, cex=0.8, font=2, col=textCol)

  #legend
  legend('topleft', c('ampli', 'gain', 'LOH', 'loss', 'SNV', 'biall', 'trunc'), bg='white', pch=c(15, 15, 15, 15, 16, 16, 17), col=mcri(c('darkred', 'red', 'green', 'blue', 'black', 'black', 'black')), pt.cex=c(2,2,2,2,1,1.4,1.2), pt.lwd=c(1,1,1,1,1,1,2))
}

#helper function
addCNAs = function(GoI, clusterList, sampleList) {
  #set up scale and grid
  xScale = length(GoI)
  yScale = length(unlist(sampleList))
  xTicks = 1:xScale
  yTicks = 1:yScale

  cr = clusterList[[1]][[1]]$CR
  XoI = (cr[GoI,]$x1+cr[GoI,]$x2)/2
  names(XoI) = GoI
  xs = 1:length(GoI)
  names(xs) = GoI

  colMx = matrix('black', ncol=length(GoI), nrow=length(unlist(sampleList)))
  rownames(colMx) = unlist(sampleList)
  colnames(colMx) = GoI
  for ( ind in names(sampleList) ) {
    sys = cumsum(sapply(sampleList, length))[ind] - length(sampleList[[ind]]) + 1:length(sampleList[[ind]])
    samples = sampleList[[ind]]
    names(sys) = samples
    for ( sample in samples ) {
      clusters = clusterList[[ind]][[sample]]$clusters
      y = sys[sample]
      for ( gene in GoI ) {
        call = clusters$call[clusters$x2 > XoI[gene] & clusters$x1 < XoI[gene]]
        if  ( length(call) == 0 | call %in% c('AB', 'AB?', 'AB??') ) next
        clonality = clusters$clonality[clusters$x2 > XoI[gene] & clusters$x1 < XoI[gene]]
        col = superFreq:::callToColHeatmap(call, sqrt(clonality))
        colMx[sample, gene] = col
        x = xs[gene]
        rect(x-0.45, y-0.5, x+0.45, y+0.5, border=F, col=col)
      }
    }
  }

  return(invisible(colMx))
}

#helper function
callToColHeatmap = function(call, clonality) {
  if ( call %in% c('A', 'A?', 'A??', 'B', 'B?', 'B??') ) return(mcri('blue', clonality))
  if ( call %in% c('AAB', 'AAB?', 'AAB??', 'BBA', 'BBA?', 'BBA??') ) return(mcri('red', clonality))
  if ( call %in% c('AA', 'AA?', 'AA??', 'BB', 'BB?', 'BB??', 'AAA', 'AAA?', 'AAA??') ) return(mcri('green', clonality))
  if ( call %in% c('CL', 'CL?', 'CL??') ) return(mcri('darkblue', clonality))
  if ( nchar(gsub('?', '', call)) > 3 ) {
    effectiveN = nchar(gsub('?', '', call))*clonality
    return(mcri('darkred', pmin(1, effectiveN/5)))
  }
  stop('Couldnt find colour.')
}


#helper function
addPointMutations = function(GoI, qsList, sampleList, colMx) {
  #set up scale and grid
  xScale = length(GoI)
  yScale = length(unlist(sampleList))
  xTicks = 1:xScale
  yTicks = 1:yScale

  xs = 1:length(GoI)
  names(xs) = GoI

  for ( ind in names(sampleList) ) {
    sys = cumsum(sapply(sampleList, length))[ind] - length(sampleList[[ind]]) + 1:length(sampleList[[ind]])
    samples = sampleList[[ind]]
    names(sys) = samples
    for ( sample in samples ) {
      q = qsList[[ind]][[sample]]
      q = q[q$somaticP > 0.5 & q$severity < 11 & q$consensusGene %in% GoI & q$var > 0.15*q$cov,]
      y = sys[sample]
      for ( gene in GoI ) {
        qg = q[q$consensusGene == gene,]
        if  ( nrow(qg) == 0 ) next
        cex=1
        if ( nrow(qg) > 1 ) cex=2
        else if ( qg$var > qg$cov*0.6 & pbinom(qg$var, qg$cov, 0.5, lower.tail=F) < 0.05 ) cex=2
        else if ( qg$var < qg$cov/4 ) cex = 4*qg$var/qg$cov
        pointcol = 'black'
        if ( yScale >= 100 ) pointcol = colMx[sample, gene]
        bordercol = 'white'
        x = xs[gene]
        cex=sqrt(cex*1.2)*min(1, max(0.6, sqrt(50/yScale)))
        lwd = 1.5*min(1, max(0.6, sqrt(50/yScale)))
        points(x, y, col=bordercol, cex=cex, pch=ifelse(qg$severity < 10, 24, 21), lwd=lwd, bg=pointcol)
      }
    }
  }
}


renameGoIs = function(genes, GoIakas) {
  for ( SYMBOLname in names(GoIakas) ) {
    genes = gsub(SYMBOLname, GoIakas[SYMBOLname], genes)
  }
  return(genes)
}


findGoI = function(Rdirectory, metaDataFile, cosmicCensus=T, clinvarPathogenic=T, clinvarAny=F, excludeIndividuals=c(), excludeSamples=c(), maxNumberOfGenes=50, cpus=1, genome='hg19') {
  qsList = superFreq:::loadQsList(Rdirectory=Rdirectory, metaDataFile=metaDataFile, excludeIndividuals=excludeIndividuals, excludeSamples=excludeSamples, cpus=cpus)
  qs = do.call(c, qsList)
  GoIlist = lapply(qs, function(q) {
    isSomatic = q$somaticP > 0.5
    isCoding = q$severity < 11
    isntSubclonal = q$var > 0.15*q$cov
    isCosmicCensus = q$isCosmicCensus
    isClinvarAny = q$ClinVar_ClinicalSignificance != ''
    isClinvarPathogenic =
      grepl('[pP]athogenic,', paste0(q$ClinVar_ClinicalSignificance)) |
      grepl('[pP]athogenic$', paste0(q$ClinVar_ClinicalSignificance))
    use =
      isSomatic & isCoding & isntSubclonal &
      ((isCosmicCensus&cosmicCensus) | (isClinvarPathogenic&clinvarPathogenic) | (isClinvarAny&clinvarAny))
    if ( genome == 'mm10' )
    	use = isSomatic & isCoding & isntSubclonal & (isCosmicCensus | !cosmicCensus)

    return(unique(q$consensusGene[use]))
    })
  GoI = unique(unlist(GoIlist))
  hits = sapply(GoI, function(gene) {
    sum(sapply(names(qsList), function(ind) {
      qs = qsList[[ind]]
      GoIsublist = GoIlist[paste0(ind, '.', names(qs))]
      genesInInd = unique(unlist(GoIsublist))
      hit = gene %in% genesInInd
      return(hit)
    }))
  })
  GoI = GoI[order(-hits)]
  if ( length(GoI) > maxNumberOfGenes ) GoI = GoI[1:maxNumberOfGenes]
}




#' returns relevant mutations in samples in Genes of Interest
#'
#' @param GoI character vector. Genes of interest for the analysis
#' @param Rdirectory character The Rdirectory where the superFreq run was run.
#'
#' @details This function plots a mutation matrix showing CNAs and point mutations in the Genes of Interest across the analysed samples in the Rdirectory. Should be run after the superFreq() run has completed.
#'
#' @export
#'
getCohortMutationMatrix = function(GoI, Rdirectory, metaDataFile, excludeIndividuals=c(), excludeSamples=c(), GoIakas=c(), cpus=1, includeGermlineLike=T) {
  clusterList = superFreq:::loadClusterList(Rdirectory=Rdirectory, metaDataFile=metaDataFile, excludeIndividuals=excludeIndividuals,
                                excludeSamples=excludeSamples, cpus=cpus)
  sampleList = lapply(clusterList, names)
  qsList = superFreq:::loadQsList(Rdirectory=Rdirectory, metaDataFile=metaDataFile,
                                  excludeIndividuals=excludeIndividuals, excludeSamples=excludeSamples, cpus=cpus)
  

  #get CNA calls and clonality
  cnaMx = superFreq:::getCohortCNAmatrix(GoI, clusterList, sampleList)

  #add dots for point mutations
  snvMx = getCohortSNVmatrix(GoI, qsList, sampleList, colMx=colMx, includeGermlineLike=includeGermlineLike)

  return(list(cnaMx=cnaMx, snvMx=snvMx))
}

#helper function
getCohortSNVmatrix = function(GoI, qsList, sampleList, colMx, includeGermlineLike=includeGermlineLike) {

  biallelicMx = matrix(FALSE, ncol=length(unlist(sampleList)), nrow=length(GoI))
  mostSevereMx = matrix('', ncol=length(unlist(sampleList)), nrow=length(GoI))
  rownames(biallelicMx) = rownames(mostSevereMx) = GoI
  colnames(biallelicMx) = colnames(mostSevereMx) = unlist(sampleList)
  
  for ( ind in names(sampleList) ) {
    samples = sampleList[[ind]]
    for ( sample in samples ) {
      q = qsList[[ind]][[sample]]
      q = q[q$somaticP > 0.5 & q$severity < 11 & q$consensusGene %in% GoI & q$var > 0.15*q$cov,]
	  if ( !includeGermlineLike ) q = q[!q$germline | is.na(q$germline),]
      for ( gene in GoI ) {
        qg = q[q$consensusGene == gene,]
        if  ( nrow(qg) == 0 ) next
        if ( nrow(qg) > 1 ) biallelicMx[gene, sample] = TRUE
        else biallelicMx[gene, sample] = qg$var > qg$cov*0.6 & pbinom(qg$var, qg$cov, 0.5, lower.tail=F) < 0.05
        mostSevereMx[gene, sample] = qg$type[which.min(qg$severity)]
      }
    }
  }

  return(list(biallelicMx=biallelicMx, mostSevereMx=mostSevereMx))  
}

#helper function
getCohortCNAmatrix = function(GoI, clusterList, sampleList) {
  #set up scale and grid

  cr = clusterList[[1]][[1]]$CR
  XoI = (cr[GoI,]$x1+cr[GoI,]$x2)/2
  names(XoI) = GoI

  callMx = matrix('', ncol=length(unlist(sampleList)), nrow=length(GoI))
  clonalityMx = matrix(0, ncol=length(unlist(sampleList)), nrow=length(GoI))
  rownames(callMx) = rownames(clonalityMx) = GoI
  colnames(callMx) = colnames(clonalityMx) = unlist(sampleList)
  for ( ind in names(sampleList) ) {
    samples = sampleList[[ind]]
    for ( sample in samples ) {
      clusters = clusterList[[ind]][[sample]]$clusters
      for ( gene in GoI ) {
        call = clusters$call[clusters$x2 > XoI[gene] & clusters$x1 < XoI[gene]]
        if  ( length(call) == 0 ) next
        clonalityMx[gene, sample] = clusters$clonality[clusters$x2 > XoI[gene] & clusters$x1 < XoI[gene]]
        callMx[gene, sample] = call
      }
    }
  }

  return(list(callMx=callMx, clonalityMx=clonalityMx))
}

ChristofferFlensburg/superFreq documentation built on Nov. 15, 2023, 6:15 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

ChristofferFlensburg/superFreq
Calls and tracks CNAs, SNVs and clones over multiple cancer exomes.

R/genesOfInterest.R
In ChristofferFlensburg/superFreq: Calls and tracks CNAs, SNVs and clones over multiple cancer exomes.

Defines functions getCohortCNAmatrix getCohortSNVmatrix getCohortMutationMatrix findGoI renameGoIs addPointMutations callToColHeatmap addCNAs setupCohortPlot plotCohortMutationHeatmap plotCNAheatmapOverGoI plotCNAheatmapWithGoI

Documented in getCohortMutationMatrix plotCNAheatmapOverGoI plotCNAheatmapWithGoI plotCohortMutationHeatmap

R Package Documentation

Browse R Packages

We want your feedback!

ChristofferFlensburg/superFreq Calls and tracks CNAs, SNVs and clones over multiple cancer exomes.

R/genesOfInterest.R In ChristofferFlensburg/superFreq: Calls and tracks CNAs, SNVs and clones over multiple cancer exomes.

Defines functions getCohortCNAmatrix getCohortSNVmatrix getCohortMutationMatrix findGoI renameGoIs addPointMutations callToColHeatmap addCNAs setupCohortPlot plotCohortMutationHeatmap plotCNAheatmapOverGoI plotCNAheatmapWithGoI

Documented in getCohortMutationMatrix plotCNAheatmapOverGoI plotCNAheatmapWithGoI plotCohortMutationHeatmap

R Package Documentation

Browse R Packages

We want your feedback!

ChristofferFlensburg/superFreq
Calls and tracks CNAs, SNVs and clones over multiple cancer exomes.

R/genesOfInterest.R
In ChristofferFlensburg/superFreq: Calls and tracks CNAs, SNVs and clones over multiple cancer exomes.