Nothing
#' Read and summarize gistic output.
#' @description A little function to summarize gistic output files. Summarized output is returned as a list of tables.
#' @param gisticAllLesionsFile All Lesions file generated by gistic. e.g; all_lesions.conf_XX.txt, where XX is the confidence level. Required. Default NULL.
#' @param gisticAmpGenesFile Amplification Genes file generated by gistic. e.g; amp_genes.conf_XX.txt, where XX is the confidence level. Default NULL.
#' @param gisticDelGenesFile Deletion Genes file generated by gistic. e.g; del_genes.conf_XX.txt, where XX is the confidence level. Default NULL.
#' @param gisticScoresFile scores.gistic file generated by gistic.
#' @param cnLevel level of CN changes to use. Can be 'all', 'deep' or 'shallow'. Default uses all i.e, genes with both 'shallow' or 'deep' CN changes
#' @param isTCGA Is the data from TCGA. Default FALSE.
#' @param verbose Default TRUE
#' @return A list of summarized data.
#' @export
#' @examples
#' all.lesions <- system.file("extdata", "all_lesions.conf_99.txt", package = "maftools")
#' amp.genes <- system.file("extdata", "amp_genes.conf_99.txt", package = "maftools")
#' del.genes <- system.file("extdata", "del_genes.conf_99.txt", package = "maftools")
#' scores.gistic <- system.file("extdata", "scores.gistic", package = "maftools")
#' laml.gistic = readGistic(gisticAllLesionsFile = all.lesions, gisticAmpGenesFile = amp.genes, gisticDelGenesFile = del.genes, gisticScoresFile = scores.gistic, isTCGA = TRUE)
#' @details
#' Requires output files generated from GISTIC. Gistic documentation can be found here ftp://ftp.broadinstitute.org/pub/GISTIC2.0/GISTICDocumentation_standalone.htm
readGistic = function(gisticAllLesionsFile = NULL, gisticAmpGenesFile = NULL, gisticDelGenesFile = NULL, gisticScoresFile = NULL, cnLevel = 'all', isTCGA = FALSE, verbose = TRUE){
if(is.null(gisticAllLesionsFile)){
stop('Misisng gisticAllLesionsFile file! Provide all_lesions.conf_xx.txt generated by Gistic')
}
if(is.null(gisticAmpGenesFile) & is.null(gisticDelGenesFile)){
stop('Missing gistic genes files! Provide atleast one of amp_genes.conf_xx.txt or del_genes.conf_xx.txt file.')
}
if(is.null(gisticScoresFile)){
stop('Misisng scores.gistic file!')
}
if(verbose){
cat('-Processing Gistic files..\n')
}
all.lesions = data.table::fread(input = gisticAllLesionsFile,stringsAsFactors = FALSE, header = TRUE)
#Remove an empty columns at the end of the table
if(colnames(all.lesions)[ncol(all.lesions)] == paste('V', ncol(all.lesions), sep='')){
all.lesions = all.lesions[,1:c(ncol(all.lesions)-1), with = FALSE]
}
#Rename columns and fix spaces and dirty underscores
colnames(all.lesions)[c(1,2,3)] = c('Unique_Name', 'cytoband', 'Wide_Peak_Limits')
all.lesions$Unique_Name = gsub(pattern = ' ', replacement = '_', x = all.lesions$Unique_Name)
all.lesions$Unique_Name = gsub(pattern = '__', replacement = '_', x = all.lesions$Unique_Name)
#Remove actual sn values and get wide peak limits
all.lesions = all.lesions[!grep(pattern = 'CN_values', x = all.lesions[,Unique_Name]),]
all.lesions[, Wide_Peak_Limits := sapply(strsplit(x = as.character(all.lesions[,Wide_Peak_Limits]), split = '(', fixed = TRUE), '[', 1)]
#qvlaues for all wide peaks
qval = all.lesions[,c(1, 2, 3, 6), with =FALSE]
colnames(x = qval) = c('Unique_Name', 'Cytoband', 'Wide_Peak_Limits', 'qvalues')
all.lesions = all.lesions[,c(1,2,3,10:ncol(all.lesions)), with =FALSE]
#Generate an unique peak-id
all.lesions$cytoband = paste(substr(x = all.lesions$Unique_Name, start = 1, stop = 3), all.lesions$cytoband, sep = '_')
all.lesions$cytoband = paste(all.lesions$cytoband, all.lesions$Wide_Peak_Limits, sep = '_')
qval$peakID = all.lesions[,cytoband]
all.lesions[,Unique_Name := NULL]
all.lesions[,Wide_Peak_Limits := NULL]
cnSamples = colnames(all.lesions)[2:ncol(all.lesions)]
all.lesions.melt = data.table::melt(all.lesions, id.vars = 'cytoband')
cnLevel = match.arg(arg = cnLevel, choices = c("all", "deep", "shallow"))
if(cnLevel == 'all'){
all.lesions.melt = all.lesions.melt[value %in% c(1, 2)] #1 = shallow amp/del; 2 = deep amp/del
}else if(cnLevel == 'deep'){
all.lesions.melt = all.lesions.melt[value %in% 2] #1 = shallow amp/del; 2 = deep amp/del
}else if(cnLevel == 'shallow'){
all.lesions.melt = all.lesions.melt[value %in% 1] #1 = shallow amp/del; 2 = deep amp/del
}
cnGenes = data.table::data.table()
if(!is.null(gisticAmpGenesFile)){
if(verbose){
cat(paste0('--Processing ', basename(gisticAmpGenesFile), '\n'))
}
ampGenes = data.table::fread(input = gisticAmpGenesFile, stringsAsFactors = FALSE, header = TRUE)
if(colnames(ampGenes)[ncol(ampGenes)] == paste('V', ncol(ampGenes), sep='')){
ampGenes = ampGenes[,1:c(ncol(ampGenes)-1), with = FALSE]
}
wpb = as.character(ampGenes[3]) #wide peak boundaries
wpb = wpb[2:length(wpb)]
#we need data only from 4th row
ampGenes = ampGenes[c(4:nrow(ampGenes)),]
ampGenes$cytoband = gsub(pattern = ' ', replacement = '_', x = ampGenes$cytoband)
colnames(ampGenes) = c( 'cytoband',paste0( 'Amp_', colnames(ampGenes)[2:length(colnames(ampGenes))], '_',wpb))
#data.table::setDF(x = ampGenes)
ampGenes = suppressWarnings(data.table::melt(ampGenes, id.vars = 'cytoband'))
data.table::setDT(ampGenes)
#Remove empty values
ampGenes = ampGenes[!value %in% '']
#ampGenes$value = sapply(strsplit(x = ampGenes$value, split = '|', fixed = TRUE), '[', 1)
ampGenes = ampGenes[!grep(pattern = '|', x = ampGenes$value, fixed = TRUE)] #remove genes with ambiguous annotation
ampGenes = ampGenes[,.(variable, value)]
cnGenes = rbind(cnGenes, ampGenes)
}
if(!is.null(gisticDelGenesFile)){
if(verbose){
cat(paste0('--Processing ', basename(gisticDelGenesFile), '\n'))
}
delGenes = data.table::fread(input = gisticDelGenesFile, stringsAsFactors = FALSE, header = TRUE)
if(colnames(delGenes)[ncol(delGenes)] == paste('V', ncol(delGenes), sep='')){
delGenes = delGenes[,1:c(ncol(delGenes)-1), with = FALSE]
}
wpb = as.character(delGenes[3])
wpb = wpb[2:length(wpb)]
delGenes = delGenes[c(4:nrow(delGenes)),]
delGenes$cytoband = gsub(pattern = ' ', replacement = '_', x = delGenes$cytoband)
colnames(delGenes) = c( 'cytoband',paste0( 'Del_', colnames(delGenes)[2:length(colnames(delGenes))], '_',wpb))
#data.table::setDF(x = delGenes)
delGenes = suppressWarnings(data.table::melt(delGenes, id.vars = 'cytoband'))
data.table::setDT(delGenes)
delGenes = delGenes[!value %in% '']
#delGenes$value = sapply(strsplit(x = delGenes$value, split = '|', fixed = TRUE), '[', 1)
delGenes = delGenes[!grep(pattern = '|', x = delGenes$value, fixed = TRUE)]
delGenes = delGenes[,.(variable, value)]
cnGenes = rbind(cnGenes, delGenes)
}
if(verbose){
cat(paste0('--Processing ', basename(gisticScoresFile), '\n'))
}
gis.scores = data.table::fread(input = gisticScoresFile)
colnames(gis.scores) = c('Variant_Classification', 'Chromosome', 'Start_Position', 'End_Position', 'fdr', 'G_Score', 'Avg_amplitude', 'frequency')
#gis.scores$Variant_Classification = ifelse(test = as.numeric(gis.scores$fdr) > fdrCutOff, yes = gis.scores$Variant_Classification, no = 'neutral')
#gis.scores$Variant_Classification = factor(gis.scores$Variant_Classification, levels = c('neutral', 'Amp', 'Del'))
#cnSamples = unique(x = as.character(all.lesions.melt$variable))
if(verbose){
cat('--Summarizing by samples\n')
}
cnDT = data.table::rbindlist( lapply(X = cnSamples, FUN = function(x){
cytBands = all.lesions.melt[variable %in% x, cytoband]
cytBandsGenes = cnGenes[variable %in% cytBands]
cytBandsGenes[,TumorSampleBarcode := x]
}))
cnDT[, CN := substr(x = cnDT[,variable], start = 1, stop = 3)]
cnDT = cnDT[,.(variable, value, TumorSampleBarcode, CN)]
colnames(cnDT) = c('Cytoband', 'Hugo_Symbol', 'Tumor_Sample_Barcode', 'Variant_Classification')
cnDT = suppressWarnings(cnDT[,Variant_Type := 'CNV'])
if(isTCGA){
cnDT$Tumor_Sample_Barcode = substr(x = cnDT$Tumor_Sample_Barcode, start = 1, stop = 12)
cnSamples = substr(x = cnSamples, start = 1, stop = 12)
cnSamples = unique(x = cnSamples)
}
qval$Unique_Name = gsub(pattern = 'Amplification_Peak', replacement = 'AP', x = qval$Unique_Name)
qval$Unique_Name = gsub(pattern = 'Deletion_Peak', replacement = 'DP', x = qval$Unique_Name)
qval[, Unique_Name := paste(Unique_Name, Cytoband, sep = ':')]
cnDT$peakID = as.character(factor(x = cnDT[,Cytoband], levels = qval[,peakID], labels = qval[,Unique_Name]))
qval[,peakID := NULL]
cnDT[,Cytoband := NULL]
colnames(cnDT)[5] = 'Cytoband'
cnDT[, Tumor_Sample_Barcode := factor(x = Tumor_Sample_Barcode, levels = cnSamples)]
cnDt.summary = summarizeGistic(gistic = cnDT)
cnDt.mat = gisticMap(gistic = cnDT)
cnDt.summary$cytoband.summary = merge(cnDt.summary$cytoband.summary, qval, by.x = 'Cytoband', by.y = 'Unique_Name', all.x = TRUE)
colnames(cnDt.summary$cytoband.summary)[c(1, 5)] = c('Unique_Name', 'Cytoband')
cnDt.summary$cytoband.summary = cnDt.summary$cytoband.summary[order(qvalues)]
g = GISTIC(data = cnDT, cnv.summary = cnDt.summary$cnv.summary,
cytoband.summary = cnDt.summary$cytoband.summary,
gene.summary = cnDt.summary$gene.summary,
cnMatrix = cnDt.mat$oncomat, numericMatrix = cnDt.mat$nummat,
classCode = cnDt.mat$vc, summary = cnDt.summary$summary, gis.scores = gis.scores)
return(g)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.