R/external.R
In TRONCO: TRONCO, an R package for TRanslational ONCOlogy

Documented in export.graphml export.mutex export.nbs.input import.mutex.groups TCGA.map.clinical.data TCGA.multiple.samples TCGA.remove.multiple.samples TCGA.shorten.barcodes

#### TRONCO: a tool for TRanslational ONCOlogy
####
#### Copyright (c) 2015-2017, Marco Antoniotti, Giulio Caravagna, Luca De Sano,
#### Alex Graudenzi, Giancarlo Mauri, Bud Mishra and Daniele Ramazzotti.
####
#### All rights reserved. This program and the accompanying materials
#### are made available under the terms of the GNU GPL v3.0
#### which accompanies this distribution.

#' Create an input file for MUTEX
#' (ref: https://code.google.com/p/mutex/ )
#' @title export,mutex
#'
#' @examples
#' data(crc_gistic)
#' dataset = import.GISTIC(crc_gistic)
#' export.mutex(dataset)
#'
#' @param x A TRONCO compliant dataset.
#' @param filename The name of the file
#' @param filepath The path where to save the file
#' @param label.mutation The event type to use as mutation
#' @param label.amplification The event type to use as amplification (can be a list)
#' @param label.deletion The event type to use as amplification (can be a list)
#' @return A MUTEX example matrix
#' @export export.mutex
#' @importFrom utils write.table
#' 
export.mutex <- function(x, 
                         filename = 'tronco_to_mutex',
                         filepath = './',
                         label.mutation = 'SNV',
                         label.amplification = list('High-level Gain'),
                         label.deletion = list('Homozygous Loss')) {

    is.compliant(x)
    data = x
    alteration =
        list(unlist(label.mutation),
             unlist(label.amplification),
             unlist(label.deletion))

    ## Merge amplification.
    
    if (length(label.amplification) >= 0) {
        amplification = label.amplification[[1]]
    }
    if (length(label.amplification) >= 2) {
        amplification = 'amplification'
        data = join.types(data, label.amplification[[1]], label.amplification[[2]], 'amplification', 'red')
    }
    if (length(label.amplification) > 2) {
        for (label in label.amplification[3:length(label.amplification)]) {
            data = join.types(data, label, 'amplification', 'amplification', 'red')
        }
    }

    ## Merge deletion.
    
    if (length(label.deletion) >= 0) {
        deletion = label.deletion[[1]]
    }
    if (length(label.deletion) >= 2) {
        deletion = 'deletion'
        data = join.types(data, label.deletion[[1]], label.deletion[[2]], 'deletion', 'blue')
    }
    if (length(label.deletion) > 2) {
        for (label in label.deletion[3:length(label.deletion)]) {
            data = join.types(data, label, 'deletion', 'deletion', 'blue')
        }
    }

    ## Merge mutation.
    
    if (length(label.mutation) >= 0) {
        mutation = label.mutation[[1]]
    }
    if (length(label.mutation) >= 2) {
        mutation = 'mutation'
        data = join.types(data, label.mutation[[1]], label.mutation[[2]], 'mutation', 'green')
    }
    if (length(label.mutation) > 2) {
        for (label in label.mutation[3:length(label.mutation)]) {
            data = join.types(data, label, 'mutation', 'mutation', 'green')
        }
    }

    samples = rownames(data$genotypes) 
    genes = unique(data$annotation[,'event'])

    mutex.matrix = matrix(0, nrow = length(genes), ncol = length(samples))
    colnames(mutex.matrix) = samples
    rownames(mutex.matrix) = genes

    ## Legend:
    ## 0: no alteration
    ## 1: mutation
    ## 2: amplification
    ## 4: deletion
    ## 3: 1+2 a+m
    ## 5: 1+4 d+m

    legend = list(1, 2, 4)
    names(legend) = list(mutation, amplification, deletion)
    tronco.matrix = data$genotypes

    for (sample in rownames(tronco.matrix)) {
        for (gene in colnames(tronco.matrix)) {
            type = data$annotations[[gene, 'type']]

            if (type %in% alteration && tronco.matrix[sample, gene] == 1) {
                to.add = legend[[data$annotations[[gene, 'type']]]]
                actual.value = mutex.matrix[data$annotations[[gene, 'event']], sample]
                mutex.matrix[data$annotations[[gene, 'event']], sample] = actual.value + to.add
            }
        }
    }

    ## Reassign value according to mutex notation

    ## Legend:
    ## 0: no alteration
    ## 1: mutation
    ## 2: amplification
    ## 3: deletion
    ## 4: 1+2 a+m
    ## 5: 1+4 d+m

    ## Move a+m to 10
    mutex.matrix[which(mutex.matrix == 3)] = 10

    ## move deletion to 3
    mutex.matrix[which(mutex.matrix == 4)] = 3

    ## move a+m to 4
    mutex.matrix[which(mutex.matrix == 10)] = 4

    mutex.header = append("Symbol", samples)

    filepath = if (grepl("\\/$", filepath)) filepath else paste0(filepath, "/")
    con = paste0(filepath, filename)
    write(mutex.header, file = con, sep = "\t", ncolumns = length(mutex.header))
    write.table(mutex.matrix, con, sep="\t", append = TRUE, col.names = FALSE, quote = FALSE)

    return(mutex.matrix)
}


#' Create a .mat file which can be used with NBS clustering
#' (ref: http://chianti.ucsd.edu/~mhofree/wordpress/?page_id=26)
#' @title export.nbs.input
#'
#' @param x A TRONCO compliant dataset.
#' @param map_hugo_entrez Hugo_Symbol-Entrez_Gene_Id map
#' @param file output file name
#' @importFrom R.matlab writeMat
#' @export export.nbs.input
#' 
export.nbs.input <-function(x, 
                            map_hugo_entrez,
                            file = 'tronco_to_nbs.mat') {

    is.compliant(x);

    cat('*** Exporting for NBS v. 0.2\n')
    cat('Preparing binary input matrix\n')

    ## gene_indiv_mat <- the matrix
    gene_indiv_mat = as.matrix(x$genotypes)

    ## Remove colnames and rownames from gene_indiv_mat.
    
    rownames(gene_indiv_mat) = NULL
    colnames(gene_indiv_mat) = NULL

    cat('Preparing samples IDs \n')

    ## sample_id <- patient id
    sample_id = as.samples(x)

    cat('Preparing genes list (should be Hugo_Symbol) \n')
    
    ## gene_id_symbol <- sorted name of events
    gene_id_symbol = as.genes(x)

    cat('Preparing genes map (should be Hugo_Symbol -> Entrez_Gene_Id) \n')
    
    if (!('Hugo_Symbol' %in% colnames(map_hugo_entrez))) {
        stop('No Hugo_Symbol column in the input map: ', colnames(map_hugo_entrez))
    }
    if (!('Entrez_Gene_Id' %in% colnames(map_hugo_entrez))) {
        stop('No Entrez_Gene_Id column in the input map: ', colnames(map_hugo_entrez))
    }

    gene_id_all =
        mapply(function(x) as.numeric(map_hugo_entrez[[which(map_hugo_entrez[ ,'Hugo_Symbol'] == x),
                                                       'Entrez_Gene_Id']]),
               gene_id_symbol)

    file = if (grepl("\\.mat$", file)) file else paste0(file, ".mat")
    con = paste0(file)

    cat('Writing Matlab file to disk:', file,  ' ..... ' )
    writeMat(con,
             gene_indiv_mat = gene_indiv_mat,
             gene_id_all = gene_id_all,
             sample_id = sample_id,
             gene_id_symbol = gene_id_symbol)
    cat('DONE')
}


#' Create a list of unique Mutex groups for a given fdr cutoff
#' current Mutex version is Jan 8, 2015
#' (ref: https://code.google.com/p/mutex/ )
#'
#' @title import.mutex.groups
#' @param file Mutex results ("ranked-groups.txt" file)
#' @param fdr cutoff for fdr
#' @param display print summary table of extracted groups
#' @export import.mutex.groups
#' @importFrom utils count.fields
#' 
import.mutex.groups <- function(file, fdr=.2, display = TRUE) {
    ## Found somewhere on the web - makes sense.
    
    read.irregular <- function(filenm) {
        fileID <- file(filenm,open="rt")
        nFields <- count.fields(fileID)
        mat <- matrix(nrow=length(nFields),ncol=max(nFields))
        invisible(seek(fileID,where=0,origin="start",rw="read"))
        for (i in 1:nrow(mat) ) {
            mat[i, 1:nFields[i]] = scan(fileID, what = "", nlines = 1, quiet = TRUE)
        }
        close(fileID)
        df = data.frame(mat, stringsAsFactors = FALSE)
        return(df)
    }

    x = read.irregular(file)

    ## Check header.
    
    if (any(x[1,1:3] != c('Score', 'q-val', 'Members')))
        warning('File header does not seem to contain \'Score\', \'q-val\' and \'Members field\'.\n',
                'Are you sure this is a Mutex result file?')

    ## Remove header.
    
    cat(paste('*** Groups extracted - ', (nrow(x) -1), ' total groups.\n', sep=''))
    x = x[-1, , drop = FALSE]     # this is c('Score', 'q-val', 'Members')
    x[, 1] = as.numeric(x[,1])          # fdr
    x[, 2] = as.numeric(x[,2])          # q-value

    ## Remove groups  with low fdr.
    
    res = x[which(x[,1] < fdr), , drop = FALSE] 

    ## Remove duplicated groups (permutations).
    
    res.g = res[, 3:ncol(res)]

    for (i in 1:nrow(res.g))
        res[i,3:ncol(res)] = sort(res.g[i,], na.last = TRUE)   
    res = res[!duplicated((res[ , 3:ncol(res), drop = FALSE])), ] 

    cat(paste('Selected ',
              nrow(res),
              ' unique groups with fdr < ',
              fdr,
              '\n',
              sep = ''))

    ## Create groups.
    
    groups <- function(g) {
        g = g[3:length(g)]
        g = g[!is.na(g)]
        names(g) = NULL
        return(sort(g))
    }

    G = list()
    for (i in 1:nrow(res)) {
        gr = list(groups(res[i, ]))
        names(gr) = paste('MUTEX_GROUP', i, sep='')
        G = append(G,gr)  
    }

    rownames(res) = names(G)
    colnames(res)[1:2] = c('fdr', 'score')

    ## Summary report.
    
    if (display) 
        { 
            print(res)
        }
    return(G)
}


#' Check if there are multiple sample in x, according to TCGA barcodes naming
#' @title TCGA.multiple.samples
#' 
#' @examples
#' data(test_dataset)
#' TCGA.multiple.samples(test_dataset)
#'
#' @param x A TRONCO compliant dataset.
#' @return A list of barcodes. NA if no duplicated barcode is found
#' @export TCGA.multiple.samples
#' 
TCGA.multiple.samples <- function(x) {
    is.compliant(x)

    samples = as.samples(x)
    samples.truncated = substring(samples, 0, 12)

    patients = unique(samples.truncated)

    if (length(patients) != nsamples(x)) {
        dup.samples.start = which(duplicated(samples.truncated)) 
        dup.samples.last = which(duplicated(samples.truncated, fromLast = TRUE))

        return(sort(samples[c(dup.samples.start, dup.samples.last)])) 
    } else
        return(NA)
}


#' If there are multiple sample in x, according to TCGA barcodes naming, remove them
#' @title TCGA.remove.multiple.samples
#' 
#' @examples
#' data(test_dataset)
#' TCGA.remove.multiple.samples(test_dataset)
#'
#' @param x A TRONCO compliant dataset.
#' @return A TRONCO compliant dataset
#' @export TCGA.remove.multiple.samples
#' 
TCGA.remove.multiple.samples <- function(x) {
    is.compliant(x, err.fun = 'Removing TCGA multiple samples (input)')

    dup = TCGA.multiple.samples(x)
    dup.truncated = substring(dup, 0, 12)
    patients = unique(dup.truncated)

    for (i in 1:length(patients)) {
        patients.samples = which(dup.truncated == patients[i])
        multiple.samples = dup[patients.samples]

        cat('Patient', patients[i], 'with sample aliquotes\n' )
        print(substring(multiple.samples, 14, 29))

        keep = max(multiple.samples)
        discard = multiple.samples[which(multiple.samples != keep)]

        cat('Selecting', keep, '\n')
        x = delete.samples(x, discard)
    }

    is.compliant(x, err.fun = 'Removing TCGA multiple samples (output)')
    return(x)
}


#' Keep only the first 12 character of samples barcode if there are no duplicates
#' @title TCGA.shorten.barcodes
#' 
#' @examples
#' data(test_dataset)
#' TCGA.shorten.barcodes(test_dataset)
#'
#' @param x A TRONCO compliant dataset.
#' @return A TRONCO compliant dataset
#' @export TCGA.shorten.barcodes
#' 
TCGA.shorten.barcodes <- function(x) {
    is.compliant(x, err.fun='Shartening TCGA barcodes (input)')

    ## Check if it has duplicated barcodes.
    
    if (!all(is.na(TCGA.multiple.samples(x))))
        stop(paste('This dataset contains multiple samples for some patients - cannot consolidate.',
                   '\n Samples with barcodes indicating multiple patients: \n',
                   paste(TCGA.multiple.samples(x), collapse = '\n'),
                   '.',
                   sep = ''))

    ## Shorten sample barcodes.
    
    rownames(x$genotypes) = substring(rownames(x$genotypes), 0, 12)
    if (has.stages(x)) rownames(x$stages) = rownames(x$genotypes)

    is.compliant(x, err.fun='Shartening TCGA barcodes (output)')
    return(x)    
}


#' Map clinical data from the TCGA format
#' @title  TCGA.map.clinical.data
#'
#' @param file A file with the clinical data
#' @param sep file delimiter
#' @param column.samples Required columns
#' @param column.map Map to the required columns
#' @return a map
#' @export TCGA.map.clinical.data
#' @importFrom utils read.delim
#' 
TCGA.map.clinical.data <- function(file, sep='\t', column.samples, column.map) {

    data =
        read.delim(file = file,
                   sep = sep,
                   header = TRUE,
                   stringsAsFactors = FALSE)

    if (!(column.samples %in% colnames(data))) 
        stop(paste('Cannot find samples column \"',
                   column.samples,
                   '\". Available columns: \n\t',
                   paste(colnames(data), collapse = '\n\t'),
                   sep = ''))

    if (!(column.map %in% colnames(data))) 
        stop(paste('Cannot find required map column \"',
                   column.map,
                   '\". Available columns: \n\t',
                   paste(colnames(data), collapse = '\n\t'),
                   sep = ''))

    map = data.frame(data[ , column.map], row.names = data[ , column.samples])
    colnames(map) = column.map

    return(map)
}


## Internal function

sample.RColorBrewer.colors <- function(palette, ncolors) {
    if (!palette %in% rownames(brewer.pal.info))
        stop('Invalid RColorBrewer palette.')

    pmax.cols = brewer.pal.info[palette, 'maxcolors']

    cols = min(pmax.cols , ncolors)
    cols = ifelse(cols < 3, 3, cols)

    colors = brewer.pal(n=cols, name=palette)
    if (ncolors < 3) colors = colors[1:ncolors]
    else colors =  colorRampPalette(colors)(ncolors)

    return(colors)
}

#' Create a graphML object which can be imported in cytoscape
#' This function is based on the tronco.plot fuction
#' 
#' @title  export.graphml
#' 
#' @examples
#' data(test_model)
#' export.graphml(test_model, file='text.xml', scale.nodes=0.3)
#'
#' @param x A TRONCO compliant dataset
#' @param file Where to save the output
#' @param ... parameters for tronco.plot
#' @export export.graphml
#' @importFrom igraph write.graph V V<- set.vertex.attribute
#' @importFrom igraph set.edge.attribute set.graph.attribute
#' @importFrom grDevices rgb col2rgb
#' @importFrom utils packageVersion
#' 
export.graphml <- function(x, file, ...) {

    is.compliant(x)
    is.model(x)

    plot.output = tronco.plot(x, export.igraph = TRUE, create.new.dev = FALSE, ...)
    graph = plot.output$graph
    nodes = plot.output$nodes
    edges = plot.output$edges
    description = plot.output$description
    models = plot.output$models
    node.names = V(graph)$name
    edge.names = names(edges$label)

    ## Display information about vertex name

    V(graph)$name = V(graph)$label

    ## Prepare and save vertex label

    vertex.label = sapply(node.names, function(node) {
        return(gsub("\\", "", nodes$label[node], fixed = TRUE))
    })
    V(graph)$label = vertex.label

    ## Prepare and save vertex type

    vertex.type = sapply(node.names, function(node) {
        if (is.logic.node(node)) {
            return('')
        }
        return(as.events(x)[node,'type'])
    })
    graph = set.vertex.attribute(graph, 'type', value=vertex.type)
    
    ## Prepare and save vertex bg color

    vertex.fillcolor = sapply(node.names, function(node){
        rgb(t(col2rgb(nodes$fillcolor[node])), maxColorValue = 255)
    })
    graph = set.vertex.attribute(graph, 'fillcolor', value=vertex.fillcolor)

    ## Prepare and save vertex font color

    vertex.fontcolor = sapply(node.names, function(node){
        rgb(t(col2rgb(nodes$fontcolor[node])), maxColorValue = 255)
    })
    graph = set.vertex.attribute(graph, 'fontcolor', value=vertex.fontcolor)

    ## Prepare and save vertex border color

    vertex.bordercolor = sapply(node.names, function(node){
        rgb(t(col2rgb(nodes$color[node])), maxColorValue = 255)
    })
    graph = set.vertex.attribute(graph, 'bordercolor', value=vertex.bordercolor)

    ## Prepare and save vertex shape

    vertex.shape = sapply(node.names, function(node){
        shape = nodes$shape[node]
        if (shape == 'box') {
            return('Rectangle')
        }
        return(shape)
    })
    graph = set.vertex.attribute(graph, 'shape', value=vertex.shape)

    ## Prepare and save vertex width

    vertex.width = sapply(node.names, function(node){
        return(nodes$width[node] * 50)
    })
    graph = set.vertex.attribute(graph, 'width', value=vertex.width)

    ## Prepare and save vertex height

    vertex.height = sapply(node.names, function(node){
        return(nodes$height[node] * 50)
    })
    graph = set.vertex.attribute(graph, 'height', value=vertex.height)

    ## Prepare and save label fontsize

    vertex.fontsize = sapply(node.names, function(node){
        return(nodes$fontsize[node] * 1.5)
    })
    graph = set.vertex.attribute(graph, 'fontsize', value=vertex.fontsize)

    ## Prepare and save border width

    vertex.borderwidth = sapply(node.names, function(node){
        return(nodes$lwd[node])
    })
    graph = set.vertex.attribute(graph, 'borderwidth', value=vertex.borderwidth)
    
    ## Prepare and save the attribute cases
    
    vertex.cases = sapply(node.names, function(node) {
      if (is.logic.node(node)) {
        return(0)
      }else{
        return(nodes$cases[node])
      }
      
    })
    
    graph = set.vertex.attribute(graph, 'cases', value = vertex.cases)
    
    ## Prepare and save the attribute sumGenotypes
    
    vertex.sum.genotypes = sapply(node.names, function(node) {
      if (is.logic.node(node)) {
        return(0)
      }
      return(nodes$sum.genotypes[node])
      
    })
    graph = set.vertex.attribute(graph, 'sum genotypes', value = vertex.sum.genotypes)

    ## Prepare and save vertex shape

    edge.line = sapply(edge.names, function(edge){
        line = edges$lty[edge]
        if (line == 'dashed') {
            return('dash')
        }
        return('solid')
    })
    graph = set.edge.attribute(graph, 'line', value=edge.line)

    ## Prepare and save arrow type

    edge.arrow = sapply(edge.names, function(edge){
        arrow = edges$arrowsize[edge]
        if (arrow == 1) {
            return('True')
        }
        return('False')
    })
    graph = set.edge.attribute(graph, 'arrow', value=edge.arrow)

    ## Prepare and save edge color

    edge.color = sapply(edge.names, function(edge){
        rgb(t(col2rgb(edges$color[edge])), maxColorValue = 255)
    })
    graph = set.edge.attribute(graph, 'color', value=edge.color)
    
    ## Save edge label
    
    graph = set.edge.attribute(graph, 'edgelabel', value = edges$label)
    
    ## Prepare and save label color
    
    edge.fontcolor = sapply(edge.names, function(edge){
      rgb(t(col2rgb(edges$fontcolor[edge])), maxColorValue = 255)
    })
    graph = set.edge.attribute(graph, 'labelcolor', value = edge.fontcolor)
    
    ## Save edge width
    
    graph = set.edge.attribute(graph, 'width', value = edges$lwd)
    
    ## Check if cofidence are given and prepare a column for each confidence
    
    if (length(edges$confidences) > 0) {
      for (c in colnames(edges$confidences)) {
        graph = set.edge.attribute(graph, c, value = edges$confidences[, c])
      }
    }

    ## Prepare and save graph attributes

    graph = set.graph.attribute(graph, 'name', description)
    graph = set.graph.attribute(graph, 
                                'models',
                                paste(x$parameters$algorithm, models, collapse=' - '))
    graph = set.graph.attribute(graph,
                                'informations',
                                paste0('Generated with TRONCO v', packageVersion('TRONCO')))
    eloss_output = tryCatch({
      eloss = as.kfold.eloss(x)
      for(i in 1:nrow(eloss)) {
        currmodel = paste0('eloss ', row.names(eloss)[i])
        currvalue = round(eloss[i,'Mean'], digits = 4)
        graph = set.graph.attribute(graph,
                                    currmodel,
                                    currvalue)
      }
    }, error = function(e) {})

    write.graph(graph, file=file, format='graphml')
}


#### end of file -- external.R
Any scripts or data that you put into this service are public.
TRONCO documentation built on Nov. 8, 2020, 5:51 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
TRONCO
TRONCO, an R package for TRanslational ONCOlogy

R/external.R
In TRONCO: TRONCO, an R package for TRanslational ONCOlogy

Defines functions export.graphml sample.RColorBrewer.colors TCGA.map.clinical.data TCGA.shorten.barcodes TCGA.remove.multiple.samples TCGA.multiple.samples import.mutex.groups export.nbs.input export.mutex

Documented in export.graphml export.mutex export.nbs.input import.mutex.groups TCGA.map.clinical.data TCGA.multiple.samples TCGA.remove.multiple.samples TCGA.shorten.barcodes

Try the TRONCO package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

TRONCO TRONCO, an R package for TRanslational ONCOlogy

R/external.R In TRONCO: TRONCO, an R package for TRanslational ONCOlogy

Defines functions export.graphml sample.RColorBrewer.colors TCGA.map.clinical.data TCGA.shorten.barcodes TCGA.remove.multiple.samples TCGA.multiple.samples import.mutex.groups export.nbs.input export.mutex

Documented in export.graphml export.mutex export.nbs.input import.mutex.groups TCGA.map.clinical.data TCGA.multiple.samples TCGA.remove.multiple.samples TCGA.shorten.barcodes

Try the TRONCO package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

TRONCO
TRONCO, an R package for TRanslational ONCOlogy

R/external.R
In TRONCO: TRONCO, an R package for TRanslational ONCOlogy