# R/grin.oncoprint.mtx.R
# In GRIN2: Genomic Random Interval (GRIN)
#
# Documented in grin.oncoprint.mtx

#' GRIN OncoPrint-Compatible Lesion Matrix
#'
#' @description
#' Prepares a lesion matrix based on GRIN analysis results that can be directly used as input for the \code{oncoPrint} function from the \pkg{ComplexHeatmap} package. Each cell of this matrix lists the lesion types that affect a gene in a patient, for a user-defined list of genes.
#'
#' @param grin.res A data frame of GRIN results, typically generated by the \code{\link{grin.stats}} function.
#' @param oncoprint.genes A character vector of Ensembl gene IDs specifying the genes to include in the OncoPrint.
#'
#' @details
#' This function filters the GRIN results for a specified set of genes (using their Ensembl IDs), and constructs a gene-by-patient character matrix indicating which lesion types affect each gene. Each row represents a gene, each column a patient, and each cell holds the lesion types affecting that gene in that patient as a semicolon-terminated list (e.g., \code{"gain;mutation;"}); an empty string means the gene has no lesion in that patient.
#'
#' The output matrix is fully compatible with the \code{oncoPrint()} function from the \pkg{ComplexHeatmap} package and allows visualization of lesion patterns across a defined gene set.
#'
#' This is particularly useful for visualizing mutation, copy number alterations and other structural rearrangements in driver genes or genes selected by statistical criteria (e.g., significance threshold from GRIN results).
#'
#' @return
#' A data frame of character columns with one row per selected gene that has at least one lesion (row names are gene names) and one column per patient who has a lesion in any of those genes, suitable for use with the \code{oncoPrint()} function.
#'
#' @export
#'
#' @importFrom tibble rownames_to_column
#'
#' @references
#' Cao, X., Elsayed, A. H., & Pounds, S. B. (2023). Statistical Methods Inspired by Challenges in Pediatric Cancer Multi-omics.
#'
#' @author
#' Abdelrahman Elsayed \email{abdelrahman.elsayed@stjude.org} and Stanley Pounds \email{stanley.pounds@stjude.org}
#'
#' @seealso \code{\link{grin.stats}}, \code{\link[ComplexHeatmap]{oncoPrint}}
#'
#' @examples
#' data(lesion_data)
#' data(hg38_gene_annotation)
#' data(hg38_chrom_size)
#'
#' # Run GRIN analysis
#' grin.results <- grin.stats(lesion_data,
#'                            hg38_gene_annotation,
#'                            hg38_chrom_size)
#'
#' # Define a list of genes (using Ensembl IDs) to include in the OncoPrint
#' oncoprint.genes <- c("ENSG00000148400", "ENSG00000171862", "ENSG00000171843",
#'                      "ENSG00000156531", "ENSG00000162367", "ENSG00000096968",
#'                      "ENSG00000105639", "ENSG00000118513", "ENSG00000102974",
#'                      "ENSG00000133703")
#'
#' # Alternatively, select genes with multiple lesion types and significant q-values
#' genes.const <- grin.results$gene.hits[grin.results$gene.hits$q2.nsubj < 0.01, ]
#' selected.genes <- as.vector(genes.const$gene)
#'
#' # Generate OncoPrint-compatible lesion matrix
#' oncoprint.mtx <- grin.oncoprint.mtx(grin.results, oncoprint.genes)

grin.oncoprint.mtx=function(grin.res, # GRIN results (output of the grin.stats function)
                            oncoprint.genes) # vector of ensembl IDs for the selected list of genes

{
  selected=unlist(oncoprint.genes)
  selected=as.vector(selected)
  selected.genes= grin.res$gene.lsn.data[grin.res$gene.lsn.data$gene %in% selected,]
  selected.genes=selected.genes[,c(2,7,11)]  # extract patient IDs and lsn type for each gene in the selected genes list
  row.data=paste(selected.genes[,1],
                 selected.genes[,2],
                 selected.genes[,3],
                 sep="_")
  dup.data=duplicated(row.data)
  select.genes=selected.genes[!dup.data,]

  ord=order(select.genes$gene,
            select.genes$ID,
            select.genes$lsn.type)
  select.genes=select.genes[ord,]

  uniq.genes=unique(select.genes$gene)
  uniq.subj=unique(select.genes$ID)
  n.genes=length(uniq.genes)
  n.subj=length(uniq.subj)
  mtx=matrix("",n.genes,n.subj)   # create a matrix with each gene as a row
  colnames(mtx)=uniq.subj
  rownames(mtx)=uniq.genes

  k=nrow(select.genes)
  for (i in 1:k)
  {
    subj.id=select.genes[i,"ID"]
    gene.id=select.genes[i,"gene"]
    mtx[gene.id,subj.id]=paste0(mtx[gene.id,subj.id],
                                select.genes[i,"lsn.type"],";")
  }
  mtx=as.data.frame(mtx)
  mtx<-tibble::rownames_to_column(mtx, "ensembl.ID")

  gene.annotation= grin.res$gene.data
  ensembl.annotation=cbind(gene.annotation$gene, gene.annotation$gene.name)
  colnames(ensembl.annotation)=c("ensembl.ID", "gene.name")

  mtx.final=merge(ensembl.annotation,mtx,by="ensembl.ID", all.y=TRUE)  # add gene name
  mtx.final=mtx.final[,-1]
  rownames(mtx.final)=mtx.final[,1]
  mtx.final=mtx.final[,-1]

  return(mtx.final)

}