R/transtruct.R

Defines functions transtruct

Documented in transtruct

#' Transcript Stucture
#' 
#' @description transtruct uses alternative splicing information to find structures of transcripts generated in two conditions (usually control and treated). It can generate stuctures corresponding to multiple AS events in a gene at a time. These can be passed as a list or vector to ep.event or ip.event parameter. transtruct primarily requires RMM and iMM of the gene to find seed exons and extend them to form continuous paths of exons connected via flanking junctions.
#'
#' @param geneID geneID of the Cassette Exon and/or Intron Retention event(s). Only one geneID can be passed.
#' @param ep.event list/vector of Cassette Exon event(s), for which Transcript Structure is required. (default = NULL)
#' @param ip.event list/vector of Intron Retention event(s), for which Transcript Structure is required. (default = NULL)
#' @param Gcount Gcount matrix of given geneID. Gcount matrix generated via \code{\link{countMatrixGenes}}/\code{\link{ppAuto}}/\code{\link{ppFASE}} as Gcount.Rdata. It contains gene-wise read count summarization of meta-features times samples in the study.
#' @param RMM RMM of given geneID. readMembershipMatrix contains association of each exon with meta-features (exons, introns, skipping junctions and flanking junctions) of that gene. It is generated by \code{\link{readMembershipMatrix}} function as RMM.Rdata. It is required for both Cassette Exon as well as Intron Retention event.
#' @param iMM iMM of given geneID. intronMembershipMatrix contains association of each intron with meta-features (introns, exons and skipping junctions) of that gene. It is generated by \code{\link{intronMembershipMatrix}} function as iMM.Rdata. It is required only when Transcript Structure for Intron Retention event is required.
#' @param designM design matrix required by edgeR. \cr
#'        Example: If there are four samples, two corresponding each to control and treated condition, design matrix should be prepared as: \cr
#'        designM <-  matrix(c(rep(1,2), rep(0,4), rep(1,2)), byrow = T, ncol=2, dimnames=list(c('Sample1', 'Sample2', 'Sample3', 'Sample4'), c('Normal', 'Treated')))
#' @param annotation annotation matrix of given geneID. annotation is generated by \code{\link{readMembershipMatrix}}/\code{\link{ppFASE}} function as Annotation.Rdata. It contains gene-wise annotation of each meta-feature (exons, introns and junctions).
#' @param Groups list of sample groups. \cr
#'        Example: If there are two sample groups with three samples each, 'Groups' should be formed as: \cr
#'        Groups <- c(1, 1, 2, 2)
#'        }
#' @param keep.intron if a cassette exon event is selected by ExonPointer due to flanking introns, instead of or along with flanking junction(s), retain intron in the transcript structure and propagate structure using that intron. logical. (default = FALSE)
#'
#' @return list of three matrices: \cr
#'        \enumerate{
#'        \item numeric: transtruct.condition: It contains transcript structure(s) of given gene for corresponding Cassette Exon and/or Intron Retention event(s) (as specified by rownames), in the treated samples.
#'        \item numeric: transtruct.normal: It contains transcript structure(s) of given gene for corresponding Cassette Exon and/or Intron Retention event(s) (as specified by rownames), in the normal/untreated samples.
#'        \item numeric: expression: it contains expression values of meta-features in the given gene
#'        }
#' 
#' 
#' @export
#'
transtruct <- function(ep.event = NULL, ip.event = NULL, Gcount, RMM, iMM = NULL, designM, annotation, Groups, keep.intron = FALSE){
  expression <- .removeLECountsTS(Gcount = Gcount, designM = designM, Groups = Groups)
  cmm <- .cmm(expression = expression, RMM = RMM, iMM = iMM)
  annotation <- droplevels(annotation[match(rownames(cmm), annotation$EX_IN), ])

  #EP_inclusion
  if(!is.null(ep.event)){
    for(p in 1:length(ep.event)){
      ep_event <- ep.event[p]
      s.exon <- .seed.exon.ep.incl(cmm = cmm, ep_event = ep_event, keep.intron = keep.intron) #finds both: seed junction(s) and seed exon(s)/seed intron(s)
      if(p == 1) transtruct.ep.incl <- NULL
      transtruct.ep.incl <- .transtruct.prep(cmm = cmm, event = ep_event, eventlist = ep.event, eventtype = 'ep.incl', p = p, transtruct.ep.incl = transtruct.ep.incl, s.exon = s.exon, keep.intron = keep.intron)
    }
  }
  
  #EP_exclusion
  if(!is.null(ep.event)){
    for(p in 1:length(ep.event)){
      ep_event <- ep.event[p]
      s.exon <- .seed.exon.ep.excl(ep_event = ep_event, cmm = cmm) #finds both: seed junction(s) and seed exon(s)
      if(p == 1) transtruct.ep.excl <- NULL
      transtruct.ep.excl <- .transtruct.prep(cmm = cmm, event = ep_event, eventlist = ep.event, eventtype = 'ep.excl', p = p, transtruct.ep.excl = transtruct.ep.excl, s.exon = s.exon, keep.intron = FALSE)
    }
  }
  
  #IP_inclusion
  if(!is.null(ip.event)){
    for(p in 1:length(ip.event)){
      ip_event <- ip.event[p]
      s.exon <- .s.exon.ip.incl(ip_event = ip_event, cmm = cmm, annotation = annotation) #finds both: seed junction(s) and seed exon(s)
      if(p == 1) transtruct.ip.incl <- NULL
      transtruct.ip.incl <- .transtruct.prep(cmm = cmm, event = ip_event, eventlist = ip.event, eventtype = 'ip.incl', p = p, transtruct.ip.incl = transtruct.ip.incl, s.exon = s.exon, keep.intron = FALSE)
    }
  }
  
  #IP_exclusion
  if(!is.null(ip.event)){
    for(p in 1:length(ip.event)){
      ip_event <- ip.event[p]
      s.exon <- .seed.exon.ip.excl(ip_event = ip_event, cmm = cmm, annotation = annotation) #finds both: seed junction(s) and seed exon(s)
      if(p == 1) transtruct.ip.excl <- NULL
      transtruct.ip.excl <- .transtruct.prep(cmm = cmm, event = ip_event, eventlist = ip.event, eventtype = 'ip.excl', p = p, transtruct.ip.excl = transtruct.ip.excl, s.exon = s.exon, keep.intron = FALSE)
    }
  }

  ##Arranging TS into condition and normal
  #Combining all inclusion event TS:
  if(!is.null(ep.event) && !is.null(ip.event)){
    ts.incl <- rbind(transtruct.ep.incl$ts.new, transtruct.ip.incl$ts.new)
    ts.excl <- rbind(transtruct.ep.excl$ts.new, transtruct.ip.excl$ts.new)
  } else if(!is.null(ep.event) && is.null(ip.event)){
    ts.incl <- transtruct.ep.incl$ts.new
    ts.excl <- transtruct.ep.excl$ts.new
  } else if(!is.null(ip.event) && is.null(ep.event)){
    ts.incl <- transtruct.ip.incl$ts.new
    ts.excl <- transtruct.ip.excl$ts.new
  }
  
  if(nrow(ts.incl) == 0 && nrow(ts.excl) == 0)
    stop("No transcript structure found!")

  index <- match(colnames(ts.incl), rownames(expression)) # to match order of metafeatures in expression matrix with that of transcript structure
  expression <- expression[index, ]
  
  # #checking if total TS (incl+excl) are < 10,000
  # if(nrow(ts.incl) + nrow(ts.excl) > 10000){
  #   cat("Too many transcript structures, possibly due to 3'/5' Alternative Splicing or gene fusion.")
  #   ts.incl <- NULL; ts.excl <- NULL
  # }

  #transtructs <- list('transtruct.condition' = transtruct.condition, 'transtruct.normal' = transtruct.normal, 'expression' = expression)
  transtructs <- list('transtruct.inclusion' = ts.incl, 'transtruct.exclusion' = ts.excl, 'expression' = expression)
  return(transtructs)
}
harshsharma-cb/FASE documentation built on Aug. 6, 2023, 1:37 a.m.