R/quickAnalysis.R
In GeneStructureTools: Tools for spliced gene structure manipulation and analysis

Documented in filterWhippetEvents leafcutterTranscriptChangeSummary transcriptChangeSummary whippetTranscriptChangeSummary

#' Filter out significant events from a whippet diff comparison
#' @param whippetDataSet whippetDataSet generated from \code{readWhippetDataSet()}
#' @param probability minimum probability required to call event as significant
#' @param psiDelta minimum change in psi required to call an event as significant
#' @param eventTypes which event type to filter for? default = \code{"all"}
#' @param idList (optional) list of gene ids to filter for
#' @param minCounts minumum number of counts for all replicates
#' in at least one condition to call an event as significant
#' @param medianCounts median count for all replicates
#' in at least one condition to call an event as significant
#' @param sampleTable data.frame with sample names and conditions.
#' Only needed if filtering with counts.
#' @return filtered whippet differential comparison data.frame
#' @export
#' @importFrom stats median
#' @family whippet data processing
#' @author Beth Signal
#' @examples
#' whippetFiles <- system.file("extdata","whippet/",
#' package = "GeneStructureTools")
#' wds <- readWhippetDataSet(whippetFiles)
#' wds <- filterWhippetEvents(wds)
filterWhippetEvents <- function(whippetDataSet,
                                probability = 0.95,
                                psiDelta = 0.1,
                                eventTypes = "all",
                                idList = NA,
                                minCounts = NA,
                                medianCounts = NA,
                                sampleTable){

    if(eventTypes[1] == "all"){
        eventTypes <- unique(diffSplicingResults(whippetDataSet)$type)
    }

    if(is.na(idList[1])){
        significantEventsIndex <-
            which(diffSplicingResults(whippetDataSet)$probability >
                      probability &
                      abs(diffSplicingResults(whippetDataSet)$psi_delta) >
                      psiDelta &
                      diffSplicingResults(whippetDataSet)$type %in%
                      eventTypes)
    }else{
        significantEventsIndex <-
            which(diffSplicingResults(whippetDataSet)$probability >
                      probability &
                      abs(diffSplicingResults(whippetDataSet)$psi_delta) >
                      psiDelta &
                      diffSplicingResults(whippetDataSet)$type %in%
                      eventTypes &
                      diffSplicingResults(whippetDataSet)$gene %in% idList)
    }

    slot(whippetDataSet, "diffSplicingResults") <-
        diffSplicingResults(whippetDataSet)[significantEventsIndex,]

    m <- match(diffSplicingResults(whippetDataSet)$coord,
               coordinates(whippetDataSet)$id)

    slot(whippetDataSet, "coordinates") <-
        coordinates(whippetDataSet)[unique(m),]

    keep <- which(readCounts(whippetDataSet)$Gene %in%
                      diffSplicingResults(whippetDataSet)$gene)

    slot(whippetDataSet, "readCounts") <-
        readCounts(whippetDataSet)[keep,]

    keep <- which(junctions(whippetDataSet)$gene %in%
                      diffSplicingResults(whippetDataSet)$gene)

    slot(whippetDataSet, "junctions") <-
        junctions(whippetDataSet)[keep,]


    #filter by read counts?
    if(nrow(slot(whippetDataSet, "readCounts")) > 0 & (!is.na(minCounts) |
                                                       !is.na(medianCounts))){
        m <- match(diffSplicingResults(whippetDataSet)$unique_name,
                   readCounts(whippetDataSet)$unique_name)
        n1 <- match(sampleTable$sample[
            sampleTable$condition %in%
                unique(diffSplicingResults(whippetDataSet)$condition_1)],
                    colnames(readCounts(whippetDataSet)))
        n2 <- match(sampleTable$sample[
            sampleTable$condition %in%
                unique(diffSplicingResults(whippetDataSet)$condition_2)],
                    colnames(readCounts(whippetDataSet)))

        diffSplicingResultsTemp <- diffSplicingResults(whippetDataSet)

        diffSplicingResultsTemp$condition_1_counts <-
            apply(readCounts(whippetDataSet)[m,n1], 1, stats::median)
        diffSplicingResultsTemp$condition_2_counts <-
            apply(readCounts(whippetDataSet)[m,n2], 1, stats::median)

        if(!is.na(minCounts)){
            keep <- which(apply(readCounts(whippetDataSet)[m,n1], 1,
                                function(x) all(x > minCounts)) |
                              apply(readCounts(whippetDataSet)[m,n2], 1,
                                    function(x) all(x > minCounts)))
            slot(whippetDataSet, "diffSplicingResults") <-
                diffSplicingResultsTemp[keep,]
        }

        if(!is.na(medianCounts)){
            keep <- which(diffSplicingResultsTemp$condition_1_counts >=
                              medianCounts |
                              diffSplicingResultsTemp$condition_2_counts >=
                              medianCounts)
            slot(whippetDataSet, "diffSplicingResults") <-
                diffSplicingResultsTemp[keep,]
        }

    }

    return(whippetDataSet)

}
#' Compare open reading frames for two sets of paired transcripts
#' @param transcriptsX GRanges object with exon annotations for
#' all transcripts to be compared for the 'normal' condition
#' @param transcriptsY GRanges object with exon annotations for
#' all transcripts to be compared for the 'alternative' condition
#' @param BSgenome BSGenome object containing the genome for the species analysed
#' @param exons GRanges object made from a GTF containing exon coordinates
#' @param NMD Use NMD predictions? (Note: notNMD must be installed to use this feature)
#' @param NMDModel Use the "base" or "lncRNA" NMD model?
#' @param orfPrediction What type of orf predictions to return. default= \code{"allFrames"}
#' @param compareBy compare isoforms by 'transcript' id, or aggregate all changes occuring by 'gene'
#' @param compareToGene compare alternative isoforms to all normal gene isoforms (in exons)
#' @param whippetDataSet whippetDataSet generated from \code{readWhippetDataSet()}
#' Use if PSI directionality should be taken into account when comparing isoforms.
#' @param exportGTF file name to export alternative isoform GTFs (default=\code{NULL})
#' @return Summarised ORF changes data.frame
#' @export
#' @import GenomicRanges
#' @importFrom rtracklayer import
#' @importFrom utils installed.packages
#' @family transcript isoform comparisons
#' @author Beth Signal
#' @examples
#' whippetFiles <- system.file("extdata","whippet/",
#' package = "GeneStructureTools")
#' wds <- readWhippetDataSet(whippetFiles)
#' wds <- filterWhippetEvents(wds)
#'
#' gtf <- rtracklayer::import(system.file("extdata","example_gtf.gtf",
#' package = "GeneStructureTools"))
#' exons <- gtf[gtf$type=="exon"]
#' g <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
#'
#' wds.exonSkip <- filterWhippetEvents(wds, eventTypes="CE",psiDelta = 0.2)
#'
#' exons.exonSkip <- findExonContainingTranscripts(wds.exonSkip, exons,
#' variableWidth=0, findIntrons=FALSE, transcripts)
#' ExonSkippingTranscripts <- skipExonInTranscript(exons.exonSkip, exons, whippetDataSet=wds.exonSkip)
#' transcriptChangeSummary(ExonSkippingTranscripts[ExonSkippingTranscripts$set=="included_exon"],
#' ExonSkippingTranscripts[ExonSkippingTranscripts$set=="skipped_exon"],
#' BSgenome=g,exons)
transcriptChangeSummary <- function(transcriptsX,
                                    transcriptsY,
                                    BSgenome,
                                    exons,
                                    NMD = FALSE,
                                    NMDModel = NULL,
                                    compareBy="gene",
                                    orfPrediction = "allFrames",
                                    compareToGene = FALSE,
                                    whippetDataSet = NULL,
                                    exportGTF = NULL){


    if(!is.null(whippetDataSet)){
        whippetEvents <- diffSplicingResults(whippetDataSet)
        allTranscripts <- c(transcriptsX, transcriptsY)
        type <- gsub("_X","",gsub("_Y","", allTranscripts$set))
        type <- gsub("skipped_exon", "CE", gsub("included_exon","CE", type))
        type <- gsub("retained_intron", "RI", gsub("spliced_intron","RI", type))

        m <- match(paste0(allTranscripts$whippet_id,"_",type),
                   paste0(whippetEvents$coord,"_",
                          whippetEvents$type))
        # A -- psi in condition 1 (A) is higher (i.e. included -- > skipped)
        normA <- which(whippetEvents$psi_a > whippetEvents$psi_b)
        # B -- psi in condition 2 (B) is higher (i.e. skipped -- > included)
        normB <- which(whippetEvents$psi_a < whippetEvents$psi_b)

        #sets for X (+A)
        setsX <- c(paste0(unique(type), "_Y"), "included_exon","retained_intron")
        #sets for Y (+B)
        setsY <- c(paste0(unique(type), "_X"), "skipped_exon","spliced_intron")

        transcriptsX <- allTranscripts[
            which((m %in% normA & allTranscripts$set %in% setsX) |
                      (m %in% normB & allTranscripts$set %in% setsY))]

        transcriptsY <- allTranscripts[
            which((m %in% normA & allTranscripts$set %in% setsY) |
                      (m %in% normB & allTranscripts$set %in% setsX))]

    }

    if(!is.null(exportGTF)){
        transcriptsX$comp_set <- "X"
        transcriptsY$comp_set <- "Y"
        transcriptsXY <- c(transcriptsX, transcriptsY)

        rtracklayer::export.gff(transcriptsXY, con=exportGTF, format="gtf")

        transcriptsX$comp_set <- NULL
        transcriptsY$comp_set <- NULL
    }

    if(orfPrediction == "allFrames"){
        orfsX <- getOrfs(transcriptsX, BSgenome,returnLongestOnly = FALSE,
                         allFrames = TRUE, uORFs = TRUE)
        orfsY <- getOrfs(transcriptsY, BSgenome,returnLongestOnly = FALSE,
                         allFrames = TRUE, uORFs = TRUE)
    }else{
        orfsX <- getOrfs(transcriptsX, BSgenome,returnLongestOnly = TRUE,
                         uORFs = TRUE)
        orfsY <- getOrfs(transcriptsY, BSgenome,returnLongestOnly = TRUE,
                         uORFs = TRUE)
    }

    if(all(!grepl("[+]", orfsX$id))){

        if(orfPrediction == "allFrames"){
            Yid.withFrame <- paste0(unlist(lapply(
                str_split(orfsY$id, "[+]"),"[[",1)),"_", orfsY$frame)
            Xid.withFrame <- paste0(orfsX$id,"_", orfsX$frame)
            m <- match(Yid.withFrame, Xid.withFrame)
        }else{
            m <- match(unlist(lapply(str_split(orfsY$id, "[+]"),"[[",1)),
                       orfsX$id)
        }

        orfsX<- orfsX[m,]
        orfsX$id <- orfsY$id
        #orfsX <- orfsX[which(!duplicated(orfsX$id)),]
    }

    # manual NMD
    notNMDInstalled <- "notNMD" %in% rownames(utils::installed.packages())

    if(NMD == TRUE){

        if(is.null(NMDModel)){
            useModel <- "base"
        }else{
            useModel <- NMDModel
        }

        if(notNMDInstalled){
            save(orfsX, orfsY, BSgenome, useModel, file="temp_ORFs.Rdata")

            #### run notnmd
            scriptLoc <- system.file("extdata","NMD_from_object.R",
                                     package = "notNMD")
            system(paste0("Rscript ", scriptLoc, " temp_ORFs.Rdata"))

            load("temp_ORFs.Rdata")
            system("rm -f temp_ORFs.Rdata")

        }else{
            message("package notNMD is not installed")
            message("Skipping NMD calculations")
        }
    }

    orfsX <- orfsX[which(!is.na(orfsX$orf_length)),]
    orfsY <- orfsY[which(!is.na(orfsY$orf_length)),]

    if(compareToGene == TRUE){

        if(NMD == TRUE & notNMDInstalled){
            orfAllGenes <- getOrfs(exons[exons$gene_id %in%
                                             unique(c(transcriptsX$gene_id,
                                                      transcriptsY$gene_id))],
                                   BSgenome,returnLongestOnly = TRUE)

            save(orfAllGenes, BSgenome, file="temp_ORFs.Rdata")
            system(paste0("Rscript ", scriptLoc, " temp_ORFs.Rdata"))
            load("temp_ORFs.Rdata")
            system("rm -f temp_ORFs.Rdata")
            #orfAllGenes <- orfAllGenes[orfAllGenes$nmd_prob > 0.5,]

        }else{
            orfAllGenes <- getOrfs(
                exons[exons$gene_id %in%
                          unique(c(transcriptsX$gene_id,
                                   transcriptsY$gene_id)) &
                          exons$transcript_type=="protein_coding"],
                BSgenome=BSgenome,returnLongestOnly = TRUE)
        }

        orfChange <- orfDiff(orfsX, orfsY, filterNMD = NMD, compareBy = "gene",
                             geneSimilarity = TRUE,
                             allORFs = orfAllGenes,compareUTR = TRUE)
    }else{
        orfChange <- orfDiff(orfsX, orfsY, filterNMD = NMD, compareBy = "gene",
                             compareUTR = TRUE)
    }



    if(NMD == TRUE){
        orfsX$nmd_class_manual <- "nonsense_mediated_decay"
        orfsX$nmd_class_manual[orfsX$orf_length > 50 &
                                   (orfsX$min_dist_to_junction_b < 50 |
                                        orfsX$exon_b_from_final == 0)] <-
            "not_nmd"

        orfsX$nmd_prob_manual <- 1
        orfsX$nmd_prob_manual[orfsX$nmd_class_manual == "not_nmd"] <- 0

        orfsY$nmd_class_manual <- "nonsense_mediated_decay"
        orfsY$nmd_class_manual[orfsY$orf_length > 50 &
                                   (orfsY$min_dist_to_junction_b < 50 |
                                        orfsY$exon_b_from_final == 0)] <-
            "not_nmd"

        orfsY$nmd_prob_manual <- 1
        orfsY$nmd_prob_manual[orfsY$nmd_class_manual == "not_nmd"] <- 0

    }
    if(NMD == TRUE & notNMDInstalled){
        nmdChange <- attrChangeAltSpliced(orfsX,
                                          orfsY,
                                          attribute="nmd_prob",
                                          compareBy="gene",
                                          useMax=FALSE)
        m <- match(orfChange$id, nmdChange$id)
        orfChange <- cbind(orfChange, nmdChange[m,-1])

        nmdChangeMan <- attrChangeAltSpliced(orfsX,
                                             orfsY,
                                             attribute="nmd_prob_manual",
                                             compareBy="gene",
                                             useMax=FALSE)
        m <- match(orfChange$id, nmdChangeMan$id)
        orfChange <- cbind(orfChange, nmdChangeMan[m,-1])


    }
    if(!is.null(whippetDataSet)){
        m <- match(orfChange$id, whippetEvents$coord)
        orfChange <- cbind(whippetEvents[m,], orfChange)
    }
    return(orfChange)

}

#' Compare open reading frames for whippet differentially spliced events
#' @param whippetDataSet whippetDataSet generated from \code{readWhippetDataSet()}
#' @param eventTypes which event type to filter for? default = "all"
#' @param exons GRanges gtf annotation of exons
#' @param transcripts GRanges gtf annotation of transcripts
#' @param gtf.all GRanges gtf annotation (can be used instead of specifying exons and transcripts)
#' @param BSgenome BSGenome object containing the genome for the species analysed
#' @param NMD Use NMD predictions? (Note: notNMD must be installed to use this feature)
#' @param exportGTF file name to export alternative isoform GTFs (default=NULL)
#' @return data.frame containing signficant whippet diff data and ORF change summaries
#' @export
#' @importFrom rtracklayer import
#' @import GenomicRanges
#' @family whippet data processing
#' @author Beth Signal
#' @examples
#' whippetFiles <- system.file("extdata","whippet/",
#' package = "GeneStructureTools")
#' wds <- readWhippetDataSet(whippetFiles)
#' wds <- filterWhippetEvents(wds)
#'
#' gtf <- rtracklayer::import(system.file("extdata","example_gtf.gtf",
#' package = "GeneStructureTools"))
#' g <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
#' whippetTranscriptChangeSummary(wds, gtf.all=gtf,BSgenome = g)
whippetTranscriptChangeSummary <- function(whippetDataSet,
                                           gtf.all=NULL,
                                           BSgenome,
                                           eventTypes = "all",
                                           exons=NULL,
                                           transcripts=NULL,
                                           NMD = FALSE,
                                           exportGTF = NULL){


    # diffSplicingResults(whippetDataSet)

    if(is.null(exons) & !is.null(gtf.all)){
        exons <- gtf.all[gtf.all$type=="exon"]
    }
    if(is.null(transcripts) & !is.null(gtf.all)){
        transcripts <- gtf.all[gtf.all$type=="transcript"]
    }else if(is.null(transcripts) & !is.null(exons)){
        transcripts <- exonsToTranscripts(exons)
    }


    if(eventTypes[1] == "all"){
        eventTypes <- unique(diffSplicingResults(whippetDataSet)$type)
    }
    if(!is.null(exportGTF)){
        exportedTranscripts <- list()
    }
    if(any(eventTypes == "CE") &
       any(diffSplicingResults(whippetDataSet)$type == "CE")){


        whippetDataSet.ce <- filterWhippetEvents(whippetDataSet,
                                                 probability = 0,
                                                 psiDelta = 0,
                                                 eventTypes="CE")

        significantEvents.ce <- diffSplicingResults(whippetDataSet.ce)

        exons.ce <- findExonContainingTranscripts(input=whippetDataSet.ce,
                                                  exons=exons,
                                                  variableWidth=0,
                                                  findIntrons=FALSE,
                                                  transcripts=transcripts)
        # make skipped exon transcripts
        skippedExonTranscripts <- skipExonInTranscript(whippetDataSet=whippetDataSet.ce,
                                                       skippedExons = exons.ce,
                                                       exons = exons,
                                                       glueExons = TRUE)

        orfChanges.ce <- transcriptChangeSummary(
            skippedExonTranscripts[skippedExonTranscripts$set=="included_exon"],
            skippedExonTranscripts[skippedExonTranscripts$set=="skipped_exon"],
            BSgenome = BSgenome,NMD = NMD, whippetDataSet=whippetDataSet.ce)
        # add to significantEvents.ce
        m <- match(significantEvents.ce$coord, orfChanges.ce$id)
        significantEvents.ce <- cbind(significantEvents.ce, orfChanges.ce[m,-1])

        if(exists("SignificantEvents.withORF")){
            SignificantEvents.withORF <- rbind(SignificantEvents.withORF,
                                               significantEvents.ce)
        }else{
            SignificantEvents.withORF <- significantEvents.ce
        }
        if(!is.null(exportGTF)){
            exportedTranscripts <- c(exportedTranscripts,
                                     skippedExonTranscripts)
        }
    }
    if(any(eventTypes == "RI") & any(diffSplicingResults(whippetDataSet)$type ==
                                     "RI")){

        whippetDataSet.ri <- filterWhippetEvents(whippetDataSet,
                                                 probability = 0,
                                                 psiDelta = 0,
                                                 eventTypes="RI")

        significantEvents.ri <- diffSplicingResults(whippetDataSet.ri)

        exons.ri <- findIntronContainingTranscripts(input=whippetDataSet.ri,
                                                    exons=exons)

        # find introns in the gtf that overlap whippet introns
        significantEvents.ri <-
            diffSplicingResults(whippetDataSet)[which(
                diffSplicingResults(whippetDataSet)$type=="RI"),]
        # add the intron into transcripts
        retainedIntronTranscripts <- addIntronInTranscript(whippetDataSet=whippetDataSet.ri,
                                                           flankingExons = exons.ri,
                                                           exons = exons,
                                                           glueExons = TRUE)


        orfChanges.ri <- transcriptChangeSummary(
            retainedIntronTranscripts[retainedIntronTranscripts$set==
                                          "spliced_intron"],
            retainedIntronTranscripts[retainedIntronTranscripts$set==
                                          "retained_intron"],
            BSgenome = BSgenome,NMD = NMD, whippetDataSet=whippetDataSet.ri)
        # add to significantEvents.ce
        m <- match(significantEvents.ri$coord, orfChanges.ri$id)
        significantEvents.ri <- cbind(significantEvents.ri, orfChanges.ri[m,-1])

        if(exists("SignificantEvents.withORF")){
            SignificantEvents.withORF <- rbind(SignificantEvents.withORF,
                                               significantEvents.ri)
        }else{
            SignificantEvents.withORF <- significantEvents.ri
        }
        if(!is.null(exportGTF)){
            exportedTranscripts <- c(exportedTranscripts,
                                     retainedIntronTranscripts)
        }
    }
    if(any(eventTypes %in% c("AA","AD","AF","AL")) &
       any(diffSplicingResults(whippetDataSet)$type %in%
           c("AA","AD","AF","AL"))){

        events.junctions <- eventTypes[eventTypes %in% c("AA","AD","AF","AL")]
        events.significant <- unique(diffSplicingResults(whippetDataSet)$type)
        events.significant <- events.significant[events.significant %in%
                                                     events.junctions]

        for(e in seq_along(events.significant)){

            event <- events.significant[e]

            whippetDataSet.jnc <- filterWhippetEvents(whippetDataSet,
                                                      probability = 0,
                                                      psiDelta = 0,
                                                      eventTypes=event)

            significantEvents.jnc <- diffSplicingResults(whippetDataSet.jnc)

            junctionPairs <- findJunctionPairs(whippetDataSet.jnc, type=event)

            # check for pairs
            ids.x <- unique(junctionPairs$whippet_id[junctionPairs$set=="X"])
            ids.x <- ids.x[ids.x %in% unique(
                junctionPairs$whippet_id[junctionPairs$set=="Y"])]

            significantEvents.jnc <-significantEvents.jnc[
                which(significantEvents.jnc$coord %in% ids.x),]
            junctionPairs <- junctionPairs[
                which(junctionPairs$whippet_id %in% ids.x),]

            if(nrow(significantEvents.jnc) > 0){
                # make transcripts with alternative junction usage
                altTranscripts <- replaceJunction(whippetDataSet.jnc,
                                                  junctionPairs,
                                                  exons, type=event)
                orfChanges.jnc <- transcriptChangeSummary(
                    transcriptsX = altTranscripts[
                        altTranscripts$set==paste0(event, "_X")],
                    transcriptsY = altTranscripts[
                        altTranscripts$set==paste0(event, "_Y")],
                    BSgenome = BSgenome,NMD = NMD,
                    whippetDataSet=whippetDataSet.jnc)

                # add to significantEvents
                m <- match(significantEvents.jnc$coord, orfChanges.jnc$id)
                significantEvents.jnc <- cbind(significantEvents.jnc,
                                               orfChanges.jnc[m,-1])

                if(exists("SignificantEvents.withORF")){
                    SignificantEvents.withORF <-
                        rbind(SignificantEvents.withORF,
                              significantEvents.jnc)
                }else{
                    SignificantEvents.withORF <- significantEvents.jnc
                }
                if(!is.null(exportGTF)){
                    exportedTranscripts <- c(exportedTranscripts,
                                             altTranscripts)
                }
            }
        }

    }
    if(!is.null(exportGTF)){
        exportedTranscripts <- do.call("c", exportedTranscripts)
        rtracklayer::export.gff(exportedTranscripts, con=exportGTF,
                                format="gtf")
    }

    return(SignificantEvents.withORF)
}
#' Compare open reading frames for whippet differentially spliced events
#' @param significantEvents  data.frame containing information from the
#' per_intron_results.tab file output from leafcutter.
#' @param combineGeneEvents combine clusters occuring in the same gene?
#' Currently not reccomended.
#' @param exons GRanges gtf annotation of exons
#' @param BSgenome BSGenome object containing the genome for the species analysed
#' @param NMD Use NMD predictions? (Note: notNMD must be installed to use this feature)
#' @param showProgressBar show a progress bar of alternative isoform generation?
#' @param exportGTF file name to export alternative isoform GTFs (default=NULL)
#' @return data.frame containing signficant whippet diff data and ORF change summaries
#' @export
#' @importFrom utils setTxtProgressBar
#' @importFrom utils txtProgressBar
#' @importFrom stringr str_split
#' @importFrom rtracklayer import
#' @import GenomicRanges
#' @family leafcutter data processing
#' @author Beth Signal
#' @examples
#' leafcutterFiles <- list.files(system.file("extdata","leafcutter/",
#' package = "GeneStructureTools"), full.names = TRUE)
#' leafcutterIntrons <- read.delim(leafcutterFiles[
#' grep("intron_results", leafcutterFiles)],stringsAsFactors=FALSE)
#' gtf <- rtracklayer::import(system.file("extdata","example_gtf.gtf",
#' package = "GeneStructureTools"))
#' exons <- gtf[gtf$type=="exon"]
#' g <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
#' leafcutterTranscriptChangeSummary(significantEvents = leafcutterIntrons,
#' exons=exons,BSgenome = g,NMD=FALSE)

leafcutterTranscriptChangeSummary <- function(significantEvents,
                                              combineGeneEvents=FALSE,
                                              exons,
                                              BSgenome,
                                              NMD = FALSE,
                                              showProgressBar=TRUE,
                                              exportGTF=NULL){


    geneEvents <- as.data.frame(table(significantEvents$ensemblID,
                                      significantEvents$clusterID))
    geneEvents <- geneEvents[geneEvents$Freq!=0,]

    if(combineGeneEvents == FALSE){
        if(showProgressBar){
            message(paste0("Generating alternative isoforms for ",
                           nrow(geneEvents), " clusters:"))
            pb <- utils::txtProgressBar(min = 0, max = nrow(geneEvents),
                                        style = 3)
        }

        altIso <- alternativeIntronUsage(significantEvents[
            significantEvents$clusterID == geneEvents$Var2[1],],
            exons)
        if(showProgressBar){utils::setTxtProgressBar(pb, 1)}

        if(nrow(geneEvents) > 1){
            for(i in 2:nrow(geneEvents)){
                altIntronLocs = significantEvents[
                    significantEvents$clusterID == geneEvents$Var2[i],]
                altIntronLocs <- altIntronLocs[altIntronLocs$verdict==
                                                   "annotated",]
                if(nrow(altIntronLocs) > 1){
                    altIso1 <- alternativeIntronUsage(altIntronLocs, exons)
                    altIso <- c(altIso, altIso1)
                }
                if(showProgressBar){utils::setTxtProgressBar(pb, i)}

            }
        }
    }else{
        genes <- unique(geneEvents$Var1)
        if(showProgressBar){
            message(paste0("Generating alternative isoforms for ",
                           nrow(geneEvents), " genes:"))
            pb <- utils::txtProgressBar(min = 0, max = length(genes), style = 3)
        }
        for(j in seq_along(genes)){
            clusters <- geneEvents[geneEvents$Var1==genes[j],]

            altIso <- alternativeIntronUsage(significantEvents[
                significantEvents$clusterID == clusters$Var2[1],],
                exons)

            if(nrow(clusters) > 1){
                for(i in 2:nrow(clusters)){
                    altIntronLocs = significantEvents[
                        significantEvents$clusterID == clusters$Var2[i],]
                    altIntronLocs <- altIntronLocs[altIntronLocs$verdict==
                                                       "annotated",]
                    if(nrow(altIntronLocs) > 1){
                        altIso1 <- alternativeIntronUsage(altIntronLocs,
                                                          c(exons, altIso))
                        altIso <- c(altIso, altIso1)
                    }

                }
            }
            if(showProgressBar){utils::setTxtProgressBar(pb, j)}

        }


    }
    altIso$spliced_id <- unlist(lapply(
        stringr::str_split(altIso$transcript_id, " "),"[[",2))

    transcriptsX <- altIso[grep("dnre", altIso$transcript_id)]
    transcriptsY <- altIso[grep("upre", altIso$transcript_id)]

    if(!is.null(exportGTF)){
        rtracklayer::export.gff(altIso, con=exportGTF, format="gtf")
    }

    orfDiff <- transcriptChangeSummary(transcriptsX,
                                       transcriptsY,
                                       BSgenome = BSgenome,
                                       NMD = NMD)
    m <- match(gsub("_","",significantEvents$clusterID), orfDiff$id)
    significantEvents.withORF <- cbind(significantEvents, orfDiff[m,-1])
    #significantEvents.withORF <- significantEvents.withORF[!duplicated(m),]

    return(significantEvents.withORF)
}
Any scripts or data that you put into this service are public.
GeneStructureTools documentation built on Nov. 8, 2020, 6:04 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
GeneStructureTools
Tools for spliced gene structure manipulation and analysis

R/quickAnalysis.R
In GeneStructureTools: Tools for spliced gene structure manipulation and analysis

Defines functions leafcutterTranscriptChangeSummary whippetTranscriptChangeSummary transcriptChangeSummary filterWhippetEvents

Documented in filterWhippetEvents leafcutterTranscriptChangeSummary transcriptChangeSummary whippetTranscriptChangeSummary

Try the GeneStructureTools package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

GeneStructureTools Tools for spliced gene structure manipulation and analysis

R/quickAnalysis.R In GeneStructureTools: Tools for spliced gene structure manipulation and analysis

Defines functions leafcutterTranscriptChangeSummary whippetTranscriptChangeSummary transcriptChangeSummary filterWhippetEvents

Documented in filterWhippetEvents leafcutterTranscriptChangeSummary transcriptChangeSummary whippetTranscriptChangeSummary

Try the GeneStructureTools package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

GeneStructureTools
Tools for spliced gene structure manipulation and analysis

R/quickAnalysis.R
In GeneStructureTools: Tools for spliced gene structure manipulation and analysis