Nothing
### Functions lifted from spliceR by KVS 2018-03-06
if(TRUE) {
### C functions copied from R 1.9.0
### Auxillary spliceR functions
if(TRUE) {
### Filter functions ###
# Filter 1: only test genes with OK status (cufflinks specific)
.filterOKGenes <- function(dataList, isoIndex) {
return( isoIndex[which( dataList[["transcript_features"]]$gene_status[isoIndex] == 'OK' )] )
}
# Filter 2: Pnly test significantly differentially expressed genes (cufflinks specific)
.filterSigGenes <- function(dataList, isoIndex) {
return( isoIndex[which( dataList[["transcript_features"]]$gene_significant[isoIndex] == 'yes' )] )
}
# Filter 3: Only analyze those with isoform quant status = OK
.filterOKIso <- function(dataList, isoIndex) {
return( isoIndex[which( dataList[["transcript_features"]]$iso_status[isoIndex] == 'OK' )] )
}
# Filter 4: Only analyze those genes that are expressed (requires gene expression values)
.filterExpressedGenes <- function(dataList, isoIndex, expressionCutoff) {
return( isoIndex[which( dataList[["transcript_features"]]$gene_value_1[isoIndex] > expressionCutoff | dataList[["transcript_features"]]$gene_value_2[isoIndex] > expressionCutoff )] )
}
# Filter 4: Get index of those isoforms that are expressed in at least one of the samples
.filterExpressedIso <- function(dataList, isoIndex, expressionCutoff) {
return( isoIndex[which( dataList[["transcript_features"]]$iso_value_1[isoIndex] > expressionCutoff | dataList[["transcript_features"]]$iso_value_2[isoIndex] > expressionCutoff )] )# Remove those isoforms that are not expressed
}
.filterIsoClassCode <- function(dataList, isoIndex) {
classCodeIndex <- isoIndex[which( dataList[["transcript_features"]]$class_code[isoIndex] != 'e' & dataList[["transcript_features"]]$class_code[isoIndex] != 'p' & dataList[["transcript_features"]]$class_code[isoIndex] != 'r' )]
# Remove those isoforms that are classified as :
### Single exon transfrag (overlapping a reference exon and at least 10 bp of a reference intron, indicating a possible pre-mRNA fragment)
### Possible polymerase run-on fragment (within 2Kbases of a reference transcript)
### Repeat. (Currently determined by looking at the soft-masked reference sequence and applied to transcripts where at least 50% of the bases are lower case)
naIndex <- which(is.na(dataList[["transcript_features"]]$class_code[isoIndex])) # NA's are generated for novel genes, or genes that are not assigned to a known gene.
return(sort(c(classCodeIndex, naIndex),decreasing=FALSE))
}
# Filter 6: Only analyze significant isoforms (only analyze genes which have two significant differential regulated isoforms (rare))
.filterSigIso <- function(dataList, isoIndex) {
return( isoIndex[which( dataList[["transcript_features"]]$iso_significant[isoIndex] == 'yes' )] )
}
# Filter 7: remove isoforms with only 1 exon
.filterSingleExonIsoAll <- function(dataList,isoformsToAnalyzeIndex) {
isoformIDsToAnalyze <- dataList[["transcript_features"]]$"isoform_id"[isoformsToAnalyzeIndex] # get names of those isoforms that I should analyze
noOfExonsPerIsoform <- table(dataList[["exon_features"]]$"isoform_id"[ which(dataList[["exon_features"]]$"isoform_id" %in% isoformIDsToAnalyze) ] ) # create table with number of expons for those isoforms (here the number of conditions does not matter since there are no replicates in the feature table)
isoformIndexesToAnalyze <- isoformsToAnalyzeIndex[ which( dataList[["transcript_features"]]$"isoform_id"[isoformsToAnalyzeIndex] %in% names(noOfExonsPerIsoform[noOfExonsPerIsoform>1]) ) ] # the those indexes that belongs to transcripts with more than one exon
return(isoformIndexesToAnalyze)
}
# Filter 8: remove genes with less than two isoforms
.filterSingleIsoform <- function(dataList,isoformsToAnalyzeIndex, knownSamples) {
getUniqGeneIsoformCombinations <- unique(dataList[["transcript_features"]][isoformsToAnalyzeIndex,c('isoform_id','gene_id')]) # extract unique combinations of geneID and isoformID from those indexes that should be analyzed (the uniqe part removes dublicates du to multiple sample comparasons but includes combinations that are only in the index to analyze for some of the comparasons)
noOfisoformsPerGene <- table(getUniqGeneIsoformCombinations$gene_id )
isoformIndexesToAnalyze <- isoformsToAnalyzeIndex[ which( dataList[["transcript_features"]]$"gene_id"[isoformsToAnalyzeIndex] %in% names(noOfisoformsPerGene[noOfisoformsPerGene>1]) ) ] # overwrite the expression data with the expression data of those isoform features that have more than 1 exon
return(isoformIndexesToAnalyze)
}
# Filter 9: remove all those rows that are PTC sensitive
.filterPTC <- function(dataList,isoIndex) {
return( isoIndex[which( ! dataList[["transcript_features"]]$PTC[isoIndex]) ] ) # NA's are removed
}
###############################################
### Functions for determining Major Isoform ###
.getMajorIsoCuffDB <- function(isoformSubset, referenceSample) {
# if no refrence condition have been chosen take the isoform with the highest expression in any sample
if(referenceSample == 'none') {
maxValue <- max(
isoformSubset$iso_value_1,
isoformSubset$iso_value_2
) # get the index of the the max value
maxIsoformIndex <- c(
which( isoformSubset$iso_value_1 == maxValue), # I cannot use which.max because its two distinct vectors and I need the index
which( isoformSubset$iso_value_2 == maxValue) # I cannot use which.max because its two distinct vectors and I need the index
) # get the indexes belonging to that value
} else { # if a refrence condition have been chosen take the isoform with the highest expression in that condition
if( length(c(which(isoformSubset$sample_1 == referenceSample),which(isoformSubset$sample_2 == referenceSample))) > 0 ) {
maxValue <- max(
isoformSubset$iso_value_1[which(isoformSubset$sample_1 == referenceSample)],
isoformSubset$iso_value_2[which(isoformSubset$sample_2 == referenceSample)]
) # get the index of the the max value
maxIsoformIndex <- c(
which( isoformSubset$iso_value_1 == maxValue), # I cannot use which.max because its two distinct vectors
which( isoformSubset$iso_value_2 == maxValue) # I cannot use which.max because its two distinct vectors
) # get the indexes belonging to the max calue
} else { # if the refrence isoform is not in the subset analyzed (probably because the quantification is not ok whereby its filtered out)
return(NULL)
}
}
# If there are several (different) isoforms with same expression level (that is not as rare as I thought it was) - chose the one with the most exons - if still eqiual chose random.
if(length(unique(isoformSubset$isoform_id[maxIsoformIndex])) > 1) {
# chose the longest one
maxIsoformIndex <- maxIsoformIndex[which.max(isoformSubset$width[maxIsoformIndex])]
# If there are several (different) isoforms with same length
if(length(unique(isoformSubset$isoform_id[maxIsoformIndex])) > 1) {
# chose one at random
maxIsoformIndex <- sample(maxIsoformIndex,size=1)
}
}
return(maxIsoformIndex)
}
######################
### Core functions ###
.findOverlappingExons <- function(exon1, exon2) {
if( exon1$start < exon2$end & exon1$start >= exon2$start ) { return(TRUE) } # test whether start of exon1 is contained within exon2
if( exon1$end <= exon2$end & exon1$end > exon2$start ) { return(TRUE) } # test whether end of exon1 is contained within exon2
if( exon1$start < exon2$start & exon1$end > exon2$end ) { return(TRUE) } # test whether exon1 contains exon2
return(FALSE)
}
.findIdenticalExons <- function(exon1, exon2) {
if(exon1$start == exon2$start & exon1$end == exon2$end) {
return(TRUE)
}
return(FALSE)
}
### Functions to determin the type of Alternative splicing of overlapping exons in the LEFT side of the exons
.determineLeftOverlappingAStype <- function(exon1, exon2, isStrandEqualToPlus, ExonIndexesAnalyzed, asTypes) {
localExonList <- list(exon1, exon2)
if(exon1$start != exon2$start) {
if(! any( ExonIndexesAnalyzed == c(1,1) )) {
## Annotate AS type
if(isStrandEqualToPlus) {
asTypes$A3 <- asTypes$A3 + 1
asTypeStart <- 'A3.start' # string to help assign skipped part to the right type
asTypeEnd <- 'A3.end' # string to help assign skipped part to the right type
} else {
asTypes$A5 <- asTypes$A5 + 1
asTypeStart <- 'A5.start' # string to help assign skipped part to the right type
asTypeEnd <- 'A5.end' # string to help assign skipped part to the right type
}
## Annotate spliced out
minIndex <- which.min( c(exon1$start, exon2$start))
notMinIndex <- (1:2)[!1:2 %in% minIndex]
if(is.na(asTypes[asTypeStart])) {
asTypes[asTypeStart] <- paste( localExonList[[minIndex]]$start )
asTypes[asTypeEnd] <- paste( localExonList[[notMinIndex]]$start )
} else {
asTypes[asTypeStart] <- paste(asTypes[asTypeStart], localExonList[[minIndex]]$start, sep=';')
asTypes[asTypeEnd] <- paste(asTypes[asTypeEnd], localExonList[[notMinIndex]]$start, sep=';')
}
}
}
return(asTypes)
}
### Functions to determin the type of Alternative splicing of overlapping exons in the RIGHT side of the exons
.determineRightOverlappingAStype <- function(exon1, exon2, isStrandEqualToPlus, ExonIndexesAnalyzed, numberOfExons, asTypes) { # needs exon numbers because it looks from the right side
localExonList <- list(exon1, exon2)
if(exon1$end != exon2$end) { # Check whether the coordinates are identical - nessesary since I only filter for completly identical exons
if(! any( ExonIndexesAnalyzed == numberOfExons ) ) { # Chech wheter both exons are end exons (since the order of ExonIndexesAnalyzed and numberOfExons are the same == can be used)
## Annotate AS type
if(isStrandEqualToPlus) {
asTypes$A5 <- asTypes$A5 + 1
asTypeStart <- 'A5.start' # string to help assign skipped part to the right type
asTypeEnd <- 'A5.end' # string to help assign skipped part to the right type
} else {
asTypes$A3 <- asTypes$A3 + 1
asTypeStart <- 'A3.start' # string to help assign skipped part to the right type
asTypeEnd <- 'A3.end' # string to help assign skipped part to the right type
}
## Annotate spliced out
minIndex <- which.min( c(exon1$end, exon2$end))
notMinIndex <- (1:2)[!1:2 %in% minIndex]
if(is.na(asTypes[asTypeStart])) {
asTypes[asTypeStart] <- paste( localExonList[[minIndex]]$end )
asTypes[asTypeEnd] <- paste( localExonList[[notMinIndex]]$end )
} else {
asTypes[asTypeStart] <- paste(asTypes[asTypeStart], localExonList[[minIndex]]$end, sep=';')
asTypes[asTypeEnd] <- paste(asTypes[asTypeEnd], localExonList[[notMinIndex]]$end, sep=';')
}
}
}
return(asTypes)
}
### Function to evaluate counter of non-overlapping exons
.evaluateCounter <- function(count, asTypes, coordinats) {
if( count == 1) { # single skipping
asTypes$ESI <- asTypes$ESI + 1
if(is.na(asTypes$ESI.start)) {
asTypes$ESI.start <- paste( coordinats$start )
asTypes$ESI.end <- paste( coordinats$end )
} else {
asTypes$ESI.start <- paste(asTypes$ESI.start, coordinats$start, sep=';')
asTypes$ESI.end <- paste(asTypes$ESI.end, coordinats$end, sep=';')
}
} else { # multiple skipping
if(asTypes$MESI > 0) { # if a MESI have already been annotated, add a ',' to destinguish them from each other
for(i in 1:nrow(coordinats)) {
if(i == 1) { # if a MESI have already been anotated
asTypes$MESI.start <- paste(asTypes$MESI.start, coordinats$start[i], sep=',') # start with a ','
asTypes$MESI.end <- paste(asTypes$MESI.end, coordinats$end[i], sep=',') # start with a ','
} else {
asTypes$MESI.start <- paste(asTypes$MESI.start, coordinats$start[i], sep=';')
asTypes$MESI.end <- paste(asTypes$MESI.end, coordinats$end[i], sep=';')
}
}
} else { # if NO MESI have been anotated before
for(i in 1:nrow(coordinats)) {
if(is.na(asTypes$MESI.start)) {
asTypes$MESI.start <- paste( coordinats$start[i] )
asTypes$MESI.end <- paste( coordinats$end[i] )
} else {
asTypes$MESI.start <- paste(asTypes$MESI.start, coordinats$start[i], sep=';')
asTypes$MESI.end <- paste(asTypes$MESI.end, coordinats$end[i], sep=';')
}
}
}
# add 1 to the MESI counter
asTypes$MESI <- asTypes$MESI + 1
}
return(asTypes)
}
### Function to determine non overlapping AS types
.determineNonOverlappingAStype <- function(transcript1, transcript2, numberOfExons, index1, index2, exonsToIgnore, asTypes) {
### Extract exon info from the skipping in transcript 1
if(index1[2]-index1[1] >1) { # are there a skipping event for transcript1
index11 <- seq(index1[1]+1,index1[2]-1) # extract the exon(s) index that should be compared
if(any(index11 %in% exonsToIgnore[[1]])) { # if the exon(s) extracted are among thoes that should be ignored
removeIndex <- which(index11 %in% exonsToIgnore[[1]])
index11 <- index11[-removeIndex] # remve those to be ignored
if(length(index11) > 0) { # if there is any exons left
t1 <- data.frame(start = transcript1[index11,'start'], end = transcript1[index11,'end'] , startExon = (index11 %in% 1), endExon = (index11 %in% (numberOfExons[1])), transcript = 1, stringsAsFactors=FALSE)
} else {
t1 <- data.frame()
}
} else { # if the exons are not in the ignore exons list
t1 <- data.frame(start = transcript1[index11,'start'], end = transcript1[index11,'end'], startExon = (index11 %in% 1), endExon = (index11 %in% (numberOfExons[1])), transcript = 1, stringsAsFactors=FALSE)
}
} else { # if there is no skipping in this transcript
t1 <- data.frame()
}
### Extract exon info from the skipping in transcript 2
if(index2[2]-index2[1] >1) { # are there a skipping event for transcript1
index22 <- seq(index2[1]+1,index2[2]-1) # extract the exon(s) index that should be compared
if(any(index22 %in% exonsToIgnore[[2]])) { # if the exon(s) extracted are among thoes that should be ignored
removeIndex <- which(index22 %in% exonsToIgnore[[2]])
index22 <- index22[-removeIndex] # remve those to be ignored
if(length(index22) > 0) { # if there is any exons left
t2 <- data.frame(start = transcript2[index22,'start'], end = transcript2[index22,'end'], startExon = (index22 %in% 1), endExon = (index22 %in% (numberOfExons[1])), transcript = 1, stringsAsFactors=FALSE)
} else {
t2 <- data.frame()
}
} else { # if the exons are not in the ignore exons list
t2 <- data.frame(start = transcript2[index22,'start'], end = transcript2[index22,'end'] ,startExon = (index22 %in% 1), endExon = (index22 %in% (numberOfExons[1])), transcript = 1, stringsAsFactors=FALSE)
}
} else { # if there is no skipping in this transcript
t2 <- data.frame()
}
# combine and sort transcript (since they are all non-overlapping)
t3 <- rbind(t1,t2)
if(nrow(t3) == 0) { # are there any exon left (there migth not be due to the MEE)
return(asTypes)
}
t3 <- t3[sort.list(t3$start),]
### remove all exons that are not within the overlapping body of the two transcripts (including ends - since they are determined elsewhere)
cuttOff1 <- 0
if(any(t3$startExon)) {
cuttOff1 <- tail(which(t3$startExon),1) +1 # get cutoff for start
}
cutOff2 <- nrow(t3)
if(any(t3$endExon)) {
cutOff2 <- which(t3$endExon)[1] -1 # get cuttoff for end
}
if(cutOff2 >= cuttOff1) { # chech that the cutoffs leves anything (else cuttOff1:cutOff2 whould create a negative sequence)
t3 <- t3[cuttOff1:cutOff2,]
} else {
return(asTypes)
}
### determine AS events
if(nrow(t3) > 1) { # if there is MORE than 1 exon skipping event left to compare after first/last exons have been removed
# Use the origine to determine the order of the events
count <- 1
for(j in 2:nrow(t3)) {
if( t3$transcript[j] == t3$transcript[(j-1)] ) { # if the skipping occured from the same transcript as the previous
count <- count +1 # add one to the counter
} else {
asTypes <- .evaluateCounter(count, asTypes, t3[(j-count+1):j,c('start','end')])
count <- 1
}
}
asTypes <- .evaluateCounter(count, asTypes, t3[(j-count+1):j,c('start','end')])
} else if (nrow(t3) == 1) { # if there is only 1 exon skipping left to compare after first/last exons have been removed
asTypes$ESI <- asTypes$ESI + 1
if(is.na(asTypes$ESI.start)) {
asTypes$ESI.start <- paste( t3$start )
asTypes$ESI.end <- paste( t3$end )
} else {
asTypes$ESI.start <- paste(asTypes$ESI.start, t3$start, sep=';')
asTypes$ESI.end <- paste(asTypes$ESI.end, t3$end, sep=';')
}
}
return(asTypes)
} # end of determineNonOverlappingAStype function
### Function to create pre-RNA
.getPreRNA <- function(transcript) {
# We want to extract the the pre mRNA but intron retensions must be excluded since they will give cause to missing classificantions.
# For this purpose intron retension are defined as an exon, that overlaps at least two other exons, and at least two of the exon
# the intron retension overlaps does not overlap with one another.
# get unique exons - is better to do this before passing the df to the function
#myExonInfoUniq <- unique(transcript[,c('start','end')])
# sort them according to start and end coordinats (makes the overlap comparason much faster)
myExonInfoUniqSort <- transcript[order(transcript$start,transcript$end ),]
# Make a pairwise comparason of exons to find overlaps - have been tested thoroughly - works
overlapIndex <- matrix(nrow=0, ncol=2) # data frame to store the overlapping exon coordinats
#overlapIndexDF <- data.frame()
count=2 # counter to avoid creating the same plot twice and from making plots against oneself
for(i in 1:(nrow(myExonInfoUniqSort)-1)) { # loop over the samples (except the last since that have already been compared with by the second loop)
for(j in count:nrow(myExonInfoUniqSort)) { # loop over the samples starting from 2, since I dont want to compare an exon with itself
# Check whether the two exons are overlapping
#if( .findOverlappingExons(exon1=myExonInfoUniqSort[i,], exon2=myExonInfoUniqSort[j,]) ) {
if( .C("findOverlappingExons", as.integer(myExonInfoUniqSort[i,c("start","end")]), as.integer(myExonInfoUniqSort[j,c("start","end")]), as.integer(0))[[3]]) { #, PACKAGE='IsoformSwitchAnalyzeR'
#overlapIndexDF <- rbind(overlapIndexDF, data.frame(i,j))
overlapIndex <- rbind(overlapIndex, matrix(c(i,j),nrow=1))
}
## make sure I skip the rest of the j's when the end coordinat of exon(i) is smaller than the start value of the next j exon (the exons are ordered)
if(j !=nrow(myExonInfoUniqSort)) {
if( myExonInfoUniqSort[i,'end'] < myExonInfoUniqSort[j+1,'start'] ) {
break
}
}
} # j loop
count=count+1 # add one to the counter to keep only looking at one half of the matrix of posibilities
} # i loop
# now overlapIndex contains the indexes of exons that overlap one another
### interpret the overlapIndex generated above
# Find exons represented multiple times (meaning exons that overlaps with more than one other exon)
#dublicatedExons <- unique(unlist(overlapIndexDF)[duplicated( unlist(overlapIndexDF) )])
dublicatedExons <- unique(as.integer(overlapIndex)[duplicated( as.integer(overlapIndex) )])
intronRetensions <- NULL
if(length(dublicatedExons) > 0) { # if there are any exons that overlap more than one other exon
for(dublicatedExon in dublicatedExons) { # loop over each of the exons
# extract the indexs of the exons that this exon overlap with
whichRowsContainsTheDuplicated <- which(apply(overlapIndex,1,function(x) dublicatedExon %in% x))
RowsOverlappingWith <- overlapIndex[whichRowsContainsTheDuplicated,]
#overlappingWith <- unlist(RowsOverlappingWith)[unlist(RowsOverlappingWith) != dublicatedExon]
overlappingWith <- as.integer(RowsOverlappingWith)[unlist(RowsOverlappingWith) != dublicatedExon]
## make a pairwise comparason of the indexes that this exon overlaps with (nessesary since there migth be more than one)
count <- 2
for(i in 1:(length(overlappingWith)-1)) { # loop over the samples (except the last since that have already been compared with by the second loop)
for(j in count:length(overlappingWith)) { # loop over the samples to compare with
# Compare the two exons to find thos that are NOT overlapping
#if( ! .findOverlappingExons(exon1=myExonInfoUniqSort[overlappingWith[i],], exon2=myExonInfoUniqSort[overlappingWith[j],]) ) { # it is faster to compare the two exons again thant to look for the indexes in the overlapping table
if(!.C("findOverlappingExons", as.integer(myExonInfoUniqSort[overlappingWith[i],c("start","end")]), as.integer(myExonInfoUniqSort[overlappingWith[j],c("start","end")]), as.integer(0))[[3]] ) {
# add it to the intron retension list
intronRetensions <- c(intronRetensions, dublicatedExon )
}
}
count=count+1 # add one to the counter to keep only looking at one half of the matrix of posibilities
}
}
}
### Create the pre-RNA, and make sure to exclude intron retensions
# If any intron retensions were found exclude them from the preRNA
if(!is.null(intronRetensions)) {
myIRange <- IRanges(start=myExonInfoUniqSort$start[-intronRetensions], end=myExonInfoUniqSort$end[-intronRetensions])
} else {
myIRange <- IRanges(start=myExonInfoUniqSort$start, end=myExonInfoUniqSort$end)
}
end(myIRange) <- end(myIRange) -1 # hack to bypass the fact that IRanges start and end coordinats are 0 based and 1 based respectively, but the coordinats outputted by cufflinks are both 1-based. Does not matter that the
myReducedIrange <- reduce(myIRange , min.gapwidth=0 )
end(myReducedIrange) <- end(myReducedIrange) +1 # add one to the ends again
myPreRNA <- GenomicRanges::as.data.frame( myReducedIrange )
## make sure the end have not been cut off (happens somtimes due to intron retensions)
myPreRNA$start[1] <- min(myExonInfoUniqSort$start)
myPreRNA$end[nrow(myPreRNA)] <- max(myExonInfoUniqSort$end)
## Add strand (is removed by the IRange reduce)
myPreRNA$strand <- transcript$strand[1]
return(myPreRNA)
}
########################################################################
### Core function that loops over the two transcripts comparing them ###
.findOverlap <- function(transcript1, transcript2) {
numberOfExons1 <- nrow(transcript1) # get number of exons in transcript 1
numberOfExons2 <- nrow(transcript2) # get number of exons in transcript 2
##################################################################################################################
### Find overlapping and identical exons (using a while loop is much faster than for loops and IRanges
# this is the time consuming step (the others take no time)
exonIndex1 <- 1 # counter for the outer while loop
exonIndex2 <- 1 # counter for the inner while loop
overlappingExons <- data.frame()
identicalExons <- data.frame()
while(exonIndex1 <= numberOfExons1) {
while(exonIndex2 <= numberOfExons2) {
#print(c(exonIndex1,exonIndex2))
# Test for identical exons (and therfore also overlapping)
##### C-FUNCTION
if(.C("findIdenticalExons", as.integer(transcript1[exonIndex1,c('start','end')]), as.integer(transcript2[exonIndex2,c('start','end')]), as.integer(0))[[3]]) {
#if(.findIdenticalExons(transcript1[exonIndex1,c('start','end')], transcript2[exonIndex2,c('start','end')])) {
identicalExons <- rbind(identicalExons, c(exonIndex1,exonIndex2)) # if identical add both to identical and overlapping
overlappingExons <- rbind(overlappingExons, c(exonIndex1,exonIndex2))
if( exonIndex1 == numberOfExons1 & exonIndex2 == numberOfExons2) { # make sure I break if the last two are identical (because I only add to the counter if its not the alst two)
break
}
if(exonIndex1 < numberOfExons1) {
exonIndex1 <- exonIndex1 +1 # I only add if it means the counter does not exceed the number of exons in this transcript
}
if(exonIndex2 < numberOfExons2) {
exonIndex2 <- exonIndex2 +1 # I only add if it means the counter does not exceed the number of exons in this transcript
}
next
}
# if(.findIdenticalExons(transcript1[exonIndex1,c('start','end')], transcript2[exonIndex2,c('start','end')])) {
# identicalExons <- rbind(identicalExons, c(exonIndex1,exonIndex2)) # if identical add both to identical and overlapping
# overlappingExons <- rbind(overlappingExons, c(exonIndex1,exonIndex2))
# } else
# C-FUNCTION
# Test for overlapping exons
if(.C("findOverlappingExons", as.integer(transcript1[exonIndex1,c('start','end')]), as.integer(transcript2[exonIndex2,c('start','end')]), as.integer(0))[[3]]) {
#if(.findOverlappingExons(transcript1[exonIndex1,c('start','end')], transcript2[exonIndex2,c('start','end')])) {
overlappingExons <- rbind(overlappingExons, c(exonIndex1,exonIndex2))
}
# Determine which of the next exon have the lowest start coordinat so I know which one to go to (to make sure I dont skip exons)
if(exonIndex1 < numberOfExons1 & exonIndex2 < numberOfExons2) { # nessesary because else there is no coordinat to compare
if( transcript1[(exonIndex1+1),'start'] < transcript2[(exonIndex2+1),'start'] ) {
break
} else if(transcript1[(exonIndex1+1),'start'] == transcript2[(exonIndex2+1),'start'] ) {
# in the special case where the start coordinats are identical look at the end coordinats (nessesary because else intron retensions might cause problems)
if(transcript1[(exonIndex1+1),'end'] < transcript2[(exonIndex2+1),'end'] ) {
break
}
}
} else if(exonIndex2 == numberOfExons2) { # make sure that exonIndex2 is not made larger than n2 (meaning its not untill the outer loop is done that comparasons will not be made anymore)
break
}
exonIndex2 <- exonIndex2 + 1
} # end of inner while loop
exonIndex1 <- exonIndex1 + 1
} # end of outer while loop
# add names
if(nrow(overlappingExons) > 0) {
colnames(overlappingExons) <- c('isoform1','isoform2')
}
if(nrow(identicalExons)> 0) {
colnames(identicalExons) <- c('isoform1','isoform2')
}
return(list(overlap=overlappingExons, idenctial=identicalExons))
} # end of find overlap
.determineAStypeOverlap <- function(transcript1, transcript2, overlappingExons, identicalExons, exonsToIgnore) {
############################# Inital (nessesary) data analysis ##############################
numberOfExons1 <- nrow(transcript1) # get number of exons in transcript 1
numberOfExons2 <- nrow(transcript2) # get number of exons in transcript 2
numberOfExons <- c(numberOfExons1, numberOfExons2) # Combine into vecotr
transcriptList <- list(transcript1,transcript2) # combine transcripts into a list
### Determine strand
strandedness <- unique(c(transcript1$strand, transcript2$strand)) # extract stranness
# Create vector to store AS type findings
asTypes <- data.frame(ESI=0, MESI=0, ISI=0, A5=0, A3=0, ATSS=0, ATTS=0, ESI.start=NA, ESI.end=NA, MESI.start=NA, MESI.end=NA, ISI.start=NA, ISI.end=NA,
A5.start=NA, A5.end=NA, A3.start=NA, A3.end=NA, ATSS.start=NA, ATSS.end=NA, ATTS.start=NA, ATTS.end=NA ,stringsAsFactors=FALSE)
if(length(strandedness)>1) {
warning('In pairwise comparison - transcripts are not from the same strand, consitter using "fixCufflinksAnnotationProblem=TRUE" in prepareCuff() as this probably solves the problem');
#break
return(asTypes) }
isStrandEqualToPlus <- (strandedness == '+') # logic indicating whether the strand is plus
# ### check whether the transcripts are completely identical (they should not be)
# if( numberOfExons1 == numberOfExons2 & nrow(identicalExons) == numberOfExons1 ) {
# return(asTypes)
# } # if the number of exons in each transcrip are the same and all are listed in the identical list
################################## Test for alternative TSS and TTS ###############################
#Check left side
if(transcript1$start[1] != transcript2$start[1]) { # if left coordinats are different
if(isStrandEqualToPlus) {
asTypes$ATSS <- 1
asTypeStart <- 'ATSS.start' # string to help assign skipped part to the right type
asTypeEnd <- 'ATSS.end' # string to help assign skipped part to the right type
} else {
asTypes$ATTS <- 1
asTypeStart <- 'ATTS.start' # string to help assign skipped part to the right type
asTypeEnd <- 'ATTS.end' # string to help assign skipped part to the right type
}
### Annotate parts spliced out
# get indexes
minIndex <- which.min( c(transcript1$start[1], transcript2$start[1]) )
notMinIndex <- (1:2)[!1:2 %in% minIndex]
CoordinatsSmallerThanStart <- data.frame(rbind(
transcriptList[[minIndex]]$start < transcriptList[[notMinIndex]]$start[1],
transcriptList[[minIndex]]$end < transcriptList[[notMinIndex]]$start[1]
))
for(i in 1:ncol(CoordinatsSmallerThanStart)) {
# if both start and end coordinats are smaller
if(all(CoordinatsSmallerThanStart[,i])) {
if(is.na(asTypes[asTypeStart])) {
asTypes[asTypeStart] <- paste( transcriptList[[minIndex]]$start[i] )
asTypes[asTypeEnd] <- paste( transcriptList[[minIndex]]$end[i] )
} else {
asTypes[asTypeStart] <- paste(asTypes[asTypeStart], transcriptList[[minIndex]]$start[i], sep=';')
asTypes[asTypeEnd] <- paste(asTypes[asTypeEnd], transcriptList[[minIndex]]$end[i], sep=';')
}
} else if( any(CoordinatsSmallerThanStart[,i]) ) { # if only start coordinat is smaller
if(is.na(asTypes[asTypeStart])) {
asTypes[asTypeStart] <- paste( transcriptList[[minIndex]]$start[i] )
asTypes[asTypeEnd] <- paste( transcriptList[[notMinIndex]]$start[1] )
} else {
asTypes[asTypeStart] <- paste(asTypes[asTypeStart], transcriptList[[minIndex]]$start[i], sep=';')
asTypes[asTypeEnd] <- paste(asTypes[asTypeEnd], transcriptList[[notMinIndex]]$start[1], sep=';')
}
} else {
break # since there are no coordinats smaller thant the start
}
}
}
### Check rigth side
if(tail(transcript1$end,1) != tail(transcript2$end,1)) {
if(isStrandEqualToPlus) {
asTypes$ATTS <- 1
asTypeStart <- 'ATTS.start' # string to help assign skipped part to the right type
asTypeEnd <- 'ATTS.end' # string to help assign skipped part to the right type
} else {
asTypes$ATSS <- 1
asTypeStart <- 'ATSS.start' # string to help assign skipped part to the right type
asTypeEnd <- 'ATSS.end' # string to help assign skipped part to the right type
}
### Annotate parts spliced out
# get indexes
maxIndex <- which.max( c(tail(transcript1$end,1), tail(transcript2$end,1)) )
notMaxIndex <- (1:2)[!1:2 %in% maxIndex]
CoordinatsLargerThanEnd <- data.frame(rbind(
transcriptList[[maxIndex]]$start > tail(transcriptList[[notMaxIndex]]$end,1),
transcriptList[[maxIndex]]$end > tail(transcriptList[[notMaxIndex]]$end,1)
))
for(i in ncol(CoordinatsLargerThanEnd):1) { # loop over them backwards since it is here the action is
# if both start and end coordinats are larger
if(all(CoordinatsLargerThanEnd[,i])) {
if(is.na(asTypes[asTypeStart])) {
asTypes[asTypeStart] <- paste( transcriptList[[maxIndex]]$start[i] )
asTypes[asTypeEnd] <- paste( transcriptList[[maxIndex]]$end[i] )
} else {
asTypes[asTypeStart] <- paste(transcriptList[[maxIndex]]$start[i], asTypes[asTypeStart] , sep=';') # switched because i loop over then backwards
asTypes[asTypeEnd] <- paste(transcriptList[[maxIndex]]$end[i], asTypes[asTypeEnd], sep=';') # switched because i loop over then backwards
}
} else if( any(CoordinatsLargerThanEnd[,i]) ) { # if only the end coordinat is larger
if(is.na(asTypes[asTypeStart])) {
asTypes[asTypeStart] <- paste( tail(transcriptList[[notMaxIndex]]$end,1) )
asTypes[asTypeEnd] <- paste( transcriptList[[maxIndex]]$end[i] )
} else {
asTypes[asTypeStart] <- paste( tail(transcriptList[[notMaxIndex]]$end,1), asTypes[asTypeStart], sep=';') # switched because i loop over then backwards
asTypes[asTypeEnd] <- paste( transcriptList[[maxIndex]]$end[i], asTypes[asTypeEnd], sep=';') # switched because i loop over then backwards
}
} else {
break # since there are no coordinats smaller thant the start
}
}
}
### Vector of the same length as the number of overlapping exons containing logicis indicating whether an Intron retion have occured
duplicatedExons <-(duplicated(overlappingExons$isoform1) | duplicated(overlappingExons$isoform1, fromLast=TRUE )) | (duplicated(overlappingExons$isoform2) | duplicated(overlappingExons$isoform2, fromLast=TRUE )) # Logic indicating whether I find one exon in one trancript overlapping two or more exons in the other transcript)
# Make an index of the number of exons skipped based on index of overlapping exons - this index contains the number of skipping events between the overlapping exons (0 equals no skipping event)
exonSkippingIndex <- data.frame(transcript1=diff(c(0,overlappingExons$isoform1,numberOfExons1+1)), transcript2=diff(c(0,overlappingExons$isoform2,numberOfExons2+1)) ) -1
numberOfSkippingComparasons <- nrow(exonSkippingIndex)
#################################### Analyze overlapping exons ###############################
t1 <- Sys.time()
c1 <- 1 # counter to use in looping
while(c1 <= nrow(overlappingExons)) {
if(!duplicatedExons[c1]) { # if the exons analyzed are not part of a intron retension
# Single exon comparason
if( any(apply(identicalExons,MARGIN=1,function(x) all(overlappingExons[c1,] %in% x ))) ) { # dont do anything if the exons are identical (meaning found in the identical list)
c1 <- c1 + 1 # add one to the counter to go to the next level
next
} else {
# analyze left and rigth
#asTypes[,c('A5','A3')] <- asTypes[,c('A5','A3')] + .C("determineLeftOverlappingAStype", as.integer(transcript1[overlappingExons[c1,1],c(2,3)]), as.integer(transcript2[overlappingExons[c1,2],c(2,3)]), as.integer(isStrandEqualToPlus), as.integer(overlappingExons[c1,]), as.integer(c(0,1)))[[5]]
#asTypes[,c('A5','A3')] <- asTypes[,c('A5','A3')] + .C("determineRightOverlappingAStype", as.integer(transcript1[overlappingExons[c1,1],c(2,3)]), as.integer(transcript2[overlappingExons[c1,2],c(2,3)]), as.integer(isStrandEqualToPlus), as.integer(overlappingExons[c1,]), as.integer(numberOfExons), as.integer(c(0,1)))[[6]]
asTypes[,c('A5','A3','A5.start','A5.end','A3.start','A3.end')] <- .determineLeftOverlappingAStype( transcript1[overlappingExons[c1,1],] , transcript2[overlappingExons[c1,2],], isStrandEqualToPlus , overlappingExons[c1,], asTypes[,c('A5','A3','A5.start','A5.end','A3.start','A3.end')] )
asTypes[,c('A5','A3','A5.start','A5.end','A3.start','A3.end')] <- .determineRightOverlappingAStype( transcript1[overlappingExons[c1,1],] , transcript2[overlappingExons[c1,2],], isStrandEqualToPlus , overlappingExons[c1,], numberOfExons, asTypes[,c('A5','A3','A5.start','A5.end','A3.start','A3.end')] )
c1 <- c1 + 1 # add one to the counter to go to the next level
}
} else {
#### intron retension
# Get the exon indexe(s) involved in the intron retension (with respect to the transcript not the overlapping list)
numberOfReplicates1 <- overlappingExons[which(overlappingExons$isoform1 == overlappingExons$isoform1[c1]),'isoform2'] # get the exon(s) from transcript1 for the exon currently under investigation
numberOfReplicates2 <- overlappingExons[which(overlappingExons$isoform2 == overlappingExons$isoform2[c1]),'isoform1'] # get the exon(s) from transcript2 for the exon currently under investigation
numberOfReplicatesList <- list(numberOfReplicates1, numberOfReplicates2)
# these vectors indicates which exons is the intron retension (the one with length > 1) and the one with two exons
# therefore these vectors needs to be changed so that numberOfReplicates1 is used with transcript 2 and vice versa
# e.g. if transcript 1 have a intron retension the overlappingExons look like
# isoform1 isoform2
# 1 1
# 1 2
# Then the numberOfReplicates1 will have length 2, whereas numberOfReplicates2 will have length 1
# Since i want isoform1 exon 1 to be compared to both isoform2 exon1 and exon2 I will have to use numberOfReplicates1 with transcript2 and vice versa
# annotate ISI
#asTypes$ISI <- asTypes$ISI + 1 # KVS Correction Feb 2019
# KVS Correction Feb 2019
nRetentions <- max(sapply(numberOfReplicatesList, length))- 1
asTypes$ISI <- asTypes$ISI + nRetentions # KVS Correction Feb 2019
# test for 3' overhang in the intron retension
if( all( c(numberOfReplicates1[1], numberOfReplicates2[1]) != c(1,1) )) { # only test if non of the exons are first exon
if( transcript1[numberOfReplicates2[1],'start'] != transcript2[numberOfReplicates1[1],'start'] ) {
minIndex <- which.min( c(transcript1[numberOfReplicates2[1],'start'], transcript2[numberOfReplicates1[1],'start']) )
notMinIndex <- (1:2)[!1:2 %in% minIndex]
if(is.na(asTypes$ISI.start)) {
asTypes$ISI.start <- paste( transcriptList[[minIndex]][numberOfReplicatesList[[notMinIndex]][1],'start'] )
asTypes$ISI.end <- paste( transcriptList[[notMinIndex]][numberOfReplicatesList[[minIndex]][1],'start'] )
} else {
asTypes$ISI.start <- paste(asTypes$ISI.start, transcriptList[[minIndex]][numberOfReplicatesList[[notMinIndex]][1],'start'], sep=';')
asTypes$ISI.end <- paste(asTypes$ISI.end, transcriptList[[notMinIndex]][numberOfReplicatesList[[minIndex]][1],'start'], sep=';')
}
}
}
# anotate spliced out
transcriptWithIntronRetension <- which.max( sapply(numberOfReplicatesList, length ) )
transcriptWithOUTintronRetension <- (1:2)[!1:2 %in% transcriptWithIntronRetension]
for(i in 1:( max(sapply(numberOfReplicatesList, length )) -1) ) {
if(asTypes$ISI > 1) { # if a ISI have already been anotated for this transcript (the counter is above where it is > 1 (and not > 0))
# KVS Correction Feb 2019
#if(i == 1) { # then for the first entry use a ','
# asTypes$ISI.start <- paste(asTypes$ISI.start, transcriptList[[transcriptWithOUTintronRetension]]$end [ numberOfReplicatesList[[transcriptWithIntronRetension]][i] ] , sep=',')
# asTypes$ISI.end <- paste(asTypes$ISI.end, transcriptList[[transcriptWithOUTintronRetension]]$start[ numberOfReplicatesList[[transcriptWithIntronRetension]][i] +1 ] , sep=',')
#} else { # for the rest use ';'
# asTypes$ISI.start <- paste(asTypes$ISI.start, transcriptList[[transcriptWithOUTintronRetension]]$end [ numberOfReplicatesList[[transcriptWithIntronRetension]][i] ] , sep=';')
# asTypes$ISI.end <- paste(asTypes$ISI.end, transcriptList[[transcriptWithOUTintronRetension]]$start[ numberOfReplicatesList[[transcriptWithIntronRetension]][i] +1 ] , sep=';')
#}
if(is.na(asTypes$ISI.start)) {
asTypes$ISI.start <- transcriptList[[transcriptWithOUTintronRetension]]$end [ numberOfReplicatesList[[transcriptWithIntronRetension]][i] ] +1 # KVS Correction Feb 2019
asTypes$ISI.end <- transcriptList[[transcriptWithOUTintronRetension]]$start[ numberOfReplicatesList[[transcriptWithIntronRetension]][i] +1 ] -1 # KVS Correction Feb 2019
} else {
asTypes$ISI.start <- paste(asTypes$ISI.start, transcriptList[[transcriptWithOUTintronRetension]]$end [ numberOfReplicatesList[[transcriptWithIntronRetension]][i] ] +1, sep=';') # KVS Correction Feb 2019
asTypes$ISI.end <- paste(asTypes$ISI.end, transcriptList[[transcriptWithOUTintronRetension]]$start[ numberOfReplicatesList[[transcriptWithIntronRetension]][i] +1 ] -1, sep=';') # KVS Correction Feb 2019
}
} else { # if a ISI have not been anotated for this transcript
if(is.na(asTypes$ISI.start)) {
asTypes$ISI.start <- transcriptList[[transcriptWithOUTintronRetension]]$end [ numberOfReplicatesList[[transcriptWithIntronRetension]][i] ] +1 # KVS Correction Feb 2019
asTypes$ISI.end <- transcriptList[[transcriptWithOUTintronRetension]]$start[ numberOfReplicatesList[[transcriptWithIntronRetension]][i] +1 ] -1 # KVS Correction Feb 2019
} else {
asTypes$ISI.start <- paste(asTypes$ISI.start, transcriptList[[transcriptWithOUTintronRetension]]$end [ numberOfReplicatesList[[transcriptWithIntronRetension]][i] ] +1, sep=';') # KVS Correction Feb 2019
asTypes$ISI.end <- paste(asTypes$ISI.end, transcriptList[[transcriptWithOUTintronRetension]]$start[ numberOfReplicatesList[[transcriptWithIntronRetension]][i] +1 ] -1, sep=';') # KVS Correction Feb 2019
}
}
}
# test for 5' overhang in the intron retension
if( all( c(tail(numberOfReplicates2,1), tail(numberOfReplicates1,1)) != numberOfExons) ) { # only test if non of the exons are last exon
if( transcript1[tail(numberOfReplicates2,1),'end'] != transcript2[tail(numberOfReplicates1,1),'end'] ) {
minIndex <- which.min( c(transcript1[tail(numberOfReplicates2,1),'end'], transcript2[tail(numberOfReplicates1,1),'end']) )
notMinIndex <- (1:2)[!1:2 %in% minIndex]
# no need to test whether it is NA - since the intron retension have already been anotated
asTypes$ISI.start <- paste(asTypes$ISI.start, transcriptList[[minIndex]][tail(numberOfReplicatesList[[notMinIndex]],1),'end'], sep=';')
asTypes$ISI.end <- paste(asTypes$ISI.end, transcriptList[[notMinIndex]][tail(numberOfReplicatesList[[minIndex]],1),'end'], sep=';')
}
}
c1 <- c1 + max(length(numberOfReplicates1), length(numberOfReplicates2)) # add the number of exons overlapped by the intron retionsion so that in next itteration these are skipped (this is the reason a while loop is used)
}
} # end of overlapping exons while loop
#a############################### Analyze non-overlapping exons ###############################
## A data.frame to help retrive the exons corresponding to the skipping indexes
overlappingExons2 <- rbind(c(0,0), overlappingExons, (numberOfExons+1))
colnames(overlappingExons2) <- c('isoform1','isoform2')
for(i in 1:numberOfSkippingComparasons) {
if(sum(exonSkippingIndex[i,]) == 0) { # if no exons were skipped
next
}
else if(sum(exonSkippingIndex[i,]) == 1) {
if(i == 1 | i == numberOfSkippingComparasons) {
next # the ATSS/ATTS have already been found - they cannot be ommitted because they might contain more than alternative TSS/TTS
} else {
### Check whether the exon skipping is part of the exons to ignore
# check whith of the transcripts the single scipping occured in
skippingInTranscript <- which(exonSkippingIndex[i,] == 1)
# Extract the exon number of the exon to be skipped
localExonSkippingIndex <- overlappingExons2[i:(i+1),skippingInTranscript]
LocalExonToSkip <- seq(localExonSkippingIndex[1]+1,localExonSkippingIndex[2]-1) # extract the exon(s) index that should be compared
# Check whether the exon to be skipped is part of those that should be ignored
if(LocalExonToSkip %in% exonsToIgnore[[skippingInTranscript]]) {
next
} else {
asTypes$ESI <- asTypes$ESI + 1
if(is.na(asTypes$ESI.start)) {
asTypes$ESI.start <- paste( transcriptList[[skippingInTranscript]]$start[LocalExonToSkip] )
asTypes$ESI.end <- paste( transcriptList[[skippingInTranscript]]$end[LocalExonToSkip] )
} else {
asTypes$ESI.start <- paste(asTypes$ESI.start, transcriptList[[skippingInTranscript]]$start[LocalExonToSkip],sep=';')
asTypes$ESI.end <- paste(asTypes$ESI.end, transcriptList[[skippingInTranscript]]$end[LocalExonToSkip], sep=';')
}
}
} # end of if not start or end
} else if(sum(exonSkippingIndex[i,]) > 1) { # if the number of exons skipped is larger than 1 (nessesary to evaluate since intron retension cause negative numbers in the index)
if(i == 1 | i == numberOfSkippingComparasons) { # if the skipping is in the first or last step
# Make sure it is not just one of the trasnscripts having many exons in the alternative TSS or TTS
if(exonSkippingIndex$transcript1[i] > 0 & exonSkippingIndex$transcript2[i] > 0) {
asTypes[,c('ESI','MESI','ESI.start','ESI.end','MESI.start','MESI.end')] <- .determineNonOverlappingAStype(transcript1, transcript2,numberOfExons , overlappingExons2$isoform1[i:(i+1)], overlappingExons2$isoform2[i:(i+1)], exonsToIgnore, asTypes[,c('ESI','MESI','ESI.start','ESI.end','MESI.start','MESI.end')] )
} else {
next # continue since ATSS/ATTS have already been found
}
} else { # if its not the first or last step
asTypes[,c('ESI','MESI','ESI.start','ESI.end','MESI.start','MESI.end')] <- .determineNonOverlappingAStype(transcript1, transcript2, numberOfExons, overlappingExons2$isoform1[i:(i+1)], overlappingExons2$isoform2[i:(i+1)], exonsToIgnore, asTypes[,c('ESI','MESI','ESI.start','ESI.end','MESI.start','MESI.end')] )
}
}
} # end of non-overlapping loop
################################ Finish up################################
return(asTypes)
}
}
#################################
### The acutal spliceR function
adaptedSpliceR <- function(transcriptData, compareTo, filters, expressionCutoff=0, useProgressBar=TRUE) {
#startTime <- Sys.time() # outcommented by KVS
# Check class and GRanges
#if (!class(transcriptData)[1]=="SpliceRList") stop("transcriptData argument is not of class SpliceRList") # outcommented KVS 2018-03-06
if ( class(transcriptData$"transcript_features") != "GRanges" || class(transcriptData$"exon_features") != "GRanges" ) stop("transcriptData must have GRanges objects in slots 'transcript_features' and 'exon_features'")
# Validate required columns in spliceRList
t_colNames <- colnames(mcols(transcriptData$"transcript_features"))
if(!all(c(
"isoform_id", "sample_1", "sample_2", "gene_id", "iso_value_1", "iso_value_2", "iso_q_value") %in% substr(t_colNames, 9, nchar(t_colNames))
)
) stop("Transcript features GRanges not compatible with spliceR - see documentation for more details")
e_colNames <- colnames(mcols(transcriptData$"exon_features"))
if(!all(c(
"isoform_id","gene_id") %in% substr(e_colNames, 9, nchar(e_colNames))
)
) stop("Exon features GRanges not compatible with spliceR - see documentation for more details")
# check correct user input
conditionNames <- transcriptData[['conditions']]
if(!compareTo %in% c('preTranscript', conditionNames)) {
stop(paste('Error in determining compareTo, must be one of: \'preTranscript\' \'', paste(conditionNames, collapse="', '"), "\'. See ?spliceR for more information", sep='') )
}
dataOrigin <- transcriptData[["source_id"]]
if(! dataOrigin %in% c('cufflinks', 'granges') ) {
stop('The input data was not recogniced, please see ?SpliceRList for more information about the input files')
}
# Check if the filters supplied are OK:
if(dataOrigin == 'cufflinks') { okFilters <- c('none','expressedGenes','geneOK', 'sigGenes', 'isoOK', 'expressedIso', 'isoClass', 'sigIso', 'singleExon') }
if(dataOrigin == 'granges') { okFilters <- c('none', 'SingleExon') } # ok since it forces the user to acknowledge that no filters are used
if('PTC' %in% filters) { # if asked to filter on PTC
if('spliceR.PTC' %in% colnames(as.data.frame(transcriptData$"transcript_features"[1,]))) { # check whether the spliceR object contain PTC info
okFilters <- c(okFilters, 'PTC')
} else {
stop('spliceR cannot filter on PTC since no PTC info is advailable. PTC information can be obtained through annotatePTC() ')
}
}
if(any(!filters %in% okFilters)) { # if one or more of the supplied filters are not recogniced
stop('One or more of the supplied filters are not recogniced, please see ?determineAStypes for more information about the filters')
}
# message("Preparing transcript data...") # outcommented KVS 2018-03-06
# Create placeholder rows
transcriptData$"transcript_features"$"spliceR.major"=NA
transcriptData$"transcript_features"$"spliceR.IF1"=NA
transcriptData$"transcript_features"$"spliceR.IF2"=NA
transcriptData$"transcript_features"$"spliceR.dIF"=NA
transcriptData$"transcript_features"$"spliceR.ESI"=NA
transcriptData$"transcript_features"$"spliceR.MEE"=NA
transcriptData$"transcript_features"$"spliceR.MESI"=NA
transcriptData$"transcript_features"$"spliceR.ISI"=NA
transcriptData$"transcript_features"$"spliceR.A5"=NA
transcriptData$"transcript_features"$"spliceR.A3"=NA
transcriptData$"transcript_features"$"spliceR.ATSS"=NA
transcriptData$"transcript_features"$"spliceR.ATTS"=NA
transcriptData$"transcript_features"$"spliceR.analyzed"='no'
transcriptData$"transcript_features"$"spliceR.ESI.start"=NA
transcriptData$"transcript_features"$"spliceR.ESI.end"=NA
transcriptData$"transcript_features"$"spliceR.MEE.start"=NA
transcriptData$"transcript_features"$"spliceR.MEE.end"=NA
transcriptData$"transcript_features"$"spliceR.MESI.start"=NA
transcriptData$"transcript_features"$"spliceR.MESI.end"=NA
transcriptData$"transcript_features"$"spliceR.ISI.start"=NA
transcriptData$"transcript_features"$"spliceR.ISI.end"=NA
transcriptData$"transcript_features"$"spliceR.A5.start"=NA
transcriptData$"transcript_features"$"spliceR.A5.end"=NA
transcriptData$"transcript_features"$"spliceR.A3.start"=NA
transcriptData$"transcript_features"$"spliceR.A3.end"=NA
transcriptData$"transcript_features"$"spliceR.ATSS.start"=NA
transcriptData$"transcript_features"$"spliceR.ATSS.end"=NA
transcriptData$"transcript_features"$"spliceR.ATTS.start"=NA
transcriptData$"transcript_features"$"spliceR.ATTS.end"=NA
#Create backup spliceRList with GRanges before converting to dataframes for output
originalTranscriptData <- transcriptData
# message("Converting to internal objects...") # outcommented KVS 2018-03-06
#Convert Granges to dataframe
tempDF <- GenomicRanges::as.data.frame(transcriptData[["transcript_features"]])
tempDF <- data.frame(lapply(tempDF, function(x) {if (class(x)=="factor") as.character(x) else (x)}), stringsAsFactors=FALSE) # remove factors
colnames(tempDF) <- c(colnames(tempDF)[1:5], substr(colnames(tempDF)[6:ncol(tempDF)],9,nchar(colnames(tempDF)[6:ncol(tempDF)])))
#remove columns not needed here
transcriptData[["transcript_features"]] <- tempDF
tempDF <- GenomicRanges::as.data.frame(transcriptData[["exon_features"]])
tempDF <- data.frame(lapply(tempDF, function(x) {if (class(x)=="factor") as.character(x) else (x)}), stringsAsFactors=FALSE) # remove factors
colnames(tempDF) <- c(colnames(tempDF)[1:5], substr(colnames(tempDF)[6:ncol(tempDF)],9,nchar(colnames(tempDF)[6:ncol(tempDF)])))
tempDF <- tempDF[,c("start", "end", "strand", "isoform_id")]
transcriptData[["exon_features"]] <- tempDF
rm(tempDF)
# message(length(unique(transcriptData[["transcript_features"]]$isoform_id)), " isoforms pre-filtering...") #outcommented KVS 2018-03-06
# message("Filtering...") # outcommented KVS 2018-03-06
### Filter transcript info
isoformsToAnalyzeIndex <- 1:nrow(transcriptData[["transcript_features"]])
# Optional filters
if('geneOK' %in% filters) { isoformsToAnalyzeIndex <- .filterOKGenes( transcriptData, isoformsToAnalyzeIndex) }
if('expressedGenes' %in% filters) { isoformsToAnalyzeIndex <- .filterExpressedGenes( transcriptData, isoformsToAnalyzeIndex, expressionCutoff) }
if('sigGenes' %in% filters) { isoformsToAnalyzeIndex <- .filterSigGenes( transcriptData, isoformsToAnalyzeIndex) }
if('isoOK' %in% filters) { isoformsToAnalyzeIndex <- .filterOKIso( transcriptData, isoformsToAnalyzeIndex) }
if('expressedIso' %in% filters) { isoformsToAnalyzeIndex <- .filterExpressedIso( transcriptData, isoformsToAnalyzeIndex, expressionCutoff) }
if('isoClass' %in% filters) { isoformsToAnalyzeIndex <- .filterIsoClassCode( transcriptData, isoformsToAnalyzeIndex) }
if('sigIso' %in% filters) { isoformsToAnalyzeIndex <- .filterSigIso( transcriptData, isoformsToAnalyzeIndex) }
if('singleExon' %in% filters) { isoformsToAnalyzeIndex <- .filterSingleExonIsoAll( transcriptData, isoformsToAnalyzeIndex) }
if('PTC' %in% filters) { isoformsToAnalyzeIndex <- .filterPTC( transcriptData, isoformsToAnalyzeIndex) }
# Mandatory filters
isoformsToAnalyzeIndex <- .filterSingleIsoform(transcriptData, isoformsToAnalyzeIndex, conditionNames) # Remove genes with only one isoform left
# message(length(unique(transcriptData[["transcript_features"]]$isoform_id[isoformsToAnalyzeIndex])), " isoforms post-filtering...") # outcommented KVS 2018-03-06
### Extract unique gene name
geneIDs <- unique(transcriptData[["transcript_features"]]$gene_id[isoformsToAnalyzeIndex])
numberOfGenes <- length(geneIDs)
### Extract gene names of all genes that ought to be analyzed
geneIdsToAnalyze <- transcriptData[["transcript_features"]]$gene_id[isoformsToAnalyzeIndex]
# message("Preparing exons...") # outcommented KVS 2018-03-06
### Split exon info
# It is fasters to split on isoform id than on genID when scaling up, probably because no exoninfo not used is extracted.
isoformIDs <- unique(transcriptData[["transcript_features"]]$isoform_id[isoformsToAnalyzeIndex])
temp <- transcriptData[["exon_features"]][which(transcriptData[["exon_features"]]$isoform_id %in% isoformIDs),]
isoformFeaturesSplit <- split(temp, f=temp$"isoform_id")
rm(temp)
## Determine whether major is chosen or not
if(compareTo == 'preTranscript') {
# A logic indicating whether preTranscrip comparason or major is chosen
major <- FALSE
} else {
### Find major isoform if that option is toggeled
major <- TRUE
}
# message('Analyzing transcripts...') # outcommented KVS 2018-03-06
# Create statusbar (this statement also automaticlly prints the statusbar)
if (useProgressBar) pb <- txtProgressBar(min = 1, max = numberOfGenes, style = 3)
for(geneIndex in 1:numberOfGenes) {
#################### Extract indexs of isoforms belonging to the genes ####################
### extract information about the gene
isoformsToAnalyzeWithinGeneIndexGlobal <- isoformsToAnalyzeIndex[which(geneIdsToAnalyze == geneIDs[geneIndex])] # get the global indexes for the gene analyzed now #Indexing moved outside of loop
isoformsToAnalyze <- transcriptData[["transcript_features"]][isoformsToAnalyzeWithinGeneIndexGlobal,] # extract info about the gene analyzed now #OBS, IMPROVE SPEED
# extract unique isoforms indexes
uniqIsoformNames <- unique(isoformsToAnalyze$isoform_id)
uniqueIsoformsIndex <- match(uniqIsoformNames, isoformsToAnalyze$isoform_id)
### annotate which have been analyzed
isoformsToAnalyze$analyzed <- 'yes'
isoformsToAnalyze$MEE <- 0 # these are not nessesarely overwritten else
#### Determine whether major or preTranscript
## Extract all exons to analyze belonging to this gene
exonList <- isoformFeaturesSplit[ uniqIsoformNames ] # this list is used several times
### Create preTranscript
################################ SLOW ######################
# !!! Improved !!! KVS
allUniqueExons <- unique(do.call(rbind,exonList)[,c('start','end','strand')])
if(nrow(allUniqueExons) > 1) {
exonInfoPreTranscript <- .getPreRNA( allUniqueExons ) # only pass unique coordinates to the function
} else {
next # for the special occation where a gene with multiple identical transcripts with only one exon
}
################################ SLOW ######################
### see whether the minimum requirements for MEE are there (to speed up the calculations)
areMEEposible <- all( length(which(sapply(exonList, nrow) >= 3)) >= 2 , nrow(exonInfoPreTranscript) >=4)
if(major) { ## Check if major is toggeled
# ###Determine which isoform is major
# if(dataOrigin == 'cufflinks') {
# maxIsoformIndex <- .getMajorIsoCuffDB(isoformsToAnalyze, compareTo)
# } else {
maxIsoformIndex <- .getMajorIsoCuffDB(isoformsToAnalyze, compareTo)
# }
if(length(maxIsoformIndex) == 0) { next } # since it means that no transcript from refrence sample is expressed in for this gene
# make sure all rows with the transcripts are annotated included in the maxIsoformIndex (nessesary when having more than two samples since else there will be rows with the isoform, but not containing the refrence sample)
maxIsoformIndex <- which(isoformsToAnalyze$isoform_id == isoformsToAnalyze$isoform_id[maxIsoformIndex[1]])
# extract exon info from the major isoform
majorExonInfo <- exonList[[ isoformsToAnalyze$isoform_id[maxIsoformIndex[1]] ]]
# annotate which is major and which is not
isoformsToAnalyze[,'major'] <- 'no'
isoformsToAnalyze[maxIsoformIndex,'major'] <- 'yes'
}
#############################################################
####################### Compare isoforms ####################
#############################################################
################# Check for MEE #################
### Create an empty list with the exons to ignore for each of the isoforms
exonsToIgnoreList <- lapply(1:length(uniqueIsoformsIndex),function(x) NULL)
if(areMEEposible) { # if not major Generate list to store the overlapping info from
### Create a dataframe to indicate whether the exons are included or not - used to find mutually exclusive exons
exonIncluded <- data.frame(matrix(0, ncol=length(uniqueIsoformsIndex), nrow=nrow(exonInfoPreTranscript)))
colnames(exonIncluded) <- uniqIsoformNames
if(!major) {
overlapList <- list(NULL) # it is faster to store the overlaps in a list than to redo them - even for small transcript
identicalList <- list(NULL) # it is faster to store the overlaps in a list than to redo them - even for small transcript
}
# Loop over genes and extract info of which exons are expressed in which transcripts
for(i in 1:length(uniqueIsoformsIndex)) {
## extract exon features of minor isoform
isoformExonInfo <- exonList[[ uniqIsoformNames[i] ]]
overlapIdenticalList <- .findOverlap(exonInfoPreTranscript,isoformExonInfo)
if(!major) {
## save the overlap tables so I dont need to create them again (they are not used again if major is choseccn)
overlapList[[i]] <- overlapIdenticalList$overlap
identicalList[[i]] <- overlapIdenticalList$idenctial
}
exonIncluded[overlapIdenticalList$overlap$isoform1,i] <- overlapIdenticalList$overlap$isoform1
# Add collum with expressed info to the data.frame
} # end of loop over unique isoforms
# Change to binary table
exonIncludedTF <- apply((exonIncluded > 0),2,function(x) as.integer(x))
# Determine MEE based on the exons included table pair else they are ends
startExons <- apply(exonIncludedTF,2,function(x) match(1, x))
endExons <- nrow(exonIncludedTF) + 1 - apply(exonIncludedTF,2,function(x) match(1, rev(x)))
for(i in 1:(nrow(exonIncludedTF)-1)) {
if( sum(exonIncludedTF[i,]) == 1 & sum(exonIncludedTF[i+1,]) == 1 ) {
expressedIn1 <- which(as.logical(exonIncludedTF[i,])) # get the transcript index
expressedIn2 <- which(as.logical(exonIncludedTF[i+1,])) # get the transcript index
if( expressedIn1 != expressedIn2 ) {
if(i != startExons[expressedIn1] & i != endExons[expressedIn1] & i+1 != startExons[expressedIn2] & i+1 != endExons[expressedIn2]) {
# annotate the number of MEE
MEEindexes1 <- which(isoformsToAnalyze$isoform_id == isoformsToAnalyze$isoform_id[uniqueIsoformsIndex[expressedIn1]])
MEEindexes2 <- which(isoformsToAnalyze$isoform_id == isoformsToAnalyze$isoform_id[uniqueIsoformsIndex[expressedIn2]])
isoformsToAnalyze$MEE[c(MEEindexes1,MEEindexes2)] <- isoformsToAnalyze$MEE[c(MEEindexes1,MEEindexes2)] + 1
# add the exons to the ignore list
exonsToIgnoreList[[expressedIn2]] <- c(exonsToIgnoreList[[expressedIn2]], exonIncluded[i,expressedIn1]) #exonsToIgnoreList[[expressedIn2]] is used instead of expressedIn1 since I want the exon not expressed in this isoform
exonsToIgnoreList[[expressedIn1]] <- c(exonsToIgnoreList[[expressedIn1]], exonIncluded[i+1,expressedIn2])
# annotate the positions of MEE
if(is.na(isoformsToAnalyze$MEE.start[MEEindexes1[1]])) {
isoformsToAnalyze$MEE.start[MEEindexes1] <- paste(exonInfoPreTranscript$start[i])
isoformsToAnalyze$MEE.end[MEEindexes1] <- paste(exonInfoPreTranscript$end[i])
} else {
isoformsToAnalyze$MEE.start[MEEindexes1] <- paste(isoformsToAnalyze$MEE.start[MEEindexes1],exonInfoPreTranscript$start[i],sep=';')
isoformsToAnalyze$MEE.end[MEEindexes1] <- paste(isoformsToAnalyze$MEE.end[MEEindexes1],exonInfoPreTranscript$end[i],sep=';')
}
if(is.na(isoformsToAnalyze$MEE.end[MEEindexes2[1]])) {
isoformsToAnalyze$MEE.start[MEEindexes2] <- paste(exonInfoPreTranscript$start[i+1])
isoformsToAnalyze$MEE.end[MEEindexes2] <- paste(exonInfoPreTranscript$end[i+1])
} else {
isoformsToAnalyze$MEE.start[MEEindexes2] <- paste(isoformsToAnalyze$MEE.start[MEEindexes2],exonInfoPreTranscript$start[i+1],sep=';')
isoformsToAnalyze$MEE.end[MEEindexes2] <- paste(isoformsToAnalyze$MEE.end[MEEindexes2],exonInfoPreTranscript$end[i+1],sep=';')
}
}
}
}
}
## Make sure every exon is only reppresented once (a special case where an exon is envolved in two MEE events)
exonsToIgnoreList <- lapply(exonsToIgnoreList, function(x) unique(x))
} # end of is MEE posible
### Determine which of the indexes is major (only nessesary if any exons ought to be skipped)
if(major) {
majorIndex <- which(uniqIsoformNames %in% isoformsToAnalyze$isoform_id[maxIsoformIndex[1]]) # get the index of major
}
######## Classify the rest of the AS types ########
# loop over the unique isoforms to make the AS type comparason
for(i in 1:length(uniqueIsoformsIndex)) { # i <- 2
if(major) { # if I compare to major
if(uniqueIsoformsIndex[i] %in% maxIsoformIndex) {next} # if this isoform is the major
}
## extract exon features of minor isoform
isoformExonInfo <- exonList[[ uniqIsoformNames[i] ]]
# Get indexes for all rows containing this transcript so they can all be annotated (many samples == many rows)
thisIsoformIndex <- which(isoformsToAnalyze$isoform_id == isoformsToAnalyze$isoform_id[uniqueIsoformsIndex[i]])
### Determine AS classification and overlap
if(!major) { # if pre-transcript
## Determine which exons to ignore
exonsToIgnore <- list(exonsToIgnoreList[[i]], NULL) # NULL since I know no exons is skipped in pre-transcripted, switched since the skipping is going to occure in the OPPOSITE transcript
### annotate all instances of this isoform with the Alternative splicing found
if(areMEEposible) {
isoformsToAnalyze[thisIsoformIndex, c(
'ESI','MESI','ISI','A5','A3','ATSS','ATTS',
'ESI.start', 'ESI.end','MESI.start','MESI.end','ISI.start','ISI.end','A5.start','A5.end',
'A3.start','A3.end','ATSS.start','ATSS.end','ATTS.start','ATTS.end'
)] <- .determineAStypeOverlap(exonInfoPreTranscript,isoformExonInfo,overlapList[[i]],identicalList[[i]], exonsToIgnore)
} else {
overlapListLocal <- .findOverlap(exonInfoPreTranscript,isoformExonInfo)
isoformsToAnalyze[thisIsoformIndex, c(
'ESI','MESI','ISI','A5','A3','ATSS','ATTS',
'ESI.start', 'ESI.end','MESI.start','MESI.end','ISI.start','ISI.end','A5.start','A5.end',
'A3.start','A3.end','ATSS.start','ATSS.end','ATTS.start','ATTS.end'
)] <- .determineAStypeOverlap(exonInfoPreTranscript,isoformExonInfo,overlapListLocal[[1]],overlapListLocal[[2]], exonsToIgnore)
}
} else { # if major
### Determine which exons to ignore
exonsToIgnore <- list(exonsToIgnoreList[[i]], exonsToIgnoreList[[majorIndex]]) # switched since the skipping is going to occure in the OPPOSITE transcript
### Determine overlapping exons between major and minor
temp <- .findOverlap(majorExonInfo,isoformExonInfo)
### annotate all instances of this isoform with the Alternative splicing found
isoformsToAnalyze[thisIsoformIndex, c(
'ESI','MESI','ISI','A5','A3','ATSS','ATTS',
'ESI.start', 'ESI.end','MESI.start','MESI.end','ISI.start','ISI.end','A5.start','A5.end',
'A3.start','A3.end','ATSS.start','ATSS.end','ATTS.start','ATTS.end'
)] <- .determineAStypeOverlap(majorExonInfo,isoformExonInfo,temp[[1]],temp[[2]], exonsToIgnore)
}
} # end of loop over isoforms
### Anotate IF and dIF values
# get total expression of all the isoforms to analyze for EACH condition (is different than the expression of the gene - because i exclude some transcripts)
totalIsoformExpression <- NULL
for(conditionName in conditionNames) {
maxOFthisIsoform <- sum(
isoformsToAnalyze$iso_value_1[which(isoformsToAnalyze$sample_1 == conditionName)],
isoformsToAnalyze$iso_value_2[which(isoformsToAnalyze$sample_2 == conditionName)]
)
totalIsoformExpression <- c(totalIsoformExpression , maxOFthisIsoform)
}
# Extract info about which collums are the ones that contain the wanted info
sampleCol1 <- which(colnames(transcriptData[["transcript_features"]])=="sample_1") #spliceR.sample_1
isoValCol1 <- which(colnames(transcriptData[["transcript_features"]])=="iso_value_1") #spliceR.iso_value_1
IFvalCol1 <- which(colnames(transcriptData[["transcript_features"]])=="IF1") #spliceR.IF1
# print(cat(colnames(transcriptData[["transcript_features"]])))
# print(cat(isoValCol1))
# print(cat(PSIvalCol1))
# if(dataOrigin == 'cufflinks') { # in this way we can controle the different indexes for different input files
# # sampleCol1 <- 7 #spliceR.sample_1
# # isoValCol1 <- 24 #spliceR.iso_value_1
# # PSIvalCol1 <- 31 #spliceR.PSI1
# sampleCol1 <- which(colnames(transcriptData[["transcript_features"]]))=="spliceR.sample_1") #spliceR.sample_1
# isoValCol1 <- which(colnames(transcriptData[["transcript_features"]]))=="spliceR.iso_value_1") #spliceR.iso_value_1
# PSIvalCol1 <- which(colnames(transcriptData[["transcript_features"]]))=="spliceR.PSI1") #spliceR.PSI1
# }
# if(dataOrigin == 'granges') { # in this way we can controle the different indexes for different input files
# sampleCol1 <- NULL
# isoValCol1 <- NULL
# PSIvalCol1 <- NULL
# }
# # loop over all indexes to analyze to calculte IF values
# for(isoformIndex in 1:nrow(isoformsToAnalyze)) {
# # if isoform is major annotate it
# if(major) {
# if(isoformIndex %in% maxIsoformIndex) { next }
# }
#
# for(i in 0:1) { #loop over indexes so i can calculate IF for both the sample_1 and sample_2 collumns
# # Get condition name
# myCondition <- isoformsToAnalyze[isoformIndex,(sampleCol1+i)] # get condition name (which is in collumn 2 and 3)
# # get total expression of isoforms within that gene
# totalExpValue <- totalIsoformExpression[conditionNames %in% myCondition]
# if(totalExpValue == 0) {
# isoformsToAnalyze[isoformIndex,(IFvalCol1+i)] <- 0 # else I would devide by zero
# } else {
# # calculate IF value
# isoformsToAnalyze[isoformIndex,(IFvalCol1+i)] <- round( isoformsToAnalyze[isoformIndex,(isoValCol1+i)] / totalExpValue * 100 ,digits = 2)
# }
# }
# }
# # annotate dIF
# isoformsToAnalyze$dIF <- isoformsToAnalyze$IF2 - isoformsToAnalyze$IF1
# write local data to global dataframe (so everything is stored and can be returned)
# this is faster than replacing the full dataset and also faster (and more readiable) than using c(26:40)
transcriptData[["transcript_features"]][
isoformsToAnalyzeWithinGeneIndexGlobal,
c("major","IF1","IF2","dIF","ESI","MEE","MESI","ISI","A5","A3","ATSS","ATTS","analyzed",'ESI.start', 'ESI.end','MEE.start','MEE.end','MESI.start','MESI.end','ISI.start','ISI.end','A5.start','A5.end','A3.start','A3.end','ATSS.start','ATSS.end','ATTS.start','ATTS.end')
] <- isoformsToAnalyze[,c("major","IF1","IF2","dIF","ESI","MEE","MESI","ISI","A5","A3","ATSS","ATTS","analyzed",'ESI.start', 'ESI.end','MEE.start','MEE.end','MESI.start','MESI.end','ISI.start','ISI.end','A5.start','A5.end','A3.start','A3.end','ATSS.start','ATSS.end','ATTS.start','ATTS.end')] #slow index - THE RATE LIMITING STEP
#paste(difftime(Sys.time(),t10,u='sec'),'Time to write to global file',sep=' ')
### Update progressbar
if (useProgressBar) setTxtProgressBar(pb, geneIndex)
} # belongs to loop over genes
### Annotate IF values
transcriptData$transcript_features$IF1[isoformsToAnalyzeIndex] <- round(transcriptData$transcript_features$iso_value_1[isoformsToAnalyzeIndex] / transcriptData$transcript_features$gene_value_1[isoformsToAnalyzeIndex] * 100, digits=4)
transcriptData$transcript_features$IF2[isoformsToAnalyzeIndex] <- round(transcriptData$transcript_features$iso_value_2[isoformsToAnalyzeIndex] / transcriptData$transcript_features$gene_value_2[isoformsToAnalyzeIndex] * 100, digits=4)
transcriptData$transcript_features$dIF[isoformsToAnalyzeIndex] <- transcriptData$transcript_features$IF2[isoformsToAnalyzeIndex] - transcriptData$transcript_features$IF1[isoformsToAnalyzeIndex]
#close progress bar
if (useProgressBar) close(pb)
# message('Preparing output...') # outcommented KVS 2018-03-06
ori_col_names <- colnames(mcols(originalTranscriptData[[1]]))
ori_col_names_no_spliceR <- substr(ori_col_names, 9, nchar(ori_col_names))
for (i in 1:length(ori_col_names))
{
mcols(originalTranscriptData[[1]])[ori_col_names[i]] <- transcriptData[[1]][,ori_col_names_no_spliceR[i]]
}
#Add filters to spliceR object
originalTranscriptData[['filter_params']] <- filters
#endTime <- Sys.time() # outcommented by KVS
#message('Done in ', format(difftime(endTime,startTime), digits=2)) # outcommented by KVS
# return data list to give back all annotation
return(originalTranscriptData)
#return GRanges
}
}
### New AS functions
analyzeAlternativeSplicing <- function(
switchAnalyzeRlist,
onlySwitchingGenes = TRUE,
alpha = 0.05,
dIFcutoff = 0.1,
showProgress = TRUE,
quiet = FALSE
) {
### Test input
if (TRUE) {
if (class(switchAnalyzeRlist) != 'switchAnalyzeRlist') {
stop(
'The object supplied to \'switchAnalyzeRlist\' must be a \'switchAnalyzeRlist\''
)
}
if (!is.logical(onlySwitchingGenes)) {
stop('The onlySwitchingGenes argument must be either TRUE or FALSE')
}
if (onlySwitchingGenes) {
if (!any(!is.na(
switchAnalyzeRlist$isoformFeatures$gene_switch_q_value
))) {
stop(
'The analsis of isoform switching must be performed before alternative splicing can be analyzed. Please run ?detectIsoformSwitching and try again.'
)
}
}
if (alpha < 0 |
alpha > 1) {
stop('The alpha parameter must be between 0 and 1 ([0,1]).')
}
if (alpha > 0.05) {
warning(
'Most journals and scientists consider an alpha larger than 0.05 untrustworthy. We therefore recommend using alpha values smaller than or queal to 0.05'
)
}
if (dIFcutoff < 0 | dIFcutoff > 1) {
stop('The dIFcutoff must be in the interval [0,1].')
}
}
### Exract data
if (TRUE) {
if (!quiet) {
message('Step 1 of 3: Massaging data...')
}
localData <- unique(
makeMinimumSwitchList(
switchAnalyzeRlist,
switchAnalyzeRlist$isoformFeatures$isoform_id
)$isoformFeatures[,
c('isoform_id', 'gene_id', 'condition_1', 'condition_2')]
)
localData$iso_value_1 <- NA
localData$iso_value_2 <- NA
localData$iso_q_value <- NA
localData$gene_value_1 <- NA
localData$gene_value_2 <- NA
colnames(localData)[match(
c('condition_1', 'condition_2') ,
colnames(localData)
)] <-
c('sample_1', 'sample_2')
if (onlySwitchingGenes) {
isoResTest <-
any(!is.na(
switchAnalyzeRlist$isoformFeatures$isoform_switch_q_value
))
if (isoResTest) {
switchingGenes <-
unique(switchAnalyzeRlist$isoformFeatures$gene_id [which(
switchAnalyzeRlist$isoformFeatures$isoform_switch_q_value <
alpha &
abs(switchAnalyzeRlist$isoformFeatures$dIF) >
dIFcutoff
)])
} else {
switchingGenes <-
unique(switchAnalyzeRlist$isoformFeatures$gene_id [which(
switchAnalyzeRlist$isoformFeatures$gene_switch_q_value <
alpha &
abs(switchAnalyzeRlist$isoformFeatures$dIF) >
dIFcutoff
)])
}
if (length(switchingGenes) == 0) {
stop(
'No switching genes were found. Pleasae turn off \'onlySwitchingGenes\' and try again.'
)
}
localData <-
localData[which(localData$gene_id %in% switchingGenes), ]
}
correspondingExons <-
switchAnalyzeRlist$exons[which(
switchAnalyzeRlist$exons$isoform_id %in% localData$isoform_id
),]
colnames(correspondingExons@elementMetadata) <-
paste('spliceR.',
colnames(correspondingExons@elementMetadata),
sep = '')
# make GRange
n <- nrow(localData)
transcriptFeatureGR <- GRanges(
seqnames = rep('NA', n),
ranges = IRanges(start = rep(0, n),
end = rep(1, n)),
spliceR = localData #add spliceR to metadata colnames
)
# Make list with data
localSpliceRList <- list(
transcript_features = transcriptFeatureGR,
exon_features = correspondingExons,
assembly_id = 'NA',
source_id = 'cufflinks',
conditions = switchAnalyzeRlist$conditions$condition,
transcripts_plot = NULL,
filter_params = NULL
)
}
### Run modified spliceR
if(TRUE) {
if (!quiet) {
message('Step 2 of 3: Analyzing splicing...')
}
if (quiet) {
suppressMessages(
annotationResult <- adaptedSpliceR(
transcriptData = localSpliceRList,
compareTo = 'preTranscript',
filters = 'none',
useProgressBar = showProgress & !quiet
)
)
} else {
annotationResult <- adaptedSpliceR(
transcriptData = localSpliceRList,
compareTo = 'preTranscript',
filters = 'none',
useProgressBar = showProgress & !quiet
)
}
}
### Massage data
if(TRUE) {
if (!quiet) {
message('Step 3 of 3: Preparing output...')
}
localASresults <-
unique(as.data.frame(
annotationResult$transcript_features@elementMetadata[,c(
'spliceR.isoform_id',
"spliceR.ESI",
"spliceR.ESI.start",
"spliceR.ESI.end",
"spliceR.MEE",
"spliceR.MEE.start",
"spliceR.MEE.end",
"spliceR.MESI",
"spliceR.MESI.start",
"spliceR.MESI.end",
"spliceR.ISI",
"spliceR.ISI.start",
"spliceR.ISI.end",
"spliceR.A5",
"spliceR.A5.start",
"spliceR.A5.end",
"spliceR.A3",
"spliceR.A3.start",
"spliceR.A3.end",
"spliceR.ATSS",
"spliceR.ATSS.start",
"spliceR.ATSS.end",
"spliceR.ATTS",
"spliceR.ATTS.start",
"spliceR.ATTS.end"
)]
))
### Massage
colnames(localASresults) <-
gsub('\\.', '_', gsub('spliceR.', '', colnames(localASresults)))
### Rename
colnames(localASresults) <-
gsub('ISI', 'IR', colnames(localASresults))
colnames(localASresults) <-
gsub('ESI', 'ES', colnames(localASresults))
colnames(localASresults) <-
gsub('MESI', 'MES', colnames(localASresults))
toModify <- which(grepl('start$|end$', colnames(localASresults)))
colnames(localASresults)[toModify] <-
gsub('start$', 'genomic_start', colnames(localASresults)[toModify])
colnames(localASresults)[toModify] <-
gsub('end$', 'genomic_end', colnames(localASresults)[toModify])
### Replace NA in counts with 0
localASresults$ES[which(is.na(localASresults$ES))] <- 0
localASresults$MEE[which(is.na(localASresults$MEE))] <- 0
localASresults$MES[which(is.na(localASresults$MES))] <- 0
localASresults$IR[which(is.na(localASresults$IR))] <- 0
localASresults$A5[which(is.na(localASresults$A5))] <- 0
localASresults$A3[which(is.na(localASresults$A3))] <- 0
localASresults$ATSS[which(is.na(localASresults$ATSS))] <- 0
localASresults$ATTS[which(is.na(localASresults$ATTS))] <- 0
}
### Add to switchList
if(TRUE) {
### Add intron rentention analysis
switchAnalyzeRlist$isoformFeatures$IR <- localASresults$IR[match(
switchAnalyzeRlist$isoformFeatures$isoform_id ,localASresults$isoform_id
)]
### Add alternative splicing analysis
switchAnalyzeRlist$AlternativeSplicingAnalysis <- localASresults
if (!quiet) {
message('Done')
}
}
return(switchAnalyzeRlist)
}
### Function which replaces analyze intron retention (by wrapping AS)
analyzeIntronRetention <- function(
switchAnalyzeRlist,
onlySwitchingGenes = TRUE,
alpha = 0.05,
dIFcutoff = 0.1,
showProgress = TRUE,
quiet = FALSE
) {
localAS <- analyzeAlternativeSplicing(
switchAnalyzeRlist = switchAnalyzeRlist,
onlySwitchingGenes = onlySwitchingGenes,
alpha = alpha,
dIFcutoff = dIFcutoff,
showProgress = showProgress,
quiet = quiet
)
### Transfer result to switchAnalyzeRlist list
switchAnalyzeRlist$isoformFeatures$IR <- localAS$AlternativeSplicingAnalysis$IR[match(
switchAnalyzeRlist$isoformFeatures$isoform_id ,localAS$AlternativeSplicingAnalysis$isoform_id
)]
switchAnalyzeRlist$intronRetentionAnalysis <- localAS$AlternativeSplicingAnalysis[,c(
'isoform_id',
'IR',
'IR_genomic_start',
'IR_genomic_end'
)]
if (!quiet) {
message('Done')
}
return(switchAnalyzeRlist)
}
### Post analysis of splicing
# Analysis number of AS events (all events)
extractSplicingSummary <- function(
switchAnalyzeRlist,
splicingToAnalyze = 'all',
asFractionTotal = FALSE,
alpha = 0.05,
dIFcutoff = 0.1,
onlySigIsoforms = FALSE,
plot = TRUE,
plotGenes = FALSE,
localTheme = theme_bw(),
returnResult = FALSE
) {
### Test input
if (TRUE) {
if (class(switchAnalyzeRlist) != 'switchAnalyzeRlist') {
stop(
'The object supplied to \'switchAnalyzeRlist\' must be a \'switchAnalyzeRlist\''
)
}
# test wether switching have been analyzed
if (!any(!is.na(
switchAnalyzeRlist$isoformFeatures$gene_switch_q_value
))) {
stop(
'The analsis of isoform switching must be performed before alternative splicing can be analyzed. Please run detectIsoformSwitching() and try again.'
)
}
# test whether AS have been predicted
if (is.null(switchAnalyzeRlist$AlternativeSplicingAnalysis)) {
stop(
'The analsis of alternative must be performed before it can be summarized. Please use analyzeAlternativeSplicing() and try again.'
)
}
# input format
if (dIFcutoff < 0 | dIFcutoff > 1) {
stop('The dIFcutoff must be in the interval [0,1].')
}
if (alpha < 0 |
alpha > 1) {
warning('The alpha parameter should usually be between 0 and 1 ([0,1]).')
}
if (alpha > 0.05) {
warning(
'Most journals and scientists consider an alpha larger than 0.05 untrustworthy. We therefore recommend using alpha values smaller than or queal to 0.05'
)
}
### Consequences to analyze
acceptedTypes <- c("A3","A5","ATSS","ATTS","ES" ,"IR","MEE","MES" )
if (!all(splicingToAnalyze %in% c('all', acceptedTypes))) {
stop(
'The argument(s) supplied to \'typeOfconsequence\' are not accepted. Please see ?analyzeAlternativeSplicing under details for description of which strings are allowed.'
)
}
splicingAnalyzed <-
intersect(
acceptedTypes,
colnames(switchAnalyzeRlist$AlternativeSplicingAnalysis)
)
if ('all' %in% splicingToAnalyze) {
splicingToAnalyze <- splicingAnalyzed
}
splicingNotAnalyzed <-
setdiff(splicingToAnalyze, splicingAnalyzed)
if (length(splicingNotAnalyzed)) {
warning(
paste(
'The following consequences appear not to have been analyzed and will therefor not be summarized:',
paste(splicingNotAnalyzed, collapse = ', '),
sep = ' '
)
)
}
}
### Extract data
if(TRUE) {
### Get pairs of isoform switches
if(TRUE) {
localData <- switchAnalyzeRlist$isoformFeatures[which(
switchAnalyzeRlist$isoformFeatures$gene_switch_q_value < alpha &
abs(switchAnalyzeRlist$isoformFeatures$dIF) > dIFcutoff
),
c(
'iso_ref',
'gene_ref',
'isoform_switch_q_value',
'gene_switch_q_value',
'dIF'
)]
if (!nrow(localData)) {
stop('No genes were considered switching with the used cutoff values')
}
### add switch direction
localData$switchDirection <- NA
localData$switchDirection[which(sign(localData$dIF) == 1)] <- 'up'
localData$switchDirection[which(sign(localData$dIF) == -1)] <- 'down'
### split based on genes and conditions
localDataList <-
split(localData, f = localData$gene_ref, drop = TRUE)
### Extract pairs of isoforms passing the filters
pairwiseIsoComparisonList <-
plyr::llply(
.data = localDataList,
.progress = 'none',
.fun = function(aDF) {
# aDF <- localDataList[[171]]
isoResTest <-
any(!is.na(
switchAnalyzeRlist$isoformFeatures$isoform_switch_q_value
))
if (isoResTest) {
sigIso <- aDF$iso_ref[which(
aDF$isoform_switch_q_value < alpha &
abs(aDF$dIF) > dIFcutoff
)]
} else {
sigIso <- aDF$iso_ref[which(
aDF$gene_switch_q_value < alpha &
abs(aDF$dIF) > dIFcutoff
)]
}
if (length(sigIso) == 0) {
return(NULL)
}
### reduce to significant if nessesary
if (onlySigIsoforms) {
aDF <- aDF[which(aDF$iso_ref %in% sigIso), ]
}
if (nrow(aDF) < 2) {
return(NULL)
}
### make sure there are both up and down
if (!all(c('up', 'down') %in% aDF$switchDirection)) {
return(NULL)
}
### extract pairs of isoforms
upIso <-
as.vector(aDF$iso_ref[which(
aDF$switchDirection == 'up'
)])
downIso <-
as.vector(aDF$iso_ref[which(
aDF$switchDirection == 'down'
)])
allIsoCombinations <-
setNames(
base::expand.grid(
upIso,
downIso,
stringsAsFactors = FALSE,
KEEP.OUT.ATTRS = FALSE
),
nm = c('iso_ref_up', 'iso_ref_down')
)
### Reduce to those where at least one of them is significant
allIsoCombinations <-
allIsoCombinations[which(
allIsoCombinations$iso_ref_up %in% sigIso |
allIsoCombinations$iso_ref_down %in% sigIso
), ]
### Add gen ref
allIsoCombinations$gene_ref <- aDF$gene_ref[1]
return(allIsoCombinations)
}
)
### Remove empty entries
pairwiseIsoComparisonList <-
pairwiseIsoComparisonList[which(
! sapply(pairwiseIsoComparisonList, is.null)
)]
if (length(pairwiseIsoComparisonList) == 0) {
stop('No candidate genes with the required cutoffs were found')
}
### Conver to data.frame
pairwiseIsoComparison <-
myListToDf(pairwiseIsoComparisonList, addOrignAsColumn = FALSE)
### Add additional info
# iso name
pairwiseIsoComparison$isoformUpregulated <-
switchAnalyzeRlist$isoformFeatures$isoform_id[match(
pairwiseIsoComparison$iso_ref_up,
switchAnalyzeRlist$isoformFeatures$iso_ref
)]
pairwiseIsoComparison$isoformDownregulated <-
switchAnalyzeRlist$isoformFeatures$isoform_id[match(
pairwiseIsoComparison$iso_ref_down,
switchAnalyzeRlist$isoformFeatures$iso_ref
)]
# gene info
pairwiseIsoComparison$gene_id <-
switchAnalyzeRlist$isoformFeatures$gene_id[match(
pairwiseIsoComparison$iso_ref_up,
switchAnalyzeRlist$isoformFeatures$iso_ref
)]
pairwiseIsoComparison$gene_name <-
switchAnalyzeRlist$isoformFeatures$gene_name[match(
pairwiseIsoComparison$iso_ref_up,
switchAnalyzeRlist$isoformFeatures$iso_ref
)]
# condition
pairwiseIsoComparison$condition_1 <-
switchAnalyzeRlist$isoformFeatures$condition_1[match(
pairwiseIsoComparison$iso_ref_down,
switchAnalyzeRlist$isoformFeatures$iso_ref
)]
pairwiseIsoComparison$condition_2 <-
switchAnalyzeRlist$isoformFeatures$condition_2[match(
pairwiseIsoComparison$iso_ref_down,
switchAnalyzeRlist$isoformFeatures$iso_ref
)]
}
### Massage AS analysis
if(TRUE) {
localAS <- switchAnalyzeRlist$AlternativeSplicingAnalysis
localAS <- localAS[which(
localAS$isoform_id %in% pairwiseIsoComparison$isoformUpregulated |
localAS$isoform_id %in% pairwiseIsoComparison$isoformDownregulated
),]
### Massage
m1 <- reshape2::melt(localAS[,c(
"isoform_id",
"ES_genomic_start",
"MEE_genomic_start",
"MES_genomic_start",
"IR_genomic_start",
"A5_genomic_start",
"A3_genomic_start",
"ATSS_genomic_start",
"ATTS_genomic_start"
)], id.vars = 'isoform_id')
colnames(m1)[3] <- 'genomic_start'
m1$AStype <- sapply(
strsplit(as.character(m1$variable),'_'),
function(x) x[1]
)
### Add AS to pairs
localConseq2 <- merge(
pairwiseIsoComparison,
m1[,c('isoform_id','AStype','genomic_start')],
by.x='isoformUpregulated',
by.y='isoform_id'
)
localConseq3 <- merge(
localConseq2,
m1[,c('isoform_id','AStype','genomic_start')],
by.x=c('isoformDownregulated','AStype'),
by.y=c('isoform_id','AStype'),
suffixes = c("_up","_down")
)
### Summarize
localConseq3$upAs <- !is.na(localConseq3$genomic_start_up)
localConseq3$dnAs <- !is.na(localConseq3$genomic_start_down)
}
}
### Massage via arguments
if(TRUE) {
localConseq3 <- localConseq3[which(
localConseq3$AStype %in% splicingToAnalyze
),]
if (!nrow(localConseq3)) {
stop('No swithces with consequences were found')
}
localConseq3$Comparison <-
paste(
localConseq3$condition_1,
'vs',
localConseq3$condition_2,
sep = ' '
)
}
### Final massage
if(TRUE) {
### remove NAs
#localConseq4 <- localConseq3[which(localConseq3$upAs | localConseq3$dnAs),]
tmpUp <- localConseq3[c('isoformUpregulated','gene_id','Comparison','AStype','upAs')]
tmpDn <- localConseq3[c('isoformDownregulated','gene_id','Comparison','AStype','dnAs')]
colnames(tmpUp) <- c('isoform_id','gene_id','Comparison','AStype','anyAs')
colnames(tmpDn) <- c('isoform_id','gene_id','Comparison','AStype','anyAs')
tmpUp$isoRegulation <- 'Up'
tmpDn$isoRegulation <- 'Dn'
localConseq5 <- rbind(
tmpUp,
tmpDn
)
}
### summarize count
if(TRUE) {
myNumbers <- plyr::ddply(
localConseq5,
.variables = c(
'AStype',
'isoRegulation',
'Comparison'
),
.drop = TRUE,
.fun = function(
aDF
) { # aDF <- localConseq5[which( localConseq5$AStype == 'IR' & localConseq5$Comparison == 'COAD_ctrl vs COAD_cancer' & localConseq5$isoRegulation == 'Up'),]
localRes <- data.frame(
nrGenesWithConsequences = length(
unique( aDF$gene_id[which(aDF$anyAs)] )
),
nrIsoWithConsequences = length(
unique( aDF$isoform_id[which(aDF$anyAs)] )
),
stringsAsFactors = FALSE
)
if (asFractionTotal) {
localRes$geneFraction <- localRes$nrGenesWithConsequences /
length( unique( aDF$gene_id ) )
localRes$isoFraction <- localRes$nrIsoWithConsequences /
length( unique( aDF$isoform_id ) )
}
return(localRes)
}
)
myNumbers$splicingResult <- paste(
myNumbers$AStype,
ifelse(myNumbers$isoRegulation == 'Dn','in isoform used less','in isoform used more'),
sep=' '
)
}
### Plot
if (plot) {
myNumbers$plotComparison <-
gsub(' vs ', '\nvs\n', myNumbers$Comparison)
### Make basic part of plot
if (plotGenes) {
if (asFractionTotal) {
g1 <- ggplot(myNumbers, aes(
x = splicingResult,
y = geneFraction
)) +
labs(
x = 'Alternative transcription event in Isoform',
y = 'Fraction of significant genes\n(with at least one event)'
)
} else {
g1 <-
ggplot(myNumbers, aes(
x = splicingResult,
y = nrGenesWithConsequences
)) +
labs(
x = 'Alternative transcription event in Isoform',
y = 'Number of significant genes\n(with at least one event)'
)
}
} else {
if (asFractionTotal) {
g1 <- ggplot(
myNumbers,
aes(
x = splicingResult,
y = isoFraction)
) + labs(
x = 'Alternative transcription event in Isoform',
y = 'Fraction of significant isoforms\n(with at least one event)')
} else {
g1 <- ggplot(
myNumbers,
aes(
x = splicingResult,
y = nrIsoWithConsequences
)
) + labs(
x = 'Alternative transcription event in Isoform',
y = 'Number of significant isoforms\n(with at least one event)'
)
}
}
g1 <- g1 + geom_bar(stat = "identity") +
facet_grid(plotComparison ~ AStype,
scales = 'free',
space = 'free_x') +
localTheme +
theme(strip.text.y = element_text(angle = 0)) +
theme(axis.text.x = element_text(
angle = -45,
hjust = 0,
vjust = 1
))
}
if (returnResult) {
if(plot) {
print(g1)
}
myNumbers$isoRegulation <- NULL
myNumbers$plotComparison <- NULL
myNumbers2 <- myNumbers[,c(
match( c('Comparison','AStype','splicingResult'), colnames(myNumbers)),
which( ! colnames(myNumbers) %in% c('Comparison','AStype','splicingResult'))
)]
return(myNumbers2)
} else {
if(plot) {
return(g1)
}
}
}
# Analysis enrichment of AS events (nr iso/genes)
extractSplicingEnrichment <- function(
switchAnalyzeRlist,
splicingToAnalyze = 'all',
alpha = 0.05,
dIFcutoff = 0.1,
onlySigIsoforms = FALSE,
countGenes = TRUE,
plot = TRUE,
localTheme = theme_bw(base_size = 14),
minEventsForPlotting = 10,
returnResult=TRUE,
returnSummary=TRUE
) {
### Test input
if (TRUE) {
if (class(switchAnalyzeRlist) != 'switchAnalyzeRlist') {
stop(
'The object supplied to \'switchAnalyzeRlist\' must be a \'switchAnalyzeRlist\''
)
}
# test wether switching have been analyzed
if (!any(!is.na(
switchAnalyzeRlist$isoformFeatures$gene_switch_q_value
))) {
stop(
'The analsis of isoform switching must be performed before alternative splicing can be analyzed. Please run detectIsoformSwitching() and try again.'
)
}
# test whether AS have been predicted
if (is.null(switchAnalyzeRlist$AlternativeSplicingAnalysis)) {
stop(
'The analsis of alternative must be performed before it can be summarized. Please use analyzeAlternativeSplicing() and try again.'
)
}
# input format
if (dIFcutoff < 0 | dIFcutoff > 1) {
stop('The dIFcutoff must be in the interval [0,1].')
}
if (alpha < 0 |
alpha > 1) {
warning('The alpha parameter should usually be between 0 and 1 ([0,1]).')
}
if (alpha > 0.05) {
warning(
'Most journals and scientists consider an alpha larger than 0.05 untrustworthy. We therefore recommend using alpha values smaller than or queal to 0.05'
)
}
### Consequences to analyze
acceptedTypes <- c("A3","A5","ATSS","ATTS","ES" ,"IR","MEE","MES" )
if (!all(splicingToAnalyze %in% c('all', acceptedTypes))) {
stop(
'The argument(s) supplied to \'typeOfconsequence\' are not accepted. Please see ?analyzeAlternativeSplicing under details for description of which strings are allowed.'
)
}
splicingAnalyzed <-
intersect(
acceptedTypes,
colnames(switchAnalyzeRlist$AlternativeSplicingAnalysis)
)
if ('all' %in% splicingToAnalyze) {
splicingToAnalyze <- splicingAnalyzed
}
splicingNotAnalyzed <-
setdiff(splicingToAnalyze, splicingAnalyzed)
if (length(splicingNotAnalyzed)) {
warning(
paste(
'The following consequences appear not to have been analyzed and will therefor not be summarized:',
paste(splicingNotAnalyzed, collapse = ', '),
sep = ' '
)
)
}
}
### Get pairs
if(TRUE) {
pairwiseIsoComparison <- extractSwitchPairs(
switchAnalyzeRlist,
alpha = alpha,
dIFcutoff = dIFcutoff,
onlySigIsoforms = onlySigIsoforms
)
}
### Massage AS analysis
if(TRUE) {
localAS <- switchAnalyzeRlist$AlternativeSplicingAnalysis
localAS <- localAS[which(
localAS$isoform_id %in% pairwiseIsoComparison$isoformUpregulated |
localAS$isoform_id %in% pairwiseIsoComparison$isoformDownregulated
),]
### Massage
m1 <- reshape2::melt(localAS[,c(
"isoform_id",
"ES_genomic_start",
"MEE_genomic_start",
"MES_genomic_start",
"IR_genomic_start",
"A5_genomic_start",
"A3_genomic_start",
"ATSS_genomic_start",
"ATTS_genomic_start"
)], id.vars = 'isoform_id')
colnames(m1)[3] <- 'genomic_start'
m1$AStype <- sapply(
strsplit(as.character(m1$variable),'_'),
function(x) x[1]
)
m2 <- reshape2::melt(localAS[,c(
"isoform_id",
"ES_genomic_end",
"MEE_genomic_end",
"MES_genomic_end",
"IR_genomic_end",
"A5_genomic_end",
"A3_genomic_end",
"ATSS_genomic_end",
"ATTS_genomic_end"
)], id.vars = 'isoform_id')
colnames(m2)[3] <- 'genomic_end'
m2$AStype <- sapply(
strsplit(as.character(m2$variable),'_'),
function(x) x[1]
)
localAS <- dplyr::inner_join(
m1[,c('isoform_id','AStype','genomic_start')],
m2[,c('isoform_id','AStype','genomic_end')],
by=c('isoform_id','AStype')
)
### Add in NMD
if('orfAnalysis' %in% names(switchAnalyzeRlist)) {
localNMD <- data.frame(
isoform_id=switchAnalyzeRlist$orfAnalysis$isoform_id,
AStype='NMD',
genomic_start=ifelse(switchAnalyzeRlist$orfAnalysis$PTC, '0,0', '0'),
genomic_end=ifelse(switchAnalyzeRlist$orfAnalysis$PTC, '0,0', '0'),
stringsAsFactors = FALSE
)
localAS <- rbind(localAS , localNMD)
}
}
### Add AS to pairs
if(TRUE) {
localConseq2 <- merge(
pairwiseIsoComparison,
localAS,
by.x='isoformUpregulated',
by.y='isoform_id'
)
localConseq3 <- merge(
localConseq2,
localAS,
by.x=c('isoformDownregulated','AStype'),
by.y=c('isoform_id','AStype'),
suffixes = c("_up","_down")
)
### Replace NAs so they can be compared (na just mean same as pre-transcript)
localConseq3$genomic_start_up[which(
is.na(localConseq3$genomic_start_up)
)] <- 0
localConseq3$genomic_end_up[which(
is.na(localConseq3$genomic_end_up)
)] <- 0
localConseq3$genomic_start_down[which(
is.na(localConseq3$genomic_start_down)
)] <- 0
localConseq3$genomic_end_down[which(
is.na(localConseq3$genomic_end_down)
)] <- 0
### Identify differences
localConseq3$coordinatsDifferent <-
localConseq3$genomic_start_up != localConseq3$genomic_start_down |
localConseq3$genomic_end_up != localConseq3$genomic_end_down
localConseq4 <- localConseq3[which(localConseq3$coordinatsDifferent),]
if(nrow(localConseq4) == 0) {
stop('No alternative splicing differences were found')
}
}
### Subset based on input
if(TRUE) {
localConseq4 <- localConseq4[which(
localConseq4$AStype %in% splicingToAnalyze
),]
if (!nrow(localConseq4)) {
stop('No swithces with consequences were found')
}
}
### Analyze differences (both start and end coordinats)
if(TRUE) {
genomic_start_up <- strsplit(x = localConseq4$genomic_start_up , split = ';')
genomic_start_down <- strsplit(x = localConseq4$genomic_start_down, split = ';')
genomic_end_up <- strsplit(x = localConseq4$genomic_end_up , split = ';')
genomic_end_down <- strsplit(x = localConseq4$genomic_end_down, split = ';')
localConseq4$nrGain <- 0
localConseq4$nrLoss <- 0
for(i in seq_along(genomic_start_up)) { # i<-39
# combine start and end of exons
localUp <- stringr::str_c(genomic_start_up[[i]] , '_', genomic_end_up [[i]])
localDn <- stringr::str_c(genomic_start_down[[i]], '_', genomic_end_down[[i]])
# remove coordinats from primary transcripts
localUp <- localUp[which( localUp != '0_0')]
localDn <- localDn[which( localDn != '0_0')]
# calculate difference
localConseq4$nrGain[i] <- sum(!localUp %in% localDn)
localConseq4$nrLoss[i] <- sum(!localDn %in% localUp)
}
### Modify ends (can only have 1)
terminiIndex <- which( localConseq4$AStype %in% c('ATSS','ATTS') )
localConseq4$nrGain[terminiIndex] <-
1 * sign(localConseq4$nrGain[terminiIndex])
localConseq4$nrLoss[terminiIndex] <-
1 * sign(localConseq4$nrLoss[terminiIndex])
localConseq4$nrDiff <- localConseq4$nrGain - localConseq4$nrLoss
}
### Summarize gain vs loss for each AStype in each condition
gainLossBalance <- plyr::ddply(
.data = localConseq4,
.variables = c('condition_1','condition_2','AStype'),
.fun = function(aDF) { # aDF <- localConseq4[1:50,]
if( countGenes ) {
aDF2 <- plyr::ddply(aDF[,c('gene_ref','nrDiff')], .variables = 'gene_ref', function(aGene) {
data.frame(
nrDiff = sum(aGene$nrDiff)
)
})
localRes <- data.frame(
nUp = sum(aDF2$nrDiff > 0),
nDown= sum(aDF2$nrDiff < 0)
)
} else {
localRes <- data.frame(
nUp=sum(aDF$nrDiff > 0),
nDown=sum(aDF$nrDiff < 0)
)
}
if( localRes$nUp > 0 | localRes$nDown > 0 ) {
localTest <- suppressWarnings(
stats::binom.test(localRes$nUp, localRes$nUp + localRes$nDown)
)
localRes$propUp <- localTest$estimate
localRes$propUpCiLo <- min(localTest$conf.int)
localRes$propUpCiHi <- max(localTest$conf.int)
localRes$propUpPval <- localTest$p.value
} else {
localRes$propUp <- NA
localRes$propUpCiLo <- NA
localRes$propUpCiHi <- NA
localRes$propUpPval <- NA
}
return(localRes)
}
)
### Massage for plotting
if(TRUE) {
gainLossBalance <- gainLossBalance[which(
! is.na(gainLossBalance$propUp)
),]
gainLossBalance$propUpQval <- p.adjust(gainLossBalance$propUpPval, method = 'fdr')
gainLossBalance$Significant <- gainLossBalance$propUpQval < alpha
gainLossBalance$Significant <- factor(
gainLossBalance$Significant,
levels=c(FALSE,TRUE)
)
gainLossBalance$Comparison <- paste(
gainLossBalance$condition_1,
'vs',
gainLossBalance$condition_2,
sep='\n'
)
### Massage splicing type name
if(TRUE) {
gainLossBalance$AStype <- paste0(
gainLossBalance$AStype, ' gain ',
'(paired with ',gainLossBalance$AStype, ' loss)'
)
gainLossBalance$AStype <- gsub('MES gain', 'MES', gainLossBalance$AStype)
gainLossBalance$AStype <- gsub('MES loss', 'MEI', gainLossBalance$AStype)
gainLossBalance$AStype <- gsub('ES gain', 'ES', gainLossBalance$AStype)
gainLossBalance$AStype <- gsub('ES loss', 'EI', gainLossBalance$AStype)
#gainLossBalance$AStype <- gsub('IR gain', 'IR', gainLossBalance$AStype)
#gainLossBalance$AStype <- gsub('IR loss', 'IS', gainLossBalance$AStype)
}
myOrder <- plyr::ddply(
gainLossBalance,
.variables = 'AStype',
.fun = function(aDF) {
mean(aDF$propUp)
})
myOrder <- myOrder$AStype[sort.list(myOrder$V1, decreasing = TRUE)]
gainLossBalance$AStype <- factor(
gainLossBalance$AStype,
levels = myOrder
)
}
### Plot result
if(plot) {
### Subset based on minEvents
gainLossBalance2 <- gainLossBalance[which(
(gainLossBalance$nUp + gainLossBalance$nDown) >= minEventsForPlotting
),]
gainLossBalance2$nTot <- gainLossBalance2$nUp + gainLossBalance2$nDown
if(nrow(gainLossBalance2) == 0) {
stop('No features left for ploting after filtering with via "minEventsForPlotting" argument.')
}
if( countGenes ) {
xText <- 'Fraction of Genes with Switches Primarly\nResulting in The Alternative Splicing Event Indicated\n(With 95% Confidence Interval)'
} else {
xText <- 'Fraction of Switches Primarly\nResulting in Alternative Splicing Event Indicated\n(With 95% Confidence Interval)'
}
g1 <- ggplot(data=gainLossBalance2, aes(y=AStype, x=propUp, color=Significant)) +
#geom_point(size=4) +
geom_point(aes(size=nTot)) +
geom_errorbarh(aes(xmax = propUpCiHi, xmin=propUpCiLo), height = .3) +
facet_wrap(~Comparison) +
geom_vline(xintercept=0.5, linetype='dashed') +
labs(
x=xText,
y='Alternative Splicing Event\n(in isoform used more)') +
localTheme +
theme(axis.text.x=element_text(angle=-45, hjust = 0, vjust=1)) +
scale_color_manual(name = paste0('FDR < ', alpha), values=c('black','red'), drop=FALSE) +
guides(
color = guide_legend(order=1),
size = guide_legend(order=2)
) +
coord_cartesian(xlim=c(0,1))
if( countGenes ) {
g1 <- g1 + scale_size_continuous(name = 'Genes')
} else {
g1 <- g1 + scale_size_continuous(name = 'Switches')
}
}
### Return data
if(returnResult) {
if(plot) {
print(g1)
}
if(returnSummary) {
return(gainLossBalance)
} else {
localConseq5 <- localConseq3[,c('gene_id','gene_name','AStype','isoformUpregulated','isoformDownregulated','iso_ref_up','iso_ref_down','condition_1','condition_2')]
### Transfer res
localConseq5$nrDiff <- localConseq4$nrDiff[match(
stringr::str_c( localConseq5$iso_ref_up, localConseq5$iso_ref_down, localConseq5$AStype ),
stringr::str_c( localConseq4$iso_ref_up, localConseq4$iso_ref_down, localConseq4$AStype )
)]
localConseq5$nrDiff[which(is.na(localConseq5$nrDiff))] <- 0
localConseq5$iso_ref_up <- NULL
localConseq5$iso_ref_down <- NULL
localConseq5$ASchange <- dplyr::case_when(
localConseq5$nrDiff > 0 ~ 'Primarly gain',
localConseq5$nrDiff < 0 ~ 'Primarly loss',
TRUE ~ 'No change'
)
return(localConseq5)
}
} else {
if(plot) {
return(g1)
}
}
}
extractSplicingEnrichmentComparison <- function(
switchAnalyzeRlist,
splicingToAnalyze = 'all',
alpha = 0.05,
dIFcutoff = 0.1,
onlySigIsoforms = FALSE,
countGenes = TRUE,
plot = TRUE,
localTheme = theme_bw(base_size = 14),
minEventsForPlotting = 10,
returnResult=TRUE
) {
### Test
if(TRUE) {
if (class(switchAnalyzeRlist) != 'switchAnalyzeRlist') {
stop(
'The object supplied to \'switchAnalyzeRlist\' must be a \'switchAnalyzeRlist\''
)
}
nComp <- nrow( unique(
switchAnalyzeRlist$isoformFeatures[,c('condition_1','condition_2')]
))
if( nComp == 1) {
stop('Cannot do a contrast of different comparisons since only one comparison is analyzed in the switchAnalyzeRlist.')
}
}
### Extract splicing enrichment
splicingCount <- extractSplicingEnrichment(
switchAnalyzeRlist=switchAnalyzeRlist,
splicingToAnalyze=splicingToAnalyze,
alpha=alpha,
dIFcutoff=dIFcutoff,
onlySigIsoforms=onlySigIsoforms,
countGenes = countGenes,
plot=FALSE,
minEventsForPlotting = 0,
returnResult=TRUE
)
spliceTypes <- split(as.character(unique(splicingCount$AStype)), unique(splicingCount$AStype))
splicingCount$nTot <- splicingCount$nUp + splicingCount$nDown
### Make each pairwise comparison
myComparisons <- allPairwiseFeatures( unique(splicingCount$Comparison) )
### Loop over each pariwise comparison
fisherRes <- plyr::ddply(myComparisons, c('var1','var2'), function(localComparison) { # localComparison <- myComparisons[1,]
### Loop over each
localAsRes <- plyr::ldply(spliceTypes, .inform = TRUE, function(localSpliceType) { # localSpliceType <- 'MEE'
### Extract local data
localSpliceCount1 <- splicingCount[which(
splicingCount$Comparison %in% c(localComparison$var1, localComparison$var2) &
splicingCount$AStype == localSpliceType
),]
if(
nrow(localSpliceCount1) != 2 |
any(is.na(localSpliceCount1[,c('nUp','nDown')]))
) {
return(NULL)
}
### Test difference
fisherResult <- fisher.test(localSpliceCount1[,c('nUp','nDown')])
###
fisherTestResult <- data.frame(odds_ratio=fisherResult$estimate, p_value=fisherResult$p.value, lowCI=fisherResult$conf.int[1], highCI=fisherResult$conf.int[2])
rownames(fisherTestResult) <- NULL
localSpliceCount1$fisherPvalue <- fisherTestResult$p_value
localSpliceCount1$pair <- 1:2
localSpliceCount1$forPlotting <- any(
(localSpliceCount1$nUp + localSpliceCount1$nDown) >= minEventsForPlotting
)
return(
localSpliceCount1[,c('Comparison','propUp','propUpCiLo','propUpCiHi','fisherPvalue','pair','forPlotting','nTot')]
)
})
colnames(localAsRes)[1] <- 'AStype'
return(localAsRes)
})
### Add comparison
fisherRes$comp <- paste(
gsub('\\n',' ',fisherRes$var1),
gsub('\\n',' ',fisherRes$var2),
sep='\ncompared to\n'
)
### Multiple test correction
# perform correction for pair = 1 (to avoid correcting twice for the same pair)
tmp <- fisherRes[which(fisherRes$pair == 1),]
tmp$fisherQvalue <- p.adjust(tmp$fisherPvalue, method = 'fdr')
fisherRes$fisherQvalue <- tmp$fisherQvalue[match(
stringr::str_c(fisherRes$var1, fisherRes$var2, fisherRes$AStype),
stringr::str_c(tmp$var1, tmp$var2, tmp$AStype)
)]
fisherRes$Significant <- fisherRes$fisherQvalue < alpha
fisherRes$Significant <- factor(
fisherRes$Significant,
levels=c(FALSE,TRUE)
)
### Plot
if(plot) {
fisherRes2 <- fisherRes[which(
fisherRes$forPlotting
),]
if(nrow(fisherRes2) == 0) {
stop('No features left to plot after subsetting with \'minEventsForPlotting\'.')
}
fisherRes2$AStype2 <- gsub(
' \\(paired',
'\n(paried',
fisherRes2$AStype
)
if( countGenes ) {
xText <- 'Fraction of Genes with Switches Primarly\nResulting in The Alternative Splicing Event Indicated\n(With 95% Confidence Interval)'
} else {
xText <- 'Fraction of Switches Primarly\nResulting in Alternative Splicing Event Indicated\n(With 95% Confidence Interval)'
}
g1 <- ggplot(data=fisherRes2, aes(y=Comparison, x=propUp, color=Significant)) +
geom_point(aes(size=nTot)) +
geom_errorbarh(aes(xmax = propUpCiHi, xmin=propUpCiLo), height = .3) +
facet_grid(comp~AStype2, scales = 'free_y') +
geom_vline(xintercept=0.5, linetype='dashed') +
labs(
x=xText,
y='Comparison'
) +
scale_color_manual(name = paste0('FDR across\ncomparisons\n< ', alpha), values=c('black','red'), drop=FALSE) +
guides(
color = guide_legend(order=1),
size = guide_legend(order=2)
) +
localTheme +
theme(axis.text.x=element_text(angle=-45, hjust = 0, vjust=1), strip.text.y = element_text(angle = 0)) +
coord_cartesian(xlim=c(0,1))
if( countGenes ) {
g1 <- g1 + scale_size_continuous(name = 'Genes')
} else {
g1 <- g1 + scale_size_continuous(name = 'Switches')
}
}
if(returnResult) {
if(plot) {
print(g1)
}
fisherRes$nTot <- NULL
fisherRes$pair <- stringr::str_c('propUp_comparison_', fisherRes$pair)
fisherRes2 <- reshape2::dcast(data = fisherRes, comp + AStype ~ pair, value.var=c('propUp'))
fisherRes2$fisherQvalue <- tmp$fisherQvalue[match(
stringr::str_c(fisherRes2$comp, fisherRes2$AStype),
stringr::str_c(tmp$comp, tmp$AStype)
)]
fisherRes2$Significant <- fisherRes2$fisherQvalue < alpha
colnames(fisherRes2)[1] <- 'comparisonsCompared'
fisherRes2$comparisonsCompared <- gsub('\\n', ' ', fisherRes2$comparisonsCompared)
return(fisherRes2)
} else {
if(plot) {
return(g1)
}
}
}
# Analysis of isoform fraction (using only events unique to one)
extractSplicingGenomeWide <- function(
switchAnalyzeRlist,
featureToExtract = 'isoformUsage',
splicingToAnalyze = 'all',
alpha=0.05,
dIFcutoff = 0.1,
log2FCcutoff = 1,
violinPlot=TRUE,
alphas=c(0.05, 0.001),
localTheme=theme_bw(),
plot=TRUE,
returnResult=TRUE
) {
### Test input
if (TRUE) {
if (class(switchAnalyzeRlist) != 'switchAnalyzeRlist') {
stop(
'The object supplied to \'switchAnalyzeRlist\' must be a \'switchAnalyzeRlist\''
)
}
if( switchAnalyzeRlist$sourceId == 'preDefinedSwitches' ) {
stop(
paste(
'The switchAnalyzeRlist is made from pre-defined isoform switches',
'which means it is made without defining conditions (as it should be).',
'\nThis also means it cannot used to plot conditional expression -',
'if that is your intention you need to create a new',
'switchAnalyzeRlist with the importRdata() function and start over.',
sep = ' '
)
)
}
if (alpha < 0 |
alpha > 1) {
stop('The alpha parameter must be between 0 and 1 ([0,1]).')
}
if (alpha > 0.05) {
warning(
'Most journals and scientists consider an alpha larger than 0.05 untrustworthy. We therefore recommend using alpha values smaller than or queal to 0.05'
)
}
if (dIFcutoff < 0 | dIFcutoff > 1) {
stop('The dIFcutoff must be in the interval [0,1].')
}
if (log2FCcutoff < 0) {
stop(
'The log2FCcutoff cannot be negative (as the cutoff is applied to absolute values)'
)
}
if (length(alphas) != 2) {
stop('A vector of length 2 must be given to the argument \'alphas\'')
}
if (any(alphas < 0) |
any(alphas > 1)) {
stop('The \'alphas\' parameter must be numeric between 0 and 1 ([0,1]).')
}
if (any(alphas > 0.05)) {
warning(
'Most journals and scientists consider an alpha larger than 0.05 untrustworthy. We therefore recommend using alpha values smaller than or queal to 0.05'
)
}
if (!featureToExtract %in%
c('isoformUsage', 'isoformExp', 'geneExp', 'all')
) {
stop(
'The \'featureToExtract\' argument must be \'isoformUsage\', \'isoformExp\', \'geneExp\' or \'all\''
)
}
### Check annotation
acceptedTypes <- c("A3","A5","ATSS","ATTS","ES" ,"IR","MEE","MES" )
if ('all' %in% splicingToAnalyze) {
splicingToAnalyze <- acceptedTypes
}
if (!all(splicingToAnalyze %in% acceptedTypes)) {
stop(
paste(
'The \'acceptedTypes\' argument must be one (or multiple) of: \'',
paste(acceptedTypes, collapse = '\', \''),
'\'',
sep = ''
)
)
}
}
### Extract annotation
if (TRUE) {
switchAnalyzeRlist$isoformFeatures$comparison <- paste(
switchAnalyzeRlist$isoformFeatures$condition_1,
switchAnalyzeRlist$isoformFeatures$condition_2,
sep = ' vs '
)
isoformsToAnalyze <- extractSigData(
switchAnalyzeRlist = switchAnalyzeRlist,
alpha = alpha,
dIFcutoff = dIFcutoff,
log2FCcutoff = log2FCcutoff,
featureToExtract = featureToExtract
)
colToExtract <-
c('iso_ref',
'isoform_id',
'comparison',
'IF1',
'IF2')
colToExtract <-
intersect(colToExtract,
colnames(switchAnalyzeRlist$isoformFeatures))
isoData <- switchAnalyzeRlist$isoformFeatures[
which(
switchAnalyzeRlist$isoformFeatures$iso_ref %in%
isoformsToAnalyze
),
na.omit(match(
colToExtract ,
colnames(switchAnalyzeRlist$isoformFeatures)
))
]
if (nrow(isoData) == 0) {
stop('No data left after filtering')
}
isoData <- dplyr::inner_join(
isoData,
switchAnalyzeRlist$AlternativeSplicingAnalysis[,c(
'isoform_id',
splicingToAnalyze
)],
by='isoform_id'
)
}
### Prepare for plotting
if (TRUE) {
# melt categories
isoDataMelt <-
reshape2::melt(isoData,
id.vars = c(
'iso_ref', 'isoform_id', 'comparison', 'IF1', 'IF2'
))
#isoDataMelt <-
# isoDataMelt[which(!is.na(isoDataMelt$value)), ]
# melt IF
colnames(isoDataMelt)[match(
c('variable', 'value'), colnames(isoDataMelt)
)] <- c('category', 'isoform_feature')
isoDataMelt <-
reshape2::melt(
isoDataMelt,
id.vars = c(
'iso_ref',
'isoform_id',
'comparison',
'category',
'isoform_feature'
)
)
# massage category
isoDataMelt$category <- as.character(isoDataMelt$category)
isoDataMelt$comparison2 <-
paste(isoDataMelt$comparison, '\n(IF1 vs IF2)', sep = '')
isoDataMelt$isoform_feature <- paste(
ifelse(isoDataMelt$isoform_feature == 0, 'Without', 'With'),
isoDataMelt$category,
sep=' '
)
}
### Calculate statistics
if (TRUE) {
mySigTest <-
plyr::ddply(
isoDataMelt,
.variables = c('comparison', 'category', 'isoform_feature'),
.drop = TRUE,
.fun = function(aDF) {
data1 <- aDF$value[which(aDF$variable == 'IF1')]
data2 <- aDF$value[which(aDF$variable == 'IF2')]
myTest <- suppressWarnings(wilcox.test(data1,
data2))
myResult <- data.frame(
n = length(data1),
medianIF1 = median(data1, na.rm = TRUE),
medianIF2 = median(data2, na.rm = TRUE)
)
myResult$medianDIF <-
myResult$medianIF2 - myResult$medianIF1
myResult$wilcoxPval <- myTest$p.value
myResult$ymax <- max(c(data1, data2))
return(myResult)
}
)
mySigTest$ymax <- mySigTest$ymax * 1.05
mySigTest$wilcoxQval <-
p.adjust(mySigTest$wilcoxPval, method = 'fdr')
mySigTest$significance <-
sapply(mySigTest$wilcoxQval, function(x)
evalSig(x, alphas))
mySigTest <-
plyr::ddply(
mySigTest,
.variables = 'category',
.fun = function(aDF) {
aDF$isoform_feature <- factor(aDF$isoform_feature)
aDF$idNr <- as.numeric(aDF$isoform_feature)
return(aDF)
}
)
mySigTest$comparison2 <-
paste(mySigTest$comparison, '\n(IF1 vs IF2)', sep = '')
}
### Plot result
if (plot) {
# start plot
if (violinPlot) {
p1 <- ggplot() +
geom_violin(
data = isoDataMelt,
aes(
x = isoform_feature,
y = value,
fill = variable
),
scale = 'area'
) +
stat_summary(
data = isoDataMelt,
aes(
x = isoform_feature,
y = value,
fill = variable
),
fun.y = medianQuartile,
geom = 'point',
position = position_dodge(width = 0.9),
size = 2
)
} else {
p1 <- ggplot() +
geom_boxplot(data = isoDataMelt, aes(
x = isoform_feature,
y = value,
fill = variable
))
}
# add significance
p1 <- p1 +
geom_text(
data = mySigTest,
aes(x = isoform_feature, y = ymax, label = significance),
vjust = -0.2,
size = localTheme$text$size * 0.3
) +
geom_segment(data = mySigTest, aes(
x = idNr - 0.25,
xend = idNr + 0.25,
y = ymax,
yend = ymax
))
# build rest of plot
p1 <- p1 +
facet_grid(comparison2 ~ category,
scales = 'free_x',
space = 'free_x') +
localTheme +
theme(strip.text.y = element_text(angle = 0)) +
theme(axis.text.x = element_text(
angle = -45,
hjust = 0,
vjust = 1
)) +
scale_fill_discrete(name = NULL) + theme(legend.position = "top") +
labs(x = 'Isoform feature', y = 'Isoform Usage (IF)') +
#coord_cartesian(ylim=c(0,1.25))
coord_cartesian(ylim = c(0, 1.1 + max(c(
0, 0.02 * (length(unique(
mySigTest$comparison2
)) - 2)
))))
}
### Return result
if (returnResult) {
if(plot) {
print(p1)
}
mySigTest2 <-
mySigTest[, c(
'comparison',
'category',
'isoform_feature',
'n',
'medianIF1',
'medianIF2',
'medianDIF',
'wilcoxPval',
'wilcoxQval',
'significance'
)]
mySigTest2 <-
mySigTest2[order(mySigTest2$comparison,
mySigTest2$category,
mySigTest2$isoform_feature), ]
return(mySigTest2)
} else {
if(plot) {
return(p1)
}
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.