#' Survival analysis using metafeatures
#'
#' @description survFASE finds survival rate of patients using altenative splicing data. It requires exon, intron and junction expression of the samples generated using FASE (check \code{\link{EPrnaseq}}, \code{\link{iPrnaseq}} and \code{\link{DEJ}}). rownames of all expression files and clinical data should have the same identifier/sample ID, otherwise survFASE would not be able to perform survival analysis. survFASE uses RMM/iMM to find metafeature(s) associated with the given exonID/intronID and incorporate the expression of those metafeatures with respective exonID/intronID.
#'
#' @param Time column name of survival time in clinical data. (default = Time)
#' @param Status column name of survival status (alive/dead status) in clinical data. 1 = dead and 0 = alive. (default = Status)
#' @param Gcount Gene count matrix for the given gene. It should contain raw meta-feature counts of only patients. Rownames of gcount should be unique sample IDs that can be mapped to the clinical data.
#' @param clinical.data clinical data of patients. It should contain survival time (days to last follow up) and survival status (0/1) of the patients. Rownames of clinical data should be unique, non-repeating sample IDs that can be mapped to the Gcount.
#' @param rmm readMembershipMatrix matrix of the gene. It contains association between exons and other metafeatures in a gene and is generated by default as RMM.Rdata using \code{\link{readMembershipMatrix}} function. RMM is required only for finding survival rate associated with a cassette exon event.
#' @param imm intronMembershipMatrix matrix of the gene. It contains association between introns and other metafeatures in a gene. It is generated by default as iMM.Rdata using \code{\link{intronMembershipMatrix}} function. iMM is required only for finding survival rate associated with an intron retention event.
#' @param eventID exonID or intronID of the AS event for survival analysis.
#' @param designM design matrix
#' @param Groups list of sample groups
#'
#' @import survival
#' @import dplyr
#' @import stats
#' @import edgeR
#' @import matrixStats
#'
#' @return survFASE returns an overall p-value, concordance index and Cox-PH statistics. The overall p-value indicates whether or not the given exon/intron significantly affects patient survival. C-index signifies goodness-of-fit of the model. Cox-PH results show which of the metafeatures associated with the exon/intron affect survival rate and their statistical inferences like hazard-ratio, beta-coefficient, etc.
#'
#' @export
survFASE <- function(Time = Time, Status = Status, Gcount, clinical.data, rmm = NULL, imm = NULL, eventID, threshold = 0.60, designM, Groups){
#checking if rmm or imm is provided
if(!is.null(rmm) & !is.null(imm)) stop('Please provide RMM if cassette exon event or iMM if intron retention event.')
#renaming rmm/imm as mm
if(!is.null(rmm)) mm <- rmm else if(!is.null(imm)) mm <- imm else stop('Please provide RMM if cassette exon event or iMM if intron retention event.')
#checking if mm is empty (some iMM are empty)
if(is.null(mm)) base::stop("rmm/imm is empty.")
threshold.mf <- threshold
#making a matrix of metafetaures associated with given eventID
metafeatures <- as.matrix(mm[ , eventID])
metafeatures <- subset(metafeatures, metafeatures[,1]>0.0)
#Preparing expression matrix from Gcount
Gcount <- .removeLECountsTS(Gcount = Gcount, designM = designM, Groups = Groups, threshold = 6.32) #normalizing raw counts and removing low expression metafeatures
Gcount <- t(Gcount)
index.patients <- match(rownames(clinical.data), rownames(Gcount))
index.metafeatures <- match(rownames(metafeatures), colnames(Gcount))
Gcount <- as.matrix(Gcount[index.patients, index.metafeatures]) #retaining only patient samples and metafeatures associated with given eventID
#removing metafeatures with expression in less than threshold number of samples
Gcount.temp <- (Gcount != 0) * 1 # making non-zero expression as 1
threshold <- threshold * nrow(Gcount.temp) #atleast 51% of samples should have non-zero expression
counts <- colSums(Gcount.temp) #checking number of samples with non-zero expression in each mf
index <- which(counts > threshold) # checking mfs with more than threshold non-zero samples
Gcount <- Gcount[ , index] #retaining mfs with more than threshold non-zero expression
if(ncol(Gcount) == 0) warning('More than ' , threshold.mf*100, '% samples have zero expression in all meta-features.')
## adding clinicaldata to expression
index <- match(rownames(clinical.data), rownames(Gcount))
survival_data <- cbind(clinical.data, Gcount[index, ])
##cox
##running CoxPH
surv.formula <- Surv(Time, Status) ~ . #creating a survival formula
surv.formula <- reformulate(colnames(Gcount), surv.formula[[2]]) # adding mf colnames to survival formula
surv.result <- coxph(surv.formula, data = survival_data) #calculating Cox survival
surv.result.pvalues <- summary(surv.result)$coefficients[,5] #extracting p-values of all mfs
surv.result.pvalues.sig <- surv.result.pvalues[which(surv.result.pvalues < 0.05)]
# overall p-value using only significant metafeatures
overall_pvalue <- .sumPvalsMethod(sum(surv.result.pvalues.sig), length(surv.result.pvalues.sig))
cindex <- summary(surv.result)$concordance[1] # concordance index
names(cindex) <- NULL
survival_result <- list(overall_pvalue = overall_pvalue, concordance_index = cindex, survival_analysis = surv.result)
return(survival_result)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.