R/specc_cluster.R

Defines functions get_samp_class get_univarCox_result

Documented in get_samp_class get_univarCox_result

#' @title Perform the univariate Cox proportional hazards regression analysis.
#' @description The function `get_univarCox_result` is used to perform the univariate Cox proportional hazards regression analysis.
#' @param DE_path_sur A matrix containing the activity values of all pathways in each sample, along with the survival time and survival status of the samples.Note that the column names of survival time and survival status must be "survival" and "event".
#' @importFrom survival coxph
#' @importFrom survival Surv
#' @return A data frame containing the pathways' coefficient, HR, confidence interval, and survival related difference p-value .
#' @export
#' @examples
#' #get path of the mutation annotation file.
#' data(cox_data)
#' #perform function `get_univarCox_result`.
#' res<-get_univarCox_result(cox_data)
get_univarCox_result<-function(DE_path_sur){
  path_name<-cbind(colnames(DE_path_sur),paste0("a",1:length(colnames(DE_path_sur))))
  colnames(DE_path_sur)[1:(dim(DE_path_sur)[2]-2)]<-path_name[1:(dim(path_name)[1]-2),2]
  covariates<-colnames(DE_path_sur)[1:(length(DE_path_sur[1,])-2)]
  univ_formulas <- sapply(covariates,function(x) as.formula(paste('Surv(survival, event) ~', x)))
  univ_models <- lapply( univ_formulas, function(x){coxph(x, data =DE_path_sur)})
  univ_results <- lapply(univ_models,
                         function(x){
                           x <- summary(x)
                           p.value<-signif(x$wald["pvalue"], digits=2)
                           wald.test<-signif(x$wald["test"], digits=2)
                           beta<-signif(x$coef[1], digits=2);
                           HR <-signif(x$coef[2], digits=2);
                           HR.confint.lower <- signif(x$conf.int[,"lower .95"], 2)
                           HR.confint.upper <- signif(x$conf.int[,"upper .95"],2)

                           res<-c(beta,HR,HR.confint.lower,HR.confint.upper, p.value)
                           names(res)<-c("beta","HR", "HR.95L", "HR.95H","p.value")
                           return(res)
                         })
  res <- t(as.data.frame(univ_results, check.names = FALSE))
  result<-as.data.frame(res)
  rownames(result)<-path_name[match(rownames(result),path_name[,2]),1]
  return(result)
}



#' @title Determining cancer subtypes using unsupervised spectral clustering algorithm.
#' @description The function `get_samp_class` using spectral clustering algorithm to obtain cancer subtype labels.
#' @param Path_ES Single-sample mutation-based pathway enrichment score(ssMutPES) profiles.The file can be generated by the function `get_RWR_ES`.
#' @param sur A matrix containing the samples' survival time and survival states.
#' @param seed_num The number of seeds for iterating to select the optimal clustering result.
#' @param cox_pval A custom threshold to filter characteristic pathways, default to 0.05.
#' @param min.nc Minimal number of clusters.
#' @param max.nc maximal number of clusters,greater or equal to min.nc.
#' @importFrom survival coxph
#' @importFrom survival Surv
#' @importFrom NbClust NbClust
#' @importFrom kernlab specc
#' @importFrom survival survdiff
#' @importFrom stats dist
#' @importFrom stats pchisq
#' @importFrom stats as.formula
#' @return A list containing the filtered pathways,the best seed for clustering,and cancer subtyoe labels .
#' @export
#' @examples
#' #load the data.
#' surv_path <- system.file("extdata","sur.Rdata",package = "ssMutPA")
#' load(surv_path)
#' data(Path_ES)
#' #perform function `get_samp_class`.
#' \donttest{res<-get_samp_class(Path_ES,sur,seed_num=5,cox_pval=0.05,min.nc = 2,max.nc =5)}

get_samp_class<-function(Path_ES,sur,seed_num=50,cox_pval=0.05,min.nc = 2,max.nc =5){
  col_0<-apply(Path_ES,2,function(x){length(which(x==0))})
  norm_RWR<-Path_ES[,names(col_0)[which(col_0!=nrow(Path_ES))]]
  inter<-intersect(colnames(norm_RWR),rownames(sur))
  norm_RWR<-norm_RWR[,inter]
  sur<-sur[inter,]
  coxdata<-as.data.frame(cbind(t(norm_RWR),sur))
  path_Ucox_res<-get_univarCox_result(coxdata)
  cox_path<-rownames(path_Ucox_res)[which(path_Ucox_res$p.value < cox_pval)]
  patient_simil<-as.matrix(dist(t(norm_RWR[cox_path,]),method ="euclidean" ))
  nc<-NbClust(data=t(norm_RWR[cox_path,]),distance =NULL,diss = patient_simil,min.nc = min.nc,max.nc =max.nc,
              method ="kmeans",index = "silhouette")
  clust_num<-as.numeric(nc$Best.nc["Number_clusters"])
  logrank<-c()
  clust_seed<-sample(1000:10000,seed_num)
  for (j in 1:length(clust_seed)) {
    set.seed(clust_seed[j])
    res<-specc(patient_simil,centers=clust_num)
    sample_class<-res@.Data
    surv_data<-as.data.frame(cbind(event=sur[,1],
                                   survival=sur[,2],class=sample_class))
    sdiff <- survdiff(Surv(survival, event)~class, data=as.data.frame(surv_data))
    p.val = 1 - pchisq(sdiff$chisq, length(sdiff$n) - 1)
    logrank<-c(logrank,p.val)
  }
  seed<-clust_seed[which.min(logrank)]
  set.seed(seed)
  res<-specc(patient_simil,centers=clust_num)
  sample_class<-res@.Data
  names(sample_class)<-names(res)
  result<-list(cox_path=cox_path,seed=seed,sample_class=sample_class)
  return(result)
}

Try the ssMutPA package in your browser

Any scripts or data that you put into this service are public.

ssMutPA documentation built on June 22, 2024, 12:18 p.m.