# uni.tree #
#' @title A survival binary tree split by univariate selection and the statistical significant criterion.
#' @description Using a survival dataset to build a tree. The decision of splitting is using score test or log-rank test.
#' And the threshold of stop spltting can fixed through the argument.
#' This function also produce a relative risk rank between the terminal nodes.
#' @param t.vec :Vector of survival times (time to either death or censoring)
#' @param d.vec :Vector of censoring indicators (1=death, 0=censoring)
#' @param X.mat :n by p matrix of covariates, where n is the sample size and p is the number of covariates
#' @param P.value :the threshold of P-value for stop splitting (stopping criterion)
#' @param d0 :A positive constant to stabilize the variance of score statistics (Witten & Tibshirani 2010)
#' @param S.plot :call for plot the KM estimator for each split
#' @param score :TRUE = score test (Emura T et al. 2019); FALSE = log-rank test
#' @return A nesting list of tree, containing inner node and terminal node information
#' @details the concept is generated by a function containing the function itself
#' @details For each split, we record three things, fist is the information of inner node which contains what to choose for splitting and what value the p-value the two sample test is. Second and third are the information of left and right children nodes.
#' If the children node is decided to become a inner node, then it genertae another two nodes;otherwise, it becomes a terminal node and record the information of sample size and the covariate space and the p-value it try to split but is failed
#' @examples
#' data(Lung,package="compound.Cox")
#' train_Lung=Lung[which(Lung[,"train"]==TRUE),] #select training data
#' t.vec=train_Lung[,1]
#' d.vec=train_Lung[,2]
#' x.mat=train_Lung[,-c(1,2,3)]
#' uni.tree(t.vec,d.vec,x.mat,P.value=0.01,d0=0.01,S.plot=FALSE,score=TRUE)
#' @import survival
#' @references
#' Emura T, Matsui S, Chen HY (2019). compound.Cox: Univariate Feature Selection and Compound Covariate for Predicting Survival, Computer Methods and Programs in Biomedicine 168: 21-37.
#' @references
#' Witten DM, Tibshirani R (2010) Survival analysis with high-dimensional covariates. Stat Method Med Res 19:29-51
#' @export
uni.tree=function(t.vec,d.vec,X.mat,P.value=0.01,d0=0.01,
S.plot=FALSE,score=TRUE){
m=max(X.mat)-min(X.mat)
recursive=function(t.vec,d.vec,X.mat,condition=NULL,Risk.score=0,rank.weight=1){
n=length(t.vec)
if(n>1){
if(score == TRUE){
p = ncol(X.mat)
###############transform the X.mat into w binary matrix#################################
w.mat = 1*(X.mat[,rep(1:p,c(rep(m,p)))]>
matrix(rep(c(1:m),p),1,p*m)[rep(1,n),])
#import and implement uni.score function
uni.score.summary = uni.score(t.vec,d.vec,w.mat, d0= d0)
uni.score_res = uni.score.summary$P
#decide the cut value
selected.cut.value = rep(c(1:m),p)[which.min(uni.score_res)]
selected.cov.names = colnames(X.mat)[floor((which.min(uni.score_res)-1)/m)+1]#prepare the name of selected covariate name
selected.P.value = min(uni.score_res)
}else{
#else = logrank
uni.logrank.res = uni.logrank(t.vec,d.vec,X.mat)
selected.cov.names = colnames(uni.logrank.res)[1]
selected.cut.value = uni.logrank.res["cut_off_point",1]
selected.P.value = uni.logrank.res["Pvalue",1]
}
#stopping criterion
if(selected.P.value <= P.value ){
#Separate data by the final decided value
tmp=X.mat[,selected.cov.names]<=selected.cut.value
#check high risk and low risk set
#prepare for subtree
if(score == TRUE){
Zvalue = uni.score.summary$Z[which.min(uni.score_res)]
if(Zvalue <= 0){
left_t.vec = t.vec[tmp];right_t.vec=t.vec[!tmp]
left_d.vec = d.vec[tmp];right_d.vec=d.vec[!tmp]
left_X.mat = X.mat[tmp,];right_X.mat=X.mat[!tmp,]
}else{
left_t.vec=t.vec[!tmp];right_t.vec=t.vec[tmp]
left_d.vec=d.vec[!tmp];right_d.vec=d.vec[tmp]
left_X.mat=X.mat[!tmp,];right_X.mat=X.mat[tmp,]
}
}else{
x=X.mat[,selected.cov.names]<=selected.cut.value
lrsummary = survdiff(Surv(t.vec,d.vec)~x)
Zvalue = (lrsummary$obs - lrsummary$exp)[1]
if(Zvalue <= 0){
left_t.vec=t.vec[tmp];right_t.vec=t.vec[!tmp]
left_d.vec=d.vec[tmp];right_d.vec=d.vec[!tmp]
left_X.mat=X.mat[tmp,];right_X.mat=X.mat[!tmp,]
}else{
left_t.vec=t.vec[!tmp];right_t.vec=t.vec[tmp]
left_d.vec=d.vec[!tmp];right_d.vec=d.vec[tmp]
left_X.mat=X.mat[!tmp,];right_X.mat=X.mat[tmp,]
}
}
#prepare for subtree
#Set condition
if(is.null(condition)){
if(Zvalue <= 0){
condition_left=paste0(selected.cov.names,"<=",selected.cut.value,sep="")
condition_right=paste0(selected.cov.names,">",selected.cut.value,sep="")
}else{
condition_left=paste0(selected.cov.names,">",selected.cut.value,sep="")
condition_right=paste0(selected.cov.names,"<=",selected.cut.value,sep="")
}
}else{
if(Zvalue <= 0){
condition_left=paste0(condition," & ",paste0(selected.cov.names,"<=",selected.cut.value))
condition_right=paste0(condition," & ",paste0(selected.cov.names,">",selected.cut.value))
}else{
condition_left=paste0(condition," & ",paste0(selected.cov.names,">",selected.cut.value))
condition_right=paste0(condition," & ",paste0(selected.cov.names,"<=",selected.cut.value))
}
}
if(S.plot == TRUE){
KM.split(t.vec,d.vec,X.mat,selected.cov.names,selected.cut.value)
}
return(
list(NODE=data.frame(Information=c("Inner.node",
Risk.score,
selected.P.value,
selected.cov.names,
selected.cut.value,
round(Zvalue,2)
),
row.names = c("node_status:",
"Risk score:",
"P-value:",
"name:",
"cut_value:",
"Z-value")
)
,Left=recursive(left_t.vec,left_d.vec,left_X.mat,
condition=condition_left,Risk.score=Risk.score+rank.weight,rank.weight = 0.1*rank.weight)
,Right=recursive(right_t.vec,right_d.vec,right_X.mat,
condition=condition_right,Risk.score = Risk.score-rank.weight,rank.weight = 0.1*rank.weight)
)
)
}else{
return(list(NODE=data.frame(Information=c("terminal node",
Risk.score,
selected.P.value,
n,
if(is.null(condition)){"NULL"}else{condition}
),
row.names=c("node_status:",
"Risk score:",
"P-value:",
"samplesize:",
"condition:")
)
)
)
}
}else{
return(list(NODE=data.frame(Information=c("terminal node",
Risk.score,
n,
if(is.null(condition)){"NULL"}else{condition}
),
row.names=c("node_status:",
"Risk score:",
"samplesize:",
"condition:")
)
)
)
}
}
return(recursive(t.vec,d.vec,X.mat))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.