Nothing
#' Permutation-based variable importance metric for high dimensional datasets appropriate for time to event outcomes,
#' in the presence of imperfect self-reports or laboratory-based diagnostic tests.
#'
#' @description Let N and P denote the number of subjects and number of variables in the dataset, respectively.
#' Let N** denote the total number of visits, summed over all subjects in the study
#' [i.e. N** denotes the number of diagnostic test results available for all subjects in the study].
#' This algorithm builds a user-defined number of survival trees, using bootstrapped datasets.
#' Using the out of bag (OOB) data in each tree, a permutation-based measure of
#' variable importance for each of the P variables is obtained.
#'
#' @param data name of the data frame that includes the variables subject, testtimes, result
#' @param subject vector of subject IDs of length N**x1.
#' @param testtimes vector of visit or test times of length N**x1.
#' @param result vector of binary diagnostic test results (0 = negative for event of interest; 1 = positive
#' for event of interest) of length N**x1.
#' @param sensitivity the sensitivity of the diagnostic test.
#' @param specificity the specificity of the diagnostic test.
#' @param Xmat a N x P matrix of covariates.
#' @param root.size the minimum number of subjects in a terminal node.
#' @param ns number of covariate selected at each node to split the tree.
#' @param pval P-value threshold of the Likelihood Ratio Test.
#' @return a vector of the ensembled variable importance for modified random survival forest (icRSF).
#' @export
#' @examples
#' data(Xmat)
#' data(pheno)
#' tree <- treebuilder(data=pheno, subject=ID, testtimes=time, result=result, sensitivity=1,
#' specificity=1, Xmat=Xmat, root.size=30, ns=sqrt(ncol(Xmat)), pval=1)
#'
#' @useDynLib icRSF
#' @importFrom Rcpp evalCpp
#' @importFrom stats optim rbinom rexp
#' @import parallel icensmis
#'
#'
treebuilder <- function(data, subject, testtimes, result, sensitivity, specificity, Xmat, root.size, ns, pval=1){
id <- eval(substitute(subject), data, parent.frame())
time <- eval(substitute(testtimes), data, parent.frame())
result <- eval(substitute(result), data, parent.frame())
ord <- order(id, time)
if (is.unsorted(ord)) {
id <- id[ord]
time <- time[ord]
result <- result[ord]
data <- data[ord, ]
}
utime <- sort(unique(time))
stopifnot(is.numeric(sensitivity), is.numeric(specificity),
is.numeric(root.size),
is.numeric(pval), is.numeric(ns))
stopifnot(length(sensitivity) == 1, sensitivity >= 0, sensitivity <= 1,
length(specificity) == 1, specificity >= 0, specificity <= 1,
length(root.size) == 1, root.size >= 1, root.size <= nrow(Xmat),
length(ns) ==1, ns >=1, ns<=ncol(Xmat),
all(time > 0), all(is.finite(time)))
if (!all(result %in% c(0, 1))) stop("result must be 0 or 1")
if (any(tapply(time, id, anyDuplicated)))
stop("existing duplicated visit times for some subjects")
#############################################################################
# Compute D matrix
#############################################################################
timen0 <- (time != 0)
Dmat <- dmat(id[timen0], time[timen0], result[timen0], sensitivity,
specificity, 1)
row.names(Dmat) <- unique(id)
hdidx <- 1
tree <- 1
thres <- stats::qchisq(1-pval, df=1)
getchisq <- function(Dmat, x) {
# J <- dim(Dmat)[2]-1
# parmi <- c(0, log(-log((J:1)/(J+1))))
# parmD <- c(parmi[1], diff(c(0, parmi[-1])))
# parmD0 <- parmD[-1]
## log-likelihood function
if(length(unique(x))==1){
chisq <- 0
parm <- rep(0, J+1)
}else{
q <- try(optim(parmD, loglikCD, gradlikCD, lower = c(rep(-Inf, 2), rep(1e-8, J-1)), Dmat = Dmat, x = x, method="L-BFGS-B"))
q0 <- try(optim(parmD0, loglikCD0, gradlikCD0, lower = c(-Inf, rep(1e-8, J-1)), Dmat = Dmat, method="L-BFGS-B"))
if (class(q)=="try-error" | class(q0)=="try-error") return(rep(0, J+2))
chisq <- 2*(q0$value - q$value)
parm <- q$par
}
return(c(chisq, parm))
}
#############################################################################
# Compute D matrix
#############################################################################
#
# timen0 <- (time != 0)
# Dm <- icensmis:::dmat(id[timen0], time[timen0], result[timen0], sensitivity,
# specificity, 1)
simdata <- cbind(Dmat, Xmat)
J <- ncol(Dmat) - 1
nsub <- nrow(Dmat)
ncov <- ncol(Xmat)
parmi <- c(0, log(-log((J:1)/(J+1))))
parmD <- c(parmi[1], diff(c(0, parmi[-1])))
parmD0 <- parmD[-1]
bootid <- sample(1:nrow(simdata),nrow(simdata), replace = T)
bootdata <- simdata[bootid, ]
OOBid <- setdiff(1:nsub, unique(bootid))
OOB <- simdata[setdiff(1:nsub, unique(bootid)), ]
nOOB <- nrow(OOB)
# OOB <- simdata[setdiff(1:nsub, unique(bootid)), ]
# nOOB <- nrow(OOB)
################# TREE BUILDING ####################
newtree <- function(firstval, firstc, inc, parm, firstnobs) {
m <- matrix(rep(NA, inc*(J+6)), nrow = inc, ncol = J + 6)
m[1, 3] <- firstval
m[1, 4] <- firstc
m[1, 5:(J+5)] <- parm
m[1, J+6] <- firstnobs
return(list(mat=m, nxt=2, inc=inc))
}
## function to insert value to the tree
insert <- function(hdidx, dir, tr, newval, newc, parm, nobs) {
newidx <- tr$nxt
if (tr$nxt == nrow(tr$mat) + 1) {
tr$mat <- rbind(tr$mat, matrix(rep(NA, tr$inc*(J+6)), nrow=tr$inc, ncol=J+6))
}
tr$mat[newidx, 3] <- newval
tr$mat[newidx, 4] <- newc
tr$mat[newidx, 5:(J+5)] <- parm
tr$mat[newidx, (J+6)] <- nobs
tr$mat[hdidx, dir] <- newidx
tr$nxt <- tr$nxt + 1
return(tr)
}
training <- function(bootdata, hdidx, dir, tree, root.size, ns, pval) {
## stopping rule
if (length(unique(row.names(bootdata))) > root.size) {
Dmat <- bootdata[, 1:(J+1)]
Xmat <- bootdata[, -(1:(J+1))]
#selid <- 1:10
# set.seed(1)
selid <- sample(1:ncov, ns, replace=FALSE)
#ind <- apply(Xmat[,selid], 2, function(x) length(unique(x))>1)
#selid <- selid[ind]
X <- Xmat[, selid]
bsplit <- apply(X, 2, function(t) splitpointC(Dmat, t, getchisq))
if(length(selid) > 0){
id <- which.max(bsplit[2, ])
# print(max(bsplit[2, ]))
if(max(bsplit[2, ])>= thres){
bvar <- selid[id]
bcp <- bsplit[1, id] ##change here if change to continuous
bootdata.L <- bootdata[X[,id] <= bcp, ]
bootdata.R <- bootdata[X[, id] > bcp, ]
parm <- bsplit[-(1:2), id]
nobs <- nrow(bootdata)
if (nrow(bootdata) == nsub) {
tree <- newtree(bvar, bcp, 3, parm, nobs)
} else {
tree <- insert(hdidx, dir, tree, bvar, bcp, parm, nobs)
}
a <- tree$nxt - 1
tree <- training(bootdata.L, a, 1, tree, root.size, ns, pval)
tree <- training(bootdata.R, a, 2, tree, root.size, ns, pval)
}
}
}
return(tree)
}
tr <- training(bootdata, hdidx, dir, tree, root.size, ns, pval)
tr<- tr$mat
tr <- tr[!is.na(tr[, 3]), ]
return(list(tree=tr, OOBid=OOBid))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.