Nothing
LongCART<- function (data, patid, fixed, gvars, tgvars, minsplit = 40, minbucket = 20,
alpha = 0.05, coef.digits = 2, print.lme = FALSE)
{
node.labels<- function(x){ #the x should be Treeout
Terminal<- NULL
tree.nt<- subset(x, !Terminal)
tree.t<- subset(x, Terminal)
n.group<- nrow(tree.t)
id.nt<- tree.nt$ID
node.labels<- data.frame(node=numeric(), label=character())
for(k in 1:n.group){
node.t<- tree.t$ID[k]
#cat(node.t, "\n")
text=NULL
while(node.t>1){
if((node.t/2)>floor(node.t/2)){
text<- paste0(tree.nt$var[id.nt==floor(node.t/2)], ">=", tree.nt$index[id.nt==floor(node.t/2)],
ifelse(!is.null(text), paste0(" & ", text), ""))
} else if((node.t/2)==floor(node.t/2)){
text<- paste0(tree.nt$var[id.nt==node.t/2], "<", tree.nt$index[id.nt==node.t/2],
ifelse(!is.null(text), paste0(" & ", text), ""))
}
#cat(text, "\n")
node.t=floor(node.t/2)
} #--- while loop ends
node.labels[nrow(node.labels) + 1,]<- c(tree.t$ID[k], text)
} #--- for loop ends
node.labels
}
llike <- function(data, fixed) {
if (exists("txx.out"))
rm(txx.out)
loglik <- NA
try(txx.out <- lme(fixed, data = data, method = "ML",
random = ~1 | patid, na.action = na.omit), silent = TRUE)
if (exists("txx.out"))
loglik <- logLik(txx.out)[1]
loglik
}
test4split <- function(cutoff, data, fixed, splitvar) {
LRTS <- NA
message(cutoff, appendLF = FALSE)
left.data <- data[data[[splitvar]] < cutoff, ]
right.data <- data[data[[splitvar]] >= cutoff, ]
rn.ll <- llike(data = data, fixed = fixed)
left.ll <- llike(data = left.data, fixed = fixed)
right.ll <- llike(data = right.data, fixed = fixed)
LRTS <- 2 * (left.ll + right.ll - rn.ll)
message(" ", appendLF = FALSE)
LRTS
}
single.group <- function(data, fixed, splitvar, minbucket) {
xcut <- NA
improve <- NA
Y.name <- as.character(attr(as.Formula(fixed), "lhs"))
data <- data[!is.na(data[[Y.name]]), ]
data <- data[!is.na(data[[splitvar]]), ]
temp <- unique(data[, c("patid", splitvar)])
N <- tapply(temp$patid, temp[[splitvar]], length)
temp1 <- as.data.frame(list(cuts = as.numeric(names(N)),
N = N))
temp1$left <- cumsum(temp1$N) - temp1$N
temp1$right <- sum(temp1$N) - temp1$left
temp2 <- temp1[temp1$left >= minbucket & temp1$right >=
minbucket, ]
vals <- temp2$cuts
message("Evaluations of cutoffs for maximum improvements (Maximum cutoff value = ",
max(vals), ")\n", appendLF = FALSE)
LRT.vec <- lapply(vals, FUN = test4split, data = data,
fixed = fixed, splitvar = splitvar)
message(".\n", appendLF = FALSE)
LRT.vec <- unlist(LRT.vec)
if (any(!is.na(LRT.vec))) {
index <- which.max(LRT.vec)
xcut = vals[index]
improve = LRT.vec[index]
}
ret <- list(xcut = xcut, improve = improve)
ret
}
bestsplit <- function(data, fixed, gvars, tgvars, node.name,
minbucket, alpha) {
ngvars <- length(gvars)
best.gvar <- NA
best.cutoff <- NA
improve <- NA
min.pval.adj <- NA
stab.pval <- numeric(length = ngvars)
for (v in 1:ngvars) {
stab.pval[v] <- NA
splitvar <- gvars[v]
message("\nSplitting variable: ", splitvar,
"\n", appendLF = FALSE)
data1 <- data[!is.na(splitvar), ]
G <- length(unique(data1[[splitvar]]))
message("G=", G, "\n", appendLF = FALSE)
if (G > 1) {
if (tgvars[v] == 0)
stab.pval[v] <- StabCat(data = data1, patid = "patid",
fixed = fixed, splitvar = splitvar)$pval
if (tgvars[v] == 1)
stab.pval[v] <- StabCont(data = data1, patid = "patid",
fixed = fixed, splitvar = splitvar)$pval
}
else message("Instability test was NOT performed. \n",
appendLF = FALSE)
}
message("\n stab.pval=", stab.pval, "\n",
appendLF = FALSE)
if (any(!is.na(stab.pval))) {
stab.pval.adj <- p.adjust(stab.pval, method = "hochberg")
sel.v <- which.min(stab.pval)
min.pval.adj <- stab.pval.adj[sel.v]
best.gvar <- gvars[sel.v]
message("\n stab.pval.adj=", stab.pval.adj,
"\n", appendLF = FALSE)
message("\n alpha=", alpha, "\n", appendLF = FALSE)
if (min.pval.adj < alpha) {
bestcut <- single.group(data, fixed, best.gvar,
minbucket)
best.cutoff <- bestcut$xcut
improve <- bestcut$improve
}
}
return(list(node = node.name, gvar = best.gvar, cutoff = best.cutoff,
improve = improve, pval = min.pval.adj))
}
coeff.Estimate <- function(LMEobj, coef.digits = 2) {
ct <- LMEobj$tTable
p <- nrow(ct)
ct.names <- row.names(ct)
yval <- " "
for (ct.i in 1:p) yval <- paste0(yval, "+", round(ct[ct.i,
1], coef.digits), "*", ct.names[ct.i])
yval <- gsub(pattern = "(Intercept)", replacement = "",
x = yval, fixed = TRUE)
yval <- gsub(pattern = "+-", replacement = "-",
yval, fixed = TRUE)
yval <- gsub(pattern = " +", replacement = "",
yval, fixed = TRUE)
yval <- gsub(pattern = " ", replacement = "",
yval, fixed = TRUE)
yval <- gsub(pattern = "*+", replacement = "+",
yval, fixed = TRUE)
yval <- gsub(pattern = "*-", replacement = "-",
yval, fixed = TRUE)
yval
}
rsplit <- function(data, fixed, gvars, tgvars, id, split,
alpha, minsplit, minbucket, Rate, loglik, env = parent.frame(),
coef.digits = 2) {
s.var <- unlist(split[2])
s.cut <- unlist(split[3])
s.improve <- unlist(split[4])
s.pval <- unlist(split[5])
N <- length(unique(data$patid))
if (id == 1) {
env$Treeout <- data.frame(id = id, N = N, yval = Rate,
splitvar = s.var, cutoff = s.cut, pstab = s.pval,
loglik = loglik, improve = s.improve, stringsAsFactors = FALSE)
}
else {
env$Treeout <- rbind(env$Treeout, c(id, N, Rate,
s.var, s.cut, s.pval, loglik, s.improve))
}
if (!is.na(s.var) && !is.na(s.cut)) {
data <- data[!is.na(data[[s.var]]), ]
id_l = id * 2
message("---------------------------------------- \n",
appendLF = FALSE)
message("NODE ", id_l, "- Rule:", s.var,
" <", s.cut, "\n", appendLF = FALSE)
message("---------------------------------------- \n",
appendLF = FALSE)
data_l <- subset(data, data[[s.var]] < s.cut)
left.subj <- length(unique(data_l$patid))
message("No. of individual in left node: ",
left.subj, " \n", appendLF = FALSE)
tjj <- NULL
tjj <- summary(lme(fixed, data = data_l, method = "REML",
random = ~1 | patid))
if (print.lme)
cat("NODE ", id_l, "- Rule:", s.var,
" <", s.cut, "\n")
if (print.lme)
print(tjj$tTable)
Rate <- coeff.Estimate(tjj, coef.digits = coef.digits)
loglik <- logLik(lme(fixed, data = data_l, method = "REML",
random = ~1 | patid))
if (left.subj >= minsplit) {
message("\nDECISION: Go to the next level \n",
appendLF = FALSE)
split_l <- bestsplit(data = data_l, fixed = fixed,
gvars = gvars, tgvars = tgvars, node.name = id_l,
minbucket = minbucket, alpha = alpha)
rsplit(data = data_l, fixed = fixed, gvars = gvars,
tgvars = tgvars, id = id_l, split = split_l,
alpha = alpha, minsplit = minsplit, minbucket = minbucket,
Rate = Rate, loglik = loglik, env = env, coef.digits = coef.digits)
}
else {
env$Treeout <- rbind(env$Treeout, c(id_l, left.subj,
Rate, NA, NA, NA, loglik, NA))
}
id_r = id * 2 + 1
message("---------------------------------------- \n",
appendLF = FALSE)
message("NODE ", id_r, "- Rule:", s.var,
" >=", s.cut, "\n", appendLF = FALSE)
message("---------------------------------------- \n",
appendLF = FALSE)
data_r <- subset(data, data[[s.var]] >= s.cut)
right.subj <- length(unique(data_r$patid))
message("No. of individual in right node: ",
right.subj, " \n", appendLF = FALSE)
tjj <- NULL
tjj <- summary(lme(fixed, data = data_r, method = "REML",
random = ~1 | patid))
if (print.lme)
cat("NODE ", id_r, "- Rule:", s.var,
" >=", s.cut, "\n")
if (print.lme)
print(tjj$tTable)
Rate <- coeff.Estimate(tjj, coef.digits = coef.digits)
loglik <- logLik(lme(fixed, data = data_r, method = "REML",
random = ~1 | patid))
if (right.subj >= minsplit) {
message("\nDECISION: Go to the next level \n",
appendLF = FALSE)
split_r <- bestsplit(data = data_r, fixed = fixed,
gvars = gvars, tgvars = tgvars, node.name = id_r,
minbucket = minbucket, alpha = alpha)
rsplit(data = data_r, fixed = fixed, gvars = gvars,
tgvars = tgvars, id = id_r, split = split_r,
alpha = alpha, minsplit = minsplit, minbucket = minbucket,
Rate = Rate, loglik = loglik, env = env, coef.digits = coef.digits)
}
else {
env$Treeout <- rbind(env$Treeout, c(id_r, right.subj,
Rate, NA, NA, NA, loglik, NA))
}
}
else message("\nDECISION: NO more splitting required \n",
appendLF = FALSE)
}
if (!exists(as.character(substitute(data))))
stop("Dataset does not exist\n")
if (!is.data.frame(data))
stop("Dataset does not exist\n")
if (!patid %in% colnames(data))
stop("The column ", patid, " containing subjects id is missing in dataset.\n")
data$patid <- data[[patid]]
data <- data[!is.na(data[["patid"]]), ]
Y.name <- as.character(attr(as.Formula(fixed), "lhs"))
if (!Y.name %in% colnames(data))
stop("The column ", Y.name, " containing subjects id is missing in dataset.\n")
data <- data[!is.na(data[[Y.name]]), ]
if (length(gvars) != length(tgvars))
stop("gvars and tgvars are not of equal length. \n")
if (any(is.na(tgvars)))
stop("tgvars cannot have NA value. \n")
for (var.i in 1:length(gvars)) {
if (!gvars[var.i] %in% colnames(data))
stop("The column ", gvars[var.i], " is missing in dataset.\n")
}
for (var.i in 1:length(gvars)) {
temp <- as.data.frame(list(patid = cbind(data["patid"],
gvar = data[gvars[var.i]])))
colnames(temp) = c("patid", "gvar")
temp <- unique(temp)
temp <- temp[!is.na("gvar"), ]
n.gvar.pat <- tapply(temp$gvar, temp$patid, function(x) length(unique(x)))
if (any(n.gvar.pat != 1))
stop("One subject is associated with more than one value of ",
gvars[var.i], ".\n")
rm(temp, n.gvar.pat)
}
LongCART.env <- new.env()
message("------------------------------------------ \n",
appendLF = FALSE)
message(" ROOT NODE: NODE 1 \n",
appendLF = FALSE)
message("------------------------------------------ \n",
appendLF = FALSE)
tjj <- NULL
tjj <- summary(lme(fixed, data = data, method = "REML",
random = ~1 | patid))
if (print.lme)
cat("ROOT NODE: NODE 1\n")
if (print.lme)
print(tjj$tTable)
Rate <- coeff.Estimate(tjj, coef.digits = coef.digits)
loglik <- logLik(lme(fixed, data = data, method = "REML",
random = ~1 | patid))
split <- bestsplit(data = data, fixed = fixed, gvars = gvars,
tgvars = tgvars, node.name = 1, minbucket = minbucket,
alpha = alpha)
rsplit(data = data, fixed = fixed, gvars = gvars, tgvars = tgvars,
id = 1, split = split, alpha = alpha, minsplit = minsplit,
minbucket = minbucket, Rate = Rate, loglik = loglik,
env = LongCART.env, coef.digits = coef.digits)
Treeout <- LongCART.env$Treeout
colnames(Treeout) <- c("ID", "n", "yval",
"var", "index", "p (Instability)",
"loglik", "improve")
row.names(Treeout) <- NULL
Treeout[, 1] <- as.numeric(Treeout[, 1])
Treeout[, 2] <- as.numeric(Treeout[, 2])
Treeout[, 5] <- round(as.numeric(Treeout[, 5]), digits = 2)
Treeout[, 6] <- round(as.numeric(Treeout[, 6]), digits = 3)
Treeout[, 7] <- round(as.numeric(Treeout[, 7]), digits = 0)
Treeout[, 8] <- round(as.numeric(Treeout[, 8]), digits = 0)
Treeout$Terminal <- ifelse(is.na(Treeout[, 5]), TRUE, FALSE)
print(Treeout)
form <- Formula(fixed)
Y.name <- as.character(attr(form, "lhs"))
X.name <- as.character(attr(form, "rhs"))
X.name <- gsub(pattern = " ", replacement = "",
x = X.name)
X.name <- unlist(strsplit(X.name, "+", fixed = TRUE))
p <- length(X.name) + 1
AIC.tree <- 2 * sum(Treeout[, 7] * Treeout[, 9]) - 2 * (p +
2) * sum(Treeout[, 9])
AIC.root <- 2 * Treeout[1, 7] - 2 * (p + 2)
improve.AIC <- AIC.tree - AIC.root
logLik.tree <- sum(Treeout[, 7] * Treeout[, 9])
logLik.root <- Treeout[1, 7]
Deviance <- 2 * (logLik.tree - logLik.root)
LRT.df <- p * sum(Treeout[, 9]) - p
LRT.p <- 1 - pchisq(Deviance, LRT.df)
message("AIC(tree)=", AIC.tree, " AIC(Root)=",
AIC.root, "\n", appendLF = FALSE)
message("logLikelihood (tree)=", logLik.tree, " logLikelihood (Root)=",
logLik.root, "\n", appendLF = FALSE)
message("Deviance=", Deviance, " (df=", LRT.df,
", p-val=", LRT.p, ") \n", appendLF = FALSE)
sel <- as.numeric(rownames(Treeout[Treeout$Terminal, ]))
Treeout1 <- as.data.frame(Treeout[, c("ID", "var",
"n", "yval", "loglik", "Terminal",
"index", "improve")])
row.names(Treeout1) <- Treeout1$ID
Treeout1$var = ifelse(Treeout1$Terminal, "<leaf>",
Treeout1$var)
Treeout1$dev = -Treeout1$loglik
Treeout1$wt <- Treeout1$count <- Treeout1$n
Treeout1$ncompete <- Treeout1$nsurrogate <- 0
Treeout1$ncat = -1
frame <- Treeout1[c("var", "n", "wt", "dev",
"yval", "ncompete", "nsurrogate")]
splits <- Treeout1[!Treeout1$Terminal, ]
splits <- splits[c("count", "ncat", "improve",
"index")]
splits <- as.matrix(splits)
temp <- frame[frame$var == "<leaf>", ]
cptable <- 0:(dim(temp)[1] - 1)
functions <- NULL
functions$text <- function(yval, dev, wt, ylevel, digits,
n, use.n) {
if (use.n)
paste0(yval, "\nn=", n)
else yval
}
nodelab<- node.labels(Treeout)
varnam<- unique(frame$var)
varnam<- varnam[varnam!="<leaf>" ]
ret <- list(Treeout, frame, splits, cptable, functions, p,
AIC.tree, AIC.root, improve.AIC, logLik.tree, logLik.root,
Deviance, LRT.df, LRT.p, data, patid, fixed,
nodelab, varnam)
names(ret) <- c("Treeout", "frame", "splits",
"cptable", "functions", "p", "AIC.tree",
"AIC.root", "improve.AIC", "logLik.tree",
"logLik.root", "Deviance", "LRT.df",
"LRT.pval", "data", "patid", "fixed",
"nodelab", "varnam")
class(ret) <- "LongCART"
ret
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.