Nothing
## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo=TRUE,eval=FALSE)
#setwd("~/Desktop/cornet")
#utils::install.packages(pkgs=c("devtools","missRanger","xtable","randomForest","MLmetrics"))
#devtools::install_github("rauschenberger/cornet")
## ----process------------------------------------------------------------------
# # features
# X <- read.csv("data/PPMI_Baseline_Data_02Jul2018.csv",row.names="PATNO",na.strings=c(".",""))
# X <- X[X$APPRDX==1,] # Parkinson's disease
# X[c("SITE","APPRDX","EVENT_ID","symptom5_comment")] <- NULL
# 100*mean(is.na(X)) # proportion missingness
# #x <- mice::complete(data=mice::mice(X,m=10,maxit=5,method="pmm",seed=1),action="all") # low-dimensional
# x <- lapply(seq_len(10),function(x) missRanger::missRanger(data=X,pmm.k=3,
# num.trees=100,verbose=0,seed=1)) # high-dimensional
# x <- lapply(x,function(x) model.matrix(~.-1,data=x))
#
# # outcome
# Y <- read.csv("data/PPMI_Year_1-3_Data_02Jul2018.csv",na.strings=".")
# Y <- Y[Y$APPRDX==1 & Y$EVENT_ID %in% c("V04","V06","V08"),]
# Y <- Y[,c("EVENT_ID","PATNO","moca")]
# Y <- reshape(Y,idvar="PATNO",timevar="EVENT_ID",direction="wide")
# rownames(Y) <- Y$PATNO; Y$PATNO <- NULL
#
# # overlap
# names <- intersect(rownames(X),rownames(Y))
# Y <- Y[names,]; x <- lapply(x,function(x) x[names,])
# save(Y,x,file="data/processed_data.RData")
# writeLines(text=capture.output(utils::sessionInfo(),cat("\n"),
# sessioninfo::session_info()),con="data/info_data.txt")
#
# # dictionary
# rm(list=ls())
# info <- read.csv("data/PPMI_Data_Dictionary_for_Baseline_Dataset_Jul2018.csv",nrows=238)
# info <- info[!info$Variable %in% c("","PATNO","SITE","APPRDX","EVENT_ID","symptom5_comment"),]
# info <- paste(paste0("\\item \\textit{",info$Variable,"}: ",info$Description),collapse=" ")
# info <- gsub(pattern=" \\(OFF\\)",replacement="",x=info)
# info <- gsub(pattern="_",replacement="\\_",x=info,fixed=TRUE)
# info <- gsub(pattern="&",replacement="\\\\&",x=info)
# cat(info)
## ----analyse------------------------------------------------------------------
# load("data/processed_data.RData",verbose=TRUE)
#
# colSums(!is.na(Y)) # sample size
# round(100*colMeans(Y<25.5,na.rm=TRUE),1) # proportion impairment
#
# # --- Cross-validating models. ---
# names <- paste0(rep(c("lasso","ridge"),each=3),seq_len(3)) # change to 3!
# loss <- fit <- p.log <- p.lin <- list()
# for(i in seq_along(x)){
# loss[[i]] <- fit[[i]] <- p.log[[i]] <- p.lin[[i]] <- list()
# cat("i =",i,"\n")
# for(j in seq_along(names)){
# cat("j =",j," ")
# alpha <- 1*(substr(names[j],start=1,stop=5)=="lasso")
# index <- as.numeric(substr(names[j],start=6,stop=6))
# y <- Y[,index]
# cond <- !is.na(y)
# set.seed(i)
# loss[[i]][[names[j]]] <- cornet::cv.cornet(y=y[cond],cutoff=25.5,
# X=x[[i]][cond,],alpha=alpha,rf=(alpha==1),xgboost=(alpha==1))
# set.seed(i)
# fit[[i]][[names[j]]] <- cornet::cornet(y=y[cond],cutoff=25.5,
# X=x[[i]][cond,],alpha=alpha)
# set.seed(i)
# temp <- replicate(n=50,expr=cornet:::.test(y=y[cond],cutoff=25.5,X=x[[i]][cond,],alpha=alpha))
# p.log[[i]][[names[j]]] <- median(unlist(temp["log",]))
# p.lin[[i]][[names[j]]] <- median(unlist(temp["lin",]))
# }
# cat("\n")
# }
#
# save(loss,fit,p.log,p.lin,file="results/application.RData")
# writeLines(text=capture.output(utils::sessionInfo(),cat("\n"),
# sessioninfo::session_info()),con="results/info_app.txt")
## ----table_test---------------------------------------------------------------
# load("results/application.RData",verbose=TRUE)
#
# k <- "binomial" # compare: k <- "gaussian
#
# names <- c(paste0("lasso",1:3),paste0("ridge",1:3))
# frame <- data.frame(row.names=names)
# for(i in names){
#
# # deviance
# dev <- as.data.frame(t(sapply(loss,function(x) x[[i]]$deviance)))
# dec <- (dev$combined-dev[[k]])/dev[[k]]
# # change in percent
# frame[i,"dev.min"] <- round(100*min(dec),digits=1)
# frame[i,"dev.max"] <- round(100*max(dec),digits=1)
#
# # proportion with improvement
# frame[i,"dev.num"] <- sum(dev$combined < dev[[k]])
#
# # significance based on multi-split
# if(k=="binomial"){
# pvalue <- sapply(p.log,function(x) x[[i]])
# }
# if(k=="gaussian"){
# pvalue <- sapply(p.lin,function(x) x[[i]])
# }
# frame[i,"pval.min"] <- round(min(pvalue),digits=3)
# frame[i,"pval.max"] <- round(max(pvalue),digits=3)
#
# # quantiles weight parameter
# q <- round(quantile(sapply(fit,function(x) x[[i]]$pi.min),probs=c(0,0.5,1)),digits=2)
# frame[i,"pi.min"] <- q[1]
# frame[i,"pi.med"] <- q[2]
# frame[i,"pi.max"] <- q[3]
#
# # quantiles scale parameter
# q <- round(quantile(sapply(fit,function(x) x[[i]]$sigma.min),probs=c(0,0.5,1)),digits=2)
# frame[i,"sd.min"] <- q[1]
# frame[i,"sd.med"] <- q[2]
# frame[i,"sd.max"] <- q[3]
# }
#
# # presentation
# frame <- format(frame)
# rownames(frame) <- paste0(substr(x=rownames(frame),start=1,stop=5)," ",
# substr(x=rownames(frame),start=6,stop=6))
# colnames(frame) <- gsub(pattern="dev.",replacement="\\\\delta_{\\\\text{",x=colnames(frame))
# colnames(frame) <- gsub(pattern="pval.",replacement="p_{\\\\text{",x=colnames(frame))
# colnames(frame) <- gsub(pattern="pi.",replacement="\\\\pi_{\\\\text{",x=colnames(frame))
# colnames(frame) <- gsub(pattern="sd.",replacement="\\\\sigma_{\\\\text{",x=colnames(frame))
# colnames(frame) <- paste0("$",colnames(frame),"}}$")
#
# xtable <- xtable::xtable(frame,align="r|rrc|cc|ccc|ccc")
# xtable::print.xtable(xtable,include.rownames=TRUE,sanitize.text.function=function(x) x,comment=FALSE)
## ----table_change-------------------------------------------------------------
# load("results/application.RData",verbose=TRUE)
#
# names <- c(paste0("lasso",1:3),paste0("ridge",1:3))
# type <- c("deviance","class","mse","mae","auc","prauc")
#
# k <- "binomial"
#
# frame <- matrix(NA,nrow=length(names),ncol=length(type),dimnames=list(names,type))
#
# for(i in names){
# for(j in type){
# value <- as.data.frame(t(sapply(loss,function(x) x[[i]][[j]])))
# change <- 100*(value$combined-value[[k]])/value[[k]]
# frame[i,j] <- median(change)
# }
# }
#
# frame <- format(round(frame,digits=1))
# frame <- gsub(pattern=" ",replacement="+",x=frame)
# colnames(frame) <- sapply(colnames(frame),function(x) switch(x,class="\\textsc{mcr}",mse="\\textsc{mse}",mae="\\textsc{mae}",auc="\\textsc{roc-auc}",prauc="\\textsc{pr-auc}",x))
# colnames(frame) <- paste0("$\\Delta$",colnames(frame))
# rownames(frame) <- paste0(substr(x=rownames(frame),start=1,stop=5)," ",
# substr(x=rownames(frame),start=6,stop=6))
# xtable <- xtable::xtable(frame,align="r|cccccc")
# xtable::print.xtable(xtable,include.rownames=TRUE,sanitize.text.function=function(x) x,comment=FALSE)
## ----table_other--------------------------------------------------------------
# load("results/application.RData",verbose=TRUE)
#
# table <- numeric()
# for(i in 1:3){
# lasso <- apply(sapply(loss,function(x) x[[paste0("lasso",i)]]$deviance),1,median)
# names(lasso)[names(lasso)=="binomial"] <- "logistic lasso regression"
# names(lasso)[names(lasso)=="combined"] <- "combined lasso regression"
# ridge <- apply(sapply(loss,function(x) x[[paste0("ridge",i)]]$deviance),1,median)
# names(ridge)[names(ridge)=="binomial"] <- "logistic ridge regression"
# names(ridge)[names(ridge)=="combined"] <- "combined ridge regression"
# table <- cbind(table,c(lasso[c("logistic lasso regression","combined lasso regression")],ridge[c("logistic ridge regression","combined ridge regression")],lasso["rf"],lasso["xgboost"]))
# rownames(table)[rownames(table)=="rf"] <- "\\texttt{randomForest}"
# rownames(table)[rownames(table)=="xgboost"] <- "\\texttt{xgboost}"
# }
# colnames(table) <- paste("year",1:3)
# rownames(table) <- paste("\\footnotesize",rownames(table))
# xtable <- xtable::xtable(table)
# xtable::print.xtable(xtable,comment=FALSE,hline.after=c(0,2,4),sanitize.text.function=identity)
#
## ----figure_MAP---------------------------------------------------------------
# load("results/application.RData",verbose=TRUE)
# sum <- fit[[1]]$lasso1
# sum$cvm <- Reduce("+",lapply(fit,function(x) x$lasso1$cvm))
# sum$sigma.min <- sapply(fit,function(x) x$lasso1$sigma.min)
# sum$pi.min <- sapply(fit,function(x) x$lasso1$pi.min)
#
# grDevices::pdf("manuscript/figure_MAP.pdf",width=4,height=3)
#
# graphics::par(mar=c(4,4,0.5,0.5))
# cornet:::plot.cornet(sum)
#
# grDevices::dev.off()
## ----figure_TFN---------------------------------------------------------------
# rm(list=ls())
#
# sigma <- c(1,2,3); cutoff <- 25.5
# x <- seq(from=20,to=30,length.out=100)
#
# grDevices::pdf("manuscript/figure_TFN.pdf",width=4,height=3)
#
# graphics::par(mar=c(4,4,0.5,0.5))
# graphics::plot.new()
# graphics::plot.window(xlim=range(x),ylim=c(0,1))
# graphics::box()
# graphics::axis(side=1)
# graphics::axis(side=2)
# graphics::title(xlab=expression(hat(y)),ylab=expression(Phi(hat(y),mu,sigma^2)),line=2.5)
# graphics::abline(h=0.5,lty=2,col="grey")
# graphics::abline(v=cutoff,lty=2,col="grey")
#
# lty <- c(2,1,3); lwd <- c(1,1,2)
# lty <- c("dashed","solid","dotted")
# for(i in seq_along(sigma)){
# p <- stats::pnorm(q=x,mean=cutoff,sd=sigma[i])
# graphics::lines(x=x,y=p,lty=lty[i],lwd=lwd[i])
# }
#
# graphics::text(x=cutoff,y=0.40,labels=bquote(mu==.(cutoff)),pos=4)
# legend <- sapply(sigma,function(x) as.expression(bquote(sigma == .(x))))
# graphics::legend(x="topleft",legend=legend,lty=lty,bty="n",lwd=lwd)
#
# grDevices::dev.off()
## ----ordinal,eval=FALSE-------------------------------------------------------
# cv.cornet <- function (y, cutoff, X, alpha = 1, nfolds.ext = 5, nfolds.int = 10, foldid.ext = NULL, foldid.int = NULL, type.measure = "deviance", ordinal = FALSE,...) {
# z <- 1 * (y > cutoff)
# if (is.null(foldid.ext)) {
# foldid.ext <- palasso:::.folds(y = z, nfolds = nfolds.ext)
# } else {
# nfolds.ext <- length(unique(foldid.ext))
# }
# cols <- c("intercept", "binomial", "combined")
# ### trial start ###
# if(ordinal){cols <- c(cols,"ordinal")}
# ### trial end ###
# pred <- matrix(data = NA, nrow = length(y), ncol = length(cols),
# dimnames = list(NULL, cols))
# for (i in seq_len(nfolds.ext)) {
# y0 <- y[foldid.ext != i]
# z0 <- z[foldid.ext != i]
# X0 <- X[foldid.ext != i, ]
# X1 <- X[foldid.ext == i, ]
# if (is.null(foldid.int)) {
# foldid <- palasso:::.folds(y = z0, nfolds = nfolds.int)
# } else {
# foldid <- foldid.int[foldid.ext != i]
# }
# fit <- cornet::cornet(y = y0, cutoff = cutoff, X = X0,
# alpha = alpha, type.measure = type.measure, foldid = foldid,
# ...)
# tryCatch(expr = plot.cornet(fit), error = function(x) NULL)
# temp <- predict.cornet(fit, newx = X1)
# if (any(temp < 0 | temp > 1)) {
# stop("Outside unit interval.", call. = FALSE)
# }
# model <- colnames(temp)
# for (j in seq_along(model)) {
# pred[foldid.ext == i, model[j]] <- temp[[model[j]]]
# }
# if(ordinal){
# ### trial start ###
# #browser()
# y0_ord <- as.factor(y0)
# fit <- ordinalNet::ordinalNet(x=X0,y=y0_ord,alpha=alpha)
# pred_ord <- predict(fit,newx=X1)
# above <- as.numeric(levels(y0_ord))>cutoff
# pred[foldid.ext ==i,"ordinal"] <- rowSums(pred_ord[,above])
# } ### trial end ###
# }
# type <- c("deviance", "class", "mse", "mae", "auc")
# loss <- lapply(X = type, FUN = function(x) palasso:::.loss(y = z,
# fit = pred, family = "binomial", type.measure = x,
# foldid = foldid.ext)[[1]])
# names(loss) <- type
# loss <- lapply(loss, function(x) signif(x, digits = 6))
# return(loss)
# }
# predict.cornet <- cornet:::predict.cornet
#
# loss <- list()
# for(i in seq_along(Y)){
# load("data/processed_data.RData",verbose=TRUE)
# cond <- !is.na(Y[[i]])
# loss[[i]] <- cv.cornet(y=Y[[i]][cond],X=x[[1]][cond,],cutoff=25.5,ordinal=TRUE)
# }
# sapply(loss,function(x) x$deviance)
#
#
# ### Ordinal regression with two different packages.
# #
# # load("data/processed_data.RData",verbose=TRUE)
# # cond <- !is.na(Y$moca.V04)
# # y <- as.factor(x=Y$moca.V04[cond])
# # x <- x[[1]][cond,]
# #
# # # ordinalNet
# # model <- ordinalNet::ordinalNet(x=x,y=y)
# # y_hat <- predict(model)
# # below <- as.numeric(levels(y))<=25.5
# # above <- as.numeric(levels(y))>25.5
# # b1 <- rowSums(y_hat[,below])
# # a1 <- rowSums(y_hat[,above])
# #
# # # glmnetcr
# # model <- glmnetcr::glmnetcr(x=x,y=y)
# # s <- glmnetcr::select.glmnetcr(model)
# # y_hat <- predict(model)$probs[,,s]
# # below <- as.numeric(colnames(y_hat))<=25.5
# # above <- as.numeric(colnames(y_hat))>25.5
# # b2 <- rowSums(y_hat[,below])
# # a2 <- rowSums(y_hat[,above])
# #
# # cor(a1,a2)
# # cor(b1,b2)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.