#' @title Diagnostics and performance plots
#'
#' @description Displays visual diagnostics information and performance measures
#' for objects of class \code{fdaModel} and \code{fdaModelPred}.
#'
#' @param obj An object of class \code{fdaModel} from \code{\link{fdaML_train}}
#' or class \code{fdaModelPred} from \code{\link{fdaML_predict}}.
#'
#' @param hist_range Optional numeric vector of length two giving the plotting range of
#' the class-conditional histograms in binomial classification; if \code{NULL}, the
#' range of the linear predictor is used.
#'
#' @param roc_smoothing A value for the parameter \code{spar} in \code{\link{smooth.spline}},
#' used to smooth the one-vs-all ROC curves in multinomial classification; defaults to
#' \code{0.25} when \code{NULL}.
#'
#' @param multinom_labels Optional character vector of class labels used in place of the
#' raw factor levels in the multinomial plots.
#'
#' @seealso
#' \code{\link{fdaML_train}} and \code{\link{fdaML_predict}}
#'
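#' @return For objects of class \code{fdaModel} a list of summary measures is returned
#' in addition to the plots: \code{err} and \code{largest_bias} when \code{model = "lm"};
#' \code{err}, \code{err_stdev}, \code{auc} and \code{roughnessStd} when \code{model = "glm"}.
#'
#' @examples
#' \dontrun{
#' ## Illustrative sketch only: the arguments of fdaML_train() shown here
#' ## (functional predictors 'X' and response 'y') are assumed, not checked.
#' fit <- fdaML_train(X, y, model = "lm")
#' fdaPlot(fit)
#' }
#'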
#' @export
#'
fdaPlot <- function(obj, hist_range = NULL, roc_smoothing = NULL, multinom_labels = NULL){
if(inherits(obj, "fdaModel")){
oldparams <- par(no.readonly = TRUE); on.exit(par(oldparams), add = TRUE) # restore graphical settings on exit
uniq <- sort(unique(obj$y))
lu <- length(uniq)
if(obj$model == "lm"){
### PLOT 1 --- cross-validation
#
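# 'obj$perf_cv' appears to hold one row per cross-validation split and one column per
# candidate number of components in 'obj$Q_vec'; each grey line below is one split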
if(!(obj$Q_len==1)){
matplot(obj$Q_vec, t(obj$perf_cv), type="l", lty=1, col="grey", xlab="components (Q)", ylab="RMSD")
lines(obj$Q_vec, colMeans(obj$perf_cv), lwd=2)
points(obj$Q_vec, colMeans(obj$perf_cv), pch=1, cex=0.8)
abline(v = c(obj$Q_max, obj$Q_opt), lty=c(2,1), lwd=1)
legend("topright", c(paste0("Q_opt=", obj$Q_opt), paste0("Q_max=", obj$Q_max)), lty=1:2, bty="n", cex=0.9)
}
### PLOT 2 --- coefficient function for all replications, plus average in bold
#
matplot(obj$t_range_mod, obj$beta, type="l", lty=1, col="gray", xlab="wavelength", ylab="coefficient function")
lines(obj$t_range_mod, rowMeans(obj$beta), type="l", lwd=2 , col="black")
legend("bottomright", c("coefficient functions", "mean coefficient function"), lty=c(1,1), lwd=c(1,2), col=c('grey','black'), bty="n", cex=0.9)
### PLOT 3 --- test-set predictions at the optimal Q, pooled over all training/testing splits
#
# gather all data (across 'obj$reps') in one vector for each of the unique values of 'y'
alldata <- list()
for(u in 1:lu){
alldata[[u]] <- 0
for(r in 1:obj$reps){
alldata[[u]] <- c(alldata[[u]], (obj$y_testpred_opt[,r]+obj$cenY[[r]])[obj$y[obj$id_test[[r]]] == uniq[u]])
}
alldata[[u]] <- alldata[[u]][-1]
}
# compute percentiles for each of the unique values of 'y'
ofs <- (min(uniq)==0); ofs_x <- 0.4
percentile_values <- c(0.05, 0.15, 0.25, 0.5, 0.75, 0.85, 0.95);
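# custom boxplots: box = 25th-75th percentile with the median in bold, whiskers to the
# 5th/95th percentiles, and shorter tick marks at the 15th/85th percentiles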
percentiles <- matrix(0, length(percentile_values), lu); extremes <- x_vec <- matrix(0, 2, lu); means <- rep(0,lu)
for(u in 1:lu){
percentiles[,u] <- quantile(alldata[[u]], probs=percentile_values)
x_vec[,u] <- c(uniq[u] - ofs_x, uniq[u] + ofs_x)
means[u] <- mean(alldata[[u]])
}
# Boxplot
xlabels <- rep(NA, 2+max(uniq)+ofs); xlabels[0:(1+max(uniq)+ofs) %in% uniq] <- uniq # one label per tick used in 'axis()' below
range_pred <- range(percentiles)
plot(0, col=NA, xlim=c(0,1+max(uniq)), ylim=c(-1,1)+range_pred, xaxt='n', xlab="true age", ylab="predicted age")
axis(side=1, at=0:(1+max(uniq)+ofs), labels=xlabels)
for(u in 1:lu){
# boxes
rect(x_vec[1,u], percentiles[3,u], x_vec[2,u], percentiles[5,u], col="white") # fill boxes
segments(x_vec[1,u], percentiles[4,u], x_vec[2,u], percentiles[4,u], lty=1, lwd=3, lend=1)
# whiskers
segments(uniq[u], percentiles[5,u], uniq[u], percentiles[7,u], lty=1, lwd=1, lend=1)
segments(uniq[u], percentiles[3,u], uniq[u], percentiles[1,u], lty=1, lwd=1, lend=1)
# whisker ends
segments(uniq[u]-ofs_x/2, percentiles[6,u], uniq[u]+ofs_x/2, percentiles[6,u], lty=1, lwd=1, lend=1)
segments(uniq[u]-ofs_x/2, percentiles[2,u], uniq[u]+ofs_x/2, percentiles[2,u], lty=1, lwd=1, lend=1)
segments(uniq[u]-ofs_x/4, percentiles[7,u], uniq[u]+ofs_x/4, percentiles[7,u], lty=1, lwd=1, lend=1)
segments(uniq[u]-ofs_x/4, percentiles[1,u], uniq[u]+ofs_x/4, percentiles[1,u], lty=1, lwd=1, lend=1)
# means
segments(x_vec[1,u]+ofs_x/2, means[u], x_vec[2,u]-ofs_x/2, means[u], lty=1, lwd=1, col="green")
}
points(x=uniq + ofs, y=uniq, pch=19, col="Gold", cex=0.8) # truth
legend("topleft", legend=c("mean", "truth"), col=c("green","Gold"), pch=c(NA,19), lty=c(1,NA), lwd=c(1,NA), seg.len=0.75, bty="n", cex=0.8)
legend("topright", paste0("avg test error: ",round(mean(obj$err_test),2)), bty="n", cex=0.8, text.col="brown")
biasVec <- colMeans(obj$bias_test)
text(x = uniq, y = range_pred[1]-1, labels = paste0(round(biasVec,2)), cex=0.8, col="purple")
text(uniq[lu], range_pred[1]+0.3, "bias:", cex=0.8, col="purple")
return( list(err = round(mean(obj$err_test),2),
largest_bias = biasVec[which.max(abs(biasVec))]) )
}else if(obj$model == "glm"){
### PLOT 1 --- cross-validation
#
if(!(obj$Q_len==1)){
matplot(obj$Q_vec, t(obj$perf_cv), type="l", lty=1, col="grey", xlab="components (Q)", ylab="AUC", ylim=c(0,1))
lines(obj$Q_vec, colMeans(obj$perf_cv), lwd=2)
abline(v = c(obj$Q_opt, obj$Q_max), lty=c(1,2), lwd=1)
legend("bottomright", c(paste0("Q opt = ", obj$Q_opt, " (AUC = ", round(colMeans(obj$perf_cv)[obj$Q_vec==obj$Q_opt],2),")"), paste0("Q max = ", obj$Q_max, " (AUC = ", round(colMeans(obj$perf_cv)[obj$Q_vec==obj$Q_max],2),")")), lty=1:2, bty="n", cex=0.9)
}
### PLOT 2 --- coefficient function for all replications, plus average in bold
#
if(obj$family != "multinomial"){
xlim_vec <- range(obj$t_range_mod)+c(-100*(obj$S+1),0)
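# the x-range is extended to the left so that the scalar coefficients (intercept 'alpha'
# and, if 'Z' covariates are present, the 'gamma' coefficients) can be shown next to the
# functional coefficient; their values are read off the right-hand (blue) axis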
matplot(obj$t_range_mod, obj$beta, type="l", lty=1, col=alpha("gray",0.6), xlab="wavelength", ylab="coefficient function", xlim=xlim_vec, ylim=range(obj$beta))
lines(obj$t_range_mod, rowMeans(obj$beta), type="l", lwd=2 , col="black")
par(new = TRUE)
plot(jitter(rep(xlim_vec[1], obj$reps), factor=5), obj$alpha, axes=F, col=alpha("blue",0.4), pch=5, cex=0.8, xlab="", ylab="", xlim=xlim_vec, ylim=range(rbind(obj$alpha,obj$gamma)))
points(xlim_vec[1], mean(obj$alpha), pch="-", col="blue", cex=3)
if(!is.null(obj$Z)){
for(q in 1:obj$S){ points(jitter(rep(xlim_vec[1]+100*q, obj$reps), factor=5), obj$gamma[q,], col=alpha("blue",0.4), pch=1, cex=1.1)
points(xlim_vec[1]+100*q, mean(obj$gamma[q,]), col="blue", pch=20, cex=1) }}
axis(side=4, at = pretty(range(rbind(obj$alpha,obj$gamma))), col="blue", col.axis="blue", las=1) # right-hand axis for 'alpha'/'gamma'
}
legend("bottomright", c("coefficient functions", "mean coefficient function"), lty=c(1,1), lwd=c(1,2), col=c('grey','black'), bty="n", cex=0.9)
### PLOT 3 --- error/accuracy and optimal cutoff point (putting equal weight on sensitivity and specificity)
#
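# ROCR usage pattern: prediction() pairs the (lists of) predicted scores with the true
# labels; performance(pred, "err") then gives the error rate as a function of the cutoff,
# and performance(pred, "tpr", "fpr") the ROC curve used in Plot 4 below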
if(obj$family == "binomial"){
pred <- prediction(obj$ROC_to_plot_binom$pred_test, obj$ROC_to_plot_binom$labels)
perf <- performance(pred, "tpr", "fpr")
measureType <- "err"
if(measureType == "err"){ #error
# stats
err_perf <- performance(pred, measure = "err")
err_ind = sapply(1:obj$reps, function(z){ which.min(err_perf@y.values[[z]]) })
if(any(err_ind == 1)){ # get rid of infinities in 'err_perf@x.values)'
tt <- (1:obj$reps)[err_ind == 1]; for(l in 1:length(tt)){ if( is.infinite(slot(err_perf, "x.values")[[tt[l]]][1]) ){ err_ind[tt[l]] <- 2 }}
}
err_val = sapply(1:obj$reps, function(z){ err_perf@y.values[[z]][err_ind[z]] })
err_cut = sapply(1:obj$reps, function(z){ err_perf@x.values[[z]][err_ind[z]] })
avg_err <- mean(err_val)
avg_err_cut <- mean(err_cut)
error_per_rep <- sapply(1:obj$reps, function(qq){ mean((obj$y_testpred_opt[,qq] > avg_err_cut) != obj$y[obj$id_test[[qq]]]) })
avg_err_avgcut <- mean(error_per_rep)
stdev_err_avgcut <- sd(error_per_rep)
err_colcode <- ifelse( all(avg_err_avgcut < c(0.5, mean(obj$y==1), mean(obj$y==0))), "green", ifelse(all(avg_err_avgcut < 0.5), "orange", "red") )
d_allcurves <- data.frame(x = unlist(err_perf@x.values), y = unlist(err_perf@y.values)); d_allcurves <- d_allcurves[order(d_allcurves$x),]
# plot
plot(err_perf, lty=1, col="grey", xlim=c(0,1), ylim=c(0,1), xlab="cutoff (probability)", ylab="error rate") # cutoff-error curve for each randomisation (1:obj$reps)
lines(d_allcurves$x[!is.infinite(d_allcurves$x)], predict(loess(d_allcurves$y[!is.infinite(d_allcurves$x)] ~ d_allcurves$x[!is.infinite(d_allcurves$x)])), lwd=2)
points(err_cut,err_val) # optimal (cutoff,error) point for each randomisation (1:obj$reps)
abline(v = avg_err_cut, lty=2) # average cutoff
points(avg_err_cut, avg_err, pch=19, col="black", cex=1.3) # avg error (w/ curve-specific cutoff)
points(avg_err_cut, avg_err_avgcut, pch=19, col=err_colcode, cex=1.3) # avg error (w/ avg cutoff)
legend("topright", c("cutoff-error curves", "mean cutoff-error curve", "avg cutoff", paste0("avg error (w/ curve-specific cutoff) = ", round(avg_err,2)), paste0("avg error (w/ avg cutoff) = ", round(avg_err_avgcut,2)), paste0("freq minority class = ", round(length(obj$y[obj$y == names(table(obj$y))[which.min(table(obj$y))]]) / length(obj$y),2))), lty=c(1,1,2,NA,NA,NA), pch=c(NA,NA,NA,19,19,NA), col=c("grey","black","black","black",err_colcode,NA), seg.len=1.2, bty="n", cex=0.9)
}else if(measureType == "acc"){ #accuracy
# stats
acc_perf <- performance(pred, measure = "acc")
acc_ind = sapply(1:obj$reps, function(z){ which.max(acc_perf@y.values[[z]]) })
if(any(acc_ind == 1)){ # get rid of infinities in 'acc_perf@x.values'
tt <- (1:obj$reps)[acc_ind == 1]; for(l in 1:length(tt)){ if( is.infinite(slot(acc_perf, "x.values")[[tt[l]]][1]) ){ acc_ind[tt[l]] <- 2 }}
}
acc_val = sapply(1:obj$reps, function(z){ acc_perf@y.values[[z]][acc_ind[z]] })
acc_cut = sapply(1:obj$reps, function(z){ acc_perf@x.values[[z]][acc_ind[z]] })
avg_acc <- mean(acc_val)
avg_acc_cut <- mean(acc_cut)
avg_acc_avgcut <- sapply(1:obj$reps, function(z){ acc_perf@y.values[[z]][which.min(abs(acc_perf@x.values[[z]] - avg_acc_cut))] }) # this is only an approximation since it seems difficult to find performance for a specific cutoff
acc_colcode <- ifelse( all(mean(avg_acc_avgcut) > c(0.5, mean(obj$y==1), mean(obj$y==0))), "green", ifelse(all(mean(avg_acc_avgcut) > 0.5), "orange", "red") )
# plot
plot(acc_perf, lty=1, col="grey", xlim=c(0,1), ylim=c(0,1)) # cutoff-accuracy curve for each randomisation (1:obj$reps)
points(acc_cut, acc_val) # optimal (cutoff,accuracy) point for each randomisation (1:obj$reps)
abline(v = avg_acc_cut); abline(h = avg_acc) # average of optimal cutoff-accuracy
points(avg_acc_cut, mean(avg_acc_avgcut), pch=19, col=acc_colcode)
legend("bottomleft", c(paste0("avg accuracy (w/ specific cutoff) = ", round(avg_acc,2)), paste0("avg accuracy (w/ avg cutoff) = ", round(mean(avg_acc_avgcut),2)), paste0("Prob[y=1] = ", round(mean(obj$y==1),2))), bty="n", cex=0.8)
}else{
stop("'measureType' must be either 'err' (for error rate) or 'acc' (for accuracy rate).")
}
}else if(obj$family == "multinomial"){
# (no cutoff/error plot is produced for the multinomial family)
}
### PLOT 4 --- ROC for best cross-validation parameter for all replications, plus average in bold
#
avg_auc <- mean(obj$AUC_opt[,"test"])
if(obj$family == "binomial"){
pred <- prediction(obj$ROC_to_plot_binom$pred_test, obj$ROC_to_plot_binom$labels)
perf <- performance(pred, "tpr", "fpr")
dd <- data.frame(x = unlist(perf@x.values), y = unlist(perf@y.values)); dd <- dd[order(dd[,1]),]
xvals <- seq(0, 1, by=0.1); lx <- length(xvals)
intervals <- matrix(c(xvals-0.05, xvals+0.05), lx, 2); idx <- matrix(0, lx, 2)
for(k in 1:nrow(intervals)){ idx[k,] <- range(which(dd$x >= intervals[k,1] & dd$x < intervals[k,2])) }
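# 'idx[k,]' holds the first and last index (in the x-ordered pooled ROC points) that
# fall into the k-th bin of width 0.1 centred at 'xvals[k]'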
forget <- which(is.infinite(idx[,1]) & is.infinite(idx[,2])) # bins containing no ROC points
idx[forget,] <- 0
ofs_x <- 0.025
percentile_values <- c(0.05, 0.15, 0.25, 0.5, 0.75, 0.85, 0.95)
percentiles <- matrix(0, length(percentile_values), lx); x_vec <- matrix(0, 2, lx); means <- stdev <- rep(0,lx)
for(u in 1:lx){
percentiles[,u] <- quantile(dd$y[idx[u,1]:idx[u,2]], probs=percentile_values)
x_vec[,u] <- c(xvals[u] - ofs_x, xvals[u] + ofs_x)
means[u] <- mean(dd$y[idx[u,1]:idx[u,2]])
stdev[u] <- sd(dd$y[idx[u,1]:idx[u,2]])
}
# plot
plot(perf, col="grey", lty=1, ylim=c(0,1.05), xlab="false positive rate", ylab="true positive rate")
for(u in 1:lx){
# BOXES
rect(x_vec[1,u], percentiles[3,u], x_vec[2,u], percentiles[5,u], col="white") # fill boxes
segments(x_vec[1,u], percentiles[4,u], x_vec[2,u], percentiles[4,u], lty=1, lwd=3, lend=1)
# WHISKERS
segments(xvals[u], percentiles[5,u], xvals[u], percentiles[7,u], lty=1, lwd=1, lend=1)
segments(xvals[u], percentiles[3,u], xvals[u], percentiles[1,u], lty=1, lwd=1, lend=1)
# WHISKER ENDS
segments(xvals[u]-ofs_x/2, percentiles[6,u], xvals[u]+ofs_x/2, percentiles[6,u], lty=1, lwd=1, lend=1)
segments(xvals[u]-ofs_x/2, percentiles[2,u], xvals[u]+ofs_x/2, percentiles[2,u], lty=1, lwd=1, lend=1)
segments(xvals[u]-ofs_x/4, percentiles[7,u], xvals[u]+ofs_x/4, percentiles[7,u], lty=1, lwd=1, lend=1)
segments(xvals[u]-ofs_x/4, percentiles[1,u], xvals[u]+ofs_x/4, percentiles[1,u], lty=1, lwd=1, lend=1)
}
if(length(forget) == 0){
lines(xvals, predict(loess(means ~ xvals)), lty=1, lwd=2)
}else{
lines(xvals[-forget], predict(loess(means[-forget] ~ xvals[-forget])), lty=1, lwd=2) # drop empty bins before smoothing
}
lines(seq(0,1,by=0.1), seq(0,1,by=0.1), lty=2)
# OR just confidence intervals instead of boxplots
#lines(xvals, predict(loess(extremes[1,] ~ xvals)), lty=2, lwd=1); lines(xvals, predict(loess(extremes[2,] ~ xvals)), lty=2, lwd=1)
text(x = xvals, y = rep(1.05,length(xvals)), labels = sapply(1:length(xvals), function(aa){ paste0(strsplit(as.character(round(stdev,2)), split="")[[aa]][2:4], collapse='') }), col="black", cex=0.7)
# for text inside boxes use:
#text(x = xvals, y = percentiles[5,]-0.025, labels = sapply(1:length(xvals), function(aa){ paste0(strsplit(as.character(round(stdev,2)), split="")[[aa]][2:4], collapse='') }), col="black", cex=0.7)
labls <- c(paste0("avg AUC = ", round(avg_auc,2)), "fpr-tpr curves", "mean fpr-tpr curve", "standard deviation")
legend("bottomright", legend=labls, lty=c(NA,1,1,NA), lwd=c(NA,1,2,NA), col=c(NA,'grey','black','black'), bty="n", cex=0.9)
text(x=0.55, y=0.02, labels="x.xx", cex=0.9)
}else if(obj$family == "multinomial"){
lvls <- levels(factor(obj$y))
d_lvl <- list(); auc_lvl <- matrix(0, obj$reps, lu);
#pdf("~/Downloads/f1.pdf", height=6, width=6, pointsize=18)
plot(0, col="white", xlim=c(0,1), ylim=c(0,1.04), xlab="False positive rate", ylab="True positive rate")
for(lvls_id in 1:lu){
pred_lvl <- prediction(obj$ROC_to_plot_multinom[[lvls[lvls_id]]]$pred_test, obj$ROC_to_plot_multinom[[lvls[lvls_id]]]$labels)
perf_lvl <- performance(pred_lvl, "tpr", "fpr")
auc_lvl[, lvls_id] <- unlist(slot(performance(pred_lvl, "auc"), "y.values"))
roc_x <- roc_y <- c()
for(kk in 1:obj$reps){
#lines(unlist(perf_lvl@x.values[[kk]]), unlist(perf_lvl@y.values[[kk]]), lty=2, col=1+lvls_id)
roc_x <- c(roc_x, unlist(perf_lvl@x.values[[kk]]))
roc_y <- c(roc_y, unlist(perf_lvl@y.values[[kk]]))
}
d_lvl[[lvls_id]] <- cbind(roc_x, roc_y); d_lvl[[lvls_id]] <- d_lvl[[lvls_id]][order(d_lvl[[lvls_id]][,1]),] # order points along x axis
if(is.null(roc_smoothing)){ roc_smoothing <- 0.25 }
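# larger 'spar' values give smoother ROC curves; smaller values follow the pooled points more closely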
smoothingSpline <- smooth.spline(d_lvl[[lvls_id]][,1], d_lvl[[lvls_id]][,2], spar=roc_smoothing)
sSpline_x <- smoothingSpline$x
sSpline_y <- smoothingSpline$y; sSpline_y[sSpline_y > 1] <- 1
lines(sSpline_x, sSpline_y, col=lvls_id+1, lty=1, lwd=2)
#points(d_lvl[[lvls_id]][,1], d_lvl[[lvls_id]][,2], pch='.', col=lvls_id+1)
#lines(d_lvl[[lvls_id]][,1], predict(loess(d_lvl[[lvls_id]][,2] ~ d_lvl[[lvls_id]][,1], span=0.2)), col=lvls_id+1, lty=1, lwd=2)
}
avg_auc <- round(mean(auc_lvl),2)
the_names <- lvls #; the_names <- multinom_labels
legend("bottomright", legend = paste0("y=", the_names, ": ", sprintf("%.2f",round(colMeans(auc_lvl),2))), title=expression(bold("one vs all AUC:")), lwd=2, lty=rep(1,lu), col=2+uniq, bty="n", cex=0.9)
legend("bottomleft", legend = paste0("avg AUC: ", sprintf("%.2f", avg_auc)), bty="n", cex=0.9)
#dev.off()
}
### PLOT 5 --- positive/negative densities as a function of the linear predictor
#
if(obj$family == "binomial"){
# ALTERNATIVE WAY TO FIND THE CUTOFF, GIVING DIFFERENT WEIGHTS TO FALSE POSITIVES AND FALSE NEGATIVES
# SEE https://www.r-bloggers.com/a-small-introduction-to-the-rocr-package
# cost.perf = performance(pred, "cost", cost.fp = 1, cost.fn = 1)
# pred@cutoffs[[1]][which.min(cost.perf@y.values[[1]])]
# organise data
id1 <- c(sapply(1:obj$reps, function(z){ obj$y[obj$id_test[[z]]]==1 }))
d <- data.frame(label = as.factor(id1+0), val_linpred = c(obj$linear_predictor)); d$val_prob <- 1/(1+exp(-d$val_linpred))
# plot
nbins <- 40; col0 <- "skyblue"; col1 <- rgb(1,0,0,0.6)
tt <- "linpred" # {"prob", "linpred"}
if(tt == "linpred"){
optcut <- -log((1-avg_err_cut)/avg_err_cut) # obtained by solving 'P = 1 / (1 + exp(-L))' for the linear predictor 'L'
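# since p = 1/(1+exp(-L)), a cutoff p on the probability scale maps to L = log(p/(1-p));
# e.g. avg_err_cut = 0.5 corresponds to optcut = 0 on the linear-predictor scale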
dd <- d$val_linpred
if(is.null(hist_range)){
n1 <- min(d$val_linpred); n2 <- max(d$val_linpred); wd <- n2-n1; perc <- abs(range(d$val_linpred)-optcut)/wd
}else{
n1 <- hist_range[1]; n2 <- hist_range[2]; wd <- n2-n1; perc <- abs(range(hist_range)-optcut)/wd
}
brks <- c(seq(n1, optcut, len=perc[1]*nbins), seq(optcut, n2, len=perc[2]*nbins+1)[-1])
}else if(tt == "prob"){
optcut <- avg_err_cut
dd <- d$val_prob
brks <- seq(0, 1, len=nbins)
}
# confusion matrix
# individual CM
#confu <- confusionMatrix(data=as.factor(as.numeric(obj$y.testpred.opt[,ii] > err_cut[ii])), reference = obj$y[obj$id.test[[ii]]])$table
# grouped CM: assumes the average cutoff for all replications
confu <- confusionMatrix(data=factor(as.numeric(c(obj$y_testpred_opt) > avg_err_cut)), reference = factor(obj$y[unlist(obj$id_test)]))$table
dc <- list(tnr = confu[1,1] / sum(confu[,1]),
tpr = confu[2,2] / sum(confu[,2]),
fnr = confu[1,2] / sum(confu[,2]),
fpr = confu[2,1] / sum(confu[,1]))
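# caret's confusionMatrix() table has predictions in rows and the reference (true) classes
# in columns, so e.g. tpr = confu[2,2]/sum(confu[,2]) is TP/(TP+FN) and
# tnr = confu[1,1]/sum(confu[,1]) is TN/(TN+FP)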
# grouped CM: assumes specific cutoffs for each replication
#auxM <- matrix(0, nrow(obj$y.testpred.opt), ncol(obj$y.testpred.opt))
#for(k in 1:ncol(auxM)){ auxM[,k] <- obj$y.testpred.opt[,k] > err_cut[k] }
#confu <- confusionMatrix(data=as.factor(as.numeric(c(auxM))), reference = obj$y[unlist(obj$id.test)])$table
#
#u <- par("usr"); v <- c(grconvertX(u[1:2], "user", "ndc"), grconvertY(u[3:4], "user", "ndc"))
#v <- c( (v[1]+v[2])/2, v[2], (v[3]+v[4])/2, v[4] )
# plot
# fourfoldplot( round(confu / rep(colSums(confu),each=2), 2), col=c("grey50","grey80"), conf.level=0, std="all.max")
#pdf("./nirs/typesetting/nirs_paper1_labdata/figures/qqq.pdf", height=6, width=6, pointsize=12)
if(is.null(hist_range)){
hist(dd[d$label==0], col=col0, freq=F, breaks=brks, border="white", xlab=ifelse(tt=="prob","probability of class 1","linear predictor"), ylab="density", main="")
hist(dd[d$label==1], col=col1, freq=F, breaks=brks, border="white", add=T)
}else{
hist(dd[d$label==0 & dd > hist_range[1] & dd < hist_range[2]], col=col0, freq=F, breaks=brks, border="white", xlab=ifelse(tt=="prob","probability of class 1","linear predictor"), ylab="density", main="")
hist(dd[d$label==1 & dd > hist_range[1] & dd < hist_range[2]], col=col1, freq=F, breaks=brks, border="white", add=T)
}
abline(v=optcut, lwd=2)
#
#l1 <- legend("topleft", "optimal cutoff", lty=1, lwd=2, col="black", seg.len=0.8, bty="n", cex=0.9)
#l2 <- legend(x=l1$rect$left, y=l1$rect$top-l1$rect$h, c("uninfected (y=0)", "infected (y=1)"), title=expression(bold("true class:")), pch=15, col=c(col0,col1), seg.len=0.8, bty="n", cex=0.9)
#legend("bottomleft", c(paste0("avg error = ",round(avg_err_avgcut,2)), ""), bty="n", cex=0.9)
#
#l1 <- legend("bottomleft", c("uninfected (y=0)", "infected (y=1)", ""), title=expression(bold("true class:")), pch=c(15,15,NA), col=c(col0,col1,NA), seg.len=0.8, bty="n", cex=0.9)
#l2 <- legend(x=l1$rect$left, y=with(l1$rect, top+0.1), c(paste0("avg error = ",round(avg_err_avgcut,2)), "optimal cutoff"), col=c(NA,"black"), lty=c(NA,1), lwd=c(NA,2), seg.len=0.8, bty="n", cex=0.9)
#
legend("bottomleft", c(paste0("average error = ",round(avg_err_avgcut,2)), "optimal cutoff", expression(bold(true*" "*class*":")), "uninfected (y=0)", "infected (y=1)", ""), lty=c(NA,1,NA,1,1,NA), lwd=c(NA,2,NA,NA,NA,NA), pch=c(NA,NA,NA,15,15,NA), col=c(NA,"black",NA,col0,col1,NA), seg.len=0.8, bty="n", cex=0.9)
par(fig = c(0.15, 0.55, 0.5, 0.9), new=TRUE, mar=2+c(0,0,0,0))
confusion_plot(d=dc, fam=obj$family)
par(oldparams) # reset 'fig' and margins for any subsequent plots
#dev.off()
}else if(obj$family == "multinomial"){
lvls <- levels(as.factor(obj$y)); lu <- length(lvls)
# compute indices of test observation by true class
lvls_id_true <- list()
for(q in 1:lu){
lvls_id_true[[paste0("lvl",lvls[q])]] <- unlist(sapply(1:obj$reps, function(z){ obj$id_test[[z]][obj$y[obj$id_test[[z]]]==uniq[q]] }))
}
# compute class probabilities by response class
classProbs_matrix <- list()
for(q in 1:lu){
classProbs_matrix[[paste0("lvl",lvls[q])]] <- 0
for(r in 1:obj$reps){
idx <- obj$y[obj$id_test[[r]]] == uniq[q]
classProbs_matrix[[paste0("lvl",lvls[q])]] <- rbind(classProbs_matrix[[paste0("lvl",lvls[q])]], obj$classProbs[[r]][idx,])
}
classProbs_matrix[[paste0("lvl",lvls[q])]] <- classProbs_matrix[[paste0("lvl",lvls[q])]][-1,]
}
# compute percentage of correct class as given by highest probability
classPercents <- rep(NA,lu)
for(q in 1:lu){
classPercents[q] <- mean((apply(classProbs_matrix[[paste0("lvl",lvls[q])]], 1, which.max) - 1) == uniq[q])
}
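# NOTE: this assumes the classes are coded 0,...,lu-1, so that the column index returned
# by which.max() minus one equals the class label in 'uniq'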
# plot predicted class probabilities
#flat <- unlist(lapply(classProbs_matrix, colMeans))
#plot(uniq,rep(0,lu), col="white", xlim=range(uniq), ylim=c(0.8, 1.1)*range(flat))
#for(t in uniq){ lines(uniq, colMeans(classProbs_matrix[[paste0("lvl",t)]]), type="l", lwd=2, col=t+1) }
#legend("topright",paste0("y=",uniq), seg.len=0.75, bty="n", cex=0.9, col=uniq+1, lty=1, lwd=2)
ddd <- as.table(matrix(unlist(lapply(classProbs_matrix, colMeans)), lu, lu, byrow=F)); rownames(ddd) <- paste0("pred_",lvls); colnames(ddd) <- paste0("true_",lvls)
#pdf("~/Downloads/f2.pdf", height=6, width=6, pointsize=18)
the_names <- if(is.null(multinom_labels)){ lvls }else{ multinom_labels }
barplot(ddd, ylim=c(0,1.2), beside=T, col=uniq+2, names.arg=the_names, xlab="true class", ylab="average predicted class probability")
legend("top", paste0("y=",the_names), title=expression(bold("predicted class:")), horiz=T, text.width=2.5, bty="n", cex=0.9, pch=rep(15,lu), col=uniq+2)
#dev.off()
# plot confusion matrix
confu_multinom <- confusionMatrix(data=as.factor(as.numeric(c(obj$y_testpred_opt))), reference = factor(obj$y[unlist(obj$id_test)]))$table
avg_err_avgcut <- (sum(confu_multinom) - sum(diag(confu_multinom))) / sum(confu_multinom)
stdev_err_avgcut <- NA # not estimated for the multinomial family (no per-replication cutoff errors)
#pdf("~/Downloads/f3.pdf", height=6, width=6, pointsize=18)
confusion_plot(d=confu_multinom, fam=obj$family, avgError=avg_err_avgcut, multinom_labels=multinom_labels)
#dev.off()
}
return(list(err = avg_err_avgcut,
err_stdev = stdev_err_avgcut,
auc = avg_auc,
roughnessStd = mean(obj$beta_roughnessStd)) )
}#glm
}else if(inherits(obj, "fdaModelPred")){
if(obj$model == "lm"){
print(paste0("avgTestErr_avgTrainCut = ", obj$avgTestErr))
biasMat <- colMeans(obj$bias_test); names(biasMat) <- paste('age', sort(unique(obj$new_y)))
print("Bias:")
print(biasMat)
plot(sort(unique(obj$new_y)), biasMat, pch=19, xlab="age", ylab="bias"); abline(0,0)
legend('topright', paste0("avgTestErr = ", round(mean(obj$err_test), 2)), bty="n", cex=0.9)
}else if(obj$model == "glm"){
if(obj$family == "binomial"){
print(paste0("avgTestErr_avgTrainCut = ", round(obj$avgTestErr, 2)))
confusion_plot(d = obj$errorBreakdown, fam = obj$family)
}else if(obj$family == 'multinomial'){
print(paste0("avgTestErr = ", round(obj$avgTestErr, 2)))
confusion_plot(d = obj$errorBreakdown, fam = obj$family) }
}
}else{
stop("Class of 'obj' in fdaPlot() is invalid.")
}
}