### residplot.R ---
#----------------------------------------------------------------------
## author: Brice Ozenne
## created: aug 29 2017 (11:52)
## Version:
## last-updated: jun 4 2018 (15:20)
## By: Brice Ozenne
## Update #: 124
#----------------------------------------------------------------------
##
### Commentary:
##
### Change Log:
#----------------------------------------------------------------------
##
### Code:
## * Documentation - residplot
#' @title Plot the Residuals of a LVM Object
#' @description Plot the residuals for each outcome of a lvm object.
#'
#' @name residplot
#'
#' @param object a lvm model.
#' @param res.variables the endogenous variable for which the residuals should be displayed.
#' @param obs.variables same as res.variables or a variable present in the model
#' @param sd.kernel the standard deviation of the kernel used to smooth the variance.
#' @param kernel the type of kernel used to smooth the variance.
#' @param plot.weights should the weights used to compute the variance be displayed?
#' @param ncol the number of columns in the graphical display.
#' @param smooth.mean should the mean be displayed across the obs.variables values?
#' @param smooth.sd should the variance be displayed across the obs.variables values?
#' @param plot should the fit be displayed in a graphical window.
#' @param ... additional arguments.
#'
#' @return a list containing:
#' \itemize{
#' \item plot the display of the fit
#' \item data the dataset used to make the display
#' }
#'
#' @examples
#' m <- lvm(y1~eta+x1,y2~eta,y3~eta+x3)
#' latent(m) <- ~ eta
#' dd <- lava::sim(m,100) ## Simulate 100 observations from model
#' e <- estimate(m, dd) ## Estimate parameters
#'
#' res <- residplot(e)
#' residplot(e, obs.variables = "x1")
#'
#' m <- lvm(y~x)
#' distribution(m,~y) <- function(n,mean,x) rnorm(n,mean,exp(x)^.5)
#' d <- lava::sim(m,1e3)
#'
#' e <- estimate(m,data = d)
#'
#' residplot(e)
#' # residplot(e, plot.weights = TRUE)
#' residplot(e, res.variables = "y", obs.variables = "x")
#' @export
residplot <- function (object, ...) {
UseMethod("residplot", object)
}
## * residplot.lvmfit
#' @rdname residplot
#' @export
residplot.lvmfit <- function(object, res.variables = endogenous(object), obs.variables = res.variables,
sd.kernel = 0.5, kernel = "dnorm", plot.weights = FALSE, ncol = NULL,
smooth.mean = TRUE, smooth.sd = TRUE, plot = TRUE,...){
fitted <- observed <- NULL ## [:for CRAN check] data.table
if(any(obs.variables %in% lava::vars(object) == FALSE)){
missing.var <- obs.variables[obs.variables %in% lava::vars(object) == FALSE]
stop("variable \"",paste(missing.var, collapse = "\"\""),"\" not found \n",
"all obs.variables must be in the model \n")
}
if(any(res.variables %in% lava::endogenous(object) == FALSE)){
missing.var <- res.variables[res.variables %in% lava::endogenous(object) == FALSE]
stop("outcome \"",paste(missing.var, collapse = "\"\""),"\" not found \n",
"all res.variables must be endogenous \n")
}
if(length(obs.variables) != length(res.variables)){
if(length(obs.variables)==1){
obs.variables <- rep(obs.variables,length(res.variables))
}else{
stop("obs.variables and res.variables must have the same length \n")
}
}
if(!identical(res.variables,obs.variables)){
x.lab <- unique(obs.variables)
if(length(x.lab)>1){
stop("obs.variables must either be identical to res.variables \n",
"or contain only the name of one variable from the model \n")
}
}else{
x.lab <- "observed outcome"
}
## observed values
res <- stats::model.frame(object)
if(is.data.table(res)){
data <- res[,.SD,.SDcols=obs.variables]
}else{
data <- as.data.table(res[,obs.variables])
}
names(data) <- res.variables
data[, "XXXXIdXXXX" := 1:.N]
dtL.data <- data.table::melt(data, id.vars = "XXXXIdXXXX",
variable.name = "endogenous")
## predicted values
#predicted <- as.data.table(stats::predict(object)[,variables])
#predicted[, "XXXXIdXXXX" := 1:.N]
## residuals values
dt.residuals <- as.data.table(stats::residuals(object)[,res.variables])
## dt.residuals <- as.data.table(stats::stats::predict(object)[,res.variables])
names(dt.residuals) <- res.variables
dt.residuals[, "XXXXIdXXXX" := 1:.N]
dtL.residuals <- data.table::melt(dt.residuals, id.vars = "XXXXIdXXXX",
variable.name = "endogenous")
dtL.all <- merge(x = dtL.data,
y = dtL.residuals,
by = c("XXXXIdXXXX","endogenous"))
setnames(dtL.all,
old = c("value.x","value.y"),
new = c("observed","fitted"))
## compute standard deviation
setkeyv(dtL.all, "observed")
dtL.all[, "sdY" := smoothSD(Y = fitted, time = observed,
plot.weights = plot.weights,
sd.kernel = sd.kernel, kernel = kernel),
by = "endogenous"]
## display
gg <- ggplot2::ggplot(dtL.all,
aes_string(x = "observed", y = "fitted"))
gg <- gg + ggplot2::geom_point()
if(smooth.mean){
gg <- gg + ggplot2::geom_smooth(method = "lm",aes(color = "mean"))
}
if(smooth.sd){
sdY <- NULL ## [:for CRAN check] we don't want to move to aes_string because color is indeed a character
gg <- gg + ggplot2::geom_line(aes(y = sdY, color = "standard devation"))
}
if(is.null(ncol)){
ncol <- round(sqrt(length(res.variables)))
}
gg <- gg + ggplot2::facet_wrap(~endogenous, ncol = ncol, labeller = label_both)
gg <- gg + ggplot2::xlab(x.lab) + ggplot2::ylab("residuals")
if(plot){
print(gg)
}
## export
return(invisible(list(data = dtL.all,
plot = gg)))
}
## * smoothSD
smoothSD <- function(Y, time, sd.kernel, kernel, plot.weights){
n <- length(Y)
dt <- data.table(Y=Y,time=scale(time, center = stats::median(time), scale = stats::mad(time)), index = 1:n)
dt[,time:=time-min(time)]
setkey(dt,time)
res <- sapply(1:n, function(i){ # i <- 25
weight <- abs(do.call(kernel, args = list(x = dt$time[i]-dt$time, sd = sd.kernel)))
#weight[weight<1e-6] <- 0
weight <- weight/sum(weight)
if(plot.weights){graphics::plot(weight)}
w.mean <- stats::weighted.mean(Y, weight)
w.sd <- sqrt(sum(weight * (Y - w.mean)^2))
return(w.sd)
})
return(res)
}
#----------------------------------------------------------------------
### residplot.R ends here
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.