#' Model predictions
#'
#' Function generates a data frame of model predictions for the typical value in the population,
#' individual predictions and data predictions. The function can also be used to generate datasets
#' without predictions using the design specified in the arguments.
#'
#' @param poped.db A PopED database created by \code{\link{create.poped.database}}.
#' @param models_to_use Which model numbers should we use?
#' Model numbers are defined in \code{design} below using \code{model_switch}. For an explanation see \code{\link{create_design}}.
#' @param model_num_points How many extra observation rows should be created in the data frame for each group or individual
#' per model. If used then the points are placed evenly between \code{model_minxt} and \code{model_maxxt}. This option
#' is used by \code{\link{plot_model_prediction}} to simulate the response of the model on a finer grid then the defined design.
#' If \code{NULL} then only the input design is used. Can be a single value or a vector the same length as the number of models.
#' @param model_minxt The minimum time value for extra observation rows indicated by \code{model_num_points}.
#' A vector the same length as the number of models
#' @param model_maxxt The minimum time value for extra observation rows indicated by \code{model_num_points}.
#' A vector the same length as the number of models
#' @param include_sample_times Should observations rows in the output data frame include the times indicated in the input design?
#' @param IPRED Should we simulate individual predictions?
#' @param DV should we simulate observations?
#' @param include_a Should we include the continuous design variables in the output?
#' @param include_x Should we include the discrete design variables in the output?
#' @param groups_to_use Which groups should we include in the output data frame?Allowed values are \code{"all"} or
#' a vector of numbers indicating the groups to include, e.g. \code{c(1,3,6)}.
#' @param filename A filename that the data frame should be written to in comma separate value (csv) format.
#' @param dosing A list of lists that adds dosing records to the data frame (Each inner list corresponding to a group in the design).
#' @param design A list that is passed as arguments to the function \code{\link{create_design}} to create a design object.
#' @param model A list containing the model elements to use for the predictions
#' @param parameters A list of parameters to use in the model predictions.
#' @param predictions Should the resulting data frame have predictions? Either \code{TRUE} or \code{FALSE}
#' or \code{NULL} in which case the function decides based on other arguments.
#' @param manipulation A list of one or more \code{\link[base]{expression}} arguments. Each expression is
#' evaluated using the code \code{for(i in 1:length(manipulation)){df <- within(df,{eval(manipulation[[i]])})}}.
#' Can be used to transform
#' or create new columns in the resulting data frame. Note that these transformations are created after any model predictions occur,
#' so transformations in columns having to do with input to model predictions will not affect the predictions.
#' @param PI Compute prediction intervals for the data given the model. Predictions are based on first-order approximations to
#' the model variance and a log-normality assumption of that variance (by default), if all predictions are positive, otherwise a
#' normal distribution is assumed.
#' @param PI_conf_level The confidence level for the prediction interval computed.
#' @param PI_ln_dist Should the PI calculation be done assuming log-normal or a normal distribution. TRUE is the default and
#' indicates a log-normal distribution. If any of the PRED values from the model are negative then a normal distribution is
#' assumed.
#'
#' @return A dataframe containing a design and (potentially) simulated data with some dense grid of samples and/or
#' based on the input design.
#'
#' @family evaluate_design
#' @family Simulation
#'
#' @example tests/testthat/examples_fcn_doc/examples_model_prediction.R
#'
#' @export
# @importFrom mvtnorm rmvnorm
# @importFrom dplyr rbind_list
model_prediction <- function(poped.db=NULL,
design=list( ## passed to create_design
xt=poped.db$design[["xt"]], ## -- Matrix defining the initial sampling schedule row major, can also be a list of vectors--
groupsize=poped.db$design$groupsize, ## -- Vector defining the size of the different groups (num individuals in each group) --
m=poped.db$design[["m"]], ## -- Number of groups, computed from xt if not defined --
x=poped.db$design[["x"]], ## -- Matrix defining the initial discrete values --
a=poped.db$design[["a"]], ## -- Vector defining the initial covariate values --
ni=poped.db$design$ni, ## -- Vector defining the number of samples for each group, computed as all elements of xt by default --
model_switch=poped.db$design$model_switch),
model = list(
fg_pointer=poped.db$model$fg_pointer,
ff_pointer=poped.db$model$ff_pointer,
ferror_pointer= poped.db$model$ferror_pointer),
parameters=list(
docc=poped.db$parameters$docc,
d = poped.db$parameters$d,
bpop = poped.db$parameters$bpop,
covd = poped.db$parameters$covd,
covdocc = poped.db$parameters$covdocc,
sigma = poped.db$parameters$sigma),
IPRED=FALSE,
DV=FALSE,
dosing=NULL, # otherwise a list of lists with "Time" and dosing columns needed for each group
predictions=NULL,
filename=NULL,
models_to_use="all",
model_num_points=NULL,
model_minxt=NULL,
model_maxxt=NULL,
include_sample_times=T,
groups_to_use="all",
include_a = TRUE,
include_x = TRUE,
manipulation=NULL,
PI = FALSE,
PI_conf_level = 0.95,
PI_ln_dist = TRUE)
{
if(is.null(predictions)){
predictions=FALSE
if(!is.null(poped.db) || (!is.null(unlist(parameters))) && !is.null(unlist(model)) && !is.null(unlist(design))) predictions=TRUE
}
if(is.null(poped.db) && is.null(unlist(design))) stop("Either 'poped.db' or 'design' need to be defined")
design <- do.call(create_design,design)
NumOcc=poped.db$parameters$NumOcc ## this should be removed and put into design
if(DV) IPRED=T
with(design,{
maxxt=poped.choose(poped.db$design_space$maxxt,xt) ## -- Matrix defining the max value for each sample --
minxt=poped.choose(poped.db$design_space$minxt,xt) ## -- Matrix defining the min value for each sample --
#--- size checking --------
if(!is.null(dosing)){
if(!length(dosing)==m){
if(length(dosing) == 1) {
dosing <- rep(dosing,m)
} else {
stop("dosing argument does not have the right dimensions.
Must be 1 list or a list of lists the size of the number of groups")
}
}
}
if(predictions){
docc_size = 0
if((!isempty(parameters$docc[,2]))){
docc_size = size(parameters$docc[,2,drop=F],1)
}
d_size = 0
if((!isempty(parameters$d[,2]))){
d_size = size(parameters$d[,2,drop=F],1)
}
}
used_times <- zeros(size(xt))
for(i in 1:size(xt,1)) used_times[i,1:ni[i]] <- 1
if(all(groups_to_use=="all")){
groups_to_use = 1:size(xt,1)
}
if(all(models_to_use=="all")){
#models_to_use = unique(as.vector(model_switch))
models_to_use = unique(as.vector(model_switch[used_times==1]))
}
df <- data.frame()
id_num_start <- 1
for(i in 1:length(groups_to_use)){
if(!exists("a")){
a_i = zeros(0,1)
} else if((isempty(a))){
a_i = zeros(0,1)
} else {
a_i = a[groups_to_use[i],,drop=F]
}
if(!exists("x")){
x_i = zeros(0,1)
} else if((isempty(x))){
x_i = zeros(0,1)
} else {
x_i = x[groups_to_use[i],,drop=F]
}
num_ids = groupsize[groups_to_use[i]]
if(all(is.null(model_num_points))){
xt_i = xt[groups_to_use[i],1:ni[groups_to_use[i]]]
model_switch_i = model_switch[groups_to_use[i],1:ni[groups_to_use[i]]]
if(!all(models_to_use == unique(as.vector(model_switch[used_times==1])))){ ## needs testing
xt_i = xt_i[model_switch_i %in% models_to_use]
model_switch_i = model_switch_i[model_switch_i %in% models_to_use]
}
} else {
xt_i <- c()
model_switch_i <- c()
if(length(models_to_use)>1 && length(model_num_points)==1) model_num_points <- rep(model_num_points,length(models_to_use))
for(j in models_to_use){
if(is.null(model_minxt)){
minv <- min(as.vector(minxt[model_switch==j]),na.rm = TRUE)
} else {
if(length(models_to_use)>1 && length(model_minxt)==1) model_minxt <- rep(model_minxt,length(models_to_use))
minv = model_minxt[j]
}
if(is.null(model_maxxt)){
maxv <- max(as.vector(maxxt[model_switch==j]),na.rm = TRUE)
} else {
if(length(models_to_use)>1 && length(model_maxxt)==1) model_maxxt <- rep(model_maxxt,length(models_to_use))
maxv = model_maxxt[j]
} #xt = t(seq(minv,maxv,length.out=model_num_points[i]))
tmp_num_pts <- model_num_points[j]
if(length(model_num_points)<j) tmp_num_pts <- model_num_points[1]
xt_i= c(xt_i,seq(minv,maxv,length.out=tmp_num_pts))
#model.pred <- rbind(xt)
#model.pred <- data.frame(Time=xt)
#model.pred <- c(model.pred,foo=xt)
#browser()
model_switch_i = c(model_switch_i,j*matrix(1,1,tmp_num_pts))
}
if(include_sample_times){
xt_i_extra = xt[groups_to_use[i],1:ni[groups_to_use[i]]]
model_switch_i_extra = model_switch[groups_to_use[i],1:ni[groups_to_use[i]]]
if(!all(models_to_use == unique(as.vector(model_switch[used_times==1])))){ ## needs testing
xt_i_extra = xt_i_extra[model_switch_i_extra %in% models_to_use]
model_switch_i_extra = model_switch_i_extra[model_switch_i_extra %in% models_to_use]
}
tmp.include <- !(xt_i_extra %in% xt_i)
xt_i <- c(xt_i,xt_i_extra[tmp.include])
model_switch_i <- c(model_switch_i,model_switch_i_extra[tmp.include])
tmp.order <- order(xt_i)
xt_i <- xt_i[tmp.order]
model_switch_i <- model_switch_i[tmp.order]
}
}
pred <- NA
group.df <- data.frame(Time=xt_i,PRED=pred,Group=groups_to_use[i],Model=model_switch_i)
if(predictions){
bpop_val <- parameters$bpop[,2,drop=F]
b_ind = zeros(1,d_size)
bocc_ind = zeros(docc_size,NumOcc)
g0 = feval(model$fg_pointer,x_i,a_i,bpop_val,b_ind,bocc_ind)
pred_list <- feval(model$ff_pointer,model_switch_i,xt_i,g0,poped.db)
pred_list$poped.db <- NULL
pred <- drop(pred_list[[1]])
group.df["PRED"] <- pred
# TODO: add this to both IPRED and DV and chnage extra columns names to replct where
# prediction comes from.
if(length(pred_list)>1){
# pred_list <- pred_list[sapply(pred_list, is.numeric)]
extra_df <- data.frame(pred_list[-1])
group.df <- cbind(group.df,extra_df)
}
if(PI){
sigma_full = parameters$sigma
d_full = getfulld(parameters$d[,2],parameters$covd)
docc_full = getfulld(parameters$docc[,2,drop=F],parameters$covdocc)
cov <- v(as.matrix(model_switch_i),as.matrix(xt_i),
t(x_i),t(a_i),bpop_val,t(b_ind),bocc_ind,d_full,
sigma_full,docc_full,poped.db)[[1]]
PI_alpha <- 1-PI_conf_level
z_val <- qnorm(1-PI_alpha/2)
#z_val <- qt(1-PI_alpha/2,df=(sum(design$groupsize)-5))
se_val <- sqrt(diag(cov))
PI_u <- pred + z_val*se_val
PI_l <- pred - z_val*se_val
#PI_ln_dist <- TRUE
if(PI_ln_dist && all(pred>0)){
# using method of moments (Stein)
# assuming univariate distributions
se_val_log <- sqrt(log(diag(cov)/(pred^2)+1))
mean_val_log <- log(pred^2/sqrt(diag(cov) + pred^2))
PI_u <- exp(mean_val_log + z_val*se_val_log)
PI_l <- exp(mean_val_log - z_val*se_val_log)
}
group.df <- data.frame(group.df,PI_l=PI_l,PI_u=PI_u)
}
}
# group.df <- data.frame(Time=xt_i,PRED=drop(pred[[1]]),Group=groups_to_use[i],
# ##paste("Group",i,sep="_"),
# Model=model_switch_i)
#
if(include_a && !isempty(a_i)){
rownames(a_i) <- NULL
group.df <- data.frame(group.df,a_i)
}
if(include_x && !isempty(x_i)){
rownames(x_i) <- NULL
group.df <- data.frame(group.df,x_i)
}
# if(include_id){
# #parameters$groupsize[groups_to_use[i]]
# id_num_end <- id_num_start + num_ids - 1
# group.df <- merge(group.df,data.frame(ID=id_num_start:id_num_end))
# id_num_start <- id_num_end + 1
# group.df <- group.df[c(length(group.df),1:(length(group.df)-1))] #reorder columns
# }
if(IPRED){
group.df.ipred <- data.frame()
bocc_start= 1
id_num_end <- id_num_start + num_ids - 1
id_vals <- id_num_start:id_num_end
if(predictions){
fulld = getfulld(parameters$d[,2],parameters$covd)
fulldocc = getfulld(parameters$docc[,2,drop=F],parameters$covdocc)
if(any(size(fulld)==0)){
b_sim_matrix = zeros(num_ids,0)
} else {
b_sim_matrix = mvtnorm::rmvnorm(num_ids,sigma=fulld)
}
bocc_sim_matrix = zeros(num_ids*NumOcc,length(parameters$docc[,2,drop=F]))
if(nrow(fulldocc)!=0) bocc_sim_matrix = mvtnorm::rmvnorm(num_ids*NumOcc,sigma=fulldocc)
}
for(j in 1:num_ids){
tmp.df <- group.df
if(predictions){
bocc_stop=bocc_start + NumOcc - 1
if(nrow(fulldocc)==0){
bocc_start=0
bocc_stop=0
}
fg_sim = feval(model$fg_pointer,x_i,a_i,parameters$bpop[,2,drop=F],b_sim_matrix[j,],t(bocc_sim_matrix[bocc_start:bocc_stop,]))
bocc_start = bocc_stop + 1
ipred <- feval(model$ff_pointer,model_switch_i,xt_i,fg_sim,poped.db)
ipred <- drop(ipred[[1]])
} else {
ipred <- xt_i*NA
}
#ID <- (i-1)*num_ids+j
ID <- id_vals[j]
tmp.df["ID"] <- ID
tmp.df["IPRED"] <- ipred
if(DV){
if(predictions){
eps_sim = mvtnorm::rmvnorm(length(xt_i),sigma=parameters$sigma)
dv <- feval(model$ferror_pointer,model_switch_i,xt_i,fg_sim,eps_sim,poped.db)
dv <- drop(dv[[1]])
} else {
dv <- xt_i*NA
}
tmp.df["DV"] <- dv
}
if(!is.null(dosing)){
dose.df <- data.frame(dosing[groups_to_use[i]])
# add constant values within a group to dosing records
for(nam in names(tmp.df[!(names(tmp.df) %in% c("IPRED","PRED","DV","Time"))])){
#if(length(unique(tmp.df[nam]))==1) dose.df <- data.frame(dose.df,nam=tmp.df[1,nam])
if(length(unique(tmp.df[nam]))==1) dose.df[nam] <- tmp.df[1,nam]
}
dose.df$dose_record_tmp <- 1
#if(packageVersion("dplyr") >= "0.5.0"){
tmp.df <- dplyr::bind_rows(dose.df,tmp.df)
#} else {
# tmp.df <- dplyr::rbind_list(dose.df,tmp.df)
#}
tmp.df <- tmp.df[order(tmp.df$Time,tmp.df$dose_record_tmp),]
tmp.df$dose_record_tmp <- NULL
}
group.df.ipred <- rbind(group.df.ipred,tmp.df)
}
id_num_start <- id_num_end + 1
group.df <- group.df.ipred
}
df <- rbind(df,group.df)
#model.pred <- rbind(model.pred,i=returnArgs[[1]])
#model.pred[paste("Group",i,sep="_")] <- returnArgs[[1]]
}
#reorder columns
first_names <- c("ID","Time","DV","IPRED","PRED")
first_names <- first_names[first_names %in% names(df)]
other_names <- names(df[!(names(df) %in% first_names)])
df <- df[c(first_names,other_names)]
df$Group <- as.factor(df$Group)
df$Model <- as.factor(df$Model)
if(IPRED) df$ID <- as.factor(df$ID)
row.names(df) <- NULL
if(!is.null(manipulation)){
for(i in 1:length(manipulation)){
df <- within(df,{
eval(manipulation[[i]])
})
}
}
if(!is.null(filename)) write.table(x=df, filename, row.names=FALSE, quote=FALSE, na=".",sep=",")
return( df )
}) # end with(design,{})
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.