#' LME Analysis
#'
#' \code{lme_analysis} analyzes the data from a single simulated trial
#'
#' Analyzes the simulated data (including all paths) from a single simulated
#' trial. Most often called by \code{generateSimualtedResults}. See vignettes
#' for additional information.
#'
#' @param trialdesign_set The set of all pathways for the clinical trial
#' @param dat A dat file as generated by \link{generateData}
#' @param op an options file with the following components:
#' \itemize{
#' \item{\code{op$useDE=TRUE}}{ Binary: should the LME model use expectnacy
#' information if available?}
#' \item{\code{op$t_random_slope=FALSE}}{ Binary: should the LME model use
#' random slopes, as well as random intercepts, for participants?}
#' \item{\code{op$full_model_out=FALSE}}{ Binary: if false, will output model
#' parameters as a row of numeric values for simulation processing (most common);
#' if true, will output an array that allows you to examine the results of a single
#' analysis in more detail}
#' \item{\code{op$simplecarryover=FALSE}}{ Binary: if true will add "time since
#' drug" as a free parameter in the model}
#' \item{\code{op$carryover_t1half=0}}{ If non-zero, will build in an exponential
#' falloff in the "on drug or not" term in the analysis (Db), using this as the
#' half-life.}
#' }
#' @return Generally returns a one row data table with components:
#' \itemize{
#' \item{\code{out$beta}}{ The coefficient corresponding to an interaction
#' between biomarker and outcome}
#' \item{\code{out$betaSE}}{ Standard error of the above coefficient}
#' \item{\code{out$p}}{ p-value conveying the statistical significance of the coefficient}
#' \item{\code{out$issingular}}{ Was the model fit flagged as singular?}
#' \item{\code{out$warning}}{ Any warnings from \code{fitMsgs}}
#' }
#' However, if you have full_model_out set to TRUE, you will instead get an array with components:
#' \itemize{
#' \item{\code{form}}{ The model fit by lmer}
#' \item{\code{fit}}{ The output of lmer}
#' \item{\code{datamerged}}{ The data in the form it was provided to lmer}
#' \item{\code{stdout}}{ The standard one row data table that's usually returned when
#' full_mode_out is set to FALSE, with beta, betaSE, etc}
#' }
#'
#' @examples
#' # See vignettes 1 & 2 for examples of how to call from the function generateSimualtedResults.R
#' # See vignette 3 for an example of how to apply lme_analysis to actual clincal trial results
#' @export
lme_analysis<-function(trialdesign_set,dat,op){
if(missing(op)){
op$useDE<-TRUE
op$t_random_slope<-FALSE
op$full_model_out<-FALSE
op$carryover_t1half <- 0
op$simplecarryover <- FALSE
op$carryover_scalefactor=1
}else if(!("simplecarryover"%in%names(op))){
op$simplecarryover=FALSE
}
if(!("carryover_t1half"%in%names(op))){
op$carryover_t1half=0
op$carryover_scalefactor=1
}
if((op$carryover_t1half>0)&(op$simplecarryover==TRUE)){
error("Don't know what it means to have two different carryover models, so disallowing simplecarryover and a half-life based carryover at the same time for now.")
}
n_groups<-length(trialdesign_set)
datout<-list(rep(NA),n_groups)
lastptID<-0
for(g in 1:n_groups){
trialdesign<-trialdesign_set[[g]]
datSingle<-dat[path==g]
# I. Turn data into this format
# Format trialdesign:
# Add a BL row
trialdesign<-rbind(list(timeptnames="BL",t_wk=0,e=0,tod=0,tsd=0,tpb=0),trialdesign)
# For minimization of coding erors, t_wk is length of the block, not time since
# start, so create a "t" that is just true time with bl as 0:
trialdesign[,t:=as.integer(NA)]
trialdesign$t<-cumulative(trialdesign$t_wk)
# Format dat:
# Make pt data dt that just has the numbers we actually want
timeptnames=union(trialdesign$timeptnames,"BL") # makes sure don't end up with it twice
evalstring<-paste("data<-datSingle[,.(ptID,bm,",
paste(timeptnames,sep="",collapse=","),")]",sep="")
eval(parse(text=evalstring))
# Make sure the ptID is unique even if we're merging across groups:
data$ptID<-data$ptID+lastptID
lastptID<-max(data$ptID)
# Turn it long-form, then merge in the trialdesign data
data.m1<-melt(data,id.vars=c("ptID","bm"),measure.vars=trialdesign$timeptnames,
variable.name="timeptnames",value.name="Sx",na.rm=FALSE)
data.m2<-merge(data.m1,
trialdesign[,.(timeptnames,t,De=e,Db=(tod>0),tsd)],
by="timeptnames",all=TRUE)
# Add a scaler version of Db - Dbc, for continuous - to allow for
# more complex carryover modeling
data.m2[,Dbc:=as.numeric(NA)]
data.m2[Db==TRUE,Dbc:=1]
data.m2[Db==FALSE,Dbc:=((1/2)^(op$carryover_scalefactor*tsd/op$carryover_t1half))]
datout[[g]]<-data.m2
}
datamerged<-datout[[1]]
if(n_groups>1){for(g in 2:n_groups){datamerged<-rbind(datamerged,datout[[g]])}}
# Tests we'll use to sort out what model to use:
# 1) Test whether can possibly include the expectancy-related factor:
varInExp<-length(unique(trialdesign$e[2:length(trialdesign$e)]))
# 2) Test whether can use Db vs need to use t for the interaction term:
datamerged[t>0,meanDb:=mean(Db),by=ptID]
datamerged[t>0,DbVar:=((meanDb!=0)&(meanDb!=1)),by=ptID]
varInDb<-(sum(datamerged[t>0]$DbVar==TRUE)>0)
if(!varInDb){
# Only analyze folks who are ever on drug, in this case:
iEverOnDrug<-unique(datamerged[meanDb==1]$ptID)
datamerged<-datamerged[ptID%in%iEverOnDrug]
}
# 3) Test whether we can use the carryover term, ie, tsd is sometimes non-zero
varintsd=(nrow(datamerged[tsd!=0])>0)
# Use these to pick a model
modelbase="Sx~bm+t"
if(varInDb){
modelbase=paste0(modelbase,"+Dbc+bm*Dbc")
}else{
modelbase=paste0(modelbase,"+bm*t")
}
if((varInExp>1)&(op$useDE==TRUE)){
modelbase=paste0(modelbase,"+De")
}
if(op$simplecarryover&varintsd){
modelbase=paste0(modelbase,"+tsd")
}
if(op$t_random_slope){
modelbase=paste0(modelbase,"+(1+t|ptID)")
}else{
modelbase=paste0(modelbase,"+(1|ptID)")
}
# now knit together and evaluate - poor programming form!
eval(parse(text=paste0("form<-",modelbase)))
# if(varInDb){
# if(op$t_random_slope){
# if((varInExp>1)&(op$useDE==TRUE)){
# form<-Sx~bm+De+Db+t+bm*Db+(1+t|ptID)
# } else {
# form<-Sx~bm+Db+t+bm*Db+(1+t|ptID)
# }
# }else{
# if((varInExp>1)&(op$useDE==TRUE)){
# form<-Sx~bm+De+Db+t+bm*Db+(1|ptID)
# } else {
# form<-Sx~bm+Db+t+bm*Db+(1|ptID)
# }
# }
# }else{
# if(op$t_random_slope){
# if((varInExp>1)&(op$useDE==TRUE)){
# form<-Sx~bm+De+t+bm*t+(1+t|ptID)
# } else {
# form<-Sx~bm+t+bm*t+(1+t|ptID)
# }
# }else{
# if((varInExp>1)&(op$useDE==TRUE)){
# form<-Sx~bm+De+t+bm*t+(1|ptID)
# } else {
# form<-Sx~bm+t+bm*t+(1|ptID)
# }
# }
# }
# Run (should insert tryCatch here, but not working!)
fit<-lmer(form,data=datamerged)
holdWarning<-summary(fit)$fitMsgs
if(length(holdWarning)==0) holdWarning<-as.character(NA)
# set issingular flag off, turn on if get singular warning
issingular<-FALSE
if(length(summary(fit)$optinfo$conv$lme4$messages)>0){
if(summary(fit)$optinfo$conv$lme4$messages[[1]]=="boundary (singular) fit: see ?isSingular"){
issingular<-TRUE
}
}
# Package output
c<-summary(fit)$coefficients
if(varInDb){
p<-c['bm:DbcTRUE','Pr(>|t|)']
beta<-c['bm:DbcTRUE','Estimate']
betaSE<-c['bm:DbcTRUE','Std. Error']
}else{
p<-c['bm:t','Pr(>|t|)']
beta<-c['bm:t','Estimate']
betaSE<-c['bm:t','Std. Error']
}
# pvalue plan from http://mindingthebrain.blogspot.in/2014/02/three-ways-to-get-parameter-specific-p.html
# this is the medium-conservative option
out<-data.table(beta=beta,betaSE=betaSE,p=p,issingular=issingular,warning=holdWarning)
# If full model output reqeusted, will repackage a bit:
if(op$full_model_out){
out<-list(form=form,fit=fit,datamerged=datamerged,stdout=out)
}
return(out)
}
# Short helper functions
#' Cumulative
#'
#' \code{cumulative} turns a vector of intervals into a cumulative running time
#'
#' @export
cumulative<-function(x){
I<-length(x)
y<-rep(NA,I)
y[1]<-x[1]
if(I>1){
for(i in 2:I){
y[i]<-x[i]+y[i-1]
}
}
return(y)
}
#' Modified Gompertz
#'
#' \code{modgompertz} takes response parameters and returns change in symptoms as a function of time
#'
#' This is a slight modification of a standard gompertz function that is designed to model
#' how symptoms can change as a function of time.
#'
#' @param t time - no specific units, just make sure it's consistant across your modeling
#' @param maxr maximum response that can be attained (ceiling)
#' @param disp vertical displacement
#' @param rate rate
#' @return A numeric value indicating how much "improvement" there has been since baseline
#' as of point \code{t} in time. A negative value indicates worsening rather than improvement.
#' @export
modgompertz<-function(t,maxr,disp,rate){
y<-maxr*exp(-disp*exp(-rate*t))
vert_offset<-maxr*exp(-disp*exp(-rate*0))
y<-y-vert_offset
y<-y*(maxr/(maxr-vert_offset)) # return the assymptotic max to what it was
return(y)
}
#' Reknit simulated results
#'
#' \code{reknitsimresults} recombines the results of simulations that have the
#' exact same parameter space set, but were run as separate chunks - e.g., you
#' ran 100 repititions, realized you needed more, and ran 150 more to have 250
#' total. Does not work with rawdataout.
#'
#' @param basesavename The basesavename as described in \link{generateSimulatedResults}
#' @param savedir The directory the files are saved in
#' @return The output is a file just like you would have received from
#' \link{generateSimulatedResults} if it had all been run at once.
#' @export
reknitsimresults<-function(basesavename,savedir){
if(missing(savedir)){
initialdirectory<-getwd()
}else{
initialdirectory<-getwd()
setwd(savedir)
}
sr<-vector(mode="list",length=length(basesavename))
for(iSN in 1:length(basesavename)){
simresults<-readRDS(paste(basesavename[1],"1",sep="_save"))
nextfilenum<-2
while(nextfilenum>0){
nextfile<-paste(basesavename[1],nextfilenum,sep="_save")
if(file.exists(nextfile)){
newresults<-readRDS(nextfile)
simresults$results<-rbind(simresults$results,newresults$results)
if(length(names(simresults))>2){
warning("Haven't implemented adding rawdata together yet")
}
nextfilenum<-nextfilenum+1
}else{
nextfilenum<-0
}
}
sr[[iSN]]<-simresults
}
if(length(sr)>1){
# test the parameter settings are the same
for(iSN in 2:length(sr)){
if(!identical(sr[[1]]$parameterselections$trialdesigns,
sr[[iSN]]$parameterselections$trialdesigns)){
warning("Trial design settings don't match")
}
if(!identical(sr[[1]]$parameterselections$respparamsets,
sr[[iSN]]$parameterselections$respparamsets)){
warning("Response paramset settings don't match")
}
if(!identical(sr[[1]]$parameterselections$blparamsets,
sr[[iSN]]$parameterselections$blparamsets)){
warning("Baseline paramset settings don't match")
}
}
# Merge (inefficiently, if had saved differently could use rbindlist...)
simresults<-sr[[1]]
for(iSN in 2:length(sr)){
simresults$results<-rbind(simresults$results,sr[[iSN]]$results)
}
}else{
simresults<-sr[[1]]
}
return(simresults)
}
# Scratch Script space for doing the development work on the package.
#devtools::install_github("r-lib/devtools")
#install_github("rchendrickson/pmsimstats")
# # Get tools on board
# library("devtools")
# library("roxygen2")
# library("testthat")
# library("knitr")
#
# # Stuff we have to figure out how to handle in terms of namespace...
# library(data.table)
# library(lme4)
# library(lmerTest)
# library(corpcor)
# library(ggplot2)
# library(MASS)
# library(svMisc)
# library(tictoc)
# library(ggpubr)
# library(gridExtra)
# Check, because SO many problems with installation...
#library(devtools)
#has_devel()
# Build an R.buildignore file:
#use_build_ignore("scratch_WorkingScript")
# Start the vingette:
#use_vignette("Produce_Publication_Results")
#use_vignette("Produce_Publication_Results_2_Look_at_data")
#use_vignette("Produce_Publication_Results_3_apply_to_actual_clinical_trial_data")
#
# # Create the data files for the plotting results side:
# mdir<-"C:\\Users\\afane\\Documents\\GitHub\\pmsimstats"
#
# basesavenames_basicpowersets<-c("2020_02_17_core1",
# "2020_02_17_core2",
# "2020_02_17_core3",
# "2020_02_17_core4",
# "2020_02_17_core5",
# "2020_02_18_core1",
# "2020_02_18_core2",
# "2020_02_18_core3",
# "2020_02_18_core4",
# "2020_02_18_core5")
# basesavenames_respparamsetsRates<-c("2020_02_19_respRates1",
# "2020_02_19_respRates2",
# "2020_02_19_respRates3",
# "2020_02_19_respRates4",
# "2020_02_19_respRates5",
# "2020_02_19_respRates6",
# "2020_02_19_respRates7",
# "2020_02_19_respRates8",
# "2020_02_19_respRates9",
# "2020_02_19_respRates10")
# basesavenames_respparamsetsMaxes<-c("2020_02_18_respMaxes1",
# "2020_02_18_respMaxes2",
# "2020_02_18_respMaxes3",
# "2020_02_18_respMaxes4",
# "2020_02_18_respMaxes5")
# Our core parameters
# setwd(wdir)
# simresults<-reknitsimresults(basesavenames_basicpowersets)
# setwd(mdir)
# save(results_core, file="data/results_core.rda")
# setwd(wdir)
# simresults<-reknitsimresults(basesavenames_respparamsetsRates)
# setwd(mdir)
# save(results_rates, file="data/results_rates.rda")
# setwd(wdir)
# simresults<-reknitsimresults(basesavenames_respparamsetsMaxes)
# setwd(mdir)
# save(results_maxes, file="data/results_maxes.rda")
# setwd(wdir)
# simresults<-readRDS("LME_compbaseparam_2020_02_08v2_500rep.rds") # 1.5Gigs!
# setwd(mdir)
# # TOOOOO big!!! Cut down to what we acutally need, minimally...
# simresults$rawdata<-simresults$rawdata$precensor # cuts down to 950 MB
# dim(simresults$results)
# length(simresults$rawdata)
# sr<-simresults$results[censorparamset==0]
# dim(sr) # now indexed the same
# ikeep<-sr[respparamset==1][blparamset==1][N==75][c.bm==.6][c.cf1t==.2]$irep
#
#setwd(mdir)
#save(results_trajectories, file="data/results_trajectories.rda")
# See if can apply our lme_analysis to the clinical trial results??? What happens if include
# the placebo group vs not...??
# rddir<-"C:\\Users\\afane\\Dropbox\\Misc\\DataSave\\Data\\CAPSitems"
# hddir<-getwd()
# setwd(rddir)
# dt.s<-as.data.table(readRDS("FreshPullY13AllTimepoints_scoresWithCovar.rds"))
# # s for scores
# dt.d<-readRDS("FreshPullY13AllTimepoints_diffs.rds")
# # d for differences - gives deltaCAPS for each time interval ("timeptDiff")
# # timepts are: weeks 0, 7, 11, 15
# dto<-readRDS("FreshPullY13AllTimepoints_SummaryCalcs.rds")
# # this version goes item by item and gives the mean rating at baseline and at end of trial for eachgroup
# # Data (produced by createDataSetForY13data_06.R)
#
# # Get BP and total CAPS into useable format
# dt.s[,CAPSToti:=sum(B1,B2,B3,B4,B5,C1,C2,C3,C4,C5,C6,C7,D1,D2,D3,D4,D5),by=c("SubjID","timept")]
# dt.s[,BP:=as.numeric(strsplit(ScrBpStan,"/")[[1]][1]),by=c("SubjID","timept")]
#
# # Make match dat file: names are...
# # bm, BL, ptID, (timept names), path, replicate
# rdat<-dt.s[,.(bm=BP,ptID=SubjID,StudyDrug,replicate=1,timept,path=1,CAPSToti)]
# rdat[StudyDrug=="placebo"]$path<-2
# rdatw<-dcast(rdat,bm+ptID+StudyDrug+replicate+path~timept,value.var="CAPSToti")
# setnames(rdatw,c("1","2","3","4"),c("BL","T1","T2","T3"))
# dattest<-rdatw[,.(bm,BL,ptID=1:.N,T1,T2,T3,path)]
#
# The bm of 76 turns out to be an error where the SBP is missing:
# dattest<-dattest[bm>80]
#
# datasavedir<-"C://Users//afane//Documents//GitHub//pmsimstats//data//"
# setwd(datasavedir)
# CTdata<-dattest
# save(CTdata,file="CTdata.rda")
#
# simresults<-generateSimulatedResults(
# trialdesigns=list(trialdesigns$OL,trialdesigns$PGRCT),
# respparamsets=origrespparamsets[1],
# blparamsets=blparamsets[1],
# censorparams=censorparams,
# modelparams=coremodelparams[1,],
# simparam=list(Nreps=1,
# progressiveSave=FALSE,
# basesavename="TEST",
# nRep2save=5,
# saveunit2start=1,
# savedir=getwd()),
# analysisparams=analysisparams,
# rawdataout=TRUE)
#
#' Plot factor trajectories
#'
#' \code{PlotModelingResults} plots basic output relating to the peformance of each
#' trial design across a parameter space
#'
#' @param data the \code{$results} component of the output of \link{generateSimulatedResults}
#' @param param2plot What value to plot as the heatmap out put. Can be power,
#' mean_frac_NA, mean_beta, mean_betaSE, sd_beta
#' @param param2vary 4 params to vary and plot across the created space
#' @param param2hold Params to keep constant
#' @param param2nothold Things to let vary freely (if e.g. they are linked to another
#' intentionally varied parameter)
#' @param labelcode A mapping between interal variable names and figure labels
#'
#' @return a ggplot object containing the plot
#' @examples
#' data(results_core)
#' param2vary<-c("trialdesign","N","c.bm","censorparamset")
#' param2hold<-data.table(blparamset=1,respparamset=1,carryover_t1half=0)
#' param2nothold<-c("c.cfct","modelparamset")
#' param2plot<-"power" # Options: power, mean_frac_NA, mean_beta, mean_betaSE, sd_beta...
#' p1<-PlotModelingResults(simresults,param2plot,param2vary,param2hold,param2nothold)
#' # See vignettes for additional details
#' @export
PlotModelingResults<-function(data,param2plot,param2vary,param2hold,param2nothold,labelcode){
# This just for labeling the plot
if(missing(labelcode)){
labelcode<-data.table(handle=c("N","co","carryover","call","ctv","cpb","cbr",
"mtv","mpb","mbr","stv","spb","sbr",
"rtv","rpb","rbr","cbm","sbl","sbm",
"trialdesignopt","beta0","beta1","eb1",
"c.cf1t","carryover_t1half",
"tv_rate","pb_rate","br_rate",
"tv_max","pb_max","br_max",
"c.bm","t_random_slope","use_DE"),
label=c("N subjects","Carryover","Carryover","Autocorrelations",
"tv autocorrelation","pb autocorrelation","br autocorrelation",
"tv mean","pb mean","br mean","tv SD","pb SD","br SD",
"tv rate","pb rate","br rate","Correlation with biomarker",
"SD of baseline CAPS","SD of SBP","Trial Design",
"Baseline dropout (beta0)","Beta1","eb1",
"cross-factor within timepoint autocorrelation","Carryover (t_1/2)",
"TR rate","ER rate","BR rate",
"TR max"," ER max","BR max",
"Biomarker effect","Random Slope","Include expectancy in model"
))
}
# Narrow the data down to what we will actually use
d<-data$results
# If we're breaking up respparams, do so now so can use same plotting machinery below
if(length(intersect(param2vary,c("tv_rate","pb_rate","br_rate")))>1){
# Build map from respparamste to rates
cw<-data.table(respparamset=integer(),tv_rate=numeric(),pb_rate=numeric(),br_rate=numeric())
for(irp in 1:length(data$parameterselections$respparamsets)){
rps<-data$parameterselections$respparamsets[[irp]]$param
cwn<-data.table(respparamset=irp,
tv_rate=rps[cat=="tv"]$rate,
pb_rate=rps[cat=="pb"]$rate,
br_rate=rps[cat=="br"]$rate)
cw<-rbind(cw,cwn)
}
d<-merge(d,cw,by="respparamset",all.x=TRUE,all.y=FALSE)
}
if(length(intersect(param2vary,c("tv_max","pb_max","br_max")))>1){
# Build map from respparamste to rates
cw<-data.table(respparamset=integer(),tv_max=numeric(),pb_max=numeric(),br_max=numeric())
for(irp in 1:length(data$parameterselections$respparamsets)){
rps<-data$parameterselections$respparamsets[[irp]]$param
cwn<-data.table(respparamset=irp,
tv_max=rps[cat=="tv"]$max,
pb_max=rps[cat=="pb"]$max,
br_max=rps[cat=="br"]$max)
cw<-rbind(cw,cwn)
}
d<-merge(d,cw,by="respparamset",all.x=TRUE,all.y=FALSE)
}
# What didn't have a section made, but isn't part of what's being intentionally varied?
otherholds<-setdiff(names(d),
c(param2hold,param2vary,param2nothold,"modelparamset",
"frac_NA","beta","betaSE","p","tID","irep","ETbeta","ETbetaSE",
"issingular","warning"))
for(iP in 1:length(param2hold)){
n<-names(param2hold)[iP]
d<-d[get(n)==param2hold[[n]]]
}
for(iP in 1:length(otherholds)){
param2pick<-d[[otherholds[iP]]][1] # TAKING THE FIRST ONE
d<-d[get(otherholds[iP])==param2pick]
}
# Format the parameters to vary and their labels. Hacky - could have designed more flexibly...
op<-param2vary
oplabels<-vector(mode="list",length=(length(op)))
for(iop in 1:length(op)){
if(op[iop]=="trialdesign"){
axlabel<-"Trial design"
acttds<-unique(d$trialdesign)
bklabels<-vector(mode="character",length=length(acttds))
for(iT in 1:length(bklabels)){
itd<-acttds[iT]
bklabels[iT]<-data$parameterselections$trialdesigns[[itd]]$metadata$name_shortform[[1]]
}
}else if(op[iop]=="respparamset"){
axlabel<-"Response parameters"
bklabels<-vector(mode="character",length=length(data$parameterselections$respparamsets))
for(iT in 1:length(bklabels)){
bklabels[iT]<-data$parameterselections$respparamsets[[iT]]$name
}
}else if(op[iop]=="blparamset"){
axlabel<-"Baseline parameters"
bklabels<-vector(mode="character",length=length(data$parameterselections$blparamsets))
for(iT in 1:length(bklabels)){
bklabels[iT]<-data$parameterselections$blparamsets[[iT]]$name
}
}else if(op[iop]=="censorparamset"){
axlabel<-"Censoring parameters"
bklabels<-c("No censoring",data$parameterselections$censorparams$names)
d$censorparamset<-d$censorparamset+1
}else if(op[iop]=="carryover_t1half"){
axlabel<-"Carryover (in weeks)"
bklabels<-unique(d[,get(op[iop])])
conv<-data.table(carryover_t1half=bklabels,new=1:(length(bklabels)))
d<-merge(d,conv,by="carryover_t1half")
d[,carryover_t1half:=new]
d[,new:=NULL]
}else{
axlabel<-merge(data.table(handle=op[iop]),labelcode,by="handle",all=FALSE)$label
bklabels<-unique(d[,get(op[iop])])
if(class(d[,get(op[iop])])=="logical"){
evalstring<-paste("d$",op[iop],"<-as.integer(d[,get(op[iop])])",sep="")
eval(parse(text=evalstring))
}
}
oplabels[[iop]]<-list(axlabel=axlabel,bklabels=bklabels)
}
# Change the labels on the stuff that will be faceted
for(iop in 1:2){
d[[op[iop]]]<-as.factor(d[[op[iop]]])
levels(d[[op[iop]]])<-oplabels[[iop]]$bklabels
}
d$tID<-1:dim(d)[1]
d[,mean_beta:=mean(beta),by=op]
d[,mean_betaSE:=mean(betaSE),by=op]
d[,sd_beta:=sd(beta),by=op]
d[,sig_p:=(p<.05),by=tID]
d[,power:=mean(sig_p),by=op]
d[,mean_frac_NA:=mean(frac_NA),by=op]
d[,CV:=sd_beta/mean_beta,by=op]
d[,fractionSingular:=sum(issingular==TRUE,na.rm=TRUE)/length(issingular),by=op]
d[,fractionNA:=sum(is.na(issingular))/length(issingular),by=op]
d[,fractionNotSingular:=sum(issingular==FALSE,na.rm=TRUE)/length(issingular),by=op]
d[,toplot:=get(param2plot)]
ds<-d
ds[,c("tID","beta","betaSE","p","sig_p"):=NULL]
ds<-unique(ds)
palettevalues=c(0,.7,1)
if(param2plot=="power"){
legendtitle<-"Power"
}else if(param2plot=="sd_beta"){
legendtitle<-"SD of beta"
}else if(param2plot=="mean_betaSE"){
legendtitle<-"Mean SE of \ncoefficient"
}else{
legendtitle<-param2plot
}
# suppress the warnings thrown by the scale_y_continuous line:
suppressWarnings(
p<-ggplot(data=ds,aes_string(op[3],op[4])) +
geom_tile(aes(fill=toplot),colour="white") +
scale_fill_distiller(type="seq",palette="RdYlBu",values=palettevalues,legendtitle) +
geom_text(aes(label=round(toplot,dig=2)),size=3,na.rm=TRUE) +
scale_x_continuous(name=oplabels[[3]]$axlabel,breaks=unique(ds[,get(op[3])]),
labels=oplabels[[3]]$bklabels) +
scale_y_continuous(name=oplabels[[4]]$axlabel,breaks=unique(ds[,get(op[4])]), # This line is where the warnings are coming from....??
labels=oplabels[[4]]$bklabels,
sec.axis=sec_axis(trans=~.,labels=NULL,breaks=NULL,
name=oplabels[[2]]$axlabel)) +
ggtitle(oplabels[[1]]$axlabel) +
theme(plot.title = element_text(size=12,hjust=0.5))+
facet_grid(reformulate(op[1], op[2]))
)
return(p)
}
#' Plot factor trajectories
#'
#' \code{plotfactortrajectories} plots the averaged output of the factors
#' used to generate simulated trial data
#'
#' @param data the \code{$rawdata$precensor} component of the output of \link{generateSimulatedResults}
#' (run with rawdataout=TRUE)
#' @param tinfo the \code{$results} component of the output of \link{generateSimulatedResults}
#' @param tds the \code{$parameterselections$trialdesigns} component of the output of
#' \link{generateSimulatedResults}
#' @param opt2plot a data.table with a single row using parameter names to instruct the
#' function which parameters to hold constant and plot the results of
#' @param mergeacrossreps=TRUE logical input of whether to merge across repititions
#' @return a ggplot object containing the plot
#' @examples
#' data(results_trajectories)
#' data<-simresults$rawdata$precensor # this gives a list of length (total reps done)
#' tinfo<-simresults$results # dim[1] of tinfo gives the parameter space that went in
#' tds<-simresults$parameterselections$trialdesigns
#' mergeacrossreps<-TRUE
#' plots1<-vector(mode="list",length=length(simresults$parameterselections$trialdesigns))
#' for(ip in 1:length(plots1)){
#' param2hold<-data.table(carryover_t1half=0,trialdesign=ip)
#' plots1[[ip]]<-plotfactortrajectories(data,tinfo,tds,param2hold,mergeacrossreps)
#' }
#' # See vignettes for additional details
#' @export
plotfactortrajectories<-function(data,tinfo,tds,opt2plot,mergeacrossreps=TRUE){
# Pull out the trial design we're plotting
if("trialdesign"%in%names(opt2plot)){
td<-tds[[opt2plot$trialdesign]]
}else{
warning("You need to specify the trial design by index number")
}
# Turn tinfo into, essentially VPG from generateEsimulatedResults.R:
tinfo<-unique(tinfo[,.(trialdesign,respparamset,blparamset,modelparamset,N,c.bm,
carryover_t1half,c.tv,c.pb,c.br,c.cf1t,c.cfct,censorparamset,
use_DE,t_random_slope)])
# Since crossreferencing indexing, make it explicit:
tinfo<-cbind(iR=1:(dim(tinfo)[1]),tinfo)
# Only plot pre-censoring:
tinfo<-tinfo[censorparamset==0]
# Pull out the reps we're going to use
# 1) What didn't have a section made, but isn't part of what's being intentionally varied?
otherholds<-setdiff(names(tinfo),
c(names(opt2plot),"modelparamset","trialdesign",
"frac_NA","beta","betaSE","p","iR","irep",
"ETbea","ETbetaSE","issingular","warning"))
keepR<-1:dim(tinfo)[1]
for(iP in 1:length(param2hold)){
n<-names(param2hold)[iP]
kkeepR<-which(tinfo[[n]]==param2hold[[n]])
keepR<-intersect(keepR,kkeepR)
}
for(iP in 1:length(otherholds)){
param2pick<-tinfo[[otherholds[iP]]][1] # TAKING THE FIRST ONE
kkeepR<-which(tinfo[[otherholds[iP]]]==param2pick)
keepR<-intersect(keepR,kkeepR)
}
# keepR<-1:dim(tinfo)[1]
# for(op in names(opt2plot)){
# kkeepR<-which(tinfo[[op]]==opt2plot[[op]])
# keepR<-intersect(keepR,kkeepR)
# }
# Compile the data we want to use and melt it for flexible plotting
d<-data[[keepR[1]]][0,]
for(kR in keepR){
d<-rbind(d,data[[kR]])
}
if(!mergeacrossreps) d<-d[replicate==1]
#d[,ptID:=NULL] # not unique after merge
d<-cbind(ptID=1:dim(d)[1],d)
suppressWarnings(d.m1<-melt(d,id.var=c("ptID","path","bm"),value.name="pointvalue"))
d.m1$variable<-as.character(d.m1$variable)
d.m1[,c("temp1","factor"):=tstrsplit(variable,"[.]")]
d.m1[,c("temp2","temp3"):=tstrsplit(temp1,"_")]
d.m1[,timept:=as.character(NA)]
d.m1[,L_delta:=as.logical(NA)]
# Check, as this got way more complicated than expected, fast
#unique(d.m1[,.(variable,timept,factor,L_delta,temp1,temp2,temp3)])
# Fix up the timept column
d.m1[temp2=="D"]$timept<-d.m1[temp2=="D"]$temp3
d.m1[temp2!="D"]$timept<-d.m1[temp2!="D"]$temp1
#unique(d.m1[,.(variable,timept,factor,L_delta,temp1,temp2,temp3)])
# Fix up the L_delta column
d.m1[temp2=="D"]$L_delta<-TRUE
d.m1[!is.na(factor)]$L_delta<-TRUE
d.m1[is.na(L_delta)]$L_delta<-FALSE
# Fix up the factor column
d.m1[is.na(factor)]$factor<-"all"
# Last check - HIGH RISH POINT FOR ERRORS
#unique(d.m1[,.(variable,timept,factor,L_delta,temp1,temp2,temp3)])
d.m1[,c("temp1","temp2","temp3"):=NULL]
# Merge in the weeks, and do a version with mean/SDs
timecrosswalk<-data.table(timept=td$metadata$timeptname,wk=td$metadata$timepoints)
d.m2<-merge(d.m1,timecrosswalk,by="timept",all.x=TRUE,all.y=FALSE)
d.m2[,meanvalue:=mean(pointvalue),by=c("variable","path")]
d.m2[,sdvalue:=sd(pointvalue),by=c("variable","path")]
d.m3<-unique(d.m2[,.(timept,path,factor,L_delta,wk,meanvalue,sdvalue)])
# add in the baseline values...
EG<-expand.grid(unique(d.m3$path),unique(d.m3$factor))
setnames(EG,names(EG),c("path","factor"))
BLvalues<-cbind(timept="BL",EG,L_delta=TRUE,wk=0,meanvalue=0,sdvalue=0)
d.m3<-rbind(d.m3,BLvalues)
# Order the factors, so will be consistent coloring:
d.m3$factor<-factor(d.m3$factor,levels=c("all","br","pb","tv"))
# Create the info for the bar at the bottom ID'ing when on drug
mod<-td$metadata$ondrug
w<-td$metadata$timepoints
ondrug<-data.table(starts=numeric(),stops=numeric(),path=integer())
for(path in 1:length(mod)){
for(ep in 1:length(mod[[path]])){ # ep is the endpoint index
if(mod[[path]][ep]==1){ # only add to list to plot if on drug at end of segment
if(ep==1){start<-0}else{start<-w[ep-1]}
stop<-w[ep]
ondrug<-rbind(ondrug,data.table(starts=start,stops=stop,path=path))
}
}
}
# Plot by path
p<-ggplot()+
# Plot by factor, then sum...
geom_point(data=d.m3[L_delta==TRUE],aes(x=wk,y=meanvalue,color=factor),size=.3)+
geom_line(data=d.m3[L_delta==TRUE],aes(x=wk,y=meanvalue,color=factor),size=.3)+
facet_wrap(~path)+
labs(x="Weeks",y="Improvement in symptom intensity from baseline")+
# theme_minimal()+
scale_color_manual(name="Factor",
values=c("red","blue","brown","black"),
breaks=c("all","br","pb","tv"),
labels=c("Sum","BR","ER","TR"))+
# add the bar indicating when on drug...
geom_segment(data=ondrug,aes(x=starts,y=-1.5,xend=stops,yend=-1.5),size=1)
return(p)
}
#' Personalized medicine stimulation-based statistics
#'
#' The \code{pmsimstats} package is a simple set of tools to
#' help with implementing simulation-based powercalculations and
#' bias assessments the ability of different clinical trial
#' designs to answer personalized medicine (or precision medicine)
#' oriented hypotheses - specifically, the ability of a
#' biomarker measured at baseline to predict the biologic
#' response to a treatment, in the presence of other confounding
#' effects, including regression to the mean and expectancy
#' related effects.
#'
#' @section Further documentation:
#' See ___ publication or the two included vignettes, which
#' generate the analyses and figures from the publication.
#'
#' @docType package
#' @name pmsimstats
NULL
#' Generate Simulated Results
#'
#' \code{generateSimulatedResults} rotates through a parameter space, creating
#' and analyizing simulated data and collating the results
#'
#' This is a wrapper for \link{generateData} and \link{lme_analysis} that
#' systematically works its way through a set of parameters and creates
#' a specified number of repetitions of simulated data and results of
#' analyizing that data for each set of parameter specifications.
#'
#' @param trialdesigns A list of trial designs of the form generated
#' by \link{buildtrialdesigns}
#' @param respparamsets A list of options for \code{respparam} as
#' described in \link{generateData}
#' @param blparamsets A list of options for \code{blparam} as
#' described in \link{generateData}
#' @param censorparams A data.table where each row is a set of options for
#' \code{censorparam} as described in \link{censordata}
#' @param modelparams A data.table where each row is a set of options for
#' \code{modelparams} as described in \link{generateData}
#' @param simparam The options that control the logistics of the simulations:
#' \itemize{
#' \item{\code{Nreps}}{ Number of repititions for a given set of parameters}
#' \item{\code{progressiveSave=TRUE}}{ Do you want to save chunks as you go?
#' Helpful if the run is going to take a long time, and you don't want the
#' run to be interrupted 8 hours in to a 9 hour run and lose everything.}
#' \item{\code{basesavename}}{ If you are using progressivesave, what do you
#' want the base of the filename to be? Will have _save1, _save2, etc appended}
#' \item{\code{nRep2save}}{ How many parameter sets to you want to run at
#' a time before saving?}
#' \item{\code{saveunit2start}}{ If you got through part of your parameter
#' space but not all of it, you can start partway through. If starting
#' from the beginning, set to 1}
#' \item{\code{savedir}}{ What directory do you want to save to?}
#' }
#' @param analysisparams The options used by \link{lme_analysis}. Has the
#' following components:
#' \itemize{
#' \item{\code{useDE=TRUE}}{ Binary: should the LME model use expectnacy
#' information if available?}
#' \item{\code{t_random_slope=FALSE}}{ Binary: should the LME model use
#' random slopes, as well as random intercepts, for participants?}
#' }
#' @param rawdataout=FALSE Binary - output the raw data for all
#' simulations? This is memory intensive for large runs.
#' @return Returns a list with three named parts:
#' \itemize{
#' \item{\code{results}}{ A large data table that tells you the parameters used
#' and the analysis results for every simulated trial, one row per simualated trial.}
#' \item{\code{parameterselections}}{ A list desribing the parameterspace used.
#' This is important because some of the information in \code{out$results} is
#' the index of the parameter set selection, and you will need this additional
#' information to interpret it. For example, \code{trialdesigns}, \code{respparamset},
#' \code{blparamset}, and \code{modelparamset} are all indexed.}
#' \item{\code{rawdataout}}{ If you have rawdataout set to \code{TRUE}, you will
#' also get (1) \code{drawdataout$precensor}, a list of simulated trial data of
#' a length corresponding to the number of parameter permulations, with the order
#' parallel to that of the main output results. Repititions of a particular
#' parameter set are mixed, identified by \code{irep}. And, \code{rawdataout$postcensor},
#' which has the same format but with the psotcensoring data}
#' }
#' @examples
#' # See vignettes for examples
#' @export
generateSimulatedResults<-function(trialdesigns,respparamsets,blparamsets,
censorparams,modelparams,simparam,analysisparams,
rawdataout=FALSE){
# defaults
if(missing(analysisparams)) analysisparams<-list(useDE=TRUE,
t_random_slope=FALSE,
full_model_out=FALSE)
# Hold original directory in case we change it
initialdirectory<-getwd()
# Setup our path through the variable parameters and
# initialize counts used for tracking on the screen:
tic("Total time:")
VPGmaster<-expand.grid(
trialdesign=1:length(trialdesigns),
respparamset=1:length(respparamsets),
blparamset=1:length(blparamsets),
modelparamset=1:dim(modelparams)[1]
)
nV<-dim(VPGmaster)[1]
# note that censor param not included because done after data generation
irep<-1
totrep<-nV*simparam$Nreps
totparamsets<-nV
# If we're doing a progressive save, have a "large loop" that controls the smaller inside loop
if(simparam$progressiveSave==FALSE){
nLargeLoops<-1
LLstarts<-1
LLstops<-nV
}else{
nLargeLoops<-ceiling(nV/simparam$nRep2save)
if(nLargeLoops>1){
LLstarts<-c(1,1+simparam$nRep2save*(1:(nLargeLoops-1)))
LLstops<-c(LLstarts[2:nLargeLoops]-1,nV)
}else{
LLstarts<-1
LLstops<-nV
}
}
for(iLL in simparam$saveunit2start:nLargeLoops){
tic(paste("***Progressive Save Unit ",(iLL+1-simparam$saveunit2start)," of ",(nLargeLoops+1-simparam$saveunit2start),
" complete",sep=""))
VPG<-VPGmaster[LLstarts[iLL]:LLstops[iLL],]
iparamset<-LLstarts[iLL]
# Create output structure:
out<-cbind(VPG[0,],data.table(censorparamset=integer(),use_DE=logical(),t_random_slope=logical(),
irep=integer(),frac_NA=numeric(),beta=numeric(),betaSE=numeric(),
p=numeric(),issingular=logical(),warning=character()))
if(rawdataout){d_out<-list(precensor=list(),postcensor=list())}
# Loop through the different variable parameters
for(iR in 1:dim(VPG)[1]){
tic(paste("Parameter set ",iR," of ",dim(VPG)[1]," in this save unit now complete (on ",iparamset," of ",nV," total sets)",sep=""))
td<-trialdesigns[[VPG[iR,"trialdesign"]]][[2]]
p<-list()
p$respparam<-respparamsets[[VPG[iR,"respparamset"]]]$param
p$blparam<-blparamsets[[VPG[iR,"blparamset"]]]$param
p$modelparam<-modelparams[VPG[iR,"modelparamset"],]
# Run this set of params 1 time to create N*Nrep simuated participants then subset, for efficiency;
# have to create the dat for each path and merge
#
nP<-length(td)
Ns<-p$modelparam$N%/%nP # How many in each path run?
Ns<-Ns+c(rep(1,p$modelparam$N%%nP),rep(0,nP-p$modelparam$N%%nP)) # distribute the remainder
NNs<-Ns*simparam$Nreps # here adjust so doing all with this paramset at once
p$modelparam$N<-NNs[[1]]
dat<-generateData(p$modelparam,p$respparam,p$blparam,td[[1]],FALSE,TRUE)
dat[,path:=1]
dat[,replicate:=rep(1:simparam$Nreps,Ns[1])]
if(nP>1){
for(iP in 2:nP){
p$modelparam$N<-NNs[[iP]]
dat2<-generateData(p$modelparam,p$respparam,p$blparam,td[[iP]],FALSE,TRUE)
dat2[,path:=iP]
dat2[,replicate:=rep(1:simparam$Nreps,Ns[iP])]
dat<-rbind(dat,dat2)
}
}
p$modelparam$N<-sum(Ns)
# Analysis chunk repeat x nAnalysisParametersets:
for(iAP in 1:dim(analysisparams)[1]){
# Analyze entire population to get "empircal true beta" (ETbeta)
#tic("analysis-whole population")
ETanalysisout<-lme_analysis(td,dat,analysisparams[iAP,])
#toc()
# Now, for each replicate: analyize and save output
#tic("analysis-replicates")
for(iS in 1:simparam$Nreps){
# tryCatch(
# expr = {
# analysisout<-lme_analysis(td,dat[replicate==iS],analysisparams[iAP,])
# analysisout$warning<-NA
# },
# error=function(e){
# message('Error in call to lme_analysis')
# print(e)
# print(paste("Error occurred on VPG line ",iR," on analysis parameterset ",AP," on replicate ",iS,sep=""))
# },
# warning=function(w){
# message('Error in call to lme_analysis')
# print(w)
# print(paste("Error occurred on VPG line ",iR," on analysis parameterset ",AP," on replicate ",iS,sep=""))
# analysisout<-data.table(beta=NA,betaSE=NA,p=NA,issingular=analysisout$issingular,warning=w)
# }
# )
analysisout<-lme_analysis(td,dat[replicate==iS],analysisparams[iAP,])
o<-cbind(VPG[iR,],as.data.table(p$modelparam),
data.table(censorparamset=0,use_DE=analysisparams[iAP,]$useDE,t_random_slope=analysisparams[iAP,]$t_random_slope,
irep=((iR-1)*simparam$Nreps+iS),frac_NA=0,ETbeta=ETanalysisout$beta,
ETbetaSE=ETanalysisout$betaSE,beta=analysisout$beta,betaSE=analysisout$betaSE,
p=analysisout$p,issingular=analysisout$issingular,warning=analysisout$warning))
out<-rbind(out,o)
}
# Repeat for each set of censor parameters
nocensoringflag<-FALSE
if(length(censorparams)<2){
if(is.na(censorparams)){
nocensoringflag<-TRUE
}
}
if(!nocensoringflag){
h_datc<-vector(mode="list",length=(dim(censorparams)[1]))
for(iC in 1:dim(censorparams)[1]){
h_datc[[iC]]<-censordata(dat,td[[1]],censorparams[iC,])
for(iS in 1:simparam$Nreps){
frac_NA<-sum(is.na(h_datc[[iC]][replicate==iS]))/(p$modelparam$N*length(td[[1]]$t_wk))
analysisout<-lme_analysis(td,h_datc[[iC]][replicate==iS],analysisparams[iAP,])
o<-cbind(VPG[iR,],as.data.table(p$modelparam),
data.table(censorparamset=iC,use_DE=analysisparams[iAP,]$useDE,t_random_slope=analysisparams[iAP,]$t_random_slope,
irep=((iR-1)*simparam$Nreps+iS),frac_NA=frac_NA,ETbeta=ETanalysisout$beta,
ETbetaSE=ETanalysisout$betaSE,beta=analysisout$beta,betaSE=analysisout$betaSE,
p=analysisout$p,issingular=analysisout$issingular,warning=analysisout$warning))
out<-rbind(out,o)
}
}
#toc()
}else{
h_datc<-NA
}
}
if(rawdataout){
d_out$precensor[[iparamset]]<-dat
d_out$postcensor[[iparamset]]<-h_datc
}
iparamset<-iparamset+1
toc()
#progress(iparamset,max.value=totparamsets)
} # end iR
# Organize the output and return it
if(rawdataout){
outpt<-list(results=as.data.table(out),
parameterselections=list(trialdesigns=trialdesigns,
respparamsets=respparamsets,
blparamsets=blparamsets,
censorparams=censorparams,
modelparams=modelparams,
analysisparams=analysisparams,
simparam=simparam),
rawdata=d_out)
}else{
outpt<-list(results=as.data.table(out),
parameterselections=list(trialdesigns=trialdesigns,
respparamsets=respparamsets,
blparamsets=blparamsets,
censorparams=censorparams,
modelparams=modelparams,
analysisparams=analysisparams,
simparam=simparam))
}
if(simparam$progressiveSave){
setwd(simparam$savedir)
saveRDS(outpt,paste(simparam$basesavename,iLL,sep="_save"))
}
toc()
} # end iLL
toc()
setwd(initialdirectory)
if(!simparam$progressiveSave) return(outpt)
}
#' Generate simulated data
#'
#' \code{generateData} generates a set of simulated clinical trials' data
#'
#' This is the core data simulation code. See vignettes for additional details.
#' This function is primarily called by \link{generateSimulatedResults}.
#'
#' @param modelparam a datatable with a single entry per named column:
#' \itemize{
#' \item{\code{N}}{ Number of simulated participants}
#' \item{\code{c.bm}}{ Correlation between the biomarker and the biologic response}
#' \item{\code{carryover_t1half}}{ Halflife of the carryover effect}
#' \item{\code{c.tv}}{ Autocorrelation for the tv factor across timepoints}
#' \item{\code{c.pb}}{ Autocorrelation for the pb factor across timepoints}
#' \item{\code{c.br}}{ Autocorrelation for the br factor across timepoints}
#' \item{\code{c.cf1t}}{ Correlation between different factors at a single timepoint}
#' \item{\code{c.cfct}}{ Correlation between different factors at different timepoints}
#' }
#' @param respparam Data table with 5 columns and 3 rows. The first row lists the 3 factors
#' defining the response trajectories (tv, pb, and br). The rest of the columns are the
#' input parameters for the \link{modgompertz} function:
#' \itemize{
#' \item{\code{max}}{ Maximum response value}
#' \item{\code{disp}}{ Displacement}
#' \item{\code{rate}}{ Rate}
#' \item{\code{sd}}{ Standard deviation}
#' }
#' @param blparam Data table with 3 colums and 3 rows. First row lists the varaibles being
#' defined, which consist of \code{BL} for the baseline value of the outcome measure, and
#' \code{bm} for the biomarker values. Remaining columns:
#' \itemize{
#' \item{\code{m}}{ Mean}
#' \item{\code{sd}}{ Standard deviation}
#' }
#' @param trialdesign The information defining a single path through the clinical trial
#' design, as defined by the \code{output$trialpaths} output of \link{buildtrialdesign}
#' @param empirical Should the correlations be empirical? (Usually \code{FALSE} unless
#' you're doing something odd for testing purposes)
#' @param makePositiveDefinite Should the covariance matrix be forced to be positive definite?
#' (Usually yes, although you may want to turn this off at times to test the impact of
#' this step on your covariance sturcture)
#' @param seed Randomization seed (defaults to NA)
#' @param scalefactor TODO update when understand what this does?
#' @param verbose Set to \code{TRUE} if you want chatty outputs; defaults to \code{FALSE}
#' @returns A \code{dat} file that contains both the total symptom scores at each timepoint
#' and also all the individual factors that were used to generate those total scores
#' @examples
#' #See vignettes for examples of how to use this
#' @export
generateData<-function(modelparam,respparam,blparam,trialdesign,empirical,makePositiveDefinite,seed=NA,scalefactor=2,verbose=FALSE){
# I. Turn the trial design information into something easier to use
d<-data.table(trialdesign)
d$t_wk_cumulative<-cumulative(d$t_wk)
d[,onDrug:=(tod>0)]
nP<-dim(trialdesign)[1]
# II. Now set up a whole host of variables that we're going to want to track - essentially our baseline
# parameters for each subject ("bm","BL"), and the three modeled factors for each stage of the trial.
# Set up the variable names
cl<-c("tv","pb","br")
labels<-c(c("bm","BL"),
paste(trialdesign$timeptname,cl[1],sep="."),
paste(trialdesign$timeptname,cl[2],sep="."),
paste(trialdesign$timeptname,cl[3],sep="."))
# Set up vectors with the standard deviations and means
sds<-c(blparam[cat=="bm"]$sd,blparam[cat=="BL"]$sd)
sds<-c(sds,rep(respparam[cat=="tv"]$sd,nP))
sds<-c(sds,rep(respparam[cat=="pb"]$sd,nP)*trialdesign$e)
sds<-c(sds,rep(respparam[cat=="br"]$sd,nP))
means<-c(blparam[cat=="bm"]$m,blparam[cat=="BL"]$m)
for (c in cl){
rp<-respparam[cat==c]
if(c=="tv"){
means<-c(means,modgompertz(d$t_wk_cumulative,rp$max,rp$disp,rp$rate))
}
if(c=="pb"){
means<-c(means,modgompertz(d$tpb,rp$max,rp$disp,rp$rate)*trialdesign$e)
}
if(c=="br"){
brmeans<-modgompertz(d$tod,rp$max,rp$disp,rp$rate)
## Code added in by Ron Thomas:
brtest <- brmeans == 0
rawbrmeans <- brmeans
names(brtest) <- labels[19:26]
names(rawbrmeans) <- labels[19:26]
## End new code
if(nP>1){
for(p in 2:nP){
if(!d[p]$onDrug){
if(d[p]$tsd>0){
brmeans[p]<-brmeans[p]+brmeans[p-1]*(1/2)^(scalefactor * d$tsd[p]/modelparam$carryover_t1half)
}
}
}
}
means<-c(means,brmeans)
}
}
# Turn this into a matrix of correlations
correlations<-diag(length(labels))
rownames(correlations)<-labels
colnames(correlations)<-labels
# Give some output if in verbose mode:
if(verbose==TRUE){
aa <- data.frame(modelparam$carryover_t1half, rawbrmeans, brmeans, diff = brmeans - rawbrmeans)
cat("brmeans before and after adj:\n ")
print(aa)
}
for(c in cl){
# build in the autocorrlations across time
if(nP>1){
ac<-modelparam[(paste("c",c,sep="."))][,]
for(p in 1:(nP-1)){
for(p2 in (1+p):nP){
n1<-paste(trialdesign$timeptname[p],c,sep=".")
n2<-paste(trialdesign$timeptname[p2],c,sep=".")
correlations[n1,n2]<-ac
correlations[n2,n1]<-ac
}
}
}
# build in the autocorrelations across factors
for(c2 in setdiff(cl,c)){
for(p in 1:nP){
n1<-paste(trialdesign$timeptname[p],c,sep=".")
n2<-paste(trialdesign$timeptname[p],c2,sep=".")
correlations[n1,n2]<-modelparam$c.cf1t
correlations[n2,n1] <- modelparam$c.cf1t ##### <--------FIXING TYPO, this was [n1,n2] again
}
for(p in 1:(nP-1)){
for(p2 in (1+p):nP){
n1<-paste(trialdesign$timeptname[p],c,sep=".")
n2<-paste(trialdesign$timeptname[p2],c2,sep=".")
correlations[n1,n2]<-modelparam$c.cfct
correlations[n2,n1]<-modelparam$c.cfct
}
}
}
# correlation with biomarker
for(p in 1:nP){
n1<-paste(trialdesign$timeptname[p],"br",sep=".")
## RON THOMAS VERSION:
if (p > 1) {
n0 <- paste(trialdesign$timeptname[p - 1], "br", sep = ".")
mm1 <- means[which(n1 == labels)]
mm0 <- means[which(n0 == labels)]
correlations["bm", n1] <- correlations[n1, "bm"] <- ifelse(brtest[p], ifelse(brmeans[p] == 0, 0, (mm1 / mm0) * modelparam$c.bm), modelparam$c.bm)
}
##
## Following commented out in Ron Thomas version:
#if(means[which(n1==labels)]!=0){
# correlations[n1,'bm']<-modelparam$c.bm
# correlations['bm',n1]<-modelparam$c.bm
#}
## End commented out by Ron
}
}
# Again, some output if in verbose mode:
if(verbose==TRUE){
cat("carryover: ")
print(modelparam$carryover_t1half)
cat("br correlations: \n")
print(correlations[19:26, 1])
}
# Turn correlation matrix into covariance matrix
sigma<-outer(sds,sds)*correlations
# should be faster: outer(S,S)*R where S is SDs and R is correlation matrix
# previous: sigma<-correlations*sds%o%sds
# If turned on, force to be positive definite:
if(makePositiveDefinite){
if(!is.positive.definite(sigma)){
sigma<-make.positive.definite(sigma, tol=1e-3)
}
}
# Set seed:
#if(is.na(seed)){seed<-round(rnorm(1,10000,10000))}
#set.seed(seed)
# Make the componant data!
dat<-mvrnorm(n=modelparam$N,mu=means,Sigma=sigma,empirical=empirical)
dat<-data.table(dat)
setnames(dat,names(dat),labels)
# TESTING
#tdat1<-dat[, lapply(.SD, mean)]
#plot(c(d$t_wk_cumulative),tdat1[,.(OL.br,BD.br,UD.br,COd.br,COp.br)])
#tdat2<-dat[, lapply(.SD, mean)]
#plot(c(d$t_wk_cumulative),tdat2[,.(OL.br,BD.br,UD.br,COd.br,COp.br)])
# Turn it into actual fake data... calc intervals separately, because will use
dat[,ptID:=1:modelparam$N]
for(p in 1:nP){
n<-trialdesign$timeptname[p]
evalstring<-paste(
"dat[,D_",n,":=sum(",paste(paste(n,cl,sep='.',collapse=','),sep=','),"),by='ptID']",
sep="")
eval(parse(text=evalstring))
}
for(p in 1:nP){
n<-trialdesign$timeptname[p]
evalstring<-paste(
"dat[,",n,":=sum(BL,-",paste(paste(n,cl,sep='.',collapse=',-'),sep=','),"),by='ptID']",
sep="")
eval(parse(text=evalstring))
}
# TESTING
#tdat3<-dat[, lapply(.SD, mean)]
#plot(c(0,d$t_wk_cumulative),tdat3[,.(BL,OL,BD,UD,COd,COp)])
#plot(c(d$t_wk_cumulative),tdat3[,.(D_OL,D_BD,D_UD,D_COd,D_COp)])
return(dat)
}
#' Extracted baseline parameters for the vignette that reproduces the publication results.
#'
#' This contains the mean and standard deviation of (a) the standing systolic blood
#' pressures taken during orthostatic vital signs, and (b) the CAPS-IV PTSD assessment
#' total severity score.
#'
#' @docType data
#'
#' @usage data(extracted_bp)
"extracted_bp"
#' Extracted response parameters for the vignette that reproduces the publication results.
#'
#' This contains a set of maximum, displacement, rate and standard deviation values that, when
#' fed into the function \link{modgompertz} produces a combination of response factors
#' (biologic response to the drug, br here or BR in the publication; expectancy related response,
#' pb here or ER in the publication; and the non-treatment related time-varying factor, tv here
#' or TR in the publication) that together could plausibly describe our existing RCT data set.
#' This set of parameters was designed to make no other particular assumptions about how these
#' factors would perform - to be a 'tabula rasa' parameter set - where e.g. the timecourse and
#' maximum values of the expectancy related and tv/TR factors were assumed to be equal.
#'
#' @docType data
#'
#' @usage data(extracted_rp)
"extracted_rp"
#' The results of the vigenette Part 1 used to plot the trajectories
#'
#' @docType data
#'
#' @usage data(results_trajectories)
"results_trajectories"
#' The results of the vigenette Part 1 used to plot basic paramater space
#'
#' @docType data
#'
#' @usage data(results_core)
"results_core"
#' The results of the vigenette Part 1 used to plot rate response paramater space
#'
#' @docType data
#'
#' @usage data(results_rates)
"results_rates"
#' The results of the vigenette Part 1 used to plot maxes respone paramater space
#'
#' @docType data
#'
#' @usage data(results_maxes)
"results_maxes"
#' The results of existing clinical trial data organized as a data file, for use in Vignette 3
#'
#' @docType data
#'
#' @usage data(CTdata)
"CTdata"
#' Censor the data to simulate a particular pattern of dropouts
#'
#' \code{censordata} simulates a pattern of participant dropouts
#'
#' This function inputs a \code{dat} file as would be produced by
#' \link{generateData} and selects a subset to drop out (censor).
#' The probability of a particular timepoint being selected for
#' censoring in the initial pass is the sum of two factors: one
#' that censors at a set rate as a function of time, and one that
#' biases the rate of censoring based on the trajectory of symptoms,
#' with a large improvement in symptoms from baseline decreasing the
#' probability of dropout, while a small change or a worsening
#' increases the probability of dropout. In a separate, second pass
#' all dropouts are carried forward, such that once a datapoint
#' has been censored, all future datapoints for that particular
#' participant are also censored
#'
#' @param dat dat file as produced by \code{generateData}
#' @param trialdesign a trial design as defined by \code{buildtrialdesign}
#' @param censorparam a data.table provides 3 named parameters:
#' (1) beta0 - the total rate (approximately) of censoring (0-1)
#' (2) beta1 - the proportion of the total dropout that should be biased (0-1)
#' (3) eb1 - the exponent used to define the shape of the curve defining biased dropout.
#' See example, below.
#' @return A new data file, with the censored data points replaced by \code{NA},
#' will be returned
#' @examples
#' # Create a set of censoring params to
#' # feed to censordata one at a time:
#' censorparams<-data.table(
#' names=c("balanced","more of flat","more of biased","high dropout"),
#' beta0=c(.05,.05 ,.05,.15),
#' beta1=c(.5,.2,.8 ,.5),
#' eb1= c(2, 2 ,2 ,2 )
#' )
#' @export
censordata<-function(dat,trialdesign,censorparam){
#Stuff that should eventually be streamlined in trial def:
d<-data.table(trialdesign)
d$t_wk_cumulative<-cumulative(d$t_wk)
# d[,onDrug:=(tod>0)]
nP<-dim(trialdesign)[1]
labels<-d$timeptname
#Stuff to set up here
deltas<-paste("D",labels,sep="_")
evalstring<-paste("cdt<-dat[,.(",paste(deltas, collapse=', ' ),")]",sep="")
eval(parse(text=evalstring))
#Implement:
frac_NA<-censorparam$beta0 #.3
frac_NA_biased<-censorparam$beta1 #.6
fna1<-frac_NA*(1-frac_NA_biased)
fna2<-frac_NA*frac_NA_biased
cdt_ps1<-cdt
cdt_ps1[]<-1
cdt_ps2<-sapply(cdt,function(x) ((x+100)^(censorparam$eb1)))
cdt_p1<-t(t(cdt_ps1)*d$t_wk)
cdt_p2<-t(t(cdt_ps2)*d$t_wk)
cdt_r1<-runif(dim(cdt_p1)[1]*dim(cdt_p1)[2],min=0,max=2*mean(cdt_p1)*(.5/fna1))
cdt_r2<-runif(dim(cdt_p2)[1]*dim(cdt_p2)[2],min=0,max=2*mean(cdt_p2)*(.5/fna2))
do1<-as.data.table(cdt_p1>cdt_r1) # not yet cumulative, do this in masking stage
do2<-as.data.table(cdt_p2>cdt_r2) # not yet cumulative, do this in masking stage
do<-as.data.table(do1|do2)
#sum(do==TRUE)/(dim(do1)[1]*dim(do1)[2])
# TESTING:
# sum(do1==TRUE)/(dim(do1)[1]*dim(do1)[2])
# sum(do2==TRUE)/(dim(do2)[1]*dim(do2)[2])
#test1<-melt(cdt)
#test2<-melt(cdt_ps2)
#test3<-data.table(x=test1$value,y=test2$value)
#plot(test3$x,test3$y)
# Apply to dat:
for(itp in 1:length(labels)){
for(tp in labels[1:itp]){
masking_tp<-paste("D",tp,sep="_")
masked_tp<-labels[itp]
evalstring<-paste("dat$",masked_tp,"[do$",masking_tp,"]<-NA",sep="")
eval(parse(text=evalstring))
}
}
return(dat)
}
#' Build your proposed clinical trial design
#'
#' \code{buildtrialdesign} inputs the details of a proposed clinical trial
#' design in an intuitive way and outputs those details in a structured form
#' that can be used by the pmsimstat package simulation tools
#'
#' @seealso
#' \link{ggplot2} for what it's using to plot
#'
#' @param name_longform A name that will clearly remind you what this trial
#' design is. Can have spaces, etc. If not provided, will default to "Trial
#' Design 1"
#' @param name_shortform An abbreviated version of the name that can be used
#' on plots, in your code when you want to select a certain trial design, etc.
#' @param timepoints A numeric vector containing the timepoints (not including
#' baseline) at which anything of relevance happens in your trial design. You
#' must specify any timepoint where a measurement will be taken, the participant's
#' expectancy about what intervention they are receiving changes, or the actual
#' intervention itself (e.g. all randomizationpoints) can change.
#' NOTE: The unit used is not specified - you must just ensure that the unit you
#' use here is consistent with the unit used for e.g. the halflife of any
#' carryover effect. Typical units would be weeks or days.
#' @param timeptnames A character vector containing brief labels for each of the timepoints.
#' If your trial has different phases, it can be helpful to incorporate those into the
#' timepoint names for later easy reference. It is helpful if they are short enough to
#' use as x-axis labels on a plot. If not specified, will default to "V1, V2, V3..." for
#' Visit 1, Visit 2, etc.
#' @param expectancies A numeric vector with all values ranging from 0 to 1, of the same length
#' as timepoints. These values represent the "expectancy" a participant has at any given
#' point that they are receiving active, effective treatment. It can match the probability
#' that a participant is receiving active treatment (e.g. 0.5 for 1:1 randomization of
#' active drug to placebo), but does not have to. It is used to scale the degree of the
#' expectancy-related response factor.
#' @param ondrug A list containing binary vectors that describe when participants are on active
#' treatment or not (whether they are on a placebo or not is irrelevant for this paramater).
#' Each vector is same length as the number of timepoints, and specificies that someone
#' is either on active treatment at that timepoint (1) or not on active treatment at that
#' timepoint (0). Each possible path through the trial is described by a separate vector.
#' E.g., an open label trial will only have one path (a list with one vector, which contains
#' all 1's), a traditional parallel group RCT would have 2 paths (all 1's and all 0's), and
#' a trial with two randomization points would have 4 paths.
#' @return \code{output$metadata} contains the input variables you used for future reference,
#' named as above.
#' @return \code{output$trialpaths} contains a list whose length is defined by the number
#' of paths through the clinical trial (ie, the length of the input variable ondrug, above),
#' containing the information about the trial in the form required by the pmsimstat tools.
#' @examples
#' tdOL<-buildtrialdesign(
#' name_longform="open label",
#' name_shortform="OL",
#' timepoints=cumulative(rep(2.5,8)),
#' timeptname=paste("OL",1:8,sep=""),
#' expectancies=rep(1,8),
#' ondrug=list(
#' pathA=rep(1,8)
#' )
#' )
#' #Builds a 20 week entirely open-label trial
#'
#'tdCO<-buildtrialdesign(
#' name_longform="traditional crossover",
#' name_shortform="CO",
#' timepoints=cumulative(rep(2.5,8)),
#' timeptname=c(paste("COa",1:4,sep=""),paste("COb",1:4,sep="")),
#' expectancies=rep(.5,8),
#' ondrug=list(
#' # Assumes on drug entire previous interval and this measurement point
#' pathA=c(1,1,1,1,0,0,0,0),
#' pathB=c(0,0,0,0,1,1,1,1)
#' )
#' )
#' # Builds a traditional crossover trial, 20 weeks long, with no washout period
#' @export
buildtrialdesign<-function(name_longform,name_shortform,
timepoints,timeptnames,expectancies,ondrug){
# Add defaults for unspecified parameters
if(missing(name_longform)){name_longform="Trial Design 1"}
if(missing(name_shortform)){name_shortform=name_longform}
if(missing(timeptnames)){timeptnames=paste("V",1:(length(timepoints)),sep="")}
# Build list of trial designs, one for each path through
nTD<-length(ondrug)
tds<-vector(mode="list",length=nTD)
t_wk<-c(timepoints[1],
timepoints[2:length(timepoints)]-timepoints[1:(length(timepoints)-1)])
for(iTD in 1:nTD){
od<-ondrug[[iTD]]
od_intervals<-t_wk*od
tod<-od_intervals
tsd<-t_wk*(od!=1)
tpb<-t_wk*(expectancies>0)
for(iTP in 2:length(timepoints)){
if(od_intervals[iTP]>0){
tod[iTP]<-tod[iTP]+tod[iTP-1]
}else{
tsd[iTP]<-tsd[iTP]+tsd[iTP-1]
}
if(tpb[iTP]>0){
tpb[iTP]<-tpb[iTP]+tpb[iTP-1]
}
}
everondrug<-(cumulative(od)>0)
tsd<-everondrug*tsd
tds[[iTD]]<-data.table(timeptnames=timeptnames,t_wk=t_wk,e=expectancies,tod=tod,tsd=tsd,tpb=tpb)
}
# Retain the metadata in structured form in the output
metadata<-list(name_longform=name_longform,
name_shortform=name_shortform,
timepoints=timepoints,
timeptnames=timeptnames,
expectancies=expectancies,
ondrug=ondrug)
# Output
out<-list(metadata=metadata,trialpaths=tds)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.