#' Personalytic, a simplified user interface for linear and generalized linear
#' mixed effects models with high throughput options.
#'
#' @export
#' @import snow
#' @import doSNOW
#' @import foreach
#' @import plyr
#' @import gamlss.dist
#'
#' @description A simplified user interface for longitudinal linear mixed
#' effects models (also known as growth models or hierarchical linear models) using
#' \code{\link{nlme}} and generalized linear mixed effects models using
#' \code{\link{gamlss}} (not currently implemented).
#' The basic mixed effects model is \eqn{dv=time+phase+phase*time}
#' with random intercepts and random slopes for time. The phase variable is optional.
#' Additional independent variables (or covariates) can be included.
#'
#' High throughput capabilities are invoked when the model
#' is to be fit to multiple individuals, multiple dependent variables, iterated
#' over multiple target independent variables, or any combination of these three.
#'
#' The residual correlation structure \code{ARMA(p,q)} is automatically selected
#' among \code{p=1,...,P} and \code{q=1,...,Q}
#' where \code{P} and \code{Q} are selected by the user. Model selection is done
#' by comparing fit indices using either the \code{BIC} or \code{AIC} (ARMA models
#' are not generally nested, precluding the use of likelihood ratio tests).
#'
#' The functional form of the relationship between time and the dependent variable
#' is automatically selected up to \code{time^time_power} where \code{time_power} is
#' selected by the user. Model selection is done using likelihood
#' ratio tests (LRT) with maximum likelihood estimators. For example,
#' if \code{time_power=3} (implying a maximum of a cubic growth model), linear,
#' quadratic and cubic models will be fit. If the best fitting model is a quadratic
#' growth model, then there will be random and fixed effects for \code{time} and
#' \code{time^2}.
#'
#' When there are multiple dependent variables and multiple target independent
#' variables, Type I error corrections or false discovery rate corrections are
#' made across target independent variables within each dependent variable (and
#' within each person if individual models are being fit to the data). Correction
#' options are given in \code{\link{p.adjust}}.
#'
#' @param output Character. The default is \code{NULL}.
#'
#' A character string that will be used to name a file for saving
#' output. If left \code{NULL}, the default is
#' `PersonAlytic_Output`. Do not give a file extension, these will be added
#' automatically. For example, if \code{output='MyResults'}, the output file will
#' be called \code{'MyResults.csv'} if high throughput options are used, or
#' \code{'MyResults.txt'} if only one model is fit. A full path with `/` instead
#' of `\` can also be used. For example, \code{output='C:/MyResults'} will produce
#' the files \code{'C:/MyResults.csv'} or \code{'C:/MyResults.txt'}. If a full
#' path is not given, the results will be saved in the working directory (see
#' \code{\link{getwd}}).
#'
#' If \code{p.method} and \code{nbest} are specified, the output will be saved in
#' a subfolder.
#'
#' @param data A \code{\link{data.frame}}. \code{data} must be provided by the user.
#'
#' \code{data} that contains the variables \code{ids},
#' \code{dv}, \code{time}, and optionally, \code{phase}, \code{ivs},
#' \code{target_ivs}.
#'
#' @param ids Character. \code{ids} must be provided by the user.
#'
#' The name of the ID variable. The ID variable must be a variable in \code{data}
#' and must be numeric.
#'
#' @param dvs Character list. \code{dvs} must be provided by the user.
#'
#' A list of one or more character dependent variable names in \code{data}.
#' The linear mixed effects model
#' \code{dvs[d] ~ phase + time + phase*time + target_ivs[c] + ivs}
#' with random effects \code{~ time | ids[i]} will be fit to the data.
#' The iterators \code{[d]}, \code{[c]}, and \code{[i]}
#' indicate that the model will be fit for each combination of
#' dependent variable in \code{dvs},
#' independent variable in \code{target_ivs}, and
#' each unique ID in \code{ids} (which can be overridden using
#' \code{individual_mods=FALSE}) with each model
#' controlling for the independent variables in \code{ivs}.
#'
#' @param time Character. \code{time} must be provided by the user.
#'
#' The name of the time variable. The time variable must be a variable in \code{data}
#' and must be numeric.
#'
#' @param phase Character. The default value is \code{NULL}.
#'
#' Name of the phase or treatment variable. For model fitting,
#' \code{phase} is treated the same as any variable names in \code{ivs} or
#' \code{target_ivs} but is used for visualizing treatment effects.
#'
#' @param ivs Character list. The default value is \code{NULL}.
#'
#' A list of names of covariates, e.g., \code{list('iv2', 'iv2')}.
#' Note that the variables in \code{ivs} cannot also be in \code{target_ivs}.
#'
#' @param target_ivs Character list. The default value is \code{NULL}.
#'
#' Independent variables that are iterated over (see \code{dv} for details).
#' Effects for these variables are labeled as 'target predictor' in the output.
#'
#' @param interactions Character list. The default value is \code{NULL}.
#'
#' List of pairs of variable names for interactions
#' to include in the model, e.g., \code{list(c('iv1','phase'), c('iv1','iv2'))}.
#'
#' @param time_power Numeric. The default is \code{time_power=3}.
#'
#' Power of the time variable (e.g., \code{time^time_power}).
#' A quadratic or cubic growth model would be specified using
#' \code{time_power=2} or \code{time_power=3}, respectively.
#' If \code{autoSelect$TO$polyMax} is specified, \code{polyMax} is the largest
#' value tested for. The default value is \code{3}, testing up to a cubic growth
#' model. If a linear growth model is desired, set
#' \code{autoSelect$TO=NULL} and \code{time_power=1}.
#'
#' @param correlation Character. The default value is \code{NULL}.
#'
#' See \code{\link{corStruct}} in \code{\link{nlme}}.
#' Must be passed as a character, e.g. \code{"corARMA(p=1)"}.
#' The default value is \code{NULL}, assuming no residual
#' autocorrelation. If \code{auoDetect$AR} is specified, \code{correlation} will
#' be ignored.
#'
#' @param family See \code{\link{gamlss.family}}. The default is normal,
#' \code{family=NO()}. This option is not yet implemented.
#'
#' A list of the same length as \code{length(dv)} can be supplied. For example if
#'\code{dv=list('normal_y', 'binary_y', 'beta_y')}, then family could be
#' \code{family=list(NO(), BI(), BEINF())}. If \code{length(dv)>1} and
#' \code{length(family)==1}, the one distribution in \code{family} will be
#' applied to all outcomes.
#' The \code{family} parameter is ignored if \code{package="nlme"}.
#'
#' @param subgroup Logical vector. The default is \code{subgroup=NULL} wich
#' results in a logical vector of the same length as the number of rows in
#' \code{data} with all values equal to \code{TRUE}.
#'
#' A vector where \code{length(subgroup)==nrow(data)} indicating
#' which subset of the data should be used for analysis. For example, if a model
#' should only be fit to females, \code{subgroup=gender=='female'} might be used.
#'
#' @param standardize Named logical vector. The default is
#' \code{list(dv=FALSE, ivs=FALSE, byids=FALSE)}.
#'
#' Which variables should be standardized?
#' (i.e., rescaled to have 0 mean and unit variance; see
#' \code{\link{scale}})? See \code{dv} and \code{ivs}. The option
#' \code{byids} controls whether standardization is done by individuals or by group.
#' Does not apply to factor variables. The default is \code{TRUE}.
#' Standardization makes parameter estimate magnitudes comparable across
#' individuals, outcomes in
#' \code{dvs}, and covariates in \code{target_ivs}. For dependent variables in
#' \code{dvs}, standardization is only applied for normal outcomes, see \code{family}.
#'
#' @param package Character. The default is \code{"nlme"}.
#'
#' Which package should be used to fit the models?
#' Options are \code{\link{nlme}}, \code{\link{gamlss}}, and \code{\link{arma}}.
#' It is passed as character strings, e.g., \code{"gamlss"} or \code{"nlme"}. If
#' there is only one participant in \code{ids}, "arma" will be used.
#'
#' @param method character. The default is \code{"REML"}.
#'
#' Which likelihood methods should be used to fit the models? Options are
#' \code{"REML"}, which should be used for final parameter estimates and
#' effect sizes, and \code{"ML"}, which should be used for model comparisons
#' (e.g., this is done for automatic residual correlation structure detection
#' and automatic time order detection).
#'
#' @param individual_mods Logical. The default is \code{individual_mods=FALSE}.
#'
#' Should individual models be fit for each ID in \code{ids}?
#'
#' @param PalyticObj See \code{\link{Palytic}}. Not currently implemented.
#'
#' If \code{PalyticObj} is submitted
#' then only \code{dvs}, \code{target_ivs}, and \code{individual_mods} will be
#' used. This allows users access to additional options including generalized linear
#' mixed effects models via the \code{family} option, user specified \code{correlation}
#' structures (in non-\code{NULL} this will override the automated correlation structure
#' search), and user specified models via \code{formula}.
#'
#' @param autoSelect List. The default is
#' \code{
#' list(AR=list(P=3, Q=3) ,
#' TO=list(polyMax=3) ,
#' DIST=list()) }.
#'
#' If no automated model selection for the residual covariance structure (\code{AR}),
#' the polynomial order for the relationship between time and the dependent variable
#' (\code{TO}), or the dependent variable distribution is desired, an empty list
#' should be passed (e.g., \code{autoSelect=list()}).
#'
#' If \code{AR} is in the list,
#' the residual correlation structure will be automatically selected from
#' among \code{ARMA(p,q)} models? See \code{correlation}. Since these models are
#' not generally nested, model selection is done using information information
#' criterion (see \code{whichIC}). Model selection for the residual covariance
#' structure is searches among
#' \code{p=1,...,P} and \code{p=1,...,Q}, where \code{P} and \code{Q} are taken
#' from \code{PQ}, i.e., \code{PQ=c(P,Q)}. The values of \code{p} and \code{p}
#' are passed to \code{\link{corARMA}} ( e.g., \code{corARMA(p=p,q=q)}).
#' If \code{individual_mods=FALSE}, this done
#' comparing \code{lme} modes for N>1 data. If \code{individual_mods=TRUE},
#' this is done using the \code{\link{auto.arima}} function on the residuals for
#' each individual. For more detail, see the \code{$GroupAR()}
#' method in \code{\link{Palytic}}.
#'
#' If \code{TO} is in the list, models with polynomial powers of time from 1 to
#' \code{polyMax} will be tested.
#' For example, if \code{polyMax=3} (implying a cubic growth model), the models
#' compared include \code{time}, \code{time + I(time^2)}, and
#' \code{time + I(time^2)+I(time^3)}. Since these models are nested, the best
#' fitting model is selected using likelihood ratio tests with mixed effects
#' models fit using maximum likelihood estimators in \code{\link{lme}}.
#' This is done separately for each individual in \code{ids} if
#' \code{individual_mods=TRUE}. For more detail, see the \code{$getTO()}
#' method in \code{\link{Palytic}}.
#'
#' If \code{DIST} is in the list and \code{package='gamlss'}, each dependent
#' variable in \code{dvs} will utilize the \code{\link{fitDist}} function of
#' the gamlss package, and the best fitting distribution will be used for each
#' dependent variable. For more detail, see the \code{$dist()} method in
#' \code{\link{Palytic}}. To narrow the distributions that will be tested,
#' the user must specify whether to
#' rescale the dependent variable to the (0,1) range with \code{to01}.
#' Currently settings
#' for \code{DIST} apply to all dependent variables in \code{dvs}. This will be
#' generalized to dependent variable specifics settings in a future release. If
#' your dependent variables require different \code{DIST} settings, use separate
#' calls to \code{PersonAlytic}.
#'
#' @param whichIC Character. The default is \code{whichIC="BIC"}.
#'
#' Either the Akaike Information Criterion (\code{whichIC="AIC"}) or
#' the Bayesian Information Criterion (\code{whichIC="BIC"}).
#'
#' If the \code{time} variable is equally spaced, this is
#' done using the function \code{\link{forecast}}. If the \code{time} variable
#' is not equally spaced, this is done using comparisons of
#' mixed effects models using \code{\link{lme}} fit using maximum likelihood
#' estimators.
#'
#' Residual autocorrelation structure is detected separately for each individual
#' in \code{ids} if \code{individual_mods=TRUE}.
#'
#'
#' @param charSub Character list. The default in \code{charSub=NULL}.
#'
#' A list of paired character strings for character substitution
#' in the output. If the names of the target predictors
#' in \code{target_ivs} had to be edited to make valid variable names, this
#' parameter allows users put the illegal characters back in. For example,
#' if the original variable name was "17.00_832.2375m/z", a letter would need to
#' prefix the variable name and the
#' "/" would need to be replaced with another character, e.g., "X17.00_832.2375m.z".
#' To get the row names of the output back to original variable name, use
#' \code{charSub=list(c("X", ""), c("m.z", "m/z"))}. Note that inputs to charSub
#' must be in double quotes and are case sensitive. All duplicates will be substituted.
#' For example, if the variable name was "X1X23.x" and \code{charSub=list(c("X", ""))},
#' the resulting row label for this variable would be "123.x".
#'
#' @param sigma.formula Not currently implemented.
#'
#' A formula for the variance under \code{\link{gamlss}}.
#' Currently static: it will not change dynamically over iterations nor will it be
#' updated by \code{time_power} or \code{autoSelect}. If model fitting using this
#' option fails, another attempt will be made after resetting it to its default,
#' i.e., \code{~1}.
#'
#' @param p.method See \code{\link{p.adjust.methods}}. When \code{individual_mods=TRUE},
#' \code{length(dvs)>1}, or \code{length(target_ivs)>1}, p-value adjustments
#' are made and reported in a separate folder saved to the working directory. The
#' parameter \code{nbest} must also be specified.
#'
#' @param alpha Numeric value in the (0,1) interval. The Type I error rate for
#' adjusting p-values.
#'
#' @param nbest Numeric integer value. The number of best \code{target_ivs} to
#' report on before and after adjusting p-values. This is only used if
#' \code{length(target_ivs)>1} and the \code{target_ivs} are all continuous.
#'
#' @param alignPhase Character. The default is \code{'none'} which leaves time
#' as is.
#'
#' Other options are \code{'piecewise'} which leads to a piecewise growth model
#' and sets time to be 0 at the beginning of each phase, see \code{\link{pwtime}}.
#' The other option is \code{'align'} which aligns time at 0 between the first
#' and second phase.
#'
#' Should the time variable be realigned at the phase?
#' If \code{TRUE} (the default), the time for first observation in the second phase
#' becomes 0 and times prior to the second phase are negative. For example, if the
#' time variable is \code{c(0,1,2,3,4,5)} and the phase variable is
#' \code{c(0,0,0,1,1,1)}, phase alignment yields a time variable of
#' \code{c{-3,-2,-1,0,1,2}}. This is useful when the timing of the transition
#' between the first and second phases varies by individual, especially for
#' graphing. This approach does not generalize to three or more phases, and
#' alignment only happens at the first phase transition. If there are three or
#' more phases, the later phase will not be aligned.
#'
#' @param fpc Numeric. The default is 0. If the value of fpc is greater than
#' the number of individuals in the data set, fpc is taken as the size of the
#' finite population from which the data were sampled, and a finite population
#' correction is made for the fixed effects standard errors, t-tests, and
#' p-values.
#'
#' @param debugforeach Logical. The default is \code{debugforeach=FALSE}. Not
#' implemented.
#'
#' @param cores Integer. The defaults is \code{parallel::detectCores()-1}, or
#' one fewer cores than what is detected on the machine.
#'
#' @param userFormula List of formulae used to override default model
#' construction. Items in the list must include \code{fixed} and \code{random}
#' to specify the parameters of the same names using \code{\link{lme}}, or
#' \code{formula} to specify the parameter of the same name in
#' \code{\link{gamlss}}. See \code{\link{byPhasebyGroup}} for an example.
#'
#' @param ... Not currently used.
#'
#' @examples
#'
#' \dontrun{
#'
#' # full sample model
#' t0 <- PersonAlytic(output = 'Test0' ,
#' data = OvaryICT ,
#' ids = "Mare" ,
#' dvs = "follicles" ,
#' phase = "Phase" ,
#' time = "Time" ,
#' package = "nlme" )
#'
#' # individual models
#' t1 <- PersonAlytic(output = 'Test1' ,
#' data = OvaryICT ,
#' ids = "Mare" ,
#' dvs = "follicles" ,
#' phase = "Phase" ,
#' time = "Time" ,
#' package = "arma" ,
#' individual_mods = TRUE )
#'
#' summary(t0)
#' summary(t1)
#'
#'
#' # full sample model with a finite population correction (FPC)
#' t0fpc <- PersonAlytic(output = 'Test0FPC' ,
#' data = OvaryICT ,
#' ids = "Mare" ,
#' dvs = "follicles" ,
#' phase = "Phase" ,
#' time = "Time" ,
#' package = "nlme" ,
#' fpc = 100 )
#'
#' # gamlss with two distributions - features not implemented
#' #OvaryICT$follicles01 <- to01(OvaryICT$follicles)
#' #t2 <- PersonAlytic(data=OvaryICT,
#' # ids="Mare",
#' # dvs=list("follicles", "follicles01"),
#' # phase="Phase",
#' # time="Time",
#' # family=c(NO(), BEINF()),
#' # package='gamlss')
#'
#'
#' # individual models with target variables
#' t3 <- PersonAlytic(output = 'TargetIVStest' ,
#' data = OvaryICT ,
#' ids = "Mare" ,
#' dvs = "follicles" ,
#' phase = "Phase" ,
#' time = "Time" ,
#' package = "arma" ,
#' individual_mods = TRUE ,
#' target_ivs = names(OvaryICT)[6:11] )
#'
#' # multiple DVs with no target_ivs and group models
#' t4 <- PersonAlytic(output = 'MultiDVnoIDnoIV' ,
#' data = OvaryICT ,
#' ids = "Mare" ,
#' dvs = names(OvaryICT)[c(3,9:11)] ,
#' phase = "Phase" ,
#' time = "Time" ,
#' package = "lme" ,
#' individual_mods = FALSE ,
#' target_ivs = NULL ,
#' autoSelect = list() )
#'
#'
#' # repeat t4 with finite population correction
#' t5 <- PersonAlytic(output = 'MultiDVnoIDnoIVFPC' ,
#' data = OvaryICT ,
#' ids = "Mare" ,
#' dvs = names(OvaryICT)[c(3,9:11)] ,
#' phase = "Phase" ,
#' time = "Time" ,
#' package = "lme" ,
#' individual_mods = FALSE ,
#' target_ivs = NULL ,
#' autoSelect = list() ,
#' fpc = 200 )
#'
#' # repeat t5 with piecewise model, first making time an integer (required for piecewise)
#' OvaryICT$TimeP <- round(30*OvaryICT$Time)
#' t6 <- PersonAlytic(output = 'PiecewiseExample' ,
#' data = OvaryICT ,
#' ids = "Mare" ,
#' dvs = names(OvaryICT)[c(3,9:11)] ,
#' phase = "Phase" ,
#' time = "TimeP" ,
#' package = "lme" ,
#' individual_mods = FALSE ,
#' target_ivs = NULL ,
#' autoSelect = list() ,
#' fpc = 200 ,
#' alignPhase = "piecewise" )
#'
#' # clean up
#' file.remove( 'PiecewiseExample_PersonAlytic.csv' )
#' file.remove( 'REMLlme.txt' )
#' file.remove( 'MultiDVnoIDnoIVFPC_PersonAlytic.csv' )
#' file.remove( 'MultiDVnoIDnoIV_PersonAlytic.csv' )
#' file.remove( 'TargetIVStest_PersonAlytic.csv' )
#' file.remove( 'NotREMLarma.txt' )
#' file.remove( 'Test0FPC.txt' )
#' file.remove( 'Test1_PersonAlytic.csv' )
#' file.remove( 'Test0.txt' )
#'
#' }
# \dontrun{
# # if you wish to delete the automatically created csv file, run
# #NOT IMPLEMENTED YET
# }
# non-user options allowed in ...
# packageTest - override the package, for research purposes
PersonAlytic <- function(output = NULL ,
data ,
ids ,
dvs ,
time ,
phase = NULL ,
ivs = NULL ,
target_ivs = NULL ,
interactions = NULL ,
time_power = 1 ,
correlation = NULL ,
family = gamlss.dist::NO() ,
subgroup = NULL ,
standardize = list(dv=FALSE, iv=FALSE, byids=FALSE) ,
method = 'REML' ,
package = 'nlme' ,
individual_mods = FALSE ,
PalyticObj = NULL ,
autoSelect = list(AR=list(P=3, Q=3) ,
TO=list(polyMax=3) ,
DIST=list()) ,
whichIC = c("BIC", "AIC") ,
charSub = NULL ,
sigma.formula = ~1 ,
p.method = "BY" ,
alpha = .05 ,
nbest = NULL ,
alignPhase = 'none' ,
fpc = 0 ,
debugforeach = FALSE ,
cores = parallel::detectCores()-1 ,
userFormula = NULL ,
...)
{
#message('alignPhase=',alignPhase)
if(length(whichIC)>1) whichIC <- whichIC[1]
if(is.null(correlation)) correlation <- "NULL"
# output labeling ####
if( is.null(output)) output <- "PersonAlytics_Output"
# create copy that can be manipulated w/o loosing original
fileLabel <- output
# check the subgroup input ####
if(!is.logical(subgroup) & !is.null(subgroup))
{
stop('`subgroup` must be a logical vector with TRUE/FALSE values.')
}
if(is.null(subgroup)) subgroup <- rep(TRUE, nrow(data))
# override user defaults if there is a data/package/individual_mods mismatch
n <- length(unique(data[[ids]][subgroup]))
if( n == 1 & package != "arma" )
{
message('\nThere is only one participant,', '
automatically switching to `package="arma"`.\n')
package <- 'arma'
}
if( n > 1 & package == 'arma' &
individual_mods != TRUE)
{
message('There is more than one participant, using `package="nlme"`.',
'\nIf individual models are needed, switch to `individual_mods=TRUE`.')
package <- 'nlme'
}
if( individual_mods == TRUE & package != 'arma' )
{
message('\nWhen `individual_mods=TRUE` you must use `package="arma"`.',
'\nAutomatically switching to `package="arma".\n')
package <- 'arma'
}
# for internal testing use only, allows us to compare n=1 lme models to
# arma models
#args <- list(packageTest='nlme')
args <- list(...)
if(package=='lme') package <- 'nlme'
if("packageTest" %in% names(args))
{
if(args$packageTest=='lme') args$packageTest <- 'nlme'
if(args$packageTest %in% c('nlme', 'gamlss', 'arma')) package <- args$packageTest
message('\npackage was overridden by packageTest to be `', package, '`\n')
}
# check userFormula
userFormulae <- list(
fixed=NULL,
random=NULL,
formula=NULL)
if(!all(unlist(lapply(userFormula, is.null))))
{
supportedForms <- names(userFormula)[names(userFormula) %in%
names(userFormulae)]
for(i in seq_along(supportedForms))
{
if(!inherits(userFormula[[supportedForms[i]]], "formula"))
{
stop("\nuserFormula$", supportedForms[i], " is not a formula:\n\n",
userFormula[[supportedForms[i]]], "\n\n use:\n",
"userFormula$", supportedForms[i], " <- formula(",
userFormula[[supportedForms[i]]], ")\n\n")
}
}
unsupportedForms <- names(userFormula)[!names(userFormula) %in%
names(userFormulae)]
if(length(unsupportedForms)> 0)
{
warning("\nThe following items in userFormula are not yet supported:\n\n",
paste(unsupportedForms, "\n"))
}
if(length(userFormula$fixed) < 3)
{
stop("\nlength(userFormula$fixed) must be 3, make sure there is a LHS",
"\nvariable. If multiple dvs are present, this will be overwritten.")
}
}
# if -1 is at the end of fixed or formula, it needs to be at the front
nointCheck <- function(frm)
{
oldfrm <- as.character(frm)
temp <- unlist(strsplit(oldfrm[length(oldfrm)], "-"))
if(temp[length(temp)] == " 1")
{
olddv <- ifelse(oldfrm[1]=="~", oldfrm[2], oldfrm[1])
newfrm <- formula(paste(olddv, "~ -1 +", temp[1]))
return(newfrm)
}
if(temp[1]=="") return(frm)
else return(frm)
}
if(!is.null(userFormula$fixed)) userFormula$fixed <- nointCheck(userFormula$fixed)
# next line is untested
if(!is.null(userFormula$formula)) userFormula$formula <- nointCheck(userFormula$formula)
# override individual_mods if only 1 id, 1 dv, <= 1 target_iv
luid <- length(unique(data[[ids]]))
if(individual_mods==TRUE & luid==1 &
length(dvs)==1 & length(target_ivs)<=1)
{
individual_mods <- FALSE
}
# finite population correction (fpc) settings
if( fpc > n ) fpcCheck(fpc, n)
if( is.numeric(fpc) )
{
popsize2 <- fpc
.fpc <- TRUE
}
if( !is.numeric(fpc) | fpc <= n )
{
popsize2 <- NULL
.fpc <- FALSE
}
fpc <- .fpc; rm(.fpc)
# augment time
time <- list(raw = time ,
power = time_power ,
analysis = time )
#cat(package, file=paste(format(Sys.time(),"%Y-%m-%d-%H-%M-%S"), 'txt', sep='.'))
# call 'methods'
if(individual_mods==FALSE & length(dvs)==1 & length(target_ivs)<=1)
{
pout <- pa1()
}
if(individual_mods==TRUE | length(dvs)>1 | length(target_ivs)>1)
{
pout <- paHTP()
}
while( sink.number() > 0 ) sink()
return(pout)
}
#TODO(Stephen): individual_mods = FALSE is yielding different results than
#if running via htp (which can be forced), see the example in
#R:\PaCCT\02 Clients\curelator\Analyses\20181004\20181004 Curelator Analyses
#PersonAlytics_v0.1.2.2.r
#' pa1
#' @author Stephen Tueller \email{stueller@@rti.org}
#' @keywords internal
pa1 <- function(e=parent.frame())
{
# set output filename ####
fileName <- paste(e$fileLabel, '_PersonAlytic.txt', sep='')
if(is.null(e$subgroup)) e$subgroup <- rep(TRUE, nrow(e$data))
# concatenate ivs
ivs <- c(e$ivs, e$target_ivs)
ivs <- ivs[!duplicated(ivs)]
t1 <- Palytic$new(data=e$data ,
ids=e$ids ,
dv=e$dvs[[1]] ,
time=e$time$raw ,
phase=e$phase ,
ivs=ivs ,
interactions=e$interactions ,
time_power=e$time_power ,
alignPhase=e$alignPhase ,
correlation=e$correlation ,
family=e$family ,
method=e$method ,
standardize=e$standardize ,
autoSelect=e$autoSelect )
# userFormula
if(!is.null(e$userFormula$fixed))
{
dvFormula <- e$userFormula
rhs <- Reduce(paste, deparse(dvFormula$fixed[[3]]))
dvFormula$fixed <- formula(paste(e$dvs, "~", rhs))
t1$fixed <- dvFormula$fixed
}
if(!is.null(e$userFormula$random))
{
t1$random <- e$userFormula$random
}
if(!is.null(e$userFormula$formula))
{
t1$formula <- e$userFormula$formula
}
# autoselection
if(e$package=="nlme" | e$package=="arma")
{
temp <- t1$autoSelect
temp$DIST <- NULL
t1$autoSelect <- temp; rm(temp)
}
dims <- list(ID="All Cases")
if(var(t1$datac[[t1$ids]][e$subgroup], na.rm=TRUE)==0)
{
dims <-list(ID=sort(unique(e$data[[e$ids]])))
}
t1$detect(model=NULL, parallel="snow", plot=FALSE, userFormula=e$userFormula,
dims=dims)
# t1$correlation
# t1$formula
# fit the models
if(e$package=="gamlss") Grp.out <- t1$gamlss(e$subgroup, sigma.formula=e$sigma.formula)
if(e$package=="nlme") Grp.out <- t1$lme(e$subgroup, fpc=e$fpc,
popsize2=e$popsize2)
if(e$package=="arma") Grp.out <- t1$arma(e$subgroup )
sink(file=fileName)
print( Grp.out$tTable )
cat("\n\n")
print( t1$summary()$skew_kurt )
sink()
return(Grp.out)
}
#' paHTP
#' @author Stephen Tueller \email{stueller@@rti.org}
#' @keywords internal
paHTP <- function(e=parent.frame())
{
# set output filename ####
fileName <- paste(e$fileLabel, '_PersonAlytic.csv', sep='')
# check that dvs, target_ivs are lists, if not, force
if( ! "list" %in% class(e$dvs) ) e$dvs <- as.list(e$dvs)
if( ! "list" %in% class(e$ivs) ) e$ivs <- as.list(e$ivs)
#if( ! "list" %in% class(e$target_ivs) ) e$target_ivs <- as.list(e$target_ivs)
# check that inputs conform. This is also done when creating a Palytic
# object, but we do it early on here to avoid problems after loops start.
# Set standardize to FALSE here, standardization will be done later if
# requested by the user (this avoids standardizing standardized variables,
# which may differ under different subsets, e.g., individual_models = TRUE).
e$data <- clean(
data = e$data ,
ids = e$ids ,
dv = NULL ,
time = e$time ,
phase = e$phase ,
ivs = e$ivs ,
fixed = NULL ,
random = NULL ,
formula = NULL ,
correlation = e$correlation ,
family = e$family ,
dvs = e$dvs ,
target_ivs = e$target_ivs ,
standardize = list(dvs=FALSE,ivs=FALSE,byids=FALSE) ,
alignPhase = "none" ,
debugforeach = e$debugforeach )
# standardize target_ivs
if(e$standardize$iv)
{
message('\nPersonAlytics is standardizing the variables in `targe_ivs`.\n')
for(i in seq_along(e$target_ivs))
{
tiv <- e$target_ivs[[i]]
if(is.numeric(e$data[tiv]))
{
e$data[[tiv]] <- scale(e$data[[tiv]])
}
}
}
# subgroup the data and delete the parameter, after this point, it is only
# used to subgroup to unique ids
if( is.null(e$subgroup)) e$subgroup <- rep(TRUE, nrow(e$data))
if(!is.null(e$data)) e$data <- e$data[e$subgroup,]
# check whether any variables in ivs are in target_ivs -
# in the future, split them out automatically
if(any(e$ivs %in% e$target_ivs) | any(e$target_ivs %in% e$ivs))
{
stop('target_ivs and ivs cannot share any variables.')
}
# autoselection
if(e$package=="nlme" | e$package=="arma")
{
e$autoSelect$DIST <- NULL
}
# unique ids
uids <- sort(unique(e$data[[e$ids]]))
# dimensions for loops
ID <- indID <- uids
IV <- seq_along(e$target_ivs)
if(is.null(e$target_ivs) | length(e$target_ivs)==0) IV <- 1
DV <- seq_along(e$dvs)
dims <- list(ID=ID, IV=IV, DV=DV, indID=indID)
if( e$debugforeach )
{
cat('\n\n\n')
print(head(e$data) ); cat('\n')
print(dims ); cat('\n')
print(e$dvs ); cat('\n')
print(e$phase ); cat('\n')
print(e$ids ); cat('\n')
print(uids ); cat('\n')
print(e$time ); cat('\n')
print(e$ivs ); cat('\n')
print(e$target_ivs ); cat('\n')
print(e$interactions ); cat('\n')
print(e$time_power ); cat('\n')
print(e$alignPhase ); cat('\n')
print(e$correlation ); cat('\n')
print(e$family ); cat('\n')
print(e$standardize ); cat('\n')
print(e$fpc ); cat('\n')
print(e$popsize2 ); cat('\n')
print(e$package ); cat('\n')
print(e$autoSelect ); cat('\n')
print(e$PQ ); cat('\n')
print(e$whichIC ); cat('\n')
print(e$polyMax ); cat('\n')
print(e$sigma.formula); cat('\n')
print(e$debugforeach ); cat('\n')
print(e$cores ); cat('\n')
print(e$userFormula ); cat('\n')
cat('\n\n\n')
}
#
if( e$individual_mods )
{
DVout <- htp(data = e$data ,
dims = dims ,
dvs = e$dvs ,
phase = e$phase ,
ids = e$ids ,
uids = uids ,
time = e$time ,
ivs = e$ivs ,
target_ivs = e$target_ivs ,
interactions = e$interactions ,
time_power = e$time_power ,
alignPhase = e$alignPhase ,
correlation = e$correlation ,
family = e$family ,
standardize = e$standardize ,
fpc = e$fpc ,
popsize2 = e$popsize2 ,
package = e$package ,
autoSelect = e$autoSelect ,
whichIC = e$whichIC ,
sigma.formula = e$sigma.formula ,
debugforeach = e$debugforeach ,
userFormula = e$userFormula ,
cores = e$cores )
}
if( !e$individual_mods )
{
grp.dims <- dims
grp.dims$ID <- "All Cases"
DVout <- htp(data = e$data ,
dims = grp.dims ,
dvs = e$dvs ,
phase = e$phase ,
ids = e$ids ,
uids = uids ,
time = e$time ,
ivs = e$ivs ,
target_ivs = e$target_ivs ,
interactions = e$interactions ,
time_power = e$time_power ,
alignPhase = e$alignPhase ,
correlation = e$correlation ,
family = e$family ,
standardize = e$standardize ,
fpc = e$fpc ,
popsize2 = e$popsize2 ,
package = e$package ,
autoSelect = e$autoSelect ,
whichIC = e$whichIC ,
sigma.formula = e$sigma.formula ,
debugforeach = e$debugforeach ,
userFormula = e$userFormula ,
cores = e$cores )
}
# clean up variable names
if(!is.null(e$charSub))
{
DVout$target_iv <- as.character(DVout$target_iv)
for(i in seq_along(e$charSub))
{
DVout$target_iv <- gsub(e$charSub[[i]][1], e$charSub[[i]][2], DVout$target_iv)
}
}
# fix columns that are lists and reorder the columns
DVout <- do.call(data.frame, lapply(DVout, nnull))
# attempt adjusted p-values
if(!is.null(e$p.method) & length(e$target_ivs) > 1 & !is.null(e$nbest) )
{
message("\np-value adjustment method is: ", e$p.method, ",",
"\nsaving the adjusted p-values for the", e$nbest, " largest effect sizes of the\n",
length(e$target_ivs), " target ivs...\n\n")
DVpsuite <- try( psuite(DVout, e$ids,
output = e$fileLabel,
rawdata=e$data,
method=e$p.method,
nbest=e$nbest,
alpha=e$alpha), silent = TRUE )
if(!any(class(DVpsuite) %in% "try-error"))
{
if( ncol(DVpsuite) == (ncol(DVout)+length(e$p.method)+1) )
{
DVout <- DVpsuite
message("\np-value adjustments completed.")
}
else message("\np-value adjustments failed.")
}
else message("\np-value adjustments failed.")
}
# save location same as the folder created by psuite
dn <- paste('PersonAlytics output for', e$fileLabel)
Imoved <- FALSE
if(dir.exists(dn))
{
setwd(dn)
Imoved <- TRUE
}
# save the output
write.csv(DVout, file=fileName, row.names=FALSE)
# move back to parent directory
if(Imoved) setwd("../")
return(DVout)
}
#' nnull - fix columns that are actually lists
#' @keywords internal
nnull <- function(x)
{
if(is.list(x))
{
if( any(as.character(x)=="NULL") )
{
return( unlist(as.character(x)) )
}
if(!any(as.character(x)=="NULL") )
{
return( unlist(x) )
}
}
if(!is.list(x))
{
return( unlist(x) )
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.