# Copyright 2007-2022 Timothy C. Bates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# An institution is the lengthened shadow of one man. Emerson.
# =======================
# = Markdown hyperlinks =
# =======================
# [func()] = function in package = \code{\link[=func]{func()} Another package = [pkg::func()]
# [object] = object, class, data set, etc. in same package. Another package [pkg::object]
# [`object`] = object, typeset as code
# [link text][object] = Different link text. Another package = [link text][pkg::object]
#  = Image in man/figures
# cran_prep = check_for_cran("~/bin/umx/", show_status=FALSE)
# cran_prep$cran_summary()
# Makevars for clang
# mate ~/.R/Makevars
# https://gist.github.com/tbates/9cec0a93e04c06c41b550454eaa892a3
#
# for Mojave
# sudo installer -pkg /Library/Developer/CommandLineTools/Packages/macOS_SDK_headers_for_macOS_10.14.pkg -target /
#
# Plays to watch: "Copenhagen" (Michael Frein); "The chemistry between them" (Dorothy Hodgkin)
#
# ===============================
# = Highlevel models (ACE, GxE) =
# ===============================
.onAttach <- function(libname, pkgname){
# umx_set_condensed_slots(FALSE)
umx_set_plot_format('DiagrammeR')
umx_set_dollar_symbol(umx.dollar.symbol = "\u00A3") # = GBP
umx_set_plot_file_suffix(umx.plot.suffix = "gv")
umx_set_plot_use_hrbrthemes(FALSE)
umx_set_silent(FALSE)
# if(is.null(getOption('knitr.table.format'))){
# umx_set_table_format('markdown')
# options('knitr.table.format' = "markdown")
# }
umx_set_auto_plot(TRUE)
umx_set_auto_run(TRUE)
umx_set_data_variance_check(minVar = .1, maxVarRatio = 1000)
umx_set_separator(umx_default_separator = ",")
umx_set_silent(FALSE)
umx_set_table_format("markdown")
# umx_complete_dollar()
packageStartupMessage("For an overview type '?umx'")
}
# Forgettable functions
# MASS mvrnorm # Used in umx_make_TwinData
# nlme intervals # Used in umxAPA
# polycor hetcor # Used in umxHetCor
# xtable xtable # Used in umxCompare
# MuMIn Weights # Used in umxWeightedAIC
# methods::setClass is called during build not package source code.
# #' @importFrom Base::charToRaw
#' @importFrom DiagrammeR grViz
#' @importFrom DiagrammeR DiagrammeR
# #' @importFrom DiagrammeRsvg export_svg
# #' @importFrom rsvg rsvg_png rsvg_pdf
#' @importFrom graphics plot abline
#' @importFrom methods as getSlots is slotNames setClass
#' @importFrom stats AIC C aggregate as.formula coef complete.cases
#' @importFrom stats confint cor cov cov.wt cov2cor df lm cor.test dnorm pnorm reshape
#' @importFrom stats logLik na.exclude na.omit pchisq pf qchisq predict
#' @importFrom stats qnorm quantile reformulate residuals rnorm runif sd
#' @importFrom stats setNames update var delete.response terms model.frame
#' @importFrom utils combn data flush.console read.table txtProgressBar
#' @importFrom utils globalVariables write.table packageVersion
#' @importFrom utils browseURL install.packages str read.csv read.delim
#' @importFrom MASS mvrnorm
#' @importFrom nlme intervals lme
#' @importFrom polycor hetcor
#' @importFrom xtable xtable
#' @importFrom MuMIn Weights
#' @importFrom ggplot2 ggplot qplot ggtitle ylab xlab labs
#' @importFrom ggplot2 scale_x_continuous scale_x_continuous theme
#' @importFrom ggplot2 geom_abline geom_bar geom_curve geom_errorbar geom_hline geom_jitter geom_line
#' @importFrom ggplot2 geom_point geom_ribbon geom_segment geom_smooth geom_vline arrow unit
#' @importFrom ggplot2 aes aes_string annotate coord_cartesian element_blank element_text
#' @importFrom ggplot2 ggtitle xlab ylab scale_fill_hue expand_limits position_dodge
#' @importFrom ggplot2 theme_bw theme_gray theme_minimal
#' @importFrom cowplot draw_label plot_grid ggdraw
#' @importFrom knitr kable
#' @importFrom kableExtra kbl add_footnote column_spec footnote
#' @importFrom kableExtra kable_classic kable_classic_2 kable_minimal kable_material kable_material_dark kable_paper
utils::globalVariables(c(
## from Doc
"top.ManMean",
'top.bCov',
'top.meanT1',
'top.meanT2MZ',
'top.meanT2DZ',
'top.meanT2sib',
'psi_a',
'psi_c',
'psi_e',
'unitM',
'Mean',
'FacMean',
'B',
'lamba',
'solvBE',
'psi_a',
'psi_c',
'psi_e',
'filter',
'Ts_',
'K',
'CovMZ_',
## from main umx files
"xFamMean", "Bwithin",
"xDiff",
"yDiff",
'ci.lower',
'ci.upper',
'r',
'xLevel',
'N',
'x',
'xtable',
'M', 'S',
'A', 'E',
'a', 'c', 'e',
'Am', 'Cm', 'Em',
'am', "cm",'em',
'Af', 'Cf', 'Ef',
'af', 'cf', 'ef',
"as", 'cs', 'es',
'ai', 'ci', 'ei',
'a_cp', 'c_cp', 'e_cp',
'A_cp', 'C_cp', 'E_cp',
'ACE', 'AC', "ACf", "ACm", 'hAC', 'hACf', 'hACm',
'ACEf', "ACEm",
'A11', 'A12', 'A21', 'A22', 'E11', 'E22','C11', 'C12', 'C21', 'C22',
'ACE.Af', 'ACE.Am', 'ACE.Cf', 'ACE.Cm', 'ACE.Ef', 'ACE.Em',
"ACE.af", "ACE.am", "ACE.cf", "ACE.cm", "ACE.ef", "ACE.em",
'ACE.Vf', 'ACE.Vm',
'Amf', 'Cmf',
'Asm', 'Asf', 'Csm', 'Csf',
'asf', 'asm', 'csf', 'csm',
'rAf', 'rAm', 'rCf', 'rCm', 'rEf', 'rEm',
'top.betaLin', 'top.betaQuad',
'top.a', 'top.c', 'top.e',
'top.A', 'top.C', 'top.E',
'top.a_std', 'top.c_std', 'top.e_std',
'top.A', 'top.a', 'top.af', 'top.ai', 'top.am', 'top.as', 'top.a_cp',
'top.C', 'top.c', 'top.cf', 'top.ci', 'top.cm', 'top.cs', 'top.c_cp',
'top.E', 'top.e', 'top.ef', 'top.ei', 'top.em', 'top.es', 'top.e_cp',
'top.expCovDZ', 'top.expCovMZ', 'top.expMeanDZ', 'top.expMeanMZ',
'top.Means', 'top.nSib', 'top.nVar', 'top.cp_loadings',
'common_loadings','cp_loadings',
'CorMFa', 'CorMFc', 'covT1', 'covT2', 'Def1', 'Def2', 'Def1Rlin', 'Def1Rquad', 'Def2Rlin', 'Def2Rquad', 'L', 'diagL',
'DZ.covsT1DZ' , 'MZ.covsT1MZ' ,
'DZ.covsT2DZ' , 'MZ.covsT2MZ' ,
'DZ.objective' , 'MZ.objective' ,
'DZf.objective' , 'MZf.objective' ,
'DZff.objective', 'MZff.objective',
'DZm.objective' , 'MZm.objective' ,
'DZmm.objective', 'MZmm.objective',
'DZfm.objective',
'Mf', 'Mm',
'MZW', 'DZW',
'fmCOV','mfCOV',
'newLbound',
# from umxACEv
'InvSD',
# from umxACEcov
'varStarts',
'lowerB',
'lowerW',
'CovB',
'CovW',
'CovWB',
'bCovWBb',
'bCovBb',
'bCovWB',
'bCovB',
'CovWBb',
'CovBb',
# from umxACE_cov_fixed
'top.Intercepts',
'defCovT1',
'top.betas',
'defCovT2',
'defCovT1',
'defCovT2',
'top.Intercepts',
'top.betas',
'Iden',
'nDv',
'meanDZ', 'meanMZ',
'nFac_UnitCol', 'nFac_Iden', 'nVar', 'nVar2', 'nVarIdenMatrix', 'nVarUnit', 'betas',
'oneTwinMeans', 'predMeanT1DZ', 'predMeanT1MZ', 'predMeanT2DZ', 'predMeanT2MZ',
'R', 'Ra', 'Raf', 'Ram', 'Rc', 'Rcf', 'Rcm', 'Ref', 'Rem',
'thresholdDeviations', 'totalVariance', 'UnitLower',
'V', 'Vf', 'Vm', 'Vtot',
"C", "logLik", "var", 'SD', 'StdDev',
'binLabels', 'Unit_nBinx1',
'correlatedA', 'minCor', 'pos1by6', 'varList', 'Im1', 'IZ', 'ZI', 'Z',
# umxDOC
"dzAr", "Vmz", "Vdz",
"FacLoadtw", "cause", "Diag1", "Ones",
"Asmz", "Asdz", "Cstw", "Estw",
"specCovMZ", "specCovDZ", "FacCovMZ", "FacCovDZ",
# Used in tmx_genotypic_effect
"x1", "y1", "y2", "dose", "value", "freq", "Frequency",
# Used in umxGxE
"DefT1", "DefT2", "top.betaCoTwin", "top.betaSelf",
# Used in umxGxEbiv
"mod1", "mod2",
"Adz", "Amz",
"PsAdz" , "PsAmz" , "PsC",
"top.PsAdz", "top.PsAmz", "top.PsC",
"chA", "chC", "chE",
"top.a11", "top.a21", "top.a22",
"top.c11", "top.c21", "top.c22",
"top.e11", "top.e21", "top.e22",
"top.aBeta1", "top.aBeta2", "top.eBeta1", "top.eBeta2", "top.cBeta1", "top.cBeta2",
# Used in umxSimplex
"at" , "ct" , "et",
"Iai", "Ici", "Iei",
"SigmaA" , "SigmaC" , "SigmaE",
"SigmaPh11", "SigmaPh21dz", "SigmaPh21mz",
# Used in poly funs
"minus2SatLogLik", "nCells", "diffchi",
# Used in xmu_lavaan_process_group
"constraintOps", "handledOps",
# Used in power.ACE.test
"zygosity",
# xmu_twin_add_WeightMatrices
"mzWeightMatrix",
"dzWeightMatrix",
# xmuTwinUpgradeMeansToCovariateModel
"top.intercept",
"top.meansBetas",
"T1DefVars",
"T2DefVars",
"T3DefVars",
#MRDoC2
'dzmu',
'BE',
'A_',
'C_',
'E_',
'SPh',
'Smz_'
))
# ===================================================================
# = Define some class containers to allow specialised model objects =
# = plot, etc. can then operate on these =
# ===================================================================
methods::setClass("MxModelDoC" , contains = "MxModel")
methods::setClass("MxModelMRDoC" , contains = "MxModel")
methods::setClass("MxModelTwinMaker", contains = "MxModel")
methods::setClass("MxModelACE" , contains = "MxModel")
methods::setClass("MxModelACEv" , contains = "MxModelACE")
methods::setClass("MxModelACEcov" , contains = "MxModel")
methods::setClass("MxModelIP" , contains = "MxModel")
methods::setClass("MxModelCP" , contains = "MxModel")
methods::setClass("MxModelGxE" , contains = "MxModel")
methods::setClass("MxModelGxEbiv" , contains = "MxModelGxE")
methods::setClass("MxModelSexLim" , contains = "MxModel")
methods::setClass("MxModelSimplex", contains = "MxModel")
# ============================
# = Core Modeling Functions =
# ============================
#' Catches users typing umxModel instead of umxRAM.
#'
#' @description
#' Catches a common typo, moving from mxModel to umx.
#'
#' @param ... Anything. We're just going to throw an error.
#' @return None
#' @export
#' @family xmu internal not for end user
#' @seealso - [umxRAM()], [OpenMx::mxModel()]
#' @references - <https://github.com/tbates/umx>, <https://tbates.github.io>
#' @md
#' @examples
#' \dontrun{
#' umxModel()
#' }
umxModel <- function(...) {
stop("You probably meant umxRAM?, not umxModel?")
}
#' Build and run path-based SEM models
#'
#' @description
#' `umxRAM` expedites creation of structural equation models, still without doing invisible things to the model. It
#' supports [umxPath()]. To support cross-language sharing and science learning, `umxRAM` also supports lavaan model strings.
#'
#' Here's a path example that models miles per gallon (mpg) as a function of weight (wt) and engine displacement (disp)
#' using the widely used `mtcars` data set.
#'
#' ```
#' m1 = umxRAM("tim", data = mtcars,
#' umxPath(c("wt", "disp"), to = "mpg"),
#' umxPath("wt", with = "disp"),
#' umxPath(v.m. = c("wt", "disp", "mpg"))
#' )
#' ```
#'
#' As you can see, most of the work is done by [umxPath()]. `umxRAM` wraps these paths up, takes the `data =` input, and
#' then internally sets up all the labels and start values for the model, runs it, and calls [umxSummary()], and [plot.MxModel()].
#'
#' Try it, or one of the several models in the examples at the bottom of this page.
#'
#' A common error is to include data in the main list, a bit like
#' saying `lm(y ~ x + df)` instead of `lm(y ~ x, data = df)`.
#'
#' **nb**: Because it uses the presence of a variable in the data to detect if a variable is latent or not, `umxRAM` needs data at build time.
#'
#' **String Syntax**
#'
#' Here is an example using lavaan syntax (for more, see [umxLav2RAM()])
#'
#' ```R
#' m1 = umxRAM("mpg ~ wt + disp", data = mtcars)
#' ```
#'
#' **Sketch mode**
#'
#' If you are at the "sketching" stage of theory consideration, `umxRAM` supports
#' setting data to a simple vector of manifest names.
#' As usual in `umxRAM`, any variables you refer to that are not in data are treated as latents.
#'
#' ```R
#' m1 = umxRAM("sketch", data = c("A", "B"),
#' umxPath("C", to = c("A", "B"), values=.3),
#' umxPath("A", with = "B", values=.45),
#' umxPath(v.m. = c("A", "B")),
#' umxPath(v1m0 = "C")
#' )
#' plot(m1, means = FALSE)
#' ```
#' Will create this figure:
#'
#' \if{html}{\figure{sketch.png}{options: alt="Figure: sketch.png"}}
#' \if{latex}{\figure{sketch.pdf}{options: width=7cm}}
#'
#' @details
#'
#' **Comparison for OpenMx users**
#'
#' `umxRAM` differs from [OpenMx::mxModel()] in the following ways:
#'
#' 1. You don't need to set type = "RAM".
#' 2. You don't need to list manifestVars (they are detected from path usage).
#' 3. You don't need to list latentVars (detected as anything in paths but not in \code{mxData}).
#' 4. You don't need to create mxData when you already have a data.frame.
#' 5. You add data with `data = ` (as elsewhere in R, e.g. [lm()]).
#' 6. You don't need to add labels: paths are automatically labelled "a_to_b" etc.
#' 7. You don't need to set start values, they will be done for you.
#' 8. You don't need to `mxRun` the model: it will run automatically, and print a summary.
#' 9. You don't need to run `summary`: with `autoRun=TRUE`, it will print a summary.
#' 10. You get a plot of the model with estimates on the paths, including multiple groups.
#' 11. Less typing: [umxPath()] offers powerful verbs to describe paths.
#' 12. Supports a subset of lavaan string input.
#'
#' **Start values**. Currently, manifest variable means are set to the observed means,
#' residual variances are set to 80% of the observed variance of each variable,
#' and single-headed paths are set to a positive starting value (currently .9).
#' *note*: The start-value strategy is subject to improvement, and will be documented in the help for [umxRAM()].
#'
#' **Comparison with other software**
#'
#' Some SEM software does a lot of behind-the-scenes defaulting and path addition.
#' If you want this, I'd say use `umxRAM` with lavaan string input.
#'
#' @param model A model to update (or set to string to use as name for new model)
#' @param data data for the model. Can be an [OpenMx::mxData()] or a data.frame
#' @param ... umxPaths, mxThreshold objects, etc.
#' @param group (optional) Column name to use for a multi-group model (default = NULL)
#' @param group.equal In multi-group models, what to equate across groups (default = NULL: all free)
#' @param comparison Compare the new model to the old (if updating an existing model: default = TRUE)
#' @param suffix String to append to each label (useful if model will be used in a multi-group model)
#' @param name A friendly name for the model
#' @param type One of "Auto", "FIML", "cov", "cor", "WLS", "DWLS", "ULS"
#' @param tryHard Default ('no') uses normal mxRun. "yes" uses mxTryHard. Other options: "ordinal", "search"
#' @param weight Passes weight values to mxData
#' @param autoRun Whether to run the model (default), or just to create it and return without running.
#' @param std Whether to show standardized estimates, raw (NULL print fit only)
#' @param optimizer optionally set the optimizer (default NULL does nothing)
#' @param allContinuousMethod "cumulants" or "marginals". Used in all-continuous WLS data to determine if a means model needed.
#' @param setValues Whether to generate likely good start values (Defaults to TRUE)
#' @param refModels pass in reference models if available. Use FALSE to suppress computing these if not provided.
#' @param independent Whether the model is independent (default = NA)
#' @param remove_unused_manifests Whether to remove variables in the data to which no path makes reference (defaults to TRUE)
#' @param verbose Whether to tell the user what latents and manifests were created etc. (Default = FALSE)
#' @param std.lv Whether to auto standardize latent variables when using string syntax (default = FALSE)
#' @param lavaanMode Defaults when building out string syntax default = "sem" (alternative is "lavaan", with very few defaults)
#' @param printTab (for string input, whether to output a table of paths (FALSE)
#' @return - [OpenMx::mxModel()]
#' @export
#' @seealso [umxPath()], [umxSummary()], [plot()], [parameters()], [umxSuperModel()], [umxLav2RAM()]
#' @family Core Model Building Functions
#' @references - <https://tbates.github.io>, <https://github.com/tbates/umx>
#' @md
#' @examples
#' \dontrun{
#'
#' # ============================================
#' # = 1. Here's a simple example with raw data =
#' # ============================================
#' mtcars$litres = mtcars$disp/61.02
#' m1 = umxRAM("tim", data = mtcars,
#' umxPath(c("wt", "litres"), to = "mpg"),
#' umxPath("wt", with = "litres"),
#' umxPath(v.m. = c("wt", "litres", "mpg"))
#' )
#'
#' # 2. Use parameters to see the parameter estimates and labels
#' parameters(m1)
#'
#' # And umxSummary to get standardized parameters, CIs etc from the run model.
#' umxSummary(m1, std=TRUE)
#' # |name | Std.Estimate| Std.SE|CI |
#' # |:--------------|------------:|------:|:--------------------|
#' # |wt_to_mpg | -0.54| 0.17|-0.54 [-0.89, -0.2] |
#' # |disp_to_mpg | -0.36| 0.18|-0.36 [-0.71, -0.02] |
#' # |mpg_with_mpg | 0.22| 0.07|0.22 [0.08, 0.35] |
#' # |wt_with_wt | 1.00| 0.00|1 [1, 1] |
#' # |b1 | 0.89| 0.04|0.89 [0.81, 0.96] |
#' # |disp_with_disp | 1.00| 0.00|1 [1, 1] |
#'
#' # 3. Of course you can plot the model
#' plot(m1)
#' plot(m1, std=TRUE, means=FALSE)
#' plot(m1, std = TRUE, means=FALSE, strip= TRUE, resid = "line")
#'
#' # ===============================================
#' # = lavaan string example (more at ?umxLav2RAM) =
#' # ===============================================
#' m1 = umxRAM(data = mtcars, "#modelName
#' mpg ~ wt + disp")
#'
#'
#' # =======================
#' # = A multi-group model =
#' # =======================
#'
#' mtcars$litres = mtcars$disp/61.02
#' m1 = umxRAM("tim", data = mtcars, group = "am",
#' umxPath(c("wt", "litres"), to = "mpg"),
#' umxPath("wt", with = "litres"),
#' umxPath(v.m. = c("wt", "litres", "mpg"))
#' )
#' # In this model, all parameters are free across the two groups.
#'
#' # ====================================
#' # = A cov model, with steps laid out =
#' # ====================================
#'
#' # *note*: The variance of displacement is in cubic inches and is very large.
#' # to help the optimizer, one might, say, multiply disp *.016 to work in litres
#' tmp = mtcars; tmp$disp= tmp$disp *.016
#'
#' # We can just give the raw data and ask for it to be made into type cov:
#' m1 = umxRAM("tim", data = tmp, type="cov",
#' umxPath(c("wt", "disp"), to = "mpg"),
#' umxPath("wt", with = "disp"),
#' umxPath(var = c("mpg", "wt", "disp"))
#' )
#'
#' # (see ?umxPath for more nifty options making paths...)
#'
#' # =========================================
#' # = umxRAM can also accept mxData as data =
#' # =========================================
#' # For convenience, list up the manifests you will be using
#'
#' selVars = c("mpg", "wt", "disp")
#' tmp = mtcars; tmp$disp= tmp$disp *.016
#' myCov = mxData(cov(tmp[, selVars]), type = "cov", numObs = nrow(mtcars) )
#'
#' m1 = umxRAM("tim", data = myCov,
#' umxPath(c("wt", "disp"), to = "mpg"),
#' umxPath("wt", with = "disp"),
#' umxPath(var = selVars)
#' )
#'
#'
#' # =======================
#' # = umxRAM supports WLS =
#' # =======================
#'
#' # 1. Run an all-continuous WLS model
#' mw = umxRAM("raw", data = mtcars[, c("mpg", "wt", "disp")],
#' type = "WLS", allContinuousMethod = "cumulants",
#' umxPath(var = c("wt", "disp", "mpg")),
#' umxPath(c("wt", "disp"), to = "mpg"),
#' umxPath("wt", with = "disp"),
#' umxPath(var = c("wt", "disp", "mpg"))
#' )
#' # 2. Switch to marginals to support means
#' mw = umxRAM("raw", data = mtcars[, c("mpg", "wt", "disp")],
#' type = "WLS", allContinuousMethod= "marginals",
#' umxPath(var = c("wt", "disp", "mpg")),
#' umxPath(c("wt", "disp"), to = "mpg"),
#' umxPath("wt", with = "disp"),
#' umxPath(var = c("wt", "disp", "mpg"))
#' )
#'
#'
#' # ===============================
#' # = Using umxRAM in Sketch mode =
#' # ===============================
#' # No data needed: just list variable names!
#' # Resulting model will be plotted automatically
#' m1 = umxRAM("what does unique pairs do, I wonder", data = c("A", "B", "C"),
#' umxPath(unique.pairs = c("A", "B", "C"))
#' )
#'
#' m1 = umxRAM("ring around the rosey", data = c("B", "C"),
#' umxPath(fromEach = c("A", "B", "C"))
#' )
#'
#' m1 = umxRAM("fromEach with to", data = c("B", "C"),
#' umxPath(fromEach = c("B", "C"), to= "D")
#' )
#'
#' m1 = umxRAM("CFA_sketch", data = paste0("x", 1:4),
#' umxPath("g", to = paste0("x", 1:4)),
#' umxPath(var = paste0("x", 1:4)),
#' umxPath(v1m0 = "g")
#' )
#'
#' # =================================================
#' # = This is an example of using your own labels: =
#' # umxRAM will not over-ride them =
#' # =================================================
#' m1 = umxRAM("tim", data = mtcars, type="cov",
#' umxPath(c("wt", "disp"), to = "mpg"),
#' umxPath(cov = c("wt", "disp"), labels = "b1"),
#' umxPath(var = c("wt", "disp", "mpg"))
#' )
#' omxCheckEquals(m1$S$labels["disp", "wt"], "b1") # label preserved
#' m1$S$labels
#'# mpg wt disp
#'# mpg "mpg_with_mpg" "mpg_with_wt" "disp_with_mpg"
#'# wt "mpg_with_wt" "wt_with_wt" "b1"
#'# disp "disp_with_mpg" "b1" "disp_with_disp"
#' parameters(m1)
#'
#' # ===========
#' # = Weights =
#' # ===========
#' # !!! Not tested !!!
#' mtcars$litres = mtcars$disp/61.02
#' m1 = umxRAM("tim", data = mtcars, weight= "cyl",
#' umxPath(c("wt", "litres"), to = "mpg"),
#' umxPath("wt", with = "litres"),
#' umxPath(v.m. = c("wt", "litres", "mpg"))
#' )
#'
#' }
#'
umxRAM <- function(model = NA, ..., data = NULL, name = NA, group = NULL, group.equal = NULL, suffix = "", comparison = TRUE, type = c("Auto", "FIML", "cov", "cor", "WLS", "DWLS", "ULS"), weight = NULL, allContinuousMethod = c("cumulants", "marginals"), autoRun = getOption("umx_auto_run"), tryHard = c("no", "yes", "ordinal", "search"), std = FALSE, refModels = NULL, remove_unused_manifests = TRUE, independent = NA, setValues = TRUE, optimizer = NULL, verbose = FALSE, std.lv = FALSE, lavaanMode = c("sem", "lavaan"), printTab = FALSE) {
dot.items = list(...) # grab all the dot items: mxPaths, etc...
dot.items = unlist(dot.items) # In case any dot items are lists of mxPaths, etc...
type = match.arg(type)
tryHard = match.arg(tryHard)
lavaanMode = match.arg(lavaanMode)
allContinuousMethod = match.arg(allContinuousMethod)
if(!is.null(weight)){
message("Polite note: Weight feature has not been tested: Models may have spurious fit, consider this feature alpha quality")
}
# if data provided check it isn't a tibble
if(!is.null(data)){
# avoid ingesting tibbles
if(inherits(data, "tbl")){
data = as.data.frame(data)
}
}
# =================
# = Set optimizer =
# =================
if(!is.null(optimizer)){
umx_set_optimizer(optimizer)
}
if(!is.null(group)){
if(!inherits(data, "data.frame")){
stop("Currently, for multiple groups, data must be a raw data.frame so I can subset it into multiple groups. You gave me a ", omxQuotes(class(data)))
}
}
# lavaan string style model
if (is.character(model) && grepl(model, pattern = "(<|~|=~|~~|:=)")){
# Process lavaanString: need to modify so that all the RAM options are processed:
# type = c("Auto", "FIML", "cov", "cor", "WLS", "DWLS", "ULS")
# show
# suffix
# refModels = NULL
# comparison
# allContinuousMethod
# remove_unused_manifests
model = umxLav2RAM(model = model, data = data, type = type, group = group, group.equal = group.equal, std.lv = std.lv, name = name,
lavaanMode = lavaanMode, autoRun = autoRun, tryHard = tryHard, printTab = printTab)
return(model)
}
# umxPath-based model
if(typeof(model) == "character"){
if(is.na(name)){
name = model
} else {
stop("If model is set to a string, don't pass in name as well...")
}
} else {
if(umx_is_RAM(model)){
# message("Updating existing model")
if(is.na(name)){
name = model$name
}
if(is.null(data)){
newModel = mxModel(model, dot.items, name = name)
} else {
if(umx_is_MxData(data)){
newModel = mxModel(model, dot.items, data, name = name)
} else {
stop("Polite note: I don't know how to convert raw data into mxData to update your model - can you please do that for me and try again?")
}
}
# if(setValues){
# newModel = xmuValues(newModel)
# }
newModel = xmu_safe_run_summary(newModel, autoRun = autoRun, tryHard = tryHard, refModels = refModels, std = std)
return(newModel)
} else {
stop("First item must be either an existing model or a name string. You gave me a ", typeof(model))
}
}
umx_check(!is.null(data), "stop", "In umxRAM, you must set 'data = '. If you're building a model with no data, use mxModel")
foundNames = c()
defnNames = c()
for (thisItem in dot.items) {
if(!is.list(thisItem)){
# Sometimes we get a list, so expand everything to a list.
thisItem = list(thisItem)
}
for (i in length(thisItem)) {
thisIs = class(thisItem[[i]])[1]
if(thisIs == "MxPath"){
foundNames = append(foundNames, c(thisItem[[i]]$from, thisItem[[i]]$to))
tmp = namez(thisItem[[i]]$labels, "data\\.")
if(length(tmp) > 0){
defnNames = append(defnNames, namez(tmp, "data\\.(.*)", replacement= "\\1"))
}
} else {
if(thisIs == "MxThreshold"){
# MxThreshold detected
} else if(umx_is_MxMatrix(thisItem[[i]])){
# matrix labels might refer to definition variables
tmp = namez(thisItem[[i]]$labels, "data\\.")
if(length(tmp) > 0){
defnNames = append(defnNames, namez(tmp, "data\\.(.*)", replacement= "\\1"))
}
} else {
# TODO: umxRAM currently not checking for unsupported items.
# stop("I can only handle (u)mxPaths, (u)mxMatrices, mxConstraints, and mxThreshold() objects.\n",
# "You have given me a", class(i)[1],"\n",
# " To include data in umxRAM, say 'data = yourData'")
}
}
}
}
# ============================
# = All dot.items processed =
# ============================
# ====================================
# = Find latentVars and manifestVars =
# ====================================
# Get names from data (forms pool of potential usedManifests)
manifestVars = unique(na.omit(umx_names(data)))
# Omit NAs from found names (empty "to =" can generate these spuriously)
foundNames = unique(na.omit(foundNames))
defnNames = unique(na.omit(defnNames))
if(length(defnNames)>0){
# check'm if you've got'm
umx_check_names(defnNames, data = data, message = "note: used as definition variable, but not present in data")
}
# Anything else used as a path, but not found in the data (and not a key word like "one") must be a latent
latentVars = setdiff(foundNames, c(manifestVars, "one"))
nLatent = length(latentVars)
# Report which latents were created
if (!umx_set_silent(silent=TRUE)) {
if(nLatent == 0){
# message("No latent variables were created.\n")
# latentVars = NA
} else if (nLatent == 1){
message("A latent variable '", latentVars[1], "' was created. ")
} else {
message(nLatent, " latent variables were created:", paste(latentVars, collapse = ", "), ". ")
}
}
# ===========================================================
# = TODO handle user adding an mxThreshold object to umxRAM =
# ===========================================================
# This will be a model where things are not in the data and are not latent...
# ======================================
# = List up used and un-used Manifests =
# ======================================
# Used = all data columns present in found and not reserved, e.g. "one"
unusedManifests = setdiff(manifestVars, c(foundNames, defnNames))
# Include weight if it is passed
if (!is.null(weight)) unusedManifests = setdiff(c(manifestVars, weight), c(foundNames, defnNames))
if(remove_unused_manifests & length(unusedManifests) > 0){
usedManifests = setdiff(intersect(manifestVars, foundNames), "one")
if (!is.null(weight)) {
myData = xmu_make_mxData(data = data, type = type, manifests = usedManifests, fullCovs =
defnNames, verbose = verbose, weight = weight)
} else {
myData = xmu_make_mxData(data = data, type = type, manifests = usedManifests, fullCovs =
defnNames, verbose = verbose)
}
} else {
# keep everything
usedManifests = setdiff(manifestVars, defnNames)
myData = xmu_make_mxData(data= data, type = type, verbose = verbose, manifests = usedManifests, fullCovs =
defnNames)
}
# ==================
# = Assemble model =
# ==================
newModel = do.call("mxModel", list(name = name, type = "RAM",
manifestVars = usedManifests,
latentVars = latentVars,
independent = independent, dot.items)
)
# ============
# = Add data =
# ============
if (inherits(myData, "character")){
# User is just running a trial model, with no data, but provided names for sketch mode
newModel = xmuLabel(newModel, suffix = suffix)
if(is.null(group)){
if(autoRun && umx_set_auto_plot(silent = TRUE)){
plot(newModel)
}
return(newModel)
} else {
# will be added to a super model, but no data needed/available to subset
}
}else{
newModel = mxModel(newModel, myData)
# note: if necessary (group), will be re-processed to add the required data below...
}
# ==========================
# = Add means if necessary =
# ==========================
# Note: WLS data will be mxData(..., type = "raw") at this stage.
# Add means if data are raw and means not requested by user
needsMeans = xmu_check_needs_means(data = myData, type = type, allContinuousMethod = allContinuousMethod)
if(needsMeans && is.null(newModel$matrices$M)){
message("You have raw data, but no means model. I added\n",
"mxPath('one', to = manifestVars)")
newModel = mxModel(newModel, mxPath("one", usedManifests))
}
# =========================
# = Labels and set values =
# =========================
suffix = ifelse(is.null(group), yes = suffix, no = paste0(suffix, "_GROUP"))
newModel = xmuLabel(newModel, suffix = suffix)
if(setValues){
newModel = xmuValues(newModel, onlyTouchZeros = TRUE)
}
if(any(umx_is_ordered(myData$observed))){
newModel = xmuRAM2Ordinal(newModel, verbose = TRUE)
}
# ==============================
# = Add mxFitFunction to model =
# ==============================
if(type %in% c('WLS', 'DWLS', 'ULS')) {
# message("data treated as ", omxQuotes(type), ". allContinuousMethod = ", omxQuotes(allContinuousMethod))
# Replace newModel fit functions
# Still mxExpectationNormal and means not affected (either has or lacks means matrix already).
newModel = mxModel(newModel, mxFitFunctionWLS(type= type, allContinuousMethod = allContinuousMethod) )
}
# =====================
# = Handle group here =
# =====================
if(!is.null(group)){
# 1. Go back to raw data and subset by "group" column
# 2. Create new mxData,
# 3. Add data to copy of the model and accumulate in list of models
# 4. Add list of models to umxSuperModel
modelList = list()
groupCol = data[, group]
levelsOfGroup = unique(groupCol)
# already computed above
# unusedManifests = setdiff(manifestVars, foundNames)
# usedManifests = setdiff(intersect(manifestVars, foundNames), "one")
# usedManifests = manifestVars
for (thisLevelOfGroup in levelsOfGroup) {
thisSubset = data[groupCol == thisLevelOfGroup, ]
if(remove_unused_manifests & length(unusedManifests) > 0){
myData = xmu_make_mxData(data = thisSubset, type = type, manifests = c(usedManifests, defnNames), verbose = FALSE)
} else {
myData = xmu_make_mxData(data= thisSubset, type = type, verbose = FALSE)
}
thisModel = mxModel(newModel, myData, name= paste0(name, "_", thisLevelOfGroup))
if(!is.null(group.equal)){
message("sorry, haven't implemented group.equal yet")
}else{
# replace "_GROUP$" with _thisLevelOfGroup
thisModel = umxSetParameters(thisModel, regex= "_GROUP$", newlabels= paste0("_", thisLevelOfGroup))
}
modelList = c(modelList, thisModel)
}
return(umxSuperModel(name = name, modelList, autoRun = autoRun, tryHard = tryHard, std = std))
}
newModel = omxAssignFirstParameters(newModel)
newModel = xmu_safe_run_summary(newModel, autoRun = autoRun, tryHard = tryHard, refModels = refModels, std = std)
invisible(newModel)
}
#' Make a multi-group model
#'
#' @description
#' `umxSuperModel` takes 1 or more models and wraps them in a supermodel with a
#' [OpenMx::mxFitFunctionMultigroup()] fit function that minimizes the sum of the
#' fits of the sub-models.
#'
#' *note*: Any duplicate model-names are renamed to be unique by suffixing `_1` etc.
#'
#' @param name The name for the container model (default = 'super')
#' @param ... Models forming the multiple groups contained in the supermodel.
#' @param autoRun Whether to run the model (default), or just to create it and return without running.
#' @param tryHard Default ('no') uses normal mxRun. "yes" uses mxTryHard. Other options: "ordinal", "search"
#' @param std Show standardized parameters, raw (default), or just the fit indices (null)
#' @return - [OpenMx::mxModel()]
#' @export
#' @family Core Model Building Functions
#' @seealso - [OpenMx::mxFitFunctionMultigroup()], [umxRAM()]
#' @references - <https://github.com/tbates/umx>, <https://tbates.github.io>
#' @md
#' @examples
#' \dontrun{
#' library(umx)
#' # Create two sets of data in which X & Y correlate ~ .4 in both datasets.
#' manifests = c("x", "y")
#' tmp = umx_make_TwinData(nMZpairs = 100, nDZpairs = 150,
#' AA = 0, CC = .4, EE = .6, varNames = manifests)
#'
#' # Group 1
#' grp1 = tmp[tmp$zygosity == "MZ", manifests]
#' g1Data = mxData(cov(grp1), type = "cov", numObs = nrow(grp1), means=umx_means(grp1))
#'
#' # Group 2
#' grp2 = tmp[tmp$zygosity == "DZ", manifests]
#' g2Data = mxData(cov(grp2), type = "cov", numObs = nrow(grp2), means=umx_means(grp2))
#'
#'
#' # Model 1 (could add autoRun = FALSE if you don't want to run this as it is being built)
#' m1 = umxRAM("m1", data = g1Data,
#' umxPath("x", to = "y", labels = "beta"),
#' umxPath(var = manifests, labels = c("Var_x", "Resid_y_grp1")),
#' umxPath(means = manifests, labels = c("Mean_x", "Mean_y"))
#' )
#'
#' # Model 2
#' m2 = umxRAM("m2", data = g2Data,
#' umxPath("x", to = "y", labels = "beta"),
#' umxPath(var = manifests, labels=c("Var_x", "Resid_y_grp2")),
#' umxPath(means = manifests, labels=c("Mean_x", "Mean_y"))
#' )
#'
#' # Place m1 and m2 into a supermodel, and autoRun it
#' # NOTE: umxSummary is only semi-smart/certain enough to compute saturated models etc
#' # and report multiple groups correctly.
#'
#' m3 = umxSuperModel('top', m1, m2)
#'
#' umxSummary(m3, std= TRUE)
#'
#' # |name | Std.Estimate| Std.SE|CI |
#' # |:------------|------------:|------:|:-----------------|
#' # |beta | 0.51| 0.05|0.51 [0.41, 0.61] |
#' # |Var_x | 1.00| 0.00|1 [1, 1] |
#' # |Resid_y_grp1 | 0.74| 0.05|0.74 [0.64, 0.84] |
#' # |beta | 0.50| 0.05|0.5 [0.41, 0.6] |
#' # |Var_x | 1.00| 0.00|1 [1, 1] |
#' # |Resid_y_grp2 | 0.75| 0.05|0.75 [0.65, 0.84] |
#'
#' summary(m3)
#'
#' # ====================================
#' # = Test models with duplicate names =
#' # ====================================
#' data(GFF)
#' mzData = subset(GFF, zyg_2grp == "MZ")
#' dzData = subset(GFF, zyg_2grp == "DZ")
#' selDVs = c("gff", "fc", "qol")
#' m1 = umxCP(selDVs= selDVs, nFac= 1, dzData= dzData, mzData= mzData, sep= "_T", autoRun= TRUE)
#' m2 = mxRename(m1, "CP2")
#' umxModelNames(m1) # "top" "MZ" "DZ"
#' umxModelNames(m2) # "top" "MZ" "DZ"
#' super = umxSuperModel("myModel", m1, m2, autoRun = TRUE)
#' umxModelNames(super)
#' }
umxSuperModel <- function(name = 'super', ..., autoRun = getOption("umx_auto_run"), tryHard = c("no", "yes", "ordinal", "search"), std = FALSE) {
tryHard = match.arg(tryHard)
umx_check(boolean.test= is.character(name), action= "stop", message= "You need to set the name for the supermodel, i.e. add name = 'modelName' ")
dot.items = list(...) # grab all the dot items: models...
dot.items = unlist(dot.items)
nModels = length(dot.items)
# Get list of model names
modelNames = c()
for(modelIndex in 1:nModels) {
thisModel = dot.items[[modelIndex]]
if(umx_is_MxModel(thisModel)){
if(is.null(thisModel$fitfunction)){
# ignore model... no fitfunction to optimize
} else {
modelNames = c(modelNames, thisModel$name)
}
} else {
stop("Only models can be included in ... ", thisModel, " was a ", class(dot.items[[thisModel]]))
}
}
if(length(modelNames) < 1){
stop("No models in '...' had a fitfunction: At least two models must have a fitfunction and objective for umxSuperModel to jointly optimize")
}else if(anyDuplicated(modelNames)){
stop("Models must have unique names: Duplicates detected in ", omxQuotes(modelNames))
}
# multiple group fit function sums the likelihoods of its component models
newModel = mxModel(name, dot.items, mxFitFunctionMultigroup(modelNames))
# Trundle through and make sure values with the same label have the same start value... means for instance.
newModel = omxAssignFirstParameters(newModel)
# 2. Find and change any duplicate model names inside the models
# 1. find all duplicated names
# 2. loop over the sub models, finding and changing each duplicate name
tmpnames = umxModelNames(newModel)
dupes = tmpnames[duplicated(tmpnames)] # "top" "MZ" "DZ"
if(length(dupes) > 0){
suffix = 2
subNames = names(newModel$submodels)[-1]
for(thisSub in subNames){
thisModel = newModel$submodels[[thisSub]]
for(thisDupName in dupes){
thisModel = mxRename(thisModel, paste0(thisDupName, "_", suffix), oldname = thisDupName)
}
newModel = mxModel(newModel, thisModel)
suffix = suffix + 1
}
print(paste0("Polite note: Renamed sub-models with duplicate names, e.g. ", omxQuotes(dupes[1]), " -> ", omxQuotes(paste0(dupes[1], "_2"))))
}
newModel = xmu_safe_run_summary(newModel, autoRun = autoRun, tryHard = tryHard, std = std)
invisible(newModel)
}
#' umxModify: Add, set, or drop model paths by label.
#'
#' umxModify allows you to modify, re-run and summarize an [OpenMx::mxModel()], all in one line of script.
#'
#' @details
#' You can add paths, or other model elements, set path values (default is 0), or replace labels.
#' As an example, this one-liner drops a path labelled "Cs", and returns the updated model:
#'
#' `fit2 = umxModify(fit1, update = "Cs", name = "newModelName", comparison = TRUE)`
#'
#' Regular expressions are a powerful feature: they let you drop collections of paths by matching patterns
#' for instance, this would match labels containing either "Cs" or "Cr":
#'
#' ```R
#' fit2 = umxModify(fit1, regex = "C\[sr\]", name = "drop_Cs_and_Cr", comparison = TRUE)
#' ```
#'
#' You may find it easier to be more explicit. Like this:
#'
#' ```R
#' fit2 = umxSetParameters(fit1, labels = c("Cs", "Cr"), values = 0, free = FALSE, name = "newName")
#' fit2 = mxRun(fit2)
#' summary(fit2)
#' ```
#'
#' *Note*: A (minor) limitation is that you cannot simultaneously set value to 0
#' AND relabel cells (because the default value is 0, so it is ignored when using newlabels).
#'
#' @aliases umxModify
#' @param lastFit The [OpenMx::mxModel()] you wish to update and run.
#' @param update What to update before re-running. Can be a list of labels, a regular expression (set regex = TRUE) or an object such as mxCI etc.
#' @param regex Whether or not update is a regular expression (default FALSE). If you provide a string, it overrides the contents of update, and sets regex to TRUE.
#' @param free The state to set "free" to for the parameters whose labels you specify (defaults to free = FALSE, i.e., fixed)
#' @param value The value to set the parameters whose labels you specify too (defaults to 0)
#' @param newlabels If not NULL, used as a replacement set of labels (can be regular expression). value and free are ignored!
#' @param freeToStart Whether to update parameters based on their current free-state. free = c(TRUE, FALSE, NA), (defaults to NA - i.e, not checked)
#' @param name The name for the new model
#' @param comparison Whether to run umxCompare() on the new and old models.
#' @param autoRun Whether to run the model (default), or just to create it and return without running.
#' @param tryHard Default ('no') uses normal mxRun. "yes" uses mxTryHard. Other options: "ordinal", "search"
#' @param master If you set master, then the update labels will be equated to these (i.e. replaced by them).
#' @param intervals Whether to run confidence intervals (see [OpenMx::mxRun()])
#' @param verbose How much feedback to give
#' @return - [OpenMx::mxModel()]
#' @family Core Model Building Functions
#' @references - <https://github.com/tbates/umx>
#' @export
#' @md
#' @examples
#' \dontrun{
#' require(umx)
#' # First we'll just build a 1-factor model
#' umx_set_optimizer("SLSQP")
#' data(demoOneFactor)
#' manifests = names(demoOneFactor)
#'
#' m1 = umxRAM("One Factor", data = demoOneFactor, type = "cov",
#' umxPath("G", to = manifests),
#' umxPath(var = manifests),
#' umxPath(var = "G", fixedAt = 1)
#' )
#'
#' # 1. Drop the path to x1 (also updating the name so it's
#' # self-explanatory, and get a fit comparison
#' m2 = umxModify(m1, update = "G_to_x1", name = "drop_X1", comparison = TRUE)
#'
#' # 2. Add the path back (setting free = TRUE)
#' m2 = umxModify(m1, update = "G_to_x1", free= TRUE, name = "addback_X1", comparison = TRUE)
#' # 3. Fix a value at a non-zero value
#' m3 = umxModify(m1, update = "G_to_x1", value = .35, name = "fix_G_x1_at_35", comp = TRUE)
#' # You can add objects to models. For instance this would add a path (overwriting the existing one)
#' # (thanks Johannes!)
#' m3 = umxModify(m1, umxPath("G", with = "x1"), name= "addedPath")
#'
#' # Use regular expression to drop multiple paths: e.g. G to x3, x4, x5
#' m3 = umxModify(m1, regex = "^G_to_x[3-5]", name = "tried_hard", comp = TRUE, tryHard="yes")
#'
#' # Same, but don't autoRun
#' m2 = umxModify(m1, regex = "^G_to_x[3-5]", name = "no_G_to_x3_5", autoRun = FALSE)
#'
#' # Re-write a label
#' newLabel = "A_rose_by_any_other_name"
#' newModelName = "model_doth_smell_as_sweet"
#' m2 = umxModify(m1, update = "G_to_x1", newlabels= newLabel, name = newModelName, comparison = TRUE)
#' # Change labels in 2 places
#' labsToUpdate = c("G_to_x1", "G_to_x2")
#' newLabel = "G_to_1_or_2"
#' m2 = umxModify(m1, update = labsToUpdate, newlabels= newLabel, name = "equated", comparison = TRUE)
#'
#' # Advanced!
#' # Regular expressions let you use pieces of the old names in creating new ones!
#' searchString = "G_to_x([0-9])"
#' newLabel = "loading_for_path\\1" # use value in regex group 1
#' m2 = umxModify(m1, regex = searchString, newlabels= newLabel, name = "grep", comparison = TRUE)
#' } # end dontrun
#'
umxModify <- function(lastFit, update = NULL, regex = FALSE, free = FALSE, value = 0, newlabels = NULL, freeToStart = NA, name = NULL, comparison = FALSE, autoRun = getOption("umx_auto_run"), tryHard = c("no", "yes", "ordinal", "search"), master = NULL, intervals = FALSE, verbose = FALSE) {
tryHard = match.arg(tryHard)
if(!is.null(name) && name == "guess"){
name = update[1]
}
if(!is.null(master)){
x = umxEquate(lastFit, a = master, b = update, free = freeToStart, verbose = verbose, name = name, autoRun = autoRun, comparison = comparison)
return(x)
}
if (typeof(regex) != "logical"){
# Use the regex as update, and switch to regex mode
if(!is.null(update)){
stop("If you input a regular expression in ", omxQuotes("regex"), " you must leave ", omxQuotes("update"), " set to NULL.")
}
update = regex
regex = TRUE
}
if(is.null(update)){
message("You haven't asked to do anything: the parameters that are free to be dropped are:")
print(parameters(lastFit))
stop()
}
if(!is.null(newlabels)){
# check length(update) == length(newlabels) or length(newlabels) == 1
if(length(update) != length(newlabels)){
if(length(newlabels) != 1){
stop(paste0("Length of newlabels must be 1, or same as update. You gave me ",
length(update), " labels to update, and ", length(newlabels), " newlabels"))
}else{
# Copy out newlabels to match length of update
newlabels = rep(newlabels, length(update))
}
}
}
if(regex | typeof(update) == "character") {
newModel = lastFit
# handle labels as input
if (!regex) {
theLabels = update
# TODO: check the labels are present
# if not suggest reversal for with items
if(is.null(newlabels)){
newModel = omxSetParameters(newModel, labels = theLabels, free = free, values = value, name = name)
}else{
newModel = omxSetParameters(newModel, labels = theLabels, newlabels = newlabels, name = name)
}
} else {
# Handle 1 or more regular expressions.
for (i in 1:length(update)) {
match = umxGetParameters(newModel, regex = update[i], free = freeToStart, verbose = verbose)
if(verbose){
message("Matched labels = ", omxQuotes(match))
}
if(is.null(newlabels)){
newModel = omxSetParameters(newModel, labels = match, free = free, values = value, name = name)
}else{
# There are new labels to match up
newFinds = namez(df= match, pattern= update[i], replacement= newlabels[i] )
newModel = omxSetParameters(newModel, labels = match, newlabels = newFinds, name = name)
if(verbose){
message("newlabels = ", omxQuotes(newFinds))
}
}
}
}
} else {
# Add objects passed in under "update"
# TODO umxModify: if object is RAM, add re-label and re-start new object?
if(is.null(name)){ name = NA } # i.e. do nothing
newModel = mxModel(lastFit, update, name = name)
}
newModel = omxAssignFirstParameters(newModel)
newModel = xmu_safe_run_summary(newModel, lastFit, autoRun = autoRun, tryHard = tryHard, comparison = comparison)
return(newModel)
}
# ==================
# = Twin Functions =
# ==================
#' Build and run a 2-group Cholesky ACE twin model (univariate or multivariate)
#'
#' @description
#' Implementing a core task in twin modeling, umxACE models the genetic and environmental
#' structure of one or more phenotypes (measured variables) using the Cholesky ACE model
#' (Neale and Cardon, 1996).
#'
#' Classical twin modeling uses the genetic and environmental differences
#' among pairs of mono-zygotic (MZ) and di-zygotic (DZ) twins reared together.
#'
#' `umxACE` implements a 2-group model to capture these data and represent the phenotypic variance as a sum of Additive genetic,
#' unique environmental (E) and, optionally, either common or shared-environment (C) or
#' non-additive genetic effects (D).
#'
#' The following figure shows the ACE model for one variable "x" as a path diagram:
#'
#' \if{html}{\figure{ACEunivariate.png}{options: width=50% alt="Figure: ACE univariate.png"}}
#' \if{latex}{\figure{ACEunivariate.pdf}{options: width=7cm}}
#'
#' `umxACE` allows multivariate analyses, and this brings us to the Cholesky part of the model.
#'
#' The Cholesky decomposition creates as many latent A (and C and E) latent variables as there are phenotypes, and, moving
#' from left to right, decomposes the variance in each phenotype into successively restricted
#' factors. The following figure shows the multivariate ACE model for three variables:
#'
#' \if{html}{\figure{ACEmatrix.png}{options: width=50% alt="Figure: ACE matrix.png"}}
#' \if{latex}{\figure{ACEmatrix.pdf}{options: width=7cm}}
#'
#' In this ACE model of three phenotypes, the expected variance-covariance matrix of the original data
#' is the product of each lower Cholesky and its transform (i.e., `A = a %*% t(a)` summed for `A+C+E`.
#'
#' This lower-triangle decomposition feature of the Cholesky yields a model which is certain to both
#' account for all the variance (with some restrictions) in the data and be solvable.
#'
#' This figure also contains the key to understanding how to modify models that `umxACE` produces
#' using `umxModify()` to drop paths by label like `"a_r1c1"`. **nb**: Read the "Matrices and Labels in ACE model" section in details below...
#'
#' **NOTE**: Scroll down to details for how to use the function, a figure and multiple examples.
#'
#' @details
#' \strong{Covariates}
#' umxACE handles covariates by modelling them in the means.
#' On the plus side, there is no distributional assumption for this method. A downside of this approach is that all
#' covariates must be non-NA, thus dropping any rows where one or more covariates are missing.
#' This can waste data. See also [umx_residualize()]).
#'
#' \strong{Data Input}
#' The function flexibly accepts raw data, and also summary covariance data
#' (in which case the user must also supple numbers of observations for the two input data sets).
#'
#' The `type` parameter can select how you want the model data treated.
#' "FIML" is the normal treatment. "cov" and "cor" will turn raw data into cor data for analysis, or
#' check that you've provided cor data as input.
#'
#' Types "WLS", "DWLS", and "ULS" will process raw data into WLS data of these types.
#'
#' The default, "Auto" will treat data as the type they are provided as.
#'
#' \strong{Ordinal Data}
#' In an important capability, the model transparently handles ordinal (binary or multi-level
#' ordered factor data) inputs, and can handle mixtures of continuous, binary, and ordinal
#' data in any combination. An experimental feature is under development to allow Tobit modeling.
#'
#' The function also supports weighting of individual data rows. In this case,
#' the model is estimated for each row individually, then each row likelihood
#' is multiplied by its weight, and these weighted likelihoods summed to form
#' the model-likelihood, which is to be minimized.
#' This feature is used in the non-linear GxE model functions.
#'
#' **Additional features**
#' The umxACE function supports varying the DZ genetic association (defaulting to .5)
#' to allow exploring assortative mating effects, as well as varying the DZ \dQuote{C} factor
#' from 1 (the default for modeling family-level effects shared 100% by twins in a pair),
#' to .25 to model dominance effects.
#'
#' **Matrices and Labels in ACE model**
#'
#' Matrices 'a', 'c', and 'e' contain the path loadings of the Cholesky ACE factor model.
#'
#' So, labels relevant to modifying the model are of the form `"a_r1c1", "c_r1c1"` etc.
#'
#' Variables are in rows, and factors are in columns. So to drop the influence of factor 2 on variable 3, you would say:
#'
#' `m2 = umxModify(m1, update = "c_r3c2")`
#'
#' Less commonly-modified matrices are the mean matrix `expMean`. This has 1 row, and the columns are laid out for
#' each variable for twin 1, followed by each variable for twin 2.
#'
#' So, in a model where the means for twin 1 and twin 2 had been equated (set = to T1), you
#' could make them independent again with this script:
#'
#' `m1$top$expMean$labels[1, 4:6] = c("expMean_r1c4", "expMean_r1c5", "expMean_r1c6")`
#'
#' \emph{note}: Only one of C or D may be estimated simultaneously. This restriction reflects the lack
#' of degrees of freedom to simultaneously model C and D with only MZ and DZ twin pairs (Eaves et al. 1978, p267).
#'
#' @param name The name of the model (defaults to"ACE").
#' @param selDVs The variables to include from the data: preferably, just "dep" not c("dep_T1", "dep_T2").
#' @param selCovs (optional) covariates to include from the data (do not include sep in names)
#' @param sep The separator in twin variable names, often "_T", e.g. "dep_T1". Simplifies selDVs.
#' @param dzData The DZ dataframe.
#' @param mzData The MZ dataframe.
#' @param type Analysis method one of c("Auto", "FIML", "cov", "cor", "WLS", "DWLS", "ULS")
#' @param data If provided, dzData and mzData are treated as levels of zyg to select() MZ and DZ data sets (default = NULL)
#' @param zyg If data provided, this column is used to select rows by zygosity (Default = "zygosity")
#' @param allContinuousMethod "cumulants" or "marginals". Used in all-continuous WLS data to determine if a means model needed.
#' @param residualizeContinuousVars Not yet implemented.
#' @param autoRun Whether to run the model (default), or just to create it and return without running.
#' @param intervals Whether to run mxCI confidence intervals (default = FALSE)
#' @param tryHard Default ('no') uses normal mxRun. "yes" uses mxTryHard. Other options: "ordinal", "search"
#' @param optimizer Optionally set the optimizer (default NULL does nothing).
#' @param nSib Number of siblings in a family (default - 2). "3" = extra sib.
#' @param dzAr The DZ genetic correlation (defaults to .5, vary to examine assortative mating).
#' @param dzCr The DZ "C" correlation (defaults to 1: set to .25 to make an ADE model).
#' @param numObsDZ Number of DZ twins: Set this if you input covariance data.
#' @param numObsMZ Number of MZ twins: Set this if you input covariance data.
#' @param weightVar If provided, a vector objective will be used to weight the data. (default = NULL).
#' @param equateMeans Whether to equate the means across twins (defaults to TRUE).
#' @param boundDiag Numeric lbound for diagonal of the a, c, and e matrices. Defaults to 0 since umx version 1.8
#' @param addStd Whether to add the algebras to compute a std model (defaults to TRUE).
#' @param addCI Whether to add intervals to compute CIs (defaults to TRUE).
#' @return - [OpenMx::mxModel()] of subclass mxModel.ACE
#' @export
#' @family Twin Modeling Functions
#' @seealso - [umxPlotACE()], [umxSummaryACE()], [power.ACE.test()], [umxModify()]
#' @references - Eaves, L. J., Last, K. A., Young, P. A., & Martin, N. G. (1978). Model-fitting approaches
#' to the analysis of human behaviour. *Heredity*, **41**, 249-320. \doi{https://doi.org/10.1038/hdy.1978.101}
#' @md
#' @examples
#' \donttest{
#' require(umx)
#' # ============================
#' # = How heritable is height? =
#' # ============================
#'
#' # 1. Height in meters has a tiny variance, and this makes optimising hard.
#' # We'll scale it by 10x to make the Optimizer's task easier.
#' data(twinData) # ?twinData from Australian twins.
#' twinData[, c("ht1", "ht2")] = twinData[, c("ht1", "ht2")] * 10
#'
#' # 2. Make mz & dz data.frames (no need to select variables: umx will do this)
#' mzData = twinData[twinData$zygosity %in% "MZFF", ]
#' dzData = twinData[twinData$zygosity %in% "DZFF", ]
#'
#' # 3. Built & run the model, controlling for age in the means model
#' m1 = umxACE(selDVs = "ht", selCovs = "age", sep = "", dzData = dzData, mzData = mzData)
#'
#' # sidebar: umxACE figures out variable names using sep:
#' # e.g. selVars = "wt" + sep= "_T" -> "wt_T1" "wt_T2"
#'
#' umxSummary(m1, std = FALSE) # un-standardized
#'
#' # tip 1: set report = "html" and umxSummary prints the table to your browser!
#' # tip 2: plot works for umx: Get a figure of the model and parameters
#' # plot(m1) # Also, look at the options for ?plot.MxModel.
#'
#' # ===========================================
#' # = Test ADE, AE, CE, E, and generate table =
#' # ===========================================
#'
#' umxReduce(m1, report="html", silent= TRUE)
#'
#' # ============================
#' # = Model, with 2 covariates =
#' # ============================
#'
#' # Create another covariate: cohort
#' twinData$cohort1 = twinData$cohort2 =twinData$part
#' mzData = twinData[twinData$zygosity %in% "MZFF", ]
#' dzData = twinData[twinData$zygosity %in% "DZFF", ]
#'
#' # 1. def var approach
#' m2 = umxACE(selDVs = "ht", selCovs = c("age", "cohort"), sep = "", dzData = dzData, mzData = mzData)
#'
#' # 2. Residualized approach: remove height variance accounted-for by age.
#' FFdata = twinData[twinData$zygosity %in% c("MZFF", "DZFF"), ]
#' FFdata = umx_residualize("ht", "age", suffixes = 1:2, data = FFdata)
#' mzData = FFdata[FFdata$zygosity %in% "MZFF", ]
#' dzData = FFdata[FFdata$zygosity %in% "DZFF", ]
#' m3 = umxACE(selDVs = "ht", sep = "", dzData = dzData, mzData = mzData)
#'
#' # =============================================================
#' # = ADE: Evidence for dominance ? (DZ correlation set to .25) =
#' # =============================================================
#' m2 = umxACE(selDVs = "ht", sep = "", dzData = dzData, mzData = mzData, dzCr = .25)
#' umxCompare(m2, m1) # ADE is better
#' umxSummary(m2, comparison = m1)
#' # nb: Although summary is smart enough to print d, the underlying
#' # matrices are still called a, c & e.
#'
#' # tip: try umxReduce(m1) to automatically build and compare ACE, ADE, AE, CE
#' # including conditional probabilities!
#'
#' # ===================================================
#' # = WLS example using diagonal weight least squares =
#' # ===================================================
#'
#' m3 = umxACE(selDVs = "ht", sep = "", dzData = dzData, mzData = mzData,
#' type = "DWLS", allContinuousMethod='marginals'
#' )
#'
#'
#' # ==============================
#' # = Univariate model of weight =
#' # ==============================
#'
#' # Things to note:
#'
#' # 1. Weight has a large variance, and this makes solution finding very hard.
#' # Here, we residualize the data for age, which also scales weight and height.
#'
#' data(twinData)
#' tmp = umx_residualize(c("wt", "ht"), cov = "age", suffixes= c(1, 2), data = twinData)
#' mzData = tmp[tmp$zygosity %in% "MZFF", ]
#' dzData = tmp[tmp$zygosity %in% "DZFF", ]
#'
#' # tip: You might also want transform variables
#' # tmp = twinData$wt1[!is.na(twinData$wt1)]
#' # car::powerTransform(tmp, family="bcPower"); hist(tmp^-0.6848438)
#' # twinData$wt1 = twinData$wt1^-0.6848438
#' # twinData$wt2 = twinData$wt2^-0.6848438
#'
#' # 4. note: the default boundDiag = 0 lower-bounds a, c, and e at 0.
#' # Prevents mirror-solutions. If not desired: set boundDiag = NULL.
#'
#' m2 = umxACE(selDVs = "wt", dzData = dzData, mzData = mzData, sep = "", boundDiag = NULL)
#'
#' # A short cut (which is even shorter for "_T" twin data with "MZ"/"DZ" data in zygosity column is:
#' m1 = umxACE(selDVs = "wt", sep = "", data = twinData,
#' dzData = c("DZMM", "DZFF", "DZOS"), mzData = c("MZMM", "MZFF"))
#' # | | a1|c1 | e1|
#' # |:--|----:|:--|----:|
#' # |wt | 0.93|. | 0.38|
#'
#' # tip: umx_make_twin_data_nice() will make data into this nice format for you!
#'
#' # ======================
#' # = MODEL MODIFICATION =
#' # ======================
#' # We can modify this model, e.g. test shared environment.
#' # Set comparison to modify, and show effect in one step.
#'
#' m2 = umxModify(m1, update = "c_r1c1", name = "no_C", comparison = TRUE)
#' #*tip* call umxModify(m1) with no parameters, and it will print the labels available to fix!
#' # nb: You can see parameters of any model with parameters(m1)
#'
#' # =========================================================
#' # = Well done! Now you can make modify twin models in umx =
#' # =========================================================
#'
#' # =====================================
#' # = Bivariate height and weight model =
#' # =====================================
#' data(twinData)
#' # We'll scale height (ht1 and ht2) and weight
#' twinData = umx_scale_wide_twin_data(data = twinData, varsToScale = c("ht", "wt"), sep = "")
#' mzData = twinData[twinData$zygosity %in% c("MZFF", "MZMM"),]
#' dzData = twinData[twinData$zygosity %in% c("DZFF", "DZMM", "DZOS"), ]
#' m1 = umxACE(selDVs = c("ht", "wt"), sep = '', dzData = dzData, mzData = mzData)
#' umxSummary(m1)
#'
#' # ===================
#' # = Ordinal example =
#' # ===================
#'
#' # Prep data
#' require(umx)
#' data(twinData)
#' # Cut BMI column to form ordinal obesity variables
#' obLevels = c('normal', 'overweight', 'obese')
#' cuts = quantile(twinData[, "bmi1"], probs = c(.5, .2), na.rm = TRUE)
#' twinData$obese1=cut(twinData$bmi1, breaks=c(-Inf,cuts,Inf), labels=obLevels)
#' twinData$obese2=cut(twinData$bmi2, breaks=c(-Inf,cuts,Inf), labels=obLevels)
#'
#' # Make the ordinal variables into umxFactors
#' ordDVs = c("obese1", "obese2")
#' twinData[, ordDVs] = umxFactor(twinData[, ordDVs])
#'
#' mzData = twinData[twinData$zygosity %in% "MZFF", ]
#' dzData = twinData[twinData$zygosity %in% "DZFF", ]
#'
#' # Model and summary!
#' m1 = umxACE(selDVs = "obese", dzData = dzData, mzData = mzData, sep = '')
#'
#' # And controlling age (otherwise manifests appearance as latent C)
#' m1 = umxACE(selDVs = "obese", selCov= "age", dzData = dzData, mzData = mzData, sep = '')
#' # umxSummary(m1)
#'
#' # ============================================
#' # = Bivariate continuous and ordinal example =
#' # ============================================
#' data(twinData)
#' twinData= umx_scale_wide_twin_data(data=twinData,varsToScale="wt",sep= "")
#' # Cut BMI column to form ordinal obesity variables
#' obLevels = c('normal', 'overweight', 'obese')
#' cuts = quantile(twinData[, "bmi1"], probs = c(.5, .2), na.rm = TRUE)
#' twinData$obese1=cut(twinData$bmi1,breaks=c(-Inf,cuts,Inf),labels=obLevels)
#' twinData$obese2=cut(twinData$bmi2,breaks=c(-Inf,cuts,Inf),labels=obLevels)
#' # Make the ordinal variables into mxFactors
#' ordDVs = c("obese1", "obese2")
#' twinData[, ordDVs] = umxFactor(twinData[, ordDVs])
#' mzData = twinData[twinData$zygosity %in% "MZFF",]
#' dzData = twinData[twinData$zygosity %in% "DZFF",]
#' mzData = mzData[1:80,] # just top 80 so example runs in a couple of secs
#' dzData = dzData[1:80,]
#' m1 = umxACE(selDVs= c("wt","obese"), dzData= dzData, mzData= mzData, sep='')
#'
#' # And controlling age
#' m1 = umxACE(selDVs = c("wt","obese"), selCov= "age", dzData = dzData, mzData = mzData, sep = '')
#'
#' # =======================================
#' # = Mixed continuous and binary example =
#' # =======================================
#' require(umx)
#' data(twinData)
#' twinData= umx_scale_wide_twin_data(data= twinData,varsToScale= "wt", sep="")
#' # Cut to form category of 20% obese subjects
#' # and make into mxFactors (ensure ordered is TRUE, and require levels)
#' obLevels = c('normal', 'obese')
#' cuts = quantile(twinData[, "bmi1"], probs = .2, na.rm = TRUE)
#' twinData$obese1= cut(twinData$bmi1, breaks=c(-Inf,cuts,Inf), labels=obLevels)
#' twinData$obese2= cut(twinData$bmi2, breaks=c(-Inf,cuts,Inf), labels=obLevels)
#' ordDVs = c("obese1", "obese2")
#' twinData[, ordDVs] = umxFactor(twinData[, ordDVs])
#'
#' selDVs = c("wt", "obese")
#' mzData = twinData[twinData$zygosity %in% "MZFF",]
#' dzData = twinData[twinData$zygosity %in% "DZFF",]
#' m1 = umxACE(selDVs = selDVs, dzData = dzData, mzData = mzData, sep = '')
#' umxSummary(m1)
#'
#' # ==============
#' # = Two binary =
#' # ==============
#' require(umx)
#' data(twinData)
#' htLevels = c('short', 'tall')
#' obLevels = c('normal', 'obese')
#' cuts = quantile(twinData[, "bmi1"], probs = .2, na.rm = TRUE)
#' twinData$obese1= cut(twinData$bmi1, breaks=c(-Inf,cuts,Inf), labels=obLevels)
#' twinData$obese2= cut(twinData$bmi2, breaks=c(-Inf,cuts,Inf), labels=obLevels)
#' ordDVs = c("obese1", "obese2")
#' twinData[, ordDVs] = umxFactor(twinData[, ordDVs])
#'
#' twinData$short1 = cut(twinData$ht1, breaks=c(-Inf,1.6,Inf), labels=htLevels)
#' twinData$short2 = cut(twinData$ht2, breaks=c(-Inf,1.6,Inf), labels=htLevels)
#' ordDVs = c("short1", "short2")
#' twinData[, ordDVs] = umxFactor(twinData[, ordDVs])
#'
#' mzData = twinData[twinData$zygosity %in% "MZFF",]
#' dzData = twinData[twinData$zygosity %in% "DZFF",]
#' m1 = umxACE(selDVs = c("short", "obese"), dzData = dzData, mzData = mzData, sep = '')
#'
#' # ===================================
#' # Example with covariance data only =
#' # ===================================
#'
#' require(umx)
#' data(twinData)
#' twinData= umx_scale_wide_twin_data(data=twinData, varsToScale= "wt", sep="")
#' selDVs = c("wt1", "wt2")
#' mz = cov(twinData[twinData$zygosity %in% "MZFF", selDVs], use = "complete")
#' dz = cov(twinData[twinData$zygosity %in% "DZFF", selDVs], use = "complete")
#' m1 = umxACE(selDVs=selDVs, dzData=dz, mzData=mz, numObsDZ=569, numObsMZ=351)
#' umxSummary(m1)
#' plot(m1)
#' }
#'
umxACE <- function(name = "ACE", selDVs, selCovs = NULL, dzData= NULL, mzData= NULL, sep = NULL, data = NULL, zyg = "zygosity", type = c("Auto", "FIML", "cov", "cor", "WLS", "DWLS", "ULS"), numObsDZ = NULL, numObsMZ = NULL, boundDiag = 0, allContinuousMethod = c("cumulants", "marginals"), autoRun = getOption("umx_auto_run"), intervals = FALSE, tryHard = c("no", "yes", "ordinal", "search"), optimizer = NULL, residualizeContinuousVars = FALSE, nSib = 2, dzAr = .5, dzCr = 1, weightVar = NULL, equateMeans = TRUE, addStd = TRUE, addCI = TRUE) {
tryHard = match.arg(tryHard)
type = match.arg(type)
allContinuousMethod = match.arg(allContinuousMethod)
if(residualizeContinuousVars){
stop("residualizing (as opposed to modelling) continuous variables not implemented yet: just set to FALSE for now")
}
if(dzCr == .25 & (name == "ACE")){ name = "ADE" }
# if data provided create twin files
if(!is.null(data)){
if(is.null(sep)){ sep = "_T" }
# Avoid ingesting tibbles
if("tbl" %in% class(data)){
data = as.data.frame(data)
}
mzData = data[data[,zyg] %in% ifelse(is.null(mzData), "DZ", mzData), ]
dzData = data[data[,zyg] %in% ifelse(is.null(dzData), "DZ", dzData), ]
}else{
# avoid ingesting tibbles
if("tbl" %in% class(mzData)){
mzData = as.data.frame(mzData)
dzData = as.data.frame(dzData)
}
}
xmu_twin_check(selDVs= selDVs, sep = sep, dzData = dzData, mzData = mzData, enforceSep = FALSE, nSib = nSib, optimizer = optimizer)
# nSib = 2, equateMeans = TRUE, verbose = verbose
# New-style build-block: Expand var names if necessary and make the basic components of a twin model
# full names passed in... gosh I wish I'd not allowed this early on...
selVars = xmu_twin_upgrade_selDvs2SelVars(selDVs = selDVs, sep = sep, nSib= nSib)
model = xmu_make_TwinSuperModel(name=name, mzData = mzData, dzData = dzData, selDVs = selDVs, selCovs= selCovs, sep = sep, type = type, allContinuousMethod = allContinuousMethod, numObsMZ = numObsMZ, numObsDZ = numObsDZ, nSib= nSib, equateMeans = equateMeans, weightVar = weightVar, bVector = FALSE, verbose= FALSE)
tmp = xmu_starts(mzData, dzData, selVars = selDVs, sep = sep, nSib = nSib, varForm = "Cholesky", equateMeans= equateMeans, SD= TRUE, divideBy = 3)
nVar = length(selVars)/nSib; # Number of dependent variables per **INDIVIDUAL** (so x2 per family)
# Finish building top
# Finish building top
if(nSib==2){
expCovMZ = mxAlgebra(rbind (cbind(ACE, AC), cbind( AC, ACE)), dimnames = list(selVars, selVars), name = "expCovMZ")
expCovDZ = mxAlgebra(rbind (cbind(ACE, hAC), cbind(hAC, ACE)), dimnames = list(selVars, selVars), name = "expCovDZ")
} else if (nSib==3) {
expCovMZ = mxAlgebra(name="expCovMZ", dimnames = list(selVars, selVars), rbind(
cbind(ACE, AC, hAC),
cbind(AC , ACE, hAC),
cbind(hAC, hAC, ACE))
)
expCovDZ = mxAlgebra(name= "expCovDZ", dimnames = list(selVars, selVars), rbind(
cbind(ACE, hAC, hAC),
cbind(hAC, ACE, hAC),
cbind(hAC, hAC, ACE))
)
}else{
stop("3 sibs is experimental, but ", nSib, "? ... Maybe come back in 2024, best tim :-)")
}
top = mxModel(model$top,
# NB: "top" defines the algebra of the twin model, which MZ and DZ slave off of
# it already has the means model and thresholds matrix added if necessary - see "xmu_make_TwinSuperModel" above
# Additive, Common, and Unique environmental paths
umxMatrix("a", type = "Lower", nrow = nVar, ncol = nVar, free = TRUE, values = tmp$varStarts, byrow = TRUE),
umxMatrix("c", type = "Lower", nrow = nVar, ncol = nVar, free = TRUE, values = tmp$varStarts, byrow = TRUE),
umxMatrix("e", type = "Lower", nrow = nVar, ncol = nVar, free = TRUE, values = tmp$varStarts, byrow = TRUE),
umxMatrix("dzAr", "Full", 1, 1, free = FALSE, values = dzAr),
umxMatrix("dzCr", "Full", 1, 1, free = FALSE, values = dzCr),
# Multiply by each path coefficient by its inverse to get variance component
# Quadratic multiplication to add common_loadings
mxAlgebra(name = "A", a %*% t(a)), # Additive genetic variance
mxAlgebra(name = "C", c %*% t(c)), # Common environmental variance
mxAlgebra(name = "E", e %*% t(e)), # Unique environmental variance
mxAlgebra(name = "ACE", A+C+E),
mxAlgebra(name = "AC" , A+C ),
mxAlgebra(name = "hAC", (dzAr %x% A) + (dzCr %x% C)),
expCovMZ, expCovDZ
)
model = mxModel(model, top)
if(!is.null(boundDiag)){
if(!is.numeric(boundDiag)){
stop("boundDiag must be NULL, a value or a vector of values. You gave me a ", class(boundDiag))
} else {
newLbound = model$top$matrices$a@lbound
if(length(boundDiag) > 1 ){
if(length(boundDiag) != length(diag(newLbound)) ){
stop("Typically boundDiag is 1 digit: if more, must be size of diag(a)")
}
}
diag(newLbound) = boundDiag;
model$top$a$lbound = newLbound
model$top$c$lbound = newLbound
model$top$e$lbound = newLbound
}
}
if(addStd){
newTop = mxModel(model$top,
umxMatrix("I", "Iden", nVar, nVar), # nVar Identity matrix
mxAlgebra(name = "Vtot", A + C+ E), # Total variance
mxAlgebra(name = "SD", solve(sqrt(I * Vtot))), # total variance --> 1/SD
mxAlgebra(name = "a_std", SD %*% a), # standardized a
mxAlgebra(name = "c_std", SD %*% c), # standardized c
mxAlgebra(name = "e_std", SD %*% e), # standardized e
mxAlgebra(name = "A_std", SD %&% A), # standardized A
mxAlgebra(name = "C_std", SD %&% C), # standardized C
mxAlgebra(name = "E_std", SD %&% E) # standardized E
)
model = mxModel(model, newTop)
}
if(addCI){
if(addStd){
model = mxModel(model, mxCI(c('top.a_std', 'top.c_std', 'top.e_std')))
}else{
model = mxModel(model, mxCI(c('top.a', 'top.c', 'top.e')))
}
}
# Trundle through and make sure values with the same label have the same start value... means for instance.
model = omxAssignFirstParameters(model)
model = as(model, "MxModelACE") # set class so that S3 plot() dispatches
model = xmu_safe_run_summary(model, autoRun = autoRun, tryHard = tryHard, std = TRUE, intervals = intervals)
return(model)
} # end umxACE
#' umxGxE: Implements ACE models with moderation of paths, e.g. by SES.
#'
#' Make a 2-group GxE (moderated ACE) model (Purcell, 2002). GxE interaction studies test the hypothesis that the strength
#' of genetic (or environmental) influence varies parametrically (usually linear effects on path estimates)
#' across levels of environment. umxGxE allows detecting,
#' testing, and visualizing G xE (or C or E x E) interaction forms.
#'
#' The following figure the GxE model as a path diagram:
#'
#' \if{html}{\figure{GxE.png}{options: width=50% alt="Figure: GxE.png"}}
#' \if{latex}{\figure{GxE.pdf}{options: width=7cm}}
#'
#' @param name The name of the model (default= "G_by_E")
#' @param selDVs The dependent variable (e.g. "IQ")
#' @param selDefs The definition variable (e.g. "SES")
#' @param sep How to expand selDVs into full names, i.e., "_T" makes "var" -> "var_T1" and "var_T2"
#' @param dzData The DZ dataframe containing the Twin 1 and Twin 2 DV and moderator (4 columns)
#' @param mzData The MZ dataframe containing the Twin 1 and Twin 2 DV and moderator (4 columns)
#' @param data If provided, dzData and mzData are treated as valid levels of zyg to select() data sets (default = NULL)
#' @param zyg If data provided, this column is used to select rows by zygosity (Default = "zygosity")
#' @param digits Rounding precision for tables (default 3)
#' @param dropMissingDef Whether to automatically drop missing def var rows for the user (default = TRUE). You get a polite note.
#' @param dzAr The DZ genetic correlation (defaults to .5, vary to examine assortative mating).
#' @param dzCr The DZ "C" correlation (defaults to 1: set to .25 to make an ADE model).
#' @param lboundACE If not NA, then lbound the main effects at this value (default = NA, can help to set this to 0)
#' @param lboundM If not NA, then lbound the moderator effects at this value (default = NA, can help to set this to 0)
#' @param autoRun Optionally run the model (default), or just to create it and return without running.
#' @param tryHard Optionally tryHard to get the model to converge (Default = 'no'). "yes" uses mxTryHard. Other options: "ordinal", "search".
#' @param optimizer Optionally set the optimizer (default NULL does nothing)
#' @return - GxE [OpenMx::mxModel()]
#' @export
#' @seealso [umxGxE_window()], [umxReduce()], [umxSummary()]
#' @family Twin Modeling Functions
#' @references - Purcell, S. (2002). Variance components models for gene-environment interaction in twin analysis. *Twin Research*,
#' **6**, 554-571. \doi{10.1375/twin.5.6.554}
#' @md
#' @examples
#' \dontrun{
#' require(umx)
#' data(twinData)
#' twinData$age1 = twinData$age2 = twinData$age
#' selDVs = "bmi"
#' selDefs = "age"
#' mzData = subset(twinData, zygosity == "MZFF")[1:100,]
#' dzData = subset(twinData, zygosity == "DZFF")[1:100,]
#' m1 = umxGxE(selDVs= "bmi", selDefs= "age", sep= "", dzData= dzData, mzData= mzData, tryHard= "yes")
#'
#' # Select the data on the fly with data= and zygosity levels
#' m1 = umxGxE(selDVs= "bmi", selDefs= "age", sep="", dzData= "DZFF", mzData= "MZFF", data= twinData)
#'
#' # ===============================================================
#' # = example with Twins having different values of the moderator =
#' # ===============================================================
#'
#' twinData$age1 = twinData$age2 = twinData$age
#' tmp = twinData
#' tmp$age2 = tmp$age2 +rnorm(n=length(tmp$age2))
#' selDVs = "bmi"
#' selDefs = "age"
#' mzData = subset(tmp, zygosity == "MZFF")
#' dzData = subset(tmp, zygosity == "DZFF")
#' m1 = umxGxE(selDVs= "bmi", selDefs= "age", sep= "", dzData= dzData, mzData= mzData, tryHard= "yes")
#'
#' # ====================================
#' # = Controlling output of umxSummary =
#' # ====================================
#' umxSummaryGxE(m1)
#' umxSummary(m1, location = "topright")
#' umxSummary(m1, separateGraphs = TRUE)
#'
# # Test dropping moderation on a path
#' m2 = umxModify(m1, regex = "am_.*", comparison = TRUE, tryHard = "yes")
#'
#' # umxReduce knows how to test all relevant hypotheses for GxE models,
#' # reporting these in a nice table.
#' umxReduce(m1)
#' }
umxGxE <- function(name = "G_by_E", selDVs, selDefs, dzData, mzData, sep = NULL, data = NULL, zyg = "zygosity", digits = 3, lboundACE = NA, lboundM = NA, dropMissingDef = TRUE, dzAr = .5, dzCr = 1, autoRun = getOption("umx_auto_run"), tryHard = c("no", "yes", "ordinal", "search"), optimizer = NULL) {
# ===================================================================
# = Thoughts about expanding covariate modelling here and elsewhere =
# ===================================================================
# In top model (would use xmuTwinUpgradeMeansToCovariateModel)
# TODO: umxGxE: Add covariates
# if(0){
# TODO: umxGxE If there are covs
# mxMatrix(name = "betas" , "Full", nrow = nCov, ncol = nVar, free = TRUE, values = 0.05, labels = paste0("beta_", covariates))
# }
# And In MZ and DZ models...
# if(0){ # TODO if there are covs
# # matrices for covariates (just on the means)
# mxMatrix(name = "covsT1", "Full", nrow = 1, ncol = nCov, free = FALSE, labels = paste0("data.", covsT1)),
# mxMatrix(name = "covsT2", "Full", nrow = 1, ncol = nCov, free = FALSE, labels = paste0("data.", covsT2)),
# mxAlgebra(top.betas %*% covsT1, name = "predMeanT1"),
# mxAlgebra(top.betas %*% covsT2, name = "predMeanT2"),
# mxAlgebra( cbind(top.intercept + DefT1Rlin + DefT1Rquad + predMeanT1,
# top.intercept + DefT2Rlin + DefT2Rquad + predMeanT2), name = "expMeans")
# } else {
# mxAlgebra( cbind(top.intercept + DefT1Rlin + DefT1Rquad, top.intercept + DefT2Rlin + DefT2Rquad), name = "expMeans")
# },
if(dzCr == .25 & name == "G_by_E") name = "G_by_E_ADE"
tryHard = match.arg(tryHard)
nSib = 2;
# if data provided create twin files
if(!is.null(data)){
# avoid ingesting tibbles
if("tbl" %in% class(data)){
data = as.data.frame(data)
}
if(is.null(dzData)){ dzData = "DZ"; mzData = "MZ" }
mzData = data[data[,zyg] %in% mzData, ]
dzData = data[data[,zyg] %in% dzData, ]
}else{
# avoid ingesting tibbles
if("tbl" %in% class(mzData)){
mzData = as.data.frame(mzData)
dzData = as.data.frame(dzData)
}
}
xmu_twin_check(selDVs=selDVs, dzData = dzData, mzData = mzData, optimizer = optimizer, sep = sep, nSib = nSib)
selDVs = umx_paste_names(selDVs , sep = sep, suffixes = 1:nSib)
selDefs = umx_paste_names(selDefs, sep = sep, suffixes = 1:nSib)
if(any(selDefs %in% selDVs)) {
warning("selDefs was found in selDVs: You probably gave me all the variables in selDVs instead of just the DEPENDENT variable");
}
if(length(selDefs) != nSib){
warning("selDefs must be length = 2");
}
umx_check(is.numeric(mzData[, selDefs[1] ]), "stop", "Definition vars (selDefs) must be numeric (not, e.g. factor)")
if(length(selDVs) != nSib){
stop("DV list must be length = 2: 1 variable for each of 2 twins... You tried ", length(selDVs)/nSib)
}
umx_check_names(selDVs, mzData)
umx_check_names(selDVs, dzData)
if(!umx_set_silent(silent = TRUE)){
message("selDVs: ", omxQuotes(selDVs))
}
selVars = c(selDVs, selDefs)
obsMean = mean(colMeans(mzData[,selDVs], na.rm = TRUE)); # Just one average mean for all twins
nVar = length(selDVs)/nSib; # number of dependent variables ** per INDIVIDUAL ( so times-2 for a family)**
rawVar = diag(var(mzData[,selDVs], na.rm = TRUE))[1]
startMain = sqrt(c(.8, .0 ,.6) * rawVar)
umx_check(!umx_is_cov(dzData, boolean = TRUE), "stop", "data must be raw for gxe")
# drop unused variables
dzData = dzData[ , selVars]
mzData = mzData[ , selVars]
mzData = xmu_data_missing(mzData, selVars = selDefs, sep = NULL, dropMissingDef = dropMissingDef, hint= "mzData")
dzData = xmu_data_missing(dzData, selVars = selDefs, sep = NULL, dropMissingDef = dropMissingDef, hint= "dzData")
# =====================================
# = DO T1 and T2 share the moderator? =
# =====================================
bModeratorsIdentical = all(mzData[, selDefs[1]] == mzData[, selDefs[2]])
if(!bModeratorsIdentical){
message("Twins do not share the moderator... I will regress both twin's moderator from each twin, but you need to check this doesn't violate assumptions")
}
model = mxModel(name,
mxModel("top",
# ======================================
# = Matrices and algebra for the model =
# ======================================
# Matrices to store a, c, and e path coefficients
umxMatrix("a", "Lower", nrow = nVar, ncol = nVar, free = TRUE, values = startMain[1]),
umxMatrix("c", "Lower", nrow = nVar, ncol = nVar, free = TRUE, values = startMain[2]),
umxMatrix("e", "Lower", nrow = nVar, ncol = nVar, free = TRUE, values = startMain[3]),
# Matrices to store moderated path coefficients
umxMatrix("am", "Lower", nrow = nVar, ncol = nVar, free = TRUE, values = 0),
umxMatrix("cm", "Lower", nrow = nVar, ncol = nVar, free = TRUE, values = 0),
umxMatrix("em", "Lower", nrow = nVar, ncol = nVar, free = TRUE, values = 0),
# Matrices A, C, and E compute non-moderated variance components
mxAlgebra(name = "A", a %*% t(a) ),
mxAlgebra(name = "C", c %*% t(c) ),
mxAlgebra(name = "E", e %*% t(e) ),
# Algebra to compute total variances and inverse of standard deviations (diagonal only)
mxAlgebra(name = "V", A + C + E),
mxMatrix(name = "I", "Iden", nrow = nVar, ncol = nVar),
mxAlgebra(name = "iSD", solve(sqrt(I * V)) ),
# Matrix & Algebra for intercept of the means vector
mxMatrix(name = "intercept", "Full", nrow = 1, ncol = nVar, free = TRUE, values = obsMean, labels = "mean"), # needs mods for multivariate!
# Matrice for moderated the means model (algebars in the data models)
if(bModeratorsIdentical){
# Matrices for betas
list(
umxMatrix(name = "betaLin" , "Full", nrow = nVar, ncol = 1, free = TRUE, values = .0, labels = "lin11"),
umxMatrix(name = "betaQuad", "Full", nrow = nVar, ncol = 1, free = TRUE, values = .0, labels = "quad11")
)
}else{
list(
umxMatrix(name = "betaSelf" , "Full", nrow = nVar, ncol = 1, free = TRUE, values = .0),
umxMatrix(name = "betaCoTwin", "Full", nrow = nVar, ncol = 1, free = TRUE, values = .0)
)
}
),
mxModel("MZ",
# Matrices for moderating/interacting variable
umxMatrix("DefT1", "Full", nrow=1, ncol=1, free=FALSE, labels = paste0("data.", selDefs[1])), # c("data.age1")
umxMatrix("DefT2", "Full", nrow=1, ncol=1, free=FALSE, labels = paste0("data.", selDefs[2])), # c("data.age2")
# ====================================
# = Algebra for expected mean vector =
# ====================================
if(bModeratorsIdentical){
# do lm(DV ~ moderator + moderator^2)
mxAlgebra(name = "expMean", cbind(
top.intercept + (top.betaLin %*% DefT1) + (top.betaQuad %*% DefT1^2),
top.intercept + (top.betaLin %*% DefT2) + (top.betaQuad %*% DefT2^2))
)
}else{
# do lm(DV ~ self moderator + co-twin moderator)
mxAlgebra(name = "expMean", cbind(
top.intercept + (top.betaSelf %*% DefT1) + (top.betaCoTwin %*% DefT2),
top.intercept + (top.betaSelf %*% DefT2) + (top.betaCoTwin %*% DefT1))
)
}
,
# Compute ACE variance components
mxAlgebra(name = "A11", (top.a + top.am %*% DefT1) %*% t(top.a+ top.am %*% DefT1)),
mxAlgebra(name = "C11", (top.c + top.cm %*% DefT1) %*% t(top.c+ top.cm %*% DefT1)),
mxAlgebra(name = "E11", (top.e + top.em %*% DefT1) %*% t(top.e+ top.em %*% DefT1)),
mxAlgebra(name = "A12", (top.a + top.am %*% DefT1) %*% t(top.a+ top.am %*% DefT2)),
mxAlgebra(name = "C12", (top.c + top.cm %*% DefT1) %*% t(top.c+ top.cm %*% DefT2)),
mxAlgebra(name = "A21", (top.a + top.am %*% DefT2) %*% t(top.a+ top.am %*% DefT1)),
mxAlgebra(name = "C21", (top.c + top.cm %*% DefT2) %*% t(top.c+ top.cm %*% DefT1)),
mxAlgebra(name = "A22", (top.a + top.am %*% DefT2) %*% t(top.a+ top.am %*% DefT2)),
mxAlgebra(name = "C22", (top.c + top.cm %*% DefT2) %*% t(top.c+ top.cm %*% DefT2)),
mxAlgebra(name = "E22", (top.e + top.em %*% DefT2) %*% t(top.e+ top.em %*% DefT2)),
# Algebra for expected variance/covariance matrix and expected mean vector in MZ
mxAlgebra(name = "expCovMZ", rbind(
cbind(A11+C11+E11, A12+C12),
cbind(A21+C21 , A22+C22+E22))
),
# Data & Objective
mxData(mzData, type = "raw"),
mxExpectationNormal("expCovMZ", means = "expMean", dimnames = selDVs), mxFitFunctionML()
),
mxModel("DZ",
umxMatrix("DefT1", "Full", nrow=1, ncol=1, free=FALSE, labels=paste0("data.", selDefs[1])), # twin1 c("data.divorce1")
umxMatrix("DefT2", "Full", nrow=1, ncol=1, free=FALSE, labels=paste0("data.", selDefs[2])), # twin2 c("data.divorce2")
# =================
# = Means Algebra =
# =================
if(bModeratorsIdentical){
# do lm(DV ~ moderator + moderator^2)
mxAlgebra(name = "expMean", cbind(
top.intercept + (top.betaLin %*% DefT1) + (top.betaQuad %*% DefT1^2),
top.intercept + (top.betaLin %*% DefT2) + (top.betaQuad %*% DefT2^2))
)
}else{
# do lm(DV ~ self moderator + co-twin moderator)
mxAlgebra(name = "expMean", cbind(
top.intercept + (top.betaSelf %*% DefT1) + (top.betaCoTwin %*% DefT2),
top.intercept + (top.betaSelf %*% DefT2) + (top.betaCoTwin %*% DefT1))
)
}
,
# Compute ACE variance components
mxAlgebra(name= "A11", (top.a+ top.am%*% DefT1) %*% t(top.a+ top.am%*% DefT1)),
mxAlgebra(name= "C11", (top.c+ top.cm%*% DefT1) %*% t(top.c+ top.cm%*% DefT1)),
mxAlgebra(name= "E11", (top.e+ top.em%*% DefT1) %*% t(top.e+ top.em%*% DefT1)),
mxAlgebra(name= "A12", (top.a+ top.am%*% DefT1) %*% t(top.a+ top.am%*% DefT2)),
mxAlgebra(name= "C12", (top.c+ top.cm%*% DefT1) %*% t(top.c+ top.cm%*% DefT2)),
mxAlgebra(name= "A21", (top.a+ top.am%*% DefT2) %*% t(top.a+ top.am%*% DefT1)),
mxAlgebra(name= "C21", (top.c+ top.cm%*% DefT2) %*% t(top.c+ top.cm%*% DefT1)),
mxAlgebra(name= "A22", (top.a+ top.am%*% DefT2) %*% t(top.a+ top.am%*% DefT2)),
mxAlgebra(name= "C22", (top.c+ top.cm%*% DefT2) %*% t(top.c+ top.cm%*% DefT2)),
mxAlgebra(name= "E22", (top.e+ top.em%*% DefT2) %*% t(top.e+ top.em%*% DefT2)),
# Expected DZ variance/covariance matrix
umxMatrix("dzAr", "Full", 1, 1, free = FALSE, values = dzAr),
umxMatrix("dzCr", "Full", 1, 1, free = FALSE, values = dzCr),
mxAlgebra(name= "expCovDZ",
rbind(cbind(A11+C11+E11, dzAr%x%A12+dzCr%x%C12),
cbind(dzAr%x%A21+dzCr%x%C21, A22+C22+E22) )),
# Data & Objective
mxData(dzData, type = "raw"),
mxExpectationNormal("expCovDZ", means = "expMean", dimnames = selDVs),
mxFitFunctionML()
),
mxFitFunctionMultigroup(c("MZ", "DZ"))
)
if(!is.na(lboundACE)){
model = omxSetParameters(model, labels = c('a_r1c1', 'c_r1c1', 'e_r1c1'), lbound = lboundACE)
}
if(!is.na(lboundM)){
model = omxSetParameters(model, labels = c('am_r1c1', 'cm_r1c1', 'em_r1c1'), lbound = lboundM)
}
model = as(model, "MxModelGxE")
model = xmu_safe_run_summary(model, autoRun = autoRun, tryHard = tryHard, digits = digits)
return(model)
}
#' Implement the moving-window form of GxE analysis.
#'
#' Make a 2-group GxE (moderated ACE) model using LOSEM. In GxE interaction studies, typically,
#' the hypothesis that the strength of genetic influence varies parametrically (usually linear effects
#' on path estimates) across levels of environment. Of course, the function linking genetic influence
#' and context is not necessarily linear, but may react more steeply at the extremes, or take other, unknown forms.
#' To avoid obscuring the underlying shape of the interaction effect, local structural equation
#' modeling (LOSEM) may be used, and GxE_window implements this. LOSEM is a non-parametric,
#' estimating latent interaction effects across the range of a measured moderator using a
#' windowing function which is walked along the context dimension, and which weights subjects
#' near the center of the window highly relative to subjects far above or below the window center.
#' This allows detecting and visualizing arbitrary GxE (or CxE or ExE) interaction forms.
#'
#' @param selDVs The dependent variables for T1 and T2, e.g. c("bmi_T1", "bmi_T2")
#' @param moderator The name of the moderator variable in the dataset e.g. "age", "SES" etc.
#' @param mzData Dataframe containing the DV and moderator for MZ twins
#' @param dzData Dataframe containing the DV and moderator for DZ twins
#' @param sep (optional) separator, e.g. "_T" which will be used expand base names into full variable names:
#' e.g.: 'bmi' --> c("bmi_T1", "bmi_T2")
#' @param weightCov Whether to use cov.wt matrices or FIML default = FALSE, i.e., FIML
#' @param width An option to widen or narrow the window from its default (of 1)
#' @param target A user-selected list of moderator values to test (default = NULL = explore the full range)
#' @param plotWindow whether to plot the data window.
#' @param tryHard Default ('no') uses normal mxRun. "yes" uses mxTryHard. Other options: "ordinal", "search"
#' @param return whether to return the last model (useful for specifiedTargets) or the list of estimates (default = "estimates")
#' @return - Table of estimates of ACE along the moderator
#' @export
#' @seealso [umxGxE()]
#' @family Twin Modeling Functions
#' @references - Hildebrandt, A., Wilhelm, O, & Robitzsch, A. (2009)
#' Complementary and competing factor analytic approaches for the investigation
#' of measurement invariance. *Review of Psychology*, **16**, 87--107.
#'
#' Briley, D.A., Harden, K.P., Bates, T.C., Tucker-Drob, E.M. (2015).
#' Nonparametric Estimates of Gene x Environment Interaction Using Local Structural Equation Modeling.
#' *Behavior Genetics*, **45**, 581-96. \doi{10.1007/s10519-015-9732-8}.
#' @md
#' @examples
#' \dontrun{
#' library(umx);
#'
#' # ==============================
#' # = 1. Open and clean the data =
#' # ==============================
#' # umxGxE_window takes a data.frame consisting of a moderator and two DV columns: one for each twin.
#' # The model assumes two groups (MZ and DZ). Moderator can't be missing
#' mod = "age" # The full name of the moderator column in the dataset
#' selDVs = c("bmi1", "bmi2") # The DV for twin 1 and twin 2
#' data(twinData) # Dataset of Australian twins, built into OpenMx
#' # The twinData consist of two cohorts: "younger" and "older".
#' # zygosity is a factor. levels = MZFF, MZMM, DZFF, DZMM, DZOS.
#'
#' # Delete missing moderator rows
#' twinData = twinData[!is.na(twinData[mod]), ]
#' mzData = subset(twinData, zygosity == "MZFF")
#' dzData = subset(twinData, zygosity == "DZFF")
#'
#' # ========================
#' # = 2. Run the analyses! =
#' # ========================
#' # Run and plot for specified windows (in this case just 1927)
#' umxGxE_window(selDVs = selDVs, moderator = mod, mzData = mzData, dzData = dzData,
#' target = 40, plotWindow = TRUE)
#'
#' umxGxE_window(selDVs = "bmi", sep="", moderator = mod, mzData = mzData, dzData = dzData,
#' target = 40, plotWindow = TRUE, tryHard = "yes")
#'
#' # Run with tryHard
#' umxGxE_window(selDVs = "bmi", sep="", moderator = "age", mzData = mzData, dzData = dzData)
#' umxGxE_window(selDVs="bmi", sep="", moderator="age", mzData=mzData, dzData=dzData, tryHard="yes")
#'
#' # Run creating weighted covariance matrices (excludes missing data)
#' umxGxE_window(selDVs = "bmi", sep="", moderator= "age", mzData = mzData, dzData = dzData,
#' weightCov = TRUE)
#' # This example runs multiple target moderator values
#' mxGxE_window(selDVs = selDVs, moderator = mod, mzData = mzData, dzData = dzData,
#' target = c(39,40,50), plotWindow = TRUE)
#'
#' }
umxGxE_window <- function(selDVs = NULL, moderator = NULL, mzData = mzData, dzData = dzData, sep = NULL, weightCov = FALSE, target = NULL, width = 1, plotWindow = FALSE, tryHard = c("no", "yes", "ordinal", "search"), return = c("estimates","last_model")) {
return = match.arg(return)
tryHard = match.arg(tryHard)
nSib = 2 # Number of siblings in a twin pair.
xmu_twin_check(selDVs= selDVs, sep = sep, dzData = dzData, mzData = mzData, enforceSep = FALSE, nSib = nSib)
umx_check(!is.null(moderator), "stop", "Moderator must be set to the name of the moderator column, e.g, moderator = 'birth_year'")
if(!is.null(sep)){
selVars = umx_paste_names(selDVs, sep = sep, 1:2)
}else{
selVars = selDVs
}
# TODO umxGxE_window: allow missing moderator?
# Check DVs exists in mzData and dzData (and nothing else apart from the moderator)
umx_check_names(c(selVars, moderator), data = mzData, die = TRUE)
umx_check_names(c(selVars, moderator), data = dzData, die = TRUE)
# Drop any extraneous columns
mzData = mzData[, c(selVars, moderator)]
dzData = mzData[, c(selVars, moderator)]
# Add a zygosity column (that way we know what it's called)
mzData$ZYG = "MZ";
dzData$ZYG = "DZ"
# If using cov.wt, remove missing
if(weightCov){
dz.complete = complete.cases(dzData)
if(sum(dz.complete) != nrow(dzData)){
message("removed ", nrow(dzData) - sum(dz.complete), " cases from DZ data due to missingness. To use incomplete data, set weightCov = FALSE")
dzData = dzData[dz.complete, ]
}
mz.complete = complete.cases(mzData)
if(sum(mz.complete) != nrow(mzData)){
message("removed ", nrow(mzData) - sum(mz.complete), " cases from MZ data due to missingness. To use incomplete data, set weightCov = FALSE")
mzData = mzData[mz.complete, ]
}
}
# bind the MZ and DZ data into one frame so we can work with it repeatedly over weight iterations
allData = rbind(mzData, dzData)
# Create range of moderator values to iterate over (using the incoming moderator variable name)
modVar = allData[, moderator]
if(any(is.na(modVar))){
stop("Moderator \"", moderator, "\" contains ", length(modVar[is.na(modVar)]), "NAs. This is not currently supported.\n",
"NA found on rows", paste(which(is.na(modVar)), collapse = ", "), " of the combined data."
)
}
if(!is.null(target)){
if(any(target < min(modVar))) {
stop("A target found below the minimum value of moderator. min(modVar) was ", min(modVar))
} else if(any(target > max(modVar))){
stop("target above the max value of the moderator. max(modVar) was ", max(modVar))
} else {
targetLevels = target
}
} else {
# by default, run across each integer value of the moderator
targetLevels = seq(min(modVar), max(modVar))
}
numPairs = nrow(allData)
moderatorSD = sd(modVar, na.rm = TRUE)
bw = 2 * numPairs^(-.2) * moderatorSD * width # -.2 == -1/5
tmp = rep(NA, length(targetLevels))
out = data.frame(modLevel = targetLevels, Astd = tmp, Cstd = tmp, Estd = tmp, A = tmp, C = tmp, E = tmp)
n = 1
for (i in targetLevels) {
# i = targetLevels[1]
message("mod = ", i)
zx = (modVar - i)/bw
k = (1 / (2 * pi)^.5) * exp((-(zx)^2) / 2)
# ===========================================================
# = Insert the weights variable into data.frames as "weight" =
# ===========================================================
allData$weight = k/.399
mzData = allData[allData$ZYG == "MZ", c(selVars, "weight")]
dzData = allData[allData$ZYG == "DZ", c(selVars, "weight")]
if(weightCov){
mz.wt = cov.wt(mzData[, selVars], mzData$weight)
dz.wt = cov.wt(dzData[, selVars], dzData$weight)
m1 = umxACE(selDVs = selDVs, sep=sep, dzData = dz.wt$cov, mzData = mz.wt$cov, numObsDZ = dz.wt$n.obs, numObsMZ = mz.wt$n.obs, autoRun = FALSE)
} else {
m1 = umxACE(selDVs = selDVs, sep=sep, dzData = dzData, mzData = mzData, weightVar = "weight", autoRun = FALSE)
}
m1 = umxRun(m1, tryHard = tryHard, summary = FALSE)
if(plotWindow){
plot(allData[,moderator], allData$weight) # normal-curve yumminess
umxSummaryACE(m1)
}
out[n, ] = mxEval(c(i, top.a_std[1,1], top.c_std[1,1], top.e_std[1,1], top.a[1,1], top.c[1,1], top.e[1,1]), m1)
n = n + 1
}
if(length(targetLevels)==1){
# no need to plot: perhaps draw the ACE diagram?
plot(m1)
} else {
# plot ACE across levels of the moderator
ACE = c("A", "C", "E")
# Squaring paths to produce variances
out[,ACE] = out[,ACE]^2
# plotting variance components
with(out, {
plot(A ~ modLevel, main = paste0(selDVs[1], " variance"), ylab = "Variance", xlab=moderator, las = 1, bty = 'l', type = 'l', col = 'red', ylim = c(0, 1), data = out)
lines(modLevel, C, col = 'green')
lines(modLevel, E, col = 'blue')
legend('topright', fill = c('red', 'green', 'blue'), legend = ACE, bty = 'n', cex = .8)
plot(Astd ~ modLevel, main = paste0(selDVs[1], " std variance"), ylab = "Std Variance", xlab=moderator, las = 1, bty = 'l', type = 'l', col = 'red', ylim = c(0, 1), data = out)
lines(modLevel, Cstd, col = 'green')
lines(modLevel, Estd, col = 'blue')
legend('topright', fill = c('red', 'green', 'blue'), legend = ACE, bty = 'n', cex = .8)
})
}
if(return == "last_model"){
invisible(m1)
} else if(return == "estimates") {
invisible(out)
}
}
#' Run a Cholesky with covariates that are random (in the expected covariance matrix)
#'
#' Often, researchers include covariates in 2-group Cholesky [umxACE()] twin models.
#' The umxACEcov 'random' option models the covariates in the expected covariance matrix, thus allowing
#' all data to be preserved. The downside is that this method has a strong assumption
#' of multivariate normality. Covariates like age, which are perfectly correlated in twins cannot be used.
#' Covariates like sex, which are ordinal, violate the normality assumption.
#' Binary and ordinal covariates like sex also violate the normality assumption. Which is most of the use cases :-(.
#'
#' The following figure shows how the ACE model with random covariates appears as a path diagram:
#'
#' \if{html}{\figure{ACEcovVarianceModel.png}{options: width=50% alt="Figure: ACEcovVarianceModel.png"}}
#' \if{latex}{\figure{ACEcovVarianceModel.pdf}{options: width=7cm}}
#'
#'
#' @param name The name of the model (defaults to"ACE").
#' @param selDVs The variables to include from the data (do not include sep).
#' @param selCovs The covariates to include from the data (do not include sep).
#' @param dzData The DZ dataframe.
#' @param mzData The MZ dataframe.
#' @param sep Separator text between basename for twin variable names. Often "_T".
#' Used to expand selDVs into full column names, i.e., "dep" --> c("dep_T1", "dep_T2").
#' @param type Analysis method one of c("Auto", "FIML", "cov", "cor", "WLS", "DWLS", "ULS")
#' @param allContinuousMethod "cumulants" or "marginals". Used in all-continuous WLS data to determine if a means model needed.
#' @param dzAr The DZ genetic correlation (defaults to .5, vary to examine assortative mating).
#' @param dzCr The DZ "C" correlation (defaults to 1: set to .25 to make an ADE model).
#' @param addStd Whether to add the algebras to compute a std model (defaults to TRUE).
#' @param addCI Whether to add intervals to compute CIs (defaults to TRUE).
#' @param boundDiag = Whether to bound the diagonal of the a, c, and e matrices.
#' @param equateMeans Whether to equate the means across twins (defaults to TRUE).
#' @param bVector Whether to compute row-wise likelihoods (defaults to FALSE).
#' @param autoRun Whether to run the model (default), or just to create it and return without running.
#' @param tryHard Default ('no') uses normal mxRun. "yes" uses mxTryHard. Other options: "ordinal", "search"
#' @param optimizer optionally set the optimizer. Default (NULL) does nothing.
#' @return - [OpenMx::mxModel()] of subclass mxModel.ACEcov
#' @export
#' @family Twin Modeling Functions
#' @md
#' @references - Neale, M. C., & Martin, N. G. (1989). The effects of age, sex,
#' and genotype on self-report drunkenness following a challenge dose of alcohol.
#' *Behavior Genetics*, **19**, 63-78. \doi{10.1007/BF01065884}.
#' * Schwabe, I., Boomsma, D. I., Zeeuw, E. L., & Berg, S. M. (2015). A New Approach
#' to Handle Missing Covariate Data in Twin Research : With an Application to
#' Educational Achievement Data. *Behavior Genetics*, **46**, 583-95. \doi{10.1007/s10519-015-9771-1}.
#'
#' @examples
#' \dontrun{
#' # ============================================
#' # = BMI, can't use Age as a random covariate =
#' # ============================================
#' require(umx)
#' data(twinData)
#' # Replicate age to age1 & age2
#' twinData$age1 = twinData$age2 = twinData$age
#' mzData = subset(twinData, zygosity == "MZFF")
#' dzData = subset(twinData, zygosity == "DZFF")
#'
#' # =====================================================================
#' # = Trying to use identical var (like age) as a random cov is ILLEGAL =
#' # =====================================================================
#' m1 = umxACEcov(selDVs = "bmi", selCovs = "age", dzData = dzData, mzData = mzData, sep = "")
#'
#' # ========================================================
#' # = Use an lm-based age-residualisation approach instead =
#' # ========================================================
#'
#' resid_data = umx_residualize("bmi", "age", suffixes = 1:2, twinData)
#' mzData = subset(resid_data, zygosity == "MZFF")
#' dzData = subset(resid_data, zygosity == "DZFF")
#' m2 = umxACE("resid", selDVs = "bmi", dzData = dzData, mzData = mzData, sep = "")
#'
#' # Univariate BMI without covariate of age for comparison
#' mzData = subset(twinData, zygosity == "MZFF")
#' dzData = subset(twinData, zygosity == "DZFF")
#' m3 = umxACE("raw_bmi", selDVs = "bmi", dzData = dzData, mzData = mzData, sep = "")
#'
#' # ===========================================================================
#' # = A bivariate example (need a dataset with a VIABLE COVARIATE to do this) =
#' # ===========================================================================
#' selDVs = "wt" # Set the DVs
#' selCovs = "ht" # Set the COV
#' selVars = umx_paste_names(selDVs, covNames = selCovs, sep = "", sep = 1:2)
#' mzData = subset(twinData, zygosity == "MZFF")
#' dzData = subset(twinData, zygosity == "DZFF")
#' m1 = umxACEcov(selDVs = selDVs, selCovs = selCovs,
#' dzData = dzData, mzData = mzData, sep = "", autoRun = TRUE
#' )
#' }
#'@md
umxACEcov <- function(name = "ACEcov", selDVs, selCovs, dzData, mzData, sep = NULL, type = c("Auto", "FIML", "cov", "cor", "WLS", "DWLS", "ULS"), allContinuousMethod = c("cumulants", "marginals"), dzAr = .5, dzCr = 1, addStd = TRUE, addCI = TRUE, boundDiag = 0, equateMeans = TRUE, bVector = FALSE, autoRun = getOption("umx_auto_run"), tryHard = c("no", "yes", "ordinal", "search"), optimizer = NULL) {
# TODO sub-class umxACEcov (random covariates) fn to support umxSummary and plot
nSib = 2 # Number of siblings in a twin pair
tryHard = match.arg(tryHard)
type = match.arg(type)
allContinuousMethod = match.arg(allContinuousMethod)
if(type!="Auto"){
stop("type not support in umxACEcov yet, sorry...")
}
if(!is.null(optimizer)){
umx_set_optimizer(optimizer)
}
if(dzCr == .25 && name == "ACEcov"){
name = "ADEcov"
}
# ==================
# = Validate input =
# ==================
# Look for name conflicts
badNames = umx_grep(selDVs, grepString = "^[ACDEacde][0-9]*$")
if(!identical(character(0), badNames)){
stop("The data contain variables that look like parts of the a, c, e model, i.e., a1 is illegal.\n",
"BadNames included: ", omxQuotes(badNames) )
}
if(is.null(sep)){
stop("I need a sep, like '_T'. (I will add 1 and 2 after it...) \n",
"i.e., selDVs should be 'bmi' etc., and I will re-name to 'bmi_T1' and 'bmi_T2' etc.")
}else if(length(sep) > 1){
stop("sep should be just one word, like '_T'. I will add 1 and 2 after that...\n",
"i.e., if variables are like 'var_T1', give me selVars = 'var' and sep = '_T'")
}else{
# stash base names for use later
baseDVs = selDVs
baseCovs = selCovs
# fill out full trait names
selDVs = umx_paste_names(baseDVs , sep = sep, suffixes = (1:nSib) )
selCovs = umx_paste_names(baseCovs, sep = sep, suffixes = (1:nSib) )
}
nDV = length(baseDVs)
nCov = length(baseCovs)
nVar = nDV + nCov # Number of variables per **INDIVIDUAL** ( so * 2 for a family)
selVars = c(selDVs, selCovs)
umx_check_names(selVars, mzData)
umx_check_names(selVars, dzData)
# message("selVars: ", omxQuotes(selVars))
# Drop unused columns from mzData and dzData
mzData = mzData[, selVars]
dzData = dzData[, selVars]
# check covariates are not identical across twins
for (i in baseCovs) {
checkVars = umx_paste_names(i , sep = sep, suffixes = (1:nSib) )
if(cor(mzData[, checkVars], use = "com")[2, 1] == 1){
stop("The covariate ", omxQuotes(i), " is identical for twin 1 and twin 2... That's not allowed for random-effects covariates. Try modeling this as a def var in the means instead.")
}
}
if(any(umx_is_ordered(dzData, strict = FALSE))){
stop("Sorry: umxACEcov can't handle ordinal yet: e-mail tim and chew him out")
}
if(nrow(dzData) == 0){ stop("Your DZ dataset has no rows!") }
if(nrow(mzData) == 0){ stop("Your MZ dataset has no rows!") }
# ===============
# = Setup means =
# ===============
# c(T1 DV means, T1 DV means, T1 COV means, T1 COV means)
# Equate means for twin1 and twin 2 by matching labels in the first and second halves of the means labels matrix
if(equateMeans){
meanDimNames = list(NULL, selVars)
meanLabels = umx_paste_names(varNames = baseDVs, covNames = baseCovs, suffixes = c('',''), prefix = "expMean_")
DVmeanStarts = umx_means(mzData[, selDVs[1:nDV] , drop = FALSE], ordVar = 0, na.rm = TRUE)
CovMeanStarts = umx_means(mzData[, selCovs[1:nCov], drop = FALSE], ordVar = 0, na.rm = TRUE)
meanStarts = c(DVmeanStarts, DVmeanStarts, CovMeanStarts, CovMeanStarts)
} else {
stop("Sorry, currently, means must be equated...")
}
# Make beta labels
betaLabels = paste0(rep(paste0("var", 1:nDV), each = nCov), rep(paste0("beta", 1:nCov), times = nDV))
betaLabels = matrix(betaLabels, nrow = nCov, ncol = nDV, byrow = FALSE)
# =====================
# = Set up varStarts =
# =====================
# DVS
DVvarStarts = umx_var(mzData[, selDVs[1:nDV], drop = FALSE], format= "diag", ordVar = 1, use = "pairwise.complete.obs")
if(nDV == 1){
DVvarStarts = sqrt(DVvarStarts/3)
} else {
DVvarStarts = t(chol(diag(DVvarStarts/3))) # Divide variance up equally, and set to Cholesky form.
}
DVvarStarts = matrix(DVvarStarts, nDV, nDV)
# covs
covStarts = umx_var(mzData[, selCovs[1:nCov], drop = FALSE], format= "diag", ordVar = 1, use = "pairwise.complete.obs")
if(nCov == 1){
covStarts = sqrt(covStarts)
} else {
covStarts = t(chol(diag(covStarts))) # Set to Cholesky form.
}
covStarts = matrix(covStarts, nCov, nCov)
top = mxModel("top",
# "top" defines the algebra of the twin model, which MZ and DZ slave off of.
umxMatrix("dzAr", "Full", 1, 1, free = FALSE, values = dzAr),
umxMatrix("dzCr", "Full", 1, 1, free = FALSE, values = dzCr),
# Matrices a, c, e to store a, c, e path coefficients.
umxMatrix(name = "a", type = "Lower", nrow = nDV, ncol = nDV, free = TRUE, values = DVvarStarts, byrow = TRUE, dimnames = list(baseDVs, baseDVs)),
umxMatrix(name = "c", type = "Lower", nrow = nDV, ncol = nDV, free = TRUE, values = DVvarStarts, byrow = TRUE),
umxMatrix(name = "e", type = "Lower", nrow = nDV, ncol = nDV, free = TRUE, values = DVvarStarts, byrow = TRUE),
# Matrices A, C,E + compute variance components
mxAlgebra(name = "A", a %*% t(a)),
mxAlgebra(name = "C", c %*% t(c)),
mxAlgebra(name = "E", e %*% t(e)),
umxMatrix("expMean", "Full" , nrow = 1, ncol = (nVar * nSib), free = TRUE, values = meanStarts, labels = meanLabels, dimnames = meanDimNames),
# general (between-pair) cov of covariates
umxMatrix("lowerB", 'Lower', nrow = nCov, ncol = nCov, values = (covStarts * .4), free = TRUE),
# # specific (within-pair) cov of covariates
umxMatrix("lowerW", 'Lower', nrow = nCov, ncol = nCov, values = (covStarts * .6), free = TRUE),
mxAlgebra(name= "CovB" , lowerB %*% t(lowerB)),
mxAlgebra(name= "CovW" , lowerW %*% t(lowerW)),
mxAlgebra(name= "CovWB", CovW + CovB),
# ========================================
# = Beta matrix of regression parameters =
# ========================================
mxMatrix(name = "beta", type = "Full", nrow = nCov, ncol = nDV, free = TRUE, values = 0, labels = betaLabels),
# Some handy component algebras
mxAlgebra(name = "ACE", A + C + E),
mxAlgebra(name = "AC" , A + C),
mxAlgebra(name = "hAC", (dzAr %x% A) + (dzCr %x% C)),
mxAlgebra(name = "bCovWBb", (t(beta) %*% CovWB) %*% beta), # output is[nDV * nDV] to match ACE
mxAlgebra(name = "bCovBb" , (t(beta) %*% CovB) %*% beta),
mxAlgebra(name = "bCovWB" , t(beta) %*% CovWB),
mxAlgebra(name = "bCovB" , t(beta) %*% CovB),
mxAlgebra(name = "CovWBb" , CovWB %*% beta),
mxAlgebra(name = "CovBb" , CovB %*% beta),
# Algebra for expected variance/covariance matrix #in MZ twins
mxAlgebra(name = "expCovMZ", dimnames = list(names(mzData), names(mzData)), expression = rbind(
cbind(ACE + bCovWBb, AC + bCovBb , bCovWB, bCovB),
cbind(AC + bCovBb , ACE + bCovWBb, bCovB , bCovWB),
cbind( CovWBb, CovBb , CovWB , CovB),
cbind( CovBb , CovWBb, CovB , CovWB))
),
# Algebra for expected variance/covariance matrix #in DZ twins
mxAlgebra(name = "expCovDZ", dimnames = list(names(dzData), names(dzData)), expression = rbind(
cbind(ACE + bCovWBb, hAC + bCovBb , bCovWB, bCovB),
cbind(hAC + bCovBb , ACE + bCovWBb, bCovB , bCovWB),
cbind( CovWBb, CovBb , CovWB, CovB),
cbind( CovBb , CovWBb, CovB , CovWB))
)
) # end top
MZ = mxModel("MZ", mxExpectationNormal("top.expCovMZ", "top.expMean"), mxFitFunctionML(vector = bVector), mxData(mzData, type = "raw") )
DZ = mxModel("DZ", mxExpectationNormal("top.expCovDZ", "top.expMean"), mxFitFunctionML(vector = bVector), mxData(dzData, type = "raw") )
MZ = mxModel("MZ",
mxData(mzData , type = "raw"),
mxExpectationNormal("top.expCovMZ", means= "top.expMean", dimnames = names(mzData)),
mxFitFunctionML()
)
DZ = mxModel("DZ",
mxData(dzData, type = "raw"),
mxExpectationNormal("top.expCovDZ", means = "top.expMean", dimnames = names(dzData)),
mxFitFunctionML()
)
model = mxModel(name, MZ, DZ, top,
mxFitFunctionMultigroup(c("MZ", "DZ"))
)
if(!is.null(boundDiag)){
if(!is.numeric(boundDiag)){
stop("boundDiag must be a digit or vector of numbers. You gave me a ", class(boundDiag))
} else {
newLbound = model$top$matrices$a@lbound
if(length(boundDiag) > 1 ){
if(length(boundDiag) != length(diag(newLbound)) ){
stop("Typically boundDiag is 1 digit: if more, must be size of diag(a)")
}
}
diag(newLbound) = boundDiag;
model$top$a$lbound = newLbound
model$top$c$lbound = newLbound
model$top$e$lbound = newLbound
}
}
if(addStd){
newTop = mxModel(model$top,
mxMatrix(name = "Iden", "Iden", nDV, nDV), # nDV Identity matrix
mxAlgebra(name = "Vtot", A + C+ E), # Total variance
mxAlgebra(name = "SD", solve(sqrt(Iden * Vtot))), # Total variance
mxAlgebra(name = "a_std", SD %*% a), # standardized a
mxAlgebra(name = "c_std", SD %*% c), # standardized c
mxAlgebra(name = "e_std", SD %*% e) # standardized e
)
model = mxModel(model, newTop)
if(addCI){
model = mxModel(model, mxCI(c('top.a_std', 'top.c_std', 'top.e_std')))
}
}
# Just trundle through and make sure values with the same label have the same start value... means for instance.
model = omxAssignFirstParameters(model)
model = as(model, "MxModelACEcov") # set class so umxSummary, plot, etc. work.
model = xmu_safe_run_summary(model, autoRun = autoRun, tryHard = tryHard)
invisible(model)
}
#' umxCP: Build and run a Common Pathway twin model
#'
#' @description
#' Make a 2-group Common Pathway twin model.
#'
#' The common-pathway model (aka "psychometric model" (McArdle and Goldsmith, 1990) provides a powerful tool
#' for theory-based testing of genetic and environmental differences. It proposes that `A`, `C`, and `E` components
#' act on a latent substrate (organ, mental mechanism etc.) and this is manifested in the measured phenotypes.
#'
#' `umxCP` supports this with pairs of mono-zygotic (MZ) and di-zygotic (DZ) twins reared together
#' to model the genetic and environmental structure of multiple phenotypes
#' (measured behaviors).
#'
#' Common-pathway path diagram:
#'
#' \if{html}{\figure{CP.svg}{options: width=50% alt="Figure: CP model"}}
#' \if{latex}{\figure{CP.pdf}{options: width=7cm}}
#'
#' As can be seen, each phenotype also by default has A, C, and E influences specific to that phenotype.
#'
#' Features include the ability to include more than one common pathway, to use ordinal data.
#'
#' **note**: The function [umx_set_optimization_options()] allows users to see and set `mvnRelEps` and `mvnMaxPointsA`
#' mvnRelEps defaults to .005. For ordinal models, you might find that '0.01' works better.
#'
#' @details
#' Like the [umxACE()] model, the CP model decomposes phenotypic variance
#' into additive genetic (A), unique environmental (E) and, optionally, either
#' common or shared-environment (C) or non-additive genetic effects (D).
#'
#' Unlike the Cholesky, these factors do not act directly on the phenotype. Instead latent A,
#' C, and E influences impact on one or more latent factors which in turn account for variance in the phenotypes (see Figure).
#'
#'
#' **Data Input**
#' Currently, the umxCP function accepts only raw data. This may change in future versions.
#'
#' **Ordinal Data**
#'
#' In an important capability, the model transparently handles ordinal (binary or multi-level
#' ordered factor data) inputs, and can handle mixtures of continuous, binary, and ordinal
#' data in any combination.
#'
#' **Additional features**
#'
#' The umxCP function supports varying the DZ genetic association (defaulting to .5)
#' to allow exploring assortative mating effects, as well as varying the DZ \dQuote{C} factor
#' from 1 (the default for modeling family-level effects shared 100% by twins in a pair),
#' to .25 to model dominance effects.
#'
#' **Matrices and Labels in CP model**
#'
#' A good way to see which matrices are used in umxCP is to run an example model and plot it.
#'
#' All the shared matrices are in the model "top".
#'
#' Matrices `top$as`, `top$cs`, and `top$es` contain the path loadings specific to each variable on their diagonals.
#'
#' So, to see the 'as' values, labels, or free states, you can say:
#'
#' `m1$top$as$values`
#'
#' `m1$top$as$free`
#'
#' `m1$top$as$labels`
#'
#' Labels relevant to modifying the specific loadings take the form "as_r1c1", "as_r2c2" etc.
#'
#' The common-pathway loadings on the factors are in matrices `top$a_cp`, `top$c_cp`, `top$e_cp`.
#'
#' The common factors themselves are in the matrix `top$cp_loadings` (an nVar * 1 matrix)
#'
#' Less commonly-modified matrices are the mean matrix `expMean`. This has 1 row, and the columns are laid out for each variable for twin 1, followed by each variable for twin 2.
#' So, in a model where the means for twin 1 and twin 2 had been equated (set = to T1), you could make them independent again with this line:
#'
#' `m1$top$expMean$labels[1,4:6] = c("expMean_r1c4", "expMean_r1c5", "expMean_r1c6")`
#'
#' For a deep-dive, see [xmu_make_TwinSuperModel()]
#'
#' @param name The name of the model (defaults to "CP").
#' @param selDVs The variables to include.
#' omit sep in selDVs, i.e., just "dep" not c("dep_T1", "dep_T2").
#' @param selCovs basenames for covariates
#' @param sep (required) The suffix for twin 1 and twin 2, often "_T".
#' @param dzData The DZ dataframe.
#' @param mzData The MZ dataframe.
#' @param nFac How many common factors (default = 1)
#' @param type One of "Auto", "FIML", "cov", "cor", "WLS", "DWLS", "ULS"
#' @param allContinuousMethod "cumulants" or "marginals". Used in all-continuous WLS data to determine if a means model needed.
#' @param data If provided, dzData and mzData are treated as valid levels of zyg to select() data sets (default = NULL)
#' @param zyg If data provided, this column is used to select rows by zygosity (Default = "zygosity")
#' @param correlatedACE DON'T USE THIS! Allows correlations between the factors built by each of the a, c, and e matrices. Default = FALSE.
#' @param dzAr The DZ genetic correlation (defaults to .5, vary to examine assortative mating).
#' @param dzCr The DZ "C" correlation (defaults to 1: set to .25 to make an ADE model).
#' @param autoRun Whether to run the model (default), or just to create it and return without running.
#' @param tryHard Default ("yes") uses mxTryHard, "no" uses normal mxRun. Other options: "ordinal", "search"
#' @param optimizer optionally set the optimizer (default NULL does nothing).
#' @param weightVar If provided, a vector objective will be used to weight the data. (default = NULL).
#' @param bVector Whether to compute row-wise likelihoods (defaults to FALSE).
#' @param boundDiag = Numeric lbound for diagonal of the a_cp, c_cp, & e_cp matrices. Set = NULL to ignore.
#' @param addStd Whether to add the algebras to compute a std model (defaults to TRUE).
#' @param addCI Whether to add the interval requests for CIs (defaults to TRUE).
#' @param numObsDZ = not yet implemented: Ordinal Number of DZ twins: Set this if you input covariance data.
#' @param numObsMZ = not yet implemented: Ordinal Number of MZ twins: Set this if you input covariance data.
#' @param equateMeans Whether to equate the means across twins (defaults to TRUE).
#' @param freeLowerA (ignore): Whether to leave the lower triangle of A free (default = FALSE).
#' @param freeLowerC (ignore): Whether to leave the lower triangle of C free (default = FALSE).
#' @param freeLowerE (ignore): Whether to leave the lower triangle of E free (default = FALSE).
#' @param correlatedA deprecated.
#' @return - [OpenMx::mxModel()]
#' @export
#' @family Twin Modeling Functions
#' @seealso - [umxSummaryCP()], [umxPlotCP()]. See [umxRotate.MxModelCP()] to rotate the factor loadings of a [umxCP()] model. See [umxACE()] for more examples of twin modeling.
#' [plot()] and [umxSummary()] work for all twin models, e.g., [umxIP()], [umxCP()], [umxGxE()], and [umxACE()].
#' @references * Martin, N. G., & Eaves, L. J. (1977). The Genetical Analysis of Covariance Structure. *Heredity*, **38**, 79-95.
#' * Kendler, K. S., Heath, A. C., Martin, N. G., & Eaves, L. J. (1987). Symptoms of anxiety and symptoms of depression.
#' Same genes, different environments? *Archives of General Psychiatry*, **44**, 451-457. \doi{10.1001/archpsyc.1987.01800170073010}.
#' * McArdle, J. J., & Goldsmith, H. H. (1990). Alternative common factor models for multivariate biometric analyses.
#' *Behavior Genetics*, **20**, 569-608. \doi{10.1007/BF01065873}.
#' * <https://github.com/tbates/umx>
#'
#' @md
#' @examples
#' \dontrun{
#' # ========================================================
#' # = Run a 3-factor Common pathway twin model of 6 traits =
#' # ========================================================
#' require(umx)
#' data(GFF)
#' mzData = subset(GFF, zyg_2grp == "MZ")
#' dzData = subset(GFF, zyg_2grp == "DZ")
# # These will be expanded into "gff_T1" "gff_T2" etc.
#' selDVs = c("gff", "fc", "qol", "hap", "sat", "AD")
#' m1 = umxCP(selDVs = selDVs, sep = "_T", nFac = 3, tryHard = "yes",
#' dzData = dzData, mzData = mzData)
#'
#' # Shortcut using "data ="
#' selDVs = c("gff", "fc", "qol", "hap", "sat", "AD")
#' m1 = umxCP(selDVs= selDVs, nFac= 3, data=GFF, zyg="zyg_2grp")
#'
#' # ===================
#' # = Do it using WLS =
#' # ===================
#' m2 = umxCP("new", selDVs = selDVs, sep = "_T", nFac = 3, optimizer = "SLSQP",
#' dzData = dzData, mzData = mzData, tryHard = "ordinal",
#' type= "DWLS", allContinuousMethod='marginals'
#' )
#'
#' # =================================================
#' # = Find and test dropping of shared environment =
#' # =================================================
#' # Show all labels for C parameters
#' umxParameters(m1, patt = "^c")
#' # Test dropping the 9 specific and common-factor C paths
#' m2 = umxModify(m1, regex = "(cs_.*$)|(c_cp_)", name = "dropC", comp = TRUE)
#' umxSummaryCP(m2, comparison = m1, file = NA)
#' umxCompare(m1, m2)
#'
#' # =======================================
#' # = Mixed continuous and binary example =
#' # =======================================
#' data(GFF)
#' # Cut to form umxFactor 20% depressed DEP
#' cutPoints = quantile(GFF[, "AD_T1"], probs = .2, na.rm = TRUE)
#' ADLevels = c('normal', 'depressed')
#' GFF$DEP_T1 = cut(GFF$AD_T1, breaks = c(-Inf, cutPoints, Inf), labels = ADLevels)
#' GFF$DEP_T2 = cut(GFF$AD_T2, breaks = c(-Inf, cutPoints, Inf), labels = ADLevels)
#' ordDVs = c("DEP_T1", "DEP_T2")
#' GFF[, ordDVs] = umxFactor(GFF[, ordDVs])
#'
# # These will be expanded into "gff_T1" "gff_T2" etc.
#' selDVs = c("gff","fc","qol","hap","sat","DEP")
#' mzData = subset(GFF, zyg_2grp == "MZ")
#' dzData = subset(GFF, zyg_2grp == "DZ")
#'
#' # umx_set_optimizer("NPSOL")
#' # umx_set_optimization_options("mvnRelEps", .01)
#' m1 = umxCP(selDVs = selDVs, sep = "_T", nFac = 3, dzData = dzData, mzData = mzData)
#' m2 = umxModify(m1, regex = "(cs_r[3-5]|c_cp_r[12])", name = "dropC", comp= TRUE)
#'
#' # Do it using WLS
#' m3 = umxCP(selDVs = selDVs, sep = "_T", nFac = 3, dzData = dzData, mzData = mzData,
#' tryHard = "ordinal", type= "DWLS")
#' # TODO umxCPL fix WLS here
#' # label at row 1 and column 1 of matrix 'top.binLabels'' in model 'CP3fac' : object 'Vtot'
#'
#' # ==============================
#' # = Correlated factors example =
#' # ==============================
#' # ====================
#' # = DON'T USE THIS!!! =
#' # ====================
#' data(GFF)
#' mzData = subset(GFF, zyg_2grp == "MZ")
#' dzData = subset(GFF, zyg_2grp == "DZ")
#' selDVs = c("gff", "fc", "qol", "hap", "sat", "AD")
#' m1 = umxCP("base_model", selDVs = selDVs, sep = "_T", correlatedACE = TRUE,
#' dzData = dzData, mzData = mzData, nFac = 3, tryHard = "yes")
#'
#' # What are the ace covariance labels? (two ways to get)
#' umx_lower.tri(m1$top$a_cp$labels)
#' parameters(m1, patt = "[ace]_cp")
#'
#' # 1. Now allow a1 and a2 to correlate
#' m2=umxModify(m1,regex="a_cp_r2c1",name="a2_a1_cov",free=TRUE,tryHard="yes")
#' umxCompare(m2, m1)
#'
#' # 2. Drop all (a|c|e) correlations from a model
#' tmp= namez(umx_lower.tri(m2$top$a_cp$labels), "a_cp", replace= "[ace]_cp")
#' m3 = umxModify(m2, regex= tmp, comparison = TRUE)
#' } # end dontrun
#'
umxCP <- function(name = "CP", selDVs, selCovs=NULL, dzData= NULL, mzData= NULL, sep = NULL, nFac = 1, type = c("Auto", "FIML", "cov", "cor", "WLS", "DWLS", "ULS"), data = NULL, zyg = "zygosity", allContinuousMethod = c("cumulants", "marginals"), correlatedACE = FALSE, dzAr= .5, dzCr= 1, autoRun = getOption("umx_auto_run"), tryHard = c("yes", "no", "ordinal", "search"), optimizer = NULL, equateMeans= TRUE, weightVar = NULL, bVector = FALSE, boundDiag = 0, addStd = TRUE, addCI = TRUE, numObsDZ = NULL, numObsMZ = NULL, freeLowerA = FALSE, freeLowerC = FALSE, freeLowerE = FALSE, correlatedA = "deprecated") {
# TODO umxCP: Add covariates to means model: Will involve xmu_make_top_twin? also means model?
tryHard = match.arg(tryHard)
type = match.arg(type)
allContinuousMethod = match.arg(allContinuousMethod)
nSib = 2 # Number of siblings in a twin pair.
# Add nFac to base name if no user-set name provided.
if(name == "CP"){ name = paste0(name, nFac, "fac") }
# if data provided create twin files
if(!is.null(data)){
if(is.null(sep)){ sep = "_T" }
# avoid ingesting tibbles
if("tbl" %in% class(data)){
data = as.data.frame(data)
}
mzData = data[data[,zyg] %in% ifelse(is.null(mzData), "DZ", mzData), ]
dzData = data[data[,zyg] %in% ifelse(is.null(dzData), "DZ", dzData), ]
}else{
# avoid ingesting tibbles
if("tbl" %in% class(mzData)){
mzData = as.data.frame(mzData)
dzData = as.data.frame(dzData)
}
}
xmu_twin_check(selDVs= selDVs, dzData = dzData, mzData = mzData, enforceSep = TRUE, sep = sep, nSib = nSib, optimizer = optimizer)
# New-style build-block: Expand var names if necessary and make the basic components of a twin model
selVars = xmu_twin_upgrade_selDvs2SelVars(selDVs = selDVs, sep = sep, nSib= nSib)
nVar = length(selVars)/nSib; # Number of dependent variables per **INDIVIDUAL** (so x2 per family)
model = xmu_make_TwinSuperModel(name=name, mzData = mzData, dzData = dzData, selDVs = selDVs, selCovs= selCovs, sep = sep, type = type, allContinuousMethod = allContinuousMethod, numObsMZ = numObsMZ, numObsDZ = numObsDZ, nSib= nSib, equateMeans = equateMeans, weightVar = weightVar, bVector = FALSE, verbose= FALSE)
tmp = xmu_starts(mzData, dzData, selVars = selDVs, sep = sep, nSib = nSib, varForm = "Cholesky", equateMeans= equateMeans, SD= TRUE, divideBy = 3)
varStarts = tmp$varStarts
if(correlatedA != "deprecated"){
message("Polite message: As of February 2021, please use 'correlatedACE' in place of 'correlatedA'.
The new behavior with 'correlatedACE' still makes a_cp_matrix etc. type Lower, but leaves the off-diagonal elements fixed at zero.
(you can then free each one as you choose)")
correlatedACE = TRUE
}
if(correlatedACE){
umx_msg("Polite message: correlatedACE is in alpha: Results are not valid currently!!! Do not use!!!")
if(correlatedA != "deprecated"){
a_cp_matrix = umxMatrix("a_cp", "Lower", nFac, nFac, free = TRUE, values = 0) # Latent common factor
c_cp_matrix = umxMatrix("c_cp", "Lower", nFac, nFac, free = TRUE, values = 0) # latent common factor Common environmental path coefficients
e_cp_matrix = umxMatrix("e_cp", "Lower", nFac, nFac, free = TRUE, values = 0) # latent common factor Unique environmental path coefficients
}else{
a_cp_matrix = umxMatrix("a_cp", "Lower", nFac, nFac, free = FALSE, values = 0) # Latent common factor
c_cp_matrix = umxMatrix("c_cp", "Lower", nFac, nFac, free = FALSE, values = 0) # latent common factor Common environmental path coefficients
e_cp_matrix = umxMatrix("e_cp", "Lower", nFac, nFac, free = FALSE, values = 0) # latent common factor Unique environmental path coefficients
diag(a_cp_matrix$free) = TRUE
diag(c_cp_matrix$free) = TRUE
diag(e_cp_matrix$free) = TRUE
}
diag(a_cp_matrix$values) = .7
diag(c_cp_matrix$values) = .0
diag(e_cp_matrix$values) = .7
a_cp_matrix$lbound[lower.tri(a_cp_matrix$lbound)] = -1
c_cp_matrix$lbound[lower.tri(c_cp_matrix$lbound)] = -1
e_cp_matrix$lbound[lower.tri(e_cp_matrix$lbound)] = -1
a_cp_matrix$ubound[lower.tri(a_cp_matrix$ubound)] = 1
c_cp_matrix$ubound[lower.tri(c_cp_matrix$ubound)] = 1
e_cp_matrix$ubound[lower.tri(e_cp_matrix$ubound)] = 1
} else {
a_cp_matrix = umxMatrix("a_cp", "Diag" , nFac, nFac, free = TRUE, values = .7)
c_cp_matrix = umxMatrix("c_cp", "Diag" , nFac, nFac, free = TRUE, values = .0)
e_cp_matrix = umxMatrix("e_cp", "Diag" , nFac, nFac, free = TRUE, values = .7)
}
# Finish building top
top = mxModel(model$top,
umxMatrix("dzAr" , "Full", 1, 1, free = FALSE, values = dzAr),
umxMatrix("dzCr" , "Full", 1, 1, free = FALSE, values = dzCr),
umxMatrix("nFac_UnitCol", "Unit" , nrow = nFac, ncol = 1),
umxMatrix("nFac_Iden" , "Iden" , nrow = nFac, ncol = nFac),
umxMatrix("nFac_Lower1s", "Lower", nrow = nFac, ncol = nFac, values= 1),
# Latent common factor genetic paths
a_cp_matrix, c_cp_matrix, e_cp_matrix,
# Constrain variance of latent phenotype factor to 1.0
# Multiply by each path coefficient by its inverse to get variance component
mxAlgebra(name = "A_cp", a_cp %*% t(a_cp) ), # A_cp variance
mxAlgebra(name = "C_cp", c_cp %*% t(c_cp) ), # C_cp variance
mxAlgebra(name = "E_cp", e_cp %*% t(e_cp) ), # E_cp variance
mxAlgebra(name = "L" , A_cp + C_cp + E_cp), # total common factor covariance (a+c+e)
# multiply by lower 1s?
# mxAlgebra(name = "sumL", nFac_Lower1s %*% L),
# mxConstraint(name = "fix_CP_variances_to_1", sumL[nFac,1:nFac] == nFac_UnitCol),
mxAlgebra(name = "diagL", diag2vec(L)),
mxConstraint(name = "fix_CP_variances_to_1", diagL == nFac_UnitCol),
umxMatrix("as", "Lower", nVar, nVar, free = TRUE, values = .5), # Additive gen path
umxMatrix("cs", "Lower", nVar, nVar, free = TRUE, values = .1), # Common env path
umxMatrix("es", "Lower", nVar, nVar, free = TRUE, values = .5), # Unique env path
umxMatrix("cp_loadings", "Full", nVar, nFac, free = TRUE, values = .5), # loadings on latent phenotype
# Quadratic multiplication to add cp_loading effects
mxAlgebra(name = "A" , cp_loadings %&% (A_cp * nFac_Iden) + as %*% t(as)), # Additive genetic variance
mxAlgebra(name = "C" , cp_loadings %&% (C_cp * nFac_Iden) + cs %*% t(cs)), # Common environmental variance
mxAlgebra(name = "E" , cp_loadings %&% (E_cp * nFac_Iden) + es %*% t(es)), # Unique environmental variance
mxAlgebra(name = "ACE", A + C + E),
mxAlgebra(name = "AC" , A + C),
mxAlgebra(name = "hAC", (dzAr %x% A) + (dzCr %x% C)),
mxAlgebra(name= "expCovMZ", dimnames = list(selVars, selVars),
rbind( cbind(ACE, AC),
cbind(AC , ACE))
),
mxAlgebra(name= "expCovDZ", dimnames = list(selVars, selVars),
rbind( cbind(ACE, hAC),
cbind(hAC, ACE))
)
)
model = mxModel(model, top)
if(!freeLowerA){
toset = model$top$matrices$as$labels[lower.tri(model$top$matrices$as$labels)]
model = omxSetParameters(model, labels = toset, free = FALSE, values = 0)
}
if(!freeLowerC){
toset = model$top$matrices$cs$labels[lower.tri(model$top$matrices$cs$labels)]
model = omxSetParameters(model, labels = toset, free = FALSE, values = 0)
}
if(!freeLowerE){
toset = model$top$matrices$es$labels[lower.tri(model$top$matrices$es$labels)]
model = omxSetParameters(model, labels = toset, free = FALSE, values = 0)
}
if(addStd){
newTop = mxModel(model$top,
# nVar Identity matrix
mxMatrix(name = "I", "Iden", nVar, nVar),
# inverse of standard deviation diagonal (same as "(\sqrt(I.Vtot))~"
mxAlgebra(name = "SD", solve(sqrt(I * ACE))),
# Standard specific path coefficients
mxAlgebra(name = "as_std", SD %*% as), # standardized a
mxAlgebra(name = "cs_std", SD %*% cs), # standardized c
mxAlgebra(name = "es_std", SD %*% es), # standardized e
# Standardize loadings on Common factors
mxAlgebra(name = "cp_loadings_std", SD %*% cp_loadings) # Standardized path coefficients (general factor(s))
)
model = mxModel(model, newTop)
if(addCI){
# TODO umxCP: break these CIs out into single labels?
model = mxModel(model, mxCI(c('top.a_cp', 'top.c_cp', 'top.e_cp', 'top.as_std', 'top.cs_std', 'top.es_std', 'top.cp_loadings_std')))
}
}
if(!is.null(boundDiag)){
if(!is.numeric(boundDiag)){
stop("boundDiag must be a digit or vector of numbers. You gave me a ", class(boundDiag))
} else {
if(length(boundDiag) > 1 ){
if(length(boundDiag) != length(diag(newLbound)) ){
stop("Typically boundDiag is 1 digit: if more, must be size of diag(a_cp)")
}
}
newCPLbound = model$top$matrices$a_cp@lbound
diag(newCPLbound) = boundDiag;
model$top$a_cp$lbound = newCPLbound
model$top$c_cp$lbound = newCPLbound
model$top$e_cp$lbound = newCPLbound
newSpecLbound = model$top$matrices$as@lbound
diag(newSpecLbound) = boundDiag;
model$top$as$lbound = newSpecLbound
model$top$cs$lbound = newSpecLbound
model$top$es$lbound = newSpecLbound
}
}
# Set values with the same label to the same start value... means for instance.
model = omxAssignFirstParameters(model)
model = as(model, "MxModelCP")
model = xmu_safe_run_summary(model, autoRun = autoRun, tryHard = tryHard)
return(model)
} # end umxCP
#' umxIP: Build and run an Independent Pathway twin model
#'
#' @description
#' Make a 2-group Independent Pathway twin model.
#'
#' The independent-pathway model (aka "biometric model" (McArdle and Goldsmith, 1990) proposes that `A`,
#' `C`, and `E` components act directly on the manifest or measured phenotypes. This contrasts with
#' the [umxCP()] model, in which these influences are collected on a hypothesized or latent causal
#' variable, which is manifested in the measured phenotypes.
#'
#' The following figure shows the IP model diagrammatically:
#'
#' \if{html}{\figure{IP.svg}{options: width=50% alt="Figure: IP model"}}
#' \if{latex}{\figure{IP.pdf}{options: width=7cm}}
#'
#' As can be seen, each phenotype also by default has A, C, and E influences specific to that phenotype.
#'
#' Features of the model include the ability to include add more one set of independent pathways, different numbers
#' of pathways for a, c, and e, as well the ability to use ordinal data, and different fit functions, e.g. WLS.
#'
#' **note**: The function [umx_set_optimization_options()] allows users to see and set `mvnRelEps` and `mvnMaxPointsA`
#' mvnRelEps defaults to .005. For ordinal models, you might find that '0.01' works better.
#'
#' @details
#' Like the [umxACE()] model, the IP model decomposes phenotypic variance
#' into additive genetic (A), unique environmental (E) and, optionally, either
#' common or shared-environment (C) or
#' non-additive genetic effects (D).
#'
#' Unlike the Cholesky, these factors do not act directly on the phenotype. Instead latent A,
#' C, and E influences impact on one or more latent common factors which, in turn, account for
#' variance in the phenotypes (see Figure).
#'
#'
#' **Data Input**
#' Currently, `umxIP` accepts only raw data. This may change in future versions. You can
#' choose other fit functions, e.g. WLS.
#'
#' **Ordinal Data**
#'
#' In an important capability, the model transparently handles ordinal (binary or multi-level
#' ordered factor data) inputs, and can handle mixtures of continuous, binary, and ordinal
#' data in any combination.
#'
#' **Additional features**
#'
#' `umxIP` supports varying the DZ genetic association (defaulting to .5)
#' to allow exploring assortative mating effects, as well as varying the DZ \dQuote{C} factor
#' from 1 (the default for modeling family-level effects shared 100% by twins in a pair),
#' to .25 to model dominance effects.
#'
#' **Matrices and Labels in IP model**
#'
#' A good way to see which matrices are used in umxIP is to run an example model and plot it.
#'
#' All the shared matrices are in the model "top".
#'
#' Matrices `as`, `cs`, and `es` contain the path loadings specific to each variable on their diagonals.
#'
#' To see the 'as' values, you can simply execute:
#'
#' m1$top#as$values
#'
#' m1$top#as$labels
#'
#' m1$top#as$free
#'
#' Labels relevant to modifying the specific loadings take the form "as_r1c1", "as_r2c2" etc.
#'
#' The independent-pathway loadings on the manifests are in matrices `a_ip`, `c_ip`, `e_ip`.
#'
#' Less commonly-modified matrices are the mean matrix `expMean`.
#' This has 1 row, and the columns are laid out for each variable
#' for twin 1, followed by each variable for twin 2.
#'
#' So, in a model where the means for twin 1 and twin 2 had been equated
#' (set = to T1), you could make them independent again with this line:
#'
#' `m1$top$expMean$labels[1,4:6] = c("expMean_r1c4", "expMean_r1c5", "expMean_r1c6")`
#'
#' @param name The name of the model (defaults to "IP").
#' @param selDVs The base names of the variables to model. note: Omit suffixes - just "dep" not c("dep_T1", "dep_T2")
#' @param sep The suffix for twin 1 and twin 2. e.g. selDVs= "dep", sep= "_T" -> c("dep_T1", "dep_T2")
#' @param dzData The DZ dataframe.
#' @param mzData The MZ dataframe.
#' @param nFac How many common factors for a, c, and e. If one number is given, applies to all three.
#' @param type Analysis method one of c("Auto", "FIML", "cov", "cor", "WLS", "DWLS", "ULS")
#' @param data If provided, dzData and mzData are treated as levels of zyg to select() MZ and DZ data sets (default = NULL)
#' @param zyg If data provided, this column is used to select rows by zygosity (Default = "zygosity")
#' @param allContinuousMethod "cumulants" or "marginals". Used in all-continuous WLS data to determine if a means model needed.
#' @param numObsDZ = For cov data, the number of DZ pairs.
#' @param numObsMZ = For cov data, the number of MZ pairs.
#' @param dzAr The DZ genetic correlation (defaults to .5, vary to examine assortative mating).
#' @param dzCr The DZ "C" correlation (defaults to 1: set to .25 to make an ADE model).
#' @param correlatedA Whether factors are allowed to correlate (not implemented yet: FALSE).
#' @param autoRun Whether to run and return the model (default), or just to create and return without running.
#' @param tryHard Whether to tryHard (default 'no' uses normal mxRun). options: "mxTryHard", "mxTryHardOrdinal", or "mxTryHardWideSearch"
#' @param optimizer optionally set the optimizer (default NULL does nothing).
#' @param equateMeans Whether to equate the means across twins (defaults to TRUE).
#' @param weightVar If a weighting variable is provided, a vector objective will be used to weight the data. (default = NULL).
#' @param addStd Whether to add algebras for a standardized model (defaults to TRUE).
#' @param addCI Whether to add CIs (defaults to TRUE).
#' @param freeLowerA ignore: Whether to leave the lower triangle of A free (default = FALSE).
#' @param freeLowerC ignore: Whether to leave the lower triangle of C free (default = FALSE).
#' @param freeLowerE ignore: Whether to leave the lower triangle of E free (default = FALSE).
#' @return - [OpenMx::mxModel()]
#' @export
#' @family Twin Modeling Functions
#' @seealso - [plot()], [umxSummary()], [umxCP()]
#' @references * Kendler, K. S., Heath, A. C., Martin, N. G., & Eaves, L. J. (1987). Symptoms of anxiety and symptoms of depression.
#' Same genes, different environments? *Archives of General Psychiatry*, **44**, 451-457. \doi{10.1001/archpsyc.1987.01800170073010}.
#' * McArdle, J. J., & Goldsmith, H. H. (1990). Alternative common factor models for multivariate biometric analyses.
#' *Behavior Genetics*, **20**, 569-608. \doi{10.1007/BF01065873}.
#' * <https://github.com/tbates/umx>
#'
#' @md
#' @examples
#' \dontrun{
#' require(umx)
#' data(GFF)
#' mzData = subset(GFF, zyg_2grp == "MZ")
#' dzData = subset(GFF, zyg_2grp == "DZ")
#' selDVs = c("gff","fc","qol","hap","sat","AD") # These will be expanded into "gff_T1" "gff_T2" etc.
#' m1 = umxIP(selDVs = selDVs, sep = "_T", dzData = dzData, mzData = mzData)
#'
#' # WLS example: Use "marginals" method to enable all continuous data with missingness.
#' m3 = umxIP(selDVs = selDVs, sep = "_T", dzData = dzData, mzData = mzData,
#' type = "DWLS", allContinuousMethod='marginals')
#' # omit missing to enable default WLS method to work on all continuous data
#' dzD = na.omit(dzData[, tvars(selDVs, "_T")])
#' mzD = na.omit(dzData[, tvars(selDVs, "_T")])
#' m4 = umxIP(selDVs = selDVs, sep = "_T", dzData = dzD, mzData = mzD, type = "DWLS")
#'
#' # ====================================================================
#' # = Try with a non-default number of a, c, and e independent factors =
#' # ====================================================================
#' nFac = c(a = 2, c = 1, e = 1)
#' m2 = umxIP(selDVs = selDVs, sep = "_T", dzData = dzData, mzData = mzData, nFac = nFac,
#' tryHard = "yes")
#' umxCompare(m1, m2)
#' }
#
umxIP <- function(name = "IP", selDVs, dzData, mzData, sep = NULL, nFac = c(a=1, c=1, e=1), data = NULL, zyg = "zygosity", type = c("Auto", "FIML", "cov", "cor", "WLS", "DWLS", "ULS"), allContinuousMethod = c("cumulants", "marginals"), dzAr = .5, dzCr = 1, correlatedA = FALSE, numObsDZ = NULL, numObsMZ = NULL, autoRun = getOption("umx_auto_run"), tryHard = c("no", "yes", "ordinal", "search"), optimizer = NULL, equateMeans = TRUE, weightVar = NULL, addStd = TRUE, addCI = TRUE, freeLowerA = FALSE, freeLowerC = FALSE, freeLowerE = FALSE) {
# TODO implement correlatedA
type = match.arg(type)
allContinuousMethod = match.arg(allContinuousMethod)
tryHard = match.arg(tryHard)
nSib = 2 # Number of siblings in a twin pair.
if(correlatedA){ message("Sorry, I haven't implemented correlated A yet in umxIP...") }
# if data provided create twin files
if(!is.null(data)){
if(is.null(sep)){ sep = "_T" }
# avoid ingesting tibbles
if("tbl" %in% class(data)){
data = as.data.frame(data)
}
mzData = data[data[,zyg] %in% ifelse(is.null(mzData), "DZ", mzData), ]
dzData = data[data[,zyg] %in% ifelse(is.null(dzData), "DZ", dzData), ]
}else{
# avoid ingesting tibbles
if("tbl" %in% class(mzData)){
mzData = as.data.frame(mzData)
dzData = as.data.frame(dzData)
}
}
# TODO umxIP: check covariates
xmu_twin_check(selDVs= selDVs, sep = sep, dzData = dzData, mzData = mzData, enforceSep = TRUE, nSib = nSib, optimizer = optimizer)
if(length(nFac) == 1){
nFac = c(a = nFac, c = nFac, e = nFac)
} else if (length(nFac) == 3){
if(is.null(names(nFac))){
names(nFac)=c("a","c","e")
}
if(!all(names(nFac)==c("a","c","e"))){
stop("nFac must be named a=, c=, and e=")
}
}else{
stop("nFac must be either 1 number or 3. You gave me ", length(nFac))
}
if(dzCr == .25 & (name == "IP")){
name = "IP_ADE"
}else if(name == "IP"){
# Add nFac to base name if no user-set name provided.
if (length(nFac) == 1){
name = paste0(name, nFac, "fac")
}else{
name = paste0(name, paste0(c("a", "c", "e"), nFac, collapse = ""))
}
}
# New-style build-block: Expand var names if necessary and make the basic components of a twin model
selVars = tvars(selDVs, sep = sep, suffixes = 1:nSib)
nVar = length(selVars)/nSib; # Number of dependent variables per **INDIVIDUAL** (so x2 per family)
model = xmu_make_TwinSuperModel(name=name, mzData = mzData, dzData = dzData, selDVs = selDVs, selCovs= NULL, sep = sep, type = type, allContinuousMethod = allContinuousMethod, numObsMZ = numObsMZ, numObsDZ = numObsDZ, nSib= nSib, equateMeans = equateMeans, weightVar = weightVar, verbose= FALSE)
# TODO: umxIP improve start values (hard coded at std type values)
# tmp = xmu_starts(mzData, dzData, selVars = selDVs, sep = sep, nSib = nSib, varForm = "Cholesky", equateMeans= equateMeans, SD= TRUE, divideBy = 3)
# varStarts = tmp$varStarts
top = mxModel(model$top,
# "top" defines the algebra of the twin model, which MZ and DZ slave off of
# NB: top already has the means model and thresholds matrix added if necessary - see above
# Additive, Common, and Unique environmental paths
# Matrices ai, ci, and ec, to store a, c, and e path coefficients for independent general factors
umxMatrix("ai", "Full", nVar, nFac['a'], free = TRUE, values = .6, jiggle = .05), # latent common factor Additive genetic path
umxMatrix("ci", "Full", nVar, nFac['c'], free = TRUE, values = .0, jiggle = .05), # latent common factor Common #environmental path coefficient
umxMatrix("ei", "Full", nVar, nFac['e'], free = TRUE, values = .6, jiggle = .05), # latent common factor Unique environmental path #coefficient
# Matrices as, cs, and es, to store a, c, and e path coefficients for specific factors
umxMatrix("as", "Lower", nVar, nVar, free = TRUE, values = .6, jiggle = .05), # Additive genetic path
umxMatrix("cs", "Lower", nVar, nVar, free = TRUE, values = .0, jiggle = .05), # Common environmental path
umxMatrix("es", "Lower", nVar, nVar, free = TRUE, values = .6, jiggle = .05), # Unique environmental path.
umxMatrix("dzAr", "Full", 1, 1, free = FALSE, values = dzAr),
umxMatrix("dzCr", "Full", 1, 1, free = FALSE, values = dzCr),
# Multiply by each path coefficient by its inverse to get variance component
# Sum the squared independent and specific paths to get total variance in each component
mxAlgebra(name = "A", ai%*%t(ai) + as%*%t(as) ), # Additive genetic variance
mxAlgebra(name = "C", ci%*%t(ci) + cs%*%t(cs) ), # Common environmental variance
mxAlgebra(name = "E", ei%*%t(ei) + es%*%t(es) ), # Unique environmental variance
mxAlgebra(name = "ACE", A+C+E),
mxAlgebra(name = "AC" , A+C ),
mxAlgebra(name = "hAC", (dzAr %x% A) + (dzCr %x% C)),
mxAlgebra(rbind (cbind(ACE, AC),
cbind(AC , ACE)), dimnames = list(selVars, selVars), name = "expCovMZ"),
mxAlgebra(rbind (cbind(ACE, hAC),
cbind(hAC, ACE)), dimnames = list(selVars, selVars), name = "expCovDZ"),
# Algebra to compute total variances and standard deviations (diagonal only)
mxMatrix("Iden", nrow = nVar, name = "I"),
mxAlgebra(solve(sqrt(I * ACE)), name = "iSD")
)
model = mxModel(model, top)
if(!freeLowerA){
toset = model$top$matrices$as$labels[lower.tri(model$top$matrices$as$labels)]
model = omxSetParameters(model, labels = toset, free = FALSE, values = 0)
}
if(!freeLowerC){
toset = model$top$matrices$cs$labels[lower.tri(model$top$matrices$cs$labels)]
model = omxSetParameters(model, labels = toset, free = FALSE, values = 0)
}
if(!freeLowerE){
toset = model$top$matrices$es$labels[lower.tri(model$top$matrices$es$labels)]
model = omxSetParameters(model, labels = toset, free = FALSE, values = 0)
} else {
# set the first column off, bar r1
model = omxSetParameters(model, labels = "es_r[^1]0-9?c1", free = FALSE, values = 0)
# toset = model$top$matrices$es$labels[lower.tri(model$top$matrices$es$labels)]
# model = omxSetParameters(model, labels = toset, free = FALSE, values = 0)
# toset = model$top$matrices$es$labels[lower.tri(model$top$matrices$es$labels)]
# model = omxSetParameters(model, labels = toset, free = FALSE, values = 0)
# Used to drop the ei paths, as we have a full Cholesky for E, now just set the bottom row TRUE
# toset = umxGetParameters(model, "^ei_r.c.", free= TRUE)
# model = omxSetParameters(model, labels = toset, free = FALSE, values = 0)
}
if(addStd){
newTop = mxModel(model$top,
# nVar Identity matrix
mxMatrix("Iden", nrow = nVar, name = "I"),
# inverse of standard deviation diagonal (same as "(\sqrt(I.Vtot))~"
mxAlgebra(solve(sqrt(I * ACE)), name = "SD"),
# Standard general path coefficients
mxAlgebra(SD %*% ai, name = "ai_std"), # standardized ai
mxAlgebra(SD %*% ci, name = "ci_std"), # standardized ci
mxAlgebra(SD %*% ei, name = "ei_std"), # standardized ei
# Standardize specific path coefficients
mxAlgebra(SD %*% as, name = "as_std"), # standardized as
mxAlgebra(SD %*% cs, name = "cs_std"), # standardized cs
mxAlgebra(SD %*% es, name = "es_std") # standardized es
)
model = mxModel(model, newTop)
if(addCI){
model = mxModel(model, mxCI(c('top.ai_std', 'top.ci_std', 'top.ei_std', 'top.as_std', 'top.cs_std', 'top.es_std')))
}
}
model = omxAssignFirstParameters(model) # ensure parameters with the same label have the same start value... means, for instance.
model = as(model, "MxModelIP")
model = xmu_safe_run_summary(model, autoRun = autoRun, tryHard = tryHard)
return(model)
} # end umxIP
#' Generic SEM factor model loading rotation function
#'
#' See [umxRotate.MxModelCP()] to rotate the factor loadings of a [umxCP()] model
#'
#' @param model a model to rotate
#' @param rotation name of the rotation.
#' @param tryHard Default ("yes") is to tryHard
#' @param freeLoadingsAfter Whether to keep the rotated loadings fixed (Default, free them again)
#' @param verbose print detail about the rotation
#' @return - Rotated solution
#' @family Reporting functions
#' @export
#' @md
umxRotate <- function(model, rotation = c("varimax", "promax"), tryHard = "yes", freeLoadingsAfter = TRUE, verbose = TRUE){
UseMethod("umxRotate", model)
}
#' @export
umxRotate.default <- function(model, rotation = c("varimax", "promax"), tryHard = "yes", freeLoadingsAfter = TRUE, verbose = TRUE){
stop("umxRotate is not defined for objects of class:", class(model))
}
#' Rotate a CP solution
#'
#' @description
#' Rotate a CP solution.
#' Should work with rotations provided in `libs("GPArotation")` and `libs("psych")`, e.g.,
#'
#' **Orthogonal**: "varimax", "quartimax", "bentlerT", "equamax", "varimin", "geominT" and "bifactor"
#'
#' **Oblique**: "Promax", "promax", "oblimin", "simplimax", "bentlerQ", "geominQ", "biquartimin" and "cluster"
#'
#'
#' @details This works by taking the common-pathways loadings matrix from a solved [umxCP()] model, rotating these, placing
#' them back into the loadings matrix, re-estimating the model with the parameters fixed at this rotation, then return the new model.
#'
#' @param model a [umxCP()] model to rotate.
#' @param rotation name of the rotation.
#' @param tryHard Default ("yes") is to tryHard.
#' @param freeLoadingsAfter return the model with factor loadings free (default) or fixed in the new locations.
#' @param verbose print detail about the rotation
#' @return - Rotated solution.
#' @export
#' @family Twin Modeling Functions
#' @seealso - [umxCP()]
#' @md
#' @examples
#' \dontrun{
#' # Rotate a CP solution(param)
#' # Common pathway model rotation
#' library(umx)
#' # Fit 3 factor CPM
#' data(GFF)
#' selDVs = c("gff", "fc", "qol", "hap", "sat", "AD")
#' m1 = umxCP(selDVs = selDVs, nFac = 2, data = data, tryHard = "yes")
#' m2 = umxRotate(m1, rotation = "varimax", tryHard = "yes")
#'
#' }
umxRotate.MxModelCP <- function(model, rotation = c("varimax", "promax"), tryHard = "yes", freeLoadingsAfter = TRUE, verbose = TRUE) {
rotation = match.arg(rotation)
# TODO: Check nFac > 1)
# 1. get loadings
x = model$top$cp_loadings$values
# 2. rotate matrix
rotated = eval(parse(text = paste0(rotation, "(x)")))
# 3. fix loadings at their new rotated values
model$top = omxSetParameters(model$top, labels= model$top$cp_loadings$labels, values = rotated$loadings, free = FALSE)
# run the model to re-estimate common and residual loadings given the (fixed) rotated loadings
model = xmu_safe_run_summary(model, autoRun = TRUE, tryHard = tryHard, comparison = TRUE, digits = 3)
# free the values so mxCompare gets the right answers
if(freeLoadingsAfter){
model$top = omxSetParameters(model$top, labels= model$top$cp_loadings$labels, free = TRUE)
}
if(verbose){
print("Rotation results")
print(rotated$loadings) # print out the nice rotation result
rotmat = rotated$rotmat
print("Factor Correlation Matrix")
print(solve(t(rotmat) %*% rotmat))
}
return(model)
}
# =====================================
# = Advanced Build and Modify helpers =
# =====================================
#' xmuRAM2Ordinal
#'
#' xmuRAM2Ordinal: Convert a RAM model whose data contain ordinal variables to a threshold-based model
#'
#' @param model An RAM model to add thresholds too.
#' @param name = A new name for the modified model. Default (NULL) = leave it as is).
#' @param verbose Tell the user what was added and why (Default = TRUE).
#' @return - [OpenMx::mxModel()]
#' @export
#' @family xmu internal not for end user
#' @seealso - [umxRAM()]
#' @md
#' @examples
#' \dontrun{
#' data(twinData)
#' # Cut to form category of 20% obese subjects
#' obesityLevels = c('normal', 'obese')
#' cutPoints = quantile(twinData[, "bmi1"], probs = .2, na.rm = TRUE)
#' twinData$obese1 = cut(twinData$bmi1, breaks = c(-Inf, cutPoints, Inf), labels = obesityLevels)
#' twinData$obese2 = cut(twinData$bmi2, breaks = c(-Inf, cutPoints, Inf), labels = obesityLevels)
#' ordDVs = c("obese1", "obese2")
#' twinData[, ordDVs] = umxFactor(twinData[, ordDVs])
#' mzData = twinData[twinData$zygosity %in% "MZFF",]
#' m1 = umxRAM("tim", data = mzData,
#' umxPath("bmi1", with = "bmi2"),
#' umxPath(v.m.= c("bmi1", "bmi2"))
#')
#'
#' m1 = umxRAM("tim", data = mzData,
#' umxPath("obese1", with = "obese2"),
#' umxPath(v.m.= c("obese1", "obese2"))
#' )
#' }
xmuRAM2Ordinal <- function(model, verbose = TRUE, name = NULL) {
if(!umx_is_RAM(model)){
stop("xmuRAM2Ordinal only works with RAM models, sorry.")
}
if(!is.null(name)){
model = mxRename(model, name)
}
model$expectation$thresholds = "threshMat"
model = mxModel(model, umxThresholdMatrix(model$data$observed, fullVarNames = model$manifestVars, verbose = verbose))
return(model)
}
#' xmuValues: Set values in RAM model, matrix, or path
#'
#' For models to be estimated, it is essential that path values start at credible values.
#' `xmuValues` takes on that task for you.
#'
#' xmuValues can set start values for the free parameters in both RAM and Matrix [OpenMx::mxModel()]s.
#' It can also take an mxMatrix as input.
#' It tries to be smart in guessing starts from the values in your data and the model type.
#'
#' *note*: If you give xmuValues a numeric input, it will use obj as the mean, and return a
#' list of length n, with sd = sd.
#'
#' @param obj The RAM or matrix [OpenMx::mxModel()], or [OpenMx::mxMatrix()] that you want to set start values for.
#' @param sd Optional Standard Deviation for start values
#' @param n Optional Mean for start values
#' @param onlyTouchZeros Don't alter parameters that have starts (useful to speed [umxModify()])
#' @return - [OpenMx::mxModel()] with updated start values
#' @export
#' @seealso - Core functions:
#' @family Advanced Model Building Functions
#' @references - <https://github.com/tbates/umx>, <https://tbates.github.io>
#' @md
#' @examples
#' \dontrun{
#' require(umx)
#' data(demoOneFactor)
#' latents = c("G")
#' manifests = names(demoOneFactor)
#'
#' # ====================================================================
#' # = Make an OpenMx model (which will lack start values and labels..) =
#' # ====================================================================
#' m1 = mxModel("One Factor", type = "RAM",
#' manifestVars = manifests, latentVars = latents,
#' mxPath(from = latents , to = manifests),
#' mxPath(from = manifests, arrows = 2),
#' mxPath(from = latents , arrows = 2, free = FALSE, values = 1.0),
#' mxData(cov(demoOneFactor), type = "cov", numObs=500)
#' )
#' mxEval(S, m1) # default variances are jiggled away from near-zero
#' # Add start values to the model
#' m1 = xmuValues(m1)
#' mxEval(S, m1) # plausible variances
#' umx_print(mxEval(S,m1), 3, zero.print = ".") # plausible variances
#' xmuValues(14, sd = 1, n = 10) # Return vector of length 10, with mean 14 and sd 1
#'
#' }
xmuValues <- function(obj = NA, sd = NA, n = 1, onlyTouchZeros = FALSE) {
if(is.numeric(obj) ) {
# Use obj as the mean, return a list of length n, with sd = sd
return(xmu_start_value_list(mean = obj, sd = sd, n = n))
} else if (umx_is_MxMatrix(obj) ) {
message("I don't know how to create values for a matrix: too many options.")
} else if (umx_is_RAM(obj) ) {
# This is a RAM Model: Set sane starting values
# Means at manifest means
# S at variance on diag, quite a bit less than cov off diag
# TODO: Start latent means?...
# TODO: Handle sub models...
if (length(obj$submodels) > 0) {
stop("xmuValues cannot yet handle sub-models. Build each with umxRAM, then use umxSuperModel to assemble")
}
if (is.null(obj$data)) {
stop("'model' does not contain any data")
}
if(!is.null(obj$matrices$Thresholds)){
message("This is a threshold RAM model... Not sure how to set values in these yet, so left it as-is.")
return(obj)
}
theData = obj$data$observed
type = obj$data$type
manifests = obj@manifestVars
latents = obj@latentVars
nVar = length(manifests)
if(length(latents) > 0){
lats = (nVar + 1):(nVar + length(latents))
# The diagonal is variances
if(onlyTouchZeros) {
freePaths = (obj$matrices$S$free[lats, lats] == TRUE) & obj$matrices$S$values[lats, lats] == 0
} else {
freePaths = (obj$matrices$S$free[lats, lats] == TRUE)
}
# set free latent variances to 1
obj$S$values[lats, lats][freePaths] = 1
offDiag = !diag(length(latents))
newOffDiags = obj$matrices$S$values[lats, lats][offDiag & freePaths]/3
obj$S@values[lats, lats][offDiag & freePaths] = newOffDiags
}
if(nVar == 0){
# model with no manifests... nothing to set. Maybe it's a model with only defVars or something.
return(obj)
}
# =============
# = Set means =
# =============
# print(str(obj$data))
# umx_msg(obj$data$preferredFit)
# umx_msg(obj$data$.wlsContinuousType)
if(is.null(obj$matrices$M)){
# no means: must be cov data?
# We are in a RAM model, so the data must be mxData: check the type, rather than guessing.
# need to handle raw data that will be treated as WLS and not end up with means
if(type == "raw"){
covData = umx_var(df = theData[, manifests, drop = FALSE], format = "full", ordVar = 1, use = "pairwise.complete.obs", allowCorForFactorCovs=TRUE)
}else if (type == "acov"){
covData = as.matrix(theData)
}else if (type %in% c("cov", "cor")){
covData = as.matrix(theData)
}else{
message("xmuValues can't recognise data of type ", type, ". I only know raw, cov, cor, and acov")
covData = as.matrix(theData)
}
} else {
dataMeans = umx_means(theData[, manifests, drop = FALSE], ordVar = 0, na.rm = TRUE)
freeManifestMeans = (obj$matrices$M$free[1, manifests] == TRUE)
obj$M@values[1, manifests][freeManifestMeans] = dataMeans[freeManifestMeans]
# covData = cov(theData, )
covData = umx_var(df = theData[, manifests, drop = FALSE], format = "full", ordVar = 1, use = "pairwise.complete.obs", allowCorForFactorCovs=TRUE)
}
# ==========================================================
# = Fill the S (symmetrical) matrix with good start values =
# ==========================================================
# Set S diagonal (variances) where the cells are free.
# if(!is.null(dim(covData)) || length(covData) > 1){
# covData = diag(covData)
# } else {
# If this is one variable, leave alone: equivalent to a 1,1, matrix with the diag on the "diag", and zeros elsewhere
# }
# diag diag creates a matrix with all zeros off the diagonal
# covData = diag(diag(covData))
freePaths = diag(obj$S$free) == TRUE
if(onlyTouchZeros) {
freePaths = freePaths & diag(obj$S$values) == 0
}
diag(obj$S@values)[freePaths] = diag(covData)[freePaths]
# =======================
# = Set off-diag values =
# =======================
# TODO decide whether to leave this as independence, or set to non-zero covariances...
# and off diagonals to the observed covariance,
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# obj$matrices$S$values[1:nVar, 1:nVar][freePaths] = (covData[freePaths]/2)
# offDiag = !diag(nVar)
# newOffDiags = obj$matrices$S$values[1:nVar, 1:nVar][offDiag & freePaths]/3
# obj$matrices$S$values[1:nVar, 1:nVar][offDiag & freePaths] = newOffDiags
# ======================================================
# = Put modest starts into the asymmetric (one headed) =
# ======================================================
freePaths = obj$matrices$A$free == TRUE
if(onlyTouchZeros){
freePaths = freePaths & obj$matrices$A$values == 0
}
# TODO umxRAM A starts change from .9 to sqrt(.2*Variance)/nFactors
obj$A@values[freePaths] = .9
return(obj)
} else {
stop("'obj' must be an mxMatrix, a RAM model, or a simple number")
}
}
#' xmuLabel: Add labels to a RAM model, matrix, or path
#'
#' xmuLabel adds labels to things, be it an: [OpenMx::mxModel()] (RAM or matrix based), an [OpenMx::mxPath()], or an [OpenMx::mxMatrix()]
#' This is a core function in umx: Adding labels to paths opens the door to [umxEquate()], as well as [OpenMx::omxSetParameters()]
#'
#' @param obj An [OpenMx::mxModel()] (RAM or matrix based), [OpenMx::mxPath()], or [OpenMx::mxMatrix()]
#' @param suffix String to append to each label (might be used to distinguish, say male and female submodels in a model)
#' @param baseName String to prepend to labels. Defaults to NA ("")
#' @param setfree Whether to label only the free paths (defaults to FALSE)
#' @param drop The value to fix "drop" paths to (defaults to 0)
#' @param jiggle How much to jiggle values in a matrix or list of path values
#' @param labelFixedCells = TRUE
#' @param boundDiag Whether to bound the diagonal of a matrix
#' @param verbose How much feedback to give the user (default = FALSE)
#' @param overRideExisting = FALSE
#' @param name Optional new name if given a model. Default (NULL) does not rename model.
#' @return - [OpenMx::mxModel()]
#' @export
#' @family Advanced Model Building Functions
#' @references - <https://github.com/tbates/umx>
#' @md
#' @examples
#' \dontrun{
#' # ==============================================================
#' # = Show how OpenMx models are not labeled, and then add labels =
#' # ==============================================================
#' require(umx)
#' data(demoOneFactor)
#' latents = c("G")
#' manifests = names(demoOneFactor)
#' m1 = mxModel("One Factor", type = "RAM",
#' manifestVars = manifests, latentVars = latents,
#' mxPath(from = latents , to = manifests),
#' mxPath(from = manifests, arrows = 2),
#' mxPath(from = latents , arrows = 2, free = FALSE, values = 1.0),
#' mxData(cov(demoOneFactor), type = "cov", numObs=500)
#' )
#'
#' umxGetParameters(m1) # Default "matrix address" labels, i.e "One Factor.S[2,2]"
#' m1 = xmuLabel(m1)
#' umxGetParameters(m1, free = TRUE) # Informative labels: "G_to_x1", "x4_with_x4", etc.
#'
#' # =======================================================================
#' # = Create a new model, with suffixes added to paths, and model renamed =
#' # =======================================================================
#' m2 = xmuLabel(m1, suffix= "_male", overRideExisting= TRUE, name = "male")
#' umxGetParameters(m2, free = TRUE) # suffixes added
#'
#' # =============================
#' # = Example Labeling a matrix =
#' # =============================
#' a = xmuLabel(mxMatrix(name = "a", "Full", 3, 3, values = 1:9))
#' a$labels
#' a = xmuLabel(mxMatrix(name = "a", "Full", 3, 3, values = 1:9), baseName="bob")
#' a$labels
#' # note: labels with "data." in the name are left untouched!
#' a = mxMatrix(name = "a", "Full", 1,3, labels = c("data.a", "test", NA))
#' a$labels
#' xmuLabel(a, verbose = TRUE)
#' xmuLabel(a, verbose = TRUE, overRideExisting = FALSE)
#' xmuLabel(a, verbose = TRUE, overRideExisting = TRUE)
#' }
xmuLabel <- function(obj, suffix = "", baseName = NA, setfree = FALSE, drop = 0, labelFixedCells = TRUE, jiggle = NA, boundDiag = NA, verbose = FALSE, overRideExisting = FALSE, name = NULL) {
# TODO xmuLabel: Change these to an S3 method with three classes...
# Check that arguments not used by a particular class are not set away from their defaults
# Perhaps make "A_with_A" --> "var_A"
# Perhaps make "one_to_x2" --> "mean_x2" best left as is
if (is(obj, "MxMatrix") ) {
# Label an mxMatrix
xmuLabel_Matrix(mx_matrix = obj, baseName = baseName, setfree = setfree, drop = drop, labelFixedCells = labelFixedCells, jiggle = jiggle, boundDiag = boundDiag, suffix = suffix, verbose = verbose, overRideExisting = overRideExisting)
} else if (umx_is_RAM(obj)) {
# Label a RAM model
if(verbose){message("RAM")}
return(xmuLabel_RAM_Model(model = obj, suffix = suffix, labelFixedCells = labelFixedCells, overRideExisting = overRideExisting, verbose = verbose, name = name))
} else if (umx_is_MxModel(obj) ) {
# Label a non-RAM matrix lamodel
return(xmuLabel_MATRIX_Model(model = obj, suffix = suffix, verbose = verbose))
} else {
stop("I can only label OpenMx models and mxMatrix types. You gave me a ", typeof(obj))
}
}
# TODO implement umxDefVar
# umxDefVar(selDefs[1], name ="mod1"){
# umxDefVar(colName = selDefs[1], name ="mod1"){
# # "data.defmod1"
# # TODO handle vector of colNames, return list of matrices
# umxMatrix(name, "Full", nrow=1, ncol=1, free=FALSE, labels=paste0("data.", colName))
# }
#' Make a mxMatrix with automatic labels. Also takes name as the first parameter for more readable code.
#'
#' @description
#' umxMatrix is a wrapper for mxMatrix which labels cells buy default, and has the name parameter first in order.
#'
#' @param name The name of the matrix (Default = NA). Note the different order compared to mxMatrix!
#' @param type The type of the matrix (Default = "Full")
#' @param nrow Number of rows in the matrix: Must be set
#' @param ncol Number of columns in the matrix: Must be set
#' @param free Whether cells are free (Default FALSE)
#' @param values The values of the matrix (Default NA)
#' @param labels Either whether to label the matrix (default TRUE), OR a vector of labels to apply.
#' @param lbound Lower bounds on cells (Defaults to NA)
#' @param ubound Upper bounds on cells (Defaults to NA)
#' @param byrow Whether to fill the matrix down columns or across rows first (Default = getOption('mxByrow')
#' @param dimnames NA
#' @param baseName Set to override the default (which is to use the matrix name as the prefix).
#' @param condenseSlots Whether to save memory by NULLing out unused matrix elements, like labels, ubound etc. Default = getOption('mxCondenseMatrixSlots')
#' @param ... Additional parameters (!! not currently supported by umxMatrix)
#' @param joinKey See mxMatrix documentation: Defaults to as.character(NA)
#' @param joinModel See mxMatrix documentation: Defaults to as.character(NA)
#' @param jiggle = NA passed to xmuLabel to jiggle start values (default does nothing)
#' @return - [OpenMx::mxMatrix()]
#' @export
#' @family Core Model Building Functions
#' @seealso - [xmu_simplex_corner()], [OpenMx::mxMatrix()], [xmuLabel()], [umxRAM()]
#' @references - <https://github.com/tbates/umx>, <https://tbates.github.io>
#' @md
#' @examples
#' \dontrun{
#' # ==================================================================================
#' # = 1. Showing how name is first parameter, and how cells are labelled by default. =
#' # ==================================================================================
#' umxMatrix("test", "Full", 2, 2)$labels
#' # [,1] [,2]
#' # [1,] "test_r1c1" "test_r1c2"
#' # [2,] "test_r2c1" "test_r2c2"
#'
#'# ===========================================================
#'# = 2. Over-ride default (matrix name) as prefix for labels =
#'# ===========================================================
#' umxMatrix("test", "Full", 2, 2, baseName = "bob")$labels # bob_r1c1
#'
#'
#'# ==========================================
#'# = 3. User-provided labels are left as-is =
#'# ==========================================
#' umxMatrix("foo", "Lower", nrow=2, ncol=2, labels= c(NA, "beta1", NA))
#' # [,1] [,2]
#' # [1,] NA NA
#' # [2,] "beta1" NA
#'
#' }
#'
umxMatrix <- function(name = NA, type = "Full", nrow = NA, ncol = NA, free = FALSE, values = NA, labels = TRUE, lbound = NA, ubound = NA, byrow = getOption('mxByrow'), baseName = NA, dimnames = NA, condenseSlots = getOption('mxCondenseMatrixSlots'), ..., joinKey = as.character(NA), joinModel = as.character(NA), jiggle = NA) {
legalMatrixTypes = c("Diag", "Full", "Iden", "Lower", "Sdiag", "Stand", "Symm", "Unit", "Zero")
if(name %in% legalMatrixTypes){
warning("You used ", omxQuotes(name), " as the name of your matrix: That's also a valid type, so make sure you're not putting type first...")
}
if(is.numeric(type)){
stop("You used ", omxQuotes(type), " as the type of your matrix. You probably need to add something like type='Full' or specify nrow and ncol")
}
if(isTRUE(labels)){
setLabels = TRUE
labels = NA
} else {
setLabels = FALSE
}
x = mxMatrix(type = type, nrow = nrow, ncol = ncol, free = free, values = values, labels = labels, lbound = lbound, ubound = ubound, byrow = byrow, dimnames = dimnames, name = name, condenseSlots = condenseSlots, joinKey = joinKey, joinModel = joinModel, ...)
if(setLabels){
x = xmuLabel(x, baseName = baseName, jiggle = jiggle)
}
return(x)
}
#' A simple wrapper for mxAlgebra with name as the first parameter for more readable compact code.
#'
#' @description
#' umxAlgebra is a wrapper for mxAlgebra which has the name parameter first in order.
#'
#' @param name The name of the algebra (Default = NA). Note the different order compared to mxAlgebra!
#' @param expression The algebra
#' @param dimnames Dimnames of the algebra
#' @param ... Other parameters
#' @param fixed = See mxAlgebra documentation
#' @param joinKey See mxAlgebra documentation
#' @param joinModel See mxAlgebra documentation
#' @param verbose Quiet or informative
#' @param initial See mxAlgebra documentation
#' @param recompute See mxAlgebra documentation
#' @return - [OpenMx::mxAlgebra()]
#' @export
#' @family Advanced Model Building Functions
#' @seealso - [umxMatrix()]
#' @md
#' @examples
#' \dontrun{
#' A = umxMatrix("A", "Full", nrow = 3, ncol = 3, values=2)
#' B = umxAlgebra("B", A)
#' C = umxAlgebra(A + B, name = "C")
#' D = umxAlgebra(sin(C), name = "D")
#' m1 = mxRun(mxModel("AlgebraExample", A, B, C, D ))
#' mxEval(D, m1)
#'
#' x = umxAlgebra("circ", expression = 2 * pi)
#' class(x$formula)
#' x = mxAlgebra(name = "circ", 2 * pi)
#' class(x$formula) # "call"
#' }
umxAlgebra <- function(name = NA, expression, dimnames = NA, ..., joinKey=as.character(NA), joinModel=as.character(NA), verbose=0L, initial=matrix(as.numeric(NA),1,1), recompute=c('always','onDemand'), fixed = "deprecated_use_recompute") {
message("umxAlgebra is not working yet: contribute here https://github.com/tbates/umx/issues/199 if you'd like this finished... ")
if(!inherits(name, "character")){
stop("In umxAlgebra, name comes first, not expression.")
}
formula = match.call()$expression
x = mxAlgebra(formula, name = name, dimnames = dimnames, ..., joinKey=joinKey, joinModel=joinModel, verbose=verbose, initial=initial, recompute = recompute)
return(x)
}
# =================================
# = Run Helpers =
# =================================
#' umxRun: Run an mxModel
#'
#' `umxRun` is a version of [OpenMx::mxRun()] which can run also set start values, labels, and run multiple times
#' It can also calculate the saturated and independence likelihoods necessary for most fit indices.
#' **Note** this is not needed for umxRAM models or twin models - it is just a convenience to get base OpenMx models to run.
#' @param model The [OpenMx::mxModel()] you wish to run.
#' @param tryHard How to tryHard. Default = "yes". Alternatives "no", "ordinal", "search"
#' @param calc_sat Whether to calculate the saturated and independence models (for raw [OpenMx::mxData()] [OpenMx::mxModel()]s)
#' @param setValues Whether to set the starting values of free parameters (default = FALSE)
#' @param setLabels Whether to set the labels (default = FALSE)
#' @param optimizer optional to set the optimizer.
#' @param intervals Whether to run mxCI confidence intervals (default = FALSE) intervals = FALSE
#' @param summary Whether to print summary or not (default = !umx_set_silent() )
#' @param comparison Comparison model (will be used to drive umxCompare() after umxRun
#' @return - [OpenMx::mxModel()]
#' @family Advanced Model Building Functions
#' @references - <https://github.com/tbates/umx>
#' @export
#' @md
#' @examples
#' \dontrun{
#' require(umx)
#' data(demoOneFactor)
#' latents = c("G")
#' manifests = names(demoOneFactor)
#' m1 = mxModel("fact", type="RAM", manifestVars=manifests, latentVars=latents,
#' mxPath(latents , to = manifests),
#' mxPath(manifests, arrows = 2),
#' mxPath(latents , arrows = 2, free = FALSE, values = 1),
#' mxData(cov(demoOneFactor), type = "cov", numObs=500)
#' )
#'
#' m1 = umxRun(m1) # just run: will create saturated model if needed
#' m1 = umxRun(m1, setValues = TRUE, setLabels = TRUE) # set start values and label all parameters
#' umxSummary(m1, std = TRUE)
#' m1 = mxModel(m1, mxCI("G_to_x1")) # add one CI
#' m1 = mxRun(m1, intervals = TRUE)
#' residuals(m1, run = TRUE) # get CIs on all free parameters
#' confint(m1) # OpenMx's SE-based CIs
#' umxConfint(m1, run = TRUE) # get likelihood-based CIs on all free parameters
#' m1 = umxRun(m1, tryHard = "yes")
#' }
#'
# type = c("Auto", "FIML", "cov", "cor", "WLS", "DWLS", "ULS"),
umxRun <- function(model, tryHard = c( "yes", "no", "ordinal", "search"), calc_sat = TRUE, setValues = FALSE, setLabels = FALSE, summary = !umx_set_silent(silent = TRUE), intervals = FALSE, optimizer = NULL, comparison = NULL){
# TODO: umxRun: Return change in -2LL for models being re-run
# TODO: umxRun: Stash saturated model for re-use
# TODO: umxRun: Optimise for speed
tryHard = match.arg(tryHard)
# =================
# = Set optimizer =
# =================
if(!is.null(optimizer)){
umx_set_optimizer(optimizer)
}
if(setLabels){
model = xmuLabel(model)
}
if(setValues){
model = xmuValues(model)
}
model = xmu_safe_run_summary(model, autoRun = TRUE, summary = summary, tryHard = tryHard)
if(calc_sat){
if(umx_is_RAM(model)){
if(model$data$type == "raw"){
# If we have a RAM model with raw data, compute the saturated and independence models
# message("computing saturated and independence models so you have access to absolute fit indices for this raw-data model")
ref_models = mxRefModels(model, run = TRUE)
model@output$IndependenceLikelihood = as.numeric(-2 * logLik(ref_models$Independence))
model@output$SaturatedLikelihood = as.numeric(-2 * logLik(ref_models$Saturated))
}
}
}
if(!is.null(comparison)){
umxCompare(comparison, model)
}
return(model)
}
# ==============================
# = Label and equate functions =
# ==============================
#' Change or fix parameters (e.g. their values, labels, bounds, ..) in a model.
#'
#' `umxSetParameters` is used to alter values, and other parameter properties in an [OpenMx::mxModel()].
#' A common use is setting new values and changing parameters from free to false.
#' *Note*: If you just want to modify and re-run a model, you probably want [umxModify()].
#'
#' Using `umxSetParameters`, you use `labels=` to select the parameters you want to update.
#' You can set their free/fixed state with `free=`, and set new values with `values = `. Likewise
#' for bounds.
#'
#' `umxSetParameters` supports pattern matching (regular expressions) to select labels. Set `regex=`
#' to a regular expression matching the labels you want to select. e.g. "G_to_.*" would match
#' "G_to_anything".
#'
#' **Details**
#' Internally, `umxSetParameters` is equivalent to a call to `omxSetParameters` where you
#' have the ability to generate a pattern-based label list,
#' and, because this can create duplicate labels, we also call [OpenMx::omxAssignFirstParameters()]
#' to equate the start values for parameters which now have identical labels.
#'
#' @param model an [OpenMx::mxModel()] to set parameters in.
#' @param labels = labels to find
#' @param free = new value for free
#' @param values = new values
#' @param newlabels = newlabels
#' @param lbound = value for lbound
#' @param ubound = value for ubound
#' @param indep = whether to look in indep models
#' @param strict whether to complain if labels not found
#' @param name = new name for the returned model
#' @param regex patterns to match for labels (or if TRUE, use labels as regular expressions)
#' @param test Just show what you would do? (defaults to FALSE)
#' @return - [OpenMx::mxModel()]
#' @export
#' @family Model Summary and Comparison
#' @seealso - [umxModify()], [xmuLabel()]
#' @references - <https://github.com/tbates/umx>, <https://tbates.github.io>
#' @md
#' @examples
#' \dontrun{
#' require(umx)
#' data(demoOneFactor)
#' latents = c("G")
#' manifests = names(demoOneFactor)
#' m1 = umxRAM("One Factor", data = mxData(demoOneFactor[1:80,], type = "raw"),
#' umxPath(from = latents, to = manifests),
#' umxPath(v.m. = manifests),
#' umxPath(v1m0 = latents)
#' )
#' parameters(m1)
#' # Match all labels
# # example showing all updated with an "m1_" in front
#' umxSetParameters(m1, regex = "^", newlabels= "m1_", test = TRUE)
#'
#' # Change path to x1 to x2, equating these two paths
#' m2 = umxSetParameters(m1, "G_to_x1", newlabels= "G_to_x2", test = FALSE)
#' m2 = umxRun(m2) # umxSetParameters does not re-run he model, so make sure you do!
#' parameters(m2)
#'
#' }
umxSetParameters <- function(model, labels, free = NULL, values = NULL, newlabels = NULL, lbound = NULL, ubound = NULL, indep = FALSE, strict = TRUE, name = NULL, regex = FALSE, test = FALSE) {
if(is.character(regex)){
labels = regex
regex = TRUE
}
nothingDoing = all(is.null(c(free, values, newlabels)))
if(nothingDoing){
warning("You are not setting anything: set one or more of free, values, or newlabels to update a parameter")
}
if(regex){
oldLabels = umxGetParameters(model, regex = labels)
if(!is.null(newlabels)){
newlabels = gsub(labels, newlabels, oldLabels, ignore.case = FALSE)
}
labels = oldLabels
}
if(test){
message("Found labels:", omxQuotes(labels))
message("New labels:", omxQuotes(newlabels))
} else {
a = omxSetParameters(model = model, labels = labels, free = free, values = values,
newlabels = newlabels, lbound = lbound, ubound = ubound, indep = indep,
strict = strict, name = name)
return(omxAssignFirstParameters(a, indep = FALSE))
}
}
#' umxEquate: Equate two or more paths
#'
#' In addition to dropping or adding parameters, a second common task in modeling
#' is to equate parameters. umx provides a convenience function to equate parameters
#' by setting one or more parameters (the "slave" set) equal to one or more "master"
#' parameters. These parameters are picked out via their labels, and setting two or more
#' parameters to have the same value is accomplished by setting the slave(s) to have
#' the same label(s) as the master parameters, thus constraining them to take the same
#' value during model fitting.
#'
#' \emph{note}: In addition to using this method to equating parameters, you can
#' also equate one parameter to another by setting its label to the
#' "square bracket" address of the master, e.g. "a\[r,c\]".
#'
#' \emph{Tip}: To find labels of free parameters use [umxGetParameters()]
#' with free = TRUE
#'
#' \emph{Tip}: To find labels by name, use the regex parameter of [umxGetParameters()]
#'
#' @param model An [OpenMx::mxModel()] within which to equate parameters listed in "a" with those in "b"
#' @param a one or more labels to equate with those in the "b" set.
#' @param b one or more labels to equate with those in the 'a' set. (if 'newlabels' is NULL, labels will be set to 'a' list).
#' @param newlabels (optional) list of new labels for the equated parameters.
#' @param free Must the parameter(s) initially be free? (default = TRUE)
#' @param verbose Whether to give verbose feedback (default = TRUE)
#' @param name name for the returned model (optional: Leave empty to leave name unchanged)
#' @param comparison Compare the new model to the old (if updating an existing model: default = TRUE)
#' @param autoRun Whether to run the model (default), or just to create it and return without running.
#' @param tryHard Default ('no') uses normal mxRun. "yes" uses mxTryHard. Other options: "ordinal", "search"
#' @param master synonym for 'a'
#' @param slave synonym for 'b'
#' @return - [OpenMx::mxModel()]
#' @export
#' @seealso [umxModify()], [umxCompare()]
#' @family Model Summary and Comparison
#' @references - <https://github.com/tbates/umx>
#' @md
#' @examples
#' \dontrun{
#' require(umx)
#' data(demoOneFactor)
#' manifests = names(demoOneFactor)
#' m1 = umxRAM("One Factor", data = demoOneFactor, type = "cov",
#' umxPath("G", to = manifests),
#' umxPath(var = manifests),
#' umxPath(var = "G", fixedAt = 1)
#' )
#' # By default, umxEquate just equates master and slave labels: doesn't run model
#' m2 = umxEquate(m1, a = "G_to_x1", b = "G_to_x2", name = "Eq x1 x2 loadings")
#'
#' # Set autoRun = TRUE and comparison = TRUE to run and output a comparison
#' m2 = umxEquate(m1, autoRun = TRUE, comparison = TRUE, name = "Eq_x1_x2",
#' a = "G_to_x1", b = "G_to_x2"
#' )
#'
#' # rename the equated paths
#' m2 = umxEquate(m1, autoRun = TRUE, comparison = TRUE, name = "Eq_x1_x2",
#' a = "G_to_x1", b = "G_to_x2", newlabels = c("equated")
#' )
#' parameters(m2)
#' }
umxEquate <- function(model, a, b, newlabels= NULL, free = c(TRUE, FALSE, NA), verbose = FALSE, name = NULL, autoRun = FALSE, tryHard = c("no", "yes", "ordinal", "search"), comparison = TRUE, master= NULL, slave= NULL) {
free = xmu_match.arg(free, c(TRUE, FALSE, NA)) # match.arg can't handle Boolean as options?
tryHard = match.arg(tryHard)
if(!is.null(master)){
listA = master
listB = slave
}else{
listA = a
listB = b
}
if(!umx_is_MxModel(model)){
message("ERROR in umxEquate: model must be a model, you gave me a ", class(model)[1])
message("A usage example is umxEquate(model, listA=\"a_to_b\", listB=\"a_to_c\", name=\"model2\") # equate paths a->b and a->c, in a new model called \"model2\"")
stop()
}
if(length(listA) == 1){
if(length(grep("[\\^\\.\\*\\[\\(\\+\\|]+", listA) ) < 1){ # no grep found: add some anchors
listA = paste0("^", listA, "$"); # anchor to the start of the string
listB = paste0("^", listB, "$");
if(verbose == TRUE){
cat("note: matching whole label\n");
}
}
}
listALabels = umxGetParameters(model, regex = listA, free = free, verbose = verbose)
listBLabels = umxGetParameters(model, regex = listB, free = free, verbose = verbose)
if( length(listBLabels) != length(listALabels) && (length(listALabels)!=1)) {
print(list(listALabels = listALabels, listBLabels = listBLabels))
stop("ERROR in umxEquate: listA and listB labels not the same length!\n",
length(listBLabels), " list B labels found, and ", length(listALabels), " list As")
}
if(length(listBLabels) == 0) {
legal = names(omxGetParameters(model, indep=FALSE, free=free))
legal = legal[which(!is.na(legal))]
message("Labels available in model are: ", paste(legal, ", "))
stop("ERROR in umxEquate: no listB labels found or none requested!")
}
# print(list(listALabels = listALabels, listBLabels = listBLabels))
if(is.null(newlabels)){
newModel = omxSetParameters(model = model, labels = listBLabels, newlabels = listALabels, name = name)
} else {
umx_check(length(newlabels)==length(listALabels), "stop", "newlabels must be the same length as list a. ",
"Found ", length(listALabels), " list a labels, and ", length(newlabels), " newlabels"
)
newModel = omxSetParameters(model = model , labels = listALabels, newlabels = newlabels, name = name)
newModel = omxSetParameters(model = newModel, labels = listBLabels, newlabels = newlabels, name = name)
}
newModel = omxAssignFirstParameters(newModel, indep = FALSE)
newModel = xmu_safe_run_summary(newModel, model, autoRun = autoRun, tryHard = tryHard, comparison = comparison)
return(newModel)
}
#' umxFixAll: Fix all free parameters
#'
#' Fix all free parameters in a model using omxGetParameters()
#'
#' @param model an [OpenMx::mxModel()] within which to fix free parameters
#' @param verbose whether to mention how many paths were fixed (default is FALSE)
#' @param name optional new name for the model. if you begin with a _ it will be made a suffix
#' @param run whether to fix and re-run the model, or just return it (defaults to FALSE)
#' @return - the fixed [OpenMx::mxModel()]
#' @export
#' @family Advanced Model Building Functions
#' @references - <https://tbates.github.io>, <https://github.com/tbates/umx>
#' @md
#' @examples
#' \dontrun{
#' require(umx)
#' data(demoOneFactor)
#' manifests = names(demoOneFactor)
#'
#' m1 = umxRAM("OneFactor", data = demoOneFactor, type = "cov",
#' umxPath("G", to = manifests),
#' umxPath(var = manifests),
#' umxPath(var = "G", fixedAt = 1)
#' )
#' m2 = umxFixAll(m1, run = TRUE, verbose = TRUE)
#' mxCompare(m1, m2)
#'
#' }
umxFixAll <- function(model, name = "_fixed", run = FALSE, verbose= FALSE){
if(!umx_is_MxModel(model)){
message("ERROR in umxFixAll: model must be a model, you gave me a ", class(model)[1])
message("A usage example is m1 = umxFixAll(m1)")
stop()
}
if(is.null(name)){
name = model$name
} else if("_" == substring(name, 1, 1)){
name = paste0(model$name, name)
}
oldFree = names(omxGetParameters(model, indep = TRUE, free = TRUE))
if(verbose){
message("fixed ", length(oldFree), " paths")
}
model = omxSetParameters(model, oldFree, free = FALSE, strict = TRUE, name = name)
if(run){
model = mxRun(model)
}
return(model)
}
# ===============
# = RAM Helpers =
# ===============
# ===========================
# = matrix-oriented helpers =
# ===========================
#' Create the threshold matrix needed for modeling ordinal data.
#'
#' High-level helper for ordinal modeling. Creates, labels, and sets smart-starts for this
#' complex set set of an algebra and matrices. Big time saver!
#'
#' @details We often need to model ordinal data: sex, low-med-hi, depressed/normal, etc.,
#' A useful conceptual strategy to handle these data is to build a standard model for normally-varying data
#' and then to threshold this normal distribution to generate the observed data. Thus an observation of "depressed"
#' is modeled as a high score on the latent normally distributed trait, with thresholds set so that only scores above
#' this threshold (1-minus the number of categories) reach the criteria for the diagnosis.
#'
#' Making this work can require fixing the first 2 thresholds of ordinal data, or fixing both the mean and variance of
#' a latent variable driving binary data, in order to estimate its one-free parameter: where to place the single threshold
#' separating low from high cases.
#'
#' The function returns a 3-item list consisting of:
#'
#' 1. A thresholdsAlgebra (named `threshMatName`)
#' 2. A matrix of deviations for the thresholds (`deviations_for_thresh`)
#' 3. A lower matrix of ones (`lowerOnes_for_thresh`)
#'
#' *Twin Data*
#'
#' With twin data, make sure to provide the **full names** for twin data... this is not standard I know...
#'
#' For twins (the function currently handles only pairs), the thresholds are equated for both twins using labels:
#'
#' $labels
#'
#' obese_T1 obese_T2
#'
#' dev_1 "obese_dev1" "obese_dev1"
#'
#' @param df The data being modeled (to allow access to the factor levels and quantiles within these for each variable)
#' @param fullVarNames The variable names. Note for twin data, just the base names, which sep will be used to fill out.
#' @param sep (e.g. "_T") Required for wide (twin) data. It is used to break the base names our from their numeric suffixes.
#' @param method How to implement the thresholds: Mehta, (1 free thresh for binary, first two fixed for ordinal) or "allFree"
#' @param l_u_bound c(NA, NA) by default, you can use this to bound the first (base) threshold.
#' @param droplevels Whether to drop levels with no observed data (defaults to FALSE)
#' @param threshMatName name of the matrix which is returned. Defaults to "threshMat" - best not to change it.
#' @param verbose How much to say about what was done. (defaults to FALSE)
#' @param selDVs deprecated. Use "fullVarNames"
#' @return - list of thresholds matrix, deviations, lowerOnes
#' @export
#' @seealso [OpenMx::mxThreshold()]
#' @family Advanced Model Building Functions
#' @references - <https://tbates.github.io>, <https://github.com/tbates/umx>
#' @md
#' @examples
#'
#' # ============================
#' # = Simple non-twin examples =
#' # ============================
#'
#' # data: 1 2-level ordered factor
#' x = data.frame(ordered(rbinom(100,1,.5))); names(x) = c("x")
#'
#' tmp = umxThresholdMatrix(x, fullVarNames = "x")
#' # The lower ones matrix (all fixed)
#' tmp[[1]]$values
#' tmp[[1]]$free
#'
#' # The deviations matrix
#' tmp[[2]]$values
#' tmp[[2]]$labels # note: for twins, labels will be equated across twins
#'
#' # The algebra that adds the deviations to create thresholds:
#' tmp[[3]]$formula
#'
#' # Example of a warning to not omit the variable names
#' # tmp = umxThresholdMatrix(x)
#' # Polite message: For coding safety, when calling umxThresholdMatrix, set fullVarNames...
#'
#' # One ordered factor with 5-levels
#' x = cut(rnorm(100), breaks = c(-Inf,.2,.5, .7, Inf)); levels(x) = 1:5
#' x = data.frame(ordered(x)); names(x) <- c("x")
#' tmp = umxThresholdMatrix(x, fullVarNames = "x")
#' tmp[[2]]$name
#' tmp[[2]]$free # last one is free.. (method = Mehta)
#'
#' tmp = umxThresholdMatrix(x, fullVarNames = "x", l_u_bound= c(-1,1))
#' tmp[[2]]$lbound # bounds applied to base threshold
#'
#' # =================================
#' # = Binary example with twin data =
#' # =================================
#' # ===============================================================
#' # = Create a series of binary and ordinal columns to work with =
#' # ===============================================================
#' data(twinData)
#'
#' # Make "obese" variable with ~20% subjects categorised as obese
#' obesityLevels = c('normal', 'obese')
#' cutPoints = quantile(twinData[, "bmi1"], probs = .2, na.rm = TRUE)
#' twinData$obese1 = cut(twinData$bmi1, breaks = c(-Inf, cutPoints, Inf), labels = obesityLevels)
#' twinData$obese2 = cut(twinData$bmi2, breaks = c(-Inf, cutPoints, Inf), labels = obesityLevels)
#' # Step 2: Make the ordinal variables into umxFactors (ordered, with the levels found in the data)
#' selVars = c("obese1", "obese2")
#' twinData[, selVars] = umxFactor(twinData[, selVars])
#'
#' # Example 1
#' # use verbose = TRUE to see informative messages
#' tmp = umxThresholdMatrix(twinData, fullVarNames = selVars, sep = "", verbose = TRUE)
#'
#'
#' # ======================================
#' # = Ordinal (n categories > 2) example =
#' # ======================================
#' # Repeat for three-level weight variable
#' obesityLevels = c('normal', 'overweight', 'obese')
#' cutPoints = quantile(twinData[, "bmi1"], probs = c(.4, .7), na.rm = TRUE)
#' twinData$obeseTri1 = cut(twinData$bmi1, breaks = c(-Inf, cutPoints, Inf), labels = obesityLevels)
#' twinData$obeseTri2 = cut(twinData$bmi2, breaks = c(-Inf, cutPoints, Inf), labels = obesityLevels)
#' selDVs = "obeseTri"; selVars = tvars(selDVs, sep = "", suffixes = 1:2)
#' twinData[, selVars] = umxFactor(twinData[, selVars])
#' tmp = umxThresholdMatrix(twinData, fullVarNames = selVars, sep = "", verbose = TRUE)
#'
#'
#' # ========================================================
#' # = Mix of all three kinds example (and a 4-level trait) =
#' # ========================================================
#' obesityLevels = c('underWeight', 'normal', 'overweight', 'obese')
#' cutPoints = quantile(twinData[, "bmi1"], probs = c(.25, .4, .7), na.rm = TRUE)
#' twinData$obeseQuad1 = cut(twinData$bmi1, breaks = c(-Inf, cutPoints, Inf), labels = obesityLevels)
#' twinData$obeseQuad2 = cut(twinData$bmi2, breaks = c(-Inf, cutPoints, Inf), labels = obesityLevels)
#' selVars = c("obeseQuad1", "obeseQuad2")
#' twinData[, selVars] = umxFactor(twinData[, selVars])
#'
#' selDVs =c("bmi", "obese", "obeseTri", "obeseQuad")
#' tmp = umxThresholdMatrix(twinData, fullVarNames = tvars(selDVs, sep= ""), sep = "", verbose = TRUE)
#' # The lower ones matrix (all fixed)
#' tmp[[1]]$values
#' # The deviations matrix
#' tmp[[2]]$values
#' tmp[[2]]$labels # note labels are equated across twins
#' # Check to be sure twin-1 column labels same as twin-2
#' tmp[[2]]$labels[,2]==tmp[[2]]$labels[,4]
#'
#' # The algebra that assembles these into thresholds:
#' tmp[[3]]$formula
#' # =================================
#' # = Example with method = allFree =
#' # =================================
#'
#' tmp = umxThresholdMatrix(twinData, fullVarNames = tvars(selDVs, sep= ""), sep = "",
#' method = "allFree")
#' all(tmp[[2]]$free)
#'
umxThresholdMatrix <- function(df, fullVarNames = NULL, sep = NULL, method = c("Mehta", "allFree"), threshMatName = "threshMat", l_u_bound = c(NA, NA), droplevels = FALSE, verbose = FALSE, selDVs= "deprecated"){
# TODO: umxThresholdMatrix: priority A: Move to a more robust way to detect twin than just the sep isn't NULL??
# TODO: Consider changing from "threshMat" to "Thresholds" to match what mxModel does with mxThresholds internally now...
method = match.arg(method)
if(method=="allFree"){
verbose = FALSE
}
if(any(selDVs != "deprecated")){
message("Polite note: please use fullVarNames instead of selDVs when calling umxThresholdMatrix")
fullVarNames = selDVs
}
if(is.null(fullVarNames)){
warning("Polite message: For coding safety, when calling umxThresholdMatrix, set fullVarNames to the list of FULL names of all the variables in the model (AND you MUST include sep if this is a twin model!!)")
fullVarNames = names(df)
nSib = 1
} else if(is.null(sep)){
# no sep: Assume this is not family data
nSib = 1
} else {
# sep provided: Assume this is twin data (already expanded... no way currently to tell if sep was intended to build or decompose vars - see TODO above!!)
# Set nSib, and break down names into base and suffix if necessary
msg = paste0("umxThresholdMatrix needs the _FULL_ name of each variable (in addition to the `sep` used to break them down to base names)...
you provided: ", omxQuotes(fullVarNames))
umx_check_names(namesNeeded = fullVarNames, data = df, die = TRUE, message = msg)
tmp = umx_explode_twin_names(fullVarNames, sep = sep)
baseNames = tmp$baseNames
twinIndexes = tmp$twinIndexes
nSib = length(twinIndexes)
}
# Create dataframe with just the requested variables
df = df[, fullVarNames, drop = FALSE]
# Check input
if(dim(df)[1] < 1){ stop("Data input to umxThresholdMatrix had no rows. I use the data to set thresholds, so the data must have rows.") }
if(droplevels){ stop("Not sure it's wise to drop levels... let me know if you have a case where this is legit") }
summaryObj = umx_is_ordered(df, summaryObject= TRUE)
isFactor = summaryObj$isFactor
isOrd = summaryObj$isOrd
isBin = summaryObj$isBin
nFactors = summaryObj$nFactors
nOrdVars = summaryObj$nOrdVars
nBinVars = summaryObj$nBinVars
factorVarNames = summaryObj$factorVarNames
ordVarNames = summaryObj$ordVarNames
binVarNames = summaryObj$binVarNames
df = df[, factorVarNames, drop = FALSE]
if(nSib == 2){
# For precision (placing cuts) and to ensure twins have same levels, copy both halves of the dataframe into each
T1 = df[, grep(paste0(twinIndexes[1], "$"), factorVarNames, value = TRUE), drop = FALSE]
T2 = df[, grep(paste0(twinIndexes[2], "$"), factorVarNames, value = TRUE), drop = FALSE]
names(T2) = names(T1)
df = cbind(rbind(T1, T2), rbind(T1, T2))
names(df) = factorVarNames
} else if(nSib == 1){
# df is fine as is.
} else {
stop("I can only handle 1 and 2 sib models. The way you called umxThresholdMatrix, I've guessed nSib is ", omxQuotes(nSib),
" and separator ", omxQuotes(sep), "\n fullVarNames were: ", omxQuotes(fullVarNames), ". email maintainer('umx') to get this expanded.")
}
minLevels = xmuMinLevels(df)
maxLevels = xmuMaxLevels(df)
maxThresh = maxLevels - 1
# ===========================================
# = Tell the user what we found if they ask =
# ===========================================
if((nOrdVars + nBinVars) < 1){
warning("No ordinal or binary variables in dataframe (or possibly a factor but with only 1 level): no need to call umxThresholdMatrix")
return(NA) # Probably OK to set thresholds matrix to NA in mxExpectation()
} else {
if(verbose){
theMsg = paste0("object ", omxQuotes(threshMatName), " created to handle")
if(nSib == 2){
if(nOrdVars > 0){
theMsg = paste0(theMsg, ": ", nOrdVars/nSib, " pair(s) of ordinal variables:", omxQuotes(ordVarNames), "\n")
}
if(nBinVars > 0){
theMsg = paste0(theMsg, ": ", nBinVars/nSib, " pair(s) of binary variables:", omxQuotes(binVarNames), "\n")
}
} else {
if(nOrdVars > 0){
theMsg = paste0(theMsg, ": ", nOrdVars, " ordinal variables:", omxQuotes(ordVarNames), "\n")
}
if(nBinVars > 0){
theMsg = paste0(theMsg, ": ", nBinVars, " binary variables:", omxQuotes(binVarNames), "\n")
}
}
message(theMsg)
}
}
# =================================
# = Create labels and free vector =
# =================================
labels = free = c()
for (thisVarName in factorVarNames) {
thisCol = df[,thisVarName]
nThreshThisVar = length(levels(thisCol)) -1
# TODO maybe make this demand/find basenames?
if(nSib == 2){
# Make same label (just baseVarname_thresh) for each twin for each variable
findStr = paste0(sep, "(", paste(twinIndexes, collapse = "|"), ")$") # e.g. "_T(1|2)$"
thisLab = sub(findStr, "", thisVarName) # strip sep+0-9 from end of name, e.. remove "_T1"
} else {
thisLab = thisVarName
}
theseLabels = umx_pad(paste0(thisLab, "_thresh", 1:nThreshThisVar), maxThresh)
labels = append(labels, theseLabels)
# ============
# = Set Free =
# ============
theseFree = c(rep(TRUE, nThreshThisVar), rep(FALSE, (maxThresh - nThreshThisVar)))
free = append(free, theseFree)
}
# Create threshMat with size the order maxThresh rows * nFactors cols
threshMat = mxMatrix(name = threshMatName, type = "Full",
nrow = maxThresh,
ncol = nFactors,
free = free,
values = rep(NA, (maxThresh * nFactors)),
labels = labels,
# note: these matrix bounds are discarded along with this threshMat now
# that thresholds are implemented using deviations
# Bounds are placed instead on the first deviation threshold
lbound = l_u_bound[1],
ubound = l_u_bound[2],
dimnames = list(paste0("th_", 1:maxThresh), factorVarNames)
)
# ====================
# = talk to the user =
# ====================
if(nBinVars > 0){
if(verbose){
message(sum(isBin), " trait(s) are binary: ", omxQuotes(binVarNames),
"\nFor these, you you MUST fix the mean and variance of the latent traits driving each variable (usually 0 & 1 respectively) .\n",
"See ?umxThresholdMatrix")
}
}
if(nOrdVars > 0){
if(verbose){
message(nOrdVars, " variables are ordinal (>2 levels). For these I will use Paras Mehta's 'fix first 2 thresholds' method.\n",
"It's ESSENTIAL that you leave the means and variances of the latent ordinal traits FREE!\n",
"See ?umxThresholdMatrix")
}
}
if(minLevels == 1){
warning("You seem to have a trait with only one category: ", omxQuotes(xmuMinLevels(df, what = "name")), "... makes it a bit futile to model it?")
stop("Stopping, as I can't handle trait with no variance.")
}
# =======================
# = Estimate thresholds =
# =======================
# For each factor variable
for (thisVarName in factorVarNames) {
thisCol = df[,thisVarName]
nThreshThisVar = length(levels(thisCol)) -1 # "0" "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12"
# ===============================================================
# = Work out z-values for thresholds based on simple bin counts =
# ===============================================================
# Pros: Doesn't assume equal intervals.
# Problems = empty bins and noise (equal thresholds (illegal) and higher than realistic z-values)
tab = table(thisCol)/sum(table(thisCol)) # Simple table of % values occuring at each threshold
cumTab = cumsum(tab) # Convert to a cumulative sum (sigmoid from 0 to 1)
# Use quantiles to get z-equivalent for each level: ditch one to get thresholds...
zValues = qnorm(p = cumTab, lower.tail = TRUE)
# take this table as make a simple vector
zValues = as.numeric(zValues)
# =======================================================================================
# = TODO handle where flows over, say, 3.3... squash the top down or let the user know? =
# =======================================================================================
if(any(is.infinite(zValues))){
nPlusInf = sum(zValues == (Inf))
nMinusInf = sum(zValues == (-Inf))
if(nPlusInf){
if(length(zValues[!is.infinite(zValues)])==0){
zValues[zValues == (Inf)] = seq(from = .1, by = .1, length.out = nPlusInf)
} else {
maxOK = max(zValues[!is.infinite(zValues)])
padding = seq(from = (maxOK + .1), by = .1, length.out = nPlusInf)
zValues[zValues == (Inf)] = padding
}
}
if(nMinusInf){
if(length(zValues[!is.infinite(zValues)])==0){
zValues[zValues == (-Inf)] = seq(from = -.1, by = .1, length.out = nPlusInf)
} else {
minOK = min(zValues[!is.infinite(zValues)])
padding = seq(from = (minOK - .1), by = (- .1), length.out = nMinusInf)
zValues[zValues == (-Inf)] = padding
}
}
}
# =================================
# = Move duplicates (empty cells) =
# =================================
if(any(duplicated(zValues))){
# message("You have some empty cells")
# Find where the values change:
runs = rle(zValues)
runLengths = runs$lengths
runValues = runs$values
distinctCount = length(runValues)
indexIntoRLE = indexIntoZ = 1
for (i in runLengths) {
runLen = i
if(runLen != 1){
repeatedValue = runValues[indexIntoRLE]
precedingValue = runValues[(indexIntoRLE - 1)]
minimumStep = .01
if(indexIntoRLE == distinctCount){
newValues = seq(from = (precedingValue + minimumStep), by = (minimumStep), length.out = runLen)
zValues[c(indexIntoZ:(indexIntoZ + runLen - 1))] = rev(newValues)
} else {
followedBy = runValues[(indexIntoRLE + 1)]
minimumStep = min((followedBy - precedingValue)/(runLen + 1), minimumStep)
newValues = seq(from = (followedBy - minimumStep), by = (-minimumStep), length.out = runLen)
zValues[c(indexIntoZ:(indexIntoZ + runLen - 1))] = rev(newValues)
}
}
indexIntoZ = indexIntoZ + runLen
indexIntoRLE = indexIntoRLE + 1
}
}
# TODO start from 1, right, not 2?
values = c(zValues[1:(nThreshThisVar)], rep(.001, (maxThresh - nThreshThisVar)))
sortValues = sort(zValues[1:(nThreshThisVar)], na.last = TRUE)
if (!identical(sortValues, zValues[1:(nThreshThisVar)])) {
umx_msg(values)
stop("The thresholds for ", thisVarName, " are not in order... oops: that's my fault :-(")
}
# Already labeled, and all free initialised to TRUE (out of range = FALSE)
if(nThreshThisVar > 1){ # fix the first 2
threshMat$free[1:2, thisVarName] = FALSE
}
threshMat$values[, thisVarName] = values
} # end for each factor variable
if(verbose) {
message("Using deviation-based model: Thresholds will be in", omxQuotes(threshMatName), " based on deviations in ", omxQuotes("deviations_for_thresh"))
}
# ==========================
# = Adding deviation model =
# ==========================
# Tasks:
# 1. Convert thresholds into deviations
# value 1 for each var = the base, everything else is a deviation
# 2. Make matrix deviations_for_thresh (similar to existing threshMat), fill values with results from 1
# 3. Make lower matrix of 1s called "lowerOnes_for_thresh"
# 4. Create thresholdsAlgebra named threshMatName
# 5. Return a package of lowerOnes_for_thresh, deviations_for_thresh & thresholdsAlgebra (named threshMatName)
# =====
# = 1 =
# =====
# startDeviations
deviationValues = threshMat$values
nrows = dim(threshMat$values)[1]
ncols = dim(threshMat$values)[2]
if (nrows > 1){
for (col in 1:ncols) {
# Skip row 1 which is the base
for (row in 2:nrows) {
# Convert remaining rows to offsets
thisValue = threshMat$values[row, col]
previousValue = threshMat$values[(row-1), col]
if(!is.na(thisValue)){
thisOffset = thisValue - previousValue
if(thisOffset <= 0){
# tweak to be slightly positive
thisOffset = .001
}
deviationValues[row, col] = thisOffset
} else {
# out of range: TODO: simplify by just run to max thresh row
}
}
}
}
# threshMat$values
# obese1 obeseTri1 obeseQuad1 obese2 obeseTri2 obeseQuad2
# th_1 -0.4727891 0.2557009 -0.2345662 -0.4727891 0.2557009 -0.2345662
# th_2 NA 1.0601180 0.2557009 NA 1.0601180 0.2557009
# th_3 NA NA 1.0601180 NA NA 1.0601180
#
# threshMat$free
# obese1 obeseTri1 obeseQuad1 obese2 obeseTri2 obeseQuad2
# th_1 TRUE FALSE FALSE TRUE FALSE FALSE
# th_2 FALSE FALSE FALSE FALSE FALSE FALSE
# th_3 FALSE FALSE TRUE FALSE FALSE TRUE
# =====
# = 2 =
# =====
# make a copy of "_thresh" labels, changing to "_dev"
devLabels = sub("_thresh", "_dev", threshMat$labels, ignore.case = FALSE)
# Create the deviations matrix
deviations_for_thresh = mxMatrix(name = "deviations_for_thresh", type = "Full",
nrow = maxThresh, ncol = nFactors,
free = threshMat$free,
labels = devLabels,
values = deviationValues,
lbound = .001,
# TODO umxThresholdMatrix: ubound might want to be l_u_bound[2]
ubound = NA,
dimnames = list(paste0("dev_", 1:maxThresh), factorVarNames)
)
deviations_for_thresh$lbound[1,] = l_u_bound[1] # First deviation is special, because it it's the base, not a deviation.
deviations_for_thresh$ubound[1,] = l_u_bound[2] # First deviation is special, because it it's the base, not a deviation.
# 3: Create the lowerOnes matrix
lowerOnes_for_thresh = mxMatrix(name = "lowerOnes_for_thresh", type = "Lower", nrow = maxThresh, free = FALSE, values = 1)
# 4: Create thresholdsAlgebra named threshMatName
threshDimNames = list(paste0("th_", 1:maxThresh), factorVarNames)
thresholdsAlgebra = mxAlgebra(name = threshMatName, lowerOnes_for_thresh %*% deviations_for_thresh, dimnames = threshDimNames)
if(method == "allFree"){
deviations_for_thresh$free = TRUE
# !tmpFree[[2]]$free
# deviations_for_thresh$free[FALSE]=TRUE
# deviations_for_thresh$free = tmpFree
}
return(list(lowerOnes_for_thresh, deviations_for_thresh, thresholdsAlgebra))
}
# ===========
# = Utility =
# ===========
# ====================
# = Parallel Helpers =
# ====================
eddie_AddCIbyNumber <- function(model, labelRegex = "") {
# eddie_AddCIbyNumber(model, labelRegex="[ace][1-9]")
args = commandArgs(trailingOnly=TRUE)
CInumber = as.numeric(args[1]); # get the 1st argument from the cmdline arguments (this is called from a script)
CIlist = umxGetParameters(model ,regex= "[ace][0-9]", verbose= FALSE)
thisCI = CIlist[CInumber]
model = mxModel(model, mxCI(thisCI) )
return (model)
}
#' Easier (and powerful) specification of paths in SEM.
#'
#' @description This function is used to easily and compactly specify paths in models. In addition to
#' `from` and `to`, it adds specialised parameters for variances (var), two headed paths (with) and means (mean).
#' There are also new terms to describe fixing values: `fixedAt` and `fixFirst`.
#' To give a couple of the most common, time-saving examples:
#'
#' * `umxPath("A", with = "B", fixedAt = 1)`
#' * `umxPath(var = c("A", "B"), fixedAt = 1)`
#' * `umxPath(v.m. = manifests)`
#' * `umxPath(v1m0 = latents)`
#' * `umxPath(v1m0 = latents)`
#' * `umxPath(means = manifests)`
#' * `umxPath(fromEach = c('A',"B","C"), to = c("y1","y2"))`
#' * `umxPath(unique.bivariate = c('A',"B","C"))`
#' * `umxPath("A", to = c("B","C","D"), firstAt = 1)`
#'
#' @details
#' `umxPath` introduces the following new words to your path-defining vocabulary: `with`, `var`, `cov`, `means`, `v1m0`,
#' `v0m0`, `v.m0`, `v.m`, `fixedAt`, `freeAt`, `firstAt`, `unique.bivariate`, `unique.pairs`, `fromEach`, `Cholesky`, `defn`, `forms`.
#'
#' `with` creates covariances (2-headed paths):
#' `umxPath(A, with = B)`
#'
#' Specify a variance for A with
#' `umxPath(var = "A")`.
#'
#' Of course you can use vectors anywhere:
#' `umxPath(var = c('N','E', 'O'))`
#'
#' To specify a mean, you just say:
#' `umxPath(mean = "A")`, which is equivalent to `mxPath(from = "one", to = "A")`.
#'
#' To fix a path at a value, you can say:
#' `umxPath(var = "A", fixedAt = 1)`
#'
#' The common task of creating a variable with variance fixed at 1 and mean at 0 is done thus:
#' `umxPath(v1m0 = "A")`
#'
#' For free variance and means use:
#' `umxPath(v.m. = "A")`
#'
#' `umxPath` exposes `unique.bivariate` and `unique.pairs`, So to create paths A<->A, B<->B,
#' and A->B, you would say:
#' `umxPath(unique.pairs = c('A',"B"))`
#'
#' To create paths A<->B, B<->C, and A<->C, you would say:
#' `umxPath(unique.bivariate = c('A',"B","C"))`
#'
#' Creates one-headed arrows on the all.bivariate pattern
#' `umxPath(fromEach = c('A',"B","C"))`
#'
#' Setting up a latent trait, you can scale with a fixed first path thus:
#'
#' `umxPath("A", to = c("B","C","D"), firstAt = 1)`
#'
#' To create Cholesky-pattern connections:
#'
#' `umxPath(Cholesky = c("A1", "A2"), to c("var1", "var2"))`
#'
#'
#' @param from One or more source variables e.g "A" or c("A","B")
#' @param to One or more target variables for one-headed paths, e.g "A" or c("A","B").
#' @param with 2-headed path <--> from 'from' to 'with'.
#' @param var Equivalent to setting 'from' and 'arrows' = 2. nb: from, to, and with must be left empty.
#' @param cov Convenience to allow 2 variables to covary (equivalent to 'from' and 'with'). nb: leave from, to, etc. empty
#' @param means equivalent to "from = 'one', to = x. nb: from, to, with and var must be left empty (their default).
#' @param v1m0 variance of 1 and mean of zero in one call.
#' @param v.m. variance and mean, both free.
#' @param v0m0 variance and mean, both fixed at zero.
#' @param v.m0 variance free, mean fixed at zero.
#' @param v0m. variance fixed at 0, mean free.
#' @param fixedAt Equivalent to setting "free = FALSE, values = fixedAt"
#' @param freeAt Equivalent to setting "free = TRUE, values = freeAt"
#' @param firstAt First path is fixed at this value (free is ignored: warning if other than a single TRUE)
#' @param unique.bivariate equivalent to setting from, and "connect = "unique.bivariate", arrows = 2".
#' nb: from, to, and with must be left empty (their default)
#' @param unique.pairs equivalent to setting "connect = "unique.pairs", arrows = 2" (don't use from, to, or with)
#' @param fromEach Like all.bivariate, but with one head arrows. 'to' can be set.
#' @param forms Build a formative variable. 'from' variables form the latent.
#' Latent variance is fixed at 0. Loading of path 1 is fixed at 1. unique.bivariate between 'from' variables.
#' @param Cholesky Treat \strong{Cholesky} variables as latent and \strong{to} as measured, and connect as in an ACE model.
#' @param defn Implements a definition variable as a latent with zero variance & mean and labeled 'data.defVar'
#' @param connect as in mxPath - nb: from and to must also be set.
#' @param arrows as in mxPath - nb: from and to must also be set.
#' @param free whether the value is free to be optimised
#' @param values default value list
#' @param labels labels for each path
#' @param lbound lower bounds for each path value
#' @param ubound upper bounds for each path value
#' @param hasMeans Used in 'forms' case to know whether the data have means or not.
#' @return - 1 or more [OpenMx::mxPath()]s
#' @export
#' @family Core Model Building Functions
#' @seealso - [OpenMx::mxPath()]
#' @references - <https://tbates.github.io>
#' @md
#' @examples
#'
#' # ==========================================
#' # = Examples of each path type, and option =
#' # ==========================================
#'
#' umxPath("A", to = "B") # One-headed path from A to B
#' umxPath("A", to = "B", fixedAt = 1) # same, with value fixed @@1
#' umxPath("A", to = c("B", "C"), fixedAt = 1:2) # same, with more than 1 value
#' umxPath("A", to = c("B","C"), firstAt = 1) # Fix only the first path, others free
#' umxPath(var = "A") # Give a variance to A
#' umxPath(var = "A", fixedAt = 1) # Give A variance, fixed at 1
#' umxPath(means = c("A","B")) # Create a means model for A: from = "one", to = "A"
#' umxPath(v1m0 = "A") # Give "A" variance and a mean, fixed at 1 and 0 respectively
#' umxPath(v.m. = "A") # Give "A" variance and a mean, leaving both free.
#' umxPath(v0m0 = "W", label = c(NA, "data.W"))
#' umxPath("A", with = "B") # using with: same as "to = B, arrows = 2"
#' umxPath("A", with = "B", fixedAt = .5) # 2-head path fixed at .5
#' umxPath("A", with = c("B", "C"), firstAt = 1) # first covariance fixed at 1
#' umxPath(cov = c("A", "B")) # Covariance A <-> B
#' umxPath(defn = "mpg") # create latent called def_mpg, with var = 1 and label = "data.mpg"
#' umxPath(fromEach = c('a','b'), to = c('c','d')) # a->c, a<->d, b<->c, b<->d
#' umxPath(unique.bivariate = c('a','b','c')) # bivariate paths a<->b, a<->c, b<->c etc.
#' umxPath(unique.pairs = letters[1:3]) # all distinct pairs: a<->a, a<->b, a<->c, b<->b, etc.
#' umxPath(Cholesky = c("A1","A2"), to = c("m1", "m2")) # Cholesky
#'
#' \dontrun{
#' # A worked example
#' data(demoOneFactor)
#' manifests = names(demoOneFactor)
#
#' m1 = umxRAM("One Factor", data = demoOneFactor, type= "cov",
#' umxPath("G", to = manifests),
#' umxPath(var = manifests),
#' umxPath(var = "G", fixedAt = 1.0)
#' )
#' umxSummary(m1, std = TRUE)
#' require(umx)
#'
#'
#' # ====================
#' # = Cholesky example =
#' # ====================
#' # ======================================================================
#' # = 3-factor Cholesky (A component of a 5-variable 3-factor ACE model) =
#' # ======================================================================
#' latents = paste0("A", 1:3)
#' manifests = names(demoOneFactor)
#' m1 = umxRAM("Chol", data = demoOneFactor, type = "cov",
#' umxPath(Cholesky = latents, to = manifests),
#' umxPath(var = manifests),
#' umxPath(var = latents, fixedAt = 1)
#' )
#' plot(m1, splines= FALSE)
#'
#' # ======================================================================
#' # = Definition variable example. for a RAM model =
#' # = def vars are instantiated as dummy latents with data on the "mean" =
#' # ======================================================================
#' library(umx); libs("MASS") # for mvrnorm()
#' # 1. Create Data
#' N = 500 # size of each group
#' Sigma = matrix(c(1,.5,.5,1),2,2) # cov (.5)
#' group1 = MASS::mvrnorm(N, c(1,2), Sigma)
#' group2 = MASS::mvrnorm(N, c(0,0), Sigma)
#' # rbind groups and name cols "x" and "y"
#' xy = rbind(group1, group2)
#' dimnames(xy)[2]= list(c("x", "y"))
#'
#' # Create a definition variable for group status
#' groupID = rep(c(1,0), each = N)
#' df = data.frame(xy, groupID = groupID)
#'
#' # Make the model with a definition variable on means
#' m1 = umxRAM("Def Means", data = df,
#' umxPath(v.m. = c("x","y")),
#' umxPath("x", with = "y"),
#' # create a unit latent called "def_groupID" with data "data.groupID"
#' umxPath(defn = "groupID"),
#' # Add it to the x and y means
#' umxPath("def_groupID", to = c("x", "y"))
#' )
#' plot(m1)
#'
#' }
umxPath <- function(from = NULL, to = NULL, with = NULL, var = NULL, cov = NULL, means = NULL, v1m0 = NULL, v.m. = NULL, v0m0 = NULL, v.m0 = NULL, v0m. = NULL, fixedAt = NULL, freeAt = NULL, firstAt = NULL, unique.bivariate = NULL, unique.pairs = NULL, fromEach = NULL, forms = NULL, Cholesky = NULL, defn = NULL, connect = c("single", "all.pairs", "all.bivariate", "unique.pairs", "unique.bivariate"), arrows = 1, free = TRUE, values = NA, labels = NA, lbound = NA, ubound = NA, hasMeans = NULL) {
connect = match.arg(connect) # Set to single if not overridden by user.
# xmu_string2path(from)
n = 0
for (i in list(with, cov, var, forms, means, fromEach, unique.bivariate, unique.pairs, v.m. , v1m0, v0m0, v.m0, v0m., defn, Cholesky)) {
if(!is.null(i)){ n = n + 1}
}
if(n > 1){
stop("At most one of with, cov, var, forms, means, fromEach, unique.bivariate, unique.pairs, v1m0, v.m., v0m0, v.m0, v0m., defn, or Cholesky can be set: Use at one time")
} else if(n == 0){
# check that from is set?
if(is.null(from)){
stop("You must set at least 'from'")
}
}
n = 0
for (i in list(v.m. , v1m0, v0m0, v0m., v.m0)) {
if(!is.null(i)){ n = n + 1}
}
if(n && !is.null(fixedAt)){
stop("I stopped processing the model: When you use v.m. , v1m0, v0m0, v.m0, or v0m. you can't also set fixedAt")
}
if(n && !is.null(firstAt)){
stop("I stopped processing the model: When you use v.m. , v1m0, v0m0, v.m0, or v0m. you can't also set firstAt")
}
if(!is.null(defn)){
if(anyNA(labels)){
labels = paste0("data.", defn)
defn = paste0("def_" , defn)
nDef = length(defn)
if(nDef == 1){
message(nDef, " definition variable created: refer to it as: ", omxQuotes(defn))
} else {
message(nDef, " definition variables created: refer to them as: ", omxQuotes(defn))
}
} else if(length(labels) != length(defn)){
stop("Number of labels must match number of definition variables (data source)!\n",
"You can gave me ", omxQuotes(labels), "labels and ", omxQuotes(defn), " defn vars")
}else if (length(grep("data\\.", labels, value = FALSE))==0){
# if user hasn't prepended labels with "data." then add it for them
labels = paste0("data.", labels)
}
a = umxPath(var = defn, fixedAt = 0)
b = umxPath(means = defn, free = FALSE, labels = labels)
# var a var@1 mean@0
return(list(a, b))
}
if(!is.null(Cholesky)){
if(arrows!=1){
stop("Cholesky paths are one-headed: you set arrows= to something other than 1")
}
from = Cholesky
nFrom = length(from)
nTo = length(to)
if(!(nTo >= nFrom)){
stop("Must have at least as many 'to' variables as latents for Cholesky: you gave me ",
nTo, " to variables and ", nFrom, " Cholesky latents")
}
if(!is.na(labels)){
message("setting labels for Cholesky is tricky: Leave blank to have me do this for you automatically.")
}
if(!is.na(lbound)){
message("setting lbounds for Cholesky is tricky: Leave blank to have me bound the diagonal for you automatically.")
}else{
lbound = matrix(NA, nrow = nFrom, ncol = nTo); diag(lbound) = 1e-6
lbound = lbound[upper.tri(lbound, diag = TRUE)]
}
if(!is.na(ubound)){
message("nb setting ubound (other than as uniform) is tricky for Cholesky, make sure you're getting what you expected or leave it blank.")
}
if(!is.na(values)){
message("nb setting values is tricky for Cholesky, make sure you're getting what you expected, or leave it blank.")
}
labelList = fromList = toList =c()
n = nTo
for(i in seq_along(from)) {
thisFrom = rep(from[i], n)
thisTo = to[i:nTo]
fromList = c(fromList, thisFrom)
toList = c(toList, thisTo)
# Needn't bother with this as it will all be taken care of in xmuLabel...
labelList = c(labelList, paste(thisFrom, thisTo, sep = '_to_'))
n = (n - 1)
}
if(!is.na(labels)){
labelList = labels
}
return(mxPath(from = fromList, to = toList, arrows = 1, free = free, labels = labelList, lbound = lbound, ubound = ubound, values = values))
}
if(!is.null(v1m0)){
# TODO lbound ubound unlikely to be applied to two things, and can't affect result... error if they're not NULL?
if(!is.na(lbound) && is.na(ubound) && FALSE){
message("I lbounded var of ", v1m0, " @0")
}
if(any(!is.na(labels))){
if(length(labels)==2){
a = mxPath(from = v1m0, arrows = 2, free = FALSE, values = 1, labels = labels[1], lbound = 0, ubound = ubound)
b = mxPath(from = "one", to = v1m0, free = FALSE, values = 0, labels = labels[2], lbound = lbound, ubound = ubound)
} else {
stop("Managing which labels apply to the variances and which to the means is error prone:\n",
"I suggest you call: umxPath(var=) and umxPath(means=) separately"
)
}
} else {
a = mxPath(from = v1m0, arrows = 2, free = FALSE, values = 1, lbound = 0, ubound = ubound)
b = mxPath(from = "one", to = v1m0, free = FALSE, values = 0, lbound = lbound, ubound = ubound)
}
return(list(a, b))
}
if(!is.null(v.m.)){
# TODO lbound ubound unlikely to be applied to two things. lbound for var should be 0
if(!is.na(lbound) && is.na(ubound) && FALSE){
message("I lbounded var of ", v.m. , " @0")
}
if(any(!is.na(labels))){
if(length(labels)==2){
a = mxPath(from = v.m., arrows = 2, free = TRUE, values = 1, labels = labels[1], lbound = 0, ubound = ubound)
b = mxPath(from = "one", to = v.m., free = TRUE, values = 0, labels = labels[2], lbound = lbound, ubound = ubound)
} else {
stop("Managing which labels apply to the variances and which to the means is error prone:\n",
"I suggest you call: umxPath(var) and umxPath(means=) separately"
)
}
} else {
a = mxPath(from = v.m., arrows = 2, free = TRUE, values = 1, lbound = 0, ubound = ubound)
b = mxPath(from = "one", to = v.m., free = TRUE, values = 0, lbound = lbound, ubound = ubound)
}
return(list(a, b))
}
if(!is.null(v0m0)){
if(any(!is.na(labels))){
if(length(labels)==2){
a = mxPath(from = v0m0, arrows = 2, free = FALSE, values = 0, labels = labels[1])
b = mxPath(from = "one", to = v0m0, free = FALSE, values = 0, labels = labels[2])
} else {
stop("Managing which labels apply to the variances and which to the means is error prone:\n",
"I suggest you call: umxPath(var) and umxPath(means=) separately"
)
}
} else {
a = mxPath(from = v0m0, arrows = 2, free = FALSE, values = 0)
b = mxPath(from = "one", to = v0m0, free = FALSE, values = 0)
}
return(list(a, b))
}
if(!is.null(v.m0)){
# use values, if provided, to start variance
values = ifelse(is.na(values), 1, values)
if(any(!is.na(labels))){
if(length(labels)==2){
a = mxPath(from = v.m0, arrows = 2, free = TRUE, values = values, labels=labels[1])
b = mxPath(from = "one", to = v.m0, free = FALSE, values = 0, labels=labels[2])
} else {
stop("Managing which labels apply to the variances and which to the means is error prone:\n",
"I suggest you call: umxPath(var) and umxPath(means=) separately"
)
}
} else {
a = mxPath(from = v.m0, arrows = 2, free = TRUE, values = values)
b = mxPath(from = "one", to = v.m0, free = FALSE, values = 0)
}
return(list(a, b))
}
if(!is.null(v0m.)){
if(length(values)!=1 || !is.na(values)){
if(length(values)==2){
varValue = values[1]
meanValue = values[2]
} else if(length(values)==1){
varValue = 0
meanValue = values
} else {
stop("Managing which values apply to variances and which to means is error prone for more than one variable: Please do them 1 at a time\n")
}
}else{
varValue = 0
meanValue = 0
}
if(any(!is.na(labels))){
if(length(labels)==2){
a = mxPath(from = v0m., arrows = 2, free = FALSE, values = varValue, labels = labels[1])
b = mxPath(from = "one", to = v0m., free = TRUE , values = meanValue, labels = labels[2])
} else {
stop("Managing which labels apply to the variances and which to the means is error prone:\n",
"I suggest you call: umxPath(var) and umxPath(means=) separately"
)
}
} else {
a = mxPath(from = v0m., arrows = 2, free = FALSE, values = varValue)
b = mxPath(from = "one", to = v0m., free = TRUE , values = meanValue)
}
return(list(a, b))
}
if(!is.null(forms)){
# ====================
# = Handle formative =
# ====================
# http://davidakenny.net/cm/mvar.htm
if(is.null(from)){
stop("You have to have offer up at least 3 unique 'from' variables to make a formative")
}
if(is.null(hasMeans)){
message("You have to set hasMeans so I know whether to make them for this formative: Assuming TRUE")
hasMeans = TRUE
}
if(length(forms) > 1){
stop("It's tricky to setup multiple forms variables in 1 line. e-mail if you'd like this to work..")
} else {
numPaths = length(forms)
free = rep(TRUE, numPaths)
free[1] = FALSE
values = rep(NA, numPaths)
values[1] = 1
}
a = mxPath(from = from, connect = "unique.bivariate", arrows = 2)
b = mxPath(from = from, to = forms, free = free, values = values)
if(hasMeans){
c = mxPath(from = forms, arrows = 2, free = FALSE, values = 0)
d = mxPath(from = "one", to = forms, free = FALSE, values = 0)
e = mxPath(from = from, arrows = 2, free = TRUE, values = 1, labels = labels, lbound = 0, ubound = ubound)
f = mxPath(from = "one", to = from, free = TRUE, values = 0, labels = labels, lbound = lbound, ubound = ubound)
x = list(a, b, c, d, e, f)
} else {
c = mxPath(from = forms, arrows = 2, free = FALSE, values = 0)
e = mxPath(from = from, arrows = 2, free = TRUE, values = 1, labels = labels, lbound = 0, ubound = ubound)
x = list(a, b, c, e)
}
# return(c(a, b))
return(x)
}
if(!is.null(with)){
# ===============
# = Handle with =
# ===============
if(is.null(from)){
stop("To use with, you must set 'from = ' also")
} else {
from = from
to = with
arrows = 2
connect = "single"
}
} else if(!is.null(cov)){
# ==============
# = Handle cov =
# ==============
if(!is.null(from) | !is.null(to)){
stop("To use 'cov = ', 'from' and 'to' should be empty")
} else if (length(cov) != 2){
stop("cov must consist of two and only two variables.\n",
"If you want to covary more variables, use: 'unique.bivariate =' \n",
"or else use 'from =', 'to=', and 'connect = \"unique.bivariate\"'\n",
"If you want to set variances for a list of variables, use 'var = c(\"X\")'")
} else {
from = cov[1]
to = cov[2]
arrows = 2
connect = "single"
}
} else if(!is.null(var)){
# ==============
# = handle var =
# ==============
if(!is.null(from) || !is.null(to)){
stop("To use 'var = ', 'from' and 'to' should be empty")
} else {
from = var
to = var
arrows = 2
connect = "single"
if(is.na(lbound)){
lbound = 0
# message("I lbounded var of ", omxQuotes(var), " @0")
}
}
} else if(!is.null(means)){
# ================
# = Handle means =
# ================
if(!is.null(from) | !is.null(to)){
stop("To use means, from and to should be empty.",
"Just say 'means = c(\"X\",\"Y\").'")
} else {
from = "one"
to = means
arrows = 1
connect = "single"
}
} else if(!is.null(unique.bivariate)){
# ===========================
# = Handle unique.bivariate =
# ===========================
if(length(unique(unique.bivariate)) < 2){
stop("You have to have at least 2 unique variables to use unique.bivariate")
}
if(!is.null(from)){
stop("To use unique.bivariate, 'from=' should be empty.\n",
"Just say 'unique.bivariate = c(\"X\",\"Y\").'\n",
"or 'unique.bivariate = c(\"X\",\"Y\"), to = \"Z\"")
} else {
if(is.null(to)){
to = NA
} else {
to = to
}
from = unique.bivariate
arrows = 2
connect = "unique.bivariate"
}
} else if(!is.null(fromEach)){
# ===========================
# = Handle fromEach =
# ===========================
if(!is.null(from)){
stop("To use fromEach, 'from=' must be empty (perhaps you were trying to set to= but didn't say that?).\n",
"Just say 'fromEach = c(\"X\",\"Y\").'\n",
"or 'fromEach = c(\"X\",\"Y\"), to = \"Z\"")
} else {
if(is.null(to)){
to = NA
} else {
to = to
}
from = fromEach
arrows = 1
connect = "all.bivariate"
}
} else if(!is.null(unique.pairs)){
# ===========================
# = Handle unique.pairs =
# ===========================
if(length(unique(unique.pairs)) < 2){
stop("You have to have at least 2 unique variables to use unique.pairs")
}
if(!is.null(from)){
stop("To use unique.pairs, 'from=' should be empty.\n",
"Just say 'unique.pairs = c(\"X\",\"Y\").'\n",
"or 'unique.pairs = c(\"X\",\"Y\"), to = \"Z\"")
} else {
if(is.null(to)){
to = NA
} else {
to = to
}
from = unique.pairs
arrows = 2
connect = "unique.pairs"
}
} else {
if(is.null(from) && is.null(to)){
stop("You don't seem to have requested any paths.\n",
"see help(umxPath) for all the possibilities")
} else {
# assume it is from to
if(is.null(to)){
to = NA
}
from = from
to = to
arrows = arrows
connect = connect
}
}
# ==============================================
# = From, to, and connect have now been set... =
# ==============================================
# ==============================
# = Handle fixedAt and firstAt =
# ==============================
if(sum(c(is.null(fixedAt), is.null(firstAt), is.null(freeAt))) < 2){
stop("At most one of fixedAt freeAt and firstAt can be set: You seem to have tried to set more than one.")
}
# Handle firstAt
if(!is.null(firstAt)){
if(length(from) > 1 && length(to) > 1){
stop("It's not wise to use firstAt with multiple from sources AND multiple to targets. I'd like to think about this before implementing it..")
} else {
numPaths = max(length(from), length(to))
free = rep(TRUE, numPaths)
free[1] = FALSE
values = rep(NA, numPaths)
values[1] = firstAt
}
}
# Handle fixedAt
if(!is.null(fixedAt)){
free = FALSE
values = fixedAt
}
# Handle freeAt
if(!is.null(freeAt)){
free = TRUE
values = freeAt
}
mxPath(from = from, to = to, connect = connect, arrows = arrows, free = free, values = values, labels = labels, lbound = lbound, ubound = ubound)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.