Nothing
#' Helper to make a basic top, MZ, and DZ model.
#'
#' @description
#' `xmu_make_TwinSuperModel` makes basic twin model containing `top`, `MZ`, and `DZ` models. It intelligently handles thresholds for
#' ordinal data, and means model for covariates matrices in the twin models if needed.
#'
#' It's the replacement for `xmu_assemble_twin_supermodel` approach.
#'
#' @details
#' `xmu_make_TwinSuperModel` is used in twin models (e.g.[umxCP()], [umxACE()] and [umxACEv()] and will be added to the other models: [umxGxE()], [umxIP()],
#' simplifying code maintenance.
#'
#' It takes `mzData` and `dzData`, a list of the `selDVs` to analyse and optional `selCovs` (as well as `sep` and `nSib`), along with other
#' relevant information such as whether the user wants to `equateMeans`.
#' It can also handle a `weightVar`.
#'
#' If covariates are passed in these are included in the means model (via a call to `xmuTwinUpgradeMeansToCovariateModel`.
#'
#'
#' **Modeling**
#'
#' **Matrices created**
#'
#' *top model*
#'
#' For raw and WLS data, `top` contains a `expMeans` matrix (if needed). For summary data, the top model contains only a name.
#'
#' For ordinal data, `top` gains `top.threshMat` (from a call to [umxThresholdMatrix()]).
#'
#' For covariates, top stores the `intercepts` matrix and a `betaDef` matrix. These are then used to make expMeans in `MZ` and `DZ`.
#'
#' *MZ and DZ models*
#'
#' `MZ` and `DZ` contain the data, and an expectation referencing `top.expCovMZ` and `top.expMean`, and, `vector = bVector`.
#' For continuous raw data, MZ and DZ contain [OpenMx::mxExpectationNormal()] and [OpenMx::mxFitFunctionML()].
#' For WLS these the fit function is switched to [OpenMx::mxFitFunctionWLS()] with appropriate `type` and `allContinuousMethod`.
#'
#'
#' For binary, a constraint and algebras are included to constrain `Vtot` (A+C+E) to 1.
#'
#' If a `weightVar` is detected, these columns are used to create a row-weighted MZ and DZ models.
#'
#' If `equateMeans` is `TRUE`, then the Twin-2 vars in the mean matrix are equated by label with Twin-1.
#'
#'
#' Decent starts are guessed from the data.
#' `varStarts` is computed as `sqrt(variance)/3` of the DVs and `meanStarts` as the variable means.
#' For raw data, a check is made for ordered variables. For Binary variables, means are fixed at 0 and
#' total variance (A+C+E) is fixed at 1. For ordinal variables, the first 2 thresholds are fixed.
#'
#' Where needed, e.g. continuous raw data, top adds a means matrix "expMean".
#' For ordinal data, top adds a [umxThresholdMatrix()].
#'
#' If binary variables are present, matrices and a constraint to hold `A+C+E == 1` are added to top.
#'
#' If a weight variable is offered up, an `mzWeightMatrix` will be added.
#'
#' **Data handling**
#'
#' In terms of data handling, `xmu_make_TwinSuperModel` was primarily designed to take
#' data.frames and process these into mxData.
#' It can also, however, handle cov and mxData input.
#'
#' It can process data into all the types supported by `mxData`.
#'
#' Raw data input with a target of `cov` or `cor` type requires the `numObsMZ` and `numObsDZ` to be set.
#'
#' Type "WLS", "DWLS", or "ULS", data remain raw, but are handled as WLS in the [OpenMx::mxFitFunctionWLS()].
#'
#' Unused columns are dropped.
#'
#' If you pass in raw data, you can't request type cov/cor yet. Will work on this if desired.
#'
#' @param name for the supermodel
#' @param mzData Dataframe containing the MZ data
#' @param dzData Dataframe containing the DZ data
#' @param selDVs List of manifest base names (e.g. BMI, NOT 'BMI_T1') (OR, you don't set "sep", the full variable names)
#' @param selCovs List of covariate base names (e.g. age, NOT 'age_T1') (OR, you don't set "sep", the full variable names)
#' @param sep string used to expand selDVs into selVars, i.e., "_T" to expand BMI into BMI_T1 and BMI_T2 (optional but STRONGLY encouraged)
#' @param type One of 'Auto','FIML','cov', 'cor', 'WLS','DWLS', or 'ULS'. Auto tries to react to the incoming mxData type (raw/cov).
#' @param allContinuousMethod "cumulants" or "marginals". Used in all-continuous WLS data to determine if a means model needed.
#' @param nSib Number of members per family (default = 2)
#' @param equateMeans Whether to equate T1 and T2 means (default = TRUE).
#' @param weightVar If provided, a vector objective will be used to weight the data. (default = NULL).
#' @param numObsMZ Number of MZ observations contributing (for summary data only)
#' @param numObsDZ Number of DZ observations contributing (for summary data only)
#' @param bVector Whether to compute row-wise likelihoods (defaults to FALSE).
#' @param dropMissingDef Whether to automatically drop missing def var rows for the user (default = TRUE). You get a polite note.
#' @param verbose (default = FALSE)
#' @return - [mxModel()]s for top, MZ and DZ.
#' @export
#' @family xmu internal not for end user
#' @md
#' @examples
# # TODO add tests with numObsMZ = NULL, numObsDZ = NULL, equateMeans = TRUE,
# # TODO add tests with weightVar = NULL, bVector = FALSE,
#
#' # ==============
#' # = Continuous =
#' # ==============
#' library(umx)
#' data(twinData)
#' twinData = umx_scale(twinData, varsToScale= c('ht1','ht2'))
#' mzData = twinData[twinData$zygosity %in% "MZFF",]
#' dzData = twinData[twinData$zygosity %in% "DZFF",]
#' m1= xmu_make_TwinSuperModel(mzData=mzData, dzData=dzData, selDVs=c("wt","ht"), sep="", nSib=2)
#' names(m1) # "top" "MZ" "DZ"
#' class(m1$MZ$fitfunction)[[1]] == "MxFitFunctionML"
#'
#' # ====================
#' # = With a covariate =
#' # ====================
#'
#' m1= xmu_make_TwinSuperModel(mzData=mzData, dzData=dzData,
#' selDVs= "wt", selCovs= "age", sep="", nSib=2)
#' m1$top$intercept$labels
#' m1$MZ$expMean
#'
#' # ===============
#' # = WLS example =
#' # ===============
#' m1=xmu_make_TwinSuperModel(mzData=mzData, dzData=dzData,selDVs=c("wt","ht"),sep="",type="WLS")
#' class(m1$MZ$fitfunction)[[1]] == "MxFitFunctionWLS"
#' m1$MZ$fitfunction$type =="WLS"
#' # Check default all-continuous method
#' m1$MZ$fitfunction$continuousType == "cumulants"
#'
#' # Choose non-default type (DWLS)
#' m1= xmu_make_TwinSuperModel(mzData= mzData, dzData= dzData,
#' selDVs= c("wt","ht"), sep="", type="DWLS")
#' m1$MZ$fitfunction$type =="DWLS"
#' class(m1$MZ$fitfunction)[[1]] == "MxFitFunctionWLS"
#'
#' # Switch WLS method
#' m1 = xmu_make_TwinSuperModel(mzData= mzData, dzData= dzData, selDVs= c("wt","ht"), sep= "",
#' type = "WLS", allContinuousMethod = "marginals")
#' m1$MZ$fitfunction$continuousType == "marginals"
#' class(m1$MZ$fitfunction)[[1]] == "MxFitFunctionWLS"
#'
#'
#' # ============================================
#' # = Bivariate continuous and ordinal example =
#' # ============================================
#' data(twinData)
#' selDVs = c("wt", "obese")
#' # Cut BMI column to form ordinal obesity variables
#' ordDVs = c("obese1", "obese2")
#' obesityLevels = c('normal', 'overweight', 'obese')
#' cutPoints = quantile(twinData[, "bmi1"], probs = c(.5, .2), na.rm = TRUE)
#' twinData$obese1 = cut(twinData$bmi1, breaks = c(-Inf, cutPoints, Inf), labels = obesityLevels)
#' twinData$obese2 = cut(twinData$bmi2, breaks = c(-Inf, cutPoints, Inf), labels = obesityLevels)
#' # Make the ordinal variables into mxFactors (ensure ordered is TRUE, and require levels)
#' twinData[, ordDVs] = umxFactor(twinData[, ordDVs])
#' mzData = twinData[twinData$zygosity %in% "MZFF",]
#' dzData = twinData[twinData$zygosity %in% "DZFF",]
#' m1 = xmu_make_TwinSuperModel(mzData= mzData, dzData= dzData, selDVs= selDVs, sep="", nSib= 2)
#' names(m1) # "top" "MZ" "DZ"
#'
#' # ==============
#' # = One binary =
#' # ==============
#' data(twinData)
#' cutPoints = quantile(twinData[, "bmi1"], probs = .2, na.rm = TRUE)
#' obesityLevels = c('normal', 'obese')
#' twinData$obese1 = cut(twinData$bmi1, breaks = c(-Inf, cutPoints, Inf), labels = obesityLevels)
#' twinData$obese2 = cut(twinData$bmi2, breaks = c(-Inf, cutPoints, Inf), labels = obesityLevels)
#' ordDVs = c("obese1", "obese2")
#' twinData[, ordDVs] = umxFactor(twinData[, ordDVs])
#' selDVs = c("wt", "obese")
#' mzData = twinData[twinData$zygosity %in% "MZFF",]
#' dzData = twinData[twinData$zygosity %in% "DZFF",]
#' m1 = xmu_make_TwinSuperModel(mzData= mzData, dzData= dzData, selDVs= selDVs, sep= "", nSib= 2)
#'
#' # ========================================
#' # = Cov data (calls xmuTwinSuper_CovCor) =
#' # ========================================
#'
#' data(twinData)
#' mzData =cov(twinData[twinData$zygosity %in% "MZFF", tvars(c("wt","ht"), sep="")], use="complete")
#' dzData =cov(twinData[twinData$zygosity %in% "DZFF", tvars(c("wt","ht"), sep="")], use="complete")
#' m1 = xmu_make_TwinSuperModel(mzData= mzData, dzData= dzData, selDVs= "wt", sep= "",
#' nSib= 2, numObsMZ = 100, numObsDZ = 100, verbose=TRUE)
#' class(m1$MZ$fitfunction)[[1]] =="MxFitFunctionML"
#' dimnames(m1$MZ$data$observed)[[1]]==c("wt1", "wt2")
#'
xmu_make_TwinSuperModel <- function(name = "twin_super", mzData, dzData, selDVs, selCovs= NULL, sep = NULL, type = c("Auto", "FIML", "cov", "cor", "WLS", "DWLS", "ULS"), allContinuousMethod = c("cumulants", "marginals"), numObsMZ = NULL, numObsDZ = NULL, nSib = 2, equateMeans = TRUE, weightVar = NULL, bVector = FALSE, dropMissingDef = TRUE, verbose= FALSE) {
# TODO for xmu_make_TwinSuperModel
# TODO: 1. xmu_make_TwinSuperModel Add selCovs
# TODO: 2. xmu_make_TwinSuperModel Add beta matrix for fixed covariates in means.
# TODO: 4. xmu_make_TwinSuperModel more tests in a test page
# *Note*: If dropping this into an existing model, all your code setting: nVar, selVars, used
# any code figuring out data-type, is not needed!
# ===================
# = match arguments =
# ===================
type = match.arg(type)
allContinuousMethod = match.arg(allContinuousMethod)
# ====================================================
# = Figure out selVars, nVar, usedVars, and dataType =
# ====================================================
if(is.null(sep)){
fullVars = selDVs
fullCovs = selCovs
message("Polite note: It's better to use 'sep'. This might become compulsory as it helps umx manage variable names in twin models.")
# stop("You MUST set 'sep'. Otherwise xmu_make_top can't reliably expand selDVs into full variable names")
}else{
fullVars = tvars(selDVs , sep = sep, suffixes= 1:nSib)
fullCovs = tvars(selCovs, sep = sep, suffixes= 1:nSib)
}
dataType = umx_is_cov(dzData, boolean = FALSE)
if(verbose){ umx_msg(dataType)}
if(type %in% c("cov", "cor") && !dataType %in% c("cov", "cor")){
stop("You've requested type= cov or cor, but the provided dataType is ", omxQuotes(dataType), " I don't support that yet. Please pass in cov data.")
}
if(dataType == "raw") {
if(!all(is.null(c(numObsMZ, numObsDZ)))){
stop("You should not be setting numObsMZ or numObsDZ with ", omxQuotes(dataType), " data...")
}
if(!is.null(weightVar)){
# Weight variable provided: check it exists in each frame.
mzWeightData = xmu_extract_column(mzData, weightVar)
dzWeightData = xmu_extract_column(dzData, weightVar)
bVector = TRUE
}
# ============================================
# = Make mxData, dropping any unused columns =
# ============================================
usedVars = c(fullVars, fullCovs)
if(verbose){
umx_msg(usedVars)
umx_msg(fullVars)
umx_msg(fullCovs)
umx_msg(type)
}
mzData = xmu_make_mxData(mzData, type = type, manifests = usedVars, fullCovs = fullCovs, numObs = numObsMZ, dropMissingDef = dropMissingDef)
dzData = xmu_make_mxData(dzData, type = type, manifests = usedVars, fullCovs = fullCovs, numObs = numObsDZ, dropMissingDef = dropMissingDef)
# ========================================================================
# = 3. Add mxExpectationNormal, means and var matrices to top, MZ and DZ =
# ========================================================================
# Find ordinal variables
colTypes = umx_is_ordered(xmu_extract_column(mzData, fullVars), summaryObject= TRUE)
if(colTypes$nFactors == 0){
model = xmuTwinSuper_Continuous(name= name, fullVars = fullVars, fullCovs = fullCovs, mzData = mzData, dzData = dzData, sep = sep, equateMeans= equateMeans, nSib = nSib, type= type, allContinuousMethod= allContinuousMethod)
} else if(sum(colTypes$isBin) == 0){
model = xmuTwinSuper_NoBinary(name= name, fullVars = fullVars, fullCovs = fullCovs, mzData = mzData, dzData = dzData, sep = sep, equateMeans= equateMeans, nSib = nSib)
} else if(sum(colTypes$isBin) > 0){
model = xmuTwinSuper_SomeBinary(name= name, fullVars = fullVars, fullCovs = fullCovs, mzData = mzData, dzData = dzData, sep = sep, equateMeans= equateMeans, nSib = nSib, verbose = verbose)
} else {
stop("You appear to have something other than I expected in terms of WLS, or binary, ordinal and continuous variable mix")
}
# nb: means not yet equated across twins
} else if(dataType %in% c("cov", "cor")){
if(!is.null(weightVar)){
stop("You can't set weightVar when you give cov data - use cov.wt to create weighted cov matrices, or pass in raw data")
}
if(!is.null(fullCovs)){
stop("You can't set covariates when you have cov data: needs raw data to estimate on each row")
}
model = xmuTwinSuper_CovCor(name=name, fullVars = fullVars, mzData= mzData, dzData= dzData, type = dataType, numObsMZ = numObsMZ, numObsDZ = numObsDZ)
} else {
stop("Datatype \"", dataType, "\" not understood")
}
# ==============================
# = Add mxFitFunction to model =
# ==============================
if(type %in% c('WLS', 'DWLS', 'ULS')) {
message("Data treated as ", type)
# Still mxExpectationNormal (`top` is not affected - either has or lacks means matrix already).
# Replace the MZ and DZ model FitFunctions
model$MZ = mxModel(model$MZ, mxFitFunctionWLS(type = type, allContinuousMethod = allContinuousMethod) )
model$DZ = mxModel(model$DZ, mxFitFunctionWLS(type = type, allContinuousMethod = allContinuousMethod) )
}else{
model$MZ = mxModel(model$MZ, mxFitFunctionML(vector = bVector) )
model$DZ = mxModel(model$DZ, mxFitFunctionML(vector = bVector) )
}
if(!is.null(weightVar)){
model = xmu_twin_add_WeightMatrices(model, mzWeights = mzWeightData, dzWeights = dzWeightData)
}
return(model)
}
# =============================
# = raw twin-assembly helpers =
# =============================
#' Create core of twin model for all-continuous data.
#'
#' @description
#' Sets up top, MZ and DZ submodels with a means model, data, and expectation for all-continuous data.
#' called by [xmu_make_TwinSuperModel()].
#'
#' @param name The name of the supermodel
#' @param fullVars Full Variable names (wt_T1)
#' @param fullCovs Full Covariate names (age_T1)
#' @param mzData An mxData object containing the MZ data
#' @param dzData An mxData object containing the DZ data
#' @param type type
#' @param nSib nSib
#' @param sep default "_T"
#' @param allContinuousMethod allContinuousMethod
#' @param equateMeans Whether to equate the means across twins (default TRUE)
#' @return - A twin model
#' @export
#' @family xmu internal not for end user
#' @seealso - [xmu_make_TwinSuperModel()]
#' @md
#' @examples
#' \dontrun{
#' xmuTwinSuper_Continuous(name="twin_super", selVars = selVars, selCovs = selCovs,
#' mzData = mzData, dzData = dzData, equateMeans = TRUE, type = type,
#' allContinuousMethod = allContinuousMethod, nSib= nSib, sep = "_T" )
#' }
xmuTwinSuper_Continuous <- function(name= NULL, fullVars, fullCovs = NULL, sep, mzData, dzData, equateMeans, type, allContinuousMethod, nSib){
# ==============================
# = Handle all continuous case =
# ==============================
# expects fullVars and fullCovs to be expanded
# = Special case (for WLS, can affect mean or not)
umx_check(!is.null(name), "stop", "I need a name for the super model")
nVar = length(fullVars)/nSib; # Number of dependent variables ** per INDIVIDUAL ( so times nSib for a family)**
if(type %in% c('WLS', 'DWLS', 'ULS') & allContinuousMethod == "cumulants"){
# Raw data, mo means (WLS with cumulants
umx_check(is.null(fullCovs), "warning", "covariates not allowed in all-continuous WLS with method = cumulants. Switch to other method")
model = mxModel(name,
mxModel("top"),
mxModel("MZ", mzData, mxExpectationNormal("top.expCovMZ") ),
mxModel("DZ", dzData, mxExpectationNormal("top.expCovDZ") ),
mxFitFunctionMultigroup(c("MZ", "DZ"))
)
} else {
# Raw data or (WLS && allContinuousMethod != cumulants): Needs means and MZ and DZ a means model.
# ====================================================================
# = Figure out start values =
# = NOTE: fullVars is expanded by the time we get to here... no sep. =
# ====================================================================
starts = xmu_starts(mzData= mzData, dzData= dzData, selVars= fullVars, equateMeans= equateMeans, nSib= nSib, varForm= "Cholesky")
# Contains starts$varStarts; starts$meanStarts; starts$meanLabels # (Equated across twins if requested)
model = mxModel(name,
mxModel("top",
umxMatrix("expMean", "Full", nrow= 1, ncol= nVar*nSib, free= TRUE, values= starts$meanStarts, labels= starts$meanLabels, dimnames= list("means", fullVars))
),
mxModel("MZ", mzData, mxExpectationNormal("top.expCovMZ", "top.expMean") ),
mxModel("DZ", dzData, mxExpectationNormal("top.expCovDZ", "top.expMean") ),
mxFitFunctionMultigroup(c("MZ", "DZ"))
)
if(!is.null(fullCovs)){
model = xmuTwinUpgradeMeansToCovariateModel(model, fullVars = fullVars, fullCovs = fullCovs, nSib = nSib, sep = sep)
}
}
return(model)
}
#' xmuTwinSuper_NoBinary
#'
#' @description
#' # Handle 1 or more ordinal variables (no binary)
#' Means ordinal, but no binary
#' Means: all free, start cont at the measured value, ordinals @0
#'
#' Notes: Ordinal requires:
#' 1. Variable set to mxFactor
#' 2. For Binary variables:
#' 1. Latent means of binary variables fixedAt 0 (or by data.def?)
#' 2. Latent variance (A + C + E) constrained == 1
#' 3. For Ordinal variables, first 2 thresholds fixed
#'
#' @param name = NULL
#' @param fullVars full names of variables
#' @param fullCovs full names of covariates
#' @param mzData mzData
#' @param dzData dzData
#' @param sep sep
#' @param nSib nSib
#' @param equateMeans T/F
#' @param verbose (Default FALSE)
#' @return - twin model
#' @export
#' @family xmu internal not for end user
#' @md
#' @examples
#' #
xmuTwinSuper_NoBinary <- function(name = NULL, fullVars, fullCovs = NULL, mzData, dzData, sep, nSib, equateMeans= TRUE, verbose=FALSE){
umx_check(!is.null(name), "stop", "I need a name for the super model")
nVar = length(fullVars)/nSib; # Number of dependent variables ** per INDIVIDUAL ( so times-2 for a family)**
colTypes = umx_is_ordered(xmu_extract_column(mzData, fullVars), summaryObject= TRUE)
# ===============
# = Inform user =
# ===============
message("Found ", (colTypes$nOrdVars/nSib), " pair(s) of ordinal variables:", omxQuotes(colTypes$ordVarNames), " (No binary)")
if(length(colTypes$contVarNames) > 0){
message(length(colTypes$contVarNames)/nSib, " pair(s) of continuous variables:", omxQuotes(colTypes$contVarNames[1:(length(colTypes$contVarNames)/nSib)]))
}
# ===========================
# = Figure out start values =
# ===========================
starts = xmu_starts(mzData= mzData, dzData= dzData, selVars= fullVars, equateMeans= equateMeans, nSib= nSib, varForm= "Cholesky")
# Contains starts$varStarts; starts$meanStarts; starts$meanLabels # (Equated across twins if requested)
model = mxModel(name,
mxModel("top",
umxMatrix("expMean", "Full" , nrow = 1, ncol = (nVar * nSib), free = TRUE, values = starts$meanStarts, labels = starts$meanLabels, dimnames = list("means", fullVars)),
umxThresholdMatrix(rbind(mzData$observed, dzData$observed), fullVarNames = fullVars, sep = sep, verbose = verbose)
),
mxModel("MZ", mzData, mxExpectationNormal("top.expCovMZ", "top.expMean", thresholds = "top.threshMat") ),
mxModel("DZ", dzData, mxExpectationNormal("top.expCovDZ", "top.expMean", thresholds = "top.threshMat") ),
mxFitFunctionMultigroup(c("MZ", "DZ"))
)
if(!is.null(fullCovs)){
model = xmuTwinUpgradeMeansToCovariateModel(model, fullVars = fullVars, fullCovs = fullCovs, nSib = nSib, sep = sep)
}
return(model)
}
# xmuTwinSuper_SomeBinary(name=NULL, fullVars = fullVars, fullCovs = fullCovs, mzData = mzData, dzData = dzData, nSib, equateMeans= equateMeans, sep = "_T", verbose = verbose)
xmuTwinSuper_SomeBinary <- function(name = NULL, fullVars, fullCovs = NULL, mzData, dzData, sep, nSib, equateMeans= equateMeans, verbose = verbose){
# =============================================
# = Handle case of at least 1 binary variable =
# =============================================
# ===========================================================================
# = Means: bin fixed, others free, start cont at the measured value, ord @0 =
# ===========================================================================
# ===================================
# = Constrain Ordinal variance @1 =
# ===================================
umx_check(!is.null(name), "stop", "I need a name for the super model")
nVar = length(fullVars)/nSib; # Number of dependent variables ** per INDIVIDUAL ( so times-2 for a family)**
colTypes = umx_is_ordered(xmu_extract_column(mzData, fullVars), summaryObject= TRUE)
nBin = colTypes$nBinVars/nSib
# ===============
# = Inform user =
# ===============
message("Found ", nBin, " pairs of binary variables:", omxQuotes(colTypes$binVarNames))
message("\nI am fixing the latent means and variances of these variables to 0 and 1")
if(colTypes$nOrdVars > 0){
message("There were also ", colTypes$nOrdVars/nSib, " pair(s) of ordinal variables:", omxQuotes(colTypes$ordVarNames))
}
if(length(colTypes$contVarNames) > 0){
message("\nand ", length(colTypes$contVarNames)/nSib, " pair(s) of continuous variables first of each pair was: ", omxQuotes(colTypes$contVarNames[1:(length(colTypes$contVarNames)/nSib)]))
}else{
message("No continuous variables")
}
# Algebra to pick out the ordinal variables.
# TODO check using twin 1 to pick where the bin variables are is robust...
# Fill with zeros: default for ordinals and binary...
meansFree = !colTypes$isBin # Fix the binary variables at zero (umx_means did this)
the_bin_cols = which(colTypes$isBin)[1:nBin] # Columns in which the bin vars appear for T1, i.e., c(1,3,5)
binBracketLabels = paste0("Vtot[", the_bin_cols, ",", the_bin_cols, "]") # "Vtot[1,1]" "Vtot[3,3]"
# =============================
# = Figure out start values =
# = NOTE: fullVars is expanded by the time we get to here... no sep. =
# ===================================================================
starts = xmu_starts(mzData= mzData, dzData= dzData, selVars= fullVars, equateMeans= equateMeans, nSib= nSib, varForm= "Cholesky")
# Contains starts$varStarts; starts$meanStarts; starts$meanLabels # (Equated across twins if requested)
model = mxModel(name,
mxModel("top",
# means
umxMatrix("expMean", "Full" , nrow = 1, ncol = (nVar*nSib), free = meansFree, values = starts$meanStarts, labels = starts$meanLabels, dimnames = list("means", fullVars)),
# thresholds
umxThresholdMatrix(rbind(mzData$observed, dzData$observed), fullVarNames = fullVars, sep = sep, verbose = verbose),
# var-cov
# NOTE: Assumes A+C+E is Vtot (i.e., these are the three and only components forming expCov)
mxAlgebra(name = "Vtot", A + C + E), # Total variance (also added by models with std = TRUE, but is OK to add twice)
umxMatrix("binLabels" , "Full", nrow = nBin, ncol = 1, labels = binBracketLabels),
umxMatrix("Unit_nBinx1", "Unit", nrow = nBin, ncol = 1),
mxConstraint(name = "constrain_Bin_var_to_1", binLabels == Unit_nBinx1)
),
mxModel("MZ", mzData, mxExpectationNormal("top.expCovMZ", "top.expMean", thresholds = "top.threshMat") ),
mxModel("DZ", dzData, mxExpectationNormal("top.expCovDZ", "top.expMean", thresholds = "top.threshMat") ),
mxFitFunctionMultigroup(c("MZ", "DZ"))
)
if(!is.null(fullCovs)){
model = xmuTwinUpgradeMeansToCovariateModel(model, fullVars = fullVars, fullCovs = fullCovs, nSib = nSib, sep = sep)
}
return(model)
}
# xmuTwinSuper_CovCor(name=name, fullVars = fullVars, mzData= mzData, dzData = dzData, numObsMZ = numObsMZ, numObsDZ = numObsDZ)
xmuTwinSuper_CovCor <- function(name=NULL, fullVars, mzData, dzData, type, numObsMZ, numObsDZ){
# Check the data and get it into shape
umx_check(!is.null(numObsMZ), "stop", paste0("You must set numObsMZ with summary data"))
umx_check(!is.null(numObsDZ), "stop", paste0("You must set numObsDZ with summary data"))
mzData = xmu_make_mxData(mzData, type = type, manifests = fullVars, numObs = numObsMZ)
dzData = xmu_make_mxData(dzData, type = type, manifests = fullVars, numObs = numObsDZ)
model = mxModel(name,
mxModel("top"),
mxModel("MZ", mxExpectationNormal("top.expCovMZ"), mzData),
mxModel("DZ", mxExpectationNormal("top.expCovDZ"), dzData),
mxFitFunctionMultigroup(c("MZ", "DZ"))
)
return(model)
}
#' Not for end-users: Add a means model with covariates to a twin model
#'
#' @description
#' Does the following to `model` (i.e., a umx top/MZ/DZ supermodel):
#'
#' 1. Change `top.expMeans` to `top.intercept`.
#' 2. Create `top.meansBetas` for beta weights in rows (of covariates) and columns for each variable.
#' 3. Add matrices for each twin's data.cov vars (matrixes are called `T1DefVars`).
#' 4. Switch `mxExpectationNormal` in each data group to point to the local `expMean`.
#' 5. Add "expMean" algebra to each data group.
#' * `grp.expMean` sums `top.intercept` and `grp.DefVars %*% top.meansBetas` for each twin.
#'
#' @details In umx models with no covariates, means live in `top$expMean`
#'
#'
#' @param model The [umxSuperModel()] we are modifying (must have `MZ` `DZ` and `top` submodels)
#' @param fullVars the FULL names of manifest variables
#' @param fullCovs the FULL names of definition variables
#' @param nSib How many siblings
#' @param sep How twin variable names have been expanded, e.g. "_T".
#' @return - model, now with means model extended to covariates.
#' @export
#' @family xmu internal not for end user
#' @seealso - called by [xmuTwinSuper_Continuous()]
#' @md
#' @examples
#' \dontrun{
#' data(twinData) # ?twinData from Australian twins.
#' twinData[, c("ht1", "ht2")] = twinData[, c("ht1", "ht2")] * 10
#' mzData = twinData[twinData$zygosity %in% "MZFF", ]
#' dzData = twinData[twinData$zygosity %in% "DZFF", ]
#' # m1 = umxACE(selDVs= "ht", sep= "", dzData= dzData, mzData= mzData, autoRun= FALSE)
# # This example won't work, as umxACE drops the covs from the data...
#' # m2 = xmuTwinUpgradeMeansToCovariateModel(m1, fullVars = c("ht1", "ht2"),
#' # fullCovs = c("age1", "sex1", "age2", "sex2"), sep = "")
#'
#' }
#'
xmuTwinUpgradeMeansToCovariateModel <- function(model, fullVars, fullCovs, nSib, sep) {
# TODO Check the def vars are still in the dataset at this point...
umx_check(all(c("MZ", "DZ", "top") %in% names(model)), "stop", message= "need a model with top, MZ and DZ sub-models")
umx_check(!is.null(model$top$expMean), "stop", "Model must have $top$expMean for xmuTwinUpgradeMeansToCovariateModel to work.")
baseVars = umx_explode_twin_names(fullVars, sep = sep)$baseNames
baseCovs = umx_explode_twin_names(fullCovs, sep = sep)$baseNames
nVar = length(baseVars)
nCov = length(baseCovs)
# 1. Make `meansBetas` matrix
betaLabels = paste0(rep(baseCovs, times= nVar), "_b_Var", rep(1:nVar, each= nCov) )
meansBetas = umxMatrix("meansBetas", "Full", nrow = nCov, ncol = nVar, free = TRUE, labels= betaLabels, values = 0, lbound = -2, ubound = 2)
dimnames(meansBetas) = list(baseCovs, baseVars)
# age "age_b_Var1" "age_b_Var2" "age_b_Var3"
# sex "sex_b_Var1" "sex_b_Var2" "sex_b_Var3"
# =============================================================================================================
# = 1. Add the betaDef matrix to top and replace top.expMean with top.intercept
# =============================================================================================================
# (assumes expMean has correctly locked to zero for binary variables)
top = model$top
# Add meansBetas to top, and rename expMean to intercept
top = mxModel(top, meansBetas)
top = umxRenameMatrix(top, matrixName = "expMean", name="intercept")
# New expectation to point to expMean in the local data group
if(is.na(model$MZ$expectation$thresholds)){
# no thresholds found, don't add any
MZexpectation = mxExpectationNormal("top.expCovMZ", "expMean")
DZexpectation = mxExpectationNormal("top.expCovDZ", "expMean")
} else {
# had thresholds, keep them
MZexpectation = mxExpectationNormal("top.expCovMZ", "expMean", thresholds = model$MZ$expectation$thresholds) # "top.threshMat"
DZexpectation = mxExpectationNormal("top.expCovDZ", "expMean", thresholds = model$DZ$expectation$thresholds) # "top.threshMat"
}
# 2. Upgrade MZ and DZ groups with local Def Vars and new expMean algebra
matsAndAlg = xmu_twin_make_def_means_mats_and_alg(baseCovs= baseCovs, fullVars = fullVars, nSib = nSib, sep= sep) # "T1DefVars", "T2DefVars"... + "expMean" algebra
MZ = mxModel(model$MZ, MZexpectation, matsAndAlg)
DZ = mxModel(model$DZ, DZexpectation, matsAndAlg)
return(mxModel(model, top, MZ, DZ))
}
#' Make the matrices and algebras for definition-based means models
#'
#' @description
#' not-for-end-user helper for means in twin models. Returns matrices for
#' each definition variable, and an algebra to compute means.
#'
#' @param baseCovs base names of the DVs, e.g. "age"
#' @param fullVars full names of the DVs, e.g. "E_T1"
#' @param nSib how many siblings - typically 2
#' @param sep in twin variable, i.e., "_T"
#' @return matrices and an algebra
#' @export
#' @family xmu internal not for end user
#' @seealso - [xmuTwinUpgradeMeansToCovariateModel()]
#' @md
#' @examples
#' # xmu_twin_make_def_means_mats_and_alg(baseCovs= baseCovs,
#' # fullVars = fullVars, nSib = nSib, sep= sep)
xmu_twin_make_def_means_mats_and_alg <- function(baseCovs, fullVars, nSib, sep) {
nCov = length(baseCovs)
defMats = list()
for (i in 1:nSib) {
tmp = umxMatrix(paste0("T", i, "DefVars"), type= "Full", nrow= 1, ncol= nCov, free= FALSE, labels= paste0("data.", baseCovs, sep, i))
defMats[[i]] = tmp
}
# note: intercept is full width
if(nSib == 2){
tmpAlg = mxAlgebra(name= "expMean", top.intercept + cbind(T1DefVars %*% top.meansBetas, T2DefVars %*% top.meansBetas), dimnames= list("means", fullVars) )
} else if (nSib == 3){
tmpAlg = mxAlgebra(name= "expMean", top.intercept + cbind(
T1DefVars %*% top.meansBetas,
T2DefVars %*% top.meansBetas,
T3DefVars %*% top.meansBetas), dimnames= list("means", fullVars)
)
}else{
stop("I can't handle nSib = ", nSib, ", only 2 or 3 so far. post as an issue on github if you need this")
}
return(append(defMats, tmpAlg))
}
#' Helper providing boilerplate start values for means and variance in twin models
#'
#' @description
#' `xmu_starts` can handle several common/boilerplate situations in which means and variance start values
#' are used in twin models.
#'
#' @param mzData Data for MZ pairs.
#' @param dzData Data for DZ pairs.
#' @param selVars Variable names: If sep = NULL, then treated as full names for both sibs.
#' @param sep All the variables full names.
#' @param equateMeans (NULL)
#' @param nSib How many subjects in a family.
#' @param varForm currently just "Cholesky" style.
#' @param SD = TRUE (FALSE = variance, not SD).
#' @param divideBy = 3 (A,C,E) 1/3rd each. Use 1 to do this yourself post-hoc.
#' @return - varStarts and meanStarts
#' @export
#' @family xmu internal not for end user
#' @md
#' @examples
#' data(twinData)
#' selDVs = c("wt", "ht")
#' mzData = twinData[twinData$zygosity %in% "MZFF", ]
#' dzData = twinData[twinData$zygosity %in% "DZFF", ]
#'
#' round(sqrt(var(dzData[,tvars(selDVs, "")], na.rm=TRUE)/3),3)
#' xmu_starts(mzData, dzData, selVars=selDVs, nSib= 2, sep="", equateMeans=TRUE, varForm="Cholesky")
#'
#' # Variance instead of SD
#' round(var(dzData[,tvars(selDVs, "")], na.rm=TRUE)/3,3)
#' xmu_starts(mzData, dzData, selVars = selDVs, nSib = 2, sep= "",
#' equateMeans= TRUE, varForm= "Cholesky", SD= FALSE)
#'
#' # one variable
#' xmu_starts(mzData, dzData, selVars= "wt", nSib = 2, sep="", equateMeans = TRUE)
#'
#' # Ordinal/continuous mix
#' data(twinData)
#' twinData= umx_scale_wide_twin_data(data=twinData,varsToScale="wt",sep= "")
#' # Cut BMI column to form ordinal obesity variables
#' cuts = quantile(twinData[, "bmi1"], probs = c(.5, .8), na.rm = TRUE)
#' obLevels = c('normal', 'overweight', 'obese')
#' twinData$obese1= cut(twinData$bmi1,breaks=c(-Inf,cuts,Inf),labels=obLevels)
#' twinData$obese2= cut(twinData$bmi2,breaks=c(-Inf,cuts,Inf),labels=obLevels)
#' # Make the ordinal variables into mxFactors
#' ordDVs = c("obese1", "obese2")
#' twinData[, ordDVs] = umxFactor(twinData[, ordDVs])
#' mzData = twinData[twinData$zygosity %in% "MZFF",]
#' dzData = twinData[twinData$zygosity %in% "DZFF",]
#' xmu_starts(mzData, dzData, selVars = c("wt","obese"), sep= "",
#' nSib= 2, equateMeans = TRUE, SD= FALSE)
#'
#' xmu_starts(mxData(mzData, type="raw"), mxData(mzData, type="raw"),
#' selVars = c("wt","obese"), sep= "", nSib= 2, equateMeans = TRUE, SD= FALSE)
#'
#' # ==============
#' # = Three sibs =
#' # ==============
#' data(twinData)
#' twinData$wt3 = twinData$wt2
#' twinData$ht3 = twinData$ht2
#' selDVs = c("wt", "ht")
#' mzData = twinData[twinData$zygosity %in% "MZFF", ]
#' dzData = twinData[twinData$zygosity %in% "DZFF", ]
#'
#' xmu_starts(mzData, dzData, selVars=selDVs, sep="", nSib=3, equateMeans=TRUE)
#' xmu_starts(mzData, dzData, selVars=selDVs, sep="", nSib=3, equateMeans=FALSE)
xmu_starts <- function(mzData, dzData, selVars = selVars, sep = NULL, equateMeans= NULL, nSib, varForm = c("Cholesky"), SD= TRUE, divideBy = 3) {
if(!is.null(sep)){
selVars = umx_paste_names(selVars, sep = sep, suffixes = 1:nSib)
}
nVar = length(selVars)/nSib
dataType = umx_is_cov(dzData, boolean = FALSE)
if(dataType == "raw") {
if(is.null(equateMeans)){
stop("you have to tell xmu_starts whether to equate the means or not")
}
if(umx_is_MxData(mzData)){
allData = rbind(mzData$observed, dzData$observed)[,selVars]
}else{
allData = rbind(mzData, dzData)[,selVars]
}
# Uses sib 1 and 2 irrespective of nSib (doesn't matter here)
T1 = allData[, 1:nVar, drop = FALSE]
T2 = allData[, (nVar+1):(nVar*2), drop = FALSE];
names(T2) = names(T1)
longData = rbind(T1, T2)[, selVars[1:nVar], drop = FALSE]
# Mean starts (1 per variable)
meanStarts = umx_means(longData, ordVar = 0, na.rm = TRUE)
# Make nSib wide
meanStarts = rep(meanStarts, times = nSib)
if(equateMeans){
# e.g. "expMean_ht1" "expMean_wt1" "expMean_ht1"
meanLabels = rep(paste0("expMean_", selVars[1:nVar]), times = nSib)
} else {
meanLabels = paste0("expMean_", selVars)
}
varStarts = umx_var(longData, format= "diag", ordVar = 1, use = "pairwise.complete.obs", strict = FALSE)
} else if(dataType %in% c("cov", "cor")){
meanStarts = NA # Not used for summary data
meanLabels = NA
if(umx_is_MxData(mzData)){
het_mz = umx_reorder(mzData$observed, selVars)
het_dz = umx_reorder(dzData$observed, selVars)
}else{
het_mz = umx_reorder(mzData, selVars)
het_dz = umx_reorder(dzData, selVars)
}
varStarts = (diag(het_mz)[1:nVar]+ diag(het_dz)[1:nVar])/2
}else{
stop("xmu_starts can only handle raw and cov/cor data types. You gave me ", omxQuotes(dataType))
}
# Covariance matrix, 1/3 allocated to each of A=C=E.
varStarts = varStarts/divideBy
if(varForm == "Cholesky"){
if(SD){
# sqrt() to switch from var to path coefficient scale
# Sqrt to switch from var to path coefficient scale
varStarts = t(chol(diag(varStarts, length(varStarts))))
varStarts = matrix(varStarts, nVar, nVar)
} else {
varStarts = diag(varStarts, nVar, nVar)
}
} else {
stop("Not sure how to handle varForm = ", omxQuotes(varForm))
}
return(list(varStarts=varStarts, meanStarts= meanStarts, meanLabels= meanLabels))
}
#' Add weight matrices to twin models.
#'
#' @description
#' Add weight models (MZw, DZw) with matrices (e.g. mzWeightMatrix) to a twin model, and
#' update `mxFitFunctionMultigroup`. This yields a weighted model with vector objective.
#'
#' To weight objective functions in OpenMx, you specify a container model that applies the weights
#' m1 is the model with no weights, but with "vector = TRUE" option added to the FIML objective.
#' This option makes FIML return individual likelihoods for each row of the data (rather than a single
#' -2LL value for the model)
#' You then optimize weighted versions of these likelihoods by building additional models containing
#' weight data and an algebra that multiplies the likelihoods from the first model by the weight vector.
#'
#' @param model umx-style twin model
#' @param mzWeights data for MZ weights matrix
#' @param dzWeights data for DZ weights matrix
#' @return - model
#' @export
#' @family xmu internal not for end user
#' @md
#' @examples
#' tmp = umx_make_twin_data_nice(data=twinData, sep="", zygosity="zygosity", numbering= 1:2)
#' m1 = umxACE(selDVs = "wt", data = tmp, dzData = "DZFF", mzData = "MZFF", autoRun= FALSE)
#' m1$MZ$fitfunction$vector= TRUE
#'
#' tmp = xmu_twin_add_WeightMatrices(m1,
#' mzWeights= rnorm(nrow(m1$MZ$data$observed)),
#' dzWeights= rnorm(nrow(m1$DZ$data$observed))
#' )
#'
xmu_twin_add_WeightMatrices <- function(model, mzWeights = NULL, dzWeights = NULL) {
umx_check(model$MZ$fitfunction$vector, "stop", "xmu_twin_add_WeightMatrix: You need to set the fitFunction to vector mode... but it appears I haven't")
if(umx_is_MxMatrix(mzWeights)){
# just check the name
if(mzWeights$name != "mzWeightMatrix"){ stop(
paste0("xmu_twin_add_WeightMatrices expects name of mzWeights to be 'mzWeightMatrix'.\n", "It was ", omxQuotes(mzWeights$name))
)}
} else {
# make incoming raw data into matrices
# convert from dataframe if necessary
if(!is.null(dim(mzWeights)) ){ mzWeights = mzWeights[,1] }
if(!is.null(dim(dzWeights)) ){ dzWeights = dzWeights[,1] }
mzWeights = mxMatrix(name = "mzWeightMatrix", type = "Full", nrow = length(mzWeights), ncol = 1, free = FALSE, values = mzWeights)
dzWeights = mxMatrix(name = "dzWeightMatrix", type = "Full", nrow = length(dzWeights), ncol = 1, free = FALSE, values = dzWeights)
}
model = mxModel(model,
mxModel("MZw", mzWeights,
mxAlgebra(-2 * sum(mzWeightMatrix * log(MZ.objective) ), name = "mzWeightedCov"),
mxFitFunctionAlgebra("mzWeightedCov")
),
mxModel("DZw", dzWeights,
mxAlgebra(-2 * sum(dzWeightMatrix * log(DZ.objective) ), name = "dzWeightedCov"),
mxFitFunctionAlgebra("dzWeightedCov")
),
mxFitFunctionMultigroup(c("MZw", "DZw"))
)
return(model)
}
# one_by_nCov = umxMatrix("T1DefVars", "Full", nrow = 1, ncol = nCov, values = c(exp(1), pi))
# meansBetas = umxMatrix("meansBetas", "Full", nrow = nCov, ncol = nVar, free = TRUE, labels=betaLabels, values = 1:6, lbound = -2, ubound = 2)
# one_by_nCov = qm(1, 57) # qm(sex=1, age=57)
# # a beta for each covariate for each variable: nCov * nVar (product has nVar columns)
# meansBetas = qm(
# .1, .2, .3|
# .4, .5, .6)
# dimnames(meansBetas)=list(c("sex", "age"), c("var1","var2","var3"))
# var1 var2 var3
# sex 0.1 0.2 0.3
# age 0.4 0.5 0.6
# one_by_nCov %*% meansBetas
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.