newPsydiff: newPsydiff

View source: R/prepareModel.R

newPsydiffR Documentation

newPsydiff

Description

newPsydiff

Usage

newPsydiff(
  dataset,
  latentEquations,
  manifestEquations,
  L,
  Rchol,
  A0,
  m0,
  grouping = NULL,
  parameters,
  groupingvariables = NULL,
  additional = NULL,
  srUpdate = TRUE,
  alpha = 0.2,
  beta = 2,
  kappa = 0,
  timeStep = NULL,
  integrateFunction = "default",
  breakEarly = TRUE,
  verbose = 0,
  eps = rev(c(1e-04, 1e-05, 1e-06, 1e-08)),
  direction = "central",
  sanityChecks = TRUE
)

Arguments

dataset

list with fields person, observations, and dt

latentEquations

string with latent equations

manifestEquations

string with manifest equations

L

lower triangular Cholesky decomposition of the diffusion matrix

Rchol

lower triangular Cholesky decomposition of the manifest variance

A0

lower triangular Cholesky decomposition of the initial latent variance

m0

vector of initial latent means

grouping

string specifying the groupings

parameters

list with named parameters

groupingvariables

list with variables used for grouping

additional

list for anything additional that should be passed to the latent or manifest equation

srUpdate

boolean: Should the square root version be used for the updates?

alpha

controls the alpha parameter of the unscented transform. Should be relatively small.

beta

controls the beta parameter of the unscented transform. 2 should work fine for normal distributions

kappa

controls the kappa parameter of the unscented transform. 0 should work fine for normal distributions

timeStep

timeStep of the numerical integration. You should pass a vector as the integration might run into problems for a specific value (e.g., .01) but work fine for another (e.g., .005). The function will stop once one of the integrations resulted in no errors

integrateFunction

which function should be used for integration? Possible are rk4, runge_kutta_dopri5, and default. default will first try rk4 and - if this fails - runge_kutta_dopri5. rk4 is often a lot slower on average but runge_kutta_dopri5 tends to get stuck from time to time.

breakEarly

boolean: Should the integration be stopped if the prediction for at least one time point did not work? Setting to FALSE can be useful for debugging

verbose

Values > 0 will print additional information

eps

controls the step size in the numerical approximation of the gradients. You should pass a vector, as this will allow psydiff to try computing the gradients for an alternative step size if one of them fails

direction

direction of the steps for gradient approximation. Possible are right, left, and central.

sanityChecks

boolean: Should the parameters be checked and adjusted if they might cause problems?

Value

psydiffModel that can be compiled with compileModel()

Examples

## Not run: 
library(psydiff)
library(ctsemOMX)

## Example 3: Kalman Filter
set.seed(175446)

## define the population model:
n.subjects = 10
# set the drift matrix. Note that drift eta_1_eta2 is set to equal 0 in the population.
ct_drift <- matrix(c(-.3,.2,0,-.5), ncol = 2)

generatingModel<-ctsem::ctModel(Tpoints=5,n.latent=2,n.TDpred=0,n.TIpred=0,n.manifest=2,
                                MANIFESTVAR=diag(0.5,2),
                                LAMBDA=diag(1,2),
                                DRIFT=ct_drift,
                                DIFFUSION=matrix(c(.5,0,0,.5),2),
                                CINT=matrix(0,nrow = 2, ncol = 1),
                                T0MEANS=matrix(0,ncol=1,nrow=2),
                                T0VAR=diag(1,2), type = "omx")

# simulate a training data and testing data set
traindata <- ctsem::ctGenerate(generatingModel,n.subjects = n.subjects, wide = TRUE)
# introduce some missings:
traindata[1:4,2] <- NA
traindata[5,2:5] <- NA
traindata[6,7] <- NA
traindata[19,4] <- NA

## Build the analysis model. Note that drift eta1_eta2 is freely estimated
# although it is 0 in the population.
myModel <- ctsem::ctModel(Tpoints=5,n.latent=2,n.TDpred=0,n.TIpred=0,n.manifest=2,
                          LAMBDA=diag(1,2),
                          MANIFESTVAR=diag(.5,2), MANIFESTMEANS = "auto",
                          CINT=0,
                          DIFFUSION=matrix(c('eta1_eta1',0,0,'eta2_eta2'),2),
                          T0MEANS=matrix(c(1,2),ncol=1,nrow=2),
                          T0VAR="auto", type = "omx")

myModel <- ctFit(myModel, dat = traindata, objective = "Kalman", useOptimizer = T,
                 stationary = c('T0TRAITEFFECT','T0TIPREDEFFECT'))
myModel$mxobj$fitfunction$result[[1]]


## with psydiff
# prepare data
longdata <- ctWideToLong(traindata, n.manifest = 2, Tpoints =  5)
dat <- list("person" = longdata[,"id"], "observations" = longdata[,c("Y1", "Y2")], "dt" = longdata[,"dT"])

## prepare model

latentEquations <- "
dx = DRIFT*x;
"

manifestEquations <- "
y = MANIFESTMEANS + LAMBDA*x;
"

LAMBDA <- fromMxMatrix(myModel$mxobj$LAMBDA)
DRIFT <- fromMxMatrix(myModel$mxobj$DRIFT)
MANIFESTMEANS <- fromMxMatrix(myModel$mxobj$MANIFESTMEANS)

parameters <- list("LAMBDA" = LAMBDA, "DRIFT" = DRIFT,
                   "MANIFESTMEANS" = MANIFESTMEANS)

m0  <- fromMxMatrix(myModel$mxobj$T0MEANS)
A0 <- sdeModelMatrix(values = t(chol(myModel$mxobj$T0VAR$result)),
                     labels = matrix(c("T0var_eta1", "",
                                       "T0var_eta2_eta1", "T0var_eta2"),2,2,T))
Rchol = sdeModelMatrix(values = t(chol(myModel$mxobj$MANIFESTVAR$result)),
                       labels = matrix(c("", "",
                                         "", ""),2,2,T))
L = sdeModelMatrix(values = t(chol(myModel$mxobj$DIFFUSION$result)),
                   labels = matrix(c("eta1_eta1", "",
                                     "", "eta2_eta2"),2,2,T))

# set up model
model <- newPsydiff(dataset = dat, latentEquations = latentEquations,
                    manifestEquations = manifestEquations,
                    L = L, Rchol = Rchol, A0 = A0, m0 = m0,
                    parameters = parameters)

# compile model
compileModel(model)


# fit model
out <- fitModel(psydiffModel = model)
sum(out$m2LL)

# change parameter values
# inspect the model
newValues <- inspectModel(model)
psydiff::setParameterValues(parameterTable = model$pars$parameterTable,
                            parameterValues = newValues, parameterLabels = names(newValues))

# fit model
out <- fitModel(psydiffModel = model)
sum(out$m2LL)

## optimize model with optimx

optimized <- psydiffOptimx(model)
optimized.model <- psydiff::extractOptimx(model = model, opt = optimized)
fitModel(optimized.model)

## additional grouping
# we will make the initial mean mm_Y1 person specific and mm_Y2 depend on a grouping parameter
grouping <- "
mm_Y1 | person;
mm_Y2 | group1;
"
groupinglist <- list("group1" = c(rep(1,5), rep(2,5)))

# set up model
model <- newPsydiff(dataset = dat, latentEquations = latentEquations,
                    manifestEquations = manifestEquations, grouping = grouping,
                    L = L, Rchol = Rchol, A0 = A0, m0 = m0,
                    parameters = parameters, groupingvariables = groupinglist)
# compile model
compileModel(model)

# set parameters
parval <- psydiff::getParameterValues(model)

optimized <- psydiffOptimx(model, control = list(trace = 1))
optimized.model <- psydiff::extractOptimx(model = model, opt = optimized)
fitModel(optimized.model)

## The following example is taken from ctsem and also demonstrates the use of the GIST optimizer
library(ctsemOMX)
data('ctExample3')
model <- ctModel(n.latent = 1, n.manifest = 3, Tpoints = 100,
                 LAMBDA = matrix(c(1, 'lambda2', 'lambda3'), nrow = 3, ncol = 1),
                 CINT= matrix('cint'), T0VAR = diag(1),
                 MANIFESTMEANS = matrix(c(0, 'manifestmean2', 'manifestmean3'), nrow = 3,
                                        ncol = 1))
fit <- ctFit(dat = ctExample3, ctmodelobj = model, objective = 'Kalman',
             stationary = c("T0TRAITEFFECT", "T0TIPREDEFFECT"), useOptimizer = F)
omxGetParameters(fit$mxobj)
fit$mxobj$fitfunction$result[[1]]

latentEquations <- "
dx(0) = cint + driftEta1*x(0);
"
manifestEquations <- "
y(0) = x(0);
y(1) = manifestmean2 + lambda2*x(0);
y(2) = manifestmean3 + lambda3*x(0);
"
# The optimization depends highly on the starting values. The values blow are taken from ctsem
parameters <- list("driftEta1" = -1.150227824,
                   "cint" = 11.214338733,
                   "lambda2" = 0.480098989,
                   "lambda3" = 0.959200513,
                   "manifestmean2" = 2.824753235,
                   "manifestmean3" = 5.606085485)

m0  <- fromMxMatrix(fit$mxobj$T0MEANS)
A0 <- sdeModelMatrix(values = fit$mxobj$T0VARchol$result,
                     labels = matrix("",1,1))
Rchol = sdeModelMatrix(values = fit$mxobj$MANIFESTVARchol$result,
                       labels = matrix(c("mvar1", "", "",
                                         "", "mvar2", "",
                                         "", "", "mvar3"),3,3,T))
L = sdeModelMatrix(values = fit$mxobj$DIFFUSIONchol$result,
                   labels = matrix(c("lvar"),1,1,T))

longdata <- ctWideToLong(ctExample3, n.manifest = 3, Tpoints =  100)
dat <- list("person" = longdata[,"id"], "observations" = longdata[,c("Y1", "Y2", "Y3")], "dt" = longdata[,"dT"])

# set up model
model <- newPsydiff(dataset = dat, latentEquations = latentEquations,
                    manifestEquations = manifestEquations,
                    L = L, Rchol = Rchol, A0 = A0, m0 = m0,
                    parameters = parameters, verbose = 0, kappa = 0, alpha = .81, beta = 2)

compileModel(model)
out <- fitModel(model)
out$m2LL
getParameterValues(model)

opt <- psydiffOptimx(model, method = c('Nelder-Mead', 'BFGS', 'nlm', 'nlminb'), control = list(trace = 1))

startingValues <- psydiff::getParameterValues(model)
adaptiveLassoWeights <- rep(1, length(startingValues))
names(adaptiveLassoWeights) <- names(startingValues)
regularizedParameters <- "lambda2"
lambda <- 10

opt <- GIST(model = model, startingValues = startingValues, lambda = lambda,
            adaptiveLassoWeights = adaptiveLassoWeights,
            regularizedParameters = regularizedParameters,
            verbose = 1, maxIter_out = 200, sig = .4, break_outer = 10e-20)

getParameterValues(opt$model)
out <- fitModel(opt$model)
matplot(out$predictedManifest, type = "l")
points(dat$observations[,1])
points(dat$observations[,2])
points(dat$observations[,3])

## second order model
set.seed(12391)

library(ctsemOMX)
DRIFT <- matrix(c(0, 1,
                  -.005, -.04),2,2,T)
LAMBDA <- matrix(c(1,0),1,2,T)
MANIFESTVAR <- matrix(1)
LATENTVAR <- matrix(c(0.001, 0,
                      0, 0.001),2,2,T)
ctMod <- ctModel(LAMBDA = LAMBDA, n.manifest = 1, n.latent = 2,
                 Tpoints = 300,
                 T0MEANS = c(0,1),
                 T0VAR = diag(2), CINT = matrix(c(0, 1),nrow = 2),
                 MANIFESTVAR = MANIFESTVAR, MANIFESTMEANS = 0,
                 DRIFT = DRIFT, DIFFUSION = LATENTVAR)
ctDat <- ctGenerate(ctMod, n.subjects = 1)
plot(ctDat[,"Y1"], type = "p")

## Define model in psydiff

manifestEquations <- "
y = x(0);
"

latentEquations <- "
dx(0) = x(1);
dx(1) = cint + a*x(0) +b*x(1);
"

A0 <- diag(1,2)
m0 <- matrix(c("0","1"), nrow = 2)

Rchol <- matrix("1",1,1)
L <- ctMod$DIFFUSION

parameters <- list("cint" = 1, "a" = -.5, "b" = -.5)

dat <- list("person" = ctDat[,"id"],
            "observations" = matrix(ctDat[,"Y1"], ncol = 1),
            "dt" = c(0,rep(1,length(ctDat[,"time"])-1)))

## optimize model

model <- newPsydiff(dataset = dat, latentEquations = latentEquations,
                    manifestEquations = manifestEquations,
                    L = L, Rchol = Rchol, A0 = A0, m0 = m0,
                    parameters = parameters)
compileModel(model)

(startingValues <- psydiff::getParameterValues(model))

## Of the optimizers I tried, Nelder-Mead results in  the best fit for this model
nm <- psydiffOptimNelderMead(model, control = list("trace" = 1))
lines(model$predictedManifest, col = "#008080", lwd = 3) # Note: model object was changed by reference


# Predator Prey Model
library(deSolve)

NPerson <- 5
measurementOccasions <- 100
burnin <- 1000

# function from deSolve:
LotVmod <- function (Time, State, Pars) {
  with(as.list(c(State, Pars)), {
    dx = x*(alpha - beta*y)
    dy = -y*(gamma - delta*x)
    return(list(c(dx, dy)))
  })
}

Pars <- c(alpha = 2, beta = .5, gamma = .4, delta = .2)
State <- c(x = 10, y = 10)
Time <- seq(0, 200, length.out = measurementOccasions + burnin)

persons <- c()
laggedTimes <- c()
for(p in 1:NPerson){
  out <- as.data.frame(ode(func = LotVmod, y = State, parms = Pars, times = Time))
  out <- out[(nrow(out) - measurementOccasions+1):nrow(out), ]
  obs <- as.matrix(out[,c("x", "y")]) %*%matrix(c(1,2,0,0,
                                                  0,0,1,4), nrow = 2, byrow = T)
  obs <- obs +
    mvtnorm::rmvnorm(n = measurementOccasions,
                     mean = rep(0,4),
                     sigma = diag(1,4))
  if(p == 1){
    observations <- obs
  }else{
    observations <- rbind(observations, obs)
  }
  laggedTime <- out$time
  laggedTime[2:length(laggedTime)] <- laggedTime[2:length(laggedTime)] - laggedTime[1:(length(laggedTime)-1)]
  laggedTime[1] <- 0
  laggedTimes <- c(laggedTimes, laggedTime)

  persons <- c(persons, rep(p,measurementOccasions))
}

matplot(as.matrix(out[,c("x", "y")]), type = "l")
matpoints(observations[persons == 1,],
          type = "p", lty = 1)
##
## with psydiff
# prepare data
dat <- list("person" = persons,
            "observations" = observations,
            "dt" = laggedTimes)

## prepare model

latentEquations <- "
dx[0] = x[0]*(alpha - beta*x[1]);
dx[1] = -x[1]*(gamma - delta*x[0]);
"
#'
manifestEquations <- "
y[0] = 1*x[0];
y[1] = a1*x[0];
y[2] = 1*x[1];
y[3] = a2*x[1];
"

parameters <- list("alpha" = 0.1, "beta" = .1,
                   "gamma" = .1, "delta" = .1,
                   "a1" = 1, "a2" = 1)

m0  <- matrix(c("m01 = 1", "m02 = 1"), nrow = 2)
A0 <-matrix(c("a01 = 1", "0",
              "a012 = .2", "a02 = 1"),2,2,T)
Rchol <-matrix(c("r1 = 0.1", "0", "0", "0",
                 "0", "r2 = 0.1", "0", "0",
                 "0", "0", "r3 = 0.1", "0",
                 "0", "0", "0", "r4 = 0.1"),4,4,T)
L = matrix(c("0", "0",
             "0", "0"),2,2,T)

# set up model
model <- newPsydiff(dataset = dat, latentEquations = latentEquations,
                    manifestEquations = manifestEquations,
                    L = L, Rchol = Rchol, A0 = A0, m0 = m0,
                    parameters = parameters)

# compile model
compileModel(model)

fitModel(model)
startVal <- inspectModel(model)
setParameterValues(model$pars$parameterTable,
                   parameterValues = startVal,
                   parameterLabels = names(startVal))

opt <- psydiffOptimx(model,
                     method = c("nlminb"),
                     control = list("trace" = 1, "kkt" = FALSE, "maxit" = 200),
                     hessian = FALSE)
model <- extractOptimx(model = model, opt = opt)
f <- fitModel(model)
inspectModel(model)

plot(observations[persons==1,1])
lines(1:length(f$predictedManifest[,1]), f$predictedManifest[,1], col = "red")

## End(Not run)

jhorzek/psydiff documentation built on Sept. 15, 2022, 6:26 a.m.