newPsydiff: newPsydiff
In jhorzek/psydiff: panel stochastic differential equations

newPsydiff

R Documentation

newPsydiff

Description

newPsydiff

Usage

newPsydiff(
  dataset,
  latentEquations,
  manifestEquations,
  L,
  Rchol,
  A0,
  m0,
  grouping = NULL,
  parameters,
  groupingvariables = NULL,
  additional = NULL,
  srUpdate = TRUE,
  alpha = 0.2,
  beta = 2,
  kappa = 0,
  timeStep = NULL,
  integrateFunction = "default",
  breakEarly = TRUE,
  verbose = 0,
  eps = rev(c(1e-04, 1e-05, 1e-06, 1e-08)),
  direction = "central",
  sanityChecks = TRUE
)

Arguments

`dataset`	list with fields person, observations, and dt
`latentEquations`	string with latent equations
`manifestEquations`	string with manifest equations
`L`	lower triangular Cholesky decomposition of the diffusion matrix
`Rchol`	lower triangular Cholesky decomposition of the manifest variance
`A0`	lower triangular Cholesky decomposition of the initial latent variance
`m0`	vector of initial latent means
`grouping`	string specifying the groupings
`parameters`	list with named parameters
`groupingvariables`	list with variables used for grouping
`additional`	list for anything additional that should be passed to the latent or manifest equation
`srUpdate`	boolean: Should the square root version be used for the updates?
`alpha`	controls the alpha parameter of the unscented transform. Should be relatively small.
`beta`	controls the beta parameter of the unscented transform. 2 should work fine for normal distributions
`kappa`	controls the kappa parameter of the unscented transform. 0 should work fine for normal distributions
`timeStep`	timeStep of the numerical integration. You should pass a vector as the integration might run into problems for a specific value (e.g., .01) but work fine for another (e.g., .005). The function will stop once one of the integrations resulted in no errors
`integrateFunction`	which function should be used for integration? Possible are rk4, runge_kutta_dopri5, and default. default will first try rk4 and - if this fails - runge_kutta_dopri5. rk4 is often a lot slower on average but runge_kutta_dopri5 tends to get stuck from time to time.
`breakEarly`	boolean: Should the integration be stopped if the prediction for at least one time point did not work? Setting to FALSE can be useful for debugging
`verbose`	Values > 0 will print additional information
`eps`	controls the step size in the numerical approximation of the gradients. You should pass a vector, as this will allow psydiff to try computing the gradients for an alternative step size if one of them fails
`direction`	direction of the steps for gradient approximation. Possible are right, left, and central.
`sanityChecks`	boolean: Should the parameters be checked and adjusted if they might cause problems?

Value

psydiffModel that can be compiled with compileModel()

Examples

## Not run: 
library(psydiff)
library(ctsemOMX)

## Example 3: Kalman Filter
set.seed(175446)

## define the population model:
n.subjects = 10
# set the drift matrix. Note that drift eta_1_eta2 is set to equal 0 in the population.
ct_drift <- matrix(c(-.3,.2,0,-.5), ncol = 2)

generatingModel<-ctsem::ctModel(Tpoints=5,n.latent=2,n.TDpred=0,n.TIpred=0,n.manifest=2,
                                MANIFESTVAR=diag(0.5,2),
                                LAMBDA=diag(1,2),
                                DRIFT=ct_drift,
                                DIFFUSION=matrix(c(.5,0,0,.5),2),
                                CINT=matrix(0,nrow = 2, ncol = 1),
                                T0MEANS=matrix(0,ncol=1,nrow=2),
                                T0VAR=diag(1,2), type = "omx")

# simulate a training data and testing data set
traindata <- ctsem::ctGenerate(generatingModel,n.subjects = n.subjects, wide = TRUE)
# introduce some missings:
traindata[1:4,2] <- NA
traindata[5,2:5] <- NA
traindata[6,7] <- NA
traindata[19,4] <- NA

## Build the analysis model. Note that drift eta1_eta2 is freely estimated
# although it is 0 in the population.
myModel <- ctsem::ctModel(Tpoints=5,n.latent=2,n.TDpred=0,n.TIpred=0,n.manifest=2,
                          LAMBDA=diag(1,2),
                          MANIFESTVAR=diag(.5,2), MANIFESTMEANS = "auto",
                          CINT=0,
                          DIFFUSION=matrix(c('eta1_eta1',0,0,'eta2_eta2'),2),
                          T0MEANS=matrix(c(1,2),ncol=1,nrow=2),
                          T0VAR="auto", type = "omx")

myModel <- ctFit(myModel, dat = traindata, objective = "Kalman", useOptimizer = T,
                 stationary = c('T0TRAITEFFECT','T0TIPREDEFFECT'))
myModel$mxobj$fitfunction$result[[1]]


## with psydiff
# prepare data
longdata <- ctWideToLong(traindata, n.manifest = 2, Tpoints =  5)
dat <- list("person" = longdata[,"id"], "observations" = longdata[,c("Y1", "Y2")], "dt" = longdata[,"dT"])

## prepare model

latentEquations <- "
dx = DRIFT*x;
"

manifestEquations <- "
y = MANIFESTMEANS + LAMBDA*x;
"

LAMBDA <- fromMxMatrix(myModel$mxobj$LAMBDA)
DRIFT <- fromMxMatrix(myModel$mxobj$DRIFT)
MANIFESTMEANS <- fromMxMatrix(myModel$mxobj$MANIFESTMEANS)

parameters <- list("LAMBDA" = LAMBDA, "DRIFT" = DRIFT,
                   "MANIFESTMEANS" = MANIFESTMEANS)

m0  <- fromMxMatrix(myModel$mxobj$T0MEANS)
A0 <- sdeModelMatrix(values = t(chol(myModel$mxobj$T0VAR$result)),
                     labels = matrix(c("T0var_eta1", "",
                                       "T0var_eta2_eta1", "T0var_eta2"),2,2,T))
Rchol = sdeModelMatrix(values = t(chol(myModel$mxobj$MANIFESTVAR$result)),
                       labels = matrix(c("", "",
                                         "", ""),2,2,T))
L = sdeModelMatrix(values = t(chol(myModel$mxobj$DIFFUSION$result)),
                   labels = matrix(c("eta1_eta1", "",
                                     "", "eta2_eta2"),2,2,T))

# set up model
model <- newPsydiff(dataset = dat, latentEquations = latentEquations,
                    manifestEquations = manifestEquations,
                    L = L, Rchol = Rchol, A0 = A0, m0 = m0,
                    parameters = parameters)

# compile model
compileModel(model)


# fit model
out <- fitModel(psydiffModel = model)
sum(out$m2LL)

# change parameter values
# inspect the model
newValues <- inspectModel(model)
psydiff::setParameterValues(parameterTable = model$pars$parameterTable,
                            parameterValues = newValues, parameterLabels = names(newValues))

# fit model
out <- fitModel(psydiffModel = model)
sum(out$m2LL)

## optimize model with optimx

optimized <- psydiffOptimx(model)
optimized.model <- psydiff::extractOptimx(model = model, opt = optimized)
fitModel(optimized.model)

## additional grouping
# we will make the initial mean mm_Y1 person specific and mm_Y2 depend on a grouping parameter
grouping <- "
mm_Y1 | person;
mm_Y2 | group1;
"
groupinglist <- list("group1" = c(rep(1,5), rep(2,5)))

# set up model
model <- newPsydiff(dataset = dat, latentEquations = latentEquations,
                    manifestEquations = manifestEquations, grouping = grouping,
                    L = L, Rchol = Rchol, A0 = A0, m0 = m0,
                    parameters = parameters, groupingvariables = groupinglist)
# compile model
compileModel(model)

# set parameters
parval <- psydiff::getParameterValues(model)

optimized <- psydiffOptimx(model, control = list(trace = 1))
optimized.model <- psydiff::extractOptimx(model = model, opt = optimized)
fitModel(optimized.model)

## The following example is taken from ctsem and also demonstrates the use of the GIST optimizer
library(ctsemOMX)
data('ctExample3')
model <- ctModel(n.latent = 1, n.manifest = 3, Tpoints = 100,
                 LAMBDA = matrix(c(1, 'lambda2', 'lambda3'), nrow = 3, ncol = 1),
                 CINT= matrix('cint'), T0VAR = diag(1),
                 MANIFESTMEANS = matrix(c(0, 'manifestmean2', 'manifestmean3'), nrow = 3,
                                        ncol = 1))
fit <- ctFit(dat = ctExample3, ctmodelobj = model, objective = 'Kalman',
             stationary = c("T0TRAITEFFECT", "T0TIPREDEFFECT"), useOptimizer = F)
omxGetParameters(fit$mxobj)
fit$mxobj$fitfunction$result[[1]]

latentEquations <- "
dx(0) = cint + driftEta1*x(0);
"
manifestEquations <- "
y(0) = x(0);
y(1) = manifestmean2 + lambda2*x(0);
y(2) = manifestmean3 + lambda3*x(0);
"
# The optimization depends highly on the starting values. The values blow are taken from ctsem
parameters <- list("driftEta1" = -1.150227824,
                   "cint" = 11.214338733,
                   "lambda2" = 0.480098989,
                   "lambda3" = 0.959200513,
                   "manifestmean2" = 2.824753235,
                   "manifestmean3" = 5.606085485)

m0  <- fromMxMatrix(fit$mxobj$T0MEANS)
A0 <- sdeModelMatrix(values = fit$mxobj$T0VARchol$result,
                     labels = matrix("",1,1))
Rchol = sdeModelMatrix(values = fit$mxobj$MANIFESTVARchol$result,
                       labels = matrix(c("mvar1", "", "",
                                         "", "mvar2", "",
                                         "", "", "mvar3"),3,3,T))
L = sdeModelMatrix(values = fit$mxobj$DIFFUSIONchol$result,
                   labels = matrix(c("lvar"),1,1,T))

longdata <- ctWideToLong(ctExample3, n.manifest = 3, Tpoints =  100)
dat <- list("person" = longdata[,"id"], "observations" = longdata[,c("Y1", "Y2", "Y3")], "dt" = longdata[,"dT"])

# set up model
model <- newPsydiff(dataset = dat, latentEquations = latentEquations,
                    manifestEquations = manifestEquations,
                    L = L, Rchol = Rchol, A0 = A0, m0 = m0,
                    parameters = parameters, verbose = 0, kappa = 0, alpha = .81, beta = 2)

compileModel(model)
out <- fitModel(model)
out$m2LL
getParameterValues(model)

opt <- psydiffOptimx(model, method = c('Nelder-Mead', 'BFGS', 'nlm', 'nlminb'), control = list(trace = 1))

startingValues <- psydiff::getParameterValues(model)
adaptiveLassoWeights <- rep(1, length(startingValues))
names(adaptiveLassoWeights) <- names(startingValues)
regularizedParameters <- "lambda2"
lambda <- 10

opt <- GIST(model = model, startingValues = startingValues, lambda = lambda,
            adaptiveLassoWeights = adaptiveLassoWeights,
            regularizedParameters = regularizedParameters,
            verbose = 1, maxIter_out = 200, sig = .4, break_outer = 10e-20)

getParameterValues(opt$model)
out <- fitModel(opt$model)
matplot(out$predictedManifest, type = "l")
points(dat$observations[,1])
points(dat$observations[,2])
points(dat$observations[,3])

## second order model
set.seed(12391)

library(ctsemOMX)
DRIFT <- matrix(c(0, 1,
                  -.005, -.04),2,2,T)
LAMBDA <- matrix(c(1,0),1,2,T)
MANIFESTVAR <- matrix(1)
LATENTVAR <- matrix(c(0.001, 0,
                      0, 0.001),2,2,T)
ctMod <- ctModel(LAMBDA = LAMBDA, n.manifest = 1, n.latent = 2,
                 Tpoints = 300,
                 T0MEANS = c(0,1),
                 T0VAR = diag(2), CINT = matrix(c(0, 1),nrow = 2),
                 MANIFESTVAR = MANIFESTVAR, MANIFESTMEANS = 0,
                 DRIFT = DRIFT, DIFFUSION = LATENTVAR)
ctDat <- ctGenerate(ctMod, n.subjects = 1)
plot(ctDat[,"Y1"], type = "p")

## Define model in psydiff

manifestEquations <- "
y = x(0);
"

latentEquations <- "
dx(0) = x(1);
dx(1) = cint + a*x(0) +b*x(1);
"

A0 <- diag(1,2)
m0 <- matrix(c("0","1"), nrow = 2)

Rchol <- matrix("1",1,1)
L <- ctMod$DIFFUSION

parameters <- list("cint" = 1, "a" = -.5, "b" = -.5)

dat <- list("person" = ctDat[,"id"],
            "observations" = matrix(ctDat[,"Y1"], ncol = 1),
            "dt" = c(0,rep(1,length(ctDat[,"time"])-1)))

## optimize model

model <- newPsydiff(dataset = dat, latentEquations = latentEquations,
                    manifestEquations = manifestEquations,
                    L = L, Rchol = Rchol, A0 = A0, m0 = m0,
                    parameters = parameters)
compileModel(model)

(startingValues <- psydiff::getParameterValues(model))

## Of the optimizers I tried, Nelder-Mead results in  the best fit for this model
nm <- psydiffOptimNelderMead(model, control = list("trace" = 1))
lines(model$predictedManifest, col = "#008080", lwd = 3) # Note: model object was changed by reference


# Predator Prey Model
library(deSolve)

NPerson <- 5
measurementOccasions <- 100
burnin <- 1000

# function from deSolve:
LotVmod <- function (Time, State, Pars) {
  with(as.list(c(State, Pars)), {
    dx = x*(alpha - beta*y)
    dy = -y*(gamma - delta*x)
    return(list(c(dx, dy)))
  })
}

Pars <- c(alpha = 2, beta = .5, gamma = .4, delta = .2)
State <- c(x = 10, y = 10)
Time <- seq(0, 200, length.out = measurementOccasions + burnin)

persons <- c()
laggedTimes <- c()
for(p in 1:NPerson){
  out <- as.data.frame(ode(func = LotVmod, y = State, parms = Pars, times = Time))
  out <- out[(nrow(out) - measurementOccasions+1):nrow(out), ]
  obs <- as.matrix(out[,c("x", "y")]) %*%matrix(c(1,2,0,0,
                                                  0,0,1,4), nrow = 2, byrow = T)
  obs <- obs +
    mvtnorm::rmvnorm(n = measurementOccasions,
                     mean = rep(0,4),
                     sigma = diag(1,4))
  if(p == 1){
    observations <- obs
  }else{
    observations <- rbind(observations, obs)
  }
  laggedTime <- out$time
  laggedTime[2:length(laggedTime)] <- laggedTime[2:length(laggedTime)] - laggedTime[1:(length(laggedTime)-1)]
  laggedTime[1] <- 0
  laggedTimes <- c(laggedTimes, laggedTime)

  persons <- c(persons, rep(p,measurementOccasions))
}

matplot(as.matrix(out[,c("x", "y")]), type = "l")
matpoints(observations[persons == 1,],
          type = "p", lty = 1)
##
## with psydiff
# prepare data
dat <- list("person" = persons,
            "observations" = observations,
            "dt" = laggedTimes)

## prepare model

latentEquations <- "
dx[0] = x[0]*(alpha - beta*x[1]);
dx[1] = -x[1]*(gamma - delta*x[0]);
"
#'
manifestEquations <- "
y[0] = 1*x[0];
y[1] = a1*x[0];
y[2] = 1*x[1];
y[3] = a2*x[1];
"

parameters <- list("alpha" = 0.1, "beta" = .1,
                   "gamma" = .1, "delta" = .1,
                   "a1" = 1, "a2" = 1)

m0  <- matrix(c("m01 = 1", "m02 = 1"), nrow = 2)
A0 <-matrix(c("a01 = 1", "0",
              "a012 = .2", "a02 = 1"),2,2,T)
Rchol <-matrix(c("r1 = 0.1", "0", "0", "0",
                 "0", "r2 = 0.1", "0", "0",
                 "0", "0", "r3 = 0.1", "0",
                 "0", "0", "0", "r4 = 0.1"),4,4,T)
L = matrix(c("0", "0",
             "0", "0"),2,2,T)

# set up model
model <- newPsydiff(dataset = dat, latentEquations = latentEquations,
                    manifestEquations = manifestEquations,
                    L = L, Rchol = Rchol, A0 = A0, m0 = m0,
                    parameters = parameters)

# compile model
compileModel(model)

fitModel(model)
startVal <- inspectModel(model)
setParameterValues(model$pars$parameterTable,
                   parameterValues = startVal,
                   parameterLabels = names(startVal))

opt <- psydiffOptimx(model,
                     method = c("nlminb"),
                     control = list("trace" = 1, "kkt" = FALSE, "maxit" = 200),
                     hessian = FALSE)
model <- extractOptimx(model = model, opt = opt)
f <- fitModel(model)
inspectModel(model)

plot(observations[persons==1,1])
lines(1:length(f$predictedManifest[,1]), f$predictedManifest[,1], col = "red")

## End(Not run)

jhorzek/psydiff documentation built on Sept. 15, 2022, 6:26 a.m.