Nothing
#---------------------------------------------------------------------
# Author: Dongjun You
# Date: 2018-05-15
# Filename: OutlierDetection.R
# Purpose: An illustrative example of using dynr.taste to detect
# outliers from a state space model
#---------------------------------------------------------------------
# Load packages
library(dynr)
# The data were generated from a discrete time state-space model with six observed variables and two latent variables
# 100 participants, 100 time-points for each participant
# For each participant, 3 innovative outliers and 6 additive outliers
data("Outliers")
outdata <- Outliers$generated$y
shockO <- Outliers$generated$shocks$shockO
shockL <- Outliers$generated$shocks$shockL
# The true parameters used to generate the data
true_params <- c(.6, -.2, -.2, .5,# beta
.9, .8, .9, .8,# lambda
.3, -.1, .3,# psi
.2, .2, .2, .2, .2, .2)# theta
data_shk <- dynr.data(outdata, id='id', time='time',
observed=c('V1','V2','V3','V4','V5','V6'))
# measurement
# this is the factor loadings matrix, Lambda in SEM notation
meas_shk <- prep.measurement(
values.load=matrix(c(1.0, 0.0,
0.9, 0.0,
0.8, 0.0,
0.0, 1.0,
0.0, 0.9,
0.0, 0.8), ncol=2, byrow=TRUE),
params.load=matrix(c('fixed','fixed',
'l_21','fixed',
'l_31','fixed',
'fixed','fixed',
'fixed','l_52',
'fixed','l_62'), ncol=2, byrow=TRUE),
state.names=c('eta_1','eta_2'),
obs.names=c('V1','V2','V3','V4','V5','V6') )
# observation and dynamic noise components
# the latent noise is the dynamic noise, Psi in SEM notation
# the observed noise is the measurement noise, Theta in SEM
nois_shk <- prep.noise(
values.latent=matrix(c(0.3, -0.1,
-0.1, 0.3), ncol=2, byrow=TRUE),
params.latent=matrix(c('psi_11','psi_12',
'psi_12','psi_22'), ncol=2, byrow=TRUE),
values.observed=diag(c(0.2, 0.2, 0.2, 0.2, 0.2, 0.2), ncol=6, nrow=6),
params.observed=diag( paste0('e_', 1:6), 6) )
# initial covariances and latent state values
init_shk <- prep.initial(
values.inistate=c(0,0),
params.inistate=c('mu_1','mu_2'),
values.inicov=matrix(c(0.3, -0.1,
-0.1, 0.3), ncol=2, byrow=TRUE),
params.inicov=matrix(c('c_11','c_12',
'c_12','c_22'), ncol=2, byrow=TRUE) )
# define the transition matrix
dynm_shk <- prep.matrixDynamics(
values.dyn=matrix(c(0.6, -0.2,
-0.2, 0.5), ncol=2, byrow=TRUE),
params.dyn=matrix(c('b_11','b_12',
'b_21','b_22'), ncol=2, byrow=TRUE),
isContinuousTime=FALSE)
# Prepare for cooking
# put all the recipes together
model_shk <- dynr.model(dynamics=dynm_shk, measurement=meas_shk,
noise=nois_shk, initial=init_shk,
data=data_shk, outfile=paste0("model_shk.c"))
# Estimate free parameters
# SHOULD cook with 'debug_flag=TRUE' to extract information
# used for outlier detection
cook_shk <- dynr.cook(model_shk, debug_flag=TRUE, verbose=FALSE)
#------------------------------------------------------------
# detect outliers
# output of 'dynr.taste' is a list containing lists of results from the outlier detection process,
# which will be input of the autoplot.dynrTaste and dynr.taste2
taste_shk <- dynr.taste(model_shk, cook_shk, conf.level=.99)
# apply the detected outliers, and re-fit the model
taste2_shk <- dynr.taste2(model_shk, cook_shk, taste_shk,
newOutfile="taste2_shk.c")
# compare true parameter and estimated ones
# cook: parameter estimates when the data include true outliers
# taste2: parameter estimates after applying the estimated outliers by the function 'dynr.taste2'
# check the how closely recover the true parameter
# from 'dynr.taste2', i.e., after applying outliers
data.frame(true=true_params,
cook=coef(cook_shk)[1:17],
taste2=coef(taste2_shk$dynrCook_new)[1:17])
##---------------------
## Checking detected outliers, comparing it with true outliers
# A helper function to extract detected outliers
shock_check <- function(dynrModel, dynrTaste, shocks) {
tstart <- dynrModel$data$tstart
shock_inn <- dynrTaste$t.inn.shock
shock_add <- dynrTaste$t.add.shock
nlat <- nrow(shock_inn)
nobs <- nrow(shock_add)
id <- dynrModel$data$id
IDs <- unique(id)
nID <- length(IDs)
shocked_L <- vector('list', nID*nlat)
shocked_O <- vector('list', nID*nobs)
for(j in 1:nID) {
beginTime <- tstart[j] + 1
endTime <- tstart[j+1]
for(l in 1:nlat) {
time_L <- which(shock_inn[l, beginTime:endTime])
if (length(time_L) > 0) {
shocked_L[[(j-1)+l]] <- data.frame(id=j, time_L=time_L, lat=l)
} else {
shocked_L[[(j-1)+l]] <- NULL
}
}
for(o in 1:nobs) {
time_O <- which(shock_add[o, beginTime:endTime])
if (length(time_O) > 0) {
shocked_O[[(j-1)+o]] <- data.frame(id=j, time_O=time_O, obs=o)
} else {
shocked_O[[(j-1)+o]] <- NULL
}
}
}
list(sh_L = do.call("rbind", shocked_L),
sh_O = do.call("rbind", shocked_O))
}
# pre-saved detected outliers
detect_L <- Outliers$detect_L
detect_O <- Outliers$detect_O
# detected outliers
detected <- shock_check(model_shk, taste_shk)
detected_L <- detected$sh_L
detected_O <- detected$sh_O
# test equality of the saved outliers and detected outliers
testthat::expect_true(all(as.data.frame(detect_L) == detected_L))
testthat::expect_true(all(as.data.frame(detect_O) == detected_O))
# true outliers detection
detrue_L <- merge(shockL, detected_L, by=c('id','time_L','lat'))
detrue_O <- merge(shockO, detected_O, by=c('id','time_O','obs'))
# checking the N of detected true outliers
testthat::expect_identical(nrow(detrue_L), 82L)
testthat::expect_identical(nrow(detrue_O), 123L)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.