Nothing
seqimpute_timing <- function(OD, regr = "multinom", np = 1, nf = 0, nfi = 1, npt = 1,
available = TRUE, covariates = matrix(NA, nrow = 1, ncol = 1),
time.covariates = matrix(NA, nrow = 1, ncol = 1), pastDistrib = FALSE,
futureDistrib = FALSE, m = 1, noise = 0, SetRNGSeed = FALSE, ParExec = TRUE, ncores = NULL,
timeFrame = 0, verbose = TRUE, ...) {
if (inherits(OD, "stslist")) {
valuesNA <- c(attr(OD, "nr"), attr(OD, "void"))
OD <- data.frame(OD)
OD[OD == valuesNA[1] | OD == valuesNA[2]] <- NA
}
if (sum(is.na(OD)) == 0) {
if (verbose == TRUE) {
message("This dataset has no missing values!")
}
return(OD)
}
rownamesDataset <- rownames(OD)
nrowsDataset <- nrow(OD)
# 0. Initial tests and manipulations on parameters ------------------------------------------------------------------------------------------------------------
dataOD <- preliminaryChecks(OD, covariates, time.covariates, np, nf, nfi, npt, pastDistrib, futureDistrib)
dataOD[c("pastDistrib", "futureDistrib", "totV", "totVi", "totVt", "noise")] <- InitCorectControl(regr, dataOD$ODClass, dataOD$OD, dataOD$nr, dataOD$nc, dataOD$k, np, nf, dataOD$nco, dataOD$ncot, nfi, npt, pastDistrib, futureDistrib, dataOD$totV, dataOD$totVi, dataOD$totVt, noise)
# 1. Analysis of OD and creation of matrices ORDER, ORDER2 and ORDER3 -----------------------------------------------------------------------------------------
dataOD[c("MaxInitGapSize", "InitGapSize", "MaxTermGapSize", "TermGapSize", "MaxGap", "ORDER", "ORDER2", "ORDER3")] <- OrderCreation(dataOD$OD, dataOD$nr, dataOD$nc)
# 2. Computation of the order of imputation of each MD (i.e. updating of matrix ORDER) --------------------------------------------------------------------
if (max(dataOD$ORDER) != 0) {
dataOD[c("ORDERSLGLeft", "ORDERSLGRight", "ORDERSLGBoth", "LongGap", "MaxGap", "REFORD_L", "ORDER")] <- ImputeOrderComputation(dataOD$ORDER, dataOD$ORDER3, dataOD$MaxGap, np, nf, dataOD$nr, dataOD$nc)
} else {
dataOD$ORDERSLGLeft <- matrix(nrow = dataOD$nr, ncol = dataOD$nc, 0)
dataOD$ORDERSLGRight <- matrix(nrow = dataOD$nr, ncol = dataOD$nc, 0)
dataOD$ORDERSLGBoth <- matrix(nrow = dataOD$nr, ncol = dataOD$nc, 0)
dataOD$LongGap <- FALSE
}
# Setting parallel or sequential backend and random seed
if (ParExec & (parallel::detectCores() > 2 & m > 1)) {
if (is.null(ncores)) {
Ncpus <- parallel::detectCores() - 1
} else {
Ncpus <- min(ncores, parallel::detectCores() - 1)
}
cl <- parallel::makeCluster(Ncpus)
doSNOW::registerDoSNOW(cl) # registerDoParallel doesn't have compatibility with ProgressBar
if (SetRNGSeed) {
doRNG::registerDoRNG(SetRNGSeed)
}
# set progress bar for parallel processing
pb <- txtProgressBar(max = m, style = 3)
progress <- function(n) setTxtProgressBar(pb, n)
opts <- list(progress = progress)
# condition used to run code part needed for parallel processing
ParParams <- TRUE
} else {
if (ParExec & m == 1) {
if (verbose == TRUE) {
message(paste("/!\\ The number of multiple imputations is 1, parallel processing is only available for m > 1."))
}
} else if (ParExec) {
if (verbose == TRUE) {
message(paste("/!\\ The number of cores of your processor does not allow paralell processing, at least 3 cores are needed."))
}
}
if (SetRNGSeed) {
set.seed(SetRNGSeed)
}
foreach::registerDoSEQ()
opts <- NULL
# condition used to run code part needed for sequential processing
ParParams <- FALSE
}
# Beginning of the multiple imputation (imputing "mi" times)
o <- NULL
RESULT <- foreach(o = 1:m, .inorder = TRUE, .combine = "rbind", .options.snow = opts) %dopar% {
if (!ParParams) {
# Parallel and sequential execution of foreach don't use the same casting mechanism, this one is used for sequential execution.
if (verbose == TRUE) {
cat("iteration :", o, "/", m, "\n")
}
}
# Trying to catch the potential singularity error (part 1/2)
# (comment this part of code (as well as the one at the very end of
# seqimpute.R) to debug more easily and see the full message of any
# occuring error)
#********************************************************************************************************************************
#************************************************************************************************************************
# 3. Imputation using a specific model --------------------------------------------------------------------------------------------------------
if (max(dataOD$ORDER) != 0) {
# Otherwise if there is only 0 in ORDER,
# there is no need to impute internal gaps
# and we directly jump to the imputation of
# external gaps (i.e. points 4. and 5.)
if (verbose == TRUE) {
print("Imputation of the internal gaps...")
}
dataOD[["ODi"]] <- ModelImputationTiming(dataOD$OD, dataOD$CO, dataOD$COt, dataOD$ODi, dataOD$MaxGap, dataOD$totV, dataOD$totVi, regr, dataOD$nc, np, nf, dataOD$nr, dataOD$ncot, dataOD$COtsample, dataOD$pastDistrib, dataOD$futureDistrib, dataOD$k, available, dataOD$REFORD_L, dataOD$noise, timeFrame, ...)
}
# 4. Imputing initial NAs -------------------------------------------------------------------------------------------------------------------------
if ((nfi != 0) & (dataOD$MaxInitGapSize != 0)) {
if (verbose == TRUE) {
print("Imputation of the initial gaps...")
}
# # we only impute the initial gaps if nfi > 0
dataOD[["ODi"]] <- ImputingInitialNAsTiming(
OD = dataOD$OD, covariates = dataOD$CO, time.covariates = dataOD$COt, ODi = dataOD$ODi, totVi = dataOD$totVi, COtsample = dataOD$COtsample,
futureDistrib = dataOD$futureDistrib, InitGapSize = dataOD$InitGapSize, MaxInitGapSize = dataOD$MaxInitGapSize, nr = dataOD$nr, nc = dataOD$nc,
ud = dataOD$ud, nco = dataOD$nco, ncot = dataOD$ncot, nfi = nfi, regr = regr, k = dataOD$k, available = available, noise = dataOD$noise, timeFrame = timeFrame, ...
)
}
# 5. Imputing terminal NAs ------------------------------------------------------------------------------------------------------------------------
if ((npt != 0) & (dataOD$MaxTermGapSize != 0)) {
# we only impute the terminal
# gaps if npt > 0
if (verbose == TRUE) {
print("Imputation of the terminal gaps...")
}
dataOD[["ODi"]] <- ImputingTerminalNAsTiming(
OD = dataOD$OD, covariates = dataOD$CO, time.covariates = dataOD$COt, ODi = dataOD$ODi, totVi = dataOD$totVi, COtsample = dataOD$COtsample,
pastDistrib = dataOD$pastDistrib, TermGapSize = dataOD$TermGapSize, MaxTermGapSize = dataOD$MaxTermGapSize, nr = dataOD$nr, nc = dataOD$nc,
ud = dataOD$ud, nco = dataOD$nco, ncot = dataOD$ncot, npt = npt, regr = regr, k = dataOD$k, available = available, noise = dataOD$noise, timeFrame = timeFrame, ...
)
}
if (max(dataOD$ORDERSLGLeft) != 0 & !is.null(dataOD$ORDERSLGLeft)) {
# Checking if we have to impute
# left-hand side SLG
if (verbose == TRUE) {
print("Imputation of the left-hand side SLG...")
}
dataOD[["ODi"]] <- LSLGNAsImputeTiming(dataOD$OD, dataOD$ODi, dataOD$CO, dataOD$COt, dataOD$COtsample, dataOD$ORDERSLGLeft, dataOD$pastDistrib, dataOD$futureDistrib, regr, np, dataOD$nr, nf, dataOD$nc, dataOD$ud, dataOD$ncot, dataOD$nco, dataOD$k, dataOD$noise, available, timeFrame, ...)
}
# right-hand side SLG
if (max(dataOD$ORDERSLGRight) != 0 & !is.null(dataOD$ORDERSLGRight)) {
# Checking if we have to impute right-hand
# side SLG
if (verbose == TRUE) {
print("Imputation of the right-hand side SLG...")
}
dataOD[["ODi"]] <- RSLGNAsImputeTiming(dataOD$OD, dataOD$ODi, dataOD$CO, dataOD$COt, dataOD$COtsample, dataOD$ORDERSLGRight, dataOD$pastDistrib, dataOD$futureDistrib, regr, np, dataOD$nr, nf, dataOD$nc, dataOD$ud, dataOD$ncot, dataOD$nco, dataOD$k, dataOD$noise, available, timeFrame, ...)
}
# Checking if we have to impute
# Both-hand side SLG
if (dataOD$LongGap) {
if (verbose == TRUE) {
print("Imputation of the both-hand side SLG...")
}
for (h in 2:np) {
if (sum(dataOD$ORDERSLGBoth[, h - 1] == 0 & dataOD$ORDERSLGBoth[, h] != 0) > 0) {
tt <- which(dataOD$ORDERSLGBoth[, h - 1] == 0 & dataOD$ORDERSLGBoth[, h] != 0)
tmpORDER <- matrix(0, nrow(dataOD$ORDERSLGBoth), ncol(dataOD$ORDERSLGBoth))
tmpORDER[tt, h:ncol(dataOD$ORDERSLGBoth)] <- dataOD$ORDERSLGBoth[tt, h:ncol(dataOD$ORDERSLGBoth)]
dataOD[["ODi"]] <- RSLGNAsImputeTiming(dataOD$OD, dataOD$ODi, dataOD$CO, dataOD$COt, dataOD$COtsample, tmpORDER, dataOD$pastDistrib, dataOD$futureDistrib, regr, h - 1, dataOD$nr, nf, dataOD$nc, dataOD$ud, dataOD$ncot, dataOD$nco, dataOD$k, dataOD$noise, available, timeFrame, ...)
}
}
# nfix <- 1
# dataOD[["ODi"]] <- RSLGNAsImpute(dataOD$OD, dataOD$ODi, dataOD$CO, dataOD$COtsample, dataOD$ORDERSLGBoth, dataOD$pastDistrib, dataOD$futureDistrib, regr, np, dataOD$nr, nfix, dataOD$nc, dataOD$ud, dataOD$ncot, dataOD$nco, dataOD$k, dataOD$noise, available,num.trees,min.node.size,max.depth,timing=F)
}
# Updating the matrix RESULT used to store the multiple imputations
return(cbind(replicate(dataOD$nr, o), dataOD$ODi))
}
if (ParParams) {
parallel::stopCluster(cl)
}
RESULT <- rbind(cbind(replicate(dataOD$nr, 0), dataOD$OD), RESULT)
# X. Final conversions ----------------------------------------------------------------------------------------------------------------------------------------
RESULT <- FinalResultConvert(RESULT, dataOD$ODClass, dataOD$ODlevels, rownamesDataset, nrowsDataset, dataOD$nr, dataOD$nc, dataOD$rowsNA, m)
# Rearrange dataframe by order of the mi imputation as parallel computation may not return the values in sequential order.
# if (ParParams){
# RESULT <- RESULT[order(RESULT$.imp),]
# }
structure(RESULT,class=c("seqimp","data.frame"))
}
ModelImputationTiming <- function(OD, covariates, time.covariates, ODi, MaxGap,
totV, totVi, regr, nc, np, nf, nr, ncot, COtsample,
pastDistrib, futureDistrib, k, available, REFORD_L, noise, timeFrame, ...) {
for (order in 1:MaxGap) {
print(paste0("Step ", order, "/", MaxGap))
# number of time-point that have a missing data to impute that correspond to actual gap
ncol_imp <- length(unique(REFORD_L[[order]][, 2]))
# columns that have a missing data to impute that correspond to the actual gap
col_to_imp <- unique(sort(unique(REFORD_L[[order]])[, 2]))
for (i in 1:ncol_imp) {
CD_shift <- CDComputeTiming(covariates, OD, time.covariates, MaxGap, order, np, nc, nr, nf, COtsample, pastDistrib, futureDistrib, ncot, k, col_to_imp[i], timeFrame)
# Identify if only one level appear in the predictive matrix. If it is the case, we directly predict the only level that appears.
if (length(table(CD_shift$CD[, 1])) > 1) {
log_CD <- list()
log_CD[c("reglog", "CD")] <- ComputeModel(CD_shift$CD, regr, totV, np, nf, k, ...)
row_to_imp <- REFORD_L[[order]][which(REFORD_L[[order]][, 2] == col_to_imp[[i]]), 1]
# 3.3. Imputation using the just created model (Dealing with the actual VALUES to impute) ---------
ODi <- CreatedModelImputationTiming(order, covariates, log_CD$CD, time.covariates, OD, ODi, pastDistrib, futureDistrib, available, col_to_imp[i], row_to_imp, ncot, nc, np, nf, k, totV, regr, log_CD$reglog, noise, CD_shift$shift, MaxGap)
} else {
lev <- names(table(CD_shift$CD[, 1]))
REFORD <- as.matrix(REFORD_L[[order]])
if (ncol(REFORD) == 1) {
REFORD <- t(REFORD)
}
nr_REFORD <- nrow(REFORD)
for (u in 1:nr_REFORD) {
i <- REFORD[u, 1]
# taking out the first coordinate (row
# number in ORDER) from REFORDI
j <- REFORD[u, 2]
ODi[i, j] <- lev
}
}
}
}
return(ODi)
}
CDComputeTiming <- function(CO, OD, COt, MaxGap, order, np, nc, nr, nf, COtsample, pastDistrib, futureDistrib, ncot, k, col, timeFrame) {
# Building of a data matrix for the computation of the
# model
# number of usable data in past and futur
# for each row of
# frameSize <- MaxGap-order+np+nf+1 # size of the current
# mobile caracteristic frame (that
# changes according to
# "order") which is equal to
# nc-ud+1
# Structure and building of the data matrix CD
# The first column of CD is the dependent variable (VD,
# response variable)
# The following columns are the independent variables
# (VIs, explanatory variables) coming from the past
# (np>0) or the future (nf>0) ordered by time and the
# distribution of the possible values (i.e. all possible
# categorical variables numbered from 1 to k and of
# course the value NA) respectively Before and After the
# NA to impute.
#
# VD Past VIs Future VIS Past distribution Future distribution
#
# / \
# | |
# CD = | CD CDp CPf CDdb CDda |
# \ /
# \ /
#
# We are then going to create a specific CD according to
# the value of np and nf
# initialization of
# the current very left part of
# the predictive model matrix
# ("matrice de modele de
# prediction") with NA
# everywhere (/!\ for each
# "order", we are going to
# build such a CD)
if ((np > 0 & nf > 0) & ((MaxGap %% 2 == 0 & order %% 2 == 0) | (MaxGap %% 2 != 0 & order %% 2 != 0))) {
shift <- MaxGap - order # jumping at the end of
udp <- min(timeFrame, col - (MaxGap - order) - np - 1) # number of usable data in the past
udf <- min(timeFrame, nc - col - nf) # number of usable data in the futur
col_to_use <- (col - udp):(col + udf)
ud <- udp + udf + 1
# the gap
} else {
shift <- 0 # no shift is needed (note that
# no shift is needed for the
# case of model 2 (only past)
# and model 3 (only future))
if (np > 0 & nf > 0) {
udp <- min(timeFrame, col - np - 1)
udf <- min(timeFrame, nc - col - (MaxGap - order) - nf)
col_to_use <- (col - udp):(col + udf)
ud <- udp + udf + 1
}
if (np > 0 & nf == 0) {
udp <- min(timeFrame, col - np - 1)
udf <- min(timeFrame, nc - col)
col_to_use <- (col - udp):(col + udf)
ud <- udp + udf + 1
}
if (nf > 0 & np == 0) {
udf <- min(timeFrame, nc - col - nf)
udp <- min(col - 1, timeFrame)
col_to_use <- (col - udp):(col + udf)
ud <- udp + udf + 1
}
}
CD <- matrix(NA, nrow = nr * ud, ncol = 1)
iter <- 1 # initialisation of the number
# of iterations of the
# following for loops
# Only PAST
if (np > 0 & nf == 0) {
CD <- PastVIComputeTiming(CD, CO, OD, ncot, iter, nr, nc, ud, np, COtsample, COt, pastDistrib, futureDistrib, k, col_to_use)
# Only FUTURE
} else if (np == 0 & nf > 0) {
# only FUTURE VIs do exist
CD <- FutureVIComputeTiming(CD, CO, OD, ncot, iter, nr, nc, ud, np, COtsample, COt, pastDistrib, futureDistrib, k, nf, col_to_use)
# PAST and FUTURE
} else {
# meaning np>0 and nf>0 and that, thus,
# PAST as well as FUTURE VIs do exist
CD <- PastFutureVIComputeTiming(CD, CO, OD, ncot, iter, nr, nc, ud, np, COtsample, COt, pastDistrib, futureDistrib, k, nf, shift, col_to_use, MaxGap, order)
}
CD_shift <- list()
CD_shift[c("CD", "shift")] <- list(CD, shift)
return(CD_shift)
}
################################################################################
FutureVIComputeTiming <- function(CD, CO, OD, ncot, iter, nr, nc, ud, np, COtsample, COt, pastDistrib, futureDistrib, k, nf, col_to_use) {
CDf <- matrix(NA, nrow = nr * ud, ncol = nf)
if (ncot > 0) {
# initialisation of matrix COtselected
COtselected <- do.call(rbind, replicate(ud, COtsample, simplify = FALSE))
}
# initialisation of matrix CDdb (for past
# distribution analysis) (Distribution Before)
if (pastDistrib) {
CDdb <- matrix(NA, nrow = nr * ud, ncol = k)
db <- matrix(NA, nrow = nr, ncol = k)
# submatrix of CDdb:
# CDdb is composed of
# ud matrix db on top
# of each other
}
# initialisation of matrix CDda (for future
# distribution analysis) (Distribution After)
if (futureDistrib) {
CDda <- matrix(NA, nrow = nr * ud, ncol = k)
# CDda has same dimensions as CDdb
da <- matrix(NA, nrow = nr, ncol = k)
# da has same dimensions as db
}
for (j in col_to_use) {
t1 <- (nr * (iter - 1) + 1)
# Determining the locations
# of the time span (always nr) of
# the piled up blocks of CD
t2 <- nr * iter
# VD
CD[t1:t2, 1] <- OD[, j]
# future VIs
CDf[t1:t2, ] <- OD[, (j + 1):(j + nf)]
# /!\ current
# pointer on column
# is thus:
# "j-frameSize
# Eventually considering time-dependent
# covariates
if (ncot > 0) {
COtselected <- COtselected[t1:t2, j + (1:(ncot / nc) - 1) * nc]
}
# Past distribution (i.e. Before)
if (pastDistrib) {
ODt <- t(OD)
ODt <- as.data.frame(ODt)
tempOD <- lapply(ODt[(1:(j - 1)), ], factor, levels = c(1:k, NA), exclude = NULL)
db_list <- lapply(tempOD, summary)
db_matrix <- do.call(rbind, db_list)
CDdb[t1:t2, ] <- db_matrix[, 1:k] / length(1:(j - 1))
}
# Future distribution (i.e. After)
if (futureDistrib) {
ODt <- t(OD)
ODt <- as.data.frame(ODt)
tempOD <- lapply(ODt[((j + 1):nc), ], factor, levels = c(1:k, NA), exclude = NULL)
# because:
# j-frameSize+np+1 + 1
# = j-frameSize+np+2
da_list <- lapply(tempOD, summary)
da_matrix <- do.call(rbind, da_list)
CDda[t1:t2, ] <- da_matrix[, 1:k] / length((j + 1):nc)
}
iter <- iter + 1
}
# future VIs
CD <- cbind(CD, CDf)
if (pastDistrib) {
CD <- cbind(CD, CDdb)
}
if (futureDistrib) {
CD <- cbind(CD, CDda)
}
# Conversion of CD into a data frame
CD <- as.data.frame(CD)
# Eventually concatenating CD with COs (the matrix
# containing the covariates)
if (all(is.na(CO)) == FALSE) {
if (is.null(dim(CO))) {
CO <- matrix(CO, nrow = nrow(OD), ncol = 1)
COs <- do.call("rbind", rep(list(CO), ud))
# Concatenating CD and COs into CD
CD <- cbind(CD, COs)
} else {
# Checking if CO is NOT
# completely empty
# Creation of the stacked covariates
# matrix for 3.1
COs <- do.call("rbind", rep(list(CO), ud))
# Concatenating CD and COs into CD
CD <- cbind(CD, COs)
}
}
# Else, in case CO is empty (i.e. we don't consider
# any covariate) simply continue with the current CD
# Eventually concatenating CD with COtselected (the
# matrix containing the current time-dependent
# covariates)
# Checking if COt is NOT completely empty
if (ncot > 0) {
# Concatenating CD and COtselected into CD
CD <- cbind(CD, as.data.frame(COtselected))
}
return(CD)
}
PastVIComputeTiming <- function(CD, CO, OD, ncot, iter, nr, nc, ud, np, COtsample, COt, pastDistrib, futureDistrib, k, col_to_use) {
CDp <- matrix(NA, nrow = nr * ud, ncol = np)
if (ncot > 0) {
# initialisation of matrix COtselected
COtselected <- do.call(rbind, replicate(ud, COtsample, simplify = FALSE))
}
# initialisation of matrix CDdb (for past
# distribution analysis) (Distribution Before)
if (pastDistrib) {
CDdb <- matrix(NA, nrow = nr * ud, ncol = k)
db <- matrix(NA, nrow = nr, ncol = k)
# submatrix of CDdb:
# CDdb is composed of
# ud matrix db on top
# of each other
}
# initialisation of matrix CDda (for future
# distribution analysis) (Distribution After)
if (futureDistrib) {
CDda <- matrix(NA, nrow = nr * ud, ncol = k)
# CDda has same dimensions as CDdb
da <- matrix(NA, nrow = nr, ncol = k)
# da has same dimensions as db
}
for (j in col_to_use) {
# /!\ j is initialised at
# the very end (utmost right) of the
# frame
t1 <- (nr * (iter - 1) + 1)
# Determining the locations
# of the time span (always nr) of
# the piled up blocks of CD
t2 <- nr * iter
# VD
CD[t1:t2, 1] <- OD[, j]
# past VIs
CDp[t1:t2, ] <- OD[, (j - np):(j - 1)]
# /!\ current
# pointer on column
# is thus:
# "j-frameSize
# Eventually considering time-dependent
# covariates
if (ncot > 0) {
COtselected <- COtselected[t1:t2, j + (1:(ncot / nc) - 1) * nc]
}
# Past distribution (i.e. Before)
if (pastDistrib) {
ODt <- t(OD)
ODt <- as.data.frame(ODt)
tempOD <- lapply(ODt[(1:(j - 1)), ], factor, levels = c(1:k, NA), exclude = NULL)
db_list <- lapply(tempOD, summary)
db_matrix <- do.call(rbind, db_list)
CDdb[t1:t2, ] <- db_matrix[, 1:k] / length(1:(j - 1))
}
# Future distribution (i.e. After)
if (futureDistrib) {
ODt <- t(OD)
ODt <- as.data.frame(ODt)
tempOD <- lapply(ODt[((j + 1):nc), ], factor, levels = c(1:k, NA), exclude = NULL)
# because:
# j-frameSize+np+1 + 1
# = j-frameSize+np+2
da_list <- lapply(tempOD, summary)
da_matrix <- do.call(rbind, da_list)
CDda[t1:t2, ] <- da_matrix[, 1:k] / length((j + 1):nc)
}
iter <- iter + 1
}
# past VIs
CD <- cbind(CD, CDp)
if (pastDistrib) {
CD <- cbind(CD, CDdb)
}
if (futureDistrib) {
CD <- cbind(CD, CDda)
}
# Conversion of CD into a data frame
CD <- as.data.frame(CD)
# Eventually concatenating CD with COs (the matrix
# containing the covariates)
if (all(is.na(CO)) == FALSE) {
if (is.null(dim(CO))) {
CO <- matrix(CO, nrow = nrow(OD), ncol = 1)
COs <- do.call("rbind", rep(list(CO), ud))
# Concatenating CD and COs into CD
CD <- cbind(CD, COs)
} else {
# Checking if CO is NOT
# completely empty
# Creation of the stacked covariates
# matrix for 3.1
COs <- do.call("rbind", rep(list(CO), ud))
# Concatenating CD and COs into CD
CD <- cbind(CD, COs)
}
}
# Else, in case CO is empty (i.e. we don't consider
# any covariate) simply continue with the current CD
# Eventually concatenating CD with COtselected (the
# matrix containing the current time-dependent
# covariates)
# Checking if COt is NOT completely empty
if (ncot > 0) {
# Concatenating CD and COtselected into CD
CD <- cbind(CD, as.data.frame(COtselected))
}
return(CD)
}
# Past or future VI computation
PastFutureVIComputeTiming <- function(CD, CO, OD, ncot, iter, nr, nc, ud, np, COtsample, COt, pastDistrib, futureDistrib, k, nf, shift, col_to_use, MaxGap, order) {
CDp <- matrix(NA, nrow = nr * ud, ncol = np)
CDf <- matrix(NA, nrow = nr * ud, ncol = nf)
if (ncot > 0) {
# initialisation of matrix COtselected
COtselected <- do.call(rbind, replicate(ud, COtsample, simplify = FALSE))
}
# initialisation of matrix CDdb (for past
# distribution analysis) (Distribution Before)
if (pastDistrib) {
CDdb <- matrix(NA, nrow = nr * ud, ncol = k)
db <- matrix(NA, nrow = nr, ncol = k)
# submatrix of CDdb:
# CDdb is composed of
# ud matrix db on top
# of each other
}
# initialisation of matrix CDda (for future
# distribution analysis) (Distribution After)
if (futureDistrib) {
CDda <- matrix(NA, nrow = nr * ud, ncol = k)
# CDda has same dimensions as CDdb
da <- matrix(NA, nrow = nr, ncol = k)
# da has same dimensions as db
}
for (j in col_to_use) {
# /!\ j is initialised at
# the very end (utmost right) of the
# frame
t1 <- (nr * (iter - 1) + 1)
# Determining the locations
# of the time span (always nr) of
# the piled up blocks of CD
t2 <- nr * iter
# VD
CD[t1:t2, 1] <- OD[, j]
if (shift == 0) {
CDp[t1:t2, ] <- OD[, (j - np):(j - 1)]
CDf[t1:t2, ] <- OD[, (j + MaxGap - order + 1):(j + MaxGap - order + nf)]
} else {
CDp[t1:t2, ] <- OD[, (j - shift - np):(j - shift - 1)]
CDf[t1:t2, ] <- OD[, (j + 1):(j + nf)]
}
#
# # past VIs
# CDp[t1:t2,] <- OD[,(j-frameSize+1):(j-frameSize+np)]
#
# # future VIs
# CDf[t1:t2,] <- OD[,(j-nf+1):j]
#
# /!\ current
# pointer on column
# is thus:
# "j-frameSize
# Eventually considering time-dependent
# covariates
if (ncot > 0) {
COtselected <- COtselected[t1:t2, j + (1:(ncot / nc) - 1) * nc]
}
# Past distribution (i.e. Before)
if (pastDistrib) {
ODt <- t(OD)
ODt <- as.data.frame(ODt)
tempOD <- lapply(ODt[(1:(j - 1)), ], factor, levels = c(1:k, NA), exclude = NULL)
db_list <- lapply(tempOD, summary)
db_matrix <- do.call(rbind, db_list)
CDdb[t1:t2, ] <- db_matrix[, 1:k] / length(1:(j - 1))
}
# Future distribution (i.e. After)
if (futureDistrib) {
ODt <- t(OD)
ODt <- as.data.frame(ODt)
tempOD <- lapply(ODt[((j + 1):nc), ], factor, levels = c(1:k, NA), exclude = NULL)
# because:
# j-frameSize+np+1 + 1
# = j-frameSize+np+2
da_list <- lapply(tempOD, summary)
da_matrix <- do.call(rbind, da_list)
CDda[t1:t2, ] <- da_matrix[, 1:k] / length((j + 1):nc)
}
iter <- iter + 1
}
# past and future VIs
CD <- cbind(CD, CDp, CDf)
if (pastDistrib) {
CD <- cbind(CD, CDdb)
}
if (futureDistrib) {
CD <- cbind(CD, CDda)
}
# Conversion of CD into a data frame
CD <- as.data.frame(CD)
# Eventually concatenating CD with COs (the matrix
# containing the covariates)
if (all(is.na(CO)) == FALSE) {
if (is.null(dim(CO))) {
CO <- matrix(CO, nrow = nrow(OD), ncol = 1)
COs <- do.call("rbind", rep(list(CO), ud))
# Concatenating CD and COs into CD
CD <- cbind(CD, COs)
} else {
# Checking if CO is NOT
# completely empty
# Creation of the stacked covariates
# matrix for 3.1
COs <- do.call("rbind", rep(list(CO), ud))
# Concatenating CD and COs into CD
CD <- cbind(CD, COs)
}
}
# Else, in case CO is empty (i.e. we don't consider
# any covariate) simply continue with the current CD
# Eventually concatenating CD with COtselected (the
# matrix containing the current time-dependent
# covariates)
# Checking if COt is NOT completely empty
if (ncot > 0) {
# Concatenating CD and COtselected into CD
CD <- cbind(CD, as.data.frame(COtselected))
}
return(CD)
}
################################################################################
# Imputation using the just created model (Dealing with the actual VALUES to impute)
CreatedModelImputationTiming <- function(order, CO, CD, COt, OD, ODi, pastDistrib, futureDistrib, available, col, row_to_imp, ncot, nc, np, nf, k, totV, regr, reglog, noise, shift, MaxGap) {
# Structure and building of the data matrix CDi
# The first column of CDi is the dependent variable (VD,
# response variable) that we have to implement during
# the current iteration (i.e. automatically a NA)
# The following columns are the corresponding
# independent variables (VIs, explanatory variables)
# coming from the past (np>0) or the future (nf>0)
# (ordered by time and corresponding to the current
# predictive pattern) and the distribution of the
# possible values (i.e. all possible categorical
# variables numbered from 1 to k and of course
# the value NA) respectively Before and After the NA to
# impute.
# (The fact that every lines of CDi are identical is
# related to the working of the function mlogit that has
# to have as much lines in CDi as there are categories
# of the variable (see the parameter "k") --> so, CDi is
# composed of k identical lines)
#
# VD Past VIs Future VIS Past distribution Future distribution
#
# / \
# | |
# CDi = | CDi CDpi CPfi CDdbi CDdai |
# \ /
# \ /
#
# We are then going to create a specific CDi according
# to the value of np and nf
# Analysing the value of parameter available
if (available == TRUE) { # we take the previously imputed
# data into account
LOOKUP <- ODi
} else { # that is available == FALSE and thus we
# don't take the previously imputed
# data into account
LOOKUP <- OD
}
# Assigning the current "REFORD_order" matrix to the
# variable matrix REFORD
# (according to the current value of "order")
# REFORD <- as.matrix(REFORD_L[[order]])
# if (ncol(REFORD) == 1) {
# REFORD <- t(REFORD)
# }
# nr_REFORD <- nrow(REFORD)
if (np > 0 & nf == 0) { # only PAST VIs do exist
ODi <- ODiImputePASTTiming(CO, ODi, CD, COt, col, row_to_imp, pastDistrib, futureDistrib, k, np, nf, nc, ncot, totV, reglog, LOOKUP, regr, noise)
#---------------------------------------------------------------------------------------------
} else if (np == 0 & nf > 0) { # only FUTURE VIs do exist
ODi <- ODiImputeFUTURETiming(CO, ODi, CD, COt, col, row_to_imp, pastDistrib, futureDistrib, k, np, nf, nc, ncot, totV, reglog, LOOKUP, regr, noise)
} else { # meaning np>0 and nf>0 and that,
# thus, PAST as well as FUTURE VIs
# do exist
ODi <- ODiImputePFTiming(CO, ODi, CD, COt, col, row_to_imp, pastDistrib, futureDistrib, k, np, nf, nc, ncot, totV, reglog, LOOKUP, regr, noise, shift, MaxGap, order)
}
return(ODi)
}
ODiImputePASTTiming <- function(CO, ODi, CD, COt, col, row_to_imp, pastDistrib, futureDistrib, k, np, nf, nc, ncot, totV, reglog, LOOKUP, regr, noise) {
for (u in 1:length(row_to_imp)) {
i <- row_to_imp[u]
# taking out the first coordinate
# (row number in ORDER) from REFORD
j <- col
# taking out the second coordinate
# (column number in ORDER) from REFORD
CDi <- matrix(NA, nrow = k, ncol = 1)
# Matrix for past values
vect <- LOOKUP[i, (j - np):(j - 1)]
# /!\ current pointer
# on olumn is thus: "j"
CDpi <- matrix(vect, nrow = k, ncol = length(vect), byrow = TRUE)
# Matrix for past distribution
if (pastDistrib) {
dbi <- summary(factor(LOOKUP[i, 1:(j - 1)], levels = c(1:k), exclude = NULL)) / length(1:(j - 1))
CDdbi <- matrix(dbi[1:k], nrow = k, ncol = k, byrow = TRUE)
}
# Matrix for future distribution
if (futureDistrib) {
dai <- summary(factor(LOOKUP[i, (j + 1):nc], levels = c(1:k), exclude = NULL)) / length((j + 1):nc)
CDdai <- matrix(dai[1:k], nrow = k, ncol = k, byrow = TRUE)
}
# Concatenating CDi
CDi <- cbind(CDi, CDpi)
if (pastDistrib) {
CDi <- cbind(CDi, CDdbi)
}
if (futureDistrib) {
CDi <- cbind(CDi, CDdai)
}
# Conversion of CDi into a data frame
CDi <- as.data.frame(CDi)
# Type transformation of the columns of CDi
# The first values of CDi must be of type factor
# (categorical values)
# We also account for the fact that levels that do not appear at
# all in a given variable of CD were discarded with droplevels before
# the fit of the mlogit model
if (regr != "rf") {
for (v in 1:(1 + np + nf)) {
CDi[, v] <- factor(CDi[, v], levels = levels(CD[, v]), exclude = NULL)
}
} else {
for (v in 2:(1 + np + nf)) {
CDi[, v] <- factor(CDi[, v], levels = c(1:(k + 1)))
CDi[, v][is.na(CDi[, v])] <- k + 1
}
CDi[, 1] <- factor(CDi[, 1], levels = levels(CD[, 1]))
}
# The last values of CDi must be of type numeric
# (distributions)
if (pastDistrib | futureDistrib) {
CDi[, (1 + np + nf + 1):ncol(CDi)] <- lapply(CDi[, (1 + np + nf + 1):ncol(CDi)], as.numeric)
}
# Eventually concatenating CDi with COi
# (the matrix containing the covariates)
if (all(is.na(CO)) == FALSE) {
# Checking if CO is NOT
# completely empty
# Creation of the matrix COi used in 3.3
if (is.null(dim(CO))) {
COi <- do.call(rbind, replicate(k, as.data.frame(CO[i]), simplify = FALSE))
} else {
COi <- do.call(rbind, replicate(k, as.data.frame(CO[i, ]), simplify = FALSE))
}
# Concatenating CDi and COi into CDi
CDi <- cbind(CDi, COi)
# Transformation of the names of the columns
# of CDi (called V1, V2, ..., "VtotV")
colnames(CDi) <- paste("V", 1:ncol(CDi), sep = "")
}
# Else, in case CO is empty (i.e. we don't
# consider any covariate)
# simply continue with the current CDi
# Eventually concatenating CDi with
# COtselected_i (the matrix containing the
# current time-dependent covariates)
# Checking if COt is NOT completely empty
if (ncot > 0) {
COtselected_i <- as.data.frame(matrix(nrow = 1, ncol = 0))
for (d in 1:(ncot / nc)) {
COtselected_i <- cbind(COtselected_i, COt[i, (j) + (d - 1) * nc])
}
COtselected_i <- do.call(rbind, replicate(k, as.data.frame(COtselected_i[1, ]), simplify = FALSE))
# Concatenating CDi and COtselected_i
# into CDi
CDi <- cbind(CDi, COtselected_i)
# Transformation of the names of the columns
# of CDi (called V1, V2, ..., "VtotV")
colnames(CDi) <- paste("V", 1:ncol(CDi), sep = "")
}
# Check for missing-values among predictors
if (max(is.na(CDi[1, 2:totV])) == 0) {
ODi <- RegrImpute(ODi, CDi, regr, reglog, noise, i, j, k)
}
}
return(ODi)
}
ODiImputeFUTURETiming <- function(CO, ODi, CD, COt, col, row_to_imp, pastDistrib, futureDistrib, k, np, nf, nc, ncot, totV, reglog, LOOKUP, regr, noise) {
for (u in 1:length(row_to_imp)) {
i <- row_to_imp[u]
# taking out the first coordinate
# (row number in ORDER) from REFORD
j <- col
# taking out the second coordinate
# (column number in ORDER) from REFORD
CDi <- matrix(NA, nrow = k, ncol = 1)
# Matrix for future values
vect <- LOOKUP[i, (j + 1):(j + nf)]
CDfi <- matrix(vect, nrow = k, ncol = length(vect), byrow = TRUE)
# Matrix for past distribution
if (pastDistrib) {
dbi <- summary(factor(LOOKUP[i, 1:(j - 1)], levels = c(1:k), exclude = NULL)) / length(1:(j - 1))
CDdbi <- matrix(dbi[1:k], nrow = k, ncol = k, byrow = TRUE)
}
# Matrix for future distribution
if (futureDistrib) {
dai <- summary(factor(LOOKUP[i, (j + 1):nc], levels = c(1:k), exclude = NULL)) / length((j + 1):nc)
CDdai <- matrix(dai[1:k], nrow = k, ncol = k, byrow = TRUE)
}
# Concatenating CDi
CDi <- cbind(CDi, CDfi)
if (pastDistrib) {
CDi <- cbind(CDi, CDdbi)
}
if (futureDistrib) {
CDi <- cbind(CDi, CDdai)
}
# Conversion of CDi into a data frame
CDi <- as.data.frame(CDi)
# Type transformation of the columns of CDi
# The first values of CDi must be of type factor
# (categorical values)
# We also account for the fact that levels that do not appear at
# all in a given variable of CD were discarded with droplevels before
# the fit of the mlogit model
if (regr != "rf") {
for (v in 1:(1 + np + nf)) {
CDi[, v] <- factor(CDi[, v], levels = levels(CD[, v]), exclude = NULL)
}
} else {
for (v in 2:(1 + np + nf)) {
CDi[, v] <- factor(CDi[, v], levels = c(1:(k + 1)))
CDi[, v][is.na(CDi[, v])] <- k + 1
}
CDi[, 1] <- factor(CDi[, 1], levels = levels(CD[, 1]))
}
# The last values of CDi must be of type numeric
# (distributions)
if (pastDistrib | futureDistrib) {
CDi[, (1 + np + nf + 1):ncol(CDi)] <- lapply(CDi[, (1 + np + nf + 1):ncol(CDi)], as.numeric)
}
# Eventually concatenating CDi with COi
# (the matrix containing the covariates)
if (all(is.na(CO)) == FALSE) {
# Checking if CO is NOT
# completely empty
# Creation of the matrix COi used in 3.3
if (is.null(dim(CO))) {
COi <- do.call(rbind, replicate(k, as.data.frame(CO[i]), simplify = FALSE))
} else {
COi <- do.call(rbind, replicate(k, as.data.frame(CO[i, ]), simplify = FALSE))
}
# Concatenating CDi and COi into CDi
CDi <- cbind(CDi, COi)
# Transformation of the names of the columns
# of CDi (called V1, V2, ..., "VtotV")
colnames(CDi) <- paste("V", 1:ncol(CDi), sep = "")
}
# Else, in case CO is empty (i.e. we don't
# consider any covariate)
# simply continue with the current CDi
# Eventually concatenating CDi with
# COtselected_i (the matrix containing the
# current time-dependent covariates)
# Checking if COt is NOT completely empty
if (ncot > 0) {
COtselected_i <- as.data.frame(matrix(nrow = 1, ncol = 0))
for (d in 1:(ncot / nc)) {
COtselected_i <- cbind(COtselected_i, COt[i, (j) + (d - 1) * nc])
}
COtselected_i <- do.call(rbind, replicate(k, as.data.frame(COtselected_i[1, ]), simplify = FALSE))
# Concatenating CDi and COtselected_i into
# CDi
CDi <- cbind(CDi, COtselected_i)
# Transformation of the names of the columns
# of CDi (called V1, V2, ..., "VtotV")
colnames(CDi) <- paste("V", 1:ncol(CDi), sep = "")
}
# Check for missing-values among predictors
if (max(is.na(CDi[1, 2:totV])) == 0) {
ODi <- RegrImpute(ODi, CDi, regr, reglog, noise, i, j, k)
}
}
return(ODi)
}
ODiImputePFTiming <- function(CO, ODi, CD, COt, col, row_to_imp, pastDistrib, futureDistrib, k, np, nf, nc, ncot, totV, reglog, LOOKUP, regr, noise, shift, MaxGap, order) {
for (u in 1:length(row_to_imp)) {
i <- row_to_imp[u]
# taking out the first coordinate
# (row number in ORDER) from REFORD
j <- col
# taking out the second coordinate
# (column number in ORDER) from REFORD
CDi <- matrix(NA, nrow = k, ncol = 1)
# Matrix for past values
shift <- as.numeric(shift)
if (shift == 0) {
vect <- LOOKUP[i, (j - np):(j - 1)]
} else {
vect <- LOOKUP[i, (j - shift - np):(j - shift - 1)]
}
CDpi <- matrix(vect, nrow = k, ncol = length(vect), byrow = TRUE)
# Matrix for future values
if (shift == 0) {
vect <- LOOKUP[i, (j + MaxGap - order + 1):(j + MaxGap - order + nf)]
} else {
vect <- LOOKUP[i, (j + 1):(j + nf)]
}
CDfi <- matrix(vect, nrow = k, ncol = length(vect), byrow = TRUE)
# Matrix for past distribution
if (pastDistrib) {
dbi <- summary(factor(LOOKUP[i, 1:(j - 1)], levels = c(1:k), exclude = NULL)) / length(1:(j - 1))
CDdbi <- matrix(dbi[1:k], nrow = k, ncol = k, byrow = TRUE)
}
# Matrix for future distribution
if (futureDistrib) {
dai <- summary(factor(LOOKUP[i, (j + 1):nc], levels = c(1:k), exclude = NULL)) / length((j + 1):nc)
CDdai <- matrix(dai[1:k], nrow = k, ncol = k, byrow = TRUE)
}
# Concatenating CDi
CDi <- cbind(CDi, CDpi, CDfi)
if (pastDistrib) {
CDi <- cbind(CDi, CDdbi)
}
if (futureDistrib) {
CDi <- cbind(CDi, CDdai)
}
# Conversion of CDi into a data frame
CDi <- as.data.frame(CDi)
# Type transformation of the columns of CDi
# The first values of CDi must be of type factor
# (categorical values)
# We also account for the fact that levels that do not appear at
# all in a given variable of CD were discarded with droplevels before
# the fit of the mlogit model
if (regr == "lm" | regr == "lrm" | regr == "mlogit") {
for (v in 1:(1 + np + nf)) {
CDi[, v] <- factor(CDi[, v], levels = levels(CD[, v]), exclude = NULL)
}
} else if (regr == "rf") {
for (v in 2:(1 + np + nf)) {
CDi[, v] <- factor(CDi[, v], levels = c(1:(k + 1)))
CDi[, v][is.na(CDi[, v])] <- k + 1
}
CDi[, 1] <- factor(CDi[, 1], levels = levels(CD[, 1]))
} else {
# glmnet
CDi[, 1] <- factor(CDi[, 1], levels = c(1:k))
for (v in 2:(1 + np + nf)) {
CDi[, v] <- factor(CDi[, v], levels = levels(CD[, v]), exclude = NULL)
}
}
# The last values of CDi must be of type numeric
# (distributions)
if (pastDistrib | futureDistrib) {
CDi[, (1 + np + nf + 1):ncol(CDi)] <- lapply(CDi[, (1 + np + nf + 1):ncol(CDi)], as.numeric)
}
# Eventually concatenating CDi with COi
# (the matrix containing the covariates)
if (all(is.na(CO)) == FALSE) {
# Checking if CO is NOT
# completely empty
# Creation of the matrix COi used in 3.3
if (is.null(dim(CO))) {
COi <- do.call(rbind, replicate(k, as.data.frame(CO[i]), simplify = FALSE))
} else {
COi <- do.call(rbind, replicate(k, as.data.frame(CO[i, ]), simplify = FALSE))
}
# Concatenating CDi and COi into CDi
CDi <- cbind(CDi, COi)
# Transformation of the names of the columns
# of CDi (called V1, V2, ..., "VtotV")
colnames(CDi) <- paste("V", 1:ncol(CDi), sep = "")
}
# Else, in case CO is empty (i.e. we don't
# consider any covariate)
# simply continue with the current CDi
# Eventually concatenating CDi with
# COtselected_i (the matrix containing the
# current time-dependent covariates)
# Checking if COt is NOT completely empty
if (ncot > 0) {
COtselected_i <- as.data.frame(matrix(nrow = 1, ncol = 0))
for (d in 1:(ncot / nc)) {
COtselected_i <- cbind(COtselected_i, COt[i, (j) + (d - 1) * nc])
}
COtselected_i <- do.call(rbind, replicate(k, as.data.frame(COtselected_i[1, ]), simplify = FALSE))
# Concatenating CDi and COtselected_i into
# CDi
CDi <- cbind(CDi, COtselected_i)
# Transformation of the names of the columns
# of CDi (called V1, V2, ..., "VtotV")
colnames(CDi) <- paste("V", 1:ncol(CDi), sep = "")
}
# Check for missing-values among predictors
# (i.e. we won't impute any value on the current
# MD if there is any NA among the VIs)
if (max(is.na(CDi[1, 2:totV])) == 0) {
# checking that
# there is no NA
# among the current
# VI (otherwise no
# data will be
# imputed for the
# current NA)
ODi <- RegrImpute(ODi, CDi, regr, reglog, noise, i, j, k)
}
}
return(ODi)
}
# 4. Imputation of initial NAs
#
#
################################################################################
# Impute initial NAs
ImputingInitialNAsTiming <- function(OD, covariates, time.covariates, ODi, totVi, COtsample, futureDistrib, InitGapSize, MaxInitGapSize, nr, nc, ud, nco, ncot, nfi, regr, k, available, noise, timeFrame, ...) {
# 4.1.-2. Creation of ORDERI -------------------------------------------------
REFORDI_L <- REFORDICreation(nr, nc, InitGapSize, MaxInitGapSize)
# 4.3. Imputation using a specific model -------------------------------------
for (order in 1:MaxInitGapSize) {
col <- MaxInitGapSize - order + 1
CD <- CDMatCreateTiming(covariates, COtsample, OD, time.covariates, min(nfi, nc - col), nr, nc, ncot, futureDistrib, k, col, timeFrame)
# 4.3.2 Computation of the model (Dealing with the LOCATIONS of imputation) --
if (length(table(CD[, 1])) > 1) {
log_CD <- list()
totVi <- 1 + min(nfi, nc - col) + nco + (ncot / nc)
log_CD[c("reglog", "CD")] <- ComputeModel(CD, regr, totVi, 0, min(nfi, nc - col), k, ...)
# 4.3.3 Imputation using the just created model (Dealing with the actual VALUES to impute)
ODi <- Init_NA_CreatedModelImputationTiming(OD, ODi, covariates, log_CD$CD, time.covariates, MaxInitGapSize, REFORDI_L, futureDistrib, totVi, nc, k, min(nfi, nc - col), ncot, regr, log_CD$reglog, noise, available, order)
} else {
lev <- names(table(CD[, 1]))
REFORDI <- as.matrix(REFORDI_L[[order]])
if (ncol(REFORDI) == 1) {
REFORDI <- t(REFORDI)
}
nr_REFORDI <- nrow(REFORDI)
for (u in 1:nr_REFORDI) {
i <- REFORDI[u, 1]
# taking out the first coordinate (row
# number in ORDER) from REFORDI
j <- REFORDI[u, 2]
ODi[i, j] <- lev
}
}
}
return(ODi)
}
Init_NA_CreatedModelImputationTiming <- function(OD, ODi, CO, CD, COt, MaxInitGapSize, REFORDI_L, futureDistrib, totVi, nc, k, nfi, ncot, regr, reglog, noise, available, order) {
# Conversion of ODi from data.frame to matrix
ODi <- as.matrix(ODi)
# Analysing the value of parameter available
if (available == TRUE) {
# we take the previously imputed data
# into account
LOOKUP <- ODi
} else {
# that is available == FALSE and thus we don't take
# the previously imputed data into account
LOOKUP <- OD
}
# Assigning the current "REFORDI_order" matrix to the
# variable matrix REFORDI
# (according to the current value of "order")
# tempObject = get(paste0("REFORDI_",order))
REFORDI <- as.matrix(REFORDI_L[[order]])
if (ncol(REFORDI) == 1) {
REFORDI <- t(REFORDI)
}
nr_REFORDI <- nrow(REFORDI)
for (u in 1:nr_REFORDI) {
i <- REFORDI[u, 1]
# taking out the first coordinate (row
# number in ORDER) from REFORDI
j <- REFORDI[u, 2]
# taking out the second coordinate
# (column number in ORDER) from REFORDI
CDi <- matrix(NA, nrow = k, ncol = 1)
# Matrix for future values
vect <- LOOKUP[i, (j + 1):(j + nfi)]
CDfi <- matrix(vect, nrow = k, ncol = length(vect), byrow = TRUE)
# Matrix for future distribution
if (futureDistrib) {
dai <- summary(factor(LOOKUP[i, (j + 1):nc], levels = c(1:k), exclude = NULL)) / length((j + 1):nc)
CDdai <- matrix(dai[1:k], nrow = k, ncol = k, byrow = TRUE)
}
# Concatenating CDi
CDi <- cbind(CDi, CDfi)
if (futureDistrib) {
CDi <- cbind(CDi, CDdai)
}
# Conversion of CDi into a data frame
CDi <- as.data.frame(CDi)
# Type transformation of the columns of CDi
# The first values of CDi must be of type factor
# (categorical values)
# We also account for the fact that levels that do not appear at
# all in a given variable of CD were discarded with droplevels before
# the fit of the mlogit model
if (regr != "rf") {
for (v in 1:(1 + nfi)) {
CDi[, v] <- factor(CDi[, v], levels = levels(CD[, v]), exclude = NULL)
}
} else {
for (v in 2:(1 + nfi)) {
CDi[, v] <- factor(CDi[, v], levels = c(1:(k + 1)))
CDi[, v][is.na(CDi[, v])] <- k + 1
}
CDi[, 1] <- factor(CDi[, 1], levels = levels(CD[, 1]))
}
# The last values of CDi must be of type numeric
# (distributions)
if (futureDistrib) {
CDi[, (1 + nfi + 1):ncol(CDi)] <- lapply(CDi[, (1 + nfi + 1):ncol(CDi)], as.numeric)
}
# Eventually concatenating CDi with COi (the matrix
# containing the covariates)
if (all(is.na(CO)) == FALSE) {
# Checking if CO is NOT
# completely empty
# Creation of the matrix COi used in 3.3
if (is.null(dim(CO))) {
COi <- do.call(rbind, replicate(k, as.data.frame(CO[i]), simplify = FALSE))
} else {
COi <- do.call(rbind, replicate(k, as.data.frame(CO[i, ]), simplify = FALSE))
}
# Concatenating CDi and COi into CDi
CDi <- cbind(CDi, COi)
# Transformation of the names of the columns
# of CDi (called V1, V2, ..., "VtotV")
colnames(CDi) <- paste("V", 1:ncol(CDi), sep = "")
}
# Else, in case CO is empty (i.e. we don't consider
# any covariate) simply continue with the current
# CDi
# Eventually concatenating CDi with COtselected_i
# (the matrix containing the current time-dependent
# covariates)
# Checking if COt is NOT completely empty
CDi <- COtselectionSpe(CDi, COt, ncot, nc, i, j, k)
# Check for missing-values among predictors
if (max(is.na(CDi[1, 2:totVi])) == 0) {
ODi <- RegrImpute(ODi, CDi, regr, reglog, noise, i, j, k)
}
}
return(ODi)
}
CDMatCreateTiming <- function(CO, COtsample, OD, COt, nfi, nr, nc, ncot, futureDistrib, k, col, timeFrame) {
# For Initial Gaps
# we will impute single NA after single NA going from the
# center of OD towards its very left border
# We here only take observations from the FUTURE into
# account --> nfi
# But we will create one single regression
# model used for the imputation of all
# the NAs belonging to an "initial gap"
udf <- min(timeFrame, nc - col - nfi)
udp <- min(timeFrame, col - 1)
ud <- udf + udp + 1
col_to_use <- (col - udp):(col + udf)
CD <- matrix(NA, nrow = nr * ud, ncol = 1)
# initialisation of matrix CDf
CDf <- matrix(NA, nrow = nr * ud, ncol = nfi)
if (ncot > 0) {
# initialisation of matrix COtselected
COtselected <- do.call(rbind, replicate(ud, COtsample, simplify = FALSE))
}
# initialisation of matrix CDda (for future distribution
# analysis)
# (Distribution After)
if (futureDistrib) {
CDda <- matrix(NA, nrow = nr * ud, ncol = k)
da <- matrix(NA, nrow = nr, ncol = k)
# submatrix of CDda: CDda is
# composed of ud matrix da on
# top of each other
}
iter <- 1
for (j in col_to_use) {
t1 <- (nr * (iter - 1) + 1)
t2 <- nr * iter
# VD
CD[t1:t2, 1] <- OD[, j]
# Future VIs
CDf[t1:t2, ] <- OD[, (j + 1):(j + nfi)]
# Eventually considering time-dependent covariates
if (ncot > 0) {
COtselected <- COtselected[t1:t2, j + (1:(ncot / nc) - 1) * nc]
}
# Future distribution (i.e. After)
if (futureDistrib) {
ODt <- t(OD)
ODt <- as.data.frame(ODt)
tempOD <- lapply(ODt[((j + 1):nc), ], factor, levels = c(1:k, NA), exclude = NULL)
# because:
# j-frameSize+np+1 + 1
# = j-frameSize+np+2
da_list <- lapply(tempOD, summary)
da_matrix <- do.call(rbind, da_list)
CDda[t1:t2, ] <- da_matrix[, 1:k] / length((j + 1):nc)
}
iter <- iter + 1
}
# Concatenating CD
CD <- cbind(CD, CDf)
if (futureDistrib) {
CD <- cbind(CD, CDda)
}
# Conversion of CD into a data frame
CD <- as.data.frame(CD)
# Eventually concatenating CD with COs (the matrix
# containing the covariates)
if (all(is.na(CO)) == FALSE) {
if (is.null(dim(CO))) {
CO <- matrix(CO, nrow = nrow(OD), ncol = 1)
COs <- do.call("rbind", rep(list(CO), ud))
# Concatenating CD and COs into CD
CD <- cbind(CD, COs)
} else {
# Checking if CO is NOT
# completely empty
# Creation of the stacked covariates
# matrix for 3.1
COs <- do.call("rbind", rep(list(CO), ud))
# Concatenating CD and COs into CD
CD <- cbind(CD, COs)
}
}
# Else, in case CO is empty (i.e. we don't consider any
# covariate)
# simply continue with the current CD
# Eventually concatenating CD with COtselected (the
# matrix containing the current time-dependent
# covariates)
# Checking if COt is NOT completely empty
if (ncot > 0) {
# Concatenating CD and COtselected into CD
CD <- cbind(CD, as.data.frame(COtselected))
}
return(CD)
}
# 5. Imputation of terminal NAs
#
#
################################################################################
# Impute terminal NAs
ImputingTerminalNAsTiming <- function(OD, covariates, time.covariates, ODi, COtsample, MaxTermGapSize, TermGapSize, pastDistrib, regr, npt, nco, ncot, totVt, nr, nc, ud, available, k, noise, timeFrame, ...) {
# 5.1.-2. Creation of ORDERT -------------------------------------------------
REFORDT_L <- REFORDTCreation(nr, nc, TermGapSize, MaxTermGapSize)
for (order in 1:MaxTermGapSize) {
col <- nc - MaxTermGapSize + order
CD <- TerminalCDMatCreateTiming(covariates, OD, time.covariates, COtsample, pastDistrib, min(npt, col - 1), nr, nc, ncot, k, col, timeFrame)
# 5.3.2 Computation of the model (Dealing with the LOCATIONS of imputation) --
if (length(table(CD[, 1])) > 1) {
log_CD <- list()
totVt <- 1 + min(npt, col - 1) + nco + (ncot / nc)
log_CD[c("reglog", "CD")] <- ComputeModel(CD, regr, totVt, min(npt, col - 1), 0, k, ...)
# 5.3.3 Imputation using the just created model (Dealing with the actual VALUES to impute)
ODi <- TerminalCreatedModelImputationTiming(covariates, OD, log_CD$CD, ODi, time.covariates, nc, ncot, totVt, REFORDT_L, pastDistrib, MaxTermGapSize, available, regr, log_CD$reglog, k, min(npt, col - 1), noise, order)
} else {
lev <- names(table(CD[, 1]))
REFORDT <- as.matrix(REFORDT_L[[order]])
if (ncol(REFORDT) == 1) {
REFORDT <- t(REFORDT)
}
nr_REFORDT <- nrow(REFORDT)
for (u in 1:nr_REFORDT) {
i <- REFORDT[u, 1]
# taking out the first coordinate (row
# number in ORDER) from REFORDT
j <- REFORDT[u, 2]
ODi[i, j] <- lev
}
}
}
# 5.3. Imputation using a specific model -------------------------------------
# 5.3.1 Building of the data matrix CD for the computation of the model ------
return(ODi)
}
################################################################################
# Imputation of the terminal created model
TerminalCreatedModelImputationTiming <- function(CO, OD, CD, ODi, COt, nc, ncot, totVt, REFORDT_L, pastDistrib, MaxTermGapSize, available, regr, reglog, k, npt, noise, order) {
# Conversion of ODi from data.frame to matrix
ODi <- as.matrix(ODi)
# Analysing the value of parameter available
if (available == TRUE) {
# we take the previously
# imputed data into account
LOOKUP <- ODi
} else { # that is available == FALSE and thus we
# don't take the previously imputed
# data into account
LOOKUP <- OD
}
# Assigning the current "REFORDT_order" matrix to the
# variable matrix REFORDT
# (according to the current value of "order")
REFORDT <- as.matrix(REFORDT_L[[order]])
if (ncol(REFORDT) == 1) {
REFORDT <- t(REFORDT)
}
nr_REFORDT <- nrow(REFORDT)
for (u in 1:nr_REFORDT) {
i <- REFORDT[u, 1] # taking out the first coordinate
# (row number in ORDER) from REFORDT
j <- REFORDT[u, 2] # taking out the second coordinate
# (column number in ORDER) from REFORDT
CDi <- matrix(NA, nrow = k, ncol = 1)
# Matrix for past values
vect <- LOOKUP[i, (j - npt):(j - 1)]
CDpi <- matrix(vect, nrow = k, ncol = length(vect), byrow = TRUE)
# Matrix for past distribution
if (pastDistrib) {
dbi <- summary(factor(LOOKUP[i, 1:(j - 1)], levels = c(1:k), exclude = NULL)) / length(1:(j - 1))
CDdbi <- matrix(dbi[1:k], nrow = k, ncol = k, byrow = TRUE)
}
# Concatenating CDi
CDi <- cbind(CDi, CDpi)
if (pastDistrib) {
CDi <- cbind(CDi, CDdbi)
}
# Conversion of CDi into a data frame
CDi <- as.data.frame(CDi)
# Type transformation of the columns of CDi
# The first values of CDi must be of type factor
# (categorical values)
# We also account for the fact that levels that do not appear at
# all in a given variable of CD were discarded with droplevels before
# the fit of the mlogit model
if (regr != "rf") {
for (v in 1:(1 + npt)) {
CDi[, v] <- factor(CDi[, v], levels = levels(CD[, v]), exclude = NULL)
}
} else {
for (v in 2:(1 + npt)) {
CDi[, v] <- factor(CDi[, v], levels = c(1:(k + 1)))
CDi[, v][is.na(CDi[, v])] <- k + 1
}
CDi[, 1] <- factor(CDi[, 1], levels = levels(CD[, 1]))
}
# The
# The last values of CDi must be of type numeric
# (distributions)
if (pastDistrib) {
CDi[, (1 + npt + 1):ncol(CDi)] <- lapply(CDi[, (1 + npt + 1):ncol(CDi)], as.numeric)
}
# Eventually concatenating CDi with COi (the matrix
# containing the covariates)
if (all(is.na(CO)) == FALSE) {
# Checking if CO is NOT
# completely empty
# Creation of the matrix COi used in 3.3
if (is.null(dim(CO))) {
COi <- do.call(rbind, replicate(k, as.data.frame(CO[i]), simplify = FALSE))
} else {
COi <- do.call(rbind, replicate(k, as.data.frame(CO[i, ]), simplify = FALSE))
}
# Concatenating CDi and COi into CDi
CDi <- cbind(CDi, COi)
# Transformation of the names of the columns
# of CDi (called V1, V2, ..., "VtotV")
colnames(CDi) <- paste("V", 1:ncol(CDi), sep = "")
}
# Else, in case CO is empty (i.e. we don't consider
# any covariate)
# simply continue with the current CDi
# Eventually concatenating CDi with COtselected_i
# (the matrix containing the current time-dependent
# covariates)
# Checking if COt is NOT completely empty
CDi <- COtselectionSpe(CDi, COt, ncot, nc, i, j, k)
# Check for missing-values among predictors
if (max(is.na(CDi[1, 2:totVt])) == 0) {
ODi <- RegrImpute(ODi, CDi, regr, reglog, noise, i, j, k)
}
}
return(ODi)
}
TerminalCDMatCreateTiming <- function(CO, OD, COt, COtsample, pastDistrib, npt, nr, nc, ncot, k, col, timeFrame) {
# For Terminal Gaps
# we will impute single NA after single NA going from the
# center of OD towards its very right border
# We here only take observations from the PAST
# into account --> npt
# But we will create one single regression
# model used for the imputation of all the NAs belonging to
# a "terminal gap"
udf <- min(timeFrame, nc - col)
udp <- min(timeFrame, col - 1 - npt)
ud <- udf + udp + 1
col_to_use <- (col - udp):(col + udf)
CD <- matrix(NA, nrow = nr * ud, ncol = 1)
# initialisation of matrix CDp
CDp <- matrix(NA, nrow = nr * ud, ncol = npt)
if (ncot > 0) {
# initialisation of matrix COtselected
COtselected <- do.call(rbind, replicate(ud, COtsample, simplify = FALSE))
}
# initialisation of matrix CDdb
# (for past distribution analysis)
# (Distribution Before)
if (pastDistrib) {
CDdb <- matrix(NA, nrow = nr * ud, ncol = k)
db <- matrix(NA, nrow = nr, ncol = k)
# submatrix of CDdb: CDdb is
# composed of ud matrix db on
# top of each other
}
iter <- 1
for (j in col_to_use) {
t1 <- (nr * (iter - 1) + 1)
t2 <- nr * iter
# VD
CD[t1:t2, 1] <- OD[, j]
# /!\ current pointer on column is thus: "j"
# Past VIs
CDp[t1:t2, ] <- OD[, (max(j - npt, 1)):(j - 1)]
# Eventually considering time-dependent covariates
if (ncot > 0) {
COtselected <- COtselected[t1:t2, j + (1:(ncot / nc) - 1) * nc]
}
# Past distribution (i.e. Before)
if (pastDistrib) {
ODt <- t(OD)
ODt <- as.data.frame(ODt)
tempOD <- lapply(ODt[(1:(j - 1)), ], factor, levels = c(1:k, NA), exclude = NULL)
db_list <- lapply(tempOD, summary)
db_matrix <- do.call(rbind, db_list)
CDdb[t1:t2, ] <- db_matrix[, 1:k] / length(1:(j - 1))
}
iter <- iter + 1
}
# Concatening CD
CD <- cbind(CD, CDp)
if (pastDistrib) {
CD <- cbind(CD, CDdb)
}
# Conversion of CD into a data frame
CD <- as.data.frame(CD)
# Eventually concatenating CD with COs
# (the matrix containing the covariates)
if (all(is.na(CO)) == FALSE) {
if (is.null(dim(CO))) {
CO <- matrix(CO, nrow = nrow(OD), ncol = 1)
COs <- do.call("rbind", rep(list(CO), ud))
# Concatenating CD and COs into CD
CD <- cbind(CD, COs)
} else {
# Checking if CO is NOT
# completely empty
# Creation of the stacked covariates
# matrix for 3.1
COs <- do.call("rbind", rep(list(CO), ud))
# Concatenating CD and COs into CD
CD <- cbind(CD, COs)
}
}
# Else, in case CO is empty (i.e. we don't consider any
# covariate) simply continue with the current CD
# Eventually concatenating CD with COtselected (the
# matrix containing the current time-dependent
# covariates)
# Checking if COt is NOT completely empty
if (ncot > 0) {
# Concatenating CD and COtselected into CD
CD <- cbind(CD, as.data.frame(COtselected))
}
return(CD)
}
# 6. Imputation of SLG NAs
#
#
################################################################################
# Left-hand side SLG imputation
LSLGNAsImputeTiming <- function(OD, ODi, CO, COt, COtsample, ORDERSLG, pastDistrib, futureDistrib, regr, np, nr, nf, nc, ud, ncot, nco, k, noise, available, timeFrame, ...) { # Checking if we have to impute
# left-hand side SLG
warning("/!\\ Specially Located Gaps (SLG) have been detected on the left-hand side of OD.", "\n", "For certain missing data groups close to the border of OD, np may have been automatically reduced.", "\n", "If you don't want this to happen, please choose a lower value for np.")
# 6.2.LEFT Computation of the order of imputation of each MD ----
# Initialization of a list to take all the variable returned by the functions
ParamList <- list()
# Creation of the temporary SLG matrices for the left-hand
# side of OD
for (h in 2:np) {
if (max(ORDERSLG[, h]) > 0) {
# Checking if a gap begins
# somewhere on the current column
# If that is not the case, there is
# no need to perform
# the entire following procedure
# and we simply can go
# to the next column of ORDERSLGLeft
ParamList[c("ORDERSLG_temp", "totV_temp", "np_temp")] <- SLGMatrix_temp(nr, nc, nf, h, ORDERSLG, nco, ncot, pastDistrib, futureDistrib, k)
if (max(ParamList$ORDERSLG_temp) == 0) {
next
}
# In a similar manner to part 2.4., we go here one
# single time through the reduced version
# ORDERSLGLeft_temp of ORDERSLG and we create
# MaxGapSLGLeft "REFORDSLGRLeft_" matrices
# collecting the coordinates of each corresponding
# values in ORDERSLGLeft_temp which are greater
# than 0
# REFORDSLGLeft matrices
# Initialization of the REFORDSLGLeft matrices
ParamList[c("MaxGap", "REFORD_L", "ORDERSLG_temp")] <- REFORDInit(ParamList$ORDERSLG_temp, nr, nc)
# MaxGapSLGLeft <- REFORDOD[[1]]
# REFORDSLG_L <- REFORDOD[[2]]
# 6.3.LEFT Imputation of the missing data listed by ORDERSLGLeft_temp and ORDERSLGRight_temp using a specific model ----
# 6.3.1.LEFT Building of the different data matrices CD ----
# for the computation of the model for every SLG
# on the left-hand side of OD
for (order in 1:ParamList$MaxGap) {
ncol_imp <- length(unique(ParamList$REFORD_L[[order]][, 2]))
col_to_imp <- unique(sort(unique(ParamList$REFORD_L[[order]])[, 2]))
for (i in 1:ncol_imp) {
CD_shift <- CDComputeTiming(CO, OD, COt, ParamList$MaxGap, order, ParamList$np_temp, nc, nr, nf, COtsample, pastDistrib, futureDistrib, ncot, k, col_to_imp[i], timeFrame)
if (length(table(CD_shift$CD[, 1])) > 1) {
# CD_shift <- CDComputeTiming(CO, OD, COt, MaxGap, order, np, nc, nr, nf, COtsample, pastDistrib, futureDistrib, ncot, k,timing,col_to_imp[i], timeFrame)
log_CD <- list()
log_CD[c("reglog", "CD")] <- ComputeModel(CD_shift$CD, regr, ParamList$totV_temp, ParamList$np_temp, nf, k, ...)
row_to_imp <- ParamList$REFORD_L[[order]][which(ParamList$REFORD_L[[order]][, 2] == col_to_imp[i]), 1]
# 3.3. Imputation using the just created model (Dealing with the actual VALUES to impute) ---------
ODi <- CreatedModelImputationTiming(order, CO, log_CD$CD, COt, OD, ODi, pastDistrib, futureDistrib, available, col_to_imp[i], row_to_imp, ncot, nc, ParamList$np_temp, nf, k, ParamList$totV_temp, regr, log_CD$reglog, noise, CD_shift$shift, ParamList$MaxGap)
} else {
lev <- names(table(CD_shift$CD[, 1]))
REFORDI <- as.matrix(ParamList$REFORD_L[[order]])
if (ncol(REFORDI) == 1) {
REFORDI <- t(REFORDI)
}
nr_REFORDI <- nrow(REFORDI)
for (u in 1:nr_REFORDI) {
i <- REFORDI[u, 1]
# taking out the first coordinate (row
# number in ORDER) from REFORDI
j <- REFORDI[u, 2]
ODi[i, j] <- lev
}
}
}
}
}
}
return(ODi)
}
#
# ################################################################################
# # Compute the CD matrix for SLG
#
# SLGCDMatBuildTiming <- function(CO,COt, OD, order, MaxGapSLGLeft, np, ncot, nr, nc, nf, COtsample, pastDistrib, futureDistrib, k,timing, col, timeFrame){
# # Building of a data matrix for the computation
# # of the model
# #
# # ud <- nc-(MaxGapSLGLeft-order+np+nf)
# # print(ud)
# # # number of usable data for each row of OD
# # # size of the current mobile caracteristic frame
# # # (that changes according to "order") which is
# # # equal to nc-ud+1
# # frameSize <- MaxGapSLGLeft-order+np+nf+1
# # Structure and building of the data matrix CD
# # The first column of CD is the dependent
# # variable (VD, response variable)
# # The following columns are the independent
# # variables (VIs, explanatory variables) coming
# # from the past (np>0) or the future (nf>0)
# # ordered by time and the distribution of
# # the possible values (i.e. all
# # possible categorical variables numbered from
# # 1 to k and of course the value NA)
# # respectively Before and After the NA to impute
# #
# # VD Past VIs Future VIS Past distribution Future distribution
# #
# # / \
# # | |
# # CD = | CD CDp CPf CDdb CDda |
# # \ /
# # \ /
# #
# # We are then going to create a specific CD
# # according to the value of np and nf
#
# # initialization of the current very left part
# # of the predictive model matrix ("matrice de
# # modele de prediction") with NA everywhere
# # (/!\ for each "order", we are going to build
# # such a CD)
#
# CD <- matrix(NA,nrow=nr*ud,ncol=1)
# # Dealing with the change of shape of the
# # prediction frame (according to whether the
# # imputed data is located at the beginning
# # (left) of a gap or at the end (right))
# # The purpose of this if statement is to detect
# # if we have to insert a shift (to jump at the
# # end of the gap) or not
# # if ( (np > 0 & nf > 0) & ( (MaxGapSLGLeft%%2==0 & order%%2==0) | (MaxGapSLGLeft%%2!=0 & order%%2!=0) )){
# # shift <- MaxGapSLGLeft - order
# # # jumping at the end of the gap
# # } else {
# # shift <- 0
# # # no shift is needed (note that no shift
# # # is needed for the case of model 2
# # # (only past) and model 3 (only future))
# # }
#
# if ( (np > 0 & nf > 0) & ( (MaxGapSLGLeft%%2==0 & order%%2==0) | (MaxGapSLGLeft%%2!=0 & order%%2!=0) )){
# shift <- MaxGapSLGLeft - order # jumping at the end of
# udp <- min(timeFrame,col-(MaxGapSLGLeft-order)-np-1) #number of usable data in the past
# udf <- min(timeFrame, nc-col-nf) #number of usable data in the futur
# col_to_use <- (col-udp):(col+udf)
# ud <- udp + udf +1
# # the gap
# } else {
# shift <- 0 # no shift is needed (note that
# # no shift is needed for the
# # case of model 2 (only past)
# # and model 3 (only future))
#
# if(np>0 & nf>0){
# udp <- min(timeFrame,col-np-1)
# udf <- min(timeFrame,nc-col-(MaxGapSLGLeft-order)-nf)
# col_to_use <- (col-udp):(col+udf)
# ud <- udp + udf + 1
# }
#
# }
#
# iter <- 1
# # initialisation of the number of
# # iterations of the following for loops
#
#
# # For left SLG, naturally only the cases "only
# # PAST" and "PAST and FUTURE" are possible to
# # meet (because np has to be greater than
# # 0, otherwise, it would mean that we are not
# # in the case of a SLG and that we
# # can impute it as a usual internal gap)
#
# # Only PAST
# if (np>0 & nf==0) {
# CD <- PastVICompute(CD, CO, OD, ncot, frameSize, iter, nr, nc, ud, np, COtsample, COt, pastDistrib, futureDistrib, k,timing)
# # PAST and FUTURE
# } else {
# CD <- PastFutureVIComputeTiming(CD, CO, OD, ncot, frameSize, iter, nr, nc, ud, np, COtsample, COt, pastDistrib, futureDistrib, k, nf, shift, timing, col_to_use, MaxGapSLGLeft, order)
# }
#
# return(list(CD, shift))
# }
#
##############################################################################
# Right-hand side SLG imputation
RSLGNAsImputeTiming <- function(OD, ODi, CO, COt, COtsample, ORDERSLGRight, pastDistrib, futureDistrib, regr, np, nr, nf, nc, ud, ncot, nco, k, noise, available, timeFrame, ...) {
# Checking if we have to impute right-hand
# side SLG
warning("/!\\ Specially Located Gaps (SLG) have been detected on the right-hand side of OD.", "\n", "For certain missing data groups close to the border of OD, nf may have been automatically reduced.", "\n", "If you don't want this to happen, please choose a lower value for nf.")
# 6.2.RIGHT Computation of the order of imputation of each MD ----------------
# Initialization of a list to take all the variable returned by the functions
ParamList <- list()
# Creation of the temporary SLG matrices for the right-hand
# side of OD
for (h in (nc - 1):(nc - nf + 1)) {
if (max(ORDERSLGRight[, h]) > 0) {
# Checking if a gap begins
# somewhere on the current
# column.
# If that is not the case, there is no need to
# perform the entire following procedure and we
# simply can go to the next column of ORDERSLGRight
ParamList[c("ORDERSLGRight_temp", "totV_temp", "nf_temp")] <- SLGMatrixRight_temp(nr, nc, np, h, ORDERSLGRight, nco, ncot, pastDistrib, futureDistrib, k)
if (max(ParamList$ORDERSLGRight_temp) == 0) {
next
}
# In a similar manner to part 2.4., we go here one
# single time through the reduced version
# ORDERSLGRight_temp of ORDERSLG and we create
# MaxGapSLGLRight "REFORDSLGRight_" matrices
# collecting the coordinates of
# each corresponding values in
# ORDERSLGRight_temp which are
# greater than 0
# REFORDSLGRight matrices
# Initialization of the REFORDSLGRight matrices
ParamList[c("MaxGap", "REFORD_L", "ORDERSLGRight_temp")] <- REFORDInit(ParamList$ORDERSLGRight_temp, nr, nc)
# 6.3.RIGHT Imputation of the missing data listed by ORDERSLGLeft_temp and ORDERSLGRight_temp using a specific model ----
for (order in 1:ParamList$MaxGap) {
ncol_imp <- length(unique(ParamList$REFORD_L[[order]][, 2]))
col_to_imp <- unique(sort(unique(ParamList$REFORD_L[[order]])[, 2]))
for (i in 1:ncol_imp) {
CD_shift <- CDComputeTiming(CO, OD, COt, ParamList$MaxGap, order, np, nc, nr, ParamList$nf_temp, COtsample, pastDistrib, futureDistrib, ncot, k, col_to_imp[i], timeFrame)
if (length(table(CD_shift$CD[, 1])) > 1) {
# CD_shift <- CDComputeTiming(CO, OD, COt, MaxGap, order, np, nc, nr, nf, COtsample, pastDistrib, futureDistrib, ncot, k,timing,col_to_imp[i], timeFrame)
log_CD <- list()
log_CD[c("reglog", "CD")] <- ComputeModel(CD_shift$CD, regr, ParamList$totV_temp, np, ParamList$nf_temp, k, ...)
row_to_imp <- ParamList$REFORD_L[[order]][which(ParamList$REFORD_L[[order]][, 2] == col_to_imp[i]), 1]
# 3.3. Imputation using the just created model (Dealing with the actual VALUES to impute) ---------
ODi <- CreatedModelImputationTiming(order, CO, log_CD$CD, COt, OD, ODi, pastDistrib, futureDistrib, available, col_to_imp[i], row_to_imp, ncot, nc, np, ParamList$nf_temp, k, ParamList$totV_temp, regr, log_CD$reglog, noise, CD_shift$shift, ParamList$MaxGap)
} else {
lev <- names(table(CD_shift$CD[, 1]))
REFORDI <- as.matrix(ParamList$REFORD_L[[order]])
if (ncol(REFORDI) == 1) {
REFORDI <- t(REFORDI)
}
nr_REFORDI <- nrow(REFORDI)
for (u in 1:nr_REFORDI) {
i <- REFORDI[u, 1]
# taking out the first coordinate (row
# number in ORDER) from REFORDI
j <- REFORDI[u, 2]
ODi[i, j] <- lev
}
}
}
}
}
}
return(ODi)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.