Nothing
# C. File containing function used several time for different part of seqimpute
#
#
################################################################################
# Consider time-dependent covariates
COtselection <- function(COtselected, COt, ncot, t1, t2, nr, nc, j, shifted) {
COttemp <- as.data.frame(matrix(nrow = nr, ncol = 0))
for (d in 1:(ncot / nc)) {
COttemp <- cbind(COttemp, COt[, (j + shifted) + (d - 1) * nc])
}
COtselected[t1:t2, ] <- COttemp
return(COtselected)
}
################################################################################
# Concatenating CDi with COtselected_i (the matrix containing the current
# time-dependent covariates) Checking if COt is NOT completely empty
COtselectionSpe <- function(CDi, COt, ncot, nc, i, j, k) {
if (ncot > 0) {
COtselected_i <- as.data.frame(matrix(nrow = 1, ncol = 0))
for (d in 1:(ncot / nc)) {
COtselected_i <- cbind(COtselected_i, COt[i, (j) + (d - 1) * nc])
}
COtselected_i <- do.call(rbind, replicate(k, as.data.frame(COtselected_i[1, ]), simplify = FALSE))
# Concatenating CDi and COtselected_i
# into CDi
CDi <- cbind(CDi, COtselected_i)
# Transformation of the names of the columns
# of CDi (called V1, V2, ..., "VtotV")
colnames(CDi) <- paste("V", 1:ncol(CDi), sep = "")
}
return(CDi)
}
################################################################################
# Creation of matrices REFORD with ORDER
REFORDInit <- function(ORDER, nr, nc) {
# The purpose of this function is to accelerate part 3.3 in which we
# initially (i.e. with the first versions of seqimpute3.R) go "order"
# times through the matrix ORDER
#
# Going one single time through the matrix ORDER, we create MaxGap
# matrices REFORD (numbered from REFORD_1 to "REFORD_MaxGap") which
# collect the coordinates of each corresponding values greater than 0
# It will create MaxGap lookup matrices that will be used in point 3.3
# to directly pinpoint the NA that have to be currently imputed
# according to the value of the variable "order"
# Updating MaxGap
MaxGap <- max(ORDER[ORDER != 0]) - (min(ORDER[ORDER != 0]) - 1)
# Initialization of the REFORD matrices
REFORD_L <- lapply(1:MaxGap, matrix, data = NA, nrow = 0, ncol = 2)
# Return the matrix of coordinates of value of ORDER bigger than 0
non_zero <- which(ORDER > 0, arr.ind = TRUE)
non_zero <- non_zero[(non_zero[, 1] <= nr) & (non_zero[, 2] <= nc), , drop = F]
# Updating ORDER so that it becomes a matrix with positive
# values going from 1 to MaxGap
ORDER[non_zero] <- ORDER[non_zero] - (min(ORDER[ORDER != 0]) - 1)
# Collecting the coordinates for each value of "order"
non_zero <- non_zero[order(non_zero[, 1]), , drop = F]
ord_cord <- ORDER[non_zero]
for (i in 1:MaxGap) {
REFORD_L[[i]] <- non_zero[which(ord_cord == i), , drop = F]
}
return(list(MaxGap, REFORD_L, ORDER))
}
################################################################################
# Creation of matrices REFORD with GapSize
REFORDInit_TI <- function(GapSize, nr, ORDER, GapSizelist) {
# Initialization of the REFORD matrices
REFORD_L <- lapply(1:GapSize, matrix, data = NA, nrow = 0, ncol = 2)
# Return the matrix of coordinates of value of ORDER bigger than 0
non_zero <- which(ORDER > 0, arr.ind = TRUE)
non_zero <- non_zero[(non_zero[, 1] <= nr) & (non_zero[, 2] %in% GapSizelist), , drop = F]
# Collecting the coordinates for each value of "order"
ord_cord <- ORDER[non_zero, drop = F]
for (i in 1:GapSize) {
REFORD_L[[i]] <- non_zero[which(ord_cord == i), ]
}
return(REFORD_L)
}
################################################################################
# Past VI computation
PastVICompute <- function(CD, CO, OD, ncot, frameSize, iter, nr, nc, ud, np, COtsample, COt, pastDistrib, futureDistrib, k) {
CDp <- matrix(NA, nrow = nr * ud, ncol = np)
if (ncot > 0) {
# initialisation of matrix COtselected
COtselected <- do.call(rbind, replicate(ud, COtsample, simplify = FALSE))
}
# initialisation of matrix CDdb (for past
# distribution analysis) (Distribution Before)
if (pastDistrib) {
CDdb <- matrix(NA, nrow = nr * ud, ncol = k)
db <- matrix(NA, nrow = nr, ncol = k)
# submatrix of CDdb:
# CDdb is composed of
# ud matrix db on top
# of each other
}
# initialisation of matrix CDda (for future
# distribution analysis) (Distribution After)
if (futureDistrib) {
CDda <- matrix(NA, nrow = nr * ud, ncol = k)
# CDda has same dimensions as CDdb
da <- matrix(NA, nrow = nr, ncol = k)
# da has same dimensions as db
}
for (j in frameSize:nc) {
# /!\ j is initialised at
# the very end (utmost right) of the
# frame
t1 <- (nr * (iter - 1) + 1)
# Determining the locations
# of the time span (always nr) of
# the piled up blocks of CD
t2 <- nr * iter
# VD
CD[t1:t2, 1] <- OD[, j - frameSize + np + 1]
# past VIs
CDp[t1:t2, ] <- OD[, (j - frameSize + 1):(j - frameSize + np)]
# /!\ current
# pointer on column
# is thus:
# "j-frameSize
# Eventually considering time-dependent
# covariates
if (ncot > 0) {
COtselected <- COtselection(COtselected, COt, ncot, t1, t2, nr, nc, j, shifted = -frameSize + np + 1)
}
# Past distribution (i.e. Before)
if (pastDistrib) {
ODt <- t(OD)
ODt <- as.data.frame(ODt)
tempOD <- lapply(ODt[(1:(j - frameSize + np)), ], factor, levels = c(1:k, NA), exclude = NULL)
# because:
# j-frameSize+np+1 - 1 = j-frameSize+np
db_list <- lapply(tempOD, summary)
db_matrix <- do.call(rbind, db_list)
CDdb[t1:t2, ] <- db_matrix[, 1:k] / length(1:(j - frameSize + np))
}
# Future distribution (i.e. After)
if (futureDistrib) {
if ((j - frameSize + np + 2) <= nc) {
ODt <- t(OD)
ODt <- as.data.frame(ODt)
tempOD <- lapply(ODt[((j - frameSize + np + 2):nc), ], factor, levels = c(1:k, NA), exclude = NULL)
# because:
# j-frameSize+np+1 + 1
# = j-frameSize+np+2
da_list <- lapply(tempOD, summary)
da_matrix <- do.call(rbind, da_list)
CDda[t1:t2, ] <- da_matrix[, 1:k] / length((j - frameSize + np + 2):nc)
} else {
# if index in OD exceeds OD number of
# columns, the future distribution of
# the k categorical variables is simply
# null for everyone of them
CDda[t1:t2, ] <- matrix(nrow = nr, ncol = k, 0)
}
}
iter <- iter + 1
}
# past VIs
CD <- cbind(CD, CDp)
if (pastDistrib) {
CD <- cbind(CD, CDdb)
}
if (futureDistrib) {
CD <- cbind(CD, CDda)
}
# Conversion of CD into a data frame
CD <- as.data.frame(CD)
# Eventually concatenating CD with COs (the matrix
# containing the covariates)
if (all(is.na(CO)) == FALSE) {
if (is.null(dim(CO))) {
CO <- matrix(CO, nrow = nrow(OD), ncol = 1)
COs <- do.call("rbind", rep(list(CO), ud))
# Concatenating CD and COs into CD
CD <- cbind(CD, COs)
} else {
# Checking if CO is NOT
# completely empty
# Creation of the stacked covariates
# matrix for 3.1
COs <- do.call("rbind", rep(list(CO), ud))
# Concatenating CD and COs into CD
CD <- cbind(CD, COs)
}
}
# Else, in case CO is empty (i.e. we don't consider
# any covariate) simply continue with the current CD
# Eventually concatenating CD with COtselected (the
# matrix containing the current time-dependent
# covariates)
# Checking if COt is NOT completely empty
if (ncot > 0) {
# Concatenating CD and COtselected into CD
CD <- cbind(CD, as.data.frame(COtselected))
}
return(CD)
}
################################################################################
# Future VI computation
FutureVICompute <- function(CD, CO, OD, ncot, frameSize, iter, nr, nc, ud, np, COtsample, COt, pastDistrib, futureDistrib, k, nf) {
CDf <- matrix(NA, nrow = nr * ud, ncol = nf)
if (ncot > 0) {
# initialisation of matrix COtselected
COtselected <- do.call(rbind, replicate(ud, COtsample, simplify = FALSE))
}
# initialisation of matrix CDdb (for past
# distribution analysis) (Distribution Before)
if (pastDistrib) {
CDdb <- matrix(NA, nrow = nr * ud, ncol = k)
db <- matrix(NA, nrow = nr, ncol = k)
# submatrix of CDdb:
# CDdb is composed of
# ud matrix db on top
# of each other
}
# initialisation of matrix CDda (for future
# distribution analysis) (Distribution After)
if (futureDistrib) {
CDda <- matrix(NA, nrow = nr * ud, ncol = k)
# CDda has same dimensions as CDdb
da <- matrix(NA, nrow = nr, ncol = k)
# da has same dimensions as db
}
for (j in frameSize:nc) {
# /!\ j is initialised at
# the very end (utmost right) of the
# frame
t1 <- (nr * (iter - 1) + 1)
# Determining the locations
# of the time span (always nr) of
# the piled up blocks of CD
t2 <- nr * iter
# VD
CD[t1:t2, 1] <- OD[, j - frameSize + np + 1]
# future VIs
CDf[t1:t2, ] <- OD[, (j - nf + 1):j]
# /!\ current
# pointer on column
# is thus:
# "j-frameSize
# Eventually considering time-dependent
# covariates
if (ncot > 0) {
COtselected <- COtselection(COtselected, COt, ncot, t1, t2, nr, nc, j, shifted = -frameSize + np + 1)
}
# Past distribution (i.e. Before)
if (pastDistrib) {
ODt <- t(OD)
ODt <- as.data.frame(ODt)
tempOD <- lapply(ODt[(1:(j - frameSize + np)), ], factor, levels = c(1:k, NA), exclude = NULL)
# because:
# j-frameSize+np+1 - 1 = j-frameSize+np
db_list <- lapply(tempOD, summary)
db_matrix <- do.call(rbind, db_list)
CDdb[t1:t2, ] <- db_matrix[, 1:k] / length(1:(j - frameSize + np))
}
# Future distribution (i.e. After)
if (futureDistrib) {
ODt <- t(OD)
ODt <- as.data.frame(ODt)
tempOD <- lapply(ODt[((j - frameSize + np + 2):nc), ], factor, levels = c(1:k, NA), exclude = NULL)
# because:
# j-frameSize+np+1 + 1
# = j-frameSize+np+2
da_list <- lapply(tempOD, summary)
da_matrix <- do.call(rbind, da_list)
CDda[t1:t2, ] <- da_matrix[, 1:k] / length((j - frameSize + np + 2):nc)
}
iter <- iter + 1
}
# future VIs
CD <- cbind(CD, CDf)
if (pastDistrib) {
CD <- cbind(CD, CDdb)
}
if (futureDistrib) {
CD <- cbind(CD, CDda)
}
# Conversion of CD into a data frame
CD <- as.data.frame(CD)
# Eventually concatenating CD with COs (the matrix
# containing the covariates)
if (all(is.na(CO)) == FALSE) {
if (is.null(dim(CO))) {
CO <- matrix(CO, nrow = nrow(OD), ncol = 1)
COs <- do.call("rbind", rep(list(CO), ud))
# Concatenating CD and COs into CD
CD <- cbind(CD, COs)
} else {
# Checking if CO is NOT
# completely empty
# Creation of the stacked covariates
# matrix for 3.1
COs <- do.call("rbind", rep(list(CO), ud))
# Concatenating CD and COs into CD
CD <- cbind(CD, COs)
}
}
# Else, in case CO is empty (i.e. we don't consider
# any covariate) simply continue with the current CD
# Eventually concatenating CD with COtselected (the
# matrix containing the current time-dependent
# covariates)
# Checking if COt is NOT completely empty
if (ncot > 0) {
# Concatenating CD and COtselected into CD
CD <- cbind(CD, as.data.frame(COtselected))
}
return(CD)
}
################################################################################
# Past or future VI computation
PastFutureVICompute <- function(CD, CO, OD, ncot, frameSize, iter, nr, nc, ud, np, COtsample, COt, pastDistrib, futureDistrib, k, nf, shift) {
CDp <- matrix(NA, nrow = nr * ud, ncol = np)
CDf <- matrix(NA, nrow = nr * ud, ncol = nf)
if (ncot > 0) {
# initialisation of matrix COtselected
COtselected <- do.call(rbind, replicate(ud, COtsample, simplify = FALSE))
}
# initialisation of matrix CDdb (for past
# distribution analysis) (Distribution Before)
if (pastDistrib) {
CDdb <- matrix(NA, nrow = nr * ud, ncol = k)
db <- matrix(NA, nrow = nr, ncol = k)
# submatrix of CDdb:
# CDdb is composed of
# ud matrix db on top
# of each other
}
# initialisation of matrix CDda (for future
# distribution analysis) (Distribution After)
if (futureDistrib) {
CDda <- matrix(NA, nrow = nr * ud, ncol = k)
# CDda has same dimensions as CDdb
da <- matrix(NA, nrow = nr, ncol = k)
# da has same dimensions as db
}
for (j in frameSize:nc) {
# /!\ j is initialised at
# the very end (utmost right) of the
# frame
t1 <- (nr * (iter - 1) + 1)
# Determining the locations
# of the time span (always nr) of
# the piled up blocks of CD
t2 <- nr * iter
# VD
CD[t1:t2, 1] <- OD[, j - frameSize + np + 1 + shift]
# past VIs
CDp[t1:t2, ] <- OD[, (j - frameSize + 1):(j - frameSize + np)]
# future VIs
CDf[t1:t2, ] <- OD[, (j - nf + 1):j]
# /!\ current
# pointer on column
# is thus:
# "j-frameSize
# Eventually considering time-dependent
# covariates
if (ncot > 0) {
COtselected <- COtselection(COtselected, COt, ncot, t1, t2, nr, nc, j, shifted = -frameSize + np + 1 + shift)
}
# Past distribution (i.e. Before)
if (pastDistrib) {
ODt <- t(OD)
ODt <- as.data.frame(ODt)
tempOD <- lapply(ODt[(1:(j - frameSize + np + shift)), ], factor, levels = c(1:k, NA), exclude = NULL)
# because:
# j-frameSize+np+1 - 1 = j-frameSize+np
db_list <- lapply(tempOD, summary)
db_matrix <- do.call(rbind, db_list)
CDdb[t1:t2, ] <- db_matrix[, 1:k] / length(1:(j - frameSize + np + shift))
}
# Future distribution (i.e. After)
if (futureDistrib) {
ODt <- t(OD)
ODt <- as.data.frame(ODt)
tempOD <- lapply(ODt[((j - frameSize + np + shift + 2):nc), ], factor, levels = c(1:k, NA), exclude = NULL)
# because:
# j-frameSize+np+1 + 1
# = j-frameSize+np+2
da_list <- lapply(tempOD, summary)
da_matrix <- do.call(rbind, da_list)
CDda[t1:t2, ] <- da_matrix[, 1:k] / length((j - frameSize + np + shift + 2):nc)
}
iter <- iter + 1
}
# past and future VIs
CD <- cbind(CD, CDp, CDf)
if (pastDistrib) {
CD <- cbind(CD, CDdb)
}
if (futureDistrib) {
CD <- cbind(CD, CDda)
}
# Conversion of CD into a data frame
CD <- as.data.frame(CD)
# Eventually concatenating CD with COs (the matrix
# containing the covariates)
if (all(is.na(CO)) == FALSE) {
if (is.null(dim(CO))) {
CO <- matrix(CO, nrow = nrow(OD), ncol = 1)
COs <- do.call("rbind", rep(list(CO), ud))
# Concatenating CD and COs into CD
CD <- cbind(CD, COs)
} else {
# Checking if CO is NOT
# completely empty
# Creation of the stacked covariates
# matrix for 3.1
COs <- do.call("rbind", rep(list(CO), ud))
# Concatenating CD and COs into CD
CD <- cbind(CD, COs)
}
}
# Else, in case CO is empty (i.e. we don't consider
# any covariate) simply continue with the current CD
# Eventually concatenating CD with COtselected (the
# matrix containing the current time-dependent
# covariates)
# Checking if COt is NOT completely empty
if (ncot > 0) {
# Concatenating CD and COtselected into CD
CD <- cbind(CD, as.data.frame(COtselected))
}
return(CD)
}
################################################################################
# Imputation where only PAST VIs exist
ODiImputePAST <- function(CO, ODi, CD, COt, REFORD, nr_REFORD, pastDistrib, futureDistrib, k, np, nf, nc, ncot, totV, reglog, LOOKUP, regr, noise) {
for (u in 1:nr_REFORD) {
i <- REFORD[u, 1]
# taking out the first coordinate
# (row number in ORDER) from REFORD
j <- REFORD[u, 2]
# taking out the second coordinate
# (column number in ORDER) from REFORD
CDi <- matrix(NA, nrow = k, ncol = 1)
# Matrix for past values
vect <- LOOKUP[i, (j - np):(j - 1)]
# /!\ current pointer
# on olumn is thus: "j"
CDpi <- matrix(vect, nrow = k, ncol = length(vect), byrow = TRUE)
# Matrix for past distribution
if (pastDistrib) {
dbi <- summary(factor(LOOKUP[i, 1:(j - 1)], levels = c(1:k), exclude = NULL)) / length(1:(j - 1))
CDdbi <- matrix(dbi[1:k], nrow = k, ncol = k, byrow = TRUE)
}
# Matrix for future distribution
if (futureDistrib) {
dai <- summary(factor(LOOKUP[i, (j + 1):nc], levels = c(1:k), exclude = NULL)) / length((j + 1):nc)
CDdai <- matrix(dai[1:k], nrow = k, ncol = k, byrow = TRUE)
}
# Concatenating CDi
CDi <- cbind(CDi, CDpi)
if (pastDistrib) {
CDi <- cbind(CDi, CDdbi)
}
if (futureDistrib) {
CDi <- cbind(CDi, CDdai)
}
# Conversion of CDi into a data frame
CDi <- as.data.frame(CDi)
# Type transformation of the columns of CDi
# The first values of CDi must be of type factor
# (categorical values)
if (regr == "lm" | regr == "lrm") {
for (v in 1:(1 + np + nf)) {
CDi[, v] <- factor(CDi[, v], levels = levels(CD[, v]), exclude = NULL)
}
} else if (regr == "rf") {
for (v in 2:(1 + np + nf)) {
CDi[, v] <- factor(CDi[, v], levels = c(1:(k + 1)))
CDi[, v][is.na(CDi[, v])] <- k + 1
}
CDi[, 1] <- factor(CDi[, 1], levels = levels(CD[, 1]))
} else {
# multinom
CDi[, 1] <- factor(CDi[, 1], levels = c(1:k))
for (v in 2:(1 + np + nf)) {
CDi[, v] <- factor(CDi[, v], levels = levels(CD[, v]), exclude = NULL)
}
}
# The last values of CDi must be of type numeric
# (distributions)
if (pastDistrib | futureDistrib) {
CDi[, (1 + np + nf + 1):ncol(CDi)] <- lapply(CDi[, (1 + np + nf + 1):ncol(CDi)], as.numeric)
}
# Eventually concatenating CDi with COi
# (the matrix containing the covariates)
if (all(is.na(CO)) == FALSE) {
# Checking if CO is NOT
# completely empty
# Creation of the matrix COi used in 3.3
if (is.null(dim(CO))) {
COi <- do.call(rbind, replicate(k, as.data.frame(CO[i]), simplify = FALSE))
} else {
COi <- do.call(rbind, replicate(k, as.data.frame(CO[i, ]), simplify = FALSE))
}
# Concatenating CDi and COi into CDi
CDi <- cbind(CDi, COi)
# Transformation of the names of the columns
# of CDi (called V1, V2, ..., "VtotV")
colnames(CDi) <- paste("V", 1:ncol(CDi), sep = "")
}
# Else, in case CO is empty (i.e. we don't
# consider any covariate)
# simply continue with the current CDi
# Eventually concatenating CDi with
# COtselected_i (the matrix containing the
# current time-dependent covariates)
# Checking if COt is NOT completely empty
if (ncot > 0) {
COtselected_i <- as.data.frame(matrix(nrow = 1, ncol = 0))
for (d in 1:(ncot / nc)) {
COtselected_i <- cbind(COtselected_i, COt[i, (j) + (d - 1) * nc])
}
COtselected_i <- do.call(rbind, replicate(k, as.data.frame(COtselected_i[1, ]), simplify = FALSE))
# Concatenating CDi and COtselected_i
# into CDi
CDi <- cbind(CDi, COtselected_i)
# Transformation of the names of the columns
# of CDi (called V1, V2, ..., "VtotV")
colnames(CDi) <- paste("V", 1:ncol(CDi), sep = "")
}
# Check for missing-values among predictors
if (max(is.na(CDi[1, 2:totV])) == 0) {
ODi <- RegrImpute(ODi, CDi, regr, reglog, noise, i, j, k)
}
}
return(ODi)
}
################################################################################
# Imputation where past and future VIs exist
ODiImputePF <- function(CO, ODi, CD, COt, REFORD, nr_REFORD, pastDistrib, futureDistrib, k, np, nf, nc, ncot, totV, reglog, LOOKUP, regr, noise, shift, MaxGap, order) {
for (u in 1:nr_REFORD) {
i <- REFORD[u, 1]
# taking out the first coordinate
# (row number in ORDER) from REFORD
j <- REFORD[u, 2]
# taking out the second coordinate
# (column number in ORDER) from REFORD
CDi <- matrix(NA, nrow = k, ncol = 1)
# Matrix for past values
shift <- as.numeric(shift)
vect <- LOOKUP[i, (j - shift - np):(j - shift - 1)]
CDpi <- matrix(vect, nrow = k, ncol = length(vect), byrow = TRUE)
# Matrix for future values
vect <- LOOKUP[i, (j - shift + MaxGap - order + 1):(j - shift + MaxGap - order + nf)]
CDfi <- matrix(vect, nrow = k, ncol = length(vect), byrow = TRUE)
# Matrix for past distribution
if (pastDistrib) {
dbi <- summary(factor(LOOKUP[i, 1:(j - 1)], levels = c(1:k), exclude = NULL)) / length(1:(j - 1))
CDdbi <- matrix(dbi[1:k], nrow = k, ncol = k, byrow = TRUE)
}
# Matrix for future distribution
if (futureDistrib) {
dai <- summary(factor(LOOKUP[i, (j + 1):nc], levels = c(1:k), exclude = NULL)) / length((j + 1):nc)
CDdai <- matrix(dai[1:k], nrow = k, ncol = k, byrow = TRUE)
}
# Concatenating CDi
CDi <- cbind(CDi, CDpi, CDfi)
if (pastDistrib) {
CDi <- cbind(CDi, CDdbi)
}
if (futureDistrib) {
CDi <- cbind(CDi, CDdai)
}
# Conversion of CDi into a data frame
CDi <- as.data.frame(CDi)
# Type transformation of the columns of CDi
# The first values of CDi must be of type factor
# (categorical values)
if (regr == "lm" | regr == "lrm") {
for (v in 1:(1 + np + nf)) {
CDi[, v] <- factor(CDi[, v], levels = levels(CD[, v]), exclude = NULL)
}
} else if (regr == "rf") {
for (v in 2:(1 + np + nf)) {
CDi[, v] <- factor(CDi[, v], levels = c(1:(k + 1)))
CDi[, v][is.na(CDi[, v])] <- k + 1
}
CDi[, 1] <- factor(CDi[, 1], levels = levels(CD[, 1]))
} else {
# multinom
CDi[, 1] <- factor(CDi[, 1], levels = c(1:k))
for (v in 2:(1 + np + nf)) {
CDi[, v] <- factor(CDi[, v], levels = levels(CD[, v]), exclude = NULL)
}
}
# The last values of CDi must be of type numeric
# (distributions)
if (pastDistrib | futureDistrib) {
CDi[, (1 + np + nf + 1):ncol(CDi)] <- lapply(CDi[, (1 + np + nf + 1):ncol(CDi)], as.numeric)
}
# Eventually concatenating CDi with COi
# (the matrix containing the covariates)
if (all(is.na(CO)) == FALSE) {
# Checking if CO is NOT
# completely empty
# Creation of the matrix COi used in 3.3
if (is.null(dim(CO))) {
COi <- do.call(rbind, replicate(k, as.data.frame(CO[i]), simplify = FALSE))
} else {
COi <- do.call(rbind, replicate(k, as.data.frame(CO[i, ]), simplify = FALSE))
}
# Concatenating CDi and COi into CDi
CDi <- cbind(CDi, COi)
# Transformation of the names of the columns
# of CDi (called V1, V2, ..., "VtotV")
colnames(CDi) <- paste("V", 1:ncol(CDi), sep = "")
}
# Else, in case CO is empty (i.e. we don't
# consider any covariate)
# simply continue with the current CDi
# Eventually concatenating CDi with
# COtselected_i (the matrix containing the
# current time-dependent covariates)
# Checking if COt is NOT completely empty
if (ncot > 0) {
COtselected_i <- as.data.frame(matrix(nrow = 1, ncol = 0))
for (d in 1:(ncot / nc)) {
COtselected_i <- cbind(COtselected_i, COt[i, (j) + (d - 1) * nc])
}
COtselected_i <- do.call(rbind, replicate(k, as.data.frame(COtselected_i[1, ]), simplify = FALSE))
# Concatenating CDi and COtselected_i into
# CDi
CDi <- cbind(CDi, COtselected_i)
# Transformation of the names of the columns
# of CDi (called V1, V2, ..., "VtotV")
colnames(CDi) <- paste("V", 1:ncol(CDi), sep = "")
}
# Check for missing-values among predictors
# (i.e. we won't impute any value on the current
# MD if there is any NA among the VIs)
if (max(is.na(CDi[1, 2:totV])) == 0) {
# checking that
# there is no NA
# among the current
# VI (otherwise no
# data will be
# imputed for the
# current NA)
ODi <- RegrImpute(ODi, CDi, regr, reglog, noise, i, j, k)
}
}
return(ODi)
}
################################################################################
# Impute value with the chosen regression model
RegrImpute <- function(ODi, CDi, regr, reglog, noise, i, j, k) {
if (regr == "multinom") {
if (length(reglog$lev) > 2) {
pred <- predict(reglog, CDi, type = "probs")[1, ]
pred <- cumsum(pred)
names_saved <- names(pred)
alea <- runif(1)
# Example value returned in "alea":
# [1] 0.2610005
#
sel <- as.numeric(names_saved[which(pred >= alea)])
} else {
pred <- predict(reglog, CDi, type = "probs")
alea <- runif(1)
if (alea > pred[1]) {
sel <- as.numeric(reglog$lev[1])
} else {
sel <- as.numeric(reglog$lev[2])
}
}
} else if (regr == "lm") {
# Since we are performing a linear
# regression, each element of CDi are
# numbers and have to be considered as
# class "numeric"
CDi <- as.data.frame(lapply(CDi[, 1:ncol(CDi)], as.numeric))
pred <- predict(reglog, CDi)
# Introducing a variance "noise"
pred <- rnorm(length(pred), pred, noise)
# Rounding pred to the nearest integer
pred <- round(pred)
# Restricting pred to its lowest
# value: 1
pred <- ifelse(pred < 1, 1, pred)
# Restricting pred to its highest
# value: k
pred <- ifelse(pred > k, k, pred)
sel <- pred
} else if (regr == "lrm") { # meaning (regr == "lrm")
## Case of ORDINAL REGRESSION MODEL
# Since we are performing an ordinal
# regression, each element of CDi are
# numbers and have to be considered as
# class "numeric"
CDi <- as.data.frame(lapply(CDi[, 1:ncol(CDi)], as.numeric))
pred <- predict(reglog, CDi, type = "fitted.ind")
# Testing if we are in case where k=2
# (if this is the case, we need to
# create the second complementary
# probility by hand since lrm returns
# only the first probability)
if (k == 2) {
pred <- c(pred, (1 - pred))
}
# Cumulative pred
pred <- cumsum(pred)
# Introducing a variance "noise"
pred <- rnorm(length(pred), pred, noise)
# Checking that the components of vector
# "pred" are still included in the
# interval [0,1]
pred <- unlist(lapply(pred, function(x) {
if (x < 0) 0 else x
}))
pred <- unlist(lapply(pred, function(x) {
if (x > 1) 1 else x
}))
# Imputation
# Since we have introduce a noise on the
# variance, it might occur that "alea"
# is greater than the greatest value of
# "pred". We have then to restrict
# "alea" to the last value of "pred"
alea <- runif(1)
if (alea > pred[length(pred)]) {
alea <- pred[length(pred)]
}
sel <- which(pred >= alea)
} else if (regr == "rf") { # regr=="rf" randomForest
# pred <- predict(reglog,newdata=CDi,type="prob")[1,]
pred <- predict(reglog, data = CDi, predict.all = T)$predictions[1, ]
pred <- factor(pred, levels = c(1:k))
tab <- table(pred)
tab <- tab / sum(tab)
#
# # Example of value returned by pred:
# # (Sytematically, the upper line
# # represents the possible categories of
# # the variable (here, k=2, so the
# # possible categories are
# # either 1" or "2"))
# # 1 2
# # 1.000000e+00 2.111739e-22
# #
# # Cumulative pred
pred <- cumsum(tab)
# Corresponding example value returned
# by the "cumulative pred":
# 1 2
# 1 1
#
# Introducing a variance "noise"
# pred <- rnorm(length(pred),pred,noise)
# Checking that the components of vector
# "pred" are still included in the
# interval [0,1]
# pred <- unlist(lapply(pred, function(x) {if (x < 0) 0 else x}))
# pred <- unlist(lapply(pred, function(x) {if (x > 1) 1 else x}))
# Imputation
alea <- runif(1)
# Example value returned in "alea":
# [1] 0.2610005
#
sel <- levels(CDi[, 1])[which(pred >= alea)[1]]
# Corresponding example value returned
# in sel:
# 1 2
# 1 2
#
} else {
}
ODi[i, j] <- sel[1]
return(ODi)
}
################################################################################
# Compute model with the chosen regression model
ComputeModel <- function(CD, regr, tot_VI, np, nf, k, ...) {
npfi <- np + nf
# ==>> Manipulation of parameters
# Conversion of CD in a data frame
CD <- as.data.frame(CD)
# Transformation of the names of the columns of CD
# (called V1, V2, ..., "Vtot_VI")
colnames(CD) <- paste("V", 1:ncol(CD), sep = "")
if (regr == "lm") {
## Case of LINEAR REGRESSION MODEL
# Since we are performing a linear regression, each
# element of CD are numbers and have then to remain as
# class "numeric" (we don't have to perform some class
# adjustment as for the case of the creation of a
# multinomial model).
# Computation of the linear regression model
if (tot_VI == 1) {
reglog <- lm(V1 ~ 0, data = CD)
# first case is evaluated aside
}
if (tot_VI > 1) {
# creation of "V2" ... "Vtot_VI" (to use them in the
# formula)
factors_character <- paste("V", 2:tot_VI, sep = "")
# Transformation of this object from character to
# vector (in order to be able
# to access its components)
factors <- as.vector(factors_character)
# Creation of a specific formula according to the
# value of tot_VI
fmla <- as.formula(paste("V1~0+", paste(factors, collapse = "+")))
reglog <- lm(fmla, data = CD)
}
} else if (regr == "lrm") { # meaning (regr == "lrm")
## Case of ORDINAL REGRESSION MODEL
# Linking to the package rms to use the function "lrm"
# Since we are performing an ordinal regression, each
# element of CD are numbers and have then to remain as
# class "numeric" (we don't have to perform some
# class adjustment as for the case of the creation of a
# multinomial model).
# Computation of the ordinal model
if (tot_VI == 1) {
reglog <- lrm(V1 ~ 0, data = CD)
# first case is evaluated aside
}
if (tot_VI > 1) {
# creation of "V2" ... "Vtot_VI" (to use them in the
# formula)
factors_character <- paste("V", 2:tot_VI, sep = "")
# Transformation of this object from character to
# vector (in order to be able
# to access its components)
factors <- as.vector(factors_character)
# Creation of a specific formula according to the
# value of tot_VI
fmla <- as.formula(paste("V1~0+", paste(factors, collapse = "+")))
reglog <- lrm(fmla, data = CD)
}
} else if (regr == "rf") { # Case regr=="rf" random forest
CD[, (1:(1 + npfi))] <- lapply(CD[, (1:(1 + npfi))], factor, levels = c(1:k))
for (v in 2:(1 + npfi)) {
CD[, v] <- factor(CD[, v], levels = c(1:(k + 1)))
CD[, v][is.na(CD[, v])] <- k + 1
}
CD[, 1] <- factor(CD[, 1], levels = c(1:k))
CD[, 1] <- droplevels(CD[, 1])
CD <- CD[!is.na(CD[, 1]), ]
factors_character <- paste("V", 2:tot_VI, sep = "")
# Transformation of this object from character to
# vector (in order to be able
# to access its components)
factors <- as.vector(factors_character)
# Creation of a specific formula according to the
# value of tot_VI
fmla <- as.formula(paste("V1~", paste(factors, collapse = "+")))
# reglog <- randomForest(fmla, data=CD,ntree=100)
if ("num.trees" %in% names(list(...))) {
reglog <- ranger(fmla, data = CD, ...)
} else {
reglog <- ranger(fmla, data = CD, num.trees = 10, ...)
}
} else if (regr == "multinom") {
CD[, 1] <- factor(CD[, 1], levels = c(1:k))
CD[, 1] <- droplevels(CD[, 1])
if (npfi > 1) {
CD[, (2:(1 + npfi))] <- lapply(CD[, (2:(1 + npfi))], factor, levels = c(1:k, NA), exclude = NULL)
} else {
CD[, 2] <- factor(CD[, 2], levels = c(1:k, NA), exclude = NULL)
}
factors_character <- paste("V", 2:tot_VI, sep = "")
# Transformation of this object from character to
# vector (in order to be able
# to access its components)
factors <- as.vector(factors_character)
# Creation of a specific formula according to the
# value of tot_VI
fmla <- as.formula(paste("V1~", paste(factors, collapse = "+")))
reglog <- nnet::multinom(CD, maxit = 100, trace = FALSE, MaxNwts = 1500, ...)
} else {
}
return(list(reglog, CD))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.