Nothing
# jonashaslbeck@gmail.com; May 2018
ModelMatrix <- function(data, # matrix
type, # type vector (I think not needed, level should be sufficient)
level, # level vector
labels,
d, # largest neighborhood size
moderators = NULL,
v = NULL,
allCats = FALSE # if true, the model matrix does not use all unique categories, but the categories specified in level, this exists because I use this function within the sampling function
)
{
# Delete variable v: only for MGMs!
if(!is.null(v)) {
data <- data[, -v]
type <- type[-v]
level <- level[-v]
labels <- labels[-v]
}
# ---------- Calculate Auxilliary variables ----------
p <- ncol(data) # note that we have only the predictors here!!
n <- nrow(data)
# mSpec <- ifelse(class(moderators) %in% c("integer", "numeric"), "vector", "matrix")
mSpec <- ifelse(any(class(moderators) %in% c("integer","numeric")), "vector", "matrix")
# ---------- Input Checks ----------
if(p != length(type)) stop('Length of type has to match the number of columns in data.')
if(p != length(level)) stop('Length of level has to match the number of columns in data.')
if(!(inherits(level, "numeric") | inherits(level, "integer"))) stop('level has to be an integer vector.')
# ---------- Calculate Indicator Functions for all Variables ----------
l_ind_datasets <- list()
for(j in 1:p) {
if(type[j] != 'c') {
l_ind_datasets[[j]] <- as.matrix(data[, j])
colnames(l_ind_datasets[[j]]) <- paste0(labels[j])
} else {
if(allCats==FALSE) {
unique_labels <- unique(data[, j])
unique_labels_sorted <- sort(unique_labels)
n_labels <- length(unique_labels_sorted)
ind_matrix <- matrix(NA, nrow = n, ncol=n_labels)
for(s in 1:n_labels) ind_matrix[, s] <- data[, j] == unique_labels_sorted[s]
} else {
# This is used in mvarsampler() and avoids that the design matrix is too small cases early in the time series, where not all categories are seen yet
unique_labels <- 1:level[j]
n_labels <- level[j]
ind_matrix <- matrix(NA, nrow = n, ncol=n_labels)
for(s in 1:n_labels) ind_matrix[, s] <- data[, j] == unique_labels[s]
}
# Add Var names
cn <- paste0(labels[j], 1:n_labels)
colnames(ind_matrix) <- cn
l_ind_datasets[[j]] <- ind_matrix
}
}
# ---------- Collect d = 1 interaction Terms ----------
# In this case we just use the indicator functions computed above
l_ind_datasets_nV <- l_ind_datasets
Xd1 <- do.call(cbind, l_ind_datasets_nV)
# ---------- Compute all d>1 Interaction Terms ----------
# Here we loop over all orders, in each order over all interactions, and in each interaction over all combinations of the levels of all variables
if(!is.null(moderators)) d <- 2 # If first-order moderation (three way interactions) is specified, we fix k = 3, d = k - 1 = 2
if(d > 1) {
## List all possible Interactions
l_interactions <- vector('list', length = d)
# Case I: No Moderation
if(is.null(moderators)) {
for(ord in 1:d) l_interactions[[ord]] <- combn((1:p), ord, simplify = FALSE) # note that the numbers here refer to the all variables minus variable v
} else {
l_interactions[[1]] <- list()
for(i in 1:p) l_interactions[[1]][[i]] <- i
# Case II: Moderation
if(mSpec == "vector") {
if(v %in% moderators) {
l_interactions[[2]] <- combn(1:p, 2, simplify = FALSE) # all combinations of the remaining (here denoted by 1:p) variables
} else {
ind_mod_MM <- (1:(p+1) %in% moderators)[-v] # indicator(moderator?) for 1:p predictors
n_mods <- sum(ind_mod_MM)
which_mod <- which(ind_mod_MM)
l_mods <- list()
for(i in 1:n_mods) l_mods[[i]] <- expand.grid((1:p)[-which_mod[i]], which_mod[i]) # loop over moderators
m_mods <- do.call(rbind, l_mods)
m_mods <- as.matrix(m_mods)
l_mods_combn <- list()
for(i in 1:nrow(m_mods)) l_mods_combn[[i]] <- m_mods[i, ] # turn each row into list entry
l_interactions[[2]] <- l_mods_combn
} # end: v = moderator?
} # end if: vector specification?
if(mSpec == "matrix") {
nrow_mods <- nrow(moderators)
ind_v_inMod <- as.logical(apply(moderators, 1, function(x) v %in% x ))
if(sum(ind_v_inMod)>0) { # if variable v is involved in at least one interaction
# get interaction terms for node v
int_terms <- t(apply(matrix(moderators[ind_v_inMod], ncol=3), 1, function(x) x[x!=v]))
# To get the predictors on that weird "other variable" vector I use in this script
for(i in 1:nrow(int_terms)) for(j in 1:2) if(int_terms[i, j] > v) int_terms[i, j] <- int_terms[i, j] - 1
# fill in existing structure:
for(i in 1:nrow(int_terms)) l_interactions[[2]][[i]] <- int_terms[i, ]
} else {
d <- 1 # for the present node v, to skip the below which takes l_interactions[[2]] as input
}
} # end if: matrix specification?
} # end if: moderators?
} # end if: d>1
# if statement again here, because it can happen that there is no interaction term in regression on variable v
if(d > 1) {
# storage for all interactions of all orders
l_collect_terms <- list()
# Loop over order of interactions;
for(ord in 2:d) {
n_terms <- length(l_interactions[[ord]])
# storage for all interactions of fixed order
l_ord_terms <- list()
# Loop over the interactions of a given order
for(it in 1:n_terms) {
# Storage: collect here all interaction terms of one interaction
l_it_terms <- list()
# For fixed interaction: which variables are involved?
inter_it <- l_interactions[[ord]][[it]] # select interactions one by one
# Compute amount of levels of each variable (continuous=1)
l_indicator_it <- list()
for(i in 1:ord) l_indicator_it[[i]] <- 1:(level[inter_it[i]])
# List all combination of levels of variables in the interaction it
all_combs <- expand.grid(l_indicator_it)
n_combs <- nrow(all_combs)
# Loop over all combinations of levels of variables in interaction and collect in matrix
for(comb in 1:n_combs) {
tarmat <- matrix(NA, nrow = n, ncol = ord)
for(i in 1:ord) tarmat[, i] <- as.matrix(l_ind_datasets[[inter_it[i]]])[,all_combs[comb, i]]
l_it_terms[[comb]] <- apply(tarmat, 1, prod)
}
# combine all level combinations for interaction it
it_data <- do.call(cbind, l_it_terms)
# Create Names for all (combinations of) interactions
all_combs_char <- apply(all_combs, 2, function(x) {
if(length(unique(x))==1) {
out <- rep("", length(x))
} else {
out <- x
}
})
cn <- rep(NA, n_combs)
# DEV: check whether this still works:
for(comb in 1:n_combs) cn[comb] <- paste0(labels[inter_it], matrix(all_combs_char, ncol=ord)[comb,], collapse=':')
# the point above added to deliminate category flag
colnames(it_data) <- cn
l_ord_terms[[it]] <- it_data
}
# Collapse over interactions of fixed order
l_collect_terms[[ord]] <- do.call(cbind, l_ord_terms)
}
# Collapse across order of interactions
# l_collect_terms[[1]] <- NULL
all_HOI_terms <- do.call(cbind, l_collect_terms)
} # end if: d>1
# Combine with d=1 size neighborhoods (singletons)
if(d > 1) {
X <- cbind(Xd1, all_HOI_terms)
} else {
X <- Xd1
}
# ---------- Output----------
return(X)
} # end of function
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.