# handling inputs of AGLM model
# written by Kenji Kondo @ 2019/1/2
#' S4 class for input
#'
#' @slot vars_info A list of list. Each element has some information of one feature.
#' @slot data A data.frame which contains original data itself.
setClass("AGLM_Input",
representation=representation(vars_info="list", data="data.frame"))
#' Create a new AGLM_Input object
#' @importFrom assertthat assert_that
newInput <- function(x,
qualitative_vars_UD_only=NULL,
qualitative_vars_both=NULL,
qualitative_vars_OD_only=NULL,
quantitative_vars=NULL,
use_LVar=FALSE,
add_linear_columns=TRUE,
add_OD_columns_of_qualitatives=TRUE,
add_interaction_columns=TRUE,
OD_type_of_quantitatives='C',
bins_list=NULL,
bins_names=NULL) {
# Check and process arguments
assert_that(!is.null(x))
if (class(x) != "data.frame") x <- data.frame(x)
assert_that(dim(x)[2] > 0)
# Calculate data size
nobs <- dim(x)[1]
nvar <- dim(x)[2]
assert_that(nobs > 0 & nvar > 0)
# Create a list (explanatory) variables' information.
# Firstly create without considering qualitative_vars_UD_only, qualitative_vars_both, ...
vars_info <- list()
for (i in seq(nvar)) {
var <- list()
var$idx <- i
var$name <- names(x)[i]
var$data_column_idx <- i
var$type <- if (is.factor(x[, i]) | is.logical(x[, i])) {"qual"} else {"quan"}
is_ordered <- (var$type == "qual" & is.ordered(x[, i])) | (var$type == "quan")
var$use_linear <- var$type == "quan" & add_linear_columns
var$use_UD <- var$type == "qual"
var$use_OD <- var$type == "quan" | (var$type == "qual" & is_ordered & add_OD_columns_of_qualitatives)
if (var$use_OD) {
if (var$type == "quan") var$OD_type <- OD_type_of_quantitatives
else var$OD_type <- 'J'
}
var$use_LV <- var$type == "quan" & use_LVar
if (var$use_LV) {
var$use_linear <- FALSE
var$use_OD <- FALSE
}
vars_info[[i]] <- var
}
# Define a utility function
var_names <- sapply(vars_info, function(var) {return(var$name)})
get_idx <- function(idxs_or_names) {
if (is.null(idxs_or_names)) {
return(integer(0))
}
cl <- class(idxs_or_names)
idxs <- seq(length(var_names))
if (cl == "integer") {
is_hit <- function(idx) {return(idx %in% idxs_or_names)}
idxs <- idxs[sapply(idxs, is_hit)]
} else if (cl == "character") {
is_hit <- function(var_name) {return(var_name %in% idxs_or_names)}
idxs <- idxs[sapply(var_names, is_hit)]
} else {
assert_that(FALSE, msg="qualitative_vars_UD_only, qualitative_vars_both, qualitative_vars_both, quantitative_vars should be integer or character vectors.")
}
}
# Get indices of variables specified by qualitative_vars_UD_only, qualitative_vars_both, ...
qual_UD <- get_idx(qualitative_vars_UD_only)
qual_OD <- get_idx(qualitative_vars_both)
qual_both <- get_idx(qualitative_vars_both)
quan <- get_idx(quantitative_vars)
# Check if no variables are doubly counted.
msg <- paste0("Each pair of qualitative_vars_UD_only, qualitative_vars_both, qualitative_vars_both, ",
"and quantitative_vars shouldn't be overlapped.")
assert_that(length(intersect(qual_UD, qual_OD)) == 0, msg=msg)
assert_that(length(intersect(qual_UD, qual_both)) == 0, msg=msg)
assert_that(length(intersect(qual_UD, quan)) == 0, msg=msg)
assert_that(length(intersect(qual_OD, qual_both)) == 0, msg=msg)
assert_that(length(intersect(qual_OD, quan)) == 0, msg=msg)
assert_that(length(intersect(qual_both, quan)) == 0, msg=msg)
# Modify vars_info using qualitative_vars_UD_only, qualitative_vars_both, ...
for (i in qual_UD) {
var <- vars_info[[i]]
var$type <- "qual"
var$use_linear <- FALSE
var$use_UD <- TRUE
var$use_OD <- FALSE
var$use_LV <- FALSE
vars_info[[i]] <- var
}
for (i in qual_OD) {
var <- vars_info[[i]]
var$type <- "qual"
var$use_linear <- FALSE
var$use_UD <- FALSE
var$use_OD <- TRUE
var$use_LV <- FALSE
vars_info[[i]] <- var
}
for (i in qual_both) {
var <- vars_info[[i]]
var$type <- "qual"
var$use_linear <- FALSE
var$use_UD <- TRUE
var$use_OD <- TRUE
var$use_LV <- FALSE
vars_info[[i]] <- var
}
for (i in quan) {
var <- vars_info[[i]]
var$type <- "quan"
var$use_linear <- !use_LVar & add_linear_columns
var$use_UD <- FALSE
var$use_OD <- !use_LVar
var$use_LV <- use_LVar
vars_info[[i]] <- var
}
# Retypes columns
for (i in seq(nvar)) {
# For quantitative variables, holds numeric data
if (vars_info[[i]]$type == "quan") {
data_idx <- vars_info[[i]]$data_column_idx
x[, data_idx] <- as.numeric(x[, data_idx])
}
# For qualitative variables, holds ordered or unordered factor data
if (vars_info[[i]]$type == "qual") {
data_idx <- vars_info[[i]]$data_column_idx
if (vars_info[[i]]$use_OD) {
x[, data_idx] <- ordered(x[, data_idx])
} else {
x[, data_idx] <- factor(x[, data_idx])
}
}
}
# Set binning informations from bins_list
if (!is.null(bins_list)) {
if (is.null(bins_names)) {
idx_list <- list()
for (i in seq(nvar)) {
v <- vars_info[[i]]
if (v$use_OD | v$use_LV) idx_list <- c(idx_list, v$idx)
}
for (i in seq(length(bins_list))) {
idx <- idx_list[[i]]
breaks <- bins_list[[i]]
if (vars_info[[idx]]$use_OD) vars_info[[idx]]$OD_info$breaks <- unique(sort(breaks[is.finite(breaks)]))
if (vars_info[[idx]]$use_LV) vars_info[[idx]]$LV_info$breaks <- unique(sort(breaks[is.finite(breaks)]))
}
} else {
idx_map <- list()
if (all(sapply(bins_names, is.character))) {
for (i in seq(nvar)) {
v <- vars_info[[i]]
if (v$use_OD | v$use_LV) idx_map[[v$name]] <- v$idx
}
} else {
for (i in seq(nvar)) {
v <- vars_info[[i]]
if (v$use_OD | v$use_LV) idx_map[[v$idx]] <- v$idx
}
}
for (i in seq(length(bins_list))) {
name <- bins_names[[i]]
idx <- idx_map[[name]]
if (vars_info[[idx]]$use_OD) vars_info[[idx]]$OD_info$breaks <- bins_list[[i]]
if (vars_info[[idx]]$use_LV) vars_info[[idx]]$LV_info$breaks <- bins_list[[i]]
}
}
}
# For variables using dummies, set informations of the way dummies are generated
for (i in seq(nvar)) {
if (vars_info[[i]]$use_UD) {
vars_info[[i]]$UD_info <- getUDummyMatForOneVec(x[, i], only_info=TRUE, drop_last=FALSE)
}
if (vars_info[[i]]$use_OD & is.null(vars_info[[i]]$OD_info)) {
vars_info[[i]]$OD_info <- getODummyMatForOneVec(x[, i], dummy_type=vars_info[[i]]$OD_type, only_info=TRUE)
}
if (vars_info[[i]]$use_LV & is.null(vars_info[[i]]$LV_info)) {
vars_info[[i]]$LV_info <- getLVarMatForOneVec(x[, i], only_info=TRUE)
}
}
# Append informations of interaction effects
if (add_interaction_columns & nvar > 1) {
idx <- length(vars_info)
for (i in 1:nvar) {
for (j in i:nvar) {
idx <- idx + 1
var_info1 <- vars_info[[i]]
var_info2 <- vars_info[[j]]
var_info <- list(idx=idx,
name=paste0(var_info1$name, "_x_", var_info2$name),
type="inter",
var_idx1=i,
var_idx2=j)
vars_info[[idx]] <- var_info
}
}
}
new("AGLM_Input", vars_info=vars_info, data=x)
}
# Functions for inner use
getMatrixRepresentationByVector <- function(raw_vec, var_info, drop_OD=FALSE) {
assert_that(var_info$type != "inter")
z <- NULL
if (var_info$use_linear) {
z <- matrix(raw_vec, ncol=1)
colnames(z) <- var_info$name
}
if (var_info$use_LV & !drop_OD) {
z_LV <- getLVarMatForOneVec(raw_vec, breaks=var_info$LV_info$breaks)$dummy_mat
colnames(z_LV) <- paste0(var_info$name, "_LV_", seq(dim(z_LV)[2]))
z <- cbind(z, z_LV)
}
if (var_info$use_OD & !drop_OD) {
z_OD <- getODummyMatForOneVec(raw_vec, breaks=var_info$OD_info$breaks, dummy_type=var_info$OD_type)$dummy_mat
colnames(z_OD) <- paste0(var_info$name, "_OD_", seq(dim(z_OD)[2]))
z <- cbind(z, z_OD)
}
if (var_info$use_UD) {
z_UD <- getUDummyMatForOneVec(raw_vec, levels=var_info$UD_info$levels,
drop_last=var_info$UD_info$drop_last)$dummy_mat
colnames(z_UD) <- paste0(var_info$name, "_UD_", seq(dim(z_UD)[2]))
z <- cbind(z, z_UD)
}
return(z)
}
getMatrixRepresentation <- function(x, idx, drop_OD=FALSE) {
var_info <- x@vars_info[[idx]]
z <- NULL
if (var_info$type == "quan" | var_info$type == "qual") {
z_raw <- x@data[, var_info$data_column_idx]
z <- getMatrixRepresentationByVector(z_raw, var_info, drop_OD)
} else if (var_info$type == "inter") {
# Interaction effects between columns of one variable itself
self_interaction <- var_info$var_idx1 == var_info$var_idx2
# Get matrix representations of two variables
z1 <- getMatrixRepresentation(x, var_info$var_idx1, drop_OD=TRUE)
z2 <- getMatrixRepresentation(x, var_info$var_idx2, drop_OD=TRUE)
if (is.null(z1) | is.null(z2))
return(NULL)
# Create matrix representation of intarction
nrow <- dim(z1)[1]
ncol1 <- dim(z1)[2]
ncol2 <- dim(z2)[2]
ncol_res <- ifelse(self_interaction, ncol1 * (ncol1 - 1) / 2, ncol1 * ncol2)
assert_that(dim(z2)[1] == nrow)
z <- matrix(0, nrow, ncol_res)
nm <- character(ncol_res)
ij <- 0
for (i in 1:ncol1) {
js <- 1:ncol2
if (self_interaction)
js <- js[js > i]
for (j in js) {
ij <- ij + 1
z[, ij] <- z1[, i] * z2[, j]
nm[ij] <- paste0(var_info$name, "_", i, "_", j)
}
}
colnames(z) <- nm
} else {
assert_true(FALSE) # never expects to come here
}
return(z)
}
#' Get design-matrix representation of AGLM_Input objects
#'
#' @param x An AGLM_Input object
#'
#' @return A data.frame which represents the matrix representation of `x`.
#'
#' @export
#' @importFrom assertthat assert_that
getDesignMatrix <- function(x) {
# Check arguments
assert_that(class(x) == "AGLM_Input")
# Data size
nobs <- dim(x@data)[1]
nvar <- length(x@vars_info)
x_mat <- NULL
for (i in 1:nvar) {
z <- getMatrixRepresentation(x, i)
if (i == 1) x_mat <- z
else x_mat <- cbind(x_mat, z)
}
return(x_mat)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.