Nothing
# this function is written by Myrsini Katsikatsou
############################## pairwiseTables FUNCTION ########################
# This function can be public. It gets as an input a raw data set of ordinal
# variables and it returns a list of all pairwise frequency tables.
#
# The input arguments of the function:
# data : matrix or data frame containing the data. The rows correspond to
# different observations and the columns to different observed categorical
# (ordinal or nominal) variables. No continuous variables or covariates
# should be contained in data. If the variables contained in the data are
# distinguished into indicators of exogenous latent variables (lv) and
# indicators of endogenous latent variables, those for exogenous lv should
# be presented first (in the first columns of data) followed by the
# indicators for endogenous lv.
# var.levels: NULL or vector or list, specifies the levels (response categories)
# for each categorical variable contained in data.
# If NULL, the levels encoutered in data are used. If a response
# category is not observed in the data, then var.levels should be
# defined.
# If vector, that implies that all variables have the same levels as
# given in the vector.
# If list, the components of the list are vectors, as many as the
# number of variables in data. Each vector gives the levels of
# the corresponding categorical variable in data.
# no.x : NULL or integer, gives the number of indicators for exogenous lv.
# The default value is NULL indicating that data contains only
# indicators of exogenous latent variables.
# perc : TRUE/FALSE. If FALSE the observed frequencies are reported, otherwise
# the observed percentages are given.
# na.exclude : TRUE/FALSE. If TRUE, listwise deletion is applied to data.
# Otherwise, cases with missing values are preserved and and an
# extra level with label NA is included in the tables.
# The output of the function:
# It is a list of three components: $pairTables, $VarLevels and $Ncases_del.
# pairTables : a list of so many tables as the number of variable pairs formed
# by data. If there are indicators of both exogenous and endogenous
# variables, then first all the matrices referring to pairs of
# indicators of exogenous lv are reported, followed by all the
# matrices referring to pairs of indicators of endogenous lv, which
# in turn folowed by all the matrices of pairs: one indicator of an
# exogenous - one indicator of an endogenous lv.
# VarLevels : a list of as many vectors as the number of variables in the data.
# Each vector gives the levels/ response categories of each variable
# Ncases_del : An integer reporting the number of cases deleted by data because
# of missing values (listwise deletion) when na.exclude=TRUE.
pairwiseTables <- function(data, var.levels=NULL, no.x=NULL,
perc=FALSE, na.exclude=TRUE) {
# data in right format?
if ( (!is.matrix(data)) & (!is.data.frame(data)) ) {
stop("data is neither a matrix nor a data.frame")
}
# at least two variables
no.var <- dim(data)[2]
if(no.var<2) {
stop("there are less than 2 variables")
}
# no.x < no.var ?
if(no.x > no.var) {
stop("number of indicators for exogenous latent variables is larger than the total number of variables in data")
}
# if data as matrix, transforma as data.frame
if(is.matrix(data)) {
data <- as.data.frame(data)
}
# listwise deletion
if(na.exclude) {
old.data <- data
data <- na.omit(data)
}
# all columns of data.frame should be of class factor so that function levels
# can be applied
if(!all(sapply(data,class)=="factor")) {
if(nrow(data)>1) {
data <- data.frame( sapply(data,factor) )
} else {
data <- apply(data,2,factor)
data <- as.data.frame( matrix(data, nrow=1) )
}
}
# the levels observed for each variable, obs.levels is a list
obs.levels <- lapply(data,levels)
# number of variables in data same as number of vectors in var.levels
if(is.list(var.levels) && no.var!= length(var.levels) ) {
stop("the length of var.levels does not match the number of variables of the given data set")
}
# create var.levels if a list is not given
old.var.levels <- var.levels
if(!is.list(old.var.levels)) {
if(is.null(old.var.levels) ) {
var.levels <- obs.levels
} else {
var.levels <- vector("list", no.var)
var.levels <- lapply(var.levels, function(x){x <- old.var.levels} )
}
}
names(var.levels) <- names(data)
# also check that obs.levels exist in the object var.levels given by the user, i.e. old.var.levels
if(is.list(old.var.levels)) {
for(i in 1:no.var) {
if(!all( obs.levels[[i]] %in% old.var.levels[[i]]))
stop("levels observed in data are not mentioned in var.levels")
}
} else if (is.vector(old.var.levels)) {
if(!all(apply(na.omit(data), 2, function(x){x %in% old.var.levels})))
stop("levels observed in data are not mentioned in var.levels")
}
no.given.levels <- sapply(var.levels, length)
# assign the right levels for each variable as given in object var.levels if it is not the case
# it is not the case when the observed levels are a subgroup of the var.levels given
if(!is.null(old.var.levels)) {
no.obs.levels <- sapply(obs.levels, length)
if(!all(no.obs.levels==no.given.levels) ) {
index <- c(1:no.var)[no.obs.levels!=no.given.levels]
for(i in index) {
data[,i] <- factor(data[,i] , levels=var.levels[[i]])
}
}
}
# compute the bivariate frequency tables
# Split first into two cases: a) only indicators of exogenous latent variables
# b) otherwise
if(is.null(no.x) || no.x==no.var) {
pairs.index <- utils::combn(no.var,2)
no.pairs <- dim(pairs.index)[2]
res <- vector("list", no.pairs)
for(i in 1:no.pairs ) {
res[[i]] <- table( data[, pairs.index[,i] ], useNA="ifany" )
}
} else {
no.y <- no.var - no.x
pairs.xixj.index <- utils::combn(no.x,2) # row 1 gives i index, row 2 j index, j runs faster than i
pairs.yiyj.index <- utils::combn(no.y,2)
pairs.xiyj.index <- expand.grid(1:no.y, 1:no.x)
pairs.xiyj.index <- rbind( pairs.xiyj.index[,2], pairs.xiyj.index[,1] ) # row 1 gives i index, row 2 j index, j runs faster than i
no.pairs.xixj <- dim(pairs.xixj.index)[2]
no.pairs.yiyj <- dim(pairs.yiyj.index)[2]
no.pairs.xiyj <- dim(pairs.xiyj.index)[2]
no.all.pairs <- no.pairs.xixj + no.pairs.yiyj + no.pairs.xiyj
data.x <- data[,1:no.x]
data.y <- data[,(no.x+1):no.var]
res <- vector("list", no.all.pairs)
for(i in 1:no.pairs.xixj) {
res[[i]] <- table(data.x[,pairs.xixj.index[,i]], useNA="ifany" )
}
j <- 0
for(i in (no.pairs.xixj+1):(no.pairs.xixj+no.pairs.yiyj) ) {
j <- j+1
res[[i]] <- table(data.y[,pairs.yiyj.index[,j]], useNA="ifany" )
}
j <-0
for(i in (no.pairs.xixj+no.pairs.yiyj+1):no.all.pairs ) {
j <- j+1
res[[i]] <- table(cbind(data.x[,pairs.xiyj.index[1,j], drop=FALSE],
data.y[,pairs.xiyj.index[2,j], drop=FALSE]),
useNA="ifany" )
}
}
# if percentages are asked
if (perc) {
Nobs <- dim(data)[1]
res <- lapply(res, function(x){x/Nobs} )
}
#Ncases_del = the number of cases deleted because they had missing values
if (na.exclude) {
Ncases_deleted <- dim(old.data)[1] - dim(data)[1]
} else {
Ncases_deleted <- 0
}
list(pairTables=res, VarLevels=var.levels, Ncases_del= Ncases_deleted)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.