#' @title Performs Difference of Proportion Tests on a given dataset
#'
#' @description Performs pairwise Difference of Proportion Tests on the factor
#' variables of a given dataset.
#' The data can be a mixture of numeric and factor variables; only factor
#' columns are tested (the response column, when one is specified, is
#' cross-tabulated as it is).
#' For every pair of columns under test the raw data are cross-tabulated and
#' each pair of cells of that table is compared with
#' \code{\link[stats]{prop.test}}, using the two cell counts as the successes
#' and the table total as the number of trials for both.
#' The results are output as a data frame.
#' Furthermore the results can be saved as a .csv file to a specified directory.
#'
#' @param dataset A dataset on which the Difference of Proportion Tests are performed.
#' It is converted with \code{as.data.frame}.
#'
#' @param y_index An integer value, the column index of the response variable, the default is NULL.
#' Ignored when \code{y_name} is supplied.
#'
#' @param y_name A character value, the column name of the response variable, the default is NULL.
#' Takes precedence over \code{y_index} and must match exactly one column.
#'
#' @param alternative The type of hypothesis being tested; two.sided, greater, less.
#' The default is "two.sided"
#'
#' @param conf.level The level of confidence used in the Test, default is 0.95
#'
#' @param correct A logical object, indicating whether Yates' continuity correction should be applied where possible, default is TRUE.
#'
#' @param file_name A character object indicating the file name when saving the data frame.
#' The default is NULL.
#' The name must include the .csv suffix.
#' Required whenever \code{directory} is supplied.
#'
#' @param directory A character object specifying the directory where the data frame is to be saved as a .csv file.
#' The default is NULL, in which case nothing is written.
#'
#' @return Outputs the Difference of Proportion Tests information as a data frame
#' with one row per pair of cells and the columns \code{Xi}, \code{Xj},
#' \code{Xiprop}, \code{Xjprop}, \code{DoP Stat}, \code{DoP P.V.} and \code{Ha}
#' (\code{Xj} and \code{Xjprop} are named \code{Y} and \code{Yprop} when a
#' response variable is specified).
#' Proportions, statistics and p-values are rounded to three digits.
#' The data frame has zero rows when there is nothing to test.
#'
#' @export
#'
#' @seealso \code{\link{tests_chisq}}, \code{\link{tests_cors}}, \code{\link{tests_ks}}, \code{\link{tests_norm}}, \code{\link{tests_t}}, \code{\link{tests_var}}, \code{\link{tests_wilcoxon}}
#'
#' @keywords Difference of Proportion Tests
#'
#' @examples
#' #-- Example Lung Cap Data --#
#'
#' # perform difference of proportion tests on the entire dataset
#' tests_proptest(dataset = lungcap)
#'
#' # perform a difference of proportion test in relation to the fifth column
#' tests_proptest(dataset = lungcap, y_index = 5)
#'
#' # perform a difference of proportion test in relation to the gender column
#' tests_proptest(dataset = lungcap, y_name = "Gender")
#'
tests_proptest <- function(dataset,
                           y_index = NULL,
                           y_name = NULL,
                           alternative = c("two.sided", "greater", "less"),
                           conf.level = 0.95,
                           correct = TRUE,
                           file_name = NULL,
                           directory = NULL) {
  # Confirm correct choice for alternative
  alternative <- match.arg(alternative)
  # Fail before running any test rather than when the file is written
  if (!is.null(directory) && is.null(file_name)) {
    stop("'file_name' must be supplied when 'directory' is given",
         call. = FALSE)
  }
  # Convert the dataset to a data frame
  dataset <- as.data.frame(dataset)

  # Resolve the response column; y_name takes precedence over y_index
  if (!is.null(y_name)) {
    y_index <- which(colnames(dataset) == y_name)
    if (length(y_index) != 1L) {
      stop("'y_name' must match exactly one column of 'dataset'",
           call. = FALSE)
    }
  } else if (!is.null(y_index)) {
    y_ok <- is.numeric(y_index) && length(y_index) == 1L &&
      !is.na(y_index) && y_index >= 1 && y_index <= ncol(dataset)
    if (!y_ok) {
      stop("'y_index' must be a single valid column index of 'dataset'",
           call. = FALSE)
    }
  }

  # Zero-row template; also the result when nothing can be tested
  empty <- data.frame(first = character(0), second = character(0),
                      prop_first = numeric(0), prop_second = numeric(0),
                      stat = numeric(0), p_value = numeric(0),
                      stringsAsFactors = FALSE)

  # Cross-tabulates two variables and runs a proportion test on every pair
  # of cells (cell count out of the table total); one output row per pair.
  compare_cells <- function(xi, xj, xi_name, xj_name) {
    # Long format: one row per cell, first variable varying fastest
    cells <- as.data.frame(table(xi, xj))
    counts <- cells[[3L]]
    n_cells <- length(counts)
    total <- sum(counts)
    # A single cell has no partner; an empty table would make prop.test fail
    if (n_cells < 2L || total == 0) {
      return(empty)
    }
    labels <- paste(paste0(xi_name, cells[[1L]]),
                    paste0(xj_name, cells[[2L]]),
                    sep = "_")
    n_pairs <- n_cells * (n_cells - 1L) / 2
    first <- character(n_pairs)
    second <- character(n_pairs)
    prop_first <- numeric(n_pairs)
    prop_second <- numeric(n_pairs)
    stat <- numeric(n_pairs)
    p_value <- numeric(n_pairs)
    row <- 0L
    for (k in seq_len(n_cells - 1L)) {
      for (m in seq(k + 1L, n_cells)) {
        row <- row + 1L
        # 'alternative' must be forwarded, otherwise the reported Ha would
        # not match the test that was actually run
        fit <- stats::prop.test(x = counts[c(k, m)],
                                n = c(total, total),
                                alternative = alternative,
                                conf.level = conf.level,
                                correct = correct)
        first[row] <- labels[k]
        second[row] <- labels[m]
        prop_first[row] <- round(fit$estimate[[1L]], digits = 3)
        prop_second[row] <- round(fit$estimate[[2L]], digits = 3)
        stat[row] <- round(fit$statistic[[1L]], digits = 3)
        p_value[row] <- round(fit$p.value, digits = 3)
      }
    }
    data.frame(first = first, second = second,
               prop_first = prop_first, prop_second = prop_second,
               stat = stat, p_value = p_value,
               stringsAsFactors = FALSE)
  }

  col_names <- colnames(dataset)
  pieces <- list()
  if (is.null(y_index)) {
    #-----------------------------------------------------------------#
    # No response variable: every pair of factor columns              #
    #-----------------------------------------------------------------#
    factor_cols <- which(vapply(dataset, is.factor, logical(1)))
    n_factors <- length(factor_cols)
    for (a in seq_len(max(n_factors - 1L, 0L))) {
      for (b in seq(a + 1L, n_factors)) {
        i <- factor_cols[a]
        j <- factor_cols[b]
        pieces[[length(pieces) + 1L]] <- compare_cells(dataset[[i]],
                                                       dataset[[j]],
                                                       col_names[i],
                                                       col_names[j])
      }
    }
    out_names <- c("Xi", "Xj", "Xiprop", "Xjprop",
                   "DoP Stat", "DoP P.V.", "Ha")
  } else {
    #-----------------------------------------------------------------#
    # Response variable given: every other factor column against it   #
    #-----------------------------------------------------------------#
    # Work with column indices so a single remaining column is not
    # collapsed into a vector
    x_cols <- setdiff(seq_len(ncol(dataset)), y_index)
    x_cols <- x_cols[vapply(dataset[x_cols], is.factor, logical(1))]
    pieces <- lapply(x_cols, function(i) {
      compare_cells(dataset[[i]], dataset[[y_index]],
                    col_names[i], col_names[y_index])
    })
    out_names <- c("Xi", "Y", "Xiprop", "Yprop",
                   "DoP Stat", "DoP P.V.", "Ha")
  }

  # rbind drops zero-row pieces and falls back to the template if none remain
  proptestdf <- do.call(rbind, c(list(empty), pieces))
  proptestdf$Ha <- rep(alternative, nrow(proptestdf))
  colnames(proptestdf) <- out_names
  rownames(proptestdf) <- NULL

  # Write the data frame to the specified directory
  if (!is.null(directory)) {
    utils::write.csv(x = proptestdf,
                     file = file.path(directory, file_name),
                     row.names = FALSE)
  }
  return(proptestdf)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.