#' @title Chi-Square Simulation (Contingency Table)
#' @description Perform chi-square test for association, by simulation. Enter either
#' formula-data input or a summary table.
#'
#' @rdname chisqSimShiny
#' @usage chisqSimShiny(x, data = parent.frame())
#' @param x Could be a formula. If so, it should be of the form ~var1+var2.
#' Otherwise either a table or matrix of summary data.
#' @param data dataframe supplying variables for formula x. If variables in x ar enot found in the data,
#' then they will be searched for in the parent environment.
#' @return side effects
#' @note This is a locally-run Shiny app. It may not work properly on some R Studio Server set-ups,
#' especially on the CentoOS operating system.
#' @export
#' @author Homer White \email{hwhite0@@georgetowncollege.edu}
#' @examples
#' \dontrun{
#' # from a data frame:
#' chisqSimShiny(~seat,data=m111survey)
#'
#' # from a summary table:
#' DoesNotSmoke <- c(NeitherSmokes=1168,OneSmokes=1823,BothSmoke=1380)
#' Smokes <- c(188,416,400)
#' ChildParents <- rbind(DoesNotSmoke,Smokes)
#' chisqSimShiny(ChildParents)
#' }
chisqSimShiny <-
function (x,data=parent.frame())
{
###########################################################
# begin with utiltiy functions
##########################################################
# temporary, for development
simpleFind <- function(varName,data) {
if (is.null(data)) {
return(get(varName,inherits=T))
}
tryCatch({get(varName,envir=as.environment(data))},
error=function(e) {
# is data name on the search path?
dataName <- deparse(substitute(data))
# will throw the error if data is not on search path
get(dataName,inherits=T)
# otherwise, user probably intends that this particular variable
# is outside the stated data, so look for it:
get(varName,inherits=T)
}
)
}
# temporary, for development
ParseFormula <- function(formula, ...) {
op <- formula[[1]]
condition <- NULL
if (length(formula) == 2) {
rhs <- formula[[2]]
lhs <- NULL
} else if (length(formula) == 3) {
rhs <- formula[[3]]
lhs <- formula[[2]]
} else {
stop('Invalid formula type.')
}
if (inherits(rhs, "call") && rhs[[1]] == '|') {
condition <- rhs[[3]]
rhs <- rhs[[2]]
}
return( structure(list(op=op,lhs=lhs,rhs=rhs,condition=condition), class='parsedFormula') )
}
# Modified from pchisqGC.R in package tigerstats
chisqGraph <- function(bound,region="above",df=NA,xlab="chi_square_statistic",graph=FALSE) {
if (!is.numeric(bound)) stop("Specify a numerical boundary")
if (bound < 0) stop("The chi-square statistic must be at least 0")
if (is.na(df)) stop("Specify the degrees of freedom using the argument df")
if (!(region %in% c("below","above"))) stop("Specify either \"region=\"below\" or
\"region=\"above\"")
if (df < 0) stop("Degrees of freedom must be positive")
if (region=="below") {
area <- pchisq(bound,df=df)
if (graph && df==1) warning("No graph produced for region below when df=1")
if (graph) {
bound <- round(bound,2)
upper <- max(qchisq(.9999,df=df),bound+1)
lower <- 0
curve(dchisq(x,df=df),from=lower,to=upper,ylab="density",axes=FALSE,n=501,xlab=xlab,
main=paste("Chi-Square Curve, df = ",df,"\nShaded Area = ",round(area,4)))
axis(1,at=c(lower,bound,upper),labels=c(as.character(0),as.character(bound),""))
axis(2)
x.coords <- c(lower,seq(lower,bound,length.out=301),bound)
y.coords <- c(0,dchisq(seq(lower,bound,length.out=301),df=df),0)
polygon(x.coords,y.coords,col="lightblue",cex=2)
}
}
if (region=="above") {
area <- pchisq(bound,df=df,lower.tail=FALSE)
if (graph) {
bound <- round(bound,2)
upper <- max(qchisq(.9999,df=df),bound+1)
lower <- 0
curve(dchisq(x,df=df),from=lower,to=upper,ylab="density",axes=FALSE,n=501,xlab=xlab,
main=paste("Chi-Square Curve, df = ",df,"\nShaded Area = ",round(area,4)))
axis(1,at=c(lower,bound,upper),labels=c(as.character(0),as.character(bound),""))
axis(2)
x.coords <- c(bound,seq(bound,upper,length.out=301),upper)
y.coords <- c(0,dchisq(seq(bound,upper,length.out=301),df=df),0)
polygon(x.coords,y.coords,col="lightblue",cex=2)
}
}
}#end of chisqGraph
exp.counts <- function(x) (rowSums(x) %*% t(colSums(x)))/sum(x)
chisq.calc <- function(x) {
expected <- exp.counts(x)
contributions <- (x - expected)^2/expected
return(sum(contributions[!is.nan(contributions)]))
}
##########################################################################
# simulation for "rcfix" option
##########################################################################
DoubleFixedResampler <- function(x,n) {
expected <- exp.counts(x)
csq <- function(x) {
sum((x-expected)^2/expected)
}
statistic <- csq(x)
nullDist <- numeric(n)
r <- rowSums(x)
c <- colSums(x)
rtabs <- r2dtable(n,r=r,c=c)
sims <- sapply(rtabs,FUN=csq,USE.NAMES=FALSE)
return(list(sims=sims,last_table=rtabs[[n]]))
}
#################################################
# simulation for "rfix" and "gtfix" options
#################################################
RandFixedResampler <- function (x, n, effects = "random")
{
#x is a two-way table, n is number of resamples
TableResampler <- function(x, n = 1000, effects) {
rowsampler <- function(x, p) {
rmultinom(1, size = sum(x), prob = p)
}
table.samp <- function(x) {
nullprobs <- colSums(x)/sum(x)
resamp <- t(apply(x, 1, rowsampler, p = nullprobs))
rownames(resamp) <- rownames(x)
colnames(resamp) <- colnames(x)
as.table(resamp)
}
rtabsamp <- function(x, n) {
expected <- exp.counts(x)
probs <- expected/sum(x)
resamp.tab <- rmultinom(1, size = n, prob = probs)
resamp.tab <- matrix(resamp.tab, nrow = nrow(x))
rownames(resamp.tab) <- rownames(x)
colnames(resamp.tab) <- colnames(x)
return(resamp.tab)
}
resampled.tabs <- array(0, dim = c(nrow(x), ncol(x),n))
if (effects == "fixed") {
for (i in 1:n) {
resampled.tabs[, , i] <- table.samp(x)
}
return(resampled.tabs)
}
if (effects == "random") {
for (i in 1:n) {
resampled.tabs[, , i] <- rtabsamp(x, sum(x))
}
return(resampled.tabs)
}
}
tables <- TableResampler(x, n, effects = effects)
nullDist <- apply(tables, 3, chisq.calc)
return(list(sims=nullDist,last_table=tables[,,n]))
}#end of RandFixedResampler
##################################################################
####
####
#### Process Input
####
####
#################################################################
#first see if we have formula-data input, or summary data
if (is(x,"formula")) #we have formula-data input
{
prsd <- ParseFormula(x)
pullout <- as.character(prsd$rhs)
if (length(pullout) != 3) {
stop("Formula should have the form ~ var1 + var2")
}
expname <- as.character(prsd$rhs)[2]
respname <- as.character(prsd$rhs)[3]
explanatory <- simpleFind(varName=expname,data=data)
response <- simpleFind(varName=respname,data=data)
data2 <- data.frame(explanatory,response)
names(data2) <- c(expname,respname)
tab <- table(data2)
} #end processing of formula
if (!is(x,"formula")) #we have summary data
{
if (length(dim(x)) !=2) #array more than two dimensions
{
stop("This function works only with two-dimensional contingency tables.")
}
tab <- as.table(x)
}#end processing for summary data
# Define server logic for SlowGoodness
server <- function(input, output) {
simLimit <- 10000 # no more than this many sims at one time
#Keep track of number of simulations in a given "set-up"
numberSims <- 0
chisqSims <- numeric()
latest_table <- NULL
#we also want the ability to refresh the set-up
total <- 0 #total number of sims over all set-ups including current one
totalPrev <- 0 #total number of sims over all set-ups excluding current one
# The observed two-way table:
observed <- tab
rowNames <- rownames(tab)
colNames <- colnames(tab)
expected <- exp.counts(tab)
obschisq <- chisq.calc(observed)
simsUpdate <- reactive({
if (input$resample > 0) {
reps <- min(simLimit,isolate(input$sims))
simType <- isolate(input$simType) #probably don't need isolate
if (simType=="rcfix") newSims <- DoubleFixedResampler(observed,reps)
if (simType=="rfix") newSims <- RandFixedResampler(observed,reps,effects="fixed")
if (simType=="gtfix") newSims <- RandFixedResampler(observed,reps,effects="random")
chisqNew <- newSims$sims
latest_table <<- newSims$last_table
chisqSims <<- c(chisqSims,chisqNew)
latestSim <<- newSims[reps]
numberSims <<- numberSims + reps
total <<- total+reps
list(numberSims,latestSim)
}
})
#this erases the simulation history and puts user back to initial graph
simsReset <- reactive({
input$reset
totalPrev <<- totalPrev + numberSims
numberSims <<- 0
chisqSims <<- numeric()
latest_table <<- NULL
return(totalPrev)
})
df <- (nrow(observed)-1)*(ncol(observed)-1)
xmax <- qchisq(0.999,df=df)
#help with conditonal panels
output$totalPrev <- reactive({
simsReset()
})
# needed for the conditional panels to work
outputOptions(output, 'totalPrev', suspendWhenHidden=FALSE)
output$total <- reactive({
simsUpdate() #for dependency
total
})
# also needed for the conditional panels to work
outputOptions(output, 'total', suspendWhenHidden=FALSE)
output$remarksInitial <- renderText({
paste("Observed chi-square statistic = ",as.character(round(obschisq,2)),sep="")
})
output$obsTable <- renderTable({
observed
})
output$expTable <- renderTable({
expected <- exp.counts(observed)
rownames(expected) <- rowNames
expected
})
output$mosaicInitial <- renderPlot({
par(mfrow=c(1,2))
mosaicplot(t(observed),col="orange",main="Observed Table")
expected <- exp.counts(observed)
rownames(expected) <- rowNames
mosaicplot(t(expected),col="grey",main="Expected Table")
par(mfrow=c(1,1))
})
output$contrTable <- renderTable({
(observed-exp.counts(observed))^2/exp.counts(observed)
})
output$remarksLatest1 <- renderText({
input$resample
rounded1 <- round(obschisq,2)
rounded2 <- round(chisqSims[length(chisqSims)],2)
paste("Observed chi-square statistic = ",as.character(rounded1),
", Latest resampled chi-square = ",as.character(rounded2),sep="")
})
output$mosaicLatest <- renderPlot({
if(input$resample > 0) { # for the dependency
par(mfrow=c(1,2))
rownames(latest_table) <- rowNames
colnames(latest_table) <- colNames
latest_table <- as.matrix(latest_table)
mosaicplot(t(latest_table),col="blue",main="Simulated Table")
expected <- exp.counts(latest_table)
rownames(expected) <- rowNames
mosaicplot(t(expected),col="grey",main="Expected Table\n(from simulation)")
par(mfrow=c(1,1))
}
})
output$latestTable <- renderTable({
input$resample
if (!is.null(latest_table)) {
rownames(latest_table) <- rowNames
colnames(latest_table) <- colNames
storage.mode(latest_table) <- "integer"
return(latest_table)
}
})
output$remarksLatest2 <- renderText({
input$resample
rounded1 <- round(obschisq,2)
rounded2 <- round(chisqSims[length(chisqSims)],2)
paste("Observed chi-square statistic = ",as.character(rounded1),
", Latest resampled chi-square = ",as.character(rounded2),sep="")
})
chisqDensities <- reactive({
input$resample
if (length(chisqSims)==1) band <- 1 else band <- "nrd0"
density(chisqSims,n=500,from=0,to=xmax,bw=band)
})
output$densityplot <-
renderPlot({
input$resample
dchisq <- chisqDensities()
plot(dchisq$x,dchisq$y,type="l",col="blue",
xlab="Chi-Square Value",ylab="Estimated Density",
main="Distribution of Resampled Chi-Square Statistics")
if (length(chisqSims) <= 200) rug(chisqSims)
latest <- chisqSims[length(chisqSims)]
points(latest,0,col="blue",pch=19)
abline(v=isolate(obschisq))
})
output$summary1 <- renderTable({
input$resample
obs <- obschisq
if (length(chisqSims) >0) {
n <- length(chisqSims)
latest <- chisqSims[n]
p.value <- length(chisqSims[chisqSims>=obs])/n
percentage <- paste(as.character(round(p.value*100,2)),"%",sep="")
df <- data.frame(round(latest,2),n,percentage)
names(df) <- c("Last Resampled Chi-Square",
"Number of Resamples So Far",
paste("Percent Above ",round(obs,2),sep="")
)
df
}
})
output$remarksProbBar <- renderText({
obs <- obschisq
paste0("The percentage in the table gives the approximate probability, based on our resamples so far, of getting a chi-square statistic of ",
round(obs,2)," or more, if the Null Hypothesis (no relationship between the two factor",
" variables under study) is true. The more resamples you take the better these",
"approximations will be!")
})
output$summary2 <- renderTable({
input$resample
obs <- obschisq
n <- length(chisqSims)
latest <- chisqSims[n]
p.value <- length(chisqSims[chisqSims>=obs])/n
percentage <- paste(as.character(round(p.value*100,2)),"%",sep="")
df <- data.frame(round(latest,2),n,percentage)
names(df) <- c("Last Resampled Chi-Square",
"Number of Resamples So Far",
paste("Percent Above ",round(obs,2),sep="")
)
df
})
output$remarksProbDensity <- renderText({
obs <- obschisq
paste0("The curve above approximates the true probability distribution of the chi-square statistic.",
" It is based on our resamples so far. The percentage in the table gives the approximate",
"probability, based on our resamples so far, of getting a chi-square statistic of ",
round(obs,2)," or more, if the Null Hypothesis (no relationship between the two factor",
" variables under study) is true. The more resamples you take the better these",
"approximations will be!")
})
output$chisqCurve <- renderPlot({
obs <- obschisq
degFreedom <- df
chisqGraph(bound=obs,region="above",df=degFreedom,xlab="Chi-Square Values",
graph=TRUE)
abline(v=obs)
if (input$compareDen) {
lines(chisqDensities(),col="blue",lwd=4)
}
})
output$remarksProb <- renderText({
obs <- obschisq
paste0("The curve above approximates the true probability distribution of the chi-square statistic.",
" The shaded area gives the approximate probability of getting a chi-square statistic of ",
round(obs,2)," or more, if the Null Hypothesis (no relationshoip between the two factor",
" variables under study) is true.")
})
} #end server
ui <- shinyUI(pageWithSidebar(
# Application title
headerPanel("Chi-Square Goodness-of-Fit Resampling"),
# Sidebar
sidebarPanel(
conditionalPanel(
condition="input.resample == 0 || output.totalPrev == output.total",
radioButtons(inputId="simType",
label="Choose type of simulation",
choices=c("Row and Column Sums Fixed"="rcfix",
"Row Sums Fixed"="rfix",
"Grand Total Fixed"="gtfix"))
),
helpText("One simulation means the machine will produce one simulated table of",
"counts, assuming the Null Hypothesis. How many simulations do",
"you want the machine to perform at once? (Limit is 10000.)"),
numericInput("sims","Number of Simulations at Once",1,min=0,step=1),
br(),
actionButton("resample","Simulate Now"),
conditionalPanel(
condition="(input.resample > 0 && input.reset == 0) || output.total > output.totalPrev",
actionButton("reset","Start Over")
)
),
# Here comes the main panel
mainPanel(
conditionalPanel(
condition="input.resample == 0 || output.totalPrev == output.total",
plotOutput("mosaicInitial"),
h4("Observed Table"),
tableOutput("obsTable"),
hr(),
h4("Table Expected by the Null Hypothesis"),
tableOutput("expTable"),
hr(),
h4("Contributions to the Chi-Square Statistic"),
tableOutput("contrTable"),
hr(),
h5(textOutput("remarksInitial"))
),
conditionalPanel(
condition="(input.resample > 0 && input.reset == 0) || output.total > output.totalPrev",
tabsetPanel(selected="Latest Simulation",
tabPanel("Latest Simulation",
plotOutput("mosaicLatest"),
h4("Last Simulated Table"),
tableOutput("latestTable"),
hr(),
p(textOutput("remarksLatest1")),
tableOutput("summary1"),
p(textOutput("remarksProbBar"))),
tabPanel("Density Plot of Simulations",
plotOutput("densityplot"),
p(textOutput("remarksLatest2")),
tableOutput("summary2"),
p(textOutput("remarksProbDensity"))),
tabPanel("Probability Distribution",
plotOutput("chisqCurve"),
hr(),
checkboxInput("compareDen","Compare with simulated chi-square distribution"),
p(textOutput("remarksProb"))
),
tabPanel("Simulation Types",
includeHTML(system.file("doc/instructorNotes.html",
package="tigerstats"))),
id="MyPanel"
)
)
)
)) #end ui
shiny::shinyApp(ui = ui, server = server)
}#end chisqSimShiny
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.