Nothing
## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
message = FALSE,
warning = FALSE,
fig.height = 6,
fig.width = 7,
fig.path = "fig/tut01-",
dev = "png",
comment = "##"
)
# save some typing
knitr::set_alias(w = "fig.width",
h = "fig.height",
cap = "fig.cap")
# Old Sweave options
# \SweaveOpts{engine=R,eps=TRUE,height=6,width=7,results=hide,fig=FALSE,echo=TRUE}
# \SweaveOpts{engine=R,height=6,width=7,results=hide,fig=FALSE,echo=TRUE}
# \SweaveOpts{prefix.string=fig/vcd-tut,eps=FALSE}
# \SweaveOpts{keep.source=TRUE}
# preload datasets ???
set.seed(1071)
library(vcd)
library(vcdExtra)
library(ggplot2)
data(HairEyeColor)
data(PreSex)
data(Arthritis, package="vcd")
art <- xtabs(~Treatment + Improved, data = Arthritis)
if(!file.exists("fig")) dir.create("fig")
## ---- case-form---------------------------------------------------------------
names(Arthritis) # show the variables
str(Arthritis) # show the structure
head(Arthritis,5) # first 5 observations, same as Arthritis[1:5,]
## ---- frequency-form----------------------------------------------------------
# Agresti (2002), table 3.11, p. 106
GSS <- data.frame(
expand.grid(sex = c("female", "male"),
party = c("dem", "indep", "rep")),
count = c(279,165,73,47,225,191))
GSS
names(GSS)
str(GSS)
sum(GSS$count)
## ---- table-form1-------------------------------------------------------------
str(HairEyeColor) # show the structure
sum(HairEyeColor) # number of cases
sapply(dimnames(HairEyeColor), length) # table dimension sizes
## ---- table-form2-------------------------------------------------------------
# A 4 x 4 table Agresti (2002, Table 2.8, p. 57) Job Satisfaction
JobSat <- matrix(c( 1, 2, 1, 0,
3, 3, 6, 1,
10,10,14, 9,
6, 7,12,11), 4, 4)
dimnames(JobSat) = list(
income = c("< 15k", "15-25k", "25-40k", "> 40k"),
satisfaction = c("VeryD", "LittleD", "ModerateS", "VeryS")
)
JobSat
## ---- table-form3-------------------------------------------------------------
JobSat <- as.table(JobSat)
str(JobSat)
## ---- relevel, eval=FALSE-----------------------------------------------------
# dimnames(JobSat)$income <- c(7.5,20,32.5,60)
# dimnames(JobSat)$satisfaction <- 1:4
## ---- reorder1----------------------------------------------------------------
HairEyeColor <- HairEyeColor[, c(1,3,4,2), ]
str(HairEyeColor)
## ---- reorder2, echo=TRUE, eval=FALSE-----------------------------------------
# Arthritis <- read.csv("arthritis.txt",header=TRUE)
# Arthritis$Improved <- ordered(Arthritis$Improved,
# levels=c("None", "Some", "Marked")
# )
## -----------------------------------------------------------------------------
data(Arthritis, package="vcd")
art <- xtabs(~Treatment + Improved, data = Arthritis)
mosaic(art, gp = shading_max, split_vertical = TRUE, main="Arthritis: [Treatment] [Improved]")
## ---- reorder3----------------------------------------------------------------
UCB <- aperm(UCBAdmissions, c(2, 1, 3))
dimnames(UCB)[[2]] <- c("Yes", "No")
names(dimnames(UCB)) <- c("Sex", "Admit?", "Department")
# display as a flattened table
stats::ftable(UCB)
## ---- structable--------------------------------------------------------------
structable(HairEyeColor) # show the table: default
structable(Hair+Sex ~ Eye, HairEyeColor) # specify col ~ row variables
## ---- structable1,eval=FALSE--------------------------------------------------
# HSE < - structable(Hair+Sex ~ Eye, HairEyeColor) # save structable object
# mosaic(HSE) # plot it
## ---- table-setup-------------------------------------------------------------
n=500
A <- factor(sample(c("a1","a2"), n, rep=TRUE))
B <- factor(sample(c("b1","b2"), n, rep=TRUE))
C <- factor(sample(c("c1","c2"), n, rep=TRUE))
mydata <- data.frame(A,B,C)
## ---- table-ex1---------------------------------------------------------------
# 2-Way Frequency Table
attach(mydata)
mytable <- table(A,B) # A will be rows, B will be columns
mytable # print table
margin.table(mytable, 1) # A frequencies (summed over B)
margin.table(mytable, 2) # B frequencies (summed over A)
prop.table(mytable) # cell percentages
prop.table(mytable, 1) # row percentages
prop.table(mytable, 2) # column percentages
## ---- table-ex2---------------------------------------------------------------
# 3-Way Frequency Table
mytable <- table(A, B, C)
ftable(mytable)
## ---- xtabs-ex1---------------------------------------------------------------
# 3-Way Frequency Table
mytable <- xtabs(~A+B+C, data=mydata)
ftable(mytable) # print table
summary(mytable) # chi-square test of indepedence
## ---- xtabs-ex2---------------------------------------------------------------
(GSStab <- xtabs(count ~ sex + party, data=GSS))
summary(GSStab)
## ---- dayton1-----------------------------------------------------------------
data("DaytonSurvey", package="vcdExtra")
str(DaytonSurvey)
head(DaytonSurvey)
## ---- dayton2-----------------------------------------------------------------
# data in frequency form
# collapse over sex and race
Dayton.ACM.df <- aggregate(Freq ~ cigarette+alcohol+marijuana,
data=DaytonSurvey,
FUN=sum)
Dayton.ACM.df
## ---- dayton3-----------------------------------------------------------------
# in table form
Dayton.tab <- xtabs(Freq ~ cigarette+alcohol+marijuana+sex+race,
data=DaytonSurvey)
structable(cigarette+alcohol+marijuana ~ sex+race,
data=Dayton.tab)
## ---- dayton4-----------------------------------------------------------------
# collapse over sex and race
Dayton.ACM.tab <- apply(Dayton.tab, MARGIN=1:3, FUN=sum)
Dayton.ACM.tab <- margin.table(Dayton.tab, 1:3) # same result
structable(cigarette+alcohol ~ marijuana, data=Dayton.ACM.tab)
## ---- dayton5-----------------------------------------------------------------
library(plyr)
Dayton.ACM.df <- plyr::ddply(DaytonSurvey,
.(cigarette, alcohol, marijuana),
plyr::summarise, Freq=sum(Freq))
Dayton.ACM.df
## ---- collapse1---------------------------------------------------------------
# create some sample data in frequency form
sex <- c("Male", "Female")
age <- c("10-19", "20-29", "30-39", "40-49", "50-59", "60-69")
education <- c("low", 'med', 'high')
data <- expand.grid(sex=sex, age=age, education=education)
counts <- rpois(36, 100) # random Possion cell frequencies
data <- cbind(data, counts)
# make it into a 3-way table
t1 <- xtabs(counts ~ sex + age + education, data=data)
structable(t1)
## ---- collapse2---------------------------------------------------------------
# collapse age to 3 levels, education to 2 levels
t2 <- collapse.table(t1,
age=c("10-29", "10-29", "30-49", "30-49", "50-69", "50-69"),
education=c("<high", "<high", "high"))
structable(t2)
## ----titanicp1----------------------------------------------------------------
table(Titanicp$sibsp, Titanicp$parch)
## ----titanicp2----------------------------------------------------------------
library(dplyr)
Titanicp <- Titanicp |>
mutate(sibspF = case_match(sibsp,
0 ~ "0",
1 ~ "1",
2:max(sibsp) ~ "2+")) |>
mutate(sibspF = ordered(sibspF)) |>
mutate(parchF = case_match(parch,
0 ~ "0",
1 ~ "1",
2:max(parch) ~ "2+")) |>
mutate(parchF = ordered(parchF))
table(Titanicp$sibspF, Titanicp$parchF)
## ---- convert-ex1-------------------------------------------------------------
as.data.frame(GSStab)
## ---- convert-ex2-------------------------------------------------------------
Art.tab <- with(Arthritis, table(Treatment, Sex, Improved))
str(Art.tab)
ftable(Art.tab)
## ---- convert-ex3-------------------------------------------------------------
Art.df <- expand.dft(Art.tab)
str(Art.df)
## ---- tv1---------------------------------------------------------------------
tv.data<-read.table(system.file("extdata","tv.dat", package="vcdExtra"))
head(tv.data,5)
## ---- tv2,eval=FALSE----------------------------------------------------------
# tv.data<-read.table("C:/R/data/tv.dat")
## ---- tv3---------------------------------------------------------------------
TV <- array(tv.data[,5], dim=c(5,11,5,3))
dimnames(TV) <- list(c("Monday","Tuesday","Wednesday","Thursday","Friday"),
c("8:00","8:15","8:30","8:45","9:00","9:15","9:30",
"9:45","10:00","10:15","10:30"),
c("ABC","CBS","NBC","Fox","Other"),
c("Off","Switch","Persist"))
names(dimnames(TV))<-c("Day", "Time", "Network", "State")
## ---- tv3a,eval=FALSE---------------------------------------------------------
# TV <- xtabs(V5 ~ ., data=tv.data)
# dimnames(TV) <- list(Day = c("Monday","Tuesday","Wednesday","Thursday","Friday"),
# Time = c("8:00","8:15","8:30","8:45","9:00","9:15","9:30",
# "9:45","10:00","10:15","10:30"),
# Network = c("ABC","CBS","NBC","Fox","Other"),
# State = c("Off","Switch","Persist"))
#
# # table dimensions
# dim(TV)
## ---- tv4---------------------------------------------------------------------
TV2 <- TV[,,1:3,] # keep only ABC, CBS, NBC
TV2 <- TV2[,,,3] # keep only Persist -- now a 3 way table
structable(TV2)
## ---- tv5---------------------------------------------------------------------
TV.df <- as.data.frame.table(TV2)
levels(TV.df$Time) <- c(rep("8:00", 2),
rep("8:30", 2),
rep("9:00", 2),
rep("9:30", 2),
rep("10:00",2),
"10:30"
)
TV3 <- xtabs(Freq ~ Day + Time + Network, TV.df)
structable(Day ~ Time+Network, TV3)
## ----tv-mosaic1, fig.height=6, fig.width=7------------------------------------
mosaic(TV3, shade = TRUE,
labeling = labeling_border(rot_labels = c(0, 0, 0, 90)))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.