inst/doc/tableby.R

## ----echo = FALSE---------------------------------------------------------------------------------
options(width = 100)
ge330 <- getRversion() >= "3.3.0"

## ---- load-data-----------------------------------------------------------------------------------
library(arsenal)
require(knitr)
require(survival)
data(mockstudy) ##load data
dim(mockstudy)  ##look at how many subjects and variables are in the dataset 
# help(mockstudy) ##learn more about the dataset and variables
str(mockstudy) ##quick look at the data

## ---- simple1-------------------------------------------------------------------------------------
tab1 <- tableby(arm ~ sex + age, data=mockstudy)

## ---- simple-text---------------------------------------------------------------------------------
summary(tab1, text=TRUE)

## ---- simple-markdown, results='asis'-------------------------------------------------------------
summary(tab1)

## -------------------------------------------------------------------------------------------------
as.data.frame(tab1)

## -------------------------------------------------------------------------------------------------
## base R frequency example
tmp <- table(Gender=mockstudy$sex, "Study Arm"=mockstudy$arm)
tmp

# Note: The continuity correction is applied by default in R (not used in %table)
chisq.test(tmp)

## base R numeric summary example
tapply(mockstudy$age, mockstudy$arm, summary)
summary(aov(age ~ arm, data=mockstudy))


## ---- check-labels--------------------------------------------------------------------------------
## Look at one variable's label
attr(mockstudy$age,'label')

## See all the variables with a label
unlist(lapply(mockstudy,'attr','label'))
# Can also use labels(mockstudy)

## ---- add-label, results='asis'-------------------------------------------------------------------
attr(mockstudy$sex,'label')  <- 'Gender'

tab1 <- tableby(arm ~ sex + age, data=mockstudy)
summary(tab1)

## ---- results = 'asis'----------------------------------------------------------------------------
labels(mockstudy)  <- c(age = 'Age, yrs', sex = "Gender")

tab1 <- tableby(arm ~ sex + age, data=mockstudy)
summary(tab1)

## ---- results='asis'------------------------------------------------------------------------------
mylabels <- list(sex = "SEX", age = "Age, yrs")
summary(tab1, labelTranslations = mylabels)

## ---- assignlabels--------------------------------------------------------------------------------
labels(tab1)
labels(tab1) <- c(arm="Treatment Assignment", age="Baseline Age (yrs)")
labels(tab1)

## ---- results='asis'------------------------------------------------------------------------------
summary(tab1)

## ---- results='asis'------------------------------------------------------------------------------
mycontrols  <- tableby.control(test=FALSE, total=FALSE,
                               numeric.test="kwt", cat.test="chisq",
                               numeric.stats=c("N", "median", "q1q3"),
                               cat.stats=c("countpct"),
                               stats.labels=list(N='Count', median='Median', q1q3='Q1,Q3'))
tab2 <- tableby(arm ~ sex + age, data=mockstudy, control=mycontrols)
summary(tab2)

## ---- results='asis'------------------------------------------------------------------------------
tab3 <- tableby(arm ~ sex + age, data=mockstudy, test=FALSE, total=FALSE, 
                numeric.stats=c("median","q1q3"), numeric.test="kwt")
summary(tab3)

## ---- testformula---------------------------------------------------------------------------------
tab.test <- tableby(arm ~ kwt(age) + anova(bmi) + notest(ast), data=mockstudy)
tests(tab.test)

## ---- results='asis'------------------------------------------------------------------------------
summary(tab.test)

## ---- testsAndStats, results='asis'---------------------------------------------------------------
tab.test <- tableby(arm ~ kwt(ast, "Nmiss2","median") + anova(age, "N","mean") +
                    notest(bmi, "Nmiss","median"), data=mockstudy)
summary(tab.test)

## ---- simfe, results='asis'-----------------------------------------------------------------------
set.seed(100)
tab.catsim <- tableby(arm ~ sex + race, cat.test="fe", simulate.p.value=TRUE, B=500, data=mockstudy)
tests(tab.catsim)

## ---- chisqcorrect, results='asis'----------------------------------------------------------------
cat.correct <- tableby(arm ~ sex + race, cat.test="chisq", subset = !grepl("^F", arm), data=mockstudy)
tests(cat.correct)
cat.nocorrect <- tableby(arm ~ sex + race, cat.test="chisq", subset = !grepl("^F", arm),
     chisq.correct=FALSE, data=mockstudy)
tests(cat.nocorrect)

## ---- nobyvar, results='asis'---------------------------------------------------------------------
tab.noby <- tableby(~ bmi + sex + age, data=mockstudy)
summary(tab.noby)

## ---- results="asis"------------------------------------------------------------------------------
summary(tab.test, pfootnote=TRUE)

## -------------------------------------------------------------------------------------------------
mockstudy$age.ordnew <- ordered(c("a",NA,as.character(mockstudy$age.ord[-(1:2)])))
table(mockstudy$age.ord, mockstudy$sex)
table(mockstudy$age.ordnew, mockstudy$sex)
class(mockstudy$age.ord)

## ---- results="asis", eval=requireNamespace("coin", quietly = TRUE)-------------------------------
summary(tableby(sex ~ age.ordnew, data = mockstudy), pfootnote = TRUE)
summary(tableby(sex ~ age.ord, data = mockstudy), pfootnote = TRUE)

## ---- eval=ge330----------------------------------------------------------------------------------
survfit(Surv(fu.time, fu.stat)~sex, data=mockstudy)
survdiff(Surv(fu.time, fu.stat)~sex, data=mockstudy)

## ---- results='asis'------------------------------------------------------------------------------
summary(tableby(sex ~ Surv(fu.time, fu.stat), data=mockstudy))

## ---- eval=ge330----------------------------------------------------------------------------------
summary(survfit(Surv(fu.time/365.25, fu.stat)~sex, data=mockstudy), times=1:5)

## ---- results='asis', eval=ge330------------------------------------------------------------------
summary(tableby(sex ~ Surv(fu.time/365.25, fu.stat), data=mockstudy, times=1:5, surv.stats=c("NeventsSurv","NriskSurv")))

## ---- results='asis'------------------------------------------------------------------------------
set.seed(100)
N <- nrow(mockstudy)
mockstudy$dtentry <- mdy.Date(month=sample(1:12,N,replace=T), day=sample(1:29,N,replace=T), 
                              year=sample(2005:2009,N,replace=T))
summary(tableby(sex ~ dtentry, data=mockstudy))

## ---- results='asis'------------------------------------------------------------------------------
## create a vector specifying the variable names
myvars <- names(mockstudy)

## select the 8th through the last variables
## paste them together, separated by the + sign
RHS <- paste(myvars[8:10], collapse="+")
RHS

## create a formula using the as.formula function
as.formula(paste('arm ~ ', RHS))

## use the formula in the tableby function
summary(tableby(as.formula(paste('arm ~', RHS)), data=mockstudy))

## ---- results='asis'------------------------------------------------------------------------------
## The formulize function does the paste and as.formula steps
tmp <- formulize('arm',myvars[8:10])
tmp

## More complex formulas could also be written using formulize
tmp2 <- formulize('arm',c('ps','hgb^2','bmi'))

## use the formula in the tableby function
summary(tableby(tmp, data=mockstudy))

## ----results='asis'-------------------------------------------------------------------------------
varlist1 <- c('age','sex','hgb')
varlist2 <- paste0("anova(", c('bmi','alk.phos','ast'), ", 'meansd')")

summary(tableby(formulize("arm", c(varlist1, varlist2)),
                data = mockstudy, numeric.test = "kwt"), pfootnote = TRUE)

## -------------------------------------------------------------------------------------------------
newdata <- subset(mockstudy, subset=age>50 & arm=='F: FOLFOX', select = c(sex,ps:bmi))
dim(mockstudy)
table(mockstudy$arm)
dim(newdata)
names(newdata)

## ---- results='asis'------------------------------------------------------------------------------
summary(tableby(sex ~ ., data=newdata))

## ---- results='asis'------------------------------------------------------------------------------
summary(tableby(sex ~ ps + hgb + bmi, subset=age>50 & arm=="F: FOLFOX", data=mockstudy))

## -------------------------------------------------------------------------------------------------
## create a variable combining the levels of mdquality.s and sex
with(mockstudy, table(interaction(mdquality.s,sex)))

## ---- results='asis'------------------------------------------------------------------------------
summary(tableby(arm ~ interaction(mdquality.s,sex), data=mockstudy))

## ---- results='asis'------------------------------------------------------------------------------
## create a new grouping variable with combined levels of arm and sex
summary(tableby(interaction(mdquality.s, sex) ~  age + bmi, data=mockstudy, subset=arm=="F: FOLFOX"))

## ---- maketrans, results='asis'-------------------------------------------------------------------
trans <- tableby(arm ~ I(age/10) + log(bmi) + factor(mdquality.s, levels=0:1, labels=c('N','Y')),
                 data=mockstudy)
summary(trans)

## ---- assignlabels2-------------------------------------------------------------------------------
labels(trans)
labels(trans)[2:4] <- c('Age per 10 yrs', 'log(BMI)', 'MD Quality')
labels(trans)

## ---- transsummary, results='asis'----------------------------------------------------------------
summary(trans)

## ---- results='asis'------------------------------------------------------------------------------
class(mockstudy$mdquality.s)
summary(tableby(arm~mdquality.s, data=mockstudy))

## ---- results='asis'------------------------------------------------------------------------------
summary(tableby(arm ~ chisq(mdquality.s, "Nmiss","countpct"), data=mockstudy))

## ---- results='asis'------------------------------------------------------------------------------
mytab <- tableby(arm ~ sex + alk.phos + age, data=mockstudy)
mytab2 <- mytab[c('age','sex','alk.phos')]
summary(mytab2)
summary(mytab[c('age','sex')], digits = 2)
summary(mytab[c(3,1)], digits = 3)
summary(sort(mytab, decreasing = TRUE))
summary(mytab[mytab < 0.5])
head(mytab, 1) # can also use tail()

## ---- results="asis"------------------------------------------------------------------------------
## demographics
tab1 <- tableby(arm ~ sex + age, data=mockstudy,
                control=tableby.control(numeric.stats=c("Nmiss","meansd"), total=FALSE))
## lab data
tab2 <- tableby(arm ~ hgb + alk.phos, data=mockstudy,
                control=tableby.control(numeric.stats=c("Nmiss","median","q1q3"),
                                        numeric.test="kwt", total=FALSE))
tab12 <- merge(tab1, tab2)
class(tab12)
summary(tab12)

## ---- results='asis'------------------------------------------------------------------------------
summary(merge(
  tableby(sex ~ age, data = mockstudy),
  tableby(arm ~ bmi, data = mockstudy),
  all = TRUE
))

## ---- results='asis'------------------------------------------------------------------------------
t1 <- tableby(arm ~ sex + age, data=mockstudy)
summary(t1, title='Demographics')

## ---- results='asis'------------------------------------------------------------------------------
summary(tableby(list(arm, sex) ~ age, data = mockstudy), title = c("arm table", "sex table"))

## -------------------------------------------------------------------------------------------------
## look at how many missing values there are for each variable
apply(is.na(mockstudy),2,sum)

## ---- results='asis'------------------------------------------------------------------------------
## Show how many subjects have each variable (non-missing)
summary(tableby(sex ~ ast + age, data=mockstudy,
                control=tableby.control(numeric.stats=c("N","median"), total=FALSE)))

## Always list the number of missing values
summary(tableby(sex ~ ast + age, data=mockstudy,
                control=tableby.control(numeric.stats=c("Nmiss2","median"), total=FALSE)))

## Only show the missing values if there are some (default)
summary(tableby(sex ~ ast + age, data=mockstudy, 
                control=tableby.control(numeric.stats=c("Nmiss","mean"),total=FALSE)))

## Don't show N at all
summary(tableby(sex ~ ast + age, data=mockstudy, 
                control=tableby.control(numeric.stats=c("mean"),total=FALSE)))

## ---- results = 'asis'----------------------------------------------------------------------------
mockstudy$ps.cat <- factor(mockstudy$ps)
attr(mockstudy$ps.cat, "label") <- "ps"
summary(tableby(sex ~ includeNA(ps.cat), data = mockstudy, cat.stats = "countpct"))

## ---- results='asis'------------------------------------------------------------------------------
summary(tableby(arm ~ sex + age + fu.time, data=mockstudy), digits=4, digits.p=2, digits.pct=1)

## ----results='asis'-------------------------------------------------------------------------------
summary(tableby(arm ~ chisq(sex, digits.pct=1) + anova(age, digits=4) +
                  anova(fu.time, digits = 1), data=mockstudy))

## ---- results='asis'------------------------------------------------------------------------------
trim10 <- function(x, weights=rep(1,length(x)), ...){
  mean(x, trim=.1, ...)
}

summary(tableby(sex ~ hgb, data=mockstudy, 
                control=tableby.control(numeric.stats=c("Nmiss","trim10"), numeric.test="kwt",
                    stats.labels=list(Nmiss='Missing values', trim10="Trimmed Mean, 10%"))))

## ---- results='asis'------------------------------------------------------------------------------
trim510comma <- function(x, weights=rep(1,length(x)), ...){
  tmp <- c(mean(x, trim = 0.05, ...), mean(x, trim = 0.1, ...))
  as.tbstat(tmp, sep = ", ")
}
trim510bracket <- function(x, weights=rep(1,length(x)), ...){
  tmp <- c(mean(x, trim = 0.05, ...), mean(x, trim = 0.1, ...))
  as.tbstat(tmp, sep = " ", parens = c("[", "]"))
}

summary(tableby(sex ~ hgb, data=mockstudy, numeric.stats=c("Nmiss", "trim510comma"), test = FALSE))
summary(tableby(sex ~ hgb, data=mockstudy, numeric.stats=c("Nmiss", "trim510bracket"), test = FALSE))

## ----results='asis'-------------------------------------------------------------------------------
trim10pct <- function(x, weights=rep(1,length(x)), ...){
  tmp <- mean(x, trim = 0.05, ...)
  as.countpct(c(tmp, 10), sep = " ", parens = c("(", ")"), which.count = 0, which.pct = 2, pct = "%")
}
summary(tableby(sex ~ hgb, data=mockstudy, numeric.stats=c("Nmiss", "trim10pct"),
                digits = 2, digits.pct = 0, test = FALSE))


## -------------------------------------------------------------------------------------------------
##create fake group that is not balanced by age/sex 
set.seed(200)
mockstudy$fake_arm <- ifelse(mockstudy$age>60 & mockstudy$sex=='Female',sample(c('A','B'),replace=T, prob=c(.2,.8)),
                            sample(c('A','B'),replace=T, prob=c(.8,.4)))

mockstudy$agegp <- cut(mockstudy$age, breaks=c(18,50,60,70,90), right=FALSE)

## create weights based on agegp and sex distribution
tab1 <- with(mockstudy,table(agegp, sex))
tab2 <- with(mockstudy, table(agegp, sex, fake_arm))
tab2
gpwts <- rep(tab1, length(unique(mockstudy$fake_arm)))/tab2
gpwts[gpwts>50] <- 30

## apply weights to subjects
index <- with(mockstudy, cbind(as.numeric(agegp), as.numeric(sex), as.numeric(as.factor(fake_arm)))) 
mockstudy$wts <- gpwts[index]

## show weights by treatment arm group
tapply(mockstudy$wts,mockstudy$fake_arm, summary)

## ---- results='asis', eval=ge330------------------------------------------------------------------
orig <- tableby(fake_arm ~ age + sex + Surv(fu.time/365, fu.stat), data=mockstudy, test=FALSE)
summary(orig, title='No Case Weights used')
tab1 <- tableby(fake_arm ~ age + sex + Surv(fu.time/365, fu.stat), data=mockstudy, weights=wts)
summary(tab1, title='Case Weights used')

## ---- results='asis', eval=ge330------------------------------------------------------------------
mypval <- data.frame(
  byvar = "fake_arm",
  variable = c('age','sex','Surv(fu.time/365, fu.stat)'), 
  adj.pvalue = c(.953,.811,.01), 
  method = c('Age/Sex adjusted model results')
)
tab2 <- modpval.tableby(tab1, mypval, use.pname=TRUE)
summary(tab2, title='Case Weights used, p-values added', pfootnote=TRUE)

## ---- results='asis'------------------------------------------------------------------------------
table2 <- tableby(arm~sex + factor(mdquality.s), data=mockstudy, cat.simplify=TRUE)
summary(table2, labelTranslations=c(sex="Female", "factor(mdquality.s)"="MD Quality"))

## ----results='asis'-------------------------------------------------------------------------------
summary(tableby(arm ~ age + ast, data = mockstudy,
                numeric.simplify=TRUE, numeric.stats=c("Nmiss", "meansd")))

## ----results='asis'-------------------------------------------------------------------------------
summary(tableby(arm ~ anova(age, "meansd", numeric.simplify=TRUE) +
                  chisq(sex, cat.simplify=TRUE), data = mockstudy))

## ----results='asis'-------------------------------------------------------------------------------
summary(tableby(arm ~ sex, cat.simplify = "label", data = mockstudy))

## -------------------------------------------------------------------------------------------------
tab1 <- summary(tableby(arm~sex+age, data=mockstudy), text = NULL)
as.data.frame(tab1)

# write.csv(tab1, '/my/path/here/my_table.csv')

## ----eval = FALSE---------------------------------------------------------------------------------
#  ## write to an HTML document
#  tab1 <- tableby(arm ~ sex + age, data=mockstudy)
#  write2html(tab1, "~/trash.html")
#  
#  ## write to a Word document
#  write2word(tab1, "~/trash.doc", title="My table in Word")

## ----eval=FALSE-----------------------------------------------------------------------------------
#  # A standalone shiny app
#  library(shiny)
#  library(arsenal)
#  data(mockstudy)
#  
#  shinyApp(
#    ui = fluidPage(tableOutput("table")),
#    server = function(input, output) {
#      output$table <- renderTable({
#        as.data.frame(summary(tableby(sex ~ age, data = mockstudy), text = "html"))
#      }, sanitize.text.function = function(x) x)
#    }
#  )

## ----eval=FALSE-----------------------------------------------------------------------------------
#  summary(tableby(sex ~ age, data = mockstudy), title="(\\#tab:mytableby) Caption here")

## ----results='asis'-------------------------------------------------------------------------------
tab <- summary(tableby(sex ~ age + fu.time + bmi + mdquality.s, data = mockstudy))
tab
padjust(tab, method = "bonferroni")

## ----results='asis'-------------------------------------------------------------------------------
summary(tableby(list(sex, mdquality.s, ps) ~ age + bmi, data = mockstudy))

## ----results='asis'-------------------------------------------------------------------------------
summary(tableby(list(sex, mdquality.s, ps) ~ age + bmi, data = mockstudy), term.name = TRUE)

## ----results='asis'-------------------------------------------------------------------------------
summary(tableby(list(sex, ps) ~ age + bmi, strata = arm, data = mockstudy))

## -------------------------------------------------------------------------------------------------
args(tableby.control)

## -------------------------------------------------------------------------------------------------
args(arsenal:::summary.tableby)

Try the arsenal package in your browser

Any scripts or data that you put into this service are public.

arsenal documentation built on June 5, 2021, 1:06 a.m.