Package etable."

library(knitr)
library(etable)
library(Hmisc)

Introduction

The main purpose of this package is to create descriptive tables for various subgroups in a quick and easy way. Most of the statistics can also be calculated using weights. Below are some examples of packet functionality using artificial data.

set.seed(31415)
age    <- round(runif(10000, 20, 80))
dec    <- cut(age, c(20, 30, 40, 50, 60, 70, 80))
weight <- rnorm(10000, mean=80, sd=10)
height <- rnorm(10000, mean=1.6, sd=0.1)
bmi    <- weight/height^2
bmi_q  <- cut(bmi, quantile(bmi, c(0, 0.25, 0.5, 0.75, 1)))
sex    <- factor(as.factor(rbinom(10000, 1, 0.5)), labels=c('Men', 'Women'))
ethnic <- factor(as.factor(rbinom(10000, 1, 0.75)),labels=c('Other','Caucasian'))
stage  <- as.factor(rbinom(10000, 2, 0.3)+1)
disease<- factor(as.factor(rbinom(10000, 1, 0.1)), labels=c('no','yes'))
treat  <- factor(as.factor(rbinom(10000, 1, 0.2)), labels=c('no','yes'))
ws     <- abs(rnorm(10000))
d<-data.frame(sex,age,dec,ethnic,weight,height,bmi,bmi_q,stage,disease,treat,ws)

Simple tables

With the predifined cell-function iqr_cell one can generate a simple table with interquartile range of a variable. It calculates the median, Q1 and Q3 for bmi variable in data.frame d. The variable to be analysed is selected by setting x_vars='bmi'. With rows='sex' the factor to separate the table by rows is selected. Parameter rnames='Sex' set the label for the row groups.

tab <- tabular.ade(x_vars='bmi', rows='sex', rnames='Sex', data=d, FUN=iqr_cell)
knitr::kable(tab, caption='Median (Q1/Q3) of BMI')

A simple 2 x 2 table

For a simple 2 x 2 table, the second separation factor for columns needs to be specified, what is done with cols='ethnic' and cnames='Ethnicity'.

tab<-tabular.ade(x_vars='bmi', rows='sex', rnames='Sex', cols='ethnic', cnames='Ethnicity', data=d, FUN=iqr_cell)
knitr::kable(tab, caption='Median (Q1/Q3) of BMI')

A n(nested) x 2 table

More than one factor at once can be used for rows or columns to create nested tables using rows=c('sex', 'dec'), rnames=c('Sex', 'Decades').

tab<-tabular.ade(x_vars='bmi', rows=c('sex', 'dec'), rnames=c('Sex', 'Decades'),cols='ethnic', cnames='Ethnicity', data=d, FUN=iqr_cell)
knitr::kable(tab, caption='Median (Q1/Q3) of BMI')

A n x n nested table

The cell function n_cell returns the number of non-missing observations in each cell. Missing values of x_vars variable will be excluded.

tab<-tabular.ade(x_vars='sex', rows=c('dec','bmi_q'),  rnames=c('Decades','BMI Quantiles'), cols=c('sex', 'ethnic'), cnames=c('Sex', 'Ethnicity'), data=d, FUN=n_cell)
knitr::kable(tab, caption='N of Obs.')

A n x 1 table

With the cell function quantile_cell, quantiles can be calculated. The parameter probs defines which quantile should be calculated.

tab<-tabular.ade(x_vars='bmi', xname='BMI', rows=c('sex','ethnic','disease','treat'), rnames=c('Sex', 'Ethnicity', 'Disease', 'Treatment'), data=d, FUN=quantile_cell, probs=0.95)
knitr::kable(tab, caption='95th quantile of BMI')

Predefined cell functions

There are several predefined cell functions in this package. See the help pages for more information. The stat_cell function includes a wide range of statistics and is the most usefull cell function of all.

Basic parameters are x, y, z, w, cell_ids, row_ids, col_ids, vnames, vars, n_min. Each cell function must take these parameters. They will be automatically passed from tabular.ade function. Most of the functions use only the x variable for calculations and w for weighted calculations. Only corr_p_cell uses y variable. Additional parameters like digits = 3 can be used in tabular.ade( , ...) instead of the points.

Writing custom cell function

There is a possibility to write custom cell function. It allows all possible designs of the cell and much more.

An example of a custom cell function

my_cell<- function(x, y, z, w, cell_ids, row_ids, col_ids, vnames, vars, n_min)
{
out<- format(mean(x[cell_ids], na.rm=TRUE), digits = 3)
return(out)
}

tab<-tabular.ade(x_vars='age', rows='sex', rnames='Sex', cols='dec', cnames='Decades', data=d, FUN=my_cell)
knitr::kable(tab, caption='Mean Age')

Another simple example of custom cell function

my_cell<- function(x, y, z, w, cell_ids, row_ids, col_ids, vnames, vars, n_min)
{
out<- NULL
tab<-table(x[cell_ids])
for(i in 1:length(tab)){
out<- paste(out, levels(x)[i],': ' ,tab[i], sep='')
if(i<length(tab)) out<- paste(out, ', ', sep='')
}
return(out)
}

tab<-tabular.ade(x_vars='sex', rows='dec', rnames='Decades', cols='stage', cnames='Stage', data=d, FUN=my_cell)
knitr::kable(tab, caption='Frequencies')

More complicated cell function example

b_cell<- function(x, y, z, w, cell_ids, row_ids, col_ids, vnames, vars, n_min)
{
out<- NULL
if(length(unique(x))==2){
lv<-levels(x)
n <-sum(x[cell_ids]==lv[2])
N <-sum(table(x[cell_ids]))
out<-paste(levels(x)[2], ': ',format((n/N)*100, digits=3),'% (N:',n , ')',sep='')
}
if(!is.factor(x) & length(unique(x))> 2){
quant <- format(quantile(x[cell_ids], c(0.25, 0.5, 0.75), na.rm=TRUE), digits=3)
out<- paste(quant[1], ' (',quant[2],'/',quant[3],')', sep='')
}
if(is.factor(x) & length(unique(x))> 2){
lv<-levels(x)
n <-table(x[cell_ids])
N <-sum(table(x[cell_ids]))
out<- paste(lv, ': ', format((n/N)*100,  digits=3), '%', collapse=' | ', sep='')
}
return(out)
}

tab<-tabular.ade(x_vars=c('bmi','ethnic','stage'),xname=c('BMI','Ethnicity','Stages'), cols='sex', cnames='Sex', data=d, FUN=b_cell)
knitr::kable(tab, caption='Diverse variables')

A T-test function, usage of x and y variables

t_test_cell<- function(x, y, z, w, cell_ids, row_ids, col_ids, vnames, vars, n_min)
{
v <- x[cell_ids]
group <- y[cell_ids]
test<-t.test(v[which(group==levels(group)[1])], v[which(group==levels(group)[2])])
mdiff<- format(diff(test$estimate), digits=3)
p<- base:::format.pval(test$p.value, digits=2, eps=0.0001)
out<- paste('Diff: ', mdiff, ', p-value: ', p, sep='')
return(out)
}

tab<-tabular.ade(x_vars='bmi', xname='BMI', y_vars='ethnic', yname='Ethnicity', rows='dec', rnames='Decades', cols='sex', cnames='Sex', data=d, FUN=t_test_cell)
knitr::kable(tab, caption='T-test for BMI between Ethnicity groups')

Multiple x or y variables

There is a possibility to pass more than one variable to x_vars or x_vars parameters. In this way a correlation matrix can be created.

vars    <-c('age', 'weight', 'height', 'bmi')
vlabels <-c('Age', 'Weight', 'Height', 'BMI')

tab<-tabular.ade(x_vars=vars, xname=vlabels, y_vars=vars, yname=vlabels,data=d, FUN=corr_p_cell, digits=2)
knitr::kable(tab, caption='Pearson correlation')

Multiple x with nested columns

If there are multiple x variables, then they are listed line by line.

vars    <-c('age', 'weight', 'height', 'bmi')
vlabels <-c('Age', 'Weight', 'Height', 'BMI')

tab<-tabular.ade(x_vars=vars, xname=vlabels, cols=c('sex','stage'), cnames=c('Sex','Stage'), data=d, FUN=quantile_cell)
knitr::kable(tab, caption='Medians')

Complex tables

The ALL keyword

The keyword ALL, after a factor in rows or cols statement, adds a row for overall sample.

tab<-tabular.ade(x_vars='sex', rows=c('treat', 'ALL'), rnames=c('Treatment'), cols=c('disease', 'ALL'), cnames=c('Disease'), data=d, FUN=n_cell, alllabel='both')
knitr::kable(tab, caption='Contingency table')

Weighted tables

Most of the predefined cell functions support weighting with w=weights parameter. This way weighted statistics can be calculated.

tab<-tabular.ade(x_vars='sex', rows=c('sex', 'ALL', 'ethnic', 'stage'), rnames=c('Sex','Ethnicity', 'Stage'), w='ws', data=d, FUN=n_cell, digits=1)
knitr::kable(tab, caption='weighted N')

Various statistics in a table

The predefined cell functions stat_cell can calculate several statistics at once. The statistics are set using keywords in x_vars or y_vars parameters.

vars    <-c('age', 'weight', 'height', 'bmi')
vlabels <-c('Age', 'Weight', 'Height', 'BMI')

keywords  <-c('MIN', 'MAX', 'MEAN', 'SD', 'CV', 'SKEW',     'KURT')
keylabels <-c('Min', 'Max', 'Mean', 'SD', 'CV', 'Skewness', 'Kurtosis')

tab<-tabular.ade(x_vars=vars, xname=vlabels, y_vars=keywords, yname=keylabels, data=d, FUN=stat_cell)
knitr::kable(tab, caption='Various statistics')

Various statistics combined with rows parameter

keywords  <-c('N', 'MIN', 'MAX', 'MEAN', 'SD')
keylabels <-c('N', 'Min', 'Max', 'Mean', 'SD')

tab<-tabular.ade(x_vars=vars, xname=vlabels, y_vars=keywords, yname=keylabels, rows=c('sex','ALL','ethnic'), rnames=c('Sex','Ethnicity'), data=d, FUN=stat_cell)
knitr::kable(tab, caption='Various statistics')

An example using the statistic keywords in x_vars parameter

keywords  <-c('N', 'MIN', 'MAX', 'MEAN', 'SD')
keylabels <-c('N', 'Min', 'Max', 'Mean', 'SD')


tab<-tabular.ade(x_vars=keywords, xname=keylabels, y_vars=vars, yname=vlabels, rows=c('sex', 'ALL'), rnames=c('Sex'),data=d, FUN=stat_cell)
knitr::kable(tab, caption='Various statistics')

And finally, an example of a weighted, multivariable, nested table with several statistics

vars    <-c('age', 'weight', 'height', 'bmi')
vlabels <-c('Age', 'Weight', 'Height', 'BMI')

keywords  <-c('N', 'MEDIAN', 'IQR')
keylabels <-c('N', 'Median', 'IQR')

tab<-tabular.ade(x_vars=vars, xname=vlabels, y_vars=keywords, yname=keylabels, rows=c('sex', 'ALL'), rnames=c('Sex'),cols=c('ethnic'),cnames=c('Ethnicity'),w='ws',data=d,FUN=stat_cell)
knitr::kable(tab, caption='Various statistics')


Try the etable package in your browser

Any scripts or data that you put into this service are public.

etable documentation built on May 22, 2021, 5:07 p.m.