tidyreport is a pipeline for conducting common statistical analyses, especially those used in the fields of epidemiology and biostatistics, and for generating clean, formatted tables from the statistical output. The analyses include sample descriptive statistics, univariable testing (Wilcoxon rank-sum, Kruskal-Wallis, and chi-squared/Fisher tests), and regression analysis (linear, logistic, tobit, ordinal, LME, normal GEE, logistic GEE, Poisson GEE, ordinal GEE, and Cox). The generated tables can be readily copied into Excel or Word for scientific paper writing.
The main functions are:
get_desc_stat()
generates descriptive statistics of the sample.
get_desc_stat_grouping()
generates descriptive statistics of the sample, stratified by a grouping variable, along with statistical testing of group differences.
get_regression_estimates()
runs different types of regression and summarizes the results.
cox_summary()
summarizes the results of Cox regression.
install.packages("devtools")
devtools::install_github("JiyueQin/tidyreport")
library(tidyreport)
# here is a sample dataset, modified from the starwars dataset in the tidyverse.
str(sample_dat)
## tibble[,5] [83 x 5] (S3: tbl_df/tbl/data.frame)
## $ height : int [1:83] 172 167 96 202 150 178 165 97 183 182 ...
## $ sex : chr [1:83] "male" "none" "none" "male" ...
## $ gender : chr [1:83] "masculine" "masculine" "masculine" "masculine" ...
## $ mass : num [1:83] 77 75 32 136 49 120 75 32 84 77 ...
## $ haircolor: chr [1:83] "other" "other" "other" "none" ...
get_desc_stat(sample_dat)
variable
Full Sample(N=83)
Height, Mean(SD)
174.1(35.4)
Mass, Mean(SD)
98.2(170.8)
Sex, N(%)
Female
16(19.3)
Hermaphroditic
1(1.2)
Male
60(72.3)
None
6(7.2)
Gender, N(%)
Feminine
17(20.5)
Masculine
66(79.5)
Haircolor, N(%)
Black
12(14.5)
Brown
17(20.5)
None
36(43.4)
Other
18(21.7)
# stratified by gender and testing for gender difference
get_desc_stat_grouping(sample_dat, 'gender')
variable
Full Sample(N=83)
Feminine(N=17)
Masculine(N=66)
P Value
Height, Mean(SD)
174.1(35.4)
164.7(23.6)
176.5(37.6)
0.003
Mass, Mean(SD)
98.2(170.8)
54.7(8.6)
106.1(185)
0.002
Sex, N(%)
<0.001
Female
16(19.3)
16(94.1)
0(0)
Hermaphroditic
1(1.2)
0(0)
1(1.5)
Male
60(72.3)
0(0)
60(90.9)
None
6(7.2)
1(5.9)
5(7.6)
Haircolor, N(%)
0.316
Black
12(14.5)
3(17.6)
9(13.6)
Brown
17(20.5)
6(35.3)
11(16.7)
None
36(43.4)
5(29.4)
31(47)
Other
18(21.7)
3(17.6)
15(22.7)
# report median(IQR) instead of mean(SD) for height
get_desc_stat_grouping(sample_dat, 'gender', median_vars = 'height')
variable
Full Sample(N=83)
Feminine(N=17)
Masculine(N=66)
P Value
Mass, Mean(SD)
98.2(170.8)
54.7(8.6)
106.1(185)
0.002
Height, Median(IQR)
180(166.2, 191)
165.5(161.5, 172)
183(171.2, 193)
0.003
Sex, N(%)
<0.001
Female
16(19.3)
16(94.1)
0(0)
Hermaphroditic
1(1.2)
0(0)
1(1.5)
Male
60(72.3)
0(0)
60(90.9)
None
6(7.2)
1(5.9)
5(7.6)
Haircolor, N(%)
0.316
Black
12(14.5)
3(17.6)
9(13.6)
Brown
17(20.5)
6(35.3)
11(16.7)
None
36(43.4)
5(29.4)
31(47)
Other
18(21.7)
3(17.6)
15(22.7)
# get detailed descriptive statistics stratified by gender, sort the table by the order of variables in the data, no statistical testing
get_desc_stat_grouping(sample_dat, 'gender', detail = T, sort = T, test = F)
variable
Full Sample(N=83)
Feminine(N=17)
Masculine(N=66)
Height
Min
66
96
66
Q1
166.2
161.5
171.2
Median
180
165.5
183
Mean
174.1
164.7
176.5
Q3
191
172
193
Max
264
213
264
Sd
35.4
23.6
37.6
Nmiss
5
1
4
Sex, N(%)
Female
16(19.3)
16(94.1)
0(0)
Hermaphroditic
1(1.2)
0(0)
1(1.5)
Male
60(72.3)
0(0)
60(90.9)
None
6(7.2)
1(5.9)
5(7.6)
Mass
Min
15
45
15
Q1
56.4
50
75
Median
79
55
80
Mean
98.2
54.7
106.1
Q3
84.8
56.2
88
Max
1358
75
1358
Sd
170.8
8.6
185
Nmiss
25
8
17
Haircolor, N(%)
Black
12(14.5)
3(17.6)
9(13.6)
Brown
17(20.5)
6(35.3)
11(16.7)
None
36(43.4)
5(29.4)
31(47)
Other
18(21.7)
3(17.6)
15(22.7)
# get formatted table for a logistic regression model
get_regression_estimates(dplyr::mutate(sample_dat, gender = as.factor(gender)), outcome = 'gender', predictor_vec = c( 'height', 'haircolor'), outcome_type = 'binary', format =T)
outcome
term
OR
CI
p
gender
height
1.01
(0.99,1.03)
0.216
haircolor
brown vs black
0.67
(0.12,3.8)
0.654
none vs black
2.19
(0.43,11.3)
0.348
other vs black
2.21
(0.35,14.17)
0.401
# perform linear regression for multiple outcomes with purrr and get tables with kableExtra
purrr::map_df(c('height', 'mass'), ~get_regression_estimates(sample_dat, outcome = .x, predictor_vec = c( 'sex', 'haircolor'), outcome_type = 'linear')) %>% kableExtra::kable() %>% kableExtra::kable_styling(full_width = F) %>% kableExtra::collapse_rows(1)
outcome
term
estimate
CI
p
height
(Intercept)
167.72
(142.44,192.99)
<0.001
sexhermaphroditic
14.15
(-57.95,86.26)
0.697
sexmale
8.02
(-12.26,28.29)
0.433
sexnone
-35.86
(-72.53,0.81)
0.055
haircolorbrown
1.85
(-25.64,29.33)
0.894
haircolornone
8.66
(-15.14,32.46)
0.47
haircolorother
-6.87
(-33.87,20.13)
0.614
mass
(Intercept)
53.19
(26.83,79.54)
<0.001
sexhermaphroditic
1315.04
(1252.5,1377.58)
<0.001
sexmale
27.82
(6.52,49.11)
0.011
sexnone
23.72
(-13.09,60.52)
0.202
haircolorbrown
5.85
(-21.68,33.39)
0.671
haircolornone
2.05
(-22.48,26.59)
0.867
haircolorother
-10.23
(-38.17,17.72)
0.466
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.