epi_stats_tidy: Tidy up a data.frame with summary values

Description Usage Arguments Value Note Author(s) See Also Examples

View source: R/epi_stats_tidy.R

Description

epi_stats_tidy() cleans up the output from epi_stats_summary() and epi_stats_numeric(). Values are rounded to 'digits' decimal places (default is 2), and format(x, nsmall = digits) is used to ensure trailing zeros (e.g. xx.00) are printed. Rows are ordered by the 'order_by' column, which is converted with as.numeric(as.character(x)) because 'percent' or another numeric column is assumed to be the preferred sort key. 'decreasing' is passed to order().

Usage

1
2
epi_stats_tidy(sum_df = NULL, order_by = "percent", perc_n = NULL,
  digits = 2, decreasing = TRUE)

Arguments

sum_df

Data.frame with summary to clean up.

order_by

Column to order results by. Default is 'percent'.

perc_n

Number of rows from original dataframe to calculate percentage. Must be passed manually.

digits

Number of decimal places to round values to. Default is 2.

decreasing

Sort values by decreasing order. Default is TRUE.

Value

Returns a dataframe as a tibble with values ordered and spread. Adds row sums and percentage.

Note

Note that format() will likely change the class type.

Author(s)

Antonio J Berlanga-Taylor <https://github.com/AntonioJBT/episcout>

See Also

epi_stats_summary, epi_stats_format, epi_stats_numeric.

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#####
# Load libraries needed:
library(episcout)
library(dplyr)
library(purrr)
library(e1071)
library(tibble)
library(tidyr)
#####

#####
# Generate a reproducible example data frame:
set.seed(12345)  # fix the RNG so the simulated values are identical on every run
n <- 1000
df <- data.frame(var_id = rep(seq_len(n / 2), each = 2),
                 var_to_rep = rep(c("Pre", "Post"), n / 2),
                 x = rnorm(n),
                 y = rbinom(n, 1, 0.50),
                 z = rpois(n, 2)
                )

# Explore first and last rows for first columns:
epi_head_and_tail(df)

# Add character/factor columns; length.out is tied to n so that the
# number of rows always matches df when n is changed:
col_chr <- data.frame('chr1' = rep(c('A', 'B'), length.out = n),
                      'chr2' = rep(c('C', 'D'), length.out = n)
                      )
dim(col_chr)
# as_tibble() replaces the deprecated as.tibble():
df_cont_chr <- tibble::as_tibble(cbind(df, col_chr))
epi_head_and_tail(df_cont_chr)
epi_head_and_tail(df_cont_chr, last_cols = TRUE)

# Check variable types are what you expect:
epi_clean_count_classes(df_cont_chr)
str(df_cont_chr)
dim(df_cont_chr)
# var_id, y and z can be treated as factors or characters.
summary(as.factor(df_cont_chr$y))
summary(as.factor(df_cont_chr$z))
# Here we'll only transform y though:
df_cont_chr$y <- as.factor(df_cont_chr$y)
epi_clean_count_classes(df_cont_chr)
str(df_cont_chr)

# Designate some values as codes to be counted separately:
codes <- c('Pre', 'A', 'C', '1', '3')
#####

#####
# Summarise character/factor columns, counting only the designated codes
# (action = 'codes_only'):
stat_sum1 <- epi_stats_summary(
  df = df_cont_chr,
  codes = codes,
  class_type = 'chr_fct',
  action = 'codes_only'
)
class(stat_sum1)
stat_sum1
#####

#####
# Tidy the summary: choose the column to sort on and supply the total
# number of rows, which is needed for the percentage calculation:
order_by <- 'percent'
perc_n <- nrow(df_cont_chr)
stat_sum_tidy <- epi_stats_tidy(
  sum_df = stat_sum1,
  order_by = order_by,
  perc_n = perc_n
)
stat_sum_tidy
# Optionally format the tidied values with different numbers of digits:
epi_stats_format(stat_sum_tidy, digits = 0)
epi_stats_format(stat_sum_tidy, digits = 2)
#####

#####
# Repeat the code count, this time for integer/numeric columns:
stat_sum2 <- epi_stats_summary(
  df_cont_chr,
  codes = codes,
  class_type = 'int_num',
  action = 'codes_only'
)
stat_sum2
# Tidy the summary, then format it:
stat_sum_tidy <- epi_stats_tidy(
  sum_df = stat_sum2,
  order_by = order_by,
  perc_n = perc_n
)
stat_sum_tidy
epi_stats_format(stat_sum_tidy, digits = 0)
# NOTE(review): skip = c(2, 3) presumably leaves columns 2 and 3
# unformatted; confirm against the epi_stats_format() documentation.
epi_stats_format(stat_sum_tidy, digits = 2, skip = c(2, 3))
#####

#####
# Summarise character and factor columns with the contingency codes
# excluded (action = 'exclude'):
stat_sum3 <- epi_stats_summary(
  df_cont_chr,
  codes = codes,
  class_type = 'chr_fct',
  action = 'exclude'
)
stat_sum3
# Tidy the summary, then format it:
stat_sum_tidy <- epi_stats_tidy(
  sum_df = stat_sum3,
  order_by = order_by,
  perc_n = perc_n
)
stat_sum_tidy
epi_stats_format(stat_sum_tidy, digits = 0)
epi_stats_format(stat_sum_tidy, digits = 1)
#####

#####
# Summarise numeric/integer columns, leaving out the values listed
# in codes (action = 'exclude'):
stat_sum4 <- epi_stats_summary(
  df = df_cont_chr,
  codes = codes,
  class_type = 'int_num',
  action = 'exclude'
)
class(stat_sum4)
stat_sum4
# No tidying step is needed for a numeric summary; formatting is optional:
epi_stats_format(stat_sum4, digits = 2)
#####

#####
# When none of the requested codes are found, the summary comes back as
# an empty data.frame (tibble):
codes <- c('Per', 'X', '55')
stat_sum_zero <- epi_stats_summary(
  df_cont_chr,
  codes = codes,
  class_type = 'chr_fct',
  action = 'codes_only'
)
stat_sum_zero
#####

AntonioJBT/episcout documentation built on Nov. 7, 2019, 5:34 p.m.