#' @title create tableone quickly
#' @description create tableone quickly
#' @param df dataframe
#' @param col_num vecter, the numerical variable names
#' @param col_fac vecter, the factor variable
#' @param col_group vecter, the variable to group
#' @param colnames_label vecter, the label of the colnames
#'
#' @return dataframe
#'
#' @examples
# library(pins)
# # Register the board
# pins::board_register(board = "github",
# repo = "jixing475/database",
# branch = "master",
# token = Sys.getenv("GITHUB_PAT"))
# T1 <-
# pins::pin_get(
# name = "T1_pin",
# board = "github"
# )
#
# manuscriptsJX::my_tableone(df = T1,
# col_num = "age_at_initial_pathologic_diagnosis",
# col_fac = c("CASC8", "gender", "race", "number_of_lymphnodes_positive_by_he", "pathologic_stage", "pathologic_T", "pathologic_M", "pathologic_N"),
# col_group = "CASC8",
# colnames_label = c("L_sample", "L_CASC8","L_age_at_initial_pathologic_diagnosis", "L_gender", "L_race", "L_number_of_lymphnodes_positive_by_he", "L_pathologic_stage", "L_pathologic_T", "L_pathologic_M", "L_pathologic_N")
# )
#
#'
#' @rdname my_tableone
#' @export
my_tableone <- function(df,
col_num,
col_fac,
col_group,
colnames_label
) {
library(tidyverse)
library(tableone)
# # 📌 read data edit at this ==============================
# df <- T1
#
# # 📌 set variable edit at this ==============================
# colnames(df)[map_lgl(df, is.numeric)] %>% clipr::write_clip()
#
# col_num <- c("age_at_initial_pathologic_diagnosis")
#
# colnames(df)[!map_lgl(df, is.numeric)] %>% clipr::write_clip()
#
# col_fac <- c("gender","race","pathologic_stage","pathologic_T","pathologic_M","pathologic_N")
#
library(friendlyeval)
group <- col_group
df <- mutate(df, group = !!treat_string_as_col(col_group))
# 📌 create df_label ==============================
df_label <- data.frame(var = colnames(df) %>%
LuckyStar::str_detect_remove("group"))
df_label$label <- colnames_label
# create tableone ==============================
# strata
table_one_group <-
CreateTableOne(
vars = c(col_num, col_fac),
strata = group,# Group
data = df,# filter group is NA
factorVars = col_fac # category variable
)
# overall 一列
table_one_overall <-
CreateTableOne(
vars = c(col_num, col_fac),
data = df,
factorVars = col_fac # category variable
)
# 分几组比较
group_levels <- df[[group]] %>% as.factor() %>% levels()
# 每个 level 下都有, 参数发个非参数法的统计量, 你要进行判别选择,对么个横向的变量选择合适的统计方式, mean or median
idx <- c()
for(i in seq_along(group_levels)){
idx <- table_one_group$ContTable[[i]] %>% as.data.frame() %>%
dplyr::pull(skew) %>%
abs() %>%
`>`(2) %>% # absolutely value is greater than 2
which() %>%
c(idx, .) %>%
unique()
}
idx# skew > 2 所以 idx 是非参变量的索引
non_normal <- rownames(table_one_group$ContTable[[1]])[idx]
## Just typing the object name will invoke the print.TableOne method
## Tests are by oneway.test/t.test for continuous, chisq.test for categorical
## Specifying nonnormal variables will show the variables appropriately,
## and show nonparametric test p-values (kruskal.test/wilcox.test).
## Specify variables in the exact argument to obtain the exact test
## (fisher.test) p-values. If a 2-level factor is specified in cramVars
## both levels are shown in one row.
res_group <- tableone_list2df(table_one_group,
#two_level = two_levels_var(),
non_normal = non_normal) %>%
mutate(Variable = if_else(Variable=="", "NA", Variable)) %>%
mutate(Variable = na_x2na(Variable, "NA")) %>%
tidyr::fill(., Variable) %>%
tidyr::unite(Variable, c("Variable", "level"), sep = " ")
res_overall <- tableone_list2df(table_one_overall,
#two_level = two_levels_var(),
non_normal = non_normal) %>%
mutate(Variable = if_else(Variable=="", "NA", Variable)) %>%
mutate(Variable = na_x2na(Variable, "NA")) %>%
tidyr::fill(., Variable) %>%
tidyr::unite(Variable, c("Variable", "level"), sep = " ")
res <-
left_join(res_group ,
res_overall ,
by = "Variable") %>%
#bind_cols(res_group, res_overall) %>%
select(Variable, Overall, everything()) %>%
select(-test)
# 增加统计量 ==============================
reg <- "(\\(mean \\(sd\\)\\))|(\\(median \\[IQR\\]\\))|(\\(%\\))"
statistic_table <-
res %>%
filter(Variable!="n ") %>%
fill(p, .direction = "down") %>%
mutate(var = Variable) %>%
separate(var, into = c("var", "type"), sep = reg) %>%
mutate(type = str_extract(Variable, reg)) %>%
select(Variable, var, type) %>%
mutate(var = str_trim(var, side = "right"))
output <- c()
for (i in 1:nrow(statistic_table)) {
# sequence
statistic <- get_statistic(statistic_table$var[i],
statistic_table$type[i],
df = df)
names(statistic) <- statistic_table$var[i]
output <- c(output, statistic)
}
statistic_table$statistic <- output
# 增加标签 ==============================
statistic_table <-
statistic_table %>%
left_join(df_label, by ="var") %>%
rowwise() %>%
mutate(label = str_replace(Variable, var, label))
# 合并
final_result <-
res %>%
left_join(select(statistic_table, Variable, statistic, label)) %>%
select(-Variable) %>%
select(Variable = label, everything()) %>%
col_back(p)
final_result[1, 1] <- "n"
reg <- "(\\(mean \\(sd\\)\\))|(\\(median \\[IQR\\]\\))|(\\(%\\))"
res <-
final_result %>%
mutate(var = Variable) %>%
separate(var, into = c("var", "type"), sep = reg) %>%
mutate(type = str_extract(Variable, reg)) %>%
mutate(Var = str_c(var, type)) %>%
rowwise() %>%
mutate(class = str_remove(Variable, var) %>%
str_remove(type) %>%
str_remove("\\(\\)")) %>%
select(-var, -type, -Variable) %>%
select(Var, class, everything())
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.