#' Create Description table for survey data
#'
#' @param design results of svydesign()
#' @param strata strata
#' @param cate categorical variable
#' @param cont continuous variable
#' @param wtn logical. whether to include weighted estimated column
#' @param total logical. whether to show total part
#' @param horizontal logical. the categorical ratio calculation direction
#' @param strata_group left partial match
#'
#' @return one dataframe contains unweighted data and weighted data
#' @export
#'
svy_tableone <- function(design,
strata=NULL,cate=NULL,cont=NULL,
wtn=FALSE,total=TRUE,horizontal=TRUE,
strata_group=NULL){
if (is.null(cate)){
cate <- sapply(design$variables, function(i) any(c('character','factor') %in% class(i)))
cate <- names(cate)[cate]
}
if (is.null(cont)) cont <- names(sapply(design$variables,is.numeric))
res <- NULL
if (is.null(strata)){
if (length(cate)>0 & all(!is.na(cate))){
resi <- cate_ustr(design = design,cate = cate)
resi$variable[nchar(resi$n)==0] <- paste0(resi$variable[nchar(resi$n)==0],'[n(%)]')
res <- rbind(res,resi)
}
if (length(cont)>0 & all(!is.na(cont))){
resi <- cont_ustr(design = design,cont = cont)
resi[is.na(resi)] <- ''
resi$variable[resi$variable != 'Total'] <- paste0(resi$variable[resi$variable != 'Total'],'[mean(sd)]')
res <- rbind(res,resi)
}
}else{
if (length(cate)>0 & all(!is.na(cate))){
cate <- set::not(cate,strata)
resi <- cate_str(design = design,strata = strata,cate = cate,horizontal=horizontal)
resi$variable[nchar(resi$n)==0] <- paste0(resi$variable[nchar(resi$n)==0],'[n(%)]')
res <- rbind(res,resi)
}
if (length(cont)>0 & all(!is.na(cont))){
(cont <- set::not(cont,strata))
resi <- cont_str(design = design,strata = strata,cont = cont)
resi[is.na(resi)] <- ''
resi$variable[resi$variable != 'Total'] <- paste0(resi$variable[resi$variable != 'Total'],'[mean(sd)]')
res <- plyr::rbind.fill(res,resi)
}
}
ck <- res$variable!='Total'
ck[1] <- TRUE
res <- res[ck,]
strata_class <- NULL
if (!is.null(strata)) strata_class <- 'strata'
wtn_class <- NULL
if (wtn) wtn_class <- 'wtn'
choice <- rep(TRUE,ncol(res))
if (!wtn){
choice <- choice & (colnames(res) != 'N')
choice <- choice & (do::right(colnames(res),6) != '_wtd_n')
}
if (!is.null(strata_group)){
ck <- colnames(res)[which(do::right(colnames(res),8)=='_unwtd_n')[1]:ncol(res)] |>
sapply(function(i) any(sapply(strata, function(j) do::left(i,nchar(j)+1)==paste0(j,'_'))))
ck <- c(rep(TRUE,5),ck)
choice <- choice & ck
}
res <- res[,choice]
total_class <- 'total'
if (!total){
res <- res[,-c(2:4)]
total_class <- NULL
}
class(res) <- c('svy_tableone',total_class,strata_class,wtn_class,'data.frame')
return(res)
}
cont_ustr <- function(design,cont){
design <- eval(parse(text=sprintf('design <- update(design,onlyforcont = ifelse(%s > median(%s),1,2))',
cont[1],cont[1])))
wtd <- survey::svytable(~onlyforcont,design,exclude=NULL,na.action=na.pass)
df1 <- data.frame(variable='total',
n=nrow(design$variables),'unweighted'='-',
N=digit(sum(wtd),2),'weighted'=digit(sum(!is.na(design$allprob[[1]]))/nrow(design$variables)*100,2),
check.names = FALSE)
df2 <- lapply(cont, function(conti){
unwtd <- sprintf('%s(%s)',
digit(mean(design$variables[,conti],na.rm=TRUE),2),
digit(sd(design$variables[,conti],na.rm=TRUE),2)) |>
data.frame() |> do::give_names('unweighted')
unwtd
# wtd
wtd <- sprintf('%s(%s)',
digit(eval(parse(text=sprintf('svymean(~%s,design = design,na.rm = TRUE)',conti)))[1],2),
digit(eval(parse(text=sprintf('svyvar(~%s,design = design,na.rm = TRUE)',conti)))[1] |> sqrt(),2)) |>
as.data.frame() |> do::give_names('weighted')
cbind(variable=conti,unwtd,wtd)
}) |> do.call(what = rbind)
plyr::rbind.fill(df1,df2)
}
cont_str <- function(design,strata,cont){
if (length(strata)==1) design <- update(design,strataforsyvtalbeone = design$variables[,strata,with=FALSE])
if (length(strata)>1) design <- update(design,strataforsyvtalbeone = do::paste0_columns(design$variables[,strata,with=FALSE],collapse = '_'))
wtd <- survey::svytable(~strataforsyvtalbeone,design,exclude=NULL,na.action=na.pass)
df1a <- data.frame(variable='Total',n=nrow(design$variables),'unweighted'='-',
N=digit(sum(wtd),2),'weighted'=digit(sum(!is.na(design$allprob[[1]]))/nrow(design$variables)*100,2),
check.names = FALSE)
(u_n <- table(design$variables[,'strataforsyvtalbeone']))
(u_value <- digit(prop.table(u_n)*100,2))
(wtd_value <- digit(prop.table(wtd)*100,2))
clnms <- c(paste0(names(wtd),'_unwtd_n'),
paste0(names(wtd),'_unwtd_value'),
paste0(names(wtd),'_wtd_n'),
paste0(names(wtd),'_wtd_value'))
(wtd <- digit(wtd,2))
(df1b <- data.frame(matrix(c(u_n,u_value,wtd,wtd_value),nrow = 1,dimnames = list(NULL,clnms)),check.names = FALSE))
(df1 <- cbind(df1a,df1b,p=''))
df2 <- lapply(cont, function(conti){
## tatol
# un wtd
unwtd <- sprintf('%s(%s)',
digit(mean(design$variables[,conti,with=FALSE][[1]],na.rm=TRUE),2),
digit(sd(design$variables[,conti,with=FALSE][[1]],na.rm=TRUE),2)) |>
data.frame() |> do::give_names('unweighted')
unwtd
# wtd
wtd <- sprintf('%s(%s)',
digit(eval(parse(text=sprintf('svymean(~%s,design = design,na.rm = TRUE)',conti)))[1],2),
digit(eval(parse(text=sprintf('svyvar(~%s,design = design,na.rm = TRUE)',conti)))[1] |> sqrt(),2)) |>
as.data.frame() |> do::give_names('weighted')
(total_df <- cbind(variable=conti,unwtd,wtd))
## strata
# wtd
(wtd_mean <- eval(parse(text=sprintf('as.data.frame(svyby(~%s,~strataforsyvtalbeone,design,svymean,na.rm.all = FALSE))',conti)))[,1:2])
(wtd_var <- eval(parse(text=sprintf('as.data.frame(svyby(~%s,~strataforsyvtalbeone,design,svyvar,na.rm.all = FALSE))',conti)))[,1:2])
wtd <- sprintf('%s(%s)',
digit(wtd_mean[,2],2),
digit(sqrt(wtd_var[,2]),2)) |>
t() |> as.data.frame() |> do::give_names(paste0(wtd_mean[,1],'_wtd_value'))
wtd
# unwtd
mean <- sapply(do::unique_no.NA(as.character(design$variables[,strata,with=FALSE][[1]])), function(i){
digit(mean(design$variables[design$variables[,strata,with=FALSE][[1]]==i,conti,with=FALSE],na.rm=TRUE),2)
})
sd <- sapply(do::unique_no.NA(as.character(design$variables[,strata,with=FALSE][[1]])), function(i){
digit(sd(design$variables[design$variables[,strata,with=FALSE][[1]]==i,conti,with=FALSE][[1]],na.rm=TRUE),2)
})
(unwtd <- sprintf('%s(%s)',mean,sd) |> t() |> as.data.frame() |> do::give_names(paste0(names(mean),'_unwtd_value')))
# p for weighted
p <- digit(eval(parse(text=sprintf('svyttest(%s~%s,design)',conti,strata)))$p.value,3)
cbind(variable=conti,total_df,unwtd,wtd,p)
}) |> do.call(what = rbind)
plyr::rbind.fill(df1,df2)
}
cate_ustr <- function(design,cate){
wtd <- as.data.frame(eval(parse(text = sprintf('survey::svytable(~%s,design,exclude=NULL,na.action=na.pass)',cate[1]))))
df1 <- data.frame(variable='Total',n=nrow(design$variables),'unweighted'='-',
N=digit(sum(wtd[,2]),2),'weighted'=digit(sum(!is.na(design$allprob[[1]]))/nrow(design$variables)*100,2),
check.names = FALSE)
df2 <- lapply(cate, function(catei){
(unwtd <- as.data.frame(table(design$variables[,catei],useNA = 'i')))
colnames(unwtd) <- c(catei,'n')
unwtd <- cbind(unwtd,digit(unwtd[,2]/sum(unwtd[,2])*100,2))
colnames(unwtd)[3] <- 'unweighted'
# wtd
wtd <- as.data.frame(eval(parse(text = sprintf('survey::svytable(~%s,design,exclude=NULL,na.action=na.pass)',catei))))
colnames(wtd)[2] <- 'N'
wtd <- cbind(wtd,digit(wtd[,2]/sum(wtd[,2])*100,2))
wtd[,2] <- digit(wtd[,2],2)
colnames(wtd)[3] <- 'weighted'
tab <- dplyr::full_join(unwtd,wtd,catei)
mt <- as.data.frame(matrix(rep('',ncol(tab)),nrow = 1,dimnames = list(NULL,colnames(tab))))
mt[1,1] <- catei
tab <- rbind(mt,tab)
colnames(tab)[1] <- 'variable'
tab
}) |> do.call(what = rbind)
rbind(df1,df2)
}
cate_str <- function(design,strata,cate,horizontal=TRUE){
if (length(strata)==1) design <- update(design,strataforsyvtalbeone = design$variables[,strata])
if (length(strata)>1) design <- update(design,strataforsyvtalbeone = do::paste0_columns(design$variables[,strata],collapse = '_'))
wtd <- survey::svytable(~strataforsyvtalbeone,design,exclude=NULL,na.action=na.pass)
df1a <- data.frame(variable='Total',n=nrow(design$variables),'unweighted'='-',
N=digit(sum(wtd),2),'weighted'=digit(sum(!is.na(design$allprob[[1]]))/nrow(design$variables)*100,2),
check.names = FALSE)
(u_n <- table(design$variables[,'strataforsyvtalbeone']))
(u_value <- digit(prop.table(u_n)*100,2))
(wtd_value <- digit(prop.table(wtd)*100,2))
clnms <- c(paste0(names(wtd),'_unwtd_n'),
paste0(names(wtd),'_unwtd_value'),
paste0(names(wtd),'_wtd_n'),
paste0(names(wtd),'_wtd_value'))
(wtd <- digit(wtd,2))
(df1b <- data.frame(matrix(c(u_n,u_value,wtd,wtd_value),nrow = 1,dimnames = list(NULL,clnms)),check.names = FALSE))
(df1 <- cbind(df1a,df1b,p=''))
df2 <- lapply(cate, function(catei){
## tatol
# un wtd
(unwtd <- as.data.frame(table(design$variables[,catei],useNA = 'i')))
colnames(unwtd) <- c(catei,'n')
(unwtd <- cbind(unwtd,digit(unwtd[,2]/sum(unwtd[,2])*100,2)))
colnames(unwtd)[3] <- 'unweighted'
# wtd
wtd <- as.data.frame(eval(parse(text = sprintf('survey::svytable(~%s,design,exclude=NULL,na.action=na.pass)',catei))))
colnames(wtd)[2] <- 'N'
wtd <- cbind(wtd,digit(wtd[,2]/sum(wtd[,2])*100,2))
wtd[,2] <- digit(wtd[,2],2)
colnames(wtd)[3] <- 'weighted'
(total_df <- dplyr::full_join(unwtd,wtd,catei))
## strata
# wtd
if (horizontal){
(wtd <- eval(parse(text=sprintf('as.data.frame(svyby(~strataforsyvtalbeone,~%s,design,svytotal,na.rm.all = FALSE))',catei))))
colnames(wtd) <- do::Replace0(colnames(wtd),'strataforsyvtalbeone')
(strata_len <- length(unique(design$variables[,'strataforsyvtalbeone'])))
(total <- rowSums(wtd[,2:(1+strata_len)]))
for (i in 1:strata_len) {
(e <- digit(wtd[,1+i]/total*100,2))
(ci_l <- digit((wtd[,1+i]-1.96*wtd[,1+i+strata_len])/total*100,2))
(ci_u <- digit((wtd[,1+i]+1.96*wtd[,1+i+strata_len])/total*100,2))
wtd[,1+strata_len+i] <- sprintf('%s(%s,%s)',e,ci_l,ci_u)
colnames(wtd)[1+strata_len+i] <- paste0(colnames(wtd)[1+i],'_wtd_value')
colnames(wtd)[1+i] <- paste0(colnames(wtd)[1+i],'_wtd_n')
wtd[,1+i] <- digit(wtd[,1+i],2)
}
}else{
(wtd <- eval(parse(text=sprintf('as.data.frame(svyby(~%s,~strataforsyvtalbeone,design,svytotal,na.rm.all = FALSE))',catei))))
ckse <- grepl(paste0('se.',catei),colnames(wtd))
ck <- !ckse
ck[1] <- FALSE
for (i in 1:nrow(wtd)) {
if (i==1){
dfi <- as.data.frame(do::Replace0(colnames(wtd)[ck],catei)) |>
do::give_names(catei)
dfi <- cbind(dfi,matrix(rep('',nrow(wtd)*2),nrow = 1,dimnames = list(NULL,sapply(c('_wtd_n','_wtd_value'), function(i)paste0(rownames(wtd),i)))))
}
dfi[,paste0(rownames(wtd)[i],'_wtd_n')] <- digit(as.numeric(wtd[i,ck]),2)
(e <- digit(wtd[i,ck]/sum(wtd[i,ck])*100,2))
(ci_l <- digit((wtd[i,ck]-1.96*as.numeric(wtd[i,ckse]))/sum(wtd[i,ck])*100,2))
(ci_u <- digit((wtd[i,ck]+1.96*as.numeric(wtd[i,ckse]))/sum(wtd[i,ck])*100,2))
dfi[,paste0(rownames(wtd)[i],'_wtd_value')] <- sprintf('%s(%s,%s)',e,ci_l,ci_u)
}
wtd <- dfi
}
# unwtd
unwtd <- table(design$variables[,catei],design$variables[,'strataforsyvtalbeone'],useNA = 'i') |>
as.data.frame() |>
reshape2::dcast(Var1~Var2,value.var = 'Freq')
colnames(unwtd)[1] <- catei
if (horizontal){
(total <- rowSums(unwtd[,2:(1+strata_len)]))
for (i in 1:strata_len) {
unwtd <- cbind(unwtd,digit(unwtd[,i+1]/total*100,2))
colnames(unwtd)[ncol(unwtd)] <- paste0(colnames(unwtd)[i+1],'_unwtd_value')
colnames(unwtd)[i+1] <- paste0(colnames(unwtd)[i+1],'_unwtd_n')
}
}else{
for (i in 1:strata_len) {
unwtd <- cbind(unwtd,digit(unwtd[,i+1]/sum(unwtd[,i+1])*100,2))
colnames(unwtd)[ncol(unwtd)] <- paste0(colnames(unwtd)[i+1],'_unwtd_value')
colnames(unwtd)[i+1] <- paste0(colnames(unwtd)[i+1],'_unwtd_n')
}
}
tab <- dplyr::full_join(unwtd,wtd,catei)
tab <- tab[,c(catei,do::increase(colnames(tab)[-1]))]
tab <- dplyr::full_join(total_df,tab,catei)
mt <- as.data.frame(matrix(rep('',ncol(tab)+1),nrow = 1,dimnames = list(NULL,c(colnames(tab),'p'))))
mt[1,1] <- catei
p <- digit(eval(parse(text=sprintf('svychisq(~%s+%s,design)',catei,strata)))$p.value,3)
mt$p <- p
tab$p <- ''
tab <- rbind(mt,tab)
colnames(tab)[1] <- 'variable'
tab
}) |> do.call(what = rbind)
rbind(df1[,colnames(df2)],df2)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.