R/svy_tableone.R

Defines functions cate_str cate_ustr cont_str cont_ustr svy_tableone

Documented in svy_tableone

#' Create Description table for survey data
#'
#' @param design results of svydesign()
#' @param strata strata
#' @param cate categorical variable
#' @param cont continuous variable
#' @param wtn logical. whether to include weighted estimated column
#' @param total logical. whether to show total part
#' @param horizontal logical. the categorical ratio calculation direction
#' @param strata_group left partial match
#'
#' @return one dataframe contains unweighted data and weighted data
#' @export
#'
svy_tableone <- function(design,
                         strata=NULL,cate=NULL,cont=NULL,
                         wtn=FALSE,total=TRUE,horizontal=TRUE,
                         strata_group=NULL){

    if (is.null(cate)){
        cate <- sapply(design$variables, function(i) any(c('character','factor') %in% class(i)))
        cate <- names(cate)[cate]
    }
    if (is.null(cont)) cont <- names(sapply(design$variables,is.numeric))
    res <- NULL
    if (is.null(strata)){
        if (length(cate)>0 & all(!is.na(cate))){
            resi <- cate_ustr(design = design,cate = cate)
            resi$variable[nchar(resi$n)==0] <- paste0(resi$variable[nchar(resi$n)==0],'[n(%)]')
            res <- rbind(res,resi)
        }
        if (length(cont)>0 & all(!is.na(cont))){
            resi <- cont_ustr(design = design,cont = cont)
            resi[is.na(resi)] <- ''
            resi$variable[resi$variable != 'Total'] <- paste0(resi$variable[resi$variable != 'Total'],'[mean(sd)]')
            res <- rbind(res,resi)
        }
    }else{
        if (length(cate)>0 & all(!is.na(cate))){
            cate <- set::not(cate,strata)
            resi <- cate_str(design = design,strata = strata,cate = cate,horizontal=horizontal)
            resi$variable[nchar(resi$n)==0] <- paste0(resi$variable[nchar(resi$n)==0],'[n(%)]')
            res <- rbind(res,resi)
        }
        if (length(cont)>0 & all(!is.na(cont))){
            (cont <- set::not(cont,strata))
            resi <- cont_str(design = design,strata = strata,cont = cont)
            resi[is.na(resi)] <- ''
            resi$variable[resi$variable != 'Total'] <- paste0(resi$variable[resi$variable != 'Total'],'[mean(sd)]')
            res <- plyr::rbind.fill(res,resi)
        }
    }
    ck <- res$variable!='Total'
    ck[1] <- TRUE
    res <- res[ck,]
    strata_class <- NULL
    if (!is.null(strata)) strata_class <- 'strata'
    wtn_class <- NULL
    if (wtn) wtn_class <- 'wtn'
    choice <- rep(TRUE,ncol(res))
    if (!wtn){
        choice <- choice & (colnames(res) != 'N')
        choice <- choice & (do::right(colnames(res),6) != '_wtd_n')
    }
    if (!is.null(strata_group)){
        ck <- colnames(res)[which(do::right(colnames(res),8)=='_unwtd_n')[1]:ncol(res)] |>
            sapply(function(i) any(sapply(strata, function(j) do::left(i,nchar(j)+1)==paste0(j,'_'))))
        ck <- c(rep(TRUE,5),ck)
        choice <- choice & ck
    }
    res <- res[,choice]
    total_class <- 'total'
    if (!total){
        res <- res[,-c(2:4)]
        total_class <- NULL
    }
    class(res) <- c('svy_tableone',total_class,strata_class,wtn_class,'data.frame')
    return(res)
}

cont_ustr <- function(design,cont){
    design <- eval(parse(text=sprintf('design <- update(design,onlyforcont = ifelse(%s > median(%s),1,2))',
                                      cont[1],cont[1])))
    wtd <- survey::svytable(~onlyforcont,design,exclude=NULL,na.action=na.pass)
    df1 <- data.frame(variable='total',
                      n=nrow(design$variables),'unweighted'='-',
                      N=digit(sum(wtd),2),'weighted'=digit(sum(!is.na(design$allprob[[1]]))/nrow(design$variables)*100,2),
                      check.names = FALSE)

    df2 <- lapply(cont, function(conti){
        unwtd <- sprintf('%s(%s)',
                digit(mean(design$variables[,conti],na.rm=TRUE),2),
                digit(sd(design$variables[,conti],na.rm=TRUE),2)) |>
            data.frame() |> do::give_names('unweighted')
        unwtd
        # wtd
        wtd <- sprintf('%s(%s)',
                digit(eval(parse(text=sprintf('svymean(~%s,design = design,na.rm = TRUE)',conti)))[1],2),
                digit(eval(parse(text=sprintf('svyvar(~%s,design = design,na.rm = TRUE)',conti)))[1] |> sqrt(),2)) |>
            as.data.frame() |> do::give_names('weighted')
        cbind(variable=conti,unwtd,wtd)
    }) |> do.call(what = rbind)
    plyr::rbind.fill(df1,df2)
}
cont_str <- function(design,strata,cont){
    if (length(strata)==1) design <- update(design,strataforsyvtalbeone = design$variables[,strata,with=FALSE])
    if (length(strata)>1) design <- update(design,strataforsyvtalbeone = do::paste0_columns(design$variables[,strata,with=FALSE],collapse = '_'))
    wtd <- survey::svytable(~strataforsyvtalbeone,design,exclude=NULL,na.action=na.pass)
    df1a <- data.frame(variable='Total',n=nrow(design$variables),'unweighted'='-',
                       N=digit(sum(wtd),2),'weighted'=digit(sum(!is.na(design$allprob[[1]]))/nrow(design$variables)*100,2),
                       check.names = FALSE)
    (u_n <- table(design$variables[,'strataforsyvtalbeone']))
    (u_value <- digit(prop.table(u_n)*100,2))
    (wtd_value <- digit(prop.table(wtd)*100,2))
    clnms <- c(paste0(names(wtd),'_unwtd_n'),
               paste0(names(wtd),'_unwtd_value'),
               paste0(names(wtd),'_wtd_n'),
               paste0(names(wtd),'_wtd_value'))
    (wtd <- digit(wtd,2))
    (df1b <- data.frame(matrix(c(u_n,u_value,wtd,wtd_value),nrow = 1,dimnames = list(NULL,clnms)),check.names = FALSE))
    (df1 <- cbind(df1a,df1b,p=''))
    df2 <- lapply(cont, function(conti){
        ## tatol
        # un wtd
        unwtd <- sprintf('%s(%s)',
                         digit(mean(design$variables[,conti,with=FALSE][[1]],na.rm=TRUE),2),
                         digit(sd(design$variables[,conti,with=FALSE][[1]],na.rm=TRUE),2)) |>
            data.frame() |> do::give_names('unweighted')
        unwtd
        # wtd
        wtd <- sprintf('%s(%s)',
                       digit(eval(parse(text=sprintf('svymean(~%s,design = design,na.rm = TRUE)',conti)))[1],2),
                       digit(eval(parse(text=sprintf('svyvar(~%s,design = design,na.rm = TRUE)',conti)))[1] |> sqrt(),2)) |>
            as.data.frame() |> do::give_names('weighted')
        (total_df <- cbind(variable=conti,unwtd,wtd))

        ## strata
        # wtd
        (wtd_mean <- eval(parse(text=sprintf('as.data.frame(svyby(~%s,~strataforsyvtalbeone,design,svymean,na.rm.all = FALSE))',conti)))[,1:2])
        (wtd_var <- eval(parse(text=sprintf('as.data.frame(svyby(~%s,~strataforsyvtalbeone,design,svyvar,na.rm.all = FALSE))',conti)))[,1:2])
        wtd <- sprintf('%s(%s)',
                       digit(wtd_mean[,2],2),
                       digit(sqrt(wtd_var[,2]),2)) |>
            t() |> as.data.frame() |> do::give_names(paste0(wtd_mean[,1],'_wtd_value'))
        wtd

        # unwtd
        mean <- sapply(do::unique_no.NA(as.character(design$variables[,strata,with=FALSE][[1]])), function(i){
            digit(mean(design$variables[design$variables[,strata,with=FALSE][[1]]==i,conti,with=FALSE],na.rm=TRUE),2)
        })
        sd <- sapply(do::unique_no.NA(as.character(design$variables[,strata,with=FALSE][[1]])), function(i){
            digit(sd(design$variables[design$variables[,strata,with=FALSE][[1]]==i,conti,with=FALSE][[1]],na.rm=TRUE),2)
        })
        (unwtd <- sprintf('%s(%s)',mean,sd) |> t() |> as.data.frame() |> do::give_names(paste0(names(mean),'_unwtd_value')))

        # p for weighted
        p <- digit(eval(parse(text=sprintf('svyttest(%s~%s,design)',conti,strata)))$p.value,3)
        cbind(variable=conti,total_df,unwtd,wtd,p)
    }) |> do.call(what = rbind)
    plyr::rbind.fill(df1,df2)
}
cate_ustr <- function(design,cate){
    wtd <- as.data.frame(eval(parse(text = sprintf('survey::svytable(~%s,design,exclude=NULL,na.action=na.pass)',cate[1]))))
    df1 <- data.frame(variable='Total',n=nrow(design$variables),'unweighted'='-',
                     N=digit(sum(wtd[,2]),2),'weighted'=digit(sum(!is.na(design$allprob[[1]]))/nrow(design$variables)*100,2),
                     check.names = FALSE)

    df2 <- lapply(cate, function(catei){
        (unwtd <- as.data.frame(table(design$variables[,catei],useNA = 'i')))
        colnames(unwtd) <- c(catei,'n')
        unwtd <- cbind(unwtd,digit(unwtd[,2]/sum(unwtd[,2])*100,2))
        colnames(unwtd)[3] <- 'unweighted'
        # wtd
        wtd <- as.data.frame(eval(parse(text = sprintf('survey::svytable(~%s,design,exclude=NULL,na.action=na.pass)',catei))))
        colnames(wtd)[2] <- 'N'
        wtd <- cbind(wtd,digit(wtd[,2]/sum(wtd[,2])*100,2))
        wtd[,2] <- digit(wtd[,2],2)
        colnames(wtd)[3] <- 'weighted'
        tab <- dplyr::full_join(unwtd,wtd,catei)
        mt <- as.data.frame(matrix(rep('',ncol(tab)),nrow = 1,dimnames = list(NULL,colnames(tab))))
        mt[1,1] <- catei
        tab <- rbind(mt,tab)
        colnames(tab)[1] <- 'variable'
        tab
    }) |> do.call(what = rbind)
    rbind(df1,df2)
}

cate_str <- function(design,strata,cate,horizontal=TRUE){
    if (length(strata)==1) design <- update(design,strataforsyvtalbeone = design$variables[,strata])
    if (length(strata)>1) design <- update(design,strataforsyvtalbeone = do::paste0_columns(design$variables[,strata],collapse = '_'))
    wtd <- survey::svytable(~strataforsyvtalbeone,design,exclude=NULL,na.action=na.pass)
    df1a <- data.frame(variable='Total',n=nrow(design$variables),'unweighted'='-',
                      N=digit(sum(wtd),2),'weighted'=digit(sum(!is.na(design$allprob[[1]]))/nrow(design$variables)*100,2),
                      check.names = FALSE)
    (u_n <- table(design$variables[,'strataforsyvtalbeone']))
    (u_value <- digit(prop.table(u_n)*100,2))
    (wtd_value <- digit(prop.table(wtd)*100,2))
    clnms <- c(paste0(names(wtd),'_unwtd_n'),
               paste0(names(wtd),'_unwtd_value'),
               paste0(names(wtd),'_wtd_n'),
               paste0(names(wtd),'_wtd_value'))
    (wtd <- digit(wtd,2))
    (df1b <- data.frame(matrix(c(u_n,u_value,wtd,wtd_value),nrow = 1,dimnames = list(NULL,clnms)),check.names = FALSE))
    (df1 <- cbind(df1a,df1b,p=''))
    df2 <- lapply(cate, function(catei){
        ## tatol
        # un wtd
        (unwtd <- as.data.frame(table(design$variables[,catei],useNA = 'i')))
        colnames(unwtd) <- c(catei,'n')
        (unwtd <- cbind(unwtd,digit(unwtd[,2]/sum(unwtd[,2])*100,2)))
        colnames(unwtd)[3] <- 'unweighted'
        # wtd
        wtd <- as.data.frame(eval(parse(text = sprintf('survey::svytable(~%s,design,exclude=NULL,na.action=na.pass)',catei))))
        colnames(wtd)[2] <- 'N'
        wtd <- cbind(wtd,digit(wtd[,2]/sum(wtd[,2])*100,2))
        wtd[,2] <- digit(wtd[,2],2)
        colnames(wtd)[3] <- 'weighted'
        (total_df <- dplyr::full_join(unwtd,wtd,catei))

        ## strata
        # wtd
        if (horizontal){
            (wtd <- eval(parse(text=sprintf('as.data.frame(svyby(~strataforsyvtalbeone,~%s,design,svytotal,na.rm.all = FALSE))',catei))))
            colnames(wtd) <- do::Replace0(colnames(wtd),'strataforsyvtalbeone')
            (strata_len <- length(unique(design$variables[,'strataforsyvtalbeone'])))
            (total <- rowSums(wtd[,2:(1+strata_len)]))
            for (i in 1:strata_len) {
                (e <- digit(wtd[,1+i]/total*100,2))
                (ci_l <- digit((wtd[,1+i]-1.96*wtd[,1+i+strata_len])/total*100,2))
                (ci_u <- digit((wtd[,1+i]+1.96*wtd[,1+i+strata_len])/total*100,2))
                wtd[,1+strata_len+i] <- sprintf('%s(%s,%s)',e,ci_l,ci_u)
                colnames(wtd)[1+strata_len+i] <- paste0(colnames(wtd)[1+i],'_wtd_value')
                colnames(wtd)[1+i] <- paste0(colnames(wtd)[1+i],'_wtd_n')
                wtd[,1+i] <- digit(wtd[,1+i],2)
            }
        }else{
            (wtd <- eval(parse(text=sprintf('as.data.frame(svyby(~%s,~strataforsyvtalbeone,design,svytotal,na.rm.all = FALSE))',catei))))
            ckse <- grepl(paste0('se.',catei),colnames(wtd))
            ck <- !ckse
            ck[1] <- FALSE
            for (i in 1:nrow(wtd)) {
                if (i==1){
                    dfi <- as.data.frame(do::Replace0(colnames(wtd)[ck],catei)) |>
                        do::give_names(catei)
                    dfi <- cbind(dfi,matrix(rep('',nrow(wtd)*2),nrow = 1,dimnames = list(NULL,sapply(c('_wtd_n','_wtd_value'), function(i)paste0(rownames(wtd),i)))))
                }
                dfi[,paste0(rownames(wtd)[i],'_wtd_n')] <- digit(as.numeric(wtd[i,ck]),2)
                (e <- digit(wtd[i,ck]/sum(wtd[i,ck])*100,2))
                (ci_l <- digit((wtd[i,ck]-1.96*as.numeric(wtd[i,ckse]))/sum(wtd[i,ck])*100,2))
                (ci_u <- digit((wtd[i,ck]+1.96*as.numeric(wtd[i,ckse]))/sum(wtd[i,ck])*100,2))
                dfi[,paste0(rownames(wtd)[i],'_wtd_value')] <- sprintf('%s(%s,%s)',e,ci_l,ci_u)
            }
            wtd <- dfi
        }

        # unwtd
        unwtd <- table(design$variables[,catei],design$variables[,'strataforsyvtalbeone'],useNA = 'i') |>
            as.data.frame() |>
            reshape2::dcast(Var1~Var2,value.var = 'Freq')
        colnames(unwtd)[1] <- catei
        if (horizontal){
            (total <- rowSums(unwtd[,2:(1+strata_len)]))
            for (i in 1:strata_len) {
                unwtd <- cbind(unwtd,digit(unwtd[,i+1]/total*100,2))
                colnames(unwtd)[ncol(unwtd)] <- paste0(colnames(unwtd)[i+1],'_unwtd_value')
                colnames(unwtd)[i+1] <- paste0(colnames(unwtd)[i+1],'_unwtd_n')
            }
        }else{
            for (i in 1:strata_len) {
                unwtd <- cbind(unwtd,digit(unwtd[,i+1]/sum(unwtd[,i+1])*100,2))
                colnames(unwtd)[ncol(unwtd)] <- paste0(colnames(unwtd)[i+1],'_unwtd_value')
                colnames(unwtd)[i+1] <- paste0(colnames(unwtd)[i+1],'_unwtd_n')
            }
        }

        tab <- dplyr::full_join(unwtd,wtd,catei)
        tab <- tab[,c(catei,do::increase(colnames(tab)[-1]))]
        tab <- dplyr::full_join(total_df,tab,catei)
        mt <- as.data.frame(matrix(rep('',ncol(tab)+1),nrow = 1,dimnames = list(NULL,c(colnames(tab),'p'))))
        mt[1,1] <- catei
        p <- digit(eval(parse(text=sprintf('svychisq(~%s+%s,design)',catei,strata)))$p.value,3)
        mt$p <- p
        tab$p <- ''
        tab <- rbind(mt,tab)
        colnames(tab)[1] <- 'variable'
        tab
    }) |> do.call(what = rbind)
    rbind(df1[,colnames(df2)],df2)
}
yikeshu0611/nhanesR documentation built on Jan. 29, 2022, 6:08 a.m.