#' Function created on 23-2-2021 by Matt Whitaker, to work with new wrangling system for REACT-2
#' cov_name_list can be found in cats_and_covs.R and is a named list of all useful covariates in react2
#' Any named list of covariates with variable descriptions will work here, or the function will work without
#'
#' Relies on Kylie's add_conf_ints function
#'
#'
#
# ### helper function
# # quick function to make sure 2dp always shown
# # specifyDecimal <- function(x,k) trimws(format(round(x,k),nsmall=k))
# specifyDecimal <- function(x,k){
# out <- formatC(signif(x = x,digits = k),digits = k,format = "fg",flag = "#")
# out <- gsub('^\\.|\\.$','',out)
# return(out)
# }
#' Make prevalence tables
makeTablesNew <- function(dat = dfRes,
result_var = "res",
pos_val = "1",
neg_val = "0",
covariates = covs, # character vector naming variables to create tables for
cov_name_list,
sens = 0.844,
spec = 0.986,
weights = NULL,
suffix = NULL,
sf = 4,
for_report = FALSE,
write_to_file = FALSE,
percent = TRUE,
output_list=TRUE,
separator=","
){
### Check cov_name_list
if(!is.null(cov_name_list)){
if(!all(covariates %in% names(cov_name_list))){
ind <- !covariates %in% names(cov_name_list)
print(paste0("These covariates are not supplied in your cov_name_list:",covariates[ind]))
print("Using simple variable name instead")
}else{
ind <- rep(F,length(covariates))
}
}
# browser()
rtn <- list()
# i <- 1
for (i in 1:length(covariates)){
print(paste0("Now processing ", covariates[[i]]))
# generate table of result_var by covariate[i]
if(is.null(weights)){
tab <- table( pull(dat, result_var), pull(dat, covariates[i]))
}
else{
tab <- round(questionr::wtd.table(x=pull(dat, result_var),
y=pull(dat, covariates[i]),
weights = pull(dat, weights),
normwt = F,
na.rm = T,
na.show = F),0)
}
#' for instances where there are only zeros or ones in the results (ie all pos or all neg)
if(dim(tab)[1] != 2){
nonmissing <- rownames(tab)
missing = setdiff(c("0","1"), nonmissing)
newtab <- (rep(0, ncol(tab)))
tab <- rbind(tab, missing = newtab)
rownames(tab) <- c(nonmissing, missing)
}
# reorder
tabnew <- as.data.frame.matrix(tab)
tabnew <- as.data.frame(tabnew[order(as.numeric(rownames(tabnew))),])
colnames(tabnew) <- colnames(tab)
rownames(tabnew) <- rownames(tab)
tab <- tabnew
# calculate prevalence and confidence intervals
tab_ci <- add_conf_ints(tab,
poscol = pos_val,
negcol = neg_val,
sens = sens,
spec = spec
)
### Determine variable name to use
if(!is.null(cov_name_list)){
if(!ind[[i]]){
cvname <- cov_name_list[covariates[i]]
}else{
cvname <- covariates[i]
}
}else{
cvname <- covariates[i]
}
# make pretty data frame for output
if(for_report){
df <- as.data.frame.matrix(tab_ci) %>%
dplyr::mutate(Variable = as.character(unlist(cvname)),
Category = rownames(tab_ci),
p = paste0(specifyDecimal(p*100, sf)," (",specifyDecimal(lb*100, sf),separator,specifyDecimal(ub*100, sf),")"),
p_adj = paste0(specifyDecimal(p_adj*100, sf)," (",specifyDecimal(lb_adj*100, sf),separator,specifyDecimal(ub_adj*100,sf),")")) %>%
dplyr::select(Variable, Category, `1`, all, p, p_adj) %>%
dplyr::rename("Positive" = "1", "Total" = "all", "Prevalence"=p,"Prevalence_adjusted" = "p_adj"
)
} else{
if(percent){
df <- as.data.frame.matrix(tab_ci) %>%
dplyr::mutate(Variable = as.character(unlist(cvname)),
Category = rownames(tab_ci),
p_adj = round(p_adj, sf)*100,
lb_adj = round(lb_adj, sf)*100,
ub_adj = round(ub_adj, sf)*100) %>%
dplyr::select(Variable, Category, `1`, all, p_adj, lb_adj, ub_adj) %>%
dplyr::rename("Positive" = "1", "Total" = "all", "Prevalence" = "p_adj",
"Lower" = "lb_adj","Upper" = "ub_adj")
} else{
df <- as.data.frame.matrix(tab_ci) %>%
dplyr::mutate(Variable = as.character(unlist(cvname)),
Category = rownames(tab_ci),
p_adj = round(p_adj, sf),
lb_adj = round(lb_adj, sf),
ub_adj = round(ub_adj, sf)) %>%
dplyr::select(Variable, Category, `1`, all, p_adj, lb_adj, ub_adj) %>%
dplyr::rename("Positive" = "1", "Total" = "all", "Prevalence" = "p_adj",
"Lower" = "lb_adj","Upper" = "ub_adj")
}
}
rtn[[i]] <- df
# write df to csv file (for easy incorporation into rmarkdown report)
if (write_to_file){
if (!is.null(suffix)){
write.csv(df,paste0(covariates[i],"_prev_",suffix,".csv"), row.names = FALSE)
} else {
write.csv(df,paste0(covariates[i],"_prev.csv"), row.names = FALSE)
}
}
}
names(rtn) <- covariates
if(!output_list){
rtn <- dplyr::bind_rows(rtn)
}
return(rtn)
}
# Stratified tables -------------------------------------------------------
stratifiedTables <- function(dat = dfRes,
result_var = "res",
pos_val = "1",
neg_val = "0",
covariates = covs, # character vector naming variables to create tables for
cov_name_list,
strat_var,
sens = 0.844,
spec = 0.986,
weights = NULL,
suffix = NULL,
sf = 4,
for_report = FALSE,
write_to_file = FALSE,
percent = TRUE,
output_list=TRUE,
include_counts=F,
separator=","){
if(class(pull(dat, strat_var)) == "factor"){
uniques=levels(pull(dat, strat_var))
}else{
uniques <- unique(pull(dat, strat_var))[!is.na(unique(pull(dat, strat_var)))] %>% as.character()
}
result_list <- list()
for (i in 1:length(uniques)){
prevs <- makeTablesNew(dat=dat[dat[,strat_var]== uniques[[i]],],
result_var = result_var, covariates = covariates,
sens = sens,spec = spec, cov_name_list = cov_name_list,
output_list = F, sf = sf, weights = weights,separator = separator)
prevs$prev_concat <- paste0(prevs$Prevalence," (",prevs$Lower,separator, prevs$Upper,")")
result_list[[i]] <- prevs
}
names(result_list) <- uniques
if(include_counts){
res <- dplyr::bind_rows(result_list, .id = "Level") %>% dplyr::select(Level, Variable,Positive, Total,
Category,prev_concat) %>%
dplyr::mutate(Level = factor(Level, levels = unique(Level))) %>%
arrange(Level) %>%
tidyr::pivot_wider(names_from = Level, id_cols = c(Variable,Category),values_from=c(Positive, Total,prev_concat))
}else{
res <- dplyr::bind_rows(result_list, .id = "Level") %>% dplyr::select(Level, Variable,
Category,prev_concat) %>%
dplyr::mutate(Level = factor(Level, levels = unique(Level))) %>%
arrange(Level) %>%
tidyr::pivot_wider(names_from = Level, id_cols = c(Variable,Category),values_from=prev_concat)
}
return(res)
}
# Add confidence intervals function --------------------------------------
# Created by Kylie Ainslie
add_conf_ints <- function(tab,method="wilson",poscol="Detected",negcol="Not Detected",
spec = 1, sens = 1) {
#browser()
rtn <- tab
tmp <- dim(tab)
nrows <- tmp[1]
ncols <- tmp[2]
rowP <- vector(mode="numeric", length=ncols)
rowUB <- vector(mode="numeric", length=ncols)
rowLB <- vector(mode="numeric", length=ncols)
rowP_adj <- vector(mode="numeric", length=ncols)
rowUB_adj <- vector(mode="numeric", length=ncols)
rowLB_adj <- vector(mode="numeric", length=ncols)
rowAll <- vector(mode="numeric", length=ncols)
for (i in 1:ncols) {
tmpbin <- propCI(x = tab[poscol,i],
n = tab[poscol,i] + tab[negcol,i],
method=method)
rowP[i] <- as.numeric(tmpbin$p)
rowUB[i] <- as.numeric(tmpbin$upper)
rowLB[i] <- as.numeric(tmpbin$lower)
# Peter Diggle's correction
rowP_adj[i] <- max(0, min(1, (rowP[i] + spec - 1) / (sens + spec - 1) ))
rowUB_adj[i] <- max(0, min(1, (rowUB[i] - (1 - spec)) / (sens + spec - 1)))
rowLB_adj[i] <- max(0, min(1, (rowLB[i] - (1 - spec)) / (sens + spec - 1)))
rowAll[i] <- tab[poscol,i] + tab[negcol,i]
}
## rtn[1:2,] <- round(rtn[1:2,])
rtn <- rbind(rtn, all=rowAll, p=rowP,lb=rowLB, ub=rowUB,
p_adj = rowP_adj, lb_adj = rowLB_adj, ub_adj = rowUB_adj)
t(rtn)
}
# Turn prevalence tables into pivoted plottable dfs -----------------------
makeStratifiedPrevalenceTablePlottable <- function(tab){
pivot_cols=setdiff(colnames(tab), c("Variable", "Category"))
tab_pivot=tidyr::pivot_longer(data = tab, cols=pivot_cols)
tab_pivot$prev <- as.numeric(gsub(" [(].*", "",tab_pivot$value))
tab_pivot$lower <- xtabPercentageExtractor(mystring = tab_pivot$value,lookbehind = "[(]",
lookahead = "[,]",return_numeric = T)
tab_pivot$upper <- xtabPercentageExtractor(mystring = tab_pivot$value,lookbehind = "[,]",
lookahead = "[)]",return_numeric = T)
return(tab_pivot)
}
# Reorder tables nicely ---------------------------------------------------
orderMyTable <- function(tab){
out.list <- list()
if("Level" %in% colnames(tab)){
tab$Category <- tab$Level
}
# define some common lists of categories #
freqs=stringr::word(c("All the time","Some of the time","Hardly ever","Never","Don't know"),1)
ynpns=stringr::word(c("Yes", "No", "Prefer not to say"),1)
yn=stringr::word(c("Yes", "No"),1)
fags = stringr::word(c("Current cigarette smoker","Not current cigarette smoker","Prefer not to say"),1)
vapes = stringr::word(c("Current vaper","Not current vaper","Prefer not to say"),1)
covid = stringr::word(c("COVID confirmed by test","COVID suspected by doctor",
"COVID suspected by respondent","No COVID"),-1)
adiposity = stringr::word(c("Underweight","Normal weight","Overweight", "Obese"),1)
catslist=c(freqs,ynpns,yn, fags, vapes, covid)
# done #
for(i in 1:length(unique(tab$Variable))){
varr=unique(tab$Variable)[[i]]
df_subset <- tab %>% filter(Variable == varr)
levs=stringr::word(unique(df_subset$Category),1)
mycats = stringr::word(df_subset$Category,1)
if(grepl("age",tolower(varr)) | grepl("imd",tolower(varr))){
x <- gsub("^(.*?)[[:punct:]].*","\\1",tab$Category[tab$Variable == varr])
out.list[[varr]] <- df_subset[order(as.numeric(x)),]
}else if(length(setdiff(levs,ynpns))==0){
out.list[[varr]] <-df_subset[order(match(mycats,ynpns)),]
}else if(length(setdiff(levs,yn))==0){
out.list[[varr]] <-df_subset[order(match(mycats,yn)),]
}else if(length(setdiff(levs,freqs))==0){
out.list[[varr]] <-df_subset[order(match(mycats,freqs)),]
}else if(length(setdiff(levs,fags))==0){
out.list[[varr]] <-df_subset[order(match(mycats,fags)),]
}else if(length(setdiff(levs,vapes))==0){
out.list[[varr]] <-df_subset[order(match(mycats,vapes)),]
}else if(length(setdiff(levs,covid))==0){
out.list[[varr]] <-df_subset[order(match(stringr::word(df_subset$Category,-1),
covid)),]
}else if(length(setdiff(levs,adiposity))==0){
out.list[[varr]] <-df_subset[order(match(stringr::word(df_subset$Category,-1),
adiposity)),]
}else{
out.list[[varr]] <- df_subset
}
}
out <- bind_rows(out.list)
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.