#' Calculate means for model and sample data for tabulation and plotting.
#'
#' Get means by domain group for various variables from block model and sample
#' files. Combine the statistics in wide and tall (tidy) formats for tabulation
#' and plotting respectively.
#'
#' @author Alex M Trueman
#'
#' @param bm Block model as data frame.
#' @param bm_value Name of column in block model for calculation of mean.
#' @param samp Sample data as data frame.
#' @param samp_value Name of field in sample data for mean calculation.
#' @param group Grouping field present in block model and sample data.
#' @param samp_weight Optional decluster weight field in sample file.
#'
#' @return List of data frames with wide (for report tables) and tall (tidy)
#' layout.
#' @export
#' @importFrom dplyr funs group_by left_join summarise_at summarise
#' @importFrom magrittr %>%
#' @importFrom rlang enquo quo_name
#' @importFrom stats weighted.mean
#' @importFrom tidyr gather
means_by_group <- function(
bm, bm_value, samp, samp_value, group, samp_weight = NULL) {
group <- enquo(group)
group_str <- quo_name(group)
bm_value <- enquo(bm_value)
# Block model mean.
bm_means <- bm %>%
group_by(!!group) %>%
summarise(bm = mean(!!bm_value, na.rm = TRUE))
# Sample means.
samp_value <- enquo(samp_value)
if(is.null(samp_weight)) {
samp_means <- samp %>%
group_by(!!group) %>%
summarise(naive = mean(!!samp_value, na.rm = TRUE))
} else {
samp_weight <- enquo(samp_weight)
samp_means <- samp %>%
group_by(!!group) %>%
summarise(
naive = mean(!!samp_value, na.rm = TRUE),
declustered = weighted.mean(!!samp_value, !!samp_weight,
na.rm = TRUE)
)
}
# Join block and sample mean data in wide format.
all_means_wide <- bm_means %>%
left_join(samp_means, by = group_str)
# Create tidy format and make type an ordered factor so that plot order is
# controlled.
all_means_tall <- all_means_wide %>%
gather(type, mean, -c(!!group)) %>%
mutate(type = factor(
type,
levels = c("bm", "naive", "declustered"),
ordered = TRUE)
)
return(list(wide = all_means_wide, tall = all_means_tall))
}
#' Tabulation of summary statistics by group field.
#'
#' Markdown formatted tabulation of summary statistics by grouping field.
#'
#' @author Alex M Trueman
#'
#' @param df Data frame for generation of statistics
#' @param group Grouping field
#' @param value Numeric field to have statistics calculated
#' @param digits Round to this many digits
#'
#' @return Markdown format table as character string
#' @export
#' @importFrom dplyr filter group_by rename select summarise
#' @importFrom magrittr %>%
#' @importFrom rlang enquo UQ quo_name
#' @importFrom stats median sd var
#' @importFrom pander pander
sumstats <- function(df, group, value, digits) {
group <- enquo(group)
value <- enquo(value)
group_str <- quo_name(group)
value_str <- quo_name(value)
data <- df %>%
select(!! group, !! value) %>%
filter(!is.na(UQ(value))) %>%
group_by(!! group) %>%
summarise(
n = n(),
min = min(!! value),
max = max(!! value),
median = median(!! value),
mean = mean(!! value),
sd = sd(!! value),
var = var(!! value),
cv = sd / mean
) %>%
rename(group = !! group)
pander(
x = data,
style = "rmarkdown",
caption = paste0(
"Descriptive statistics for ", value_str,
" grouped by ", group_str
),
justify = c("center", rep("right", 8)),
round = c(0, 0, rep(digits, 7)),
keep.trailing.zeros = TRUE,
split.table = Inf
)
}
#' Tabulation of extended summary statistics by group field.
#'
#' Markdown formatted tabulation of summary statistics by grouping field.
#'
#' @author Alex M Trueman
#'
#' @param df Data frame for generation of statistics
#' @param group Grouping field
#' @param value Numeric field to have statistics calculated
#' @param digits Round to this many digits
#'
#' @return Markdown format table as character string
#' @export
#' @importFrom dplyr filter group_by rename select summarise
#' @importFrom magrittr %>%
#' @importFrom rlang enquo UQ quo_name
#' @importFrom stats median quantile sd var
#' @importFrom pander pander
extended_sumstats <- function(df, group, value, digits) {
group <- enquo(group)
value <- enquo(value)
group_str <- quo_name(group)
value_str <- quo_name(value)
data <- df %>%
select(!! group, !! value) %>%
filter(!is.na(UQ(value))) %>%
group_by(!! group) %>%
summarise(
n = n(),
min = min(!! value),
max = max(!! value),
median = median(!! value),
mean = mean(!! value),
irq = quantile(!! value, probs = 0.75) - quantile(!! value, probs = 0.25),
sd = sd(!! value),
var = var(!! value),
cv = sd / mean,
cl90 = 1.96 * sd
) %>%
rename(group = !! group)
pander(
x = data,
style = "rmarkdown",
caption = paste0(
"Descriptive statistics for ", value_str,
" grouped by ", group_str
),
justify = c("center", rep("right", 10)),
#big.mark = ",",
round = c(0, 0, rep(digits, 9)),
keep.trailing.zeros = TRUE,
split.table = Inf
)
}
#' Tabulation of weighted summary statistics by group field.
#'
#' Markdown formatted tabulation of weighted summary statistics by grouping field.
#'
#' @author Alex M Trueman
#'
#' @param df Data frame for generation of statistics
#' @param group Grouping field
#' @param value Numeric field to have statistics calculated
#' @param weight Weight field for statistics
#' @param digits Round to this many digits
#'
#' @return Markdown format table as character string
#' @export
#' @importFrom dplyr filter group_by rename select summarise
#' @importFrom Hmisc wtd.var
#' @importFrom magrittr %>%
#' @importFrom rlang enquo UQ quo_name
#' @importFrom stats median weighted.mean
#' @importFrom pander pander
weighted_sumstats <- function(df, group, value, weight, digits) {
group <- enquo(group)
value <- enquo(value)
weight <- enquo(weight)
group_str <- quo_name(group)
value_str <- quo_name(value)
data <- df %>%
select(!! group, !! value, !! weight) %>%
filter(!is.na(UQ(value))) %>%
group_by(!! group) %>%
summarise(
n = n(),
min = min(!! value),
max = max(!! value),
median = median(!! value),
mean = weighted.mean(!! value, !! weight),
var = wtd.var(x = !! value, weights = !! weight),
sd = sqrt(var),
cv = sd / mean
) %>%
rename(group = !! group)
pander(
x = data,
style = "rmarkdown",
caption = paste0(
"Weighted descriptive statistics for ", value_str,
" grouped by ", group_str
),
justify = c("center", rep("right", 8)),
#big.mark = ",",
round = c(0, 0, rep(digits, 7)),
keep.trailing.zeros = TRUE,
split.table = Inf
)
}
#' Tabulation of top cutting statistics by group field.
#'
#' Markdown formatted tabulation of weighted summary statistics by grouping field.
#'
#' @author Alex M Trueman
#'
#' @param df Data frame for generation of statistics
#' @param group Grouping field
#' @param value Numeric field to have statistics calculated
#' @param value_tc Numeric top cut field for statistics
#' @param weight Weight field for statistics
#' @param digits Round to this many digits
#'
#' @return Markdown format table as character string
#' @export
#' @importFrom dplyr filter group_by rename select summarise
#' @importFrom magrittr %>%
#' @importFrom rlang enquo UQ quo_name
#' @importFrom stats sd weighted.mean
#' @importFrom pander pander
topcut_sumstats <- function(df, group, value, value_tc, weight, digits) {
group <- enquo(group)
value <- enquo(value)
value_tc <- enquo(value_tc)
weight <- enquo(weight)
group_str <- quo_name(group)
value_str <- quo_name(value)
data <- df %>%
filter(!is.na(UQ(value))) %>%
group_by(!! group) %>%
summarise(
n = n(),
min = min(!! value, na.rm = TRUE),
max = max(!! value, na.rm = TRUE),
max_tc = max(!! value_tc, na.rm = TRUE),
topcut = ifelse(max > max_tc, max_tc, NA),
n_tc = sum(UQ(value) > topcut),
p_tc = n_tc / n * 100,
mean = weighted.mean(!! value, !! weight, na.rm = TRUE),
mean_tc = weighted.mean(!! value_tc, !! weight, na.rm = TRUE),
cv = sd(!! value, na.rm = TRUE) / mean,
cv_tc = sd(!! value_tc, na.rm = TRUE) / mean_tc
) %>%
rename(group = !! group) %>%
select(group, n, n_tc, p_tc, min, max, topcut, mean, mean_tc, cv, cv_tc)
pander(
x = data,
style = "rmarkdown",
caption = paste0(
"Weighted top cutting statistics for ", value_str,
" grouped by ", group_str
),
justify = c("center", rep("right", 10)),
# big.mark = ",",
round = c(0, 0, 0, rep(digits, 8)),
keep.trailing.zeros = TRUE,
split.table = Inf
)
}
#' Detailed descriptive statistics for one or more columns columns.
#'
#' @author Alex M Trueman
#'
#' @param df Dataframe
#' @param ... One or more numeric column names to be evaluated.
#' @param sep Character string separating original column name and statistic.
#' By default, this is a regex that finds the last underscore character in
#' the column name. Normally this will work and doesn't need to be changed.
#'
#' @return Dataframe of statistics for each column input.
#' @export
#' @importFrom dplyr funs n select summarise_all
#' @importFrom magrittr %>%
#' @importFrom moments skewness
#' @importFrom rlang quos !!!
#' @importFrom stats median na.omit quantile sd var
#' @importFrom tidyr gather separate spread
#'
#' @examples
#' d <- data.frame(a = rnorm(10, 100, 20), b = rnorm(10, 1, 2))
#' descstat(d, a, b)
descstat <- function(df, ..., sep = "_(?=[^_]+$)") {
vars <- quos(...)
x <- df %>%
select(!!!vars) %>%
na.omit() %>%
summarise_all(funs(
n = n(),
min = min,
q25 = quantile(., 0.25),
median = median,
q75 = quantile(., 0.75),
max = max,
mean = mean,
sd = sd,
var = var,
skew = skewness)) %>%
gather(statistic, value) %>%
separate(statistic, into = c("variable", "statistic"), sep = sep) %>%
spread(statistic, value) %>%
mutate(cv = sd / mean) %>%
select(variable, n, min, q25, median, q75, max, mean, sd, var, cv, skew)
return(x)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.