#' @title Table of Relative Risk results by zone by group by envt risk factor
#' @description Make table of Relative Risk results by zone by group by envt risk factor.
#' See source code for notes on this work.
#' @param mydat Data.frame of input data, one row per geographic unit such as US Census block groups, or tracts.
#' @param Enames names of columns with environmental risk factor data, default is names.e from ejscreen package
#' @param Dnames names of columns with percent (fraction) of population that is in each given demographic group, default is names.d from ejscreen package
#' @param popcolname name of column with total population count, default is pop
#' @param Zcolname name of column with name of zone such as US State name
#' @param digits Default is 4. How many significant digits to use.
#' @param testing default is FALSE
#' @return Compiles RR values in array of 3 dimensions: `RRS[Dnames, Enames, Zcolnames]`
#' Returns a matrix with
#' one demographic group per row,
#' one environmental risk indicator per column, and
#' third dimension for which zone (e.g., which US State)
#' @seealso [RR()]
#'
#' @examples
#'
#' # (This is very slow right now)
#'
#' # See examples for [RR.table()] and [RR.means()] and [RR()]
#'
#' ######################################## #
#'
#' ## if just using ejanalysis pkg test data:
#' bg <- ejanalysis::bgtest
#' enames <- c("pm", "o3", "cancer", "resp", "dpm", "pctpre1960", "traffic.score",
#' "proximity.npl", "proximity.rmp", "proximity.tsdf", "proximity.npdes", "ust")
#' dnames = c("pctlingiso", "pctlowinc")
#' dnames.subgroups.count = c("hisp", "nhwa", "nhba", "nhaiana",
#' "nhaa", "nhnhpia", "nhotheralone", "nhmulti")
#' dnames.subgroups.pct = c("pcthisp", "pctnhwa", "pctnhba", "pctnhaiana",
#' "pctnhaa", "pctnhnhpia", "pctnhotheralone", "pctnhmulti")
#'
#' ## if EJAM pkg available:
#' # bg <- as.data.frame(EJAM::blockgroupstats)
#' # enames = EJAM::names_e
#' # dnames = EJAM::names_d
#' # dnames.subgroups.count = EJAM::names_d_subgroups_count
#' # dnames.subgroups.pct = EJAM::names_d_subgroups
#'
#' ## if EJAM pkg not available and using ejscreen pkg data:
#' # bg <- ejscreen::bg22
#' # enames = ejscreen::names.e
#' # dnames = ejscreen::names.d
#' # dnames.subgroups.count = ejscreen::names.d.subgroups
#' # dnames.subgroups.pct = ejscreen::names.d.subgroups.pct
#'
#' ######################################## #
#' Ratios <- ejanalysis::RR.table(bg, Enames = enames,
#' Dnames = c(dnames, dnames.subgroups.pct),
#' popcolname = 'pop', digits = 2)
#'
#' # done like this, it still has NA values:
#'
#' MeansByGroup_and_Ratios <- ejanalysis::RR.means(
#' subset(bg, select = enames),
#' subset(bg, select = c(dnames, dnames.subgroups.pct)),
#' bg$pop)
#'
#' RRS.US <- RR.table(mydat = bg, Enames = enames,
#' Dnames = c(dnames, dnames.subgroups.pct),
#' popcolname = 'pop')
#' RRS.ST <- RR.table(mydat = bg, Enames = enames,
#' Dnames = c(dnames, dnames.subgroups.pct),
#' popcolname = 'pop', Zcolname = 'ST')
#' RRS <- RR.table.add(RRS.ST, RRS.US)
#' RRS['pctlowinc', , ]
#' RRS[ , , 'CA'] # RRS[, , 'PR']
#' RRS[ , 'pm', ]
#' RRS.REGION <- RR.table(mydat = bg,
#' Enames = enames,
#' Dnames = c(dnames, dnames.subgroups.pct),
#' popcolname = 'pop', Zcolname = 'REGION')
#' RRS2 <- RR.table.add(RRS, RRS.REGION)
#' RRS2[ , , '8']
#'
#' @export
#'
RR.table <- function(mydat,
Enames=ejscreen::names.e[ejscreen::names.e %in% names(mydat)],
Dnames=ejscreen::names.d[ejscreen::names.d %in% names(mydat)],
popcolname='pop', Zcolname, testing=FALSE, digits=4) {
# Compile RR values in array of 3 dimensions: RRS[Dnames, Enames, Zcolnames]
# one Demog group per row,
# one Envt factor per column,
# and 3d dimension is for each zone (e.g. each state)
########################### #
# TO DO LIST FOR RR.table() :
########################### #
# FIX THESE:
#- ? options to sort results by D, E, and/or Zone; or by maxE, maxD, or maxZone?
# Can use RR.table.sort() for that.
#- to work on counties, where there seem to be cases where all values are NA in some slice?
# not sure if/why it fails still
# RIGHT NOW THIS WILL NOT HANDLE A CASE WHERE A GIVEN DEMOG GROUP HAS NA FOR THE ENTIRE COUNTY OR STATE? IS THAT EVEN POSSIBLE?
#- to use zone data and other data vectors, not colnames.
# Could recode this so parameters would be vectors with data rather than names of columns in mydata data.frame
# That would make it easier to specify more complex / useful zones, for example,
# or for case where zone factor and E and D and pop are in separate variables, not all in one data.frame
#- to better handle a single zone.****
#- option of saving to csv (done via write.RR.tables() now )
#- ? have a way to add a zone that is the combination of all the specified zones,
# so you can specify States as zones and it will add the entire US as a zone?
# THAT NOW CAN BE DONE VIA RR.table.add()
######################### #
# - to do summary stats: **********
######################### #
# - what E has the highest RRs "usually" or at least "most often" (which E has max RR for the most Ds in given zone,
# then for the most zones for given D, then overall= which E has more max RRs of all D-Z combos?)?,
# - what D has the highest RRs (same breakdowns as for what E)
# - what zone has highest RRs (same breakdowns)
# You can Show 3 slices:
#
# 1. what is the E with highest RR, for each group in each zone?
# 2. what is the D with highest RR, for each E in each zone?
# 3. what is the Z with highest RR, for each group for each E?
#
# 4. which EZ combo has the highest RR, for each D? (18 D)
# 5. which DZ combo has the highest RR, for each E? (13 E with 12 and a summary E)
# 6. which DE combo has the highest RR, for each Z? (52 Z with States/DC & USA) e.g., in CA,
#
# in other words...
#
# for each D, see table of Z X E RR values. get colMaxs and rowMaxs, i.e.
# for each Z which E has max RR, (worst E for each DZ combo) = "worst E" has dz RRs
# for each E which Z has max RR; (worst Z for each DE combo) = "worst Z" has de RRs
# which EZ combo has max RR. (worst EZ combo for each D) has d RRs
#
# for each E, see table of Z X D RR values. get colMaxs and rowMaxs, i.e.
# for each Z which D has max RR, (worst D for each EZ combo) = "worst D"
# for each D which Z has max RR; (worst Z for each DE combo) = "worst Z"
# which DZ combo has max RR. (worst DZ combo for each E) has e RRs
#
# for each Z, see table of D X E RR values. get colMaxs and rowMaxs, i.e.
# for each D which E has max RR, (worst E for each DZ combo) = "worst E"
# for each E which D has max RR; (worst D for each EZ combo) = "worst D"
# which DE combo has max RR. (worst DE combo for each Z) has z RRs
#
# Full list would be
# D X Z X E
# (including for Z=whole USA, not just portions of USA)
# For 51 States + USA, almost 18 D, 12 E plus wtd Esum =
# sum(52 * 18, 51 * 13, 18 * 13, 52, 18, 13 )
#[1] 1916 RRs, most with two identified elements of pair, like "traffic for hispanics",
# plus the RR value itself, or something like 5,000 elements of results, which is absurd.
# We can't possible absorb results that tell us for every single state,
# for each E, what is the Demog group that has the worst RR, & then
# for each D, what is the E with worst RR, etc.
############## #
if (!missing(Zcolname)) {
if (length(Zcolname) > 1) {stop('Zcolname must be a single character string')}
if (!(Zcolname %in% names(mydat))) {
Znames <- Zcolname
} else {
Znames <- unique(mydat[ , Zcolname])
}
} else {
Znames <- 'USA'
}
RRS <- array(NA, c(length(Dnames) + 1, length(Enames) + 1, length(Znames)))
for (myzone.i in 1:length(Znames)) {
# RIGHT NOW THIS WILL NOT HANDLE A CASE LIKE PR, PUERTO RICO,
# WHERE A GIVEN DEMOG GROUP LIKE pcthisp HAS NA FOR
# THE ENTIRE COUNTY OR STATE?
if (!missing(Zcolname)) {
inzone <- (mydat[ , Zcolname] == Znames[myzone.i])
} else {
inzone <- rep(TRUE, length(mydat[ , popcolname]))
}
# Note: ***** This used na.rm in one max but not other,
# to handle states where some E is missing always,
# but still show max RR of the E's with valid data
x <- RR(mydat[inzone , Enames], mydat[inzone, Dnames], mydat[inzone, popcolname])
z <- round(rbind(myzone.max = matrixStats::colMaxs(x, na.rm = FALSE), x), digits) # analyze.stuff version did not work here
x <- cbind(myzone.max = analyze.stuff::rowMaxs(z, na.rm = TRUE), z) # see if matrixStats:: version is same or better now
# rownames(x)[1] <- paste(Znames[myzone.i], '.maxD', sep = '')
# colnames(x)[length(colnames(x))] <- paste(Znames[myzone.i], '.maxE', sep = '')
RRS[ , , myzone.i] <- x
}
dimnames(RRS) <- list(d = c('max.D', Dnames), e = c('max.E', Enames), zone = Znames)
RRS <- RR.table.addmaxzone(RRS, testing = testing)
return(RRS)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.