#' Find sources SHared Across a REgion (SHARE)
#'
#' \code{sharehealth} Estimates regional health effects using SHARE
#'
#' This function 1) Applies SHARE, 2) Estimates sources using
#' APCA at each monitor, 3) Estimates health effects for each community,
#' and 4) Estimates regional health effects for each source
#'
#' @param consdata list of data frames of daily constituent concentrations with date as first column for each monitor
#' @param healthdata list of data frames corresponding to health counts and covariates with date as first column for each community
#' @param list list of length equal to healthdata indicating which monitors correspond to which elements in healthdata
#' @param tots list of vectors corresponding to total concentrations (total PM2.5) for each day and monitor. If null, uses \code{rowSums(data)}
#' @param method regional source apportionment method (SHARE or mAPCA)
#
#' @export
sharehealth <- function(consdata, healthdata = NULL, list = NULL,
formula = NULL,
lag = NULL, groupvar = NULL, print = F,
cut = 1, thres = pi/4,
tots = NULL, method = "SHARE") {
out <- getsources(consdata, type = tolower(method), tots = tots, list = list)
if(!is.null(healthdata)) {
#get source concentrations, averaged over community
if(!is.null(list)) {
sourcec <- combsource(out$sources, list)
out$sourceorder <- sourcec
}else{
sourcec <- out$sources
}
#get health effects
regcoef <- list()
for(i in 1 : length(healthdata)) {
print(i)
regcoef[[i]] <- get.hosp(dat = healthdata[[i]], sources = sourcec[[i]],
lag = lag, formu = form1, gv = gv)
}
regcoefREG <- combhealth(regcoef, print = print)
sources <- as.numeric(substr(rownames(regcoefREG), 7, 7))
regcoefREG <- regcoefREG[order(sources), ]
out$hcounty <- regcoef
out$regcoef <- regcoefREG
#iqr increase (IQRS from data)
iqrs <- out$summary["IQR"]
lb <- regcoefREG[, 1] - 1.96 * regcoefREG[, 2]
lb <- percinc(lb, scale = iqrs)
ub <- regcoefREG[, 1] + 1.96 * regcoefREG[, 2]
ub <- percinc(ub, scale = iqrs)
est <- percinc(regcoefREG[, 1], scale = iqrs)
percincci <- data.frame(est, lb, ub)
colnames(percincci) <- c("est", "lb", "ub")
out$iqrinc <- percincci
}
out
}
# dat is hosp dat for one county
# sources is sources for one county
# lag is lag
get.hosp <- function(dat, sources, lag, formu, outcome = "cardio", gv = "agecat") {
#merge with sources
colnames(dat) <- tolower(colnames(dat))
colnames(sources) <- tolower(colnames(sources))
merged <- merge(dat, sources, all.x = T,
by = "date")
#get lag info
#which columns are sources
whSource <- which(substr(colnames(merged), 1, 4) == "sour")
lagSource <- vector(, length = nrow(merged))
temp2 <- rep(NA, nrow(merged))
#for each source, #lag by agecat
for(i in 1 : length(whSource)) {
sour <- merged[, whSource[i]]
#for each source with at least 1 day of data
if(length(which(!is.na(sour))) > 0) {
#if group variable
if(!is.null(gv)) {
gv1 <- merged[, gv]
}else{
gv1 <- gv
}
temp <- Lag(sour, k = lag, group = gv1)
lagSource <- cbind(lagSource, temp)
#else all NA
}else{
lagSource <- cbind(lagSource, temp2)
}
}
lagSource <- lagSource[, -1]
#add in lagged sources
merged[, whSource] <- lagSource
#number of years of data (should be 12)
years <- length(unique(substr(merged$date, 1, 4)))
#get formula
formUSE1 <- paste0(formu, years)
#set up estimates
ests <- matrix(nrow = length(whSource), ncol = 2)
#for each source
for(l in 1 : length(whSource)) {
# print(c("l", l))
covar1 <- paste(colnames(merged)[whSource[l]], collapse = "+")
formUSE <- paste0(outcome, " ~", covar1, "+", formUSE1, ")")
#run model
options(warn = 2)
glm1 <- try(glm(formula = eval(formUSE),
data = merged, family = "quasipoisson",
offset = log(denom)), silent = T)
options(warn = 1)
tf <- suppressWarnings(class(glm1) == "try-error")
if(tf[1]) {
glm1 <- try(glm(formula = eval(formUSE),
data = merged, family = "quasipoisson",
offset = log(denom), control = list(epsilon = .00001)),
silent = T)
}
#save results
tf <- suppressWarnings(class(glm1) != "try-error")
if(tf[1]) {
out1 <- summary(glm1)$coef
whS <- which(substr(rownames(out1), 1, 4) == "sour")
out <- out1[whS, c(1, 2)]
}else{
out <- c(NA, NA)
}
ests[l, ] <- out
}
rownames(ests) <- colnames(merged)[whSource]
colnames(ests) <- c("est", "se")
ests
}
combsource <- function(outsource, list) {
lens <- sapply(list, length)
wh <- unlist(list[which(lens > 1)])
namesU <- unique(names(wh))
sources <- list()
for(i in 1 : length(list)) {
if(lens[i] == 1) {
sources[[i]] <- outsource[[list[[i]]]]
}else{
source1 <- outsource[list[[i]]]
source1 <- ldply(source1, data.frame)[, -1]
sources[[i]] <- aggregate(source1, by = list(source1$date),
FUN= "mean", na.rm = T)[, -1]
}
}
names(sources) <- names(list)
sources
}
getsources <- function(data, type = "share", tots = tots, list = NULL) {
if(type == "share") {
share1 <- share(data)
share <- share1$share
sources <- list()
for(i in 1 : length(data)) {
if(is.null(tots)) {
tots1 <- tots
}else{
tots1 <- tots[[i]]
}
temp <- apca(data[[i]], tots = tots1)$conc
temp <- data.frame(data[[i]][, 1], temp)
colnames(temp) <- c("date", paste0("source", share[[i]]))
sources[[i]] <- temp
}
major.sig <- share1$major.sig
}else if(type == "mapca") {
mapca1 <- mAPCA(data, tots = tots)
apca <- mapca1[["apca"]]
sourcesM <- apca$conc
dates <- as.Date(substr(rownames(sourcesM), 1, 10))
#need to get list
sources <- list()
unmon <- names(data)
whM <- which(colnames(sourcesM) == "mons")
for(i in 1 : length(unmon)) {
whR <- which(sourcesM$mons == unmon[i])
temp1 <- sourcesM[whR, -whM]
temp <- data.frame(dates[whR], temp1)
colnames(temp) <- c("date", colnames(temp1))
sources[[i]] <- temp
}
major.sig <- apca[["vmax"]][["loadings"]][1 : (ncol(data[[1]]) - 1), ]
share <- matrix(rep(seq(1, ncol(sources[[i]]) - 1), length(data)),
nrow = length(data), byrow = T)
share <- sapply(apply(share, 1, list),
function(x) x[[1]], simplify = F)
}
#names(sources) <- names(data)
out <- list()
if(is.null(list)) {
list <- as.list(names(data))
names(list) <- names(data)
}
names(sources) <- names(data)
out$summary <- getsummary(sources, list, major.sig)
out$sources <- sources
out$share <- share
out$major.sig <- major.sig
out
}
sumfun <- function(vec) {
out <- c(summary(vec, na.rm = T), IQR(vec, na.rm =T))
names(out)[length(out)] <- "IQR"
out
}
#get summary for each monitor
getsummary <- function(sourcec, list, vmax) {
#get rid of dates
sourcec <- lapply(sourcec, function(x) {
x2 <- x[, -1]
colnames(x2) <- colnames(x)[-1]
x2
})
#summary stats for each monitor
sumstats <- lapply(sourcec, function(x) t(apply(x, 2, sumfun)))
#median over unique sources
unsource <- unique(unlist(sapply(sourcec, colnames, simplify = F)))
unsource <- sort(unsource[unsource != "sourceInf"])
sumall <- matrix(nrow = length(unsource), ncol = 9)
colnames(sumall) <- c(colnames(sumstats[[1]]), "monitor", "counties")
rownames(sumall) <- unsource
#correspond counties and monitors
list <- unlist(list)
source1 <- sort(as.numeric(substr(unsource, 7, 7)))
cons <- vector()
for(j in source1) {
#find which column
wh1 <- sapply(sourcec, function(x) {
s1 <- paste0("source", j)
if(s1 %in% colnames(x)) {
which(colnames(x) == s1)
}else{
0
}})
#get rid of missing
sumstats1 <- sumstats[wh1 != 0]
cn <- names(sourcec)[wh1 != 0]
wh1 <- wh1[wh1 != 0]
#find median summary
sumj <- mapply(function(x, y) x[y, ], sumstats1, wh1)
sumall[j, 1 : 7] <- apply(sumj, 1, median, na.rm = T)
sumall[j, 8] <- ncol(sumj)
counties <- unique(names(list)[which(list %in% cn)])
sumall[j, 9] <- length(counties)
vmax1 <- abs(vmax[, j])
vmax1 <- sort(vmax1, decreasing = T)
cons[j] <- paste(names(vmax1)[which(vmax1 > 0.4)], collapse = ", ")
}
sumall <- data.frame(sumall, cons)
sumall
}
combhealth <- function(regcoef, print = F) {
regcoef <- sapply(regcoef, function(x) {
x <- data.frame(rownames(x), x)
colnames(x) <- c("source", "est", "se")
x
}, simplify = F)
regcoef <- ldply(regcoef, data.frame)
sources <- unique(regcoef$source)
sources <- sources[substr(sources, 7, 7) != "i"]
regcoefREG <- matrix(nrow = length(sources), ncol = 2)
rownames(regcoefREG) <- sources
colnames(regcoefREG) <- c("est", "se")
for(i in 1 : length(sources)) {
regcoef1 <- regcoef[which(regcoef$source == sources[i]), c("est", "se")]
#combine results
if(nrow(regcoef1) > 1) {
tln1 <- tlniseC(Y = regcoef1$est, V = regcoef1$se^2,
prnt = print, brief = 2)
if(tln1$converge == "no") {
tln1 <- tlniseC(Y = regcoef1$est, V = regcoef1$se^2,
prnt = print, maxiter = 5000, brief = 2)
if(tln1$converge == "no") {
cat("\nDid not converge: source", sources[i], "\n")
}
}
regcoefREG[i, ] <- tln1$gamma[1:2]
}else{
regcoefREG[i, ] <- as.matrix(regcoef1)
}
}
regcoefREG
}
#groupvar is list is factors to group lag by
#formula includes source, can include ns
gethealth <- function(sourcec, healthdata, formula, lag, groupvar = NULL) {
regcoef <- list()
for(i in 1 : length(sourcec)) {
#merge health and source data
colnames(sourcec[[i]])[1] <- tolower(colnames(sourcec[[i]])[1])
colnames(healthdata[[i]])[1] <- tolower(colnames(healthdata[[i]])[1])
merged <- merge(sourcec[[i]], healthdata[[i]], by = "date", all = T)
cnALL <- colnames(sourcec[[i]])[-1]
#remove local sources
whINF <- which(substr(cnALL, 7) == "I")
if(length(whINF) > 0 ) {
cnREG <- cnALL[-whINF]
}
regcoef[[i]] <- matrix(nrow = length(cnREG), ncol = 3)
rownames(regcoef[[i]]) <- cnREG
colnames(regcoef[[i]]) <- c("source", "est", "se")
regcoef[[i]]$source <- cnREG
for(j in 1 : length(cnREG)) {
#get rid of other sources
cnREM <- cnALL[-which(cnALL == cnREG[j])]
merged1 <- merged[, -which(colnames(merged) %in% cnREM)]
colnames(merged1)[which(colnames(merged1) == cnREG[j])] <- "source"
merged1[, "source"] <- Lag(merged1[, "source"], k = lag, group = groupvar)
glm1 <- glm(eval(formula), data = merged1, family = "quasipoisson")
regcoef[[i]][j, c("est", "se")] <- summary(glm1)$coef["source", c(1, 2)]
}
}
regcoef
}
tlncomb <- function(ests, Sources, share, sinkf = NULL, names = cities) {
tln <- list()
if(!is.null(sinkf)) {
sink(file = sinkf)
}
names1 <- list()
Sources <- sort(Sources)
for(i in Sources) {
tln[[i]] <- list()
tln[[i]][[1]] <- matrix(nrow = 3, ncol = 2)
tln[[i]][[2]] <- list()
tln[[i]][[3]] <- vector()
whin <- sapply(share, function(x) {
whin <- which(x == as.character(i))
ifelse(length(whin) > 0, whin, 0)
})
whn0 <- which(whin != 0)
names1[[i]] <- names(whn0)
if(length(whn0) > 0) {
for(lag in 1 : 3) {
order <- matrix(nrow = length(ests[[lag]]), ncol = 2)
for(j in whn0) {
order[j, ] <- ests[[lag]][[j]][whin[j], ]
}
rownames(order) <- names
order <- order[complete.cases(order), ]
if(lag == 1) {
tln[[i]][[4]] <- array(dim = c(nrow(order), 2, 3))
dimnames(tln[[i]][[4]]) <- list(rownames(order),
c("est", "se"),
paste("lag", seq(0, 2)))
}
print(i)
print(c("TLN:", i))
temp <- tlniseC(Y = order[, 1], V = order[, 2]^2,
maxiter = 5000)
tln[[i]][[1]][lag, ] <- temp$gamma[1:2]
tln[[i]][[3]][lag] <- temp$A
tln[[i]][[2]][[lag]] <- order
tln[[i]][[4]][, , lag] <- cbind(temp$theta, temp$SDtheta)
lagUSE <- lag - 1
}#end loop over lag
}else if(length(whn0) == 1){#end whn0
temp <- ests[[whn0]][whin[whn0], ]
temp <- t(as.matrix(temp))
rownames(temp) <- names1[[i]]
tln[[i]][[1]][lag, ] <- temp
tln[[i]][[2]][[lag]] <- temp
}else{
sink()
stop("no source error")
}
rownames(tln[[i]][[1]]) <- paste0("lag", seq(0, 2))
names(tln[[i]][[2]]) <- paste0("lag", seq(0, 2))
names(tln[[i]][[3]]) <- paste0("lag", seq(0, 2))
}#end loop over source
sink()
list(tln, names1)
}
#' \code{percinc} Percent increase
#'
#' @param beta regression coefficients from poisson model
#' @param scale amount for percent increase
#' @export
percinc <- function(beta, scale = 10) {
if(!is.na(beta[1])) {
100 * (exp(beta * scale ) - 1)
}else{
beta
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.