demo/pmdata.R

library(LocalControlStrategy)

# Use the <Particulate Matter> data of Obenchain and Young (2022).
data(pmdata)
# pmdata <- read.csv(file="pmdata.csv")          # 2,980 observations on 122 Variables...
# outcome:    AACRmort ...continuous CDC measure of "Age Adjusted" Circulatory and/or Respiratory deaths per 100K US County residents.
# treatment:  Bvoc     ...continuous Exposure (Biogenic volatile organic compounds; micro g/m^3)
# str(pmdata, list.len=122)

data9 <- na.omit(subset(pmdata, select = c(AACRmort, Bvoc, pmSO4, Avoc, PREMdeath, ASmoke, ChildPOV, IncomIEQ, pmSOA)))
str(data9)
# 'data.frame':   2973 obs. of  9 variables:
#  $ AACRmort : num  382 294 325 399 427 ...
#  $ Bvoc     : num  2.72 2.1 2.2 3.18 2.26 ...
#  $ pmSO4    : num  1.07 1.027 0.962 1.044 1.089 ...
#  $ Avoc     : num  1.41 1.31 1.32 1.4 1.53 ...
#  $ PREMdeath: num  9409 7468 8929 11742 9359 ...
#  $ ASmoke   : num  0.191 0.168 0.215 0.199 0.197 ...
#  $ ChildPOV : num  0.193 0.176 0.396 0.275 0.194 0.457 0.366 0.257 0.325 0.275 ...
#  $ IncomIEQ : num  4.39 4.6 5.86 4.23 4.07 ...
#  $ pmSOA    : num  4.13 3.41 3.53 4.58 3.79 ...
#  - attr(*, "na.action")= 'omit' Named int [1:7] 231 1563 1624 1669 2299 2301 2672
#   ..- attr(*, "names")= chr [1:7] "231" "1563" "1624" "1669" ...
# write.csv(data9, file = "data9.csv",row.names = FALSE)
  
rm(pmdata)

# Define Cluster Hierarchy for UNSUPERVISED, nonparametric analyses...
xvars  <- c("Bvoc","Avoc","pmSO4","PREMdeath","ChildPOV","ASmoke")
hclobj <- LCcluster(data9, xvars)   # default clustering method = "ward.D"
hclobj  
plot(hclobj)

# Save Local Control basic parameter settings to an environment that will be Updated...
e <- LCsetup(hclobj, data9, Bvoc, AACRmort)
ls.str(e)

# Compute and Save LRC distributions for a range of K = Number of Clusters...
mort010 <- lrcagg( 10, e)
mort025 <- lrcagg( 25, e)
mort050 <- lrcagg( 50, e)           # Average Cluster Size: ~53 US Counties
plot(mort050, show="ecdf", e)
mort075 <- lrcagg( 75, e)           # Average Cluster Size: ~35 US Counties
mort100 <- lrcagg(100, e)
mort200 <- lrcagg(200, e)           # Average Cluster Size: Only ~13 US Counties

# "Sensitivity Analysis" Summary...
LCcompare(e)                        # LRC Distribution for 50 Clusters appears to Optimize Variance-Bias Trade-Offs... 

# Confirm: Does the Observed LRC distribution for 50 clusters truly differ from
# the Random Permutation NULL distribution assuming x_Covariates are Ignorable?
system.time( conf050 <- confirm(mort050) )   # Simulation takes ~ 5 seconds. 
conf050                                      # Print result summary...
plot(conf050)                                # Compare Observed LRC [Blue] and "NULL" [Gray] eCDFs...

# Simulate maximum p-value for observed Kolmogorov-Smirnov D-statistic... 
system.time( ksd050 <- KSperm(conf050) )     # Simulation takes ~ 9 seconds for the Default number of reps=100 (replications)...   
ksd050                                       # Printed output and Plot imply this p-Value is at most 0.01...
plot(ksd050)                                 # That this p-Value is at most 0.001 can be shown using 1000 replications (takes ~ 82 secs)...

# Example: "Most-Like-Me" Visualizations for Residents of
#   "OC" = Orange County, California (fips=6059):
# xvars:   "Bvoc"   "Avoc"    "pmSO4" "PREMdeath" "ChildPOV" "ASmoke"
xvecOC <- c(0.64748, 2.890584, 0.930111, 4094.363, 0.145, 0.099952)  
mlmeOC <- mlme(e, hclobj, mort050, xvecOC )
mlmeOC                      # Implicit print...
plot(mlmeOC, NN = 25)       # "Most-Like-Me" plot for
# 25 "Nearest Neighbors" of... >>>> Orange County, CA. <<<<
# Summary Statistics...
mlme.stats(mlmeOC, NN = 25)

### End of demo(pmdata) ##############################################
 

Try the LocalControlStrategy package in your browser

Any scripts or data that you put into this service are public.

LocalControlStrategy documentation built on Nov. 10, 2022, 5:49 p.m.