knitr::opts_chunk$set(echo = TRUE)

Load the data and remove all questions except Q3 and Q15 and remove Q15p as it is an "Other" category.

library(dplyr)

d <- STRIPSArbuckle::survey_responses %>% 
  dplyr::select(CaseID,
         starts_with("q3"), 
         starts_with("q15"),
         -starts_with("q15p")) %>%
  tidyr::gather(question, response, -CaseID, -q3) %>%
  rename(farmer = q3)

Compute sufficient statistics for the data, i.e. the number of respondents for each category (No Response, No Priority, Slight Priority, Moderate Priority, High Priority, and Very High Priority) for all parts of question 15 stratified by their answer to question 3.

ordered_responses = c("No response",
                      "No Priority",
                      "Slight Priority",
                      "Moderate Priority",
                      "High Priority",
                      "Very High Priority")

s = d %>%
  filter(!is.na(farmer)) %>%
  mutate(question = substr(question, 1,4),
         response = ifelse(is.na(response), "No response", response)) %>%
  group_by(farmer, question, response) %>%
  summarize(n = length(CaseID)) %>%
  ungroup() %>%
  mutate(response = factor(response, levels=ordered_responses))

For each question, run a t-test by assuming numeric values for the responses that are consecutive integers (0 to 4 below).

# Run t test to get t statistic and p value
make_data <- function(n) {
  data.frame(y=rep(0:4,n)) # Converts counts by category to numeric vector
}

my_t.test <- function(d) {
  test = t.test(make_data(d$No),
                make_data(d$Yes),
                var.equal = TRUE)
  data.frame(t   = test$statistic,
             p   = test$p.value,
             No  = test$estimate[1],
             Yes = test$estimate[2])
}

test = s %>%
  tidyr::spread(farmer,n) %>%
  arrange(question, response) %>%
  filter(response != "No response") %>%
  group_by(question) %>%
  do(my_t.test(.)) %>%
  ungroup() 
# Create data.frame with nice question formatting
pretty_names = 
  c("Increase recreation opportunities",
    "Landscape beautification",
    "Increase crop production",
    "Increase livestock production",
    "Improve flood control",
    "Drinking water quality",
    "Swimming water quality",
    "Aquatic life water quality",
    "Restore native prairie",
    "Restore wetlands",
    "Reduce greenhouse gases",
    "Increase rural job opportunities",
    "Increase tourism",
    "Improve game habitat",
    "Improve non-game habitat")

names(pretty_names) <- paste0("q15",letters[1:15])

Combine the counts with the t statistic and p value to provide a parsimonious summary of the results.

ordered_colnames = paste(rep(c("No","Yes"), each=length(ordered_responses)), 
                         ordered_responses,
                         sep="_")

tmp_for_table = s %>%
  mutate(column_name = factor(paste(farmer,response,sep="_"), 
                              levels = ordered_colnames)) %>%
  select(question, column_name, n) 

test_formatted <- test %>%
  dplyr::select(-No, -Yes) %>%
  mutate(t = formatC(t, 2, format='f'),
         p = formatC(p, 3, format='f'))

for_table <- tmp_for_table %>%
  tidyr::spread(column_name, n) %>%
  left_join(test_formatted, by="question") %>%
  mutate(question = plyr::revalue(question, pretty_names))

names(for_table) <- gsub("No_", "", names(for_table))
names(for_table) <- gsub("Yes_","", names(for_table))


htmlTable::htmlTable(for_table,
                     rnames = FALSE,
                     cgroup = c("","Non-farmer","Farmer","",""),
                     n.cgroup = c(1,6,6,1,1),
                     align = "l|rrrrrr|rrrrrr|rr")

#for_table %>% readr::write_csv(path="social_data_table.csv")

Create data for figure

g_data <- test %>% 
  mutate(significant = p<0.05,
         farmer_higher = ifelse(Yes > No, "Farm resident higher", "Farm resident lower"),
         question = plyr::revalue(question, pretty_names),
         question = factor(question, 

                           # hack to get order to match table in manuscript
                           levels = question[order(0.2*Yes+0.8*No, 
                                                   decreasing = TRUE)]))

Create figure

library(ggplot2)
library(grid)
cp2 = rgb(red = c(241,200)/256, 
         green = c(190,0)/256,
         blue = c(72,0)/256,
         alpha = c(1,1))
names(cp2) = c("Farm resident lower",
               "Farm resident higher")


g_survey = ggplot(g_data, aes(x     = question, 
                              xend  = question, 
                              y     = No, 
                              yend  = Yes, 
                              color = farmer_higher)) + 
  theme_bw() +
  scale_y_continuous(limit=c(0,4), 
                     labels = c("None",
                                "Slight",
                                "Moderate",
                                "High",
                                "Very high")) +
  scale_alpha_discrete(range=c(0.5,1)) +
  geom_segment(size=4) + 

  # Add * for significance
  geom_text(data = g_data %>% filter(significant) %>% mutate(star='*'),
             aes(x = question, y = (No+Yes)/2, label = star), 
            color='black',
            vjust=0.8) +

  theme(
    legend.position = c(.01,0.99),
    legend.justification = c(0,1),
    axis.text.x = element_text(angle = 45, hjust = 1, size=8),
    axis.text.y = element_text(size=8, hjust=0),
    legend.text = element_text(size=6),
    legend.title = element_blank(),
    legend.background = element_rect(fill='white'),
    legend.margin = margin(t=-0.18,r=0,b=0,l=0,unit="cm"),
    legend.key.height = unit(-1,"line"),
    legend.key.width = unit(.01,"cm"),
    plot.margin=unit(c(-0.5,0.02,-0.45,-0.45), "cm"),
    panel.grid.minor.x = element_blank()) + 

  labs(x="", y="Priority", title="") +
  scale_color_manual(values = cp2, drop=FALSE) + 
  coord_flip()

g_survey

ggsave("Fig4.tiff",
       dpi = 300, height = 7, width = 8.7, units ="cm")


ISU-STRIPS/STRIPSArbuckle documentation built on May 8, 2019, 1:26 p.m.