inst/doc/recreating_analyses.R

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  message = FALSE
)
library(dplyr)
library(ggplot2)
library(knitr)
library(tidyr)
library(salem)

## -----------------------------------------------------------------------------
salem_theme <-  theme(axis.text.x = element_text(angle = 45, vjust = .5))

## ----fig.width=5--------------------------------------------------------------
ggplot(data = accused_witches) + 
       aes(x = Month.of.Accusation.Name) +
       geom_bar() +
       labs(title = "Accusations by month", x = "Month")  +
       scale_x_discrete(drop = FALSE) +
       salem_theme

table(accused_witches$Month.of.Accusation.Name, useNA = "ifany", 
      dnn = "Month") %>% kable(caption = "Accusations by Month")


## ----fig.width=5--------------------------------------------------------------
ggplot(data = accused_witches) + 
       aes(x = as.numeric(Month.of.Accusation.Name)) +
       stat_ecdf(na.rm = TRUE, geom = "line") +
       labs(title = "Cumulative accusations by month", x = "Month", 
            y = "Cumulative Accusations")  +
       scale_x_continuous(breaks = c(1:12), 
                          labels = c("Jan", "Feb", "Mar", "Apr", "May", 
                                     "Jun", "Jul","Aug", "Sept", "Oct", "Nov", "Dec") )


cumsum(table(accused_witches$Month.of.Accusation.Name)) %>% kable()


## ----message=FALSE, fig.width=5-----------------------------------------------
monthly_accusations <- accused_witches %>% 
                      filter(!is.na(Month.of.Accusation)) %>%
                      group_by(Month.of.Accusation.Name) %>%
                      summarize(number = n()) %>% 
                      mutate(cumulative = cumsum(number) )

monthly_accusations %>% kable(caption = "Accusations by Month")

ggplot(data = monthly_accusations) +
             aes(x = Month.of.Accusation.Name, y = cumulative, group =1) +
             geom_col(aes(y = number)) +
             geom_line() +
             labs(title = "Frequency and Cumulative Frequency of Accusations by Month") +
             salem_theme


 monthly_accusations %>% 
             arrange(desc(number))  %>% 
            mutate(Month.of.Accusation.Name = 
                   factor(Month.of.Accusation.Name, 
                         levels=Month.of.Accusation.Name)) %>%
            mutate(pareto_cumulative = cumsum(number) ) %>%
        ggplot() +
             aes(x = Month.of.Accusation.Name, y = number) +
             geom_col() +
             labs(title = "Pareto Histogram of Accusations by Month") +
             salem_theme


monthly_accusations %>% 
            arrange(desc(number))  %>% 
            mutate(Month.of.Accusation.Name = 
                   factor(Month.of.Accusation.Name, 
                         levels=Month.of.Accusation.Name)) %>%
            mutate(pareto_cumulative = cumsum(number) ) %>%
        ggplot() +
             aes(x = Month.of.Accusation.Name, y = pareto_cumulative) +
             geom_point( ) +
             geom_path(aes(y=pareto_cumulative, group=1), colour="blue", lty=3, size=0.9) +
             labs(title = "Pareto Cumulative Distribution of Accusations by Month") +
             salem_theme


 monthly_accusations %>% 
             arrange(desc(number))  %>% 
            mutate(Month.of.Accusation.Name = 
                   factor(Month.of.Accusation.Name, 
                         levels = Month.of.Accusation.Name)) %>%
            mutate(pareto_cumulative = cumsum(number) ) %>%
        ggplot() +
             aes(x = Month.of.Accusation.Name, y = number) +
             geom_col() +
             geom_path(aes(y=pareto_cumulative, group = 1), colour = "blue", lty=3, size=0.9) +
             labs(title = "Pareto Histogram of Accusations by Month") +
             salem_theme



## ----fig.width=5--------------------------------------------------------------
ggplot(data = na.omit(accused_witches)) + 
       aes(x = Month.of.Execution.Name) +
       geom_bar() +
       labs(title = "Executions by month", x = "Month")  +
       scale_x_discrete(drop = FALSE) +
       salem_theme


table(accused_witches$Month.of.Execution.Name, useNA = "ifany", 
      dnn = "Month") %>% kable(caption = "Executions by Month")


## ----fig.width=6--------------------------------------------------------------

monthly_accused <- accused_witches %>% 
                      filter(!is.na(Month.of.Accusation)) %>%
                      group_by(Month.of.Accusation.Name) %>%
                      summarize(accusations = n())  %>%
                      rename(Month = Month.of.Accusation.Name)
monthly_executed <- accused_witches %>% 
                      filter(!is.na(Month.of.Execution)) %>%
                      group_by(Month.of.Execution.Name) %>%
                      summarize(executions = n()) %>%
                      rename(Month = Month.of.Execution.Name)
monthly_data <- left_join(monthly_accused, monthly_executed) %>%
                          mutate(executions = 
                                   ifelse(is.na(executions), 0, executions))


ggplot(monthly_data) +
     aes(x = Month, y = accusations, group = 1) +
     geom_path( color = "blue" ) +
     geom_line(aes(y = executions, x = Month, group = 1), color = "red") +
     labs(title = "Executions and Accusations by Month",
          subtitle = "Accusations in blue and executions in red") +
     salem_theme

## -----------------------------------------------------------------------------
addmargins(table( accused_witches$Month.of.Accusation.Name, accused_witches$Residence,
        dnn = c("Month", "Residence") )) 

## -----------------------------------------------------------------------------
table(accused_witches$Residence, dnn = "Residence") %>% 
  kable(caption = "Residences of Accused")

accused_witches %>% group_by(Residence) %>% summarize(number = n()) %>% 
     kable(caption = "Residences of Accused")


## ----fig.width=5--------------------------------------------------------------

accused_witches %>% filter(Residence == "Andover") %>%
   ggplot() +
   aes(x = Month.of.Accusation.Name) +
   geom_bar() +
   scale_x_discrete(drop=FALSE) +
   salem_theme

## ----fig.width=5--------------------------------------------------------------

accused_witches %>% filter(Residence == "Andover") %>%
   ggplot() +
   aes(x = Month.of.Accusation.Name) +
   geom_bar() +
   geom_vline(xintercept = 6.5, color = "red") +
   scale_x_discrete(drop = FALSE) +
  labs(title = "Accusations by Month", 
       subtitle = "Red line indicates reconvening of the Court of Oyer and Terminer",
       x = "Month of accusation") +
   salem_theme


## ----fig.width=6, fig.height=8------------------------------------------------

accused_witches %>% 
   ggplot() +
   aes(x = Month.of.Accusation.Name) +
   geom_bar() +
   geom_vline(xintercept = 6.5, color = "red") +
   scale_x_discrete(drop = FALSE) +
   facet_wrap(facets = vars(Residence), ncol = 4) +
   labs(title = "Number of accusations by month",
        subtitle = "Red line indicates reconvening of the Court of Oyer and Terminer",
        x = "Month of Accusation") +
   theme(axis.text.x = element_text(angle = 90, vjust = .5))


## -----------------------------------------------------------------------------

newdata <- salem_region %>% mutate(February.Any = February > 0, March.Any = March > 0,
                                   April.Any = April > 0, May.Any = May > 0,
                                   June.Any = June > 0, July.Any = July > 0,
                                   August.Any = August > 0, 
                                   September.Any = September >0,
                                   October.Any = October > 0,
                                   November.Any = November > 0)

newdata$TOWN_LABEL <- ifelse(newdata$n_accused == 0, NA,  newdata$TOWN_LABEL)

## ----fig.width=8, fig.height=8------------------------------------------------
if (requireNamespace("sf", quietly = TRUE)) {
      p1 <- ggplot(newdata) 
      p2 <- geom_sf_text(aes(label = TOWN_LABEL), color = "blue", size = 2, nudge_x = 5,
                      nudge_y = 5, na.rm = TRUE) 
      p3 <-       scale_fill_manual(values = c( "grey", "red"), na.value = "white") 
      
      p1  + geom_sf(data = newdata,  aes(fill = February.Any), color = "black", size = .1) +
        p3 + p2 + labs(title = "Location of Accusations in February") 
      
      p1 +  geom_sf(data = newdata,  aes(fill = July.Any), color = "black", size = .1) +
        p3 + p2 + labs(title = "Location of Accusations in July") 
      
      
      p1 + geom_sf(data = newdata,  aes(fill = August.Any), color = "black", size = .1) +
        p3 + p2  + labs(title = "Location of Accusations in August") 
      
      p1 + geom_sf(data = newdata,  aes(fill = November.Any), color = "black", size = .1) +
        p3 + p2 + labs(title = "Location of Accusations in November") 
}

## -----------------------------------------------------------------------------

table(parris_social$Sex, parris_social$Identification, parris_social$view,
      dnn = c("Sex", "Identification", "View"))

ftable(parris_social$Sex, parris_social$Identification, parris_social$view,
      dnn = c("Sex", "Identification", "View"))



## -----------------------------------------------------------------------------

table(parris_social$view, parris_social$Identification, parris_social$Sex, 
      dnn = c("View", "Identification", "Sex"))

ftable( parris_social$Identification, parris_social$view,  parris_social$Sex,
      dnn = c("View", "Identification", "Sex"))



## -----------------------------------------------------------------------------
table(salem_village$Petition, salem_village$Church.to.1696, 
      dnn = c( "Membership", "View"))

addmargins( table(salem_village$Petition, salem_village$Church.to.1696, 
      dnn = c( "Membership", "View")))

## -----------------------------------------------------------------------------

table(committee_list$Petition, committee_list$Year, dnn = c("Petition", "Year"))


## -----------------------------------------------------------------------------
table( committee_list$Petition, committee_list$Social, committee_list$Year )

## -----------------------------------------------------------------------------

table( committee_list$Petition, committee_list$Social )


## -----------------------------------------------------------------------------
committee_list2 <-  committee_list %>% group_by(Committee.Members) %>% 
                             summarize(Social = first(Social), 
                                       Petition = first(Petition),
                                       Terms = n())

addmargins(table( committee_list2$Petition, committee_list2$Social ))

## -----------------------------------------------------------------------------
table(tax_comparison$Year, dnn = c("Year")) %>% kable(caption = "Tax payers by year")

## -----------------------------------------------------------------------------
tax_comparison %>% group_by(Year) %>%
            summarize(mean = round(mean(Tax), 1), median = round(median(Tax), 1),
                                   minimum = min(Tax),
                      maximum =  max(Tax), range = maximum - minimum, 
                      sum = sum(Tax), Count = n()) %>% 
                      kable(caption = "Taxes paid by year")



tax_comparison %>% group_by(Year, Petition) %>%
            summarize(mean = round(mean(Tax), 1), median = round(median(Tax), 1),
                                   minimum = min(Tax),
                      maximum =  max(Tax), range = maximum - minimum, 
                      sum = sum(Tax), Tax_payers = n()) %>% 
                      kable(caption = "Taxes paid by year and petition signed")

## ----fig.width=5--------------------------------------------------------------
yearly_tax <- tax_comparison %>% group_by(Year, Petition) %>%
            summarize(mean = round(mean(Tax), 1), median = round(median(Tax), 1),
                                   minimum = min(Tax),
                      maximum =  max(Tax), range = maximum - minimum, 
                      sum = sum(Tax), Count = n())

ggplot(yearly_tax) + 
        aes(x = Year, y = median, group = Petition, color = Petition) +
        geom_line() +
        labs(title = "Tax paid by petition", y = "Shillings", x = "Year") 


## ----fig.width=5--------------------------------------------------------------

ratios <- yearly_tax %>% filter(Petition %in% c("Pro-P", "Anti-P")) %>%
                   select(Year, Petition, median) %>% 
                   pivot_wider(id_cols = c(Year), values_from = median, 
                               names_from = Petition) %>% 
                    mutate(median_ratio = `Pro-P`/`Anti-P`)
ratios %>% kable()
ggplot(ratios, 
        aes(x = Year, y = median_ratio)) +
        geom_point() +
        geom_line(group = 1) +
        geom_hline(yintercept = 1, color = "blue") +
        labs(title = "Ratio of Pro- to Anti- Parris median tax",
             y = "Ratio", x = "Year") 


## -----------------------------------------------------------------------------
ratios <- yearly_tax %>% filter(Petition %in% c("Pro-P", "Anti-P")) %>%
                   select(Year, Petition, mean) %>% 
                   pivot_wider(id_cols = c(Year), values_from = mean, 
                               names_from = Petition) %>% 
                    mutate(mean_ratio = `Pro-P`/`Anti-P`)
ratios %>% kable()
ggplot(ratios, 
        aes(x = Year, y = mean_ratio)) +
        geom_point() +
        geom_line(group = 1) +
        geom_hline(yintercept = 1, color = "blue") +
        labs(title = "Ratio of Pro- to Anti- Parris mean tax",
             y = "Ratio", x = "Year") 

## -----------------------------------------------------------------------------
persisters <- tax_comparison %>% pivot_wider(id_cols = c(Name, Petition),
                                             names_from = Year,
                                             names_prefix = "X",
                                             values_from = Tax
                                             ) %>%
                                 relocate(X1681, .before = X1690) 


## -----------------------------------------------------------------------------

comparison <- persisters %>% 
                    mutate(across(starts_with("X"), 
                                               list(
                                                    rank = min_rank
                                                    )
                                        )) %>%
                   filter(!is.na(X1690) & !is.na(X1681)) %>%
                   mutate(
                         X1690_ptile = 100 * (X1690_rank - 1)/100,
                         X1681_ptile = 100* (X1681_rank - 1)/94,
                         compare_1681_1690 =
                         case_when(
                          X1690_ptile -X1681_ptile >=  10 ~ "Up",
                          X1690_ptile -X1681_ptile <= -10 ~ "Down",
                          TRUE ~ "No Change"
                        )  
                    ) 

table(comparison$compare_1681_1690, comparison$Petition, 
      dnn = c("Change", "Petition")) %>%
      kable(caption = "Change in position 1681-1690")

Try the salem package in your browser

Any scripts or data that you put into this service are public.

salem documentation built on Jan. 13, 2021, 6:35 a.m.