data-raw/convert_data_rda.R

#script to convert raw data to .RDA form and put in the data directory of package

#Authors: NAH

#load required packages
library(tidyverse)
library(usethis)
library(tools)
library(raster)
library(here)
library(RStoolbox)

####################################################################################################################
#data to read in and clean
####################################################################################################################

##########################################################
#wine market risk
##########################################################

#COUNTRIES
countries_market <- read_csv("./data-raw/wine_market_FAOSTAT_data_8-31-2020.csv")


#rm NA's
countries_market <- countries_market %>%
  filter(!is.na(Value))

#recode some country names and make sure that Area == ID
countries_market <- countries_market %>%
  mutate(ID = Area) %>%
  mutate(
    ID = recode(
      ID,
      `Bolivia (Plurinational State of)` = "Bolivia",
      `Brunei Darussalam` = "Brunei",
      `Cabo Verde` = "Cape Verde",
      `China, mainland` = "China",
      Congo = "Republic of Congo",
      `Falkland Islands (Malvinas)` = "Falkland Islands",
      `China, Hong Kong SAR` = "Hong Kong",
      `Iran (Islamic Republic of)` = "Iran",
      `Côte d'Ivoire` = "Ivory Coast",
      `Democratic People's Republic of Korea` = "North Korea",
      `Republic of Korea` = "South Korea",
      `Lao People's Democratic Republic` = "Laos",
      `China, Macao SAR` = "Macao",
      `Republic of Moldova` = "Moldova",
      `Netherlands Antilles (former)` = "Bonaire, Sint Eustatius and Saba",
      `North Macedonia` = "Macedonia",
      Czechia = "Czech Republic",
      `Russian Federation` = "Russia",
      `Syrian Arab Republic`  = "Syria",
      `China, Taiwan Province of` = "Taiwan",
      `United Republic of Tanzania` = "Tanzania",
      `United Kingdom of Great Britain and Northern Ireland` = "United Kingdom",
      `United States of America` = "United States",
      `Venezuela (Bolivarian Republic of)` = "Venezuela",
      `Viet Nam` = "Vietnam",
      Palestine = "Palestina",
      Eswatini = "Swaziland"

    )
  ) %>%
  mutate(Area = ID)


##########################################################
#correlation plots
##########################################################

#############################
  #trade
#############################

#Trade data inherently requires cleaning and tidying from the source

#STATES
states <- read_csv("./data-raw/FAFdata_states_13_june_2019.csv")
states_mass <- read_csv("./data-raw/FAFdata_states_tons_13_june_2019.csv")

#clean up col labels and order, then get some summary data
#note that this step turns value units for states to USD
states <- states %>%
  mutate(origin = `DMS ORIG`,
         destination = `DMS DEST`,
         value_2012 = 1e6*(`Total M$ in 2012`),
         value_2013 = 1e6*(`Total M$ in 2013`),
         value_2014 = 1e6*(`Total M$ in 2014`),
         value_2015 = 1e6*(`Total M$ in 2015`),
         value_2016 = 1e6*(`Total M$ in 2016`),
         value_2017 = 1e6*(`Total M$ in 2017`)) %>%
  dplyr::select(origin, destination, SCTG2, value_2012, value_2013, value_2014, value_2015, value_2016, value_2017) %>%
  group_by(origin, destination, SCTG2) %>%
  mutate(
    sum = sum(value_2012, value_2013, value_2014, value_2015, value_2016, value_2017, na.rm = T),
    average = mean(x = c(value_2012, value_2013, value_2014, value_2015, value_2016, value_2017), na.rm = T)
    )

#note that this step turns the mass units for states_mass to kg (1 kTon = 907184.7 kg)
states_mass <- states_mass %>%
  mutate(origin = `DMS ORIG`,
         destination = `DMS DEST`,
         mass_2012 = 907184.7*(`Total KTons in 2012`),
         mass_2013 = 907184.7*(`Total KTons in 2013`),
         mass_2014 = 907184.7*(`Total KTons in 2014`),
         mass_2015 = 907184.7*(`Total KTons in 2015`),
         mass_2016 = 907184.7*(`Total KTons in 2016`),
         mass_2017 = 907184.7*(`Total KTons in 2017`)) %>%
  dplyr::select(origin, destination, SCTG2,mass_2012, mass_2013, mass_2014, mass_2015, mass_2016, mass_2017) %>%
  group_by(origin, destination, SCTG2) %>%
  mutate(
    sum = sum(mass_2012, mass_2013, mass_2014, mass_2015, mass_2016, mass_2017, na.rm = T),
    average = mean(x = c(mass_2012, mass_2013, mass_2014, mass_2015, mass_2016, mass_2017), na.rm = T)
    )

#now we take the overall commerce and summarize for every pair of origin and destination
#value
states_value <- states %>%
  dplyr::select(-sum, -average) %>%
  group_by(origin, destination) %>%
  summarise_if(is.numeric, sum, na.rm = TRUE) %>%
  group_by(origin, destination) %>%
  mutate(
    sum = sum(value_2012, value_2013, value_2014, value_2015, value_2016, value_2017, na.rm = T),
    average = mean(x = c(value_2012, value_2013, value_2014, value_2015, value_2016, value_2017), na.rm = T)
    )

#same for mass
states_mass <- states_mass %>%
  dplyr::select(-sum, -average) %>%
  group_by(origin, destination) %>%
  summarise_if(is.numeric, sum, na.rm = TRUE) %>%
  group_by(origin, destination) %>%
  mutate(
    sum = sum(mass_2012, mass_2013, mass_2014, mass_2015, mass_2016, mass_2017, na.rm = T),
    average = mean(x = c(mass_2012, mass_2013, mass_2014, mass_2015, mass_2016, mass_2017), na.rm = T)
    )

#filter to remove in state commerce (origin == destination)
states_value <- states_value %>%
  filter(!destination == origin)
states_mass <- states_mass %>%
  filter(!destination == origin)

#trade VALUE
{#first all trade by state
#this gets totals and averages per destination across years and for individual years
trade_summary <- states_value %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(all_total = sum(sum, na.rm = T),
            all_average = (sum(sum, na.rm = T) / 6),
            all_2012 = sum(value_2012, na.rm = T),
            all_2013 = sum(value_2013, na.rm = T),
            all_2014 = sum(value_2014, na.rm = T),
            all_2015 = sum(value_2015, na.rm = T),
            all_2016 = sum(value_2016, na.rm = T),
            all_2017 = sum(value_2017, na.rm = T)
  )

#Connecticut
trade_ct <- states_value %>%
  filter(origin %in% "Connecticut") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(ct_total = sum(sum, na.rm = T),
            ct_average = (sum(sum, na.rm = T) / 6),
            ct_2012 = sum(value_2012, na.rm = T),
            ct_2013 = sum(value_2013, na.rm = T),
            ct_2014 = sum(value_2014, na.rm = T),
            ct_2015 = sum(value_2015, na.rm = T),
            ct_2016 = sum(value_2016, na.rm = T),
            ct_2017 = sum(value_2017, na.rm = T)
  )

#Delaware
trade_de <- states_value %>%
  filter(origin %in% "Delaware") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(de_total = sum(sum, na.rm = T),
            de_average = (sum(sum, na.rm = T) / 6),
            de_2012 = sum(value_2012, na.rm = T),
            de_2013 = sum(value_2013, na.rm = T),
            de_2014 = sum(value_2014, na.rm = T),
            de_2015 = sum(value_2015, na.rm = T),
            de_2016 = sum(value_2016, na.rm = T),
            de_2017 = sum(value_2017, na.rm = T)
  )

#Maryland
trade_md <- states_value %>%
  filter(origin %in% "Maryland") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(md_total = sum(sum, na.rm = T),
            md_average = (sum(sum, na.rm = T) / 6),
            md_2012 = sum(value_2012, na.rm = T),
            md_2013 = sum(value_2013, na.rm = T),
            md_2014 = sum(value_2014, na.rm = T),
            md_2015 = sum(value_2015, na.rm = T),
            md_2016 = sum(value_2016, na.rm = T),
            md_2017 = sum(value_2017, na.rm = T)
  )

#New Jersey
trade_nj <- states_value %>%
  filter(origin %in% "New Jersey") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(nj_total = sum(sum, na.rm = T),
            nj_average = (sum(sum, na.rm = T) / 6),
            nj_2012 = sum(value_2012, na.rm = T),
            nj_2013 = sum(value_2013, na.rm = T),
            nj_2014 = sum(value_2014, na.rm = T),
            nj_2015 = sum(value_2015, na.rm = T),
            nj_2016 = sum(value_2016, na.rm = T),
            nj_2017 = sum(value_2017, na.rm = T)
  )

#New York
trade_ny <- states_value %>%
  filter(origin %in% "New York") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(ny_total = sum(sum, na.rm = T),
            ny_average = (sum(sum, na.rm = T) / 6),
            ny_2012 = sum(value_2012, na.rm = T),
            ny_2013 = sum(value_2013, na.rm = T),
            ny_2014 = sum(value_2014, na.rm = T),
            ny_2015 = sum(value_2015, na.rm = T),
            ny_2016 = sum(value_2016, na.rm = T),
            ny_2017 = sum(value_2017, na.rm = T)
  )

#North Carolina
trade_nc <- states_value %>%
  filter(origin %in% "North Carolina") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(nc_total = sum(sum, na.rm = T),
            nc_average = (sum(sum, na.rm = T) / 6),
            nc_2012 = sum(value_2012, na.rm = T),
            nc_2013 = sum(value_2013, na.rm = T),
            nc_2014 = sum(value_2014, na.rm = T),
            nc_2015 = sum(value_2015, na.rm = T),
            nc_2016 = sum(value_2016, na.rm = T),
            nc_2017 = sum(value_2017, na.rm = T)
  )

#Ohio
trade_oh <- states_value %>%
  filter(origin %in% "Ohio") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(oh_total = sum(sum, na.rm = T),
            oh_average = (sum(sum, na.rm = T) / 6),
            oh_2012 = sum(value_2012, na.rm = T),
            oh_2013 = sum(value_2013, na.rm = T),
            oh_2014 = sum(value_2014, na.rm = T),
            oh_2015 = sum(value_2015, na.rm = T),
            oh_2016 = sum(value_2016, na.rm = T),
            oh_2017 = sum(value_2017, na.rm = T)
  )

#Pennsylvania
trade_pa <- states_value %>%
  filter(origin %in% "Pennsylvania") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(pa_total = sum(sum, na.rm = T),
            pa_average = (sum(sum, na.rm = T) / 6),
            pa_2012 = sum(value_2012, na.rm = T),
            pa_2013 = sum(value_2013, na.rm = T),
            pa_2014 = sum(value_2014, na.rm = T),
            pa_2015 = sum(value_2015, na.rm = T),
            pa_2016 = sum(value_2016, na.rm = T),
            pa_2017 = sum(value_2017, na.rm = T)
  )

#Virginia
trade_va <- states_value %>%
  filter(origin %in% "Virginia") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(va_total = sum(sum, na.rm = T),
            va_average = (sum(sum, na.rm = T) / 6),
            va_2012 = sum(value_2012, na.rm = T),
            va_2013 = sum(value_2013, na.rm = T),
            va_2014 = sum(value_2014, na.rm = T),
            va_2015 = sum(value_2015, na.rm = T),
            va_2016 = sum(value_2016, na.rm = T),
            va_2017 = sum(value_2017, na.rm = T)
  )

#West Virginia
trade_wv <- states_value %>%
  filter(origin %in% "West Virginia") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(wv_total = sum(sum, na.rm = T),
            wv_average = (sum(sum, na.rm = T) / 6),
            wv_2012 = sum(value_2012, na.rm = T),
            wv_2013 = sum(value_2013, na.rm = T),
            wv_2014 = sum(value_2014, na.rm = T),
            wv_2015 = sum(value_2015, na.rm = T),
            wv_2016 = sum(value_2016, na.rm = T),
            wv_2017 = sum(value_2017, na.rm = T)
  )

}

###PRESENT TRADE VALUE
#add all infected state data added to new summary object: CT, DE, MD, NJ, NY, OH, PA, VA, WV
trade_summary_slf <- left_join(trade_summary, trade_pa, by = "destination") %>%
  left_join(., trade_ct, by = "destination") %>%
  left_join(., trade_de, by = "destination") %>%
  left_join(., trade_md, by = "destination") %>%
  left_join(., trade_nj, by = "destination") %>%
  left_join(., trade_ny, by = "destination") %>%
  left_join(., trade_oh, by = "destination") %>%
  left_join(., trade_va, by = "destination") %>%
  left_join(., trade_wv, by = "destination")

#add info for the total of all infected states
trade_summary_slf <- trade_summary_slf %>%
  group_by(destination) %>%
  mutate(
    infected_total = sum(c(pa_total, ct_total, de_total, md_total, nj_total, ny_total, oh_total, va_total, wv_total), na.rm = T),
    infected_average = sum(c(pa_total, ct_total, de_total, md_total, nj_total, ny_total, oh_total, va_total, wv_total), na.rm = T)/54 #54 is 6 years * 9 total infected states, previous average removes NA's, forgetting them in the mean calc
  )


###FUTURE TRADE VALUE
#new version of summarizing that works for new states
#add all infected state data added to new summary object: CT, DE, MD, NJ, NY, OH, PA, VA, WV + NC
trade_summary_slf_future <- left_join(trade_summary, trade_pa, by = "destination") %>%
  left_join(., trade_ct, by = "destination") %>%
  left_join(., trade_de, by = "destination") %>%
  left_join(., trade_md, by = "destination") %>%
  left_join(., trade_nc, by = "destination") %>%
  left_join(., trade_nj, by = "destination") %>%
  left_join(., trade_ny, by = "destination") %>%
  left_join(., trade_oh, by = "destination") %>%
  left_join(., trade_va, by = "destination") %>%
  left_join(., trade_wv, by = "destination")

#add info for the total of all infected states
trade_summary_slf_future <- trade_summary_slf_future %>%
  group_by(destination) %>%
  mutate(
    infected_total = sum(c(pa_total, ct_total, de_total, md_total, nj_total, ny_total, oh_total, va_total, wv_total, nc_total), na.rm = T),
    infected_average = sum(c(pa_total, ct_total, de_total, md_total, nj_total, ny_total, oh_total, va_total, wv_total, nc_total), na.rm = T)/60 #60 is 6 years * 10 total infected states, previous average removes NA's, forgetting them in the mean calc
  )


#replace the Na's with zeros
trade_summary_slf[is.na(trade_summary_slf)] <- 0
trade_summary_slf_future[is.na(trade_summary_slf_future)] <- 0

#change the name of DC
trade_summary_slf$destination <- gsub(pattern = "Washington DC", replacement = "District of Columbia", x = trade_summary_slf$destination)
trade_summary_slf_future$destination <- gsub(pattern = "Washington DC", replacement = "District of Columbia", x = trade_summary_slf_future$destination)

#change the names to work
states_trade_summary_slf <- trade_summary_slf
states_trade_summary_slf_future <- trade_summary_slf_future

#trade MASS
{#first all trade by state
#this gets totals and averages per destination across years and for individual years
trade_mass_summary <- states_mass %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(all_total = sum(sum, na.rm = T),
            all_average = (sum(sum, na.rm = T) / 6),
            all_2012 = sum(mass_2012, na.rm = T),
            all_2013 = sum(mass_2013, na.rm = T),
            all_2014 = sum(mass_2014, na.rm = T),
            all_2015 = sum(mass_2015, na.rm = T),
            all_2016 = sum(mass_2016, na.rm = T),
            all_2017 = sum(mass_2017, na.rm = T)
  )

#Connecticut
trade_mass_ct <- states_mass %>%
  filter(origin %in% "Connecticut") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(ct_total = sum(sum, na.rm = T),
            ct_average = (sum(sum, na.rm = T) / 6),
            ct_2012 = sum(mass_2012, na.rm = T),
            ct_2013 = sum(mass_2013, na.rm = T),
            ct_2014 = sum(mass_2014, na.rm = T),
            ct_2015 = sum(mass_2015, na.rm = T),
            ct_2016 = sum(mass_2016, na.rm = T),
            ct_2017 = sum(mass_2017, na.rm = T)
  )

#Delaware
trade_mass_de <- states_mass %>%
  filter(origin %in% "Delaware") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(de_total = sum(sum, na.rm = T),
            de_average = (sum(sum, na.rm = T) / 6),
            de_2012 = sum(mass_2012, na.rm = T),
            de_2013 = sum(mass_2013, na.rm = T),
            de_2014 = sum(mass_2014, na.rm = T),
            de_2015 = sum(mass_2015, na.rm = T),
            de_2016 = sum(mass_2016, na.rm = T),
            de_2017 = sum(mass_2017, na.rm = T)
  )

#Maryland
trade_mass_md <- states_mass %>%
  filter(origin %in% "Maryland") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(md_total = sum(sum, na.rm = T),
            md_average = (sum(sum, na.rm = T) / 6),
            md_2012 = sum(mass_2012, na.rm = T),
            md_2013 = sum(mass_2013, na.rm = T),
            md_2014 = sum(mass_2014, na.rm = T),
            md_2015 = sum(mass_2015, na.rm = T),
            md_2016 = sum(mass_2016, na.rm = T),
            md_2017 = sum(mass_2017, na.rm = T)
  )

#North Carolina
trade_mass_nc <- states_mass %>%
  filter(origin %in% "North Carolina") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(nc_total = sum(sum, na.rm = T),
            nc_average = (sum(sum, na.rm = T) / 6),
            nc_2012 = sum(mass_2012, na.rm = T),
            nc_2013 = sum(mass_2013, na.rm = T),
            nc_2014 = sum(mass_2014, na.rm = T),
            nc_2015 = sum(mass_2015, na.rm = T),
            nc_2016 = sum(mass_2016, na.rm = T),
            nc_2017 = sum(mass_2017, na.rm = T)
  )

#New Jersey
trade_mass_nj <- states_mass %>%
  filter(origin %in% "New Jersey") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(nj_total = sum(sum, na.rm = T),
            nj_average = (sum(sum, na.rm = T) / 6),
            nj_2012 = sum(mass_2012, na.rm = T),
            nj_2013 = sum(mass_2013, na.rm = T),
            nj_2014 = sum(mass_2014, na.rm = T),
            nj_2015 = sum(mass_2015, na.rm = T),
            nj_2016 = sum(mass_2016, na.rm = T),
            nj_2017 = sum(mass_2017, na.rm = T)
  )

#New York
trade_mass_ny <- states_mass %>%
  filter(origin %in% "New York") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(ny_total = sum(sum, na.rm = T),
            ny_average = (sum(sum, na.rm = T) / 6),
            ny_2012 = sum(mass_2012, na.rm = T),
            ny_2013 = sum(mass_2013, na.rm = T),
            ny_2014 = sum(mass_2014, na.rm = T),
            ny_2015 = sum(mass_2015, na.rm = T),
            ny_2016 = sum(mass_2016, na.rm = T),
            ny_2017 = sum(mass_2017, na.rm = T)
  )

#Ohio
trade_mass_oh <- states_mass %>%
  filter(origin %in% "Ohio") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(oh_total = sum(sum, na.rm = T),
            oh_average = (sum(sum, na.rm = T) / 6),
            oh_2012 = sum(mass_2012, na.rm = T),
            oh_2013 = sum(mass_2013, na.rm = T),
            oh_2014 = sum(mass_2014, na.rm = T),
            oh_2015 = sum(mass_2015, na.rm = T),
            oh_2016 = sum(mass_2016, na.rm = T),
            oh_2017 = sum(mass_2017, na.rm = T)
  )


#Pennsylvania
trade_mass_pa <- states_mass %>%
  filter(origin %in% "Pennsylvania") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(pa_total = sum(sum, na.rm = T),
            pa_average = (sum(sum, na.rm = T) / 6),
            pa_2012 = sum(mass_2012, na.rm = T),
            pa_2013 = sum(mass_2013, na.rm = T),
            pa_2014 = sum(mass_2014, na.rm = T),
            pa_2015 = sum(mass_2015, na.rm = T),
            pa_2016 = sum(mass_2016, na.rm = T),
            pa_2017 = sum(mass_2017, na.rm = T)
  )

#Virginia
trade_mass_va <- states_mass %>%
  filter(origin %in% "Virginia") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(va_total = sum(sum, na.rm = T),
            va_average = (sum(sum, na.rm = T) / 6),
            va_2012 = sum(mass_2012, na.rm = T),
            va_2013 = sum(mass_2013, na.rm = T),
            va_2014 = sum(mass_2014, na.rm = T),
            va_2015 = sum(mass_2015, na.rm = T),
            va_2016 = sum(mass_2016, na.rm = T),
            va_2017 = sum(mass_2017, na.rm = T)
  )

#West Virginia
trade_mass_wv <- states_mass %>%
  filter(origin %in% "West Virginia") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(wv_total = sum(sum, na.rm = T),
            wv_average = (sum(sum, na.rm = T) / 6),
            wv_2012 = sum(mass_2012, na.rm = T),
            wv_2013 = sum(mass_2013, na.rm = T),
            wv_2014 = sum(mass_2014, na.rm = T),
            wv_2015 = sum(mass_2015, na.rm = T),
            wv_2016 = sum(mass_2016, na.rm = T),
            wv_2017 = sum(mass_2017, na.rm = T)
  )
}

###PRESENT TRADE MASS
#add all infected state data added to new summary object: CT, DE, MD, NJ, NY, OH, PA, VA, WV
trade_mass_summary_slf <- left_join(trade_mass_summary, trade_mass_pa, by = "destination") %>%
  left_join(., trade_mass_ct, by = "destination") %>%
  left_join(., trade_mass_de, by = "destination") %>%
  left_join(., trade_mass_md, by = "destination") %>%
  left_join(., trade_mass_nj, by = "destination") %>%
  left_join(., trade_mass_ny, by = "destination") %>%
  left_join(., trade_mass_oh, by = "destination") %>%
  left_join(., trade_mass_va, by = "destination") %>%
  left_join(., trade_mass_wv, by = "destination")

#add info for the total of all infected states
trade_mass_summary_slf <- trade_mass_summary_slf %>%
  group_by(destination) %>%
  mutate(
    infected_total = sum(c(pa_total, ct_total, de_total, md_total, nj_total, ny_total, oh_total, va_total, wv_total), na.rm = T),
    infected_average = sum(c(pa_total, ct_total, de_total, md_total, nj_total, ny_total, oh_total, va_total, wv_total), na.rm = T)/54 #54 is 6 years * 9 total infected states, previous average removes NA's, forgetting them in the mean calc
  )

###FUTURE TRADE MASS
#new version of summarizing that works for new states
#add all infected state data added to new summary object: CT, DE, MD, NJ, NY, OH, PA, VA, WV + NC
trade_mass_summary_slf_future <- left_join(trade_mass_summary, trade_mass_pa, by = "destination") %>%
  left_join(., trade_mass_ct, by = "destination") %>%
  left_join(., trade_mass_de, by = "destination") %>%
  left_join(., trade_mass_md, by = "destination") %>%
  left_join(., trade_mass_nc, by = "destination") %>%
  left_join(., trade_mass_nj, by = "destination") %>%
  left_join(., trade_mass_ny, by = "destination") %>%
  left_join(., trade_mass_oh, by = "destination") %>%
  left_join(., trade_mass_va, by = "destination") %>%
  left_join(., trade_mass_wv, by = "destination")

#add info for the total of all infected states
trade_mass_summary_slf_future <- trade_mass_summary_slf_future %>%
  group_by(destination) %>%
  mutate(
    infected_total = sum(c(pa_total, ct_total, de_total, md_total, nj_total, ny_total, oh_total, va_total, wv_total, nc_total), na.rm = T),
    infected_average = sum(c(pa_total, ct_total, de_total, md_total, nj_total, ny_total, oh_total, va_total, wv_total, nc_total), na.rm = T)/60 #60 is 6 years * 10 total infected states, previous average removes NA's, forgetting them in the mean calc
  )


#replace the Na's with zeros
trade_mass_summary_slf[is.na(trade_mass_summary_slf)] <- 0
trade_mass_summary_slf_future[is.na(trade_mass_summary_slf_future)] <- 0

#change the name of DC
trade_mass_summary_slf$destination <- gsub(pattern = "Washington DC", replacement = "District of Columbia", x = trade_mass_summary_slf$destination)
trade_mass_summary_slf_future$destination <- gsub(pattern = "Washington DC", replacement = "District of Columbia", x = trade_mass_summary_slf_future$destination)

#rename to make naming  convention work better
states_trade_mass_summary_slf <- trade_mass_summary_slf
states_trade_mass_summary_slf_future <- trade_mass_summary_slf_future

#COUNTRIES
countries <- read_csv("./data-raw/USCensus_trade_countries_13_june_2019.csv")

#clean up col labels and order, then get some summary data
countries <- countries %>%
  mutate(origin = State, destination = Country, value = `Total Value ($US)`) %>%
  dplyr::select(-Commodity, -State, -Country, -`Total Value ($US)`) %>%
  dplyr::select(origin, destination, Time:value) %>%
  group_by(origin, destination, Time) %>%
  mutate(mass = sum(`Vessel SWT (kg)`, `Containerized Vessel Total Exports SWT (kg)`, `Air SWT (kg)`, na.rm = T), year = Time) %>%
  group_by(origin, destination) %>%
  dplyr::select(origin, destination, year, value, mass, -Time)

#create a summary table for countries that matches states

#mass
countries_mass <- countries %>%
  dplyr::select(-value) %>%
  group_by(origin, destination) %>%
  spread(value = mass, key = year)

colnames(countries_mass) <- gsub(pattern = "201", replacement = "mass_201", x = colnames(countries_mass))

#get summary stats across years
countries_mass <- countries_mass %>%
  group_by(origin, destination) %>%
  mutate(
    sum = sum(mass_2012, mass_2013, mass_2014, mass_2015, mass_2016, mass_2017, na.rm = T),
    average = mean(x = c(mass_2012, mass_2013, mass_2014, mass_2015, mass_2016, mass_2017), na.rm = T)
    )


#value
countries_value <- countries %>%
  dplyr::select(-mass) %>%
  group_by(origin, destination) %>%
  spread(value = value, key = year)

colnames(countries_value) <- gsub(pattern = "201", replacement = "value_201", x = colnames(countries_value))

#get summary stats across years
countries_value <- countries_value %>%
  group_by(origin, destination) %>%
  mutate(
    sum = sum(value_2012, value_2013, value_2014, value_2015, value_2016, value_2017, na.rm = T),
    average = mean(x = c(value_2012, value_2013, value_2014, value_2015, value_2016, value_2017), na.rm = T)
    )


#first all trade by state
#this gets totals and averages per destination across years and for individual years
#trade VALUE
{
trade_summary <- countries_value %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(all_total = sum(sum, na.rm = T),
            all_average = (sum(sum, na.rm = T) / 6),
            all_2012 = sum(value_2012, na.rm = T),
            all_2013 = sum(value_2013, na.rm = T),
            all_2014 = sum(value_2014, na.rm = T),
            all_2015 = sum(value_2015, na.rm = T),
            all_2016 = sum(value_2016, na.rm = T),
            all_2017 = sum(value_2017, na.rm = T)
  )

#Connecticut
trade_ct <- countries_value %>%
  filter(origin %in% "Connecticut") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(ct_total = sum(sum, na.rm = T),
            ct_average = (sum(sum, na.rm = T) / 6),
            ct_2012 = sum(value_2012, na.rm = T),
            ct_2013 = sum(value_2013, na.rm = T),
            ct_2014 = sum(value_2014, na.rm = T),
            ct_2015 = sum(value_2015, na.rm = T),
            ct_2016 = sum(value_2016, na.rm = T),
            ct_2017 = sum(value_2017, na.rm = T)
  )

#Delaware
trade_de <- countries_value %>%
  filter(origin %in% "Delaware") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(de_total = sum(sum, na.rm = T),
            de_average = (sum(sum, na.rm = T) / 6),
            de_2012 = sum(value_2012, na.rm = T),
            de_2013 = sum(value_2013, na.rm = T),
            de_2014 = sum(value_2014, na.rm = T),
            de_2015 = sum(value_2015, na.rm = T),
            de_2016 = sum(value_2016, na.rm = T),
            de_2017 = sum(value_2017, na.rm = T)
  )

#Maryland
trade_md <- countries_value %>%
  filter(origin %in% "Maryland") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(md_total = sum(sum, na.rm = T),
            md_average = (sum(sum, na.rm = T) / 6),
            md_2012 = sum(value_2012, na.rm = T),
            md_2013 = sum(value_2013, na.rm = T),
            md_2014 = sum(value_2014, na.rm = T),
            md_2015 = sum(value_2015, na.rm = T),
            md_2016 = sum(value_2016, na.rm = T),
            md_2017 = sum(value_2017, na.rm = T)
  )

#North Carolina
trade_nc <- countries_value %>%
  filter(origin %in% "North Carolina") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(nc_total = sum(sum, na.rm = T),
            nc_average = (sum(sum, na.rm = T) / 6),
            nc_2012 = sum(value_2012, na.rm = T),
            nc_2013 = sum(value_2013, na.rm = T),
            nc_2014 = sum(value_2014, na.rm = T),
            nc_2015 = sum(value_2015, na.rm = T),
            nc_2016 = sum(value_2016, na.rm = T),
            nc_2017 = sum(value_2017, na.rm = T)
  )

#New Jersey
trade_nj <- countries_value %>%
  filter(origin %in% "New Jersey") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(nj_total = sum(sum, na.rm = T),
            nj_average = (sum(sum, na.rm = T) / 6),
            nj_2012 = sum(value_2012, na.rm = T),
            nj_2013 = sum(value_2013, na.rm = T),
            nj_2014 = sum(value_2014, na.rm = T),
            nj_2015 = sum(value_2015, na.rm = T),
            nj_2016 = sum(value_2016, na.rm = T),
            nj_2017 = sum(value_2017, na.rm = T)
  )

#New York
trade_ny <- countries_value %>%
  filter(origin %in% "New York") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(ny_total = sum(sum, na.rm = T),
            ny_average = (sum(sum, na.rm = T) / 6),
            ny_2012 = sum(value_2012, na.rm = T),
            ny_2013 = sum(value_2013, na.rm = T),
            ny_2014 = sum(value_2014, na.rm = T),
            ny_2015 = sum(value_2015, na.rm = T),
            ny_2016 = sum(value_2016, na.rm = T),
            ny_2017 = sum(value_2017, na.rm = T)
  )

#Ohio
trade_oh <- countries_value %>%
  filter(origin %in% "Ohio") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(oh_total = sum(sum, na.rm = T),
            oh_average = (sum(sum, na.rm = T) / 6),
            oh_2012 = sum(value_2012, na.rm = T),
            oh_2013 = sum(value_2013, na.rm = T),
            oh_2014 = sum(value_2014, na.rm = T),
            oh_2015 = sum(value_2015, na.rm = T),
            oh_2016 = sum(value_2016, na.rm = T),
            oh_2017 = sum(value_2017, na.rm = T)
  )

#Pennsylvania
trade_pa <- countries_value %>%
  filter(origin %in% "Pennsylvania") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(pa_total = sum(sum, na.rm = T),
            pa_average = (sum(sum, na.rm = T) / 6),
            pa_2012 = sum(value_2012, na.rm = T),
            pa_2013 = sum(value_2013, na.rm = T),
            pa_2014 = sum(value_2014, na.rm = T),
            pa_2015 = sum(value_2015, na.rm = T),
            pa_2016 = sum(value_2016, na.rm = T),
            pa_2017 = sum(value_2017, na.rm = T)
  )

#Virginia
trade_va <- countries_value %>%
  filter(origin %in% "Virginia") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(va_total = sum(sum, na.rm = T),
            va_average = (sum(sum, na.rm = T) / 6),
            va_2012 = sum(value_2012, na.rm = T),
            va_2013 = sum(value_2013, na.rm = T),
            va_2014 = sum(value_2014, na.rm = T),
            va_2015 = sum(value_2015, na.rm = T),
            va_2016 = sum(value_2016, na.rm = T),
            va_2017 = sum(value_2017, na.rm = T)
  )

#West Virginia
trade_wv <- countries_value %>%
  filter(origin %in% "West Virginia") %>%
  group_by(destination) %>%
  arrange(destination) %>%
  summarize(wv_total = sum(sum, na.rm = T),
            wv_average = (sum(sum, na.rm = T) / 6),
            wv_2012 = sum(value_2012, na.rm = T),
            wv_2013 = sum(value_2013, na.rm = T),
            wv_2014 = sum(value_2014, na.rm = T),
            wv_2015 = sum(value_2015, na.rm = T),
            wv_2016 = sum(value_2016, na.rm = T),
            wv_2017 = sum(value_2017, na.rm = T)
  )
}

###PRESENT TRADE VALUE
#add all infected state data added to new summary object: CT, DE, MD, NJ, NY, OH, PA, VA, WV
countries_trade_summary_slf <- left_join(trade_summary, trade_pa, by = "destination") %>%
  left_join(., trade_ct, by = "destination") %>%
  left_join(., trade_de, by = "destination") %>%
  left_join(., trade_md, by = "destination") %>%
  left_join(., trade_nj, by = "destination") %>%
  left_join(., trade_ny, by = "destination") %>%
  left_join(., trade_oh, by = "destination") %>%
  left_join(., trade_va, by = "destination") %>%
  left_join(., trade_wv, by = "destination")

#add info for the total of all infected states
countries_trade_summary_slf <- countries_trade_summary_slf %>%
  group_by(destination) %>%
  mutate(
    infected_total = sum(c(ct_total, de_total, md_total, nj_total, ny_total, oh_total, pa_total, va_total, wv_total), na.rm = T),
    infected_average = sum(c(ct_total, de_total, md_total, nj_total, ny_total, oh_total, pa_total, va_total, wv_total), na.rm = T)/54 #54 is 6 years * 9 total infected states, previous average removes NA's, forgetting them in the mean calc
  )

###FUTURE TRADE VALUE
#new version of summarizing that works for new states
#add all infected state data added to new summary object: CT, DE, MD, NJ, NY, OH, PA, VA, WV + NC
countries_trade_summary_slf_future <- left_join(trade_summary, trade_pa, by = "destination") %>%
  left_join(., trade_ct, by = "destination") %>%
  left_join(., trade_de, by = "destination") %>%
  left_join(., trade_md, by = "destination") %>%
  left_join(., trade_nc, by = "destination") %>%
  left_join(., trade_nj, by = "destination") %>%
  left_join(., trade_ny, by = "destination") %>%
  left_join(., trade_oh, by = "destination") %>%
  left_join(., trade_va, by = "destination") %>%
  left_join(., trade_wv, by = "destination")

#add info for the total of all infected states
countries_trade_summary_slf_future <- countries_trade_summary_slf_future %>%
  group_by(destination) %>%
  mutate(
    infected_total = sum(c(ct_total, de_total, md_total, nj_total, ny_total, oh_total, pa_total, va_total, wv_total, nc_total), na.rm = T),
    infected_average = sum(c(ct_total, de_total, md_total, nj_total, ny_total, oh_total, pa_total, va_total, wv_total, nc_total), na.rm = T)/60 #60 is 6 years * 10 total infected states, previous average removes NA's, forgetting them in the mean calc
  )


#replace the Na's with zeros
countries_trade_summary_slf[is.na(countries_trade_summary_slf)] <- 0
countries_trade_summary_slf_future[is.na(countries_trade_summary_slf_future)] <- 0

#fix koreas' names
countries_trade_summary_slf$destination <- gsub(x = countries_trade_summary_slf$destination, pattern = "Korea, North", replacement = "North Korea")
countries_trade_summary_slf$destination <- gsub(x = countries_trade_summary_slf$destination, pattern = "Korea, South", replacement = "South Korea")
countries_trade_summary_slf_future$destination <- gsub(x = countries_trade_summary_slf_future$destination, pattern = "Korea, North", replacement = "North Korea")
countries_trade_summary_slf_future$destination <- gsub(x = countries_trade_summary_slf_future$destination, pattern = "Korea, South", replacement = "South Korea")

#rearrange by destination
countries_trade_summary_slf <- countries_trade_summary_slf %>%
  arrange(destination)
countries_trade_summary_slf_future <- countries_trade_summary_slf_future %>%
  arrange(destination)


#trade MASS
{
  trade_mass_summary <- countries_mass %>%
    group_by(destination) %>%
    arrange(destination) %>%
    summarize(all_total = sum(sum, na.rm = T),
              all_average = (sum(sum, na.rm = T) / 6),
              all_2012 = sum(mass_2012, na.rm = T),
              all_2013 = sum(mass_2013, na.rm = T),
              all_2014 = sum(mass_2014, na.rm = T),
              all_2015 = sum(mass_2015, na.rm = T),
              all_2016 = sum(mass_2016, na.rm = T),
              all_2017 = sum(mass_2017, na.rm = T)
    )

  #Connecticut
  trade_mass_ct <- countries_mass %>%
    filter(origin %in% "Connecticut") %>%
    group_by(destination) %>%
    arrange(destination) %>%
    summarize(ct_total = sum(sum, na.rm = T),
              ct_average = (sum(sum, na.rm = T) / 6),
              ct_2012 = sum(mass_2012, na.rm = T),
              ct_2013 = sum(mass_2013, na.rm = T),
              ct_2014 = sum(mass_2014, na.rm = T),
              ct_2015 = sum(mass_2015, na.rm = T),
              ct_2016 = sum(mass_2016, na.rm = T),
              ct_2017 = sum(mass_2017, na.rm = T)
    )

  #Delaware
  trade_mass_de <- countries_mass %>%
    filter(origin %in% "Delaware") %>%
    group_by(destination) %>%
    arrange(destination) %>%
    summarize(de_total = sum(sum, na.rm = T),
              de_average = (sum(sum, na.rm = T) / 6),
              de_2012 = sum(mass_2012, na.rm = T),
              de_2013 = sum(mass_2013, na.rm = T),
              de_2014 = sum(mass_2014, na.rm = T),
              de_2015 = sum(mass_2015, na.rm = T),
              de_2016 = sum(mass_2016, na.rm = T),
              de_2017 = sum(mass_2017, na.rm = T)
    )

  #Maryland
  trade_mass_md <- countries_mass %>%
    filter(origin %in% "Maryland") %>%
    group_by(destination) %>%
    arrange(destination) %>%
    summarize(md_total = sum(sum, na.rm = T),
              md_average = (sum(sum, na.rm = T) / 6),
              md_2012 = sum(mass_2012, na.rm = T),
              md_2013 = sum(mass_2013, na.rm = T),
              md_2014 = sum(mass_2014, na.rm = T),
              md_2015 = sum(mass_2015, na.rm = T),
              md_2016 = sum(mass_2016, na.rm = T),
              md_2017 = sum(mass_2017, na.rm = T)
    )

  #North Carolina
  trade_mass_nc <- countries_mass %>%
    filter(origin %in% "North Carolina") %>%
    group_by(destination) %>%
    arrange(destination) %>%
    summarize(nc_total = sum(sum, na.rm = T),
              nc_average = (sum(sum, na.rm = T) / 6),
              nc_2012 = sum(mass_2012, na.rm = T),
              nc_2013 = sum(mass_2013, na.rm = T),
              nc_2014 = sum(mass_2014, na.rm = T),
              nc_2015 = sum(mass_2015, na.rm = T),
              nc_2016 = sum(mass_2016, na.rm = T),
              nc_2017 = sum(mass_2017, na.rm = T)
    )

  #New Jersey
  trade_mass_nj <- countries_mass %>%
    filter(origin %in% "New Jersey") %>%
    group_by(destination) %>%
    arrange(destination) %>%
    summarize(nj_total = sum(sum, na.rm = T),
              nj_average = (sum(sum, na.rm = T) / 6),
              nj_2012 = sum(mass_2012, na.rm = T),
              nj_2013 = sum(mass_2013, na.rm = T),
              nj_2014 = sum(mass_2014, na.rm = T),
              nj_2015 = sum(mass_2015, na.rm = T),
              nj_2016 = sum(mass_2016, na.rm = T),
              nj_2017 = sum(mass_2017, na.rm = T)
    )

  #New York
  trade_mass_ny <- countries_mass %>%
    filter(origin %in% "New York") %>%
    group_by(destination) %>%
    arrange(destination) %>%
    summarize(ny_total = sum(sum, na.rm = T),
              ny_average = (sum(sum, na.rm = T) / 6),
              ny_2012 = sum(mass_2012, na.rm = T),
              ny_2013 = sum(mass_2013, na.rm = T),
              ny_2014 = sum(mass_2014, na.rm = T),
              ny_2015 = sum(mass_2015, na.rm = T),
              ny_2016 = sum(mass_2016, na.rm = T),
              ny_2017 = sum(mass_2017, na.rm = T)
    )

  #Ohio
  trade_mass_oh <- countries_mass %>%
    filter(origin %in% "Ohio") %>%
    group_by(destination) %>%
    arrange(destination) %>%
    summarize(oh_total = sum(sum, na.rm = T),
              oh_average = (sum(sum, na.rm = T) / 6),
              oh_2012 = sum(mass_2012, na.rm = T),
              oh_2013 = sum(mass_2013, na.rm = T),
              oh_2014 = sum(mass_2014, na.rm = T),
              oh_2015 = sum(mass_2015, na.rm = T),
              oh_2016 = sum(mass_2016, na.rm = T),
              oh_2017 = sum(mass_2017, na.rm = T)
    )

  #Pennsylvania
  trade_mass_pa <- countries_mass %>%
    filter(origin %in% "Pennsylvania") %>%
    group_by(destination) %>%
    arrange(destination) %>%
    summarize(pa_total = sum(sum, na.rm = T),
              pa_average = (sum(sum, na.rm = T) / 6),
              pa_2012 = sum(mass_2012, na.rm = T),
              pa_2013 = sum(mass_2013, na.rm = T),
              pa_2014 = sum(mass_2014, na.rm = T),
              pa_2015 = sum(mass_2015, na.rm = T),
              pa_2016 = sum(mass_2016, na.rm = T),
              pa_2017 = sum(mass_2017, na.rm = T)
    )

  #Virginia
  trade_mass_va <- countries_mass %>%
    filter(origin %in% "Virginia") %>%
    group_by(destination) %>%
    arrange(destination) %>%
    summarize(va_total = sum(sum, na.rm = T),
              va_average = (sum(sum, na.rm = T) / 6),
              va_2012 = sum(mass_2012, na.rm = T),
              va_2013 = sum(mass_2013, na.rm = T),
              va_2014 = sum(mass_2014, na.rm = T),
              va_2015 = sum(mass_2015, na.rm = T),
              va_2016 = sum(mass_2016, na.rm = T),
              va_2017 = sum(mass_2017, na.rm = T)
    )

  #West Virginia
  trade_mass_wv <- countries_mass %>%
    filter(origin %in% "West Virginia") %>%
    group_by(destination) %>%
    arrange(destination) %>%
    summarize(wv_total = sum(sum, na.rm = T),
              wv_average = (sum(sum, na.rm = T) / 6),
              wv_2012 = sum(mass_2012, na.rm = T),
              wv_2013 = sum(mass_2013, na.rm = T),
              wv_2014 = sum(mass_2014, na.rm = T),
              wv_2015 = sum(mass_2015, na.rm = T),
              wv_2016 = sum(mass_2016, na.rm = T),
              wv_2017 = sum(mass_2017, na.rm = T)
    )
}


#PRESENT TRADE MASS
#add all infected state data added to new summary object: CT, DE, MD, NJ, NY, OH, PA, VA, WV
countries_trade_mass_summary_slf <- left_join(trade_mass_summary, trade_mass_pa, by = "destination") %>%
  left_join(., trade_mass_ct, by = "destination") %>%
  left_join(., trade_mass_de, by = "destination") %>%
  left_join(., trade_mass_md, by = "destination") %>%
  left_join(., trade_mass_nj, by = "destination") %>%
  left_join(., trade_mass_ny, by = "destination") %>%
  left_join(., trade_mass_oh, by = "destination") %>%
  left_join(., trade_mass_va, by = "destination") %>%
  left_join(., trade_mass_wv, by = "destination")

#add info for the total of all infected states
countries_trade_mass_summary_slf <- countries_trade_mass_summary_slf %>%
  group_by(destination) %>%
  mutate(
    infected_total = sum(c(ct_total, de_total, md_total, nj_total, ny_total, oh_total, pa_total, va_total, wv_total), na.rm = T),
    infected_average = sum(c(ct_total, de_total, md_total, nj_total, ny_total, oh_total, pa_total, va_total, wv_total), na.rm = T)/54 #54 is 6 years * 9 total infected states, previous average removes NA's, forgetting them in the mean calc
  )

#FUTURE TRADE MASS: CT, DE, MD, NJ, NY, OH, PA, VA, WV + NC
countries_trade_mass_summary_slf_future <- left_join(trade_mass_summary, trade_mass_pa, by = "destination") %>%
  left_join(., trade_mass_ct, by = "destination") %>%
  left_join(., trade_mass_de, by = "destination") %>%
  left_join(., trade_mass_md, by = "destination") %>%
  left_join(., trade_mass_nc, by = "destination") %>%
  left_join(., trade_mass_nj, by = "destination") %>%
  left_join(., trade_mass_ny, by = "destination") %>%
  left_join(., trade_mass_oh, by = "destination") %>%
  left_join(., trade_mass_va, by = "destination") %>%
  left_join(., trade_mass_wv, by = "destination")

#add info for the total of all infected states
countries_trade_mass_summary_slf_future <- countries_trade_mass_summary_slf_future %>%
  group_by(destination) %>%
  mutate(
    infected_total = sum(c(ct_total, de_total, md_total, nj_total, ny_total, oh_total, pa_total, va_total, wv_total, nc_total), na.rm = T),
    infected_average = sum(c(ct_total, de_total, md_total, nj_total, ny_total, oh_total, pa_total, va_total, wv_total, nc_total), na.rm = T)/60 #60 is 6 years * 10 total infected states, previous average removes NA's, forgetting them in the mean calc
  )

#replace the Na's with zeros
countries_trade_mass_summary_slf[is.na(countries_trade_mass_summary_slf)] <- 0
countries_trade_mass_summary_slf_future[is.na(countries_trade_mass_summary_slf_future)] <- 0

#fix koreas' names
countries_trade_mass_summary_slf$destination <- gsub(x = countries_trade_mass_summary_slf$destination, pattern = "Korea, North", replacement = "North Korea")
countries_trade_mass_summary_slf$destination <- gsub(x = countries_trade_mass_summary_slf$destination, pattern = "Korea, South", replacement = "South Korea")
countries_trade_mass_summary_slf_future$destination <- gsub(x = countries_trade_mass_summary_slf_future$destination, pattern = "Korea, North", replacement = "North Korea")
countries_trade_mass_summary_slf_future$destination <- gsub(x = countries_trade_mass_summary_slf_future$destination, pattern = "Korea, South", replacement = "South Korea")

#rearrange by destination
countries_trade_mass_summary_slf <- countries_trade_mass_summary_slf %>%
  arrange(destination)
countries_trade_mass_summary_slf_future <- countries_trade_mass_summary_slf_future %>%
  arrange(destination)





#############################
  #grapes
#############################
#STATES
#yield
#(convert the yield)
states_grapes_yield <- read_csv(file = "./data-raw/states_grape_yield_2012-2017.csv") %>%
  dplyr::select(State, Year, Value, `Data Item`) %>%
  mutate(State = tolower(State)) %>%
  mutate(State = toTitleCase(State)) %>%
  rename(geopol_unit = State) %>%  #rename Area to geopol_unit
  mutate(Value = Value*2.2417, Unit = "tonnes/ha") %>%  #convert tons/acre to metric tons/hectare
  mutate(Item = "grape_yield") %>%  #set Item as a description col
  dplyr::select(-`Data Item`) #rm junk col


#production these data run 1998 - 2019
states_grapes_prod <- read_csv(file = "./data-raw/NASS_QUICKSTATS_data__states_grapes_production_01-24-2020.csv") %>%
  dplyr::select(State, Year, Value, `Data Item`) %>%
  mutate(State = tolower(State)) %>%
  mutate(State = toTitleCase(State)) %>%
  rename(geopol_unit = State) %>%  #rename Area to geopol_unit
  filter(Year > 2011 & Year < 2018) %>% #filter out to be 2012-2017
  mutate(Value = as.numeric(gsub(",","", .$Value))) %>% #make the Value numeric and rm commas
  mutate(Value = Value*1.01605, Unit = "tonnes")  %>%  #convert from us tons to metric tonnes
  mutate(Item = "grape_production") %>%  #set Item as a description col
  dplyr::select(-`Data Item`) #rm junk col

#combine yield and production
states_grapes <- bind_rows(states_grapes_yield, states_grapes_prod)


#COUNTRIES
#YIELD
countries_grapes_yield <- read_csv(file = "./data-raw/FAOSTAT_data_4-22-2019.csv") %>%
  dplyr::select(Area, Year, Value, Unit, Item) %>%
  mutate(Item = "grape_yield") %>%  #set Item as a description col
  mutate(Value = Value*0.0001, Unit = "tonnes/ha") %>%   #convert hg/hectare to metric tons/hectare
  mutate(Area = gsub(pattern = "United States of America", replacement = "United States", x = .$Area)) %>% #change the name of United States of America to United States
  rename(geopol_unit = Area)  #rename Area to geopol_unit


#production
countries_grapes_prod <- read_csv("./data-raw/FAOSTAT_data_countries_grapes_production_01-24-2020.csv") %>%
  dplyr::select(Area, Year, Value, Unit, Item) %>%   #select cols of interest
  rename(geopol_unit = Area) %>%   #rename Area to geopol_unit
  mutate(Item = "grape_production") %>%  #set Item as a description col
  filter(Year > 2011 & Year < 2018)   #filter out to be 2012-2017

#combine yield and production
countries_grapes <- bind_rows(countries_grapes_yield, countries_grapes_prod)

#clean up a naming issue
grep(pattern = "C�te d'Ivoire", x = countries_grapes$geopol_unit) #need to change
countries_grapes$geopol_unit <- gsub(pattern = "C�te d'Ivoire", replacement = "Ivory Coast", x = countries_grapes$geopol_unit)

#############################
  #wine
#############################
#STATES
states_wine <- read_csv("./data-raw/TTB_data_states_wine.csv")

#clean up the data a little
states_wine <- states_wine %>%
  mutate(geopol_unit = tolower(geopol_unit)) %>%
  mutate(geopol_unit = toTitleCase(geopol_unit))

#convert from gallons to mass in metric tons: conversion on https://www.aqua-calc.com/calculate/food-volume-to-weight#anchor-about
states_wine[,-1] <- states_wine[,-1] * 3.776e-3

#tibble it up
states_wine <- states_wine %>%
  group_by(geopol_unit) %>%
  gather(`2012`:`2017`, key = "Year", value = "Mass") %>%
  mutate(Unit = "tonnes")


#COUNTRIES
#countries_wine <- read_csv("./data-raw/FAOSTAT_data_wine_6-21-2019_v2.csv")
countries_wine <- read_csv("./data-raw/FAOSTAT_data_countries_wine_4-2-2021.csv")

#clean up the countries data
countries_wine <- countries_wine %>%
  mutate(geopol_unit = Area, Mass = Value) %>%
  dplyr::select(geopol_unit, Year, Mass, Unit) #%>%
  #mutate(Unit = gsub("tonnes", "tons", x = .$Unit))

#############################
#wineries
#############################

#read in the wineries
wineries <- read_csv(file = "./data-raw/WineList.csv") %>%
  dplyr::rename(y = `Decimal Lat`, x = `Decimal Long`)


#############################
  #suitability models
#############################
    #STATES
states_slf_extract <- read_csv(file = "./data-raw/extract_states_02_15_19_maxent_slf+atc-bio02.csv", col_names = T)
states_toh_extract <- read_csv(file = "./data-raw/extract_states_10_29_18_maxent_toh+atc-bio02.csv", col_names = T)
states_slftoh_extract <- read_csv(file = "./data-raw/extract_states_11_07_18_maxent_slf+toh+atc-bio02.csv", col_names = T)

#ensemble
states_extracts_ensemble <- read_csv(file = "./data-raw/extract_states_slftoh_ensemble_mean.csv", col_names = T)

#add model as a categorical var
states_slf_extract <- states_slf_extract %>%
  mutate(model = "slf")
states_toh_extract <- states_toh_extract %>%
  mutate(model = "toh")
states_slftoh_extract <- states_slftoh_extract %>%
  mutate(model = "slftoh")

#put all of the states model extracts in a single object
states_extracts <- bind_rows(states_slf_extract, states_toh_extract, states_slftoh_extract) %>%
  group_by(geopol_unit) %>%
  arrange(geopol_unit) %>%
  dplyr::select(geopol_unit, model, everything())

#clean up the extract data (change names)
colnames(states_extracts) <- gsub("max", replacement = "obs_max", x = colnames(states_extracts))
colnames(states_extracts) <- gsub("min", replacement = "obs_min", x = colnames(states_extracts))
colnames(states_extracts) <- gsub("mean", replacement = "obs_mean", x = colnames(states_extracts))
colnames(states_extracts) <- gsub("sd", replacement = "obs_sd", x = colnames(states_extracts))


    #COUNTRIES
countries_slf_extract <- read_csv(file = "./data-raw/extract_world_11_07_18_maxent_slf+atc-bio02.csv", col_names = T)
countries_toh_extract <- read_csv(file = "./data-raw/extract_world_11_07_18_maxent_toh+atc-bio02.csv", col_names = T)
countries_slftoh_extract <- read_csv(file = "./data-raw/extract_world_11_07_18_maxent_slf+toh+atc-bio02.csv", col_names = T)

#ensemble
countries_extracts_ensemble <- read_csv(file = "./data-raw/extract_countries_slftoh_ensemble_mean.csv", col_names = T)

#clean ensemble names
countries_extracts_ensemble$geopol_unit[grep(pattern = "Ivoire", x = countries_extracts_ensemble$geopol_unit, value = F)] <- "Ivory Coast"
countries_extracts_ensemble$geopol_unit[grep(pattern = "ncipe", x = countries_extracts_ensemble$geopol_unit, value = F)] <- "Sao Tome and Principe"
countries_extracts_ensemble$geopol_unit[grep(pattern = "Cura", countries_extracts_ensemble$geopol_unit, value = F)] <- "Curacao"
countries_extracts_ensemble$geopol_unit[grep(pattern = "Saint-Bart", x = countries_extracts_ensemble$geopol_unit, value = F)] <- "Saint Barthelemy"
countries_extracts_ensemble$geopol_unit[grep(pattern = "Saint-Martin", x = countries_extracts_ensemble$geopol_unit, value = F)] <- "Saint Martin"

#add model as a categorical var
countries_slf_extract <- countries_slf_extract %>%
  mutate(model = "slf")
countries_toh_extract <- countries_toh_extract %>%
  mutate(model = "toh")
countries_slftoh_extract <- countries_slftoh_extract %>%
  mutate(model = "slftoh")

#put all of the states model extracts in a single object
countries_extracts <- bind_rows(countries_slf_extract, countries_toh_extract, countries_slftoh_extract) %>%
  group_by(geopol_unit) %>%
  arrange(geopol_unit)  %>%
  dplyr::select(geopol_unit, model, everything())

#clean up the extract data (change names)
colnames(countries_extracts) <- gsub("max", replacement = "obs_max", x = colnames(countries_extracts))
colnames(countries_extracts) <- gsub("min", replacement = "obs_min", x = colnames(countries_extracts))
colnames(countries_extracts) <- gsub("mean", replacement = "obs_mean", x = colnames(countries_extracts))
colnames(countries_extracts) <- gsub("sd", replacement = "obs_sd", x = colnames(countries_extracts))

#clean country issues
countries_extracts$geopol_unit[grep(pattern = "Ivoire", x = countries_extracts$geopol_unit, value = F)] <- "Ivory Coast"
countries_extracts$geopol_unit[grep(pattern = "ncipe", x = countries_extracts$geopol_unit, value = F)] <- "Sao Tome and Principe"
countries_extracts$geopol_unit[grep(pattern = "Cura",countries_extracts$geopol_unit, value = F)] <- "Curacao"
countries_extracts$geopol_unit[grep(pattern = "Saint Bart", x = countries_extracts$geopol_unit, value = F)] <- "Saint Barthelemy"


#############################
#suitability models rasters
#############################

#load the summary table
models_summary <- read_csv(file = "./data-raw/models_summary.csv")

#load USA
suitability_usa <- raster("/Volumes/GoogleDrive/Shared drives/slfData/data/slfRisk/maxent_models/slftoh_ensemble_mean_downsampled_x4_usa.tif")


#fortify (dataframe-fy) the global model
suitability_usa_df <- fortify(suitability_usa, maxpixels = 1e10)
colnames(suitability_usa_df)[3] <- "value"
#remove the NA values to shrink the object size
suitability_usa_df <- suitability_usa_df[!is.na(suitability_usa_df$value),]

#round the value points
suitability_usa_df$value <- round(suitability_usa_df$value, digits = 2)

#adding the state centers for plotting the map
states_centers <- read_csv(file = "./data-raw/us_capitals.csv")
colnames(states_centers)[1] <- "geopol_unit"

#individual models
slf_usa <- raster("/Volumes/GoogleDrive/Shared drives/slfData/data/slfRisk/maxent_models/slf_downsampled_x4_usa.tif")
slftoh_usa <- raster("/Volumes/GoogleDrive/Shared drives/slfData/data/slfRisk/maxent_models/slftoh_downsampled_x4_usa.tif")
toh_usa <- raster("/Volumes/GoogleDrive/Shared drives/slfData/data/slfRisk/maxent_models/toh_downsampled_x4_usa.tif")


#fortify (dataframe-fy) the global model
slf_usa_df <- fortify(slf_usa, maxpixels = 1e10)
slftoh_usa_df <- fortify(slftoh_usa, maxpixels = 1e10)
toh_usa_df <- fortify(toh_usa, maxpixels = 1e10)

colnames(slf_usa_df)[3] <- "value"
colnames(slftoh_usa_df)[3] <- "value"
colnames(toh_usa_df)[3] <- "value"

#remove the NA values to shrink the object size
slf_usa_df <- slf_usa_df[!is.na(slf_usa_df$value),]
slftoh_usa_df <- slftoh_usa_df[!is.na(slftoh_usa_df$value),]
toh_usa_df <- toh_usa_df[!is.na(toh_usa_df$value),]

#round the value points
slf_usa_df$value <- round(slf_usa_df$value, digits = 2)
slftoh_usa_df$value <- round(slftoh_usa_df$value, digits = 2)
slf_usa_df$value <- round(slf_usa_df$value, digits = 2)


#COUNTRIES
suitability_countries <- raster("/Volumes/GoogleDrive/Shared drives/slfData/data/slfRisk/maxent_models/slftoh_ensemble_mean_downsampled_x4.tif")

#fortify (dataframe-fy) the global model
suitability_countries_df <- fortify(suitability_countries, maxpixels = 1e10)
colnames(suitability_countries_df)[3] <- "value"
#remove the NA values to shrink the object size
suitability_countries_df <- suitability_countries_df[!is.na(suitability_countries_df$value),]

#round the value points
suitability_countries_df$value <- round(suitability_countries_df$value, digits = 2)

#individual models
slf <- raster("/Volumes/GoogleDrive/Shared drives/slfData/data/slfRisk/maxent_models/slf_downsampled_x10.tif")
slftoh <- raster("/Volumes/GoogleDrive/Shared drives/slfData/data/slfRisk/maxent_models/slftoh_downsampled_x10.tif")
toh <- raster("/Volumes/GoogleDrive/Shared drives/slfData/data/slfRisk/maxent_models/toh_downsampled_x10.tif")


#fortify (dataframe-fy) the global model
slf_df <- fortify(slf, maxpixels = 1e10)
slftoh_df <- fortify(slftoh, maxpixels = 1e10)
toh_df <- fortify(toh, maxpixels = 1e10)

colnames(slf_df)[3] <- "value"
colnames(slftoh_df)[3] <- "value"
colnames(toh_df)[3] <- "value"

#remove the NA values to shrink the object size
slf_df <- slf_df[!is.na(slf_df$value),]
slftoh_df <- slftoh_df[!is.na(slftoh_df$value),]
toh_df <- toh_df[!is.na(toh_df$value),]

#round the value points
slf_df$value <- round(slf_df$value, digits = 2)
slftoh_df$value <- round(slftoh_df$value, digits = 2)
slf_df$value <- round(slf_df$value, digits = 2)

#add in the countries center for plotting the map ***TURNED OFF RIGHT NOW***
#read in the centroids for a check
#countries_centers <- read_csv(file = "./data-raw/geopolitical_centers_world.csv") %>%
#  dplyr::rename(y = y_coord, x = x_coord)
#colnames(countries_centers)[1] <- "geopol_unit"

#add in the countries capitals for plotting the map
#read in the basic world cities data from Pareto Software, LLC, the owner of Simplemaps.com, available at: https://simplemaps.com/data/world-cities
countries_centers <- read_csv(file = "./data-raw/worldcities.csv") %>%
  dplyr::select(country, iso2, iso3, city_ascii, lng, lat, capital) %>%
  dplyr::rename(geopol_unit = country, y = lat, x = lng, city_type = capital, capital = city_ascii) %>%
  dplyr::arrange(geopol_unit) %>%
  dplyr::filter(city_type == "primary")

#fix Ivory Coast to be same as other files
countries_centers$geopol_unit <- gsub(pattern = "Côte D???Ivoire", replacement = "Ivory Coast", x = countries_centers$geopol_unit)

#need to filter out cases where a country has more than one primary capital listed to be just one of the capitals
countries_centers <- countries_centers %>%
  filter(!(geopol_unit == "South Africa" & capital != "Cape Town")) %>%
  filter(!(geopol_unit == "Benin" & capital != "Porto-Novo")) %>%
  filter(!(geopol_unit == "Bolivia" & capital != "Sucre")) %>%
  filter(!(geopol_unit == "Burma" & capital != "Nay Pyi Taw")) %>%
  filter(!(geopol_unit == "Burundi" & capital != "Gitega")) %>%
  filter(!(geopol_unit == "Ivory Coast" & capital != "Yamoussoukro")) %>%
  filter(!(geopol_unit == "Netherlands" & capital != "Amsterdam")) %>%
  filter(!(geopol_unit == "Sri Lanka" & capital != "Colombo")) %>%
  filter(!(geopol_unit == "Swaziland" & capital != "Mbabane")) %>%
  filter(!(geopol_unit == "Tanzania" & capital != "Dodoma"))

#rm the city_type col, it is not useful here
countries_centers <- countries_centers %>%
  dplyr::select(-city_type)

##########################################################
#suitability models
##########################################################

#read in presence records
#slf_points <- read_csv(file = "./data-raw/slf_presence_records.csv") %>%
#  dplyr::rename(species = Species, x = Longitude, y = Latitude) %>%
#  mutate(species = "Lycorma delicatula") %>%
#  dplyr::select(species, x, y)
#toh_points <- read_csv(file = "./data-raw/toh_presence_records.csv") %>%
#  dplyr::rename(species = name, x = decimalLongitude, y = decimalLatitude) %>%
#  dplyr::select(species, x, y)

slf_points <- read_csv(file = "./data-raw/slf_gbif_cleaned_coords_2020.csv") %>%
  dplyr::rename(species = name, x = longitude, y = latitude) %>%
  mutate(species = "Lycorma delicatula") %>%
  dplyr::select(species, x, y)
toh_points <- read_csv(file = "./data-raw/toh_gbif_cleaned_coords_2020.csv") %>%
  dplyr::rename(species = name, x = longitude, y = latitude) %>%
  mutate(species = "Ailanthus altissima") %>%
  dplyr::select(species, x, y)

####################################################################################################################
#Write out new RDA files
####################################################################################################################

#############################
#wine market risk
#############################
save(countries_market, file = file.path(here(), "data", "countries_market.rda")) #wine import and export data by country

#############################
#trade
#############################
save(states_trade_summary_slf, file = file.path(here(), "data", "states_trade_summary_slf.rda")) #present value trade with infected states
save(states_trade_summary_slf_future, file = file.path(here(), "data", "states_trade_summary_slf_future.rda")) #future scenario value trade with infected states
save(states_trade_mass_summary_slf, file = file.path(here(), "data", "states_trade_mass_summary_slf.rda")) #present mass trade with infected states
save(states_trade_mass_summary_slf_future, file = file.path(here(), "data", "states_trade_mass_summary_slf_future.rda")) #future scenario mass trade with infected states

save(countries_trade_summary_slf, file = file.path(here(), "data", "countries_trade_summary_slf.rda")) #present value trade with infected states
save(countries_trade_summary_slf_future, file = file.path(here(), "data", "countries_trade_summary_slf_future.rda")) #future scenario value trade with infected states
save(countries_trade_mass_summary_slf, file = file.path(here(), "data", "countries_trade_mass_summary_slf.rda")) #present mass trade with infected states
save(countries_trade_mass_summary_slf_future, file = file.path(here(), "data", "countries_trade_mass_summary_slf_future.rda")) #future scenario mass trade with infected states

#############################
#grapes
#############################
save(states_grapes, file = file.path(here(), "data", "states_grapes.rda"))
save(countries_grapes, file = file.path(here(), "data", "countries_grapes.rda"))

#############################
#wine
#############################
save(states_wine, file = file.path(here(), "data", "states_wine.rda"))
save(countries_wine, file = file.path(here(), "data", "countries_wine.rda"))

#############################
#wineries
#############################
save(wineries, file = file.path(here(), "data", "wineries.rda"))

#############################
#suitability models
#############################
save(states_extracts, file = file.path(here(), "data", "states_extracts.rda"))
save(countries_extracts, file = file.path(here(), "data", "countries_extracts.rda"))

#ensemble extracts
save(states_extracts_ensemble, file = file.path(here(), "data", "states_extracts_ensemble.rda"))
save(countries_extracts_ensemble, file = file.path(here(), "data", "countries_extracts_ensemble.rda"))

save(models_summary, file = file.path(here(), "data", "models_summary.rda"))

#############################
#suitability models rasters
#############################
save(suitability_usa_df, file = file.path(here(), "data", "suitability_usa_df.rda"))
save(suitability_countries_df, file = file.path(here(), "data", "suitability_countries_df.rda"))
save(states_centers, file = file.path(here(), "data", "states_centers.rda"))
save(countries_centers, file = file.path(here(), "data", "countries_centers.rda"))

#plotting original models
save(slf_usa_df, file = file.path(here(), "data", "slf_usa_df.rda"))
save(slftoh_usa_df, file = file.path(here(), "data", "slftoh_usa_df.rda"))
save(toh_usa_df, file = file.path(here(), "data", "toh_usa_df.rda"))
save(slf_df, file = file.path(here(), "data", "slf_df.rda"))
save(slftoh_df, file = file.path(here(), "data", "slftoh_df.rda"))
save(toh_df, file = file.path(here(), "data", "toh_df.rda"))

#############################
#presence records for models
#############################
save(slf_points, file = file.path(here(), "data", "slf_points.rda"))
save(toh_points, file = file.path(here(), "data", "toh_points.rda"))
ieco-lab/slfrsk documentation built on Aug. 18, 2022, 10:44 a.m.