knitr::opts_chunk$set(echo = TRUE)

import libraries and data.

require(tidyverse)

# county level data of fertilizer application.  
# Source: https://www.sciencebase.gov/catalog/item/5851b2d1e4b0f99207c4f238
raw_data = read_csv("CNTY_FERT_1987-2012.csv")
#summary(raw_data)

# County summary from US census bureau.
# Source: https://www.census.gov/geo/maps-data/data/gazetteer2010.html
county_raw = read.table("Gaz_counties_national.txt", sep = "\t", header=TRUE)
# remove duplicates in county data.
county_data = county_raw %>%
  distinct(GEOID, .keep_all = TRUE) %>%
  # select certin columns.
  select(GEOID, ALAND, AWATER,INTPTLAT, INTPTLONG) %>%
  mutate(FIPSno = GEOID) %>%
  select(-GEOID)

# combine county data with county level fertilizer data.
county_summary = left_join(raw_data,county_data, by = "FIPSno")

Data cleanning

clean data before 1982.

# read in data, extracted from coverage in ArcGIS.
n45_64 <- read.table("cty_fert0.n45-64.txt", sep = ",", header = T)
n65_85 <- read.table("cty_fert0.n65-85.txt", sep = ",", header = T)
p45_64 <- read.table("cty_fert0.p45-64.txt", sep = ",", header = T)
p65_85 <- read.table("cty_fert0.p65-85.txt", sep = ",", header = T)
# merge nitrogen and P data together.
n45_85 = inner_join(n45_64, n65_85, by = c("FIPS","STATE","Rowid_"))
p45_85 = inner_join(p45_64, p65_85, by = c("FIPS","STATE","Rowid_"))

# clean nitroge and phosphorus data.
nitrogen_1985 = n45_85 %>% 
  select(-Rowid_) %>%  # remove irrelavent info.
  # add leading zeros for FIPS to make it 5 digits.
  mutate(FIPS = str_pad(FIPS, 5, pad = "0")) %>% 
  gather(Year_temp, Quantity, Y45:Y85) %>% 
  mutate(Fertilizer = rep("N", length(.$Quantity)),
         Farm.Type = rep("farm", length(.$Quantity)),
         Year = paste("19",str_sub(Year_temp, start = 2),sep = "")
         ) %>% 
  select(-Year_temp)

phosphorus_1985 = p45_85 %>% 
  select(-Rowid_) %>%  # remove irrelavent info.
  mutate(FIPS = str_pad(FIPS, 5, pad = "0")) %>% 
  gather(Year_temp, Quantity, Y45:Y85) %>% 
  mutate(Fertilizer = rep("P", length(.$Quantity)),
         Farm.Type = rep("farm", length(.$Quantity)),
         Year = paste("19",str_sub(Year_temp, start = 2),sep = "")
         ) %>% 
  select(-Year_temp)

clean_data_1985 = rbind(phosphorus_1985, nitrogen_1985)

# extract county summaries info from clean data.
cnty_summary_1985 = county_summary %>% 
  select(FIPS,State, County, ALAND, AWATER, INTPTLAT, INTPTLONG) %>% 
  right_join(clean_data_1985, by = "FIPS") 
clean_data = county_summary %>%
  # remove some columns with FIPS numbers.
  select(-c(FIPS_st, FIPS_co,FIPSno)) %>%
  # wide to long dataset.
  gather(Fert.Type, Quantity, farmN1987:nonfP2012) %>%
  # separate the fert.type into three columns: farm type, fertilizer, year.
  mutate(Year = str_sub(Fert.Type, start = -4), 
         Fertilizer = str_sub(Fert.Type, start = -5, end = -5),
         Farm.Type = str_sub(Fert.Type, start = 1, end = 4)
         ) %>%
  # repalce nonf into nonfarm
  mutate(Farm.Type = ifelse(Farm.Type == "nonf", "nonfarm", "farm")) %>%
  # remove Fert.Type
  select(-Fert.Type)

# add data from 1945. 
clean_data = rbind(clean_data, cnty_summary_1985) %>% 
  rename(Nutrient = Fertilizer) %>% # renam Fertilizer to nutrient.
  mutate(Input.Type = rep("Fertilizer")) # add a colume as fertilizer, compared with Manure.

Clean manure data before 1997

# read in manure data from 1982 to 1997.
cnty_manure_97 = read_csv("cnty_manure_82-97.csv")
head(cnty_manure_97)
cnty_manure_summary = cnty_manure_97 %>% 
  select(-c(State, County)) %>% 
  gather(dummy, Quantity, N_1982:P_1997) %>%   # dummy is a temporay column.
  mutate(Farm.Type = rep("farm", length(.$FIPS)),
         Input.Type = rep("Manure", length(.$FIPS))) %>% 
  separate(dummy, c("Nutrient", "Year"), sep = "_")

head(cnty_manure_summary)

Clean manure data after 1997

# read in manure data.
cnty_manure_02 = read_csv("cnty_manure_2002.csv")
cnty_manure_07 = read_csv("cnty_manure_2007.csv")
cnty_manure_12 = read_csv("cnty_manure_2012.csv")

cnty_manure_02_12 = rbind(cnty_manure_02, cnty_manure_07, cnty_manure_12) %>% 
  select(-c(State, County)) %>% 
  gather(Nutrient, Quantity, N:P) %>% 
  mutate(Farm.Type = rep("farm", length(.$FIPS)),
         Input.Type = rep("Manure", length(.$FIPS)))
head(cnty_manure_02_12)

Merge manure data and fertilizer data.

# connect manure data.
cnty_manure_summary = rbind(cnty_manure_summary,cnty_manure_02_12)

cnty_manure_all = county_summary %>% 
  select(FIPS,State, County, ALAND, AWATER, INTPTLAT, INTPTLONG) %>% 
  right_join(cnty_manure_summary, by = "FIPS") 

clean_data = rbind(clean_data, cnty_manure_all)



# save cleaned data into .rda format.
# not run.
if (FALSE){
save(clean_data, file = "../data/usfertilizer_county.rda")
  }

Data visualization

# plot the top 10 nitrogen application in year 1990.
# Reorder to make the plot more cleanner.
year_plot = 2008
clean_data %>%
  filter(Nutrient == "N" & Year == year_plot) %>%
  top_n(10, Quantity) %>%
  ggplot(aes(x=reorder(paste(County,State, sep = ","), Quantity), Quantity))+
  geom_col()+
  ggtitle(paste("Top 10 counties with most N application in the year of", year_plot)) + 
  coord_flip()
# plot the P application for each county in the year of 2008.
clean_data %>%

filter(State == "OH" & Nutrient == "P" & Year == 2002 & Input.Type == "Manure") %>%
ggplot()+
  geom_point(aes(x=INTPTLONG, y = INTPTLAT, color = Quantity))+
  scale_color_gradient(low = "blue", high = 'red')
  coord_fixed(1.3)
clean_data %>% 
  filter(State == "OH" & Nutrient == "N" ) %>% 
  group_by(Year, Input.Type) %>% 
  summarise(N_App = sum(Quantity, na.rm = T)) %>% 
  ggplot(aes(x = as.numeric(Year), y = N_App, color = Input.Type)) +
  geom_point()+
  geom_line()+
  scale_x_continuous(name = "Year")+
  scale_y_continuous(name = "Nitrogen application (kg N/year)")+
  scale_color_discrete(name = NULL)+
  theme_bw()


wenlong-liu/usfertilizer documentation built on May 26, 2019, 5:36 a.m.