#geo_level can be country, region, continent
#fish_level can be total, family, order, isscaap, or species
# Testing fuction:
#tidy_fish<-tmp_fish
#year_start=2005
#year_end=2010
#fish_var="quantity"
#fish_level="total"; # can also be isscaap_group, family, or species_scientific_name, but must match column name
#fish_name=NA # must be found within fish_level
#geo_level="country"
#fish_unit="t" # can also be "no"; used for whales, seals, walruses, crocodiles/alligators
#combine_sources=FALSE
#output_path=("~/")
## Other options:
#combine_sources=TRUE
#fish_level="species_scientific_name"
#fish_name="Arius spp"
# NOTE: fish_var should always be quantity, i.e., name of fish column to be plotted, leave in as a parameter for now? in case it's possible to use this function for other data sources
map_production<-function(tidy_fish,
year_start,
year_end=NA,
fish_var="quantity",
fish_level="total",
fish_name=NA,
fish_unit="t",
geo_level="country",
combine_sources=FALSE,
output_path=NA){
require(dplyr)
require(rnaturalearth)
require(rnaturalearthdata)
require(rgeos)
require(ggplot2)
require(sf)
if (is.na(output_path)) {
outdir<-getwd()
setwd(outdir)
} else {
setwd(output_path)
}
worldmap <- ne_countries(scale = "medium", returnclass = "sf")
# FAO data country column (N = 247) matches worldmap iso_n3 column (N = 235)
# FAO data country_iso3_code column (N = 260) matches worldmap iso3_a3 (N = 235)
# ISO_a3 is finer-scale than country: example, Taiwan separate from China; Guam, PR separate from USA
# Use iso_a3 for the merging: For mapping, actually doesn't make a difference since worldmap iso_n3 and iso_a3 are both N=235
# But stick with iso_a3 to be consistent with function "graph_production"
# For people downloading from FishStatJ, include in instructions that this data field should be specified
# Deal with countries that have no iso3 alpha codes:
# For Sudan, country column (aka iso numeric code) changed from 736 to 729 after year 2011
# Fill in iso3 alpha = SUD for iso3 numeric 736
dat_fish <- tidy_fish %>%
mutate(country_iso3_code = replace(country_iso3_code, country==736, "SDN")) %>%
# For Channel Islands, drop for now (FAO lumps them together with iso_n3=830, while worldmap splits them into 831 and 832)
filter(country!=830) %>%
# Drop iso_n3=896 (used for "Areas not elsewhere specified", aka country ID not known?)
filter(country!=896) %>%
# Filter for desired units
filter(unit == fish_unit)
# Filter years
if (is.na(year_end)){
all_years <- year_start
} else {
all_years <- seq(from = year_start, to = year_end, by = 1)
}
year_fish <- dat_fish %>%
filter(year %in% all_years)
# Match column name to geo_level parameter
# In tmp_fish there are more levels in iso3 than in "country"; for now, use iso3 as the reporting level
# other notes: un_a3 in worldmap == "country" in tmp_fish == code in FishStatJ (3 digit UNM49 country ID number)
if (geo_level == "country"){
geography <- "country_iso3_code"
merge_col <- "iso_a3"
} else if (geo_level == "continent"){
geography<-"continent_group"
merge_col <- "region_un"
} else if (geo_level == "region"){
geography<-"georegion_group"
# FIXIT - need to check, no equivalent for this merge???
}
# NOTE: in some cases, countries explicitly report "zero" for certain groups of fish and/or sources, while others are NA (likely also zero?)
# i.e., FIXIT: Ask JG/FAO, is there a difference between countries that explicitly report zeroes vs. NAs?
# For now, remove all entries where quantity=0 (otherwise, countries are colored in as 0 as opposed to being NA)
year_fish <- year_fish %>%
filter(get(fish_var) != 0)
# Now aggregate:
# If production sources should be combined:
# If, fish_level is not "total", then code will filter for specified fish_name in specified fish_level
if (combine_sources == TRUE & fish_level == "total"){
year_geo_taxa_fish <- year_fish %>%
group_by(get(geography), year) %>%
summarize(fish_sum = sum(get(fish_var))) %>%
rename(!!geography := 'get(geography)') %>%
droplevels()
} else if (combine_sources == TRUE & fish_level != "total"){
if (fish_name %in% year_fish[[fish_level]]){ # Can the fish be found in the dataset?
year_geo_taxa_fish <- year_fish %>%
filter(get(fish_level) == fish_name) %>%
group_by(get(geography), year) %>%
summarize(fish_sum = sum(get(fish_var))) %>%
rename(!!geography := 'get(geography)') %>%
droplevels()
} else {
nofish<-paste("No", tolower(fish_name), "production in specified year(s)")
stop(nofish)
}
}
# If production sources (capture vs various types of aquaculture) should be kept separate
# If, fish_level is not "total", then code will filter for specified fish_name in specified fish_level
if (combine_sources == FALSE & fish_level == "total"){
year_geo_taxa_fish <- year_fish %>%
group_by(year, get(geography), source_name_en) %>%
summarize(fish_sum = sum(get(fish_var))) %>%
rename(!!geography := 'get(geography)') %>% # using !! and := tells dplyr to rename based on the expression of geography
droplevels()
} else if (combine_sources == FALSE & fish_level != "total"){ # If grouped fish is desired:
if (fish_name %in% year_fish[[fish_level]])
year_geo_taxa_fish <- year_fish %>%
filter(get(fish_level) == fish_name) %>%
group_by(year, get(geography), source_name_en) %>%
summarize(fish_sum = sum(get(fish_var))) %>%
rename(!!geography := 'get(geography)') %>%
droplevels()
else {
nofish<-paste("No", tolower(fish_name), "production in specified year(s)")
stop(nofish)
}
}
# Convert fao[[geography]] to character before merge to suppress warning message
year_geo_taxa_fish[[geography]] <- as.character(year_geo_taxa_fish[[geography]])
#worldmap[[merge_col]] <- as.factor(worldmap[[merge_col]])
#worldmap_nomatch <- levels(worldmap[[merge_col]])[!levels(worldmap[[merge_col]]) %in% levels(year_geo_taxa_fish[[geography]])]
#fao_nomatch <- levels(year_geo_taxa_fish[[geography]])[!levels(year_geo_taxa_fish[[geography]]) %in% levels(worldmap[[merge_col]])]
#sort(unique(year_fish$country_name_en[year_fish$country_iso3_code %in% fao_nomatch]))
# Countries in Worldmap but not in FAO data: "ALA" "ATA" "ESH" "GGY" "HMD" "JEY" "SGS" "VAT"
# Countries in FAO but not Worldmap:"ANT" "BES" "CSK" "EAZ" "GIB" "GLP" "GUF" "MTQ" "MYT" "REU" "SCG" "SJM" "SUN" "TKL" "TUV" "YUG"
#[1] Bonaire/S.Eustatius/Saba Channel Islands Czechoslovakia
#[4] French Guiana Gibraltar Guadeloupe
#[7] Martinique Mayotte Netherlands Antilles
#[10] Other nei Réunion Serbia and Montenegro
#[13] Sudan (former) Svalbard and Jan Mayen Tokelau
#[16] Tuvalu Un. Sov. Soc. Rep. Yugoslavia SFR
#[19] Zanzibar
#combined <- sort(union(levels(worldmap[[merge_col]]), levels(year_geo_taxa_fish[[geography]])))
#levels(worldmap[[merge_col]])<-combined
#levels(year_geo_taxa_fish[[geography]])<-combined
firstname <- geography
join_cols <- merge_col
names(join_cols) <- firstname
# Map setup:
allplots<-
theme_void()+
theme(axis.text.x = element_blank(),
legend.title = element_text(size = 30),
legend.text = element_text(size = 25),
title = element_text(size = 40))
png_width <- 1800
png_height <- 1200
# Plot map
# If sources should be combined...
if (combine_sources == TRUE){
# First find maximum value for fish_sum and use this to standardize legend of all maps in time series
max_scale<-max(year_geo_taxa_fish[["fish_sum"]], na.rm = TRUE)
# max_scale<-signif(max_fish, digits = 1) # This creates NA's on the map if max_scale ends up being less than max value
# Loop through each year, and plot single map of combined total production from all sources
for (y in 1:length(all_years)){
mapdat <- year_geo_taxa_fish %>%
filter(year==all_years[y]) %>%
full_join(worldmap, join_cols) %>%
arrange(desc(fish_sum))
if (is.na(fish_name)){
total_title <- paste("Global total seafood production in", all_years[y], sep = " ")
} else {
total_title <- paste("Global total seafood production of", tolower(fish_name), "in", all_years[y], sep=" ")
}
p1<-ggplot()+
geom_sf(data=mapdat, aes(fill=fish_sum, geometry=geometry))+ # When working with tibbles, Need to specify geometry column manually
labs(title=total_title)+
scale_fill_continuous(limits=c(0, max_scale), name = fish_unit)+
allplots
if (is.na(fish_name)){
totalfile <- paste("plot_map_TotalProduction-AllSources-", all_years[y], ".png", sep = "")
} else {
totalfile <- paste("plot_map_TotalProduction-AllSources-", fish_name, "-", all_years[y], ".png", sep="")
}
png(filename = totalfile, width=png_width, height=png_height)
print(p1)
dev.off()
}
# BUT if production sources should be kept separate
} else if (combine_sources == FALSE){
# Loop and plot separate maps for each production source and each year
for (i in 1:nlevels(year_geo_taxa_fish$source_name_en)){
# First get the production source
sourcedat<-year_geo_taxa_fish %>%
filter(source_name_en == levels(year_geo_taxa_fish$source_name_en)[i])
# First find maximum value for fish_sum (within a production source) and use this to standardize legend of all maps in time series
max_scale<-max(sourcedat[["fish_sum"]], na.rm = TRUE)
# Then loop through sourcedat and make plot for each year:
for (y in 1:length(all_years)){
mapdat <- sourcedat %>%
filter(year==all_years[y]) %>%
full_join(worldmap, join_cols) %>%
arrange(desc(fish_sum))
if (is.na(fish_name)){
next_title <- paste("Global", tolower(levels(year_geo_taxa_fish$source_name_en)[i]), "in", all_years[y], sep=" ")
} else {
next_title <- paste("Global", tolower(levels(year_geo_taxa_fish$source_name_en)[i]), "of", tolower(fish_name), "in", all_years[y], sep=" ")
}
p1<-ggplot()+
geom_sf(data=mapdat, aes(fill=fish_sum, geometry=geometry))+ # When working with tibbles, Need to specify geometry column manually
labs(title = next_title)+
scale_fill_continuous(limits=c(0, max_scale), name = fish_unit)+
allplots
if (is.na(fish_name)){
nextfile <- paste("plot_map_", levels(year_geo_taxa_fish$source_name_en)[i], "-", all_years[y], ".png", sep="")
} else {
nextfile <- paste("plot_map_", levels(year_geo_taxa_fish$source_name_en)[i], "-", fish_name, "-", all_years[y], ".png", sep="")
}
png(filename=nextfile, width=png_width, height=png_height)
print(p1)
dev.off()
}
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.