## An R script to summarize and look at various things
## based on discussion with Mark, Ian, and Colleeen
## Essentially, these things are meant to
## get everyone on the same page
## with how the urban scores are working for species
# packages
library(readr)
library(sf)
library(dplyr)
library(ggplot2)
library(scales)
library(ggcorrplot)
library(tidyr)
# assigned urbanness scores
urbanness <- readRDS("data/urbanness_summaries.RDS")
# read in cnc only data
cnc_data <- readRDS("data/inat_cnc.rds")
# read in regional/semi-continental lvl2 data
lvl2 <- readRDS("data/lvl2.rds")
# read in sf of cnc_area
cnc_area <- readRDS("data/cnc_area.rds")
# read in viirs_lights_obs
lights_data <- readRDS("data/viirs_lights_obs.RDS")
# lights for each town data
lights_towns <- readRDS("data/viirs_lights_towns.RDS")
# trim urbanness values to those with > 100 observations
urbanness.2 <- urbanness %>%
dplyr::filter(number_of_records>=100)
# this trims the number of species from 15,595 to 1,728
# look at the relationship between the 0.25, median, mean etc.
# for each species
# based off lights data (i.e., urbanization level)
lvl2 %>%
dplyr::filter(species %in% urbanness.2$species) %>%
left_join(., lights_data, by="catalogNumber") %>%
group_by(species) %>%
summarize(med=median(lights, na.rm=TRUE),
mean=mean(lights, na.rm=TRUE),
quantile_0.25=quantile(lights, 0.25, na.rm=TRUE),
quantile_0.75=quantile(lights, 0.75, na.rm=TRUE)) %>%
dplyr::select(-species) %>%
cor(.) %>%
ggcorrplot(lab=TRUE)
ggsave("outputs/graphics/distribution_summary_correlation.png", width=6.8, height=5, units="in")
# all highly correlated
# so will continue with the 'median' for now as the urban score
# plot histogram of these 1700 species
ggplot(urbanness.2, aes(x=median_lights))+
geom_histogram(fill="orange", color="black", bins=40)+
theme_classic()+
theme(axis.text=element_text(color="black"))+
ylab("Count")+
xlab("Urban score (median lights)")
ggsave("outputs/graphics/urban_score_histogram.png", width=6.8, height=5, units="in")
# re make this plot, but with a log-transformed axis
ggplot(urbanness.2, aes(x=median_lights))+
geom_histogram(fill="orange", color="black", bins=40)+
theme_classic()+
scale_x_log10()+
theme(axis.text=element_text(color="black"))+
ylab("Count")+
xlab("log Urban score (median lights)")
ggsave("outputs/graphics/urban_score_histogram_log-scale.png", width=6.8, height=5, units="in")
# now try and get a list of 6 or so species to plot where they occur along this axis
# first create a tiny dataframe with this
example_species <- urbanness.2 %>%
dplyr::filter(species %in% c("Acalypha virginica",
"Acer negundo",
"Oxydendrum arboreum",
"Iris pseudacorus",
"Alces alces",
"Vicia villosa")) %>%
mutate(y=c(40, 70, 60, 90, 170, 120))
# now combine with previous plot
ggplot()+
geom_histogram(data=urbanness.2, aes(x=median_lights),
fill="orange", color="black", bins=40)+
geom_segment(data=example_species, aes(x=median_lights, xend=median_lights,
y=y, yend=0), color="red",
size=1.5, linetype="dashed")+
geom_label(data=example_species, aes(x=median_lights, y=y, label=species))+
theme_classic()+
scale_x_log10()+
theme(axis.text=element_text(color="black"))+
ylab("Count")+
xlab("log Urban score (median lights)")
ggsave("outputs/graphics/urban_score_histogram_log-scale_with_examples.png", width=7.5, height=5, units="in")
# make example plots of the distributions
# for our 6 example species
light_dat_subsetted <- lvl2 %>%
dplyr::filter(species %in% example_species$species) %>%
left_join(., lights_data, by="catalogNumber") %>%
group_by(species) %>%
mutate(med_lights=median(lights, na.rm=TRUE))
ggplot(light_dat_subsetted, aes(x=lights, fill=species))+
geom_density(color="black")+
scale_x_log10()+
facet_wrap(~species)+
theme_bw()+
theme(axis.text=element_text(color="black"))+
xlab(bquote('Average radiance ('* 'nW' ~cm^-2~sr^-1*')'))+
ylab("Density")+
geom_vline(aes(xintercept=med_lights), colour='red')+
guides(fill=FALSE)+
scale_fill_brewer(palette="Dark2")
ggsave("outputs/graphics/species_distribution_examples.png", width=7.5, height=5, units="in")
# plot the same thing, but with free scales
ggplot(light_dat_subsetted, aes(x=lights, fill=species))+
geom_density(color="black")+
scale_x_log10()+
facet_wrap(~species, scales="free_y")+
theme_bw()+
theme(axis.text=element_text(color="black"))+
xlab(bquote('Average radiance ('* 'nW' ~cm^-2~sr^-1*')'))+
ylab("Density")+
geom_vline(aes(xintercept=med_lights), colour='red')+
guides(fill=FALSE)+
scale_fill_brewer(palette="Dark2")
ggsave("outputs/graphics/species_distribution_examples_free_y_scales.png", width=7.5, height=5, units="in")
# Now lets look at the relationship between the 'continental/regional' scores
# and the score sif they were assigned locally
# first get a list of the 100 most abundant species within the cnc area
common_species_in_cnc <- cnc_data %>%
group_by(species) %>%
summarize(N=n()) %>%
dplyr::filter(N>=50) %>%
.$species
cnc_urbanness <- cnc_data %>%
dplyr::filter(species %in% common_species_in_cnc) %>%
left_join(., lights_data, by="catalogNumber") %>%
group_by(species) %>%
summarize(cnc_urban_score=median(lights, na.rm=TRUE))
# now look at the cnc urban score versus the regional urban score
plot_temp <- cnc_urbanness %>%
left_join(., urbanness, by="species") %>%
dplyr::select(species, median_lights, number_of_records, cnc_urban_score) %>%
rename(regional_urban_score=median_lights) %>%
dplyr::filter(number_of_records>=100)
ggplot(plot_temp, aes(x=cnc_urban_score, y=regional_urban_score))+
geom_point()+
theme_bw()+
theme(axis.text=element_text(color="black"))+
xlab("CNC urbanness (log)")+
ylab("Regional urbanness (log)")+
scale_x_log10(labels=comma)+
scale_y_log10(labels=comma)+
geom_smooth(method="lm", color="orange")+
ggtitle(paste0("N = ", nrow(plot_temp), "species"))
ggsave("outputs/graphics/regional_vs_local_urbanness_scores.png", width=7.5, height=5, units="in")
# number of obs in CNC area that will be used
# i.e., removing the species in the cnc area which have < 100 observations througout
# the level 2 terra region as a whole
# first get list of species with >100 obs
greater_than_100_obs <- urbanness %>%
dplyr::filter(number_of_records>=100) %>%
.$species
# remove obs
cnc_trimmed <- cnc_data %>%
dplyr::filter(species %in% greater_than_100_obs)
(nrow(cnc_trimmed)/nrow(cnc_data))*100
# Now do some spatial stuff
# looking at neighborhoods as the spatial unit of interest
# read in towns
towns <- st_read("data/towns_in_cnc_area/towns_in_cnc_area.shp")
inat_points <- st_as_sf(cnc_data, coords=c("decimalLongitude", "decimalLatitude"), crs=st_crs(cnc_area))
cnc_trimmed_points <- st_as_sf(cnc_trimmed, coords=c("decimalLongitude", "decimalLatitude"),
crs=st_crs(towns))
towns_joined <- cnc_trimmed_points %>%
st_intersects(towns) %>%
as.data.frame() %>%
right_join(., cnc_trimmed_points %>%
mutate(row.id=1:nrow(cnc_trimmed_points)), by="row.id") %>%
left_join(., towns %>%
st_set_geometry(NULL) %>%
mutate(col.id=1:nrow(towns)), by="col.id")
# make a histogram of the number of data points from
# all the towns
towns_joined %>%
group_by(TOWN) %>%
summarize(N=n()) %>%
arrange(N) %>%
ggplot(., aes(x=N))+
geom_histogram(color="black", fill="blue")+
theme_bw()+
theme(axis.text=element_text(color="black"))+
xlab("Number of observations")+
ylab("Count")
ggsave("outputs/graphics/number_of_obs_per_town.png", width=7.5, height=5, units="in")
sample_size <- towns_joined %>%
group_by(TOWN) %>%
summarize(N=n())
plot_sample_size_dat <- towns %>%
dplyr::select(TOWN) %>%
left_join(., sample_size, by="TOWN")
ggplot()+
geom_sf(data=cnc_area)+
geom_sf(data=plot_sample_size_dat, aes(fill=N))+
theme_classic()+
ggtitle("Number of iNat submissions")
ggsave("outputs/graphics/number_of_obs_per_town_map.png", width=7.5, height=5, units="in")
ggplot()+
geom_sf(data=cnc_area)+
geom_sf(data=plot_sample_size_dat, aes(fill=log10(N)))+
scale_fill_continuous(labels=comma)+
theme_classic()+
ggtitle("Number of iNat submissions")
ggsave("outputs/graphics/number_of_obs_per_town_map_log10.png", width=7.5, height=5, units="in")
# Now start looking at the relationship between
# the night-time lights within a town
# and the observation sampling within a town
# let's pick 6 towns and then remake the plots for the 'species' example above
example_towns <- c("WALTHAM", "LINCOLN", "PLYMOUTH", "WELLESLEY", "CONCORD", "QUINCY")
# make example plots of the distributions
# for our 6 example cities
town_dat_subsetted <- lights_towns %>%
dplyr::filter(TOWN %in% example_towns) %>%
group_by(TOWN) %>%
mutate(med_lights=median(viirs_values, na.rm=TRUE)) %>%
ungroup() %>%
dplyr::filter(complete.cases(.))
ggplot(town_dat_subsetted, aes(x=viirs_values, fill=TOWN))+
geom_density(color="black")+
scale_x_log10()+
facet_wrap(~TOWN)+
theme_bw()+
theme(axis.text=element_text(color="black"))+
xlab(bquote('Average radiance ('* 'nW' ~cm^-2~sr^-1*')'))+
ylab("Density")+
geom_vline(aes(xintercept=med_lights), colour='red')+
guides(fill=FALSE)+
scale_fill_brewer(palette="Dark2")
ggsave("outputs/graphics/town_light_distribution_examples.png", width=7.5, height=5, units="in")
# get the median lights of all pixels for each town
town_urban_summary <- lights_towns %>%
group_by(TOWN) %>%
summarize(median_underlying_score=median(viirs_values, na.rm=TRUE),
sd_underlying_score=sd(viirs_values, na.rm=TRUE),
number_pixels=n(viirs_values))
# now to make sure this makes sense
# lets make a map showing the median scores for each town polygon
plot_median_scores_dat <- towns %>%
dplyr::select(TOWN) %>%
left_join(., town_urban_summary, by="TOWN")
ggplot()+
geom_sf(data=cnc_area)+
geom_sf(data=plot_median_scores_dat, aes(fill=log(median_underlying_score)))+
theme_classic()+
ggtitle("Urbanness of towns")
ggsave("outputs/graphics/map_of_town_urbanness.png", width=7.5, height=5, units="in")
# it appears to make logical sense
# now we want to look at the distribution of the actual observations in a given town/spatial unit
# once again, use the example six towns from above
town_dat_observations <- towns_joined %>%
left_join(., lights_data, by="catalogNumber") %>%
dplyr::filter(TOWN %in% example_towns) %>%
group_by(TOWN) %>%
mutate(med_lights=median(lights, na.rm=TRUE)) %>%
ungroup()
ggplot(town_dat_observations, aes(x=lights, fill=TOWN))+
geom_density(color="black")+
scale_x_log10()+
facet_wrap(~TOWN)+
theme_bw()+
theme(axis.text=element_text(color="black"))+
xlab(bquote('Average radiance ('* 'nW' ~cm^-2~sr^-1*')'))+
ylab("Density")+
geom_vline(aes(xintercept=med_lights), colour='red')+
guides(fill=FALSE)+
scale_fill_brewer(palette="Dark2")
ggsave("outputs/graphics/town_observation_distribution_examples.png", width=7.5, height=5, units="in")
# now summarize the observations within a town, regardless of species
town_observations_summary <- towns_joined %>%
group_by(TOWN) %>%
left_join(., lights_data, by="catalogNumber") %>%
summarize(observation_median=median(lights, na.rm=TRUE),
observation_sd=sd(lights, na.rm=TRUE))
# look at the relationship between the town observations
# and the town underlying pixels
town_observations_summary %>%
left_join(., town_urban_summary) %>%
ggplot(., aes(x=observation_median, median_underlying_score))+
geom_point()+
scale_x_log10()+
scale_y_log10()+
theme_bw()+
theme(axis.text=element_text(color="black"))+
xlab("Median score of all observations")+
ylab("Median score of pixels in a town")+
geom_smooth(method="lm", color="orange")
ggsave("outputs/graphics/relationship_between_sampling_and_town_urbanization.png", width=7.5, height=5, units="in")
# now summarize the median of all the species in a town
# for a third distribution to compare
town_species_summary <- towns_joined %>%
left_join(., urbanness) %>%
group_by(TOWN) %>%
summarize(species_median_all_obs=median(median_lights),
sd_all_obs=sd(median_lights),
number_obs=n(species),
number_species=length(unique(species))) %>%
left_join(., towns_joined %>%
left_join(., urbanness) %>%
dplyr::select(TOWN, species, median_lights) %>%
distinct() %>%
group_by(TOWN) %>%
summarize(species_median_distinct=median(median_lights),
sd_median_distinct=sd(median_lights)), by="TOWN")
# plot an example of two towns, with the three distributions for each town
town_dat_lights_ex <- town_dat_subsetted %>%
dplyr::filter(TOWN %in% c("WALTHAM", "CONCORD")) %>%
dplyr::rename(lights=viirs_values) %>%
dplyr::select(TOWN, lights, med_lights) %>%
mutate(distribution="Town pixels")
town_dat_obs_ex <- towns_joined %>%
left_join(., lights_data, by="catalogNumber") %>%
dplyr::filter(TOWN %in% c("WALTHAM", "CONCORD")) %>%
group_by(TOWN) %>%
mutate(med_lights=median(lights, na.rm=TRUE)) %>%
ungroup() %>%
dplyr::select(TOWN, lights, med_lights) %>%
mutate(distribution="Town observations")
town_dat_species_ex <- towns_joined %>%
left_join(., urbanness) %>%
dplyr::filter(TOWN %in% c("WALTHAM", "CONCORD")) %>%
dplyr::select(TOWN, median_lights) %>%
dplyr::rename(lights=median_lights) %>%
group_by(TOWN) %>%
mutate(med_lights=median(lights)) %>%
ungroup() %>%
mutate(distribution="Species-specific urban scores")
bind_rows(town_dat_lights_ex,
town_dat_obs_ex,
town_dat_species_ex) %>%
ggplot(., aes(x=lights, fill=distribution))+
geom_density(color="black", alpha=0.6)+
scale_x_log10()+
facet_wrap(~TOWN)+
theme_bw()+
theme(axis.text=element_text(color="black"))+
xlab(bquote('Average radiance ('* 'nW' ~cm^-2~sr^-1*')'))+
ylab("Density")+
geom_vline(aes(xintercept=med_lights), colour='red')+
scale_fill_brewer(palette="Dark2")
ggsave("outputs/graphics/two_towns_example_of_three_distributions.png", width=7.5, height=5, units="in")
# create a large table that can be used for comparisons
summary_dataframe <- town_urban_summary %>%
left_join(., town_observations_summary) %>%
left_join(., town_species_summary)
write_csv(summary_dataframe, "C:/Users/CTC/Desktop/summary_of_towns_for_Mark.csv")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.