# Global knitr chunk options for the vignette: collapse source and output into
# one block, prefix printed output with "#>", suppress warnings, and render
# figures at 8 x 5.
knitr::opts_chunk$set(
  collapse   = TRUE,
  comment    = "#>",
  warning    = FALSE,
  fig.width  = 8,
  fig.height = 5
)
library(alohakez) library(tidyverse)
Shore Stations Program SIO Pier Water Temperature \ Daily sea-surface temperature (SST) measurements, collected and provided by the Shore Stations Program, La Jolla CA, 1916 - May 2019
# Peek at the first rows of the raw data set.
cce_sst %>%
  head()

# Storage type of every column. vapply() pins the result to a character
# vector (one entry per column), whereas sapply() can silently change its
# return type depending on the input.
vapply(cce_sst, typeof, character(1))
Filter valid data: note that only a flag value of 0 stands for good data \
# Valid observations: flag equal to 0 and a non-missing temperature.
watertemp <- cce_sst %>%
  filter(
    sea_surface_temperature_flag == 0,
    !is.na(sea_surface_temperature_c)
  )

# Invalid observations: a non-zero flag or a missing temperature.
sst_invalid <- cce_sst %>%
  filter(sea_surface_temperature_flag != 0 | is.na(sea_surface_temperature_c))

# Number of invalid observations per year (first 14 rows of the tally).
sst_invalid %>%
  group_by(year) %>%
  count() %>%
  head(14)
We see that the initial years contain a large amount of invalid/missing data \ Data from 1930 onward are more complete \ Data for 1916 and 2015 are incomplete. \ We should keep these in mind moving forward
# The six warmest valid observations (ties are kept by slice_max's default).
watertemp %>%
  slice_max(order_by = sea_surface_temperature_c, n = 6)

# The six coldest valid observations.
watertemp %>%
  slice_min(order_by = sea_surface_temperature_c, n = 6)
1) Calculate Annual average, lowest & highest temperature over years
# One row per year: mean, minimum and maximum SST, plus the spread between
# the yearly maximum and minimum (diff).
annual_temp <- watertemp %>%
  group_by(year) %>%
  summarize(
    annual_avg = mean(sea_surface_temperature_c),
    min_temp   = min(sea_surface_temperature_c),
    max_temp   = max(sea_surface_temperature_c),
    diff       = max_temp - min_temp
  )
2) Plot annual temperature over years
# Yearly mean SST as a line. 1916 and 2015 are dropped before plotting.
annual_temp %>%
  filter(year != 1916, year != 2015) %>%
  ggplot(aes(x = year, y = annual_avg)) +
  geom_path(color = "blue") +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
  labs(
    title = "Annual Mean Sea-Surface Temperature from 1916 to 2015",
    x = "Year",
    y = "Mean Sea-Surface Temperature (C)"
  ) +
  theme_bw()
3) Now plot the minimum and maximum temperature over years
# Yearly minimum (blue) and maximum (orange-red) SST on one set of axes.
# 1916 and 2015 are dropped before plotting.
annual_temp %>%
  filter(year != 1916, year != 2015) %>%
  ggplot(aes(x = year)) +
  geom_path(aes(y = min_temp), color = "blue") +
  geom_path(aes(y = max_temp), color = "orangered1") +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
  labs(
    title = "Lowest and Highest Single Day Sea-Surface Temperature",
    x = "Year",
    y = "Sea-Surface Temperature (C)"
  ) +
  theme_minimal()
4) See them on the same plot
# Yearly maximum, mean and minimum SST together. Each series is mapped to a
# legend key ("highest" / "mean" / "lowest") whose colour is set by the manual
# colour scale. 1916 and 2015 are dropped before plotting.
annual_temp %>%
  filter(year != 1916, year != 2015) %>%
  ggplot(aes(x = year)) +
  geom_path(aes(y = max_temp, color = "highest"), size = 0.6) +
  geom_path(aes(y = annual_avg, color = "mean"), size = 1) +
  geom_path(aes(y = min_temp, color = "lowest"), size = 0.7) +
  scale_colour_manual(
    name = "",
    values = c("highest" = "orangered1", "mean" = "black", "lowest" = "blue")
  ) +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
  labs(
    title = "Annual Sea-Surface Temperature from 1916 to 2015",
    x = "Year",
    y = "Sea-Surface Temperature (C)"
  ) +
  theme_minimal()
The sea surface temperature has been increasing in the past century as a result of global warming. \
5) Let's see if there's anything interesting about the max difference
# Yearly spread (max_temp - min_temp) over time.
# 1916 and 2015 are dropped before plotting.
annual_temp %>%
  filter(year != 1916, year != 2015) %>%
  ggplot(aes(x = year, y = diff)) +
  geom_path() +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
  labs(
    title = "Difference",
    x = "Year",
    y = "Sea-Surface Temperature (C)"
  )
1) Calculate monthly average, minimum, and maximum
# One row per (year, month): mean, minimum and maximum SST.
# The result stays grouped by year (summarize drops only the last grouping).
monthly_temp <- watertemp %>%
  group_by(year, month) %>%
  summarize(
    monthly_avg = mean(sea_surface_temperature_c),
    min_temp    = min(sea_surface_temperature_c),
    max_temp    = max(sea_surface_temperature_c)
  )
2) Let's see how temperature varies by month for more recent years
# Monthly mean SST for the years after 2005 and before 2015, drawn as one
# coloured line per year. The x axis is the month (as a discrete factor), so
# it is labelled "month"; the original label said "year".
monthly_temp %>%
  filter(year > 2005 & year < 2015) %>%
  ggplot() +
  geom_path(
    aes(
      x = factor(month),
      y = monthly_avg,
      group = factor(year),
      color = factor(year)
    ),
    size = 1
  ) +
  theme_minimal() +
  labs(
    x = "month",
    y = "monthly average temperature",
    color = "year",
    title = "temperature change monthly trends in recent years"
  )
Plotting them on same graph seems a little messy
3) Look at them separately
# Monthly mean SST for the years after 2002 and before 2015, one facet per year.
monthly_temp %>%
  filter(year > 2002 & year < 2015) %>%
  ggplot() +
  geom_path(
    aes(x = factor(month), y = monthly_avg, group = factor(year)),
    # A constant colour must be set outside aes(); inside aes() the string is
    # mapped as a one-level factor and drawn in the default palette colour
    # rather than dodgerblue.
    color = "dodgerblue",
    size = 1.2
  ) +
  theme_classic() +
  facet_wrap(~year) +
  # The x axis shows months, not years.
  labs(x = "month", y = "", title = "monthly average temperature in recent years") +
  theme(legend.position = "none")
When does temperature reach its highest every year? \ 1) find the day and create a variable that includes only month and day
# For every year keep the single row with the highest SST (first occurrence
# on ties), then add month_day: the observation's month and day placed in a
# common reference year so that dates from different years are comparable.
# 2224 is a leap year that is obviously in the future, so Feb 29 also parses.
highest_temp <- watertemp %>%
  mutate(date_pst = as.Date(date_pst, "%Y/%m/%d")) %>%
  group_by(year) %>%
  slice(which.max(sea_surface_temperature_c)) %>%
  mutate(month_day = as.Date(paste(2224, month, day, sep = "/")))

highest_temp %>%
  head()

highest_temp %>%
  tail()
2) Map it
# Calendar day of the yearly maximum SST, from 1930 onward. Each point on the
# path is annotated with the temperature recorded on that day.
highest_temp %>%
  filter(year >= 1930) %>%
  ggplot(aes(x = year, y = month_day)) +
  geom_path(color = "sienna1", size = 1) +
  geom_text(
    aes(label = sea_surface_temperature_c),
    angle = 10,
    size = 3.2,
    vjust = 0.5,
    color = "darkgreen"
  ) +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
  scale_y_date(date_breaks = "10 days", date_labels = "%b%d") +
  labs(y = "date", title = "highest temperature in year") +
  theme_classic()
The day varies, but it is within our expectations \ The highest temperature happens between late June and late September \
3) Let's try the same thing for lowest
# For every year keep the single row with the lowest SST (first occurrence on
# ties), then add month_day: month and day placed in the reference year 2224.
lowest_temp <- watertemp %>%
  mutate(date_pst = as.Date(date_pst, "%Y/%m/%d")) %>%
  group_by(year) %>%
  slice(which.min(sea_surface_temperature_c)) %>%
  mutate(month_day = as.Date(paste(2224, month, day, sep = "/")))

lowest_temp %>%
  head()

lowest_temp %>%
  tail()
4) Map it
# Calendar day of the yearly minimum SST (1930 onward) as a scatter plot,
# with one y-axis break per month.
lowest_temp %>%
  filter(year >= 1930) %>%
  ggplot() +
  geom_point(aes(x = year, y = month_day)) +
  scale_y_date(date_breaks = "1 month", date_labels = "%b%d")
It is hard to see any pattern or trend \ Let's modify the month_day variable. We'll choose November as a cutoff
# Rebuild lowest_temp with a shifted month_day: observations from November or
# December are placed in reference year 2223 and everything else in 2224, so
# a late-year minimum sorts just before the January-onward minima.
lowest_temp <- watertemp %>%
  mutate(date_pst = as.Date(date_pst, "%Y/%m/%d")) %>%
  group_by(year) %>%
  slice(which.min(sea_surface_temperature_c)) %>%
  mutate(
    month_day = as.Date(
      ifelse(
        month >= 11,
        paste(2223, month, day, sep = "/"),
        paste(2224, month, day, sep = "/")
      )
    )
  )

lowest_temp %>%
  head()

lowest_temp %>%
  tail()
now map again
# Calendar day of the yearly minimum SST (1930 onward), using the shifted
# month_day. Every point is annotated with the temperature recorded that day.
lowest_temp %>%
  filter(year >= 1930) %>%
  ggplot(aes(x = year, y = month_day)) +
  geom_point(color = "slateblue3") +
  geom_text(
    aes(label = sea_surface_temperature_c),
    angle = 10,
    size = 3,
    hjust = 1,
    vjust = 1,
    color = "brown"
  ) +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
  scale_y_date(date_breaks = "1 month", date_labels = "%b%d") +
  labs(y = "date", title = "lowest temperature in year")
We see an interesting result here \ This could be a result of incorrect entries or irregular weather on these days
Since this data does not have a season column, we can create a new variable, season, using dates \ So when do seasons (fall/winter/spring/summer) start? \ According to this article from National Geographic (https://www.nationalgeographic.org/encyclopedia/season/), it actually varies a bit each year \
# Add month_day to every valid observation: its month and day placed in the
# common reference year 2224, so dates can be compared regardless of year.
watertemp <- mutate(
  watertemp,
  month_day = as.Date(paste(2224, month, day, sep = "/"))
)
But for our purposes, we will let \ - fall start on September 22, - winter on December 21, - spring on March 20, - and summer on June 20
# Season boundaries, expressed in the same reference year (2224) as month_day.
fall_start <- as.Date("2224/09/22")
wint_start <- as.Date("2224/12/21")
sprg_start <- as.Date("2224/3/20")
summ_start <- as.Date("2224/6/20")

# Label every observation with its season. Winter wraps around the end of the
# year, hence the "or" between its two bounds. The labels are stored as a
# factor ordered fall, wint, sprg, summ.
watertemp <- watertemp %>%
  mutate(
    season = factor(
      case_when(
        month_day >= fall_start & month_day < wint_start ~ "fall",
        month_day >= wint_start | month_day < sprg_start ~ "wint",
        month_day >= sprg_start & month_day < summ_start ~ "sprg",
        month_day >= summ_start & month_day < fall_start ~ "summ"
      ),
      levels = c("fall", "wint", "sprg", "summ")
    )
  )
# watertemp %>% select(date_pst:day, month_day, season)

# Show the second and third factor levels.
levels(watertemp$season)[2:3]
we can check our data by taking a look at the seasonal averages across years
# Sanity check: mean, minimum, maximum and spread of SST per season,
# pooled over all years.
watertemp %>%
  group_by(season) %>%
  summarize(
    seasonal_avg = mean(sea_surface_temperature_c),
    min_temp     = min(sea_surface_temperature_c),
    max_temp     = max(sea_surface_temperature_c),
    diff         = max_temp - min_temp
  )
# %>% arrange(season)
The results make sense
1) Now we want annual averages based on season
# One row per (year, season): mean, minimum, maximum and spread of SST.
# The result stays grouped by year (summarize drops only the last grouping).
seasonal_temp <- watertemp %>%
  group_by(year, season) %>%
  summarize(
    seasonal_avg = mean(sea_surface_temperature_c),
    min_temp     = min(sea_surface_temperature_c),
    max_temp     = max(sea_surface_temperature_c),
    diff         = max_temp - min_temp
  )
# %>% arrange(season)
2) Plot it
# Seasonal mean SST per year (one coloured line per season) with the annual
# mean from annual_temp overlaid as a semi-transparent line.
seasonal_mean_plot <- seasonal_temp %>%
  # filter(year!=1916 & year!=2015) %>%
  ggplot(aes(x = year)) +
  geom_path(aes(y = seasonal_avg, color = season), size = 1) +
  geom_path(aes(y = annual_avg), size = 1, alpha = 0.5, data = annual_temp) +
  theme_minimal() +
  labs(x = "Year", y = "Seasonal Sea-Surface Temperature (C)")

# All seasons on a single panel.
seasonal_mean_plot

# One panel per season. annual_temp has no season column, so the annual mean
# is repeated in every panel.
seasonal_mean_plot + facet_wrap(~season)
Winter is below average, as we expected \ We see that spring is also below average. This is probably a result of water having a higher heat capacity (meaning it takes more energy to increase the temperature of water compared to other substances)
3) Like what we did before for annual temperature, see highest & lowest of that season
# Highest, mean and lowest SST per season and year, one panel per season.
# Each series is mapped to a legend key whose colour is set by the manual
# colour scale.
seasonal_temp %>%
  # filter(year!=1916 & year!=2015) %>%
  ggplot(aes(x = year)) +
  geom_path(aes(y = max_temp, color = "highest"), size = 0.6) +
  geom_path(aes(y = seasonal_avg, color = "mean"), size = 1) +
  geom_path(aes(y = min_temp, color = "lowest"), size = 0.7) +
  facet_wrap(~season) +
  scale_colour_manual(
    name = "",
    values = c("highest" = "orangered1", "mean" = "black", "lowest" = "blue")
  ) +
  labs(x = "Year", y = "Seasonal Sea-Surface Temperature (C)")
4) See difference \
# Within-season temperature range (max_temp - min_temp) per year, coloured by
# season, with the within-year range from annual_temp overlaid in the default
# line colour and a dashed reference line at 0.
# The y axis shows a range rather than a temperature, so it is labelled as
# such; the original label read "Seasonal Sea-Surface Temperature (C)".
seasonal_range_plot <- seasonal_temp %>%
  ggplot(aes(x = year, y = diff)) +
  geom_path(aes(color = season)) +
  geom_path(data = annual_temp) +
  geom_hline(yintercept = 0, linetype = "longdash") +
  labs(x = "Year", y = "Sea-Surface Temperature Range, Max - Min (C)")

# All seasons on a single panel.
seasonal_range_plot

# One panel per season. annual_temp has no season column, so the annual range
# is repeated in every panel.
seasonal_range_plot + facet_wrap(~season)
Compared to other seasons, when the ocean enters winter, the sea surface temperature does not vary much.
Shore Stations Program. 2019. Daily sea-surface temperature measurements, collected and provided by the Shore Stations Program, La Jolla CA, 1916 - May 2019 ver 4. Environmental Data Initiative. https://doi.org/10.6073/pasta/d8e2473a3de462bd4d75bad8f934a40a (Accessed 2021-05-03).
r knitr::spin_child(here::here("data-raw","cce_sst.R"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.