# Chunk defaults for the whole vignette: figure size, warnings hidden, and
# printed output prefixed with "#>" and collapsed into the source block.
knitr::opts_chunk$set(
  fig.width = 8,
  fig.height = 5,
  warning = FALSE,
  comment = "#>",
  collapse = TRUE
)
library(alohakez)
library(tidyverse)

California Current Ecosystem (CCE) LTER

Shore Stations Program SIO Pier Water Temperature \
Daily sea-surface temperature (SST) measurements, collected and provided by the Shore Stations Program, La Jolla CA, 1916 - May 2019

Let's take a look at the data first \

# Preview the first rows of the sea-surface temperature table.
cce_sst %>% head()
# Report the storage type of every column. vapply() pins the result to exactly
# one character string per column, so the output is always a named character
# vector (sapply() can silently change its return shape).
vapply(cce_sst, typeof, character(1))

Filter valid data: Notice only the value 0 stands for good data \

# Valid observations: quality flag equal to 0 and a recorded temperature.
# filter() drops rows where a condition evaluates to NA, so a missing flag is
# treated as "not valid" here.
watertemp <- cce_sst %>%
  filter(sea_surface_temperature_flag == 0,
         !is.na(sea_surface_temperature_c))

# Everything else. The explicit is.na() test on the flag makes this the exact
# complement of `watertemp`: without it, a row with a missing flag and a
# recorded temperature would evaluate to NA and be dropped from both subsets.
sst_invalid <- cce_sst %>%
  filter(is.na(sea_surface_temperature_flag) |
           sea_surface_temperature_flag != 0 |
           is.na(sea_surface_temperature_c))

# Number of invalid/missing observations per year (first 14 years shown).
sst_invalid %>%
  count(year) %>%
  head(14)

We see that the initial years contain a large amount of invalid/missing data. \
Data from 1930 onward are more complete. \
Data for 1916 and 2015 are incomplete. \
We should keep these in mind moving forward.

What have been the lowest and highest temperatures?
# The six warmest valid observations on record ...
slice_max(watertemp, sea_surface_temperature_c, n = 6)
# ... and the six coldest.
slice_min(watertemp, sea_surface_temperature_c, n = 6)

Annual

1) Calculate Annual average, lowest & highest temperature over years

# One row per year: mean, coldest and warmest daily reading, plus the spread
# between the warmest and coldest reading (`diff`).
annual_temp <- summarise(
  group_by(watertemp, year),
  annual_avg = mean(sea_surface_temperature_c),
  min_temp = min(sea_surface_temperature_c),
  max_temp = max(sea_surface_temperature_c),
  diff = max_temp - min_temp
)

2) Plot annual temperature over years

# Line chart of the annual mean temperature. The years 1916 and 2015 are
# excluded before plotting.
ggplot(
  filter(annual_temp, year != 1916, year != 2015),
  aes(x = year, y = annual_avg)
) +
  geom_path(color = "blue") +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
  labs(
    title = "Annual Mean Sea-Surface Temperature from 1916 to 2015",
    x = "Year",
    y = "Mean Sea-Surface Temperature (C)"
  ) +
  theme_bw()

3) Now plot the minimum and maximum temperature over years

# Coldest (blue) and warmest (orange-red) single-day reading of each year,
# with 1916 and 2015 excluded.
ggplot(
  filter(annual_temp, year != 1916, year != 2015),
  aes(x = year)
) +
  geom_path(aes(y = min_temp), color = "blue") +
  geom_path(aes(y = max_temp), color = "orangered1") +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
  labs(
    title = "Lowest and Highest Single Day Sea-Surface Temperature",
    x = "Year",
    y = "Sea-Surface Temperature (C)"
  ) +
  theme_minimal()

4) See them on the same plot

# Annual maximum, mean and minimum on one set of axes. Each line maps a
# constant label to `color` so that scale_colour_manual() can attach the
# legend entry and the actual colour.
series_colours <- c("highest" = "orangered1", "mean" = "black", "lowest" = "blue")

ggplot(
  filter(annual_temp, year != 1916, year != 2015),
  aes(x = year)
) +
  geom_path(aes(y = max_temp, color = "highest"), size = 0.6) +
  geom_path(aes(y = annual_avg, color = "mean"), size = 1) +
  geom_path(aes(y = min_temp, color = "lowest"), size = 0.7) +
  scale_colour_manual(name = "", values = series_colours) +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
  labs(
    title = "Annual Sea-Surface Temperature from 1916 to 2015",
    x = "Year",
    y = "Sea-Surface Temperature (C)"
  ) +
  theme_minimal()

The sea surface temperature has been increasing in the past century as a result of global warming. \

5) Let's see if there's anything interesting about the max difference

# Within-year spread (warmest minus coldest reading) over time, with 1916 and
# 2015 excluded.
ggplot(
  filter(annual_temp, year != 1916, year != 2015),
  aes(x = year, y = diff)
) +
  geom_path() +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
  labs(
    title = "Difference",
    x = "Year",
    y = "Sea-Surface Temperature (C)"
  )

Monthly

1) Calculate monthly average, minimum, and maximum

# One row per (year, month): mean, coldest and warmest daily reading.
# summarise() removes only the last grouping level, so the result is still
# grouped by year.
monthly_temp <- summarise(
  group_by(watertemp, year, month),
  monthly_avg = mean(sea_surface_temperature_c),
  min_temp = min(sea_surface_temperature_c),
  max_temp = max(sea_surface_temperature_c)
)

2) Let's see how temp varies over month for more recent years

# Monthly mean temperature for 2006-2014, drawn as one coloured line per year.
# The x axis shows the month (as a discrete factor), so it is labelled
# "month"; the year is carried by the colour legend.
monthly_temp %>%
  filter(year > 2005 & year < 2015) %>%
  ggplot() +
  geom_path(aes(x = factor(month), y = monthly_avg,
                group = factor(year), color = factor(year)), size = 1) +
  theme_minimal() +
  labs(x = "month", y = "monthly average temperature",
       color = "year",
       title = "temperature change monthly trends in recent years")

Plotting them on same graph seems a little messy

3) Look at them separately

# Monthly mean temperature for 2003-2014, one facet per year.
# The line colour is a fixed value, so it is set outside aes(). Inside aes()
# the string "dodgerblue" would be treated as a data value and mapped through
# the default colour scale instead of being used as the colour itself.
# With no colour mapping there is no legend to hide.
monthly_temp %>%
  filter(year > 2002 & year < 2015) %>%
  ggplot() +
  geom_path(aes(x = factor(month), y = monthly_avg, group = factor(year)),
            color = "dodgerblue", size = 1.2) +
  theme_classic() +
  facet_wrap(~year) +
  # The x axis shows the month within each facet's year.
  labs(x = "month", y = "",
       title = "monthly average temperature in recent years")

Plotting with dates

When does temperature reach its highest every year? \ 1) find the day and create a variable that includes only month and day

# For every year keep the single row holding the warmest reading
# (which.max() returns the first one when there are ties).
# `month_day` re-expresses that row's month and day inside one common
# reference year so that dates from different years share an axis. 2224 is a
# leap year that is obviously in the future, so Feb 29 still parses.
highest_temp <- watertemp %>%
  mutate(date_pst = as.Date(date_pst, "%Y/%m/%d")) %>%
  group_by(year) %>%
  slice(which.max(sea_surface_temperature_c)) %>%
  mutate(month_day = as.Date(paste(2224, month, day, sep = "/")))

head(highest_temp)
tail(highest_temp)

2) Map it

# Calendar date of each year's warmest reading (from 1930 on), with the
# temperature itself printed next to every point of the path.
ggplot(
  filter(highest_temp, year >= 1930),
  aes(x = year, y = month_day)
) +
  geom_path(color = "sienna1", size = 1) +
  geom_text(
    aes(label = sea_surface_temperature_c),
    angle = 10, size = 3.2, vjust = 0.5, color = "darkgreen"
  ) +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
  scale_y_date(date_breaks = "10 days", date_labels = "%b%d") +
  labs(y = "date", title = "highest temperature in year") +
  theme_classic()

The day varies, but it is within our expectations \ The highest temperature happens between late June and late September \

3) Let's try the same thing for lowest

# Same approach as for the warmest day: keep the row with the coldest reading
# of every year and place its month/day inside the reference year 2224.
lowest_temp <- watertemp %>%
  mutate(date_pst = as.Date(date_pst, "%Y/%m/%d")) %>%
  group_by(year) %>%
  slice(which.min(sea_surface_temperature_c)) %>%
  mutate(month_day = as.Date(paste(2224, month, day, sep = "/")))

head(lowest_temp)
tail(lowest_temp)

4) Map it

# First look: calendar date of each year's coldest reading, 1930 onward.
ggplot(filter(lowest_temp, year >= 1930)) +
  geom_point(aes(x = year, y = month_day)) +
  scale_y_date(date_breaks = "1 month", date_labels = "%b%d")

It is hard to see any pattern or trend. \
Let's modify the month_day variable. We'll choose November as a cutoff.

# Rebuild the coldest-day table, this time placing November and December in
# the reference year 2223 and every other month in 2224. On a date axis the
# late-autumn dates then sit directly before January instead of at the far
# end of the scale.
lowest_temp <- watertemp %>%
  mutate(date_pst = as.Date(date_pst, "%Y/%m/%d")) %>%
  group_by(year) %>%
  slice(which.min(sea_surface_temperature_c)) %>%
  mutate(
    month_day = as.Date(
      ifelse(
        month >= 11,
        paste(2223, month, day, sep = "/"),
        paste(2224, month, day, sep = "/")
      )
    )
  )

head(lowest_temp)
tail(lowest_temp)

now map again

# Calendar date of each year's coldest reading (from 1930 on), labelled with
# the temperature that was recorded.
ggplot(
  filter(lowest_temp, year >= 1930),
  aes(x = year, y = month_day)
) +
  geom_point(color = "slateblue3") +
  geom_text(
    aes(label = sea_surface_temperature_c),
    angle = 10, size = 3, hjust = 1, vjust = 1, color = "brown"
  ) +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
  scale_y_date(date_breaks = "1 month", date_labels = "%b%d") +
  labs(y = "date", title = "lowest temperature in year")

We see an interesting result here. \
This could be a result of incorrect entries or irregular weather on these days.

Seasonal plots

Since this data does not have a season column, we can create a new variable `season` using dates. \
So when do seasons (fall/winter/spring/summer) start? \
According to [this article from National Geographic](https://www.nationalgeographic.org/encyclopedia/season/), it actually varies a bit each year.

# Give every observation a `month_day` date inside the common reference year
# 2224 so that calendar positions can be compared across years.
watertemp <- mutate(
  watertemp,
  month_day = as.Date(paste(2224, month, day, sep = "/"))
)

But for our purposes, we will let:

- fall start on September 22,
- winter start on December 21,
- spring start on March 20, and
- summer start on June 20.

# Fixed season boundaries, expressed in the reference year 2224 and listed in
# calendar order.
sprg_start <- as.Date("2224/3/20")   # spring: March 20
summ_start <- as.Date("2224/6/20")   # summer: June 20
fall_start <- as.Date("2224/09/22")  # fall: September 22
wint_start <- as.Date("2224/12/21")  # winter: December 21

# Label every observation with its season. The four date ranges do not
# overlap, so the order of the case_when() branches does not matter; they are
# listed in calendar order. Winter wraps around the turn of the year, hence
# the `|`.
# The factor levels fix the display order: fall, wint, sprg, summ.
watertemp <- watertemp %>%
  mutate(
    season = factor(
      case_when(
        month_day >= sprg_start & month_day < summ_start ~ "sprg",
        month_day >= summ_start & month_day < fall_start ~ "summ",
        month_day >= fall_start & month_day < wint_start ~ "fall",
        month_day >= wint_start | month_day < sprg_start ~ "wint"
      ),
      levels = c("fall", "wint", "sprg", "summ")
    )
  )
# watertemp %>% select(date_pst:day, month_day, season)
# Print the second and third factor levels.
levels(watertemp$season)[2:3]

we can check our data by taking a look at the seasonal averages across years

# Sanity check: per-season mean, minimum, maximum and spread pooled over all
# years.
watertemp %>%
  group_by(season) %>%
  summarise(
    seasonal_avg = mean(sea_surface_temperature_c),
    min_temp = min(sea_surface_temperature_c),
    max_temp = max(sea_surface_temperature_c),
    diff = max_temp - min_temp
  )

The results make sense.

1) Now we want annual averages based on season

# One row per (year, season): mean, coldest and warmest reading, and the
# spread between them. The result stays grouped by year, because summarise()
# drops only the last grouping level.
seasonal_temp <- mutate(
  summarise(
    group_by(watertemp, year, season),
    seasonal_avg = mean(sea_surface_temperature_c),
    min_temp = min(sea_surface_temperature_c),
    max_temp = max(sea_surface_temperature_c)
  ),
  diff = max_temp - min_temp
)

2) Plot it

# Seasonal means (one coloured line per season) drawn over the annual mean
# (semi-transparent black line, taken from `annual_temp`).
# All years are kept here; no filter on 1916/2015 is applied.
seasonal_avg_plot <- ggplot(seasonal_temp, aes(x = year)) +
  geom_path(aes(y = seasonal_avg, color = season), size = 1) +
  geom_path(aes(y = annual_avg), size = 1, alpha = 0.5, data = annual_temp) +
  theme_minimal() +
  labs(x = "Year", y = "Seasonal Sea-Surface Temperature (C)")

# All seasons in one panel ...
seasonal_avg_plot

# ... and one panel per season. `annual_temp` has no `season` column, so the
# annual line is repeated in every panel.
seasonal_avg_plot + facet_wrap(~season)

Winter is below average, we are expecting that \ We see that spring is also below average. This is probably a result of water having a higher heat capacity (meaning it takes more energy to increase the temperature of water compared to other substances)

3) Like what we did before for annual temperature, see highest & lowest of that season

# Per-season warmest, mean and coldest reading of every year, one panel per
# season. Constant labels are mapped to `color` so that the manual scale can
# supply both the legend entries and the colours.
# All years are kept here; no filter on 1916/2015 is applied.
ggplot(seasonal_temp, aes(x = year)) +
  geom_path(aes(y = max_temp, color = "highest"), size = 0.6) +
  geom_path(aes(y = seasonal_avg, color = "mean"), size = 1) +
  geom_path(aes(y = min_temp, color = "lowest"), size = 0.7) +
  facet_wrap(~season) +
  scale_colour_manual(
    name = "",
    values = c("highest" = "orangered1", "mean" = "black", "lowest" = "blue")
  ) +
  labs(x = "Year", y = "Seasonal Sea-Surface Temperature (C)")

4) See difference \

# Spread (warmest minus coldest reading) per season and year, coloured by
# season, with the whole-year spread from `annual_temp` as a black reference
# line and a dashed baseline at zero.
seasonal_range_plot <- ggplot(seasonal_temp, aes(x = year, y = diff)) +
  geom_path(aes(color = season)) +
  geom_path(data = annual_temp) +
  geom_hline(yintercept = 0, linetype = "longdash") +
  labs(x = "Year", y = "Seasonal Sea-Surface Temperature (C)")

# All seasons in one panel ...
seasonal_range_plot

# ... and one panel per season (the annual line appears in every panel
# because `annual_temp` carries no `season` column).
seasonal_range_plot + facet_wrap(~season)

Compared to other seasons, when the ocean enters winter, the sea surface temperature does not vary much.

Citation

Shore Stations Program. 2019. Daily sea-surface temperature measurements, collected and provided by the Shore Stations Program, La Jolla CA, 1916 - May 2019 ver 4. Environmental Data Initiative. https://doi.org/10.6073/pasta/d8e2473a3de462bd4d75bad8f934a40a (Accessed 2021-05-03).

How we processed the raw data

r knitr::spin_child(here::here("data-raw","cce_sst.R"))



karenezhao/alohakez documentation built on June 27, 2021, 10:22 p.m.