knitr::opts_chunk$set(echo = TRUE)

Creating BC Lightstation Data on SST and Salinity

Joe's original code - will need adapting slightly. Should these be temporal objects or spatio-temporal? I'd assumed just simple time series, each corresponding to a location.

This markdown document shows how the sea-surface temperature (SST) and salinity data collected by the BC lightstations/lighthouses are compiled into PACea.

Read SST and salinity data from the BC Lightstations

library(dplyr)

dataset_commonname <-
  c('SST_Monthly_BC_Lightstation',
    'Salinity_Monthly_BC_Lightstation')

# read in shapefile containing the lighthouse locations
Lightstations <-
sf::st_read(
  dsn='./BC_Lightstation_Data_SHP_Files/BC_Lighthouse_DATA.shp'
)

# rename Langara lighthouse to match later data merge
Lightstations <-
  Lightstations %>%
  dplyr::mutate(LIGHSTATIO = recode(LIGHSTATIO,'LANGARA POINT LIGHTSTATION'='LANGARA ISLAND LIGHTSTATION'))
plot(Lightstations)
nstations <- 12
nmonths <- 12
# 1914 is the first year of data from the currently 'active' lightstations
nyears <- length(1914:lubridate::year(Sys.Date()))

# Create a 'complete' data.frame skeleton to fill in
Data_Lightstations <- data.frame(Location=rep(NA,nstations*nyears*nmonths),
                                 Year=rep(rep(1914:lubridate::year(Sys.Date()),
                                              each=nmonths),times=nstations),
                                 Month=rep(1:12, times=nyears*nstations),
                                 SST=rep(NA, nstations*nyears*nmonths),
                                 Salinity=rep(NA, nstations*nyears*nmonths),
                                 Longitude=NA,
                                 Latitude=NA)
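
As an aside, the same skeleton could be built more compactly with expand.grid(), which enumerates every Month x Year x Location combination in the same order (first argument varying fastest). A minimal sketch, using placeholder station names since the real names are only recovered from the csv filenames inside the loop below:

# Alternative skeleton via expand.grid() -- station names are placeholders
skeleton <- expand.grid(Month=1:12,
                        Year=1914:lubridate::year(Sys.Date()),
                        Location=paste0('Station_', 1:nstations),
                        stringsAsFactors=FALSE)
skeleton[,c('SST','Salinity','Longitude','Latitude')] <- NA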

# Loop through the currently active stations and obtain SST and salinity
count_SST <- 1
count_salinity <- 1
# loop through the files contained in the directory
for(filename in list.files('./DATA_Active_Lightstations', pattern = '\\.csv$') )
{
  if(grepl(filename, pattern = 'Temperatures'))
  {
    Data_Lightstations$Location[
      (count_SST-1)*nyears*nmonths + (1:(nyears*nmonths))
    ] <-
      substr(filename, start=1, stop = stringr::str_locate_all(filename,'_')[[1]][2,]-1)

    # Extract the lat/lon coords of the lightstation
    Data_Lightstations$Longitude[
      (count_SST-1)*nyears*nmonths + (1:(nyears*nmonths))
    ] <- as.numeric(
      Lightstations$LONGITUDE[grepl(
          pattern=substr(filename, start=1, stop = stringr::str_locate_all(filename,'_')[[1]][2,]-1),
          gsub(Lightstations$LIGHSTATIO, pattern = ' ', replacement = '_'),
          ignore.case = T)])
    Data_Lightstations$Latitude[
      (count_SST-1)*nyears*nmonths + (1:(nyears*nmonths))
    ] <- as.numeric(
      Lightstations$LATITUDE__[grepl(
          pattern=substr(filename, start=1, stop = stringr::str_locate_all(filename,'_')[[1]][2,]-1),
          gsub(Lightstations$LIGHSTATIO, pattern = ' ', replacement = '_'),
          ignore.case = T)])

    # read the csv data and merge by Location Year Month
    # Convert to the data.frame into long format by creating 'Month' variable
    tmp <- read.csv(paste0('./DATA_Active_Lightstations/', filename),
                    header = T, skip = 1)
    tmp <- tmp %>%
      dplyr::select(YEAR, JAN, FEB, MAR, APR, MAY, JUN, JUL, AUG, SEP, OCT, NOV, DEC) %>%
      tidyr::pivot_longer(
        cols=c('JAN','FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC'),
        names_to = 'Month',
        values_to = 'SST'
      ) %>%
      dplyr::mutate(Month=recode(Month, JAN=1, FEB=2, MAR=3,
                                 APR=4, MAY=5, JUN=6, JUL=7, AUG=8,
                                 SEP=9, OCT=10, NOV=11, DEC=12),
                    Location=substr(filename, start=1,
                                    stop = stringr::str_locate_all(filename,'_')[[1]][2,]-1)) %>%
      dplyr::rename(Year=YEAR)

    Data_Lightstations[(count_SST-1)*nyears*nmonths + (1:(nyears*nmonths)),] <-
      dplyr::left_join(Data_Lightstations[(count_SST-1)*nyears*nmonths + (1:(nyears*nmonths)),
                                          c('Location','Year','Month','Salinity','Longitude','Latitude')],
                       tmp, by=c('Location','Year','Month')) %>%
      dplyr::select(Location,Year,Month,SST,Salinity,Longitude,Latitude)

    count_SST <- count_SST + 1
  }
  if(grepl(filename, pattern = 'Salinities'))
  {
    Data_Lightstations$Location[
      (count_salinity-1)*nyears*nmonths + (1:(nyears*nmonths))
    ] <-
      substr(filename, start=1, stop = stringr::str_locate_all(filename,'_')[[1]][2,]-1)

    # Extract the lat/lon coords of the lightstation
    Data_Lightstations$Longitude[
      (count_salinity-1)*nyears*nmonths + (1:(nyears*nmonths))
    ] <- as.numeric(
      Lightstations$LONGITUDE[grepl(
          pattern=substr(filename, start=1, stop = stringr::str_locate_all(filename,'_')[[1]][2,]-1),
          gsub(Lightstations$LIGHSTATIO, pattern = ' ', replacement = '_'),
          ignore.case = T)])
    Data_Lightstations$Latitude[
      (count_salinity-1)*nyears*nmonths + (1:(nyears*nmonths))
    ] <- as.numeric(
      Lightstations$LATITUDE__[grepl(
          pattern=substr(filename, start=1, stop = stringr::str_locate_all(filename,'_')[[1]][2,]-1),
          gsub(Lightstations$LIGHSTATIO, pattern = ' ', replacement = '_'),
          ignore.case = T)])

    # read the data and merge by Location, Year and Month
    tmp <- read.csv(paste0('./DATA_Active_Lightstations/', filename),
                    header = T, skip = 1)
    tmp <- tmp %>%
      dplyr::select(YEAR, JAN, FEB, MAR, APR, MAY, JUN, JUL, AUG, SEP, OCT, NOV, DEC) %>%
      tidyr::pivot_longer(
        cols=c('JAN','FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC'),
        names_to = 'Month',
        values_to = 'Salinity'
      ) %>%
      dplyr::mutate(Month=recode(Month, JAN=1, FEB=2, MAR=3,
                                 APR=4, MAY=5, JUN=6, JUL=7, AUG=8,
                                 SEP=9, OCT=10, NOV=11, DEC=12),
                    Location=substr(filename, start=1,
                                    stop = stringr::str_locate_all(filename,'_')[[1]][2,]-1)) %>%
      rename(Year=YEAR)

    Data_Lightstations[(count_salinity-1)*nyears*nmonths + (1:(nyears*nmonths)),] <-
      dplyr::left_join(Data_Lightstations[(count_salinity-1)*nyears*nmonths + (1:(nyears*nmonths)),
                                          c('Location','Year','Month','SST','Longitude','Latitude')],
                       tmp, by=c('Location','Year','Month')) %>%
      dplyr::select(Location,Year,Month,SST,Salinity,Longitude,Latitude)

    count_salinity <- count_salinity + 1
  }
}
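
The repeated substr()/str_locate_all() idiom in the loop recovers the station name by truncating each filename just before its second underscore. A standalone illustration (the filename below is invented for demonstration):

# Hypothetical filename, for illustration only
fname <- 'Amphitrite_Point_Monthly_Sea_Surface_Temperatures.csv'
# Matrix of underscore positions; column 1 holds the match starts
underscores <- stringr::str_locate_all(fname, '_')[[1]]
# Keep everything before the second underscore
substr(fname, start=1, stop=underscores[2,1]-1)
#> [1] "Amphitrite_Point"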

# Set all 999.99 values to NA
Data_Lightstations <-
  Data_Lightstations %>%
  dplyr::mutate(SST=ifelse(SST==999.99, NA, SST),
         Salinity=ifelse(Salinity==999.99, NA, Salinity),
         Longitude=ifelse(Longitude==999.99, NA, Longitude),
         Latitude=ifelse(Latitude==999.99, NA, Latitude))
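
An equivalent and slightly tidier way to recode the 999.99 sentinel is dplyr::na_if() applied across the four numeric columns; a sketch:

# Same sentinel handling via na_if() across the numeric columns
Data_Lightstations <- Data_Lightstations %>%
  dplyr::mutate(dplyr::across(c(SST, Salinity, Longitude, Latitude),
                              ~ dplyr::na_if(.x, 999.99)))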

# Check for missing values
summary(Data_Lightstations)

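Note that the st_as_sf() call below errors when any coordinate is NA, which happens if some station slots in the skeleton were never filled (e.g. fewer than 12 csv files present). One defensive option, sketched here, is to drop such rows first:

# Drop any rows whose coordinates were never filled in (defensive sketch)
Data_Lightstations <- Data_Lightstations %>%
  dplyr::filter(!is.na(Longitude), !is.na(Latitude))
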
# Make it a spatial object in sf package with lat/lon CRS
Data_Lightstations <-
  sf::st_as_sf(Data_Lightstations,
               coords=c('Longitude','Latitude'),
               crs=sf::st_crs('EPSG:4326'))

# Convert to the same CRS as the PACea Coastline object
Data_Lightstations <-
  sf::st_transform(Data_Lightstations,
               sf::st_crs(PACea::Coastline@proj4string))

Next, we aggregate over the polygons defined by BC_Partitions

# read in BC Grid
BC_Partition_Objects <- PACea::BC_Partition_Objects

# extract the polygons belonging to the 'BC Grid' partition
Partition_Polys <- BC_Partition_Objects$BC_Partition[
  BC_Partition_Objects$BC_Partition$Regions_Name=='BC Grid',]

# Create a data.frame object for storing the aggregated SST and salinity values
npoly <- length(Partition_Polys$Poly_ID)

Lighthouse_SST_Salinity_DF <-
  data.frame(Poly_ID=rep(unique(Partition_Polys$Poly_ID), times=nyears*nmonths),
             Month=rep(rep(1:12, each=npoly), times=nyears),
             Year=rep(min(Data_Lightstations$Year,na.rm=T):max(Data_Lightstations$Year,na.rm=T),
                      each=npoly*nmonths),
             Mean_SST_BC_Lightstation=NA,
             N_Obs_SST_BC_Lightstation=0,
             Mean_Salinity_BC_Lightstation=NA,
             N_Obs_Salinity_BC_Lightstation=0
  )

# Which Polygon does each Lightstation fall in?
Lightstations_final <-
  Data_Lightstations[Data_Lightstations$Year==1914 & Data_Lightstations$Month==12,]

ind_Poly_to_Lightstation <- apply(
  sf::st_contains_properly(Partition_Polys, Lightstations_final, sparse=F),
  1, FUN = function(x){ Lightstations_final$Location[which(x)] })
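
With sparse=F, st_contains_properly() returns an nrow(Partition_Polys)-by-nrow(Lightstations_final) logical matrix, and the apply() call turns each row into the vector of station names lying strictly inside that polygon. A self-contained toy example of the same pattern, with invented geometries:

# Toy example: one unit square and two points, one inside and one outside
poly <- sf::st_sfc(sf::st_polygon(list(rbind(c(0,0), c(1,0), c(1,1), c(0,1), c(0,0)))))
pts <- sf::st_sfc(sf::st_point(c(0.5, 0.5)), sf::st_point(c(2, 2)))
sf::st_contains_properly(poly, pts, sparse=FALSE)
#>      [,1]  [,2]
#> [1,] TRUE FALSE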

# loop through the polygons
for(j in unique(Partition_Polys$Poly_ID))
{
  print(paste0('Processing Data For Polygon ',j,' out of ', length(unique(Partition_Polys$Poly_ID))))
  # Does a lighthouse fall within the polygon?
  if(length(ind_Poly_to_Lightstation[[j]])>0)
  {
    Lighthouse_SST_Salinity_DF[Lighthouse_SST_Salinity_DF$Poly_ID==j,
                 c('Mean_SST_BC_Lightstation', 'N_Obs_SST_BC_Lightstation',
                   'Mean_Salinity_BC_Lightstation', 'N_Obs_Salinity_BC_Lightstation')] <-
      (sf::st_drop_geometry(Data_Lightstations) %>%
      filter(Location %in% ind_Poly_to_Lightstation[[j]]) %>%
      group_by(Year, Month) %>%
      summarize(
        Mean_SST_BC_Lightstation=mean(SST, na.rm=T),
        N_Obs_SST_BC_Lightstation=sum(!is.na(SST)),
        Mean_Salinity_BC_Lightstation=mean(Salinity, na.rm=T),
        N_Obs_Salinity_BC_Lightstation=sum(!is.na(Salinity))
      ))[,c('Mean_SST_BC_Lightstation', 'N_Obs_SST_BC_Lightstation',
            'Mean_Salinity_BC_Lightstation', 'N_Obs_Salinity_BC_Lightstation')]

  }
}
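
The same aggregation can be written without the explicit polygon loop by attaching a Poly_ID to each station once and then grouping in a single pass. A sketch of that alternative, using the boundary-inclusive st_within predicate in place of st_contains_properly and assuming each station falls in at most one grid cell:

# One-pass alternative (sketch): tag each station with its polygon, then group
station_poly <- sf::st_join(Lightstations_final['Location'],
                            Partition_Polys['Poly_ID'],
                            join = sf::st_within)

Lighthouse_alternative <- sf::st_drop_geometry(Data_Lightstations) %>%
  dplyr::left_join(sf::st_drop_geometry(station_poly), by='Location') %>%
  dplyr::group_by(Poly_ID, Year, Month) %>%
  dplyr::summarize(
    Mean_SST_BC_Lightstation=mean(SST, na.rm=T),
    N_Obs_SST_BC_Lightstation=sum(!is.na(SST)),
    Mean_Salinity_BC_Lightstation=mean(Salinity, na.rm=T),
    N_Obs_Salinity_BC_Lightstation=sum(!is.na(Salinity)),
    .groups='drop'
  )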

Now we save the data and plot it to check correctness. The usethis::use_data() call stores the data frame in the package's data/ directory, so it ships as a PACea dataset.

usethis::use_data(Lighthouse_SST_Salinity_DF, overwrite = T)

# save the data key, pointing the common names of the datasets to the correct DF
# data_key <-
#   data.frame(
#     Common_Name = dataset_commonname,
#     DF_Name = 'Lighthouse_SST_Salinity_DF',
#     Time_Resolution = 'Monthly',
#     Data_Type = 'Point-Referenced Raw Data',
#     Units = c('Degrees Celsius',
#               'PSU (Practical Salinity Unit)'),
#     Author = 'Fisheries and Oceans Canada',
#     Citation = NA,
#     Comments = 'See https://open.canada.ca/data/en/dataset/719955f2-bf8e-44f7-bc26-6bd623e82884 for details'
#   )
#
# # append to the master Data_Key and remove duplicates
# Data_Key <-
#   rbind(PACea::Data_Key,
#         data_key)
#
# Data_Key <-
#   Data_Key[!duplicated(Data_Key[,c('Common_Name','DF_Name')]),]
#
# # Update the master key
# usethis::use_data(Data_Key, overwrite=T)

# plot the mean lighthouse SST in April 1999 aggregated over the BC partition
ggplot2::ggplot(data=
                  cbind(Partition_Polys,
                        (Lighthouse_SST_Salinity_DF %>%
                           filter(Year==1999, Month==4))),
                ggplot2::aes(fill=Mean_SST_BC_Lightstation)
) + ggplot2::geom_sf() +
  ggplot2::geom_sf(data=Lightstations_final, ggplot2::aes(fill=1), colour='red', size=4)

# plot the mean lighthouse Salinity in April 1999 aggregated over the BC partition
ggplot2::ggplot(data=
                  cbind(Partition_Polys,
                        (Lighthouse_SST_Salinity_DF %>%
                           filter(Year==1999, Month==4))),
                ggplot2::aes(fill=Mean_Salinity_BC_Lightstation)
) + ggplot2::geom_sf() +
  ggplot2::geom_sf(data=Lightstations_final, ggplot2::aes(fill=1), colour='red', size=4)

# plot the number of monthly mean SST observations made by lightstations in April 1999 aggregated over the BC partition
ggplot2::ggplot(data=
                  cbind(Partition_Polys,
                        (Lighthouse_SST_Salinity_DF %>%
                           filter(Year==1999, Month==4))),
                ggplot2::aes(fill=N_Obs_SST_BC_Lightstation)
) + ggplot2::geom_sf() +
  ggplot2::geom_sf(data=Lightstations_final, ggplot2::aes(fill=1), colour='red', size=4)

# map to WCVI and plot
# Extract only the indices that correspond to the West Coast Vancouver Island
BC_Partition_Objects$BC_Partition$Poly_Name[
  BC_Partition_Objects$BC_Partition$Regions_Name=='BC Major Areas'
]
# WCVI is the fourth BC Major Area
BC_Partition_Objects$index_vectors$BC_Major_Area_Boundaries[4]
# Index 93 is the correct column from the BC_Partition_Objects$Mapping_Matrix

# Plot the mean Salinity as inferred by the average lighthouse recording in the region

# Compute the mean using the Mapping_Matrix
WCVI_Mean_1999_April <-
  t(BC_Partition_Objects$Mapping_Matrix[
    , BC_Partition_Objects$index_vectors$BC_Major_Area_Boundaries[4]
  ]) %*%
  as.matrix(
  Lighthouse_SST_Salinity_DF %>%
       filter(Year==1999, Month==4) %>%
       mutate(Mean_Salinity_BC_Lightstation=
                ifelse(is.na(Mean_Salinity_BC_Lightstation),
                       0,
                       Mean_Salinity_BC_Lightstation)) %>%
       dplyr::pull(Mean_Salinity_BC_Lightstation)
  )
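
The matrix product above is just a weighted average: the selected column of Mapping_Matrix holds one weight per grid cell for the WCVI region, so t(w) %*% x collapses the per-cell values x into a single regional mean (with NAs zeroed first, as above). A two-cell illustration with invented weights:

# Toy weighted average: two cells contributing 25% and 75% of a region
w <- c(0.25, 0.75)   # hypothetical Mapping_Matrix column
x <- c(30, 32)       # hypothetical per-cell mean salinities
t(w) %*% x           # 1x1 matrix containing 31.5
sum(w * x)           # the same value as a plain scalar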


ggplot2::ggplot(BC_Partition_Objects$BC_Partition[
  BC_Partition_Objects$BC_Partition$Poly_Name=='WCVI',
],
        ggplot2::aes(fill=WCVI_Mean_1999_April)) +
  ggplot2::geom_sf()

