```r
knitr::opts_chunk$set(echo = TRUE)
```
Joe's original code; it will need adapting slightly. Should these be temporal objects or spatio-temporal? I'd assumed just simple time series, each corresponding to a location.
This markdown document shows how the sea-surface temperature (SST) and salinity data collected by the BC lightstations/lighthouses are compiled into PACea.
```r
library(dplyr)

dataset_commonname <- c('SST_Monthly_BC_Lightstation',
                        'Salinity_Monthly_BC_Lightstation')

# read in the shapefile containing the lighthouse locations
Lightstations <- sf::st_read(
  dsn = './BC_Lightstation_Data_SHP_Files/BC_Lighthouse_DATA.shp'
)

# rename Langara lighthouse to match the later data merge
Lightstations <- Lightstations %>%
  dplyr::mutate(LIGHSTATIO = recode(LIGHSTATIO,
    'LANGARA POINT LIGHTSTATION' = 'LANGARA ISLAND LIGHTSTATION'))

plot(Lightstations)
```
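Because the later merge matches on station name, it can be worth confirming the rename took effect before moving on. A minimal check (not part of the original script), using only the `LIGHSTATIO` column read in above:

```r
# optional sanity check: the old name should be gone, the new one present
any(Lightstations$LIGHSTATIO == 'LANGARA POINT LIGHTSTATION')   # expect FALSE
any(Lightstations$LIGHSTATIO == 'LANGARA ISLAND LIGHTSTATION')  # expect TRUE

# list all station names to eyeball against the csv filenames
sort(unique(Lightstations$LIGHSTATIO))
```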
```r
nstations <- 12
nmonths <- 12
# 1914 is the year the first 'active' lightstation went online
nyears <- length(1914:lubridate::year(Sys.Date()))

# Create a 'complete' data.frame skeleton to fill in
Data_Lightstations <-
  data.frame(Location = rep(NA, nstations*nyears*nmonths),
             Year = rep(rep(1914:lubridate::year(Sys.Date()), each = nmonths),
                        times = nstations),
             Month = rep(1:12, times = nyears*nstations),
             SST = rep(NA, nstations*nyears*nmonths),
             Salinity = rep(NA, nstations*nyears*nmonths),
             Longitude = NA,
             Latitude = NA)

# Loop through the currently active stations and obtain SST and salinity
count_SST <- 1
count_salinity <- 1

# loop through the csv files contained in the data directory
for(filename in list.files('./DATA_Active_Lightstations', pattern = '.csv'))
{
  if(grepl(filename, pattern = 'Temperatures'))
  {
    Data_Lightstations$Location[(count_SST-1)*nyears*nmonths + (1:(nyears*nmonths))] <-
      substr(filename, start = 1,
             stop = stringr::str_locate_all(filename, '_')[[1]][2,] - 1)

    # Extract the lat/lon coords of the station
    Data_Lightstations$Longitude[(count_SST-1)*nyears*nmonths + (1:(nyears*nmonths))] <-
      as.numeric(Lightstations$LONGITUDE[grepl(
        pattern = substr(filename, start = 1,
                         stop = stringr::str_locate_all(filename, '_')[[1]][2,] - 1),
        gsub(Lightstations$LIGHSTATIO, pattern = ' ', replacement = '_'),
        ignore.case = TRUE)])
    Data_Lightstations$Latitude[(count_SST-1)*nyears*nmonths + (1:(nyears*nmonths))] <-
      as.numeric(Lightstations$LATITUDE__[grepl(
        pattern = substr(filename, start = 1,
                         stop = stringr::str_locate_all(filename, '_')[[1]][2,] - 1),
        gsub(Lightstations$LIGHSTATIO, pattern = ' ', replacement = '_'),
        ignore.case = TRUE)])

    # read the csv data and merge by Location, Year and Month.
    # Convert the data.frame into long format by creating a 'Month' variable
    tmp <- read.csv(paste0('./DATA_Active_Lightstations/', filename),
                    header = TRUE, skip = 1)
    tmp <- tmp %>%
      dplyr::select(YEAR, JAN, FEB, MAR, APR, MAY, JUN,
                    JUL, AUG, SEP, OCT, NOV, DEC) %>%
      tidyr::pivot_longer(
        cols = c('JAN','FEB','MAR','APR','MAY','JUN',
                 'JUL','AUG','SEP','OCT','NOV','DEC'),
        names_to = 'Month',
        values_to = 'SST'
      ) %>%
      dplyr::mutate(Month = recode(Month, JAN=1, FEB=2, MAR=3, APR=4,
                                   MAY=5, JUN=6, JUL=7, AUG=8,
                                   SEP=9, OCT=10, NOV=11, DEC=12),
                    Location = substr(filename, start = 1,
                                      stop = stringr::str_locate_all(filename, '_')[[1]][2,] - 1)) %>%
      dplyr::rename(Year = YEAR)

    Data_Lightstations[(count_SST-1)*nyears*nmonths + (1:(nyears*nmonths)),] <-
      dplyr::left_join(
        Data_Lightstations[(count_SST-1)*nyears*nmonths + (1:(nyears*nmonths)),
                           c('Location','Year','Month','Salinity','Longitude','Latitude')],
        tmp) %>%
      select(Location, Year, Month, SST, Salinity, Longitude, Latitude)

    count_SST <- count_SST + 1
  }
  if(grepl(filename, pattern = 'Salinities'))
  {
    Data_Lightstations$Location[(count_salinity-1)*nyears*nmonths + (1:(nyears*nmonths))] <-
      substr(filename, start = 1,
             stop = stringr::str_locate_all(filename, '_')[[1]][2,] - 1)

    # Extract the lat/lon coords of the station
    Data_Lightstations$Longitude[(count_salinity-1)*nyears*nmonths + (1:(nyears*nmonths))] <-
      as.numeric(Lightstations$LONGITUDE[grepl(
        pattern = substr(filename, start = 1,
                         stop = stringr::str_locate_all(filename, '_')[[1]][2,] - 1),
        gsub(Lightstations$LIGHSTATIO, pattern = ' ', replacement = '_'),
        ignore.case = TRUE)])
    Data_Lightstations$Latitude[(count_salinity-1)*nyears*nmonths + (1:(nyears*nmonths))] <-
      as.numeric(Lightstations$LATITUDE__[grepl(
        pattern = substr(filename, start = 1,
                         stop = stringr::str_locate_all(filename, '_')[[1]][2,] - 1),
        gsub(Lightstations$LIGHSTATIO, pattern = ' ', replacement = '_'),
        ignore.case = TRUE)])

    # read the data and merge by Location, Year and Month
    tmp <- read.csv(paste0('./DATA_Active_Lightstations/', filename),
                    header = TRUE, skip = 1)
    tmp <- tmp %>%
      dplyr::select(YEAR, JAN, FEB, MAR, APR, MAY, JUN,
                    JUL, AUG, SEP, OCT, NOV, DEC) %>%
      tidyr::pivot_longer(
        cols = c('JAN','FEB','MAR','APR','MAY','JUN',
                 'JUL','AUG','SEP','OCT','NOV','DEC'),
        names_to = 'Month',
        values_to = 'Salinity'
      ) %>%
      dplyr::mutate(Month = recode(Month, JAN=1, FEB=2, MAR=3, APR=4,
                                   MAY=5, JUN=6, JUL=7, AUG=8,
                                   SEP=9, OCT=10, NOV=11, DEC=12),
                    Location = substr(filename, start = 1,
                                      stop = stringr::str_locate_all(filename, '_')[[1]][2,] - 1)) %>%
      dplyr::rename(Year = YEAR)

    Data_Lightstations[(count_salinity-1)*nyears*nmonths + (1:(nyears*nmonths)),] <-
      dplyr::left_join(
        Data_Lightstations[(count_salinity-1)*nyears*nmonths + (1:(nyears*nmonths)),
                           c('Location','Year','Month','SST','Longitude','Latitude')],
        tmp) %>%
      select(Location, Year, Month, SST, Salinity, Longitude, Latitude)

    count_salinity <- count_salinity + 1
  }
}

# Set all 999.99 values (the flag for missing data) to NA
Data_Lightstations <- Data_Lightstations %>%
  dplyr::mutate(SST = ifelse(SST == 999.99, NA, SST),
                Salinity = ifelse(Salinity == 999.99, NA, Salinity),
                Longitude = ifelse(Longitude == 999.99, NA, Longitude),
                Latitude = ifelse(Latitude == 999.99, NA, Latitude))

# Check for missing values
summary(Data_Lightstations)

# Make it a spatial object with the sf package using a lat/lon CRS
Data_Lightstations <- sf::st_as_sf(Data_Lightstations,
                                   coords = c('Longitude','Latitude'),
                                   crs = sf::st_crs('EPSG:4326'))

# Convert to the same CRS as the PACea coastline data
Data_Lightstations <- sf::st_transform(Data_Lightstations,
                                       sf::st_crs(PACea::Coastline@proj4string))
```
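The joins above fail silently if a station name extracted from a filename does not match the shapefile, so a quick completeness check is useful before aggregating. A minimal sketch (not part of the original pipeline), counting non-missing monthly values per station:

```r
# sketch of a completeness check: counts of non-missing monthly values
# per station, to catch failed joins or name mismatches early
sf::st_drop_geometry(Data_Lightstations) %>%
  dplyr::filter(!is.na(Location)) %>%
  dplyr::group_by(Location) %>%
  dplyr::summarize(n_SST = sum(!is.na(SST)),
                   n_Salinity = sum(!is.na(Salinity)))
```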
Next, we aggregate over the polygons defined by the PACea BC partition (`BC_Partition_Objects`).
```r
# read in the BC Grid
BC_Partition_Objects <- PACea::BC_Partition_Objects

# extract the 'BC Grid' polygons from the partition
Partition_Polys <- BC_Partition_Objects$BC_Partition[
  BC_Partition_Objects$BC_Partition$Regions_Name == 'BC Grid',]

# Create a data.frame for storing the aggregated SST and salinity values
npoly <- length(Partition_Polys$Poly_ID)
Lighthouse_SST_Salinity_DF <-
  data.frame(Poly_ID = rep(unique(Partition_Polys$Poly_ID), times = nyears*nmonths),
             Month = rep(rep(1:12, each = npoly), times = nyears),
             Year = rep(min(Data_Lightstations$Year, na.rm = TRUE):
                          max(Data_Lightstations$Year, na.rm = TRUE),
                        each = npoly*nmonths),
             Mean_SST_BC_Lightstation = NA,
             N_Obs_SST_BC_Lightstation = 0,
             Mean_Salinity_BC_Lightstation = NA,
             N_Obs_Salinity_BC_Lightstation = 0)

# Which polygon does each lightstation fall in?
# Take one row per station (the choice of year/month is arbitrary)
Lightstations_final <- Data_Lightstations[Data_Lightstations$Year == 1914 &
                                            Data_Lightstations$Month == 12,]
ind_Poly_to_Lightstation <-
  apply(sf::st_contains_properly(Partition_Polys, Lightstations_final, sparse = FALSE),
        1,
        FUN = function(x){ Lightstations_final$Location[which(x)] })

# loop through the polygons
for(j in unique(Partition_Polys$Poly_ID))
{
  print(paste0('Processing Data For Polygon ', j, ' out of ',
               length(unique(Partition_Polys$Poly_ID))))
  # Does a lighthouse fall within the polygon?
  if(length(ind_Poly_to_Lightstation[[j]]) > 0)
  {
    Lighthouse_SST_Salinity_DF[Lighthouse_SST_Salinity_DF$Poly_ID == j,
                               c('Mean_SST_BC_Lightstation',
                                 'N_Obs_SST_BC_Lightstation',
                                 'Mean_Salinity_BC_Lightstation',
                                 'N_Obs_Salinity_BC_Lightstation')] <-
      (sf::st_drop_geometry(Data_Lightstations) %>%
         filter(Location %in% ind_Poly_to_Lightstation[[j]]) %>%
         group_by(Year, Month) %>%
         summarize(
           Mean_SST_BC_Lightstation = mean(SST, na.rm = TRUE),
           N_Obs_SST_BC_Lightstation = sum(!is.na(SST)),
           Mean_Salinity_BC_Lightstation = mean(Salinity, na.rm = TRUE),
           N_Obs_Salinity_BC_Lightstation = sum(!is.na(Salinity))
         ))[, c('Mean_SST_BC_Lightstation',
                'N_Obs_SST_BC_Lightstation',
                'Mean_Salinity_BC_Lightstation',
                'N_Obs_Salinity_BC_Lightstation')]
  }
}
```
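Note that the assignment inside the loop relies on the `group_by(Year, Month)` output being ordered the same way as the skeleton data frame (Year ascending, Month ascending within each Year), which holds for the skeleton built above. As a sanity check on the spatial assignment itself, a short sketch (not in the original script, and assuming `apply()` returned a list, which it does whenever cells contain differing numbers of stations):

```r
# sketch: how many grid cells actually contain a lightstation?
n_with_station <- sum(lengths(ind_Poly_to_Lightstation) > 0)
print(paste0(n_with_station, ' of ', npoly, ' grid cells contain a lightstation'))

# and which stations map to which cells
ind_Poly_to_Lightstation[lengths(ind_Poly_to_Lightstation) > 0]
```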
Now we save the data and plot it to check correctness. The usethis::use_data() call writes the compiled data frame into the package, and the (currently commented-out) data-key code maps the datasets' common names onto that data frame.
```r
usethis::use_data(Lighthouse_SST_Salinity_DF, overwrite = TRUE)

# save the data key, pointing the common names of the datasets to the correct DF
# data_key <-
#   data.frame(
#     Common_Name = dataset_commonname,
#     DF_Name = 'Lighthouse_SST_Salinity_DF',
#     Time_Resolution = 'Monthly',
#     Data_Type = 'Point-Referenced Raw Data',
#     Units = c('Degrees Celsius',
#               'PSU (Practical Salinity Unit)'),
#     Author = 'Fisheries and Oceans Canada',
#     Citation = NA,
#     Comments = 'See https://open.canada.ca/data/en/dataset/719955f2-bf8e-44f7-bc26-6bd623e82884 for details'
#   )
#
# # append to the master Data_Key and remove duplicates
# Data_Key <-
#   rbind(PACea::Data_Key,
#         data_key)
#
# Data_Key <-
#   Data_Key[!duplicated(Data_Key[, c('Common_Name','DF_Name')]),]
#
# # Update the master key
# usethis::use_data(Data_Key, overwrite = TRUE)

# plot the mean lighthouse SST in April 1999 aggregated over the BC partition
ggplot2::ggplot(data = cbind(Partition_Polys,
                             (Lighthouse_SST_Salinity_DF %>%
                                filter(Year == 1999, Month == 4))),
                ggplot2::aes(fill = Mean_SST_BC_Lightstation)) +
  ggplot2::geom_sf() +
  ggplot2::geom_sf(data = Lightstations_final, ggplot2::aes(fill = 1),
                   colour = 'red', size = 4)

# plot the mean lighthouse salinity in April 1999 aggregated over the BC partition
ggplot2::ggplot(data = cbind(Partition_Polys,
                             (Lighthouse_SST_Salinity_DF %>%
                                filter(Year == 1999, Month == 4))),
                ggplot2::aes(fill = Mean_Salinity_BC_Lightstation)) +
  ggplot2::geom_sf() +
  ggplot2::geom_sf(data = Lightstations_final, ggplot2::aes(fill = 1),
                   colour = 'red', size = 4)

# plot the number of monthly mean SST observations made by lightstations
# in April 1999, aggregated over the BC partition
ggplot2::ggplot(data = cbind(Partition_Polys,
                             (Lighthouse_SST_Salinity_DF %>%
                                filter(Year == 1999, Month == 4))),
                ggplot2::aes(fill = N_Obs_SST_BC_Lightstation)) +
  ggplot2::geom_sf() +
  ggplot2::geom_sf(data = Lightstations_final, ggplot2::aes(fill = 1),
                   colour = 'red', size = 4)

# map to WCVI and plot
# Extract only the indices that correspond to the West Coast Vancouver Island
BC_Partition_Objects$BC_Partition$Poly_Name[
  BC_Partition_Objects$BC_Partition$Regions_Name == 'BC Major Areas']
# WCVI is the fourth BC Major Area
BC_Partition_Objects$index_vectors$BC_Major_Area_Boundaries[4]
# Index 93 is the correct column from the BC_Partition_Objects$Mapping_Matrix

# Plot the mean salinity as inferred by the average lighthouse recording
# in the region. Compute the mean using the Mapping_Matrix
WCVI_Mean_1999_April <-
  t(BC_Partition_Objects$Mapping_Matrix[,
      BC_Partition_Objects$index_vectors$BC_Major_Area_Boundaries[4]]) %*%
  as.matrix(
    Lighthouse_SST_Salinity_DF %>%
      filter(Year == 1999, Month == 4) %>%
      mutate(Mean_Salinity_BC_Lightstation =
               ifelse(is.na(Mean_Salinity_BC_Lightstation), 0,
                      Mean_Salinity_BC_Lightstation)) %>%
      dplyr::pull(Mean_Salinity_BC_Lightstation))

ggplot2::ggplot(BC_Partition_Objects$BC_Partition[
                  BC_Partition_Objects$BC_Partition$Poly_Name == 'WCVI',],
                ggplot2::aes(fill = WCVI_Mean_1999_April)) +
  ggplot2::geom_sf()
```
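Once the package is rebuilt, the saved object becomes a lazy-loaded dataset. A minimal usage sketch (assuming PACea is installed with the data saved above):

```r
# minimal usage sketch (assumes PACea was rebuilt after use_data() above)
library(PACea)
library(dplyr)

# monthly grid-cell means and observation counts for April 1999
PACea::Lighthouse_SST_Salinity_DF %>%
  dplyr::filter(Year == 1999, Month == 4) %>%
  head()
```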