#' Provides a helper dataframe to list all of the files extracted for geomac
#' for a specified year of download
#'
#' @param ds_source (character) :
#' @param dl_date (date) :
#' @param yr (integer) : The specifics geomac year shapefile we want to
#' transform
#'
#' @return A tibble of metadata paths
#' @export
get_geomac_mtda_paths <- function(ds_source, dl_date, yr){
# Define key directories and metadata file paths
outpath_mtda <- here::here("data", ds_source, base::format(dl_date,
"%Y%m%d"),
stringr::str_c(ds_source,
"metadata.csv",
sep = "_"))
outpath_mtda_all <- here::here("data", ds_source, base::format(dl_date,
"%Y%m%d"),
stringr::str_c(ds_source,
"metadata_all.csv",
sep = "_"))
outdir_mtda <- outpath_mtda %>% fs::path_dir(path = .)
outdir_mtda_all <- outpath_mtda_all %>% fs::path_dir(path = .)
outdir_mtda_yr_dir <- fs::path_join(parts = c(outdir_mtda, yr))
outdir_mtda_yr_files <- outdir_mtda_yr_dir %>%
# TODO: Check if we can set recurse = FALSE and only specify the
# unzipped directory once we extract to a specified path
fs::dir_ls(glob = "*", recurse = TRUE) %>%
tibble::enframe(x = ., name = NULL, value = "fpath") %>%
dplyr::mutate(fextn = fs::path_ext(fpath),
fname = fs::path_file(fpath),
# Get different shape file type indicators
# https://www.earthdatascience.org/courses/earth-analytics/spatial-data-r/shapefile-structure/
ind_shp =
ind_re_match(string = fextn,
pattern = "shp$"),
ind_prj =
ind_re_match(string = fextn,
pattern = "prj$"),
ind_shx =
ind_re_match(string = fextn,
pattern = "shx$"),
ind_dbf =
ind_re_match(string = fextn,
pattern = "dbf$"),
ind_sqlite =
ind_re_match(string = fextn,
pattern = "sqlite$"),
ind_perims = stringr::str_detect(string = fname,
pattern =
stringr::fixed('perim',
ignore_case = TRUE)),
ind_sit_reps = stringr::str_detect(string = fname,
pattern =
stringr::fixed('sit_rep',
ignore_case = TRUE)),
year = yr)
base::return(outdir_mtda_yr_files)
}
#' Get data dictionaries for column names given .xlxs file
#'
#' @param ddict_xlsx_path : .xlxs file path
#' @param cols_rng : Column range in each of the excel sheets name
#'
#' @return List with column names in the different .xlxs pages
#' @export
get_ddict_geomac <- function(ddict_xlsx_path, cols_rng){
dim_geomac_perims_pre15_xl <- backburner::get_ddict(
ddict_xlsx_path = ddict_xlsx_path,
sheet_nm = "dim_geomac_perims_pre15",
cols_rng = cols_rng)
dim_geomac_perims_pst15_xl <- backburner::get_ddict(
ddict_xlsx_path = ddict_xlsx_path,
sheet_nm = "dim_geomac_perims_pst15",
cols_rng = cols_rng)
dim_geomac_sitreps_pre15_xl <- backburner::get_ddict(
ddict_xlsx_path = ddict_xlsx_path,
sheet_nm = "dim_geomac_sitreps_pre15",
cols_rng = cols_rng)
dim_geomac_sitreps_pst15_xl <- backburner::get_ddict(
ddict_xlsx_path = ddict_xlsx_path,
sheet_nm = "dim_geomac_sitreps_pst15",
cols_rng = cols_rng)
out_list <- list(dim_geomac_perims_pre15_xl,
dim_geomac_perims_pst15_xl,
dim_geomac_sitreps_pre15_xl,
dim_geomac_sitreps_pst15_xl)
names(out_list) <- c("dim_geomac_perims_pre15", "dim_geomac_perims_pst15",
"dim_geomac_sitreps_pre15", "dim_geomac_sitreps_pst15")
base::return(out_list)
}
#' Set Shapefile column names given dictionary names fetched from .xlsx
#' data dictionary file.
#'
#' @param fpath : shapefile path
#' @param year : year the shapefile is referred to
#' @param ddict_type : dictionary type - this needs to be corresponding to one
#' of the data dictionaries retrieved in the .xlsx
#' @param ddict_xlsx_path : path of the data dictionary .xlsx
#' @param cols_rng : column ranges to extract from the .xlsx
#' @param target_srid (numeric): Transform the geometries into this coordinate
#' system, specified by SRID (default: 4326, GPS latitude/longitude)
#'
#' @return Shapefile dataframe
#' @export
get_geomac_new_cnames <- function(fpath, year, ddict_type, ddict_xlsx_path,
cols_rng, target_srid = 4326){
# Obtain all dictionaries of geomac variable name mappings
ddict_geomac <- get_ddict_geomac(ddict_xlsx_path = ddict_xlsx_path,
cols_rng = cols_rng)
# Get specific dictionary mapping for required shapefile
geomac_dict <- ddict_geomac[[ddict_type]]
# Read in shapefile
shp_df <- sf::read_sf(fpath) %>% sf::st_transform(target_srid)
# Get raw column names and convert to lower case
orig_colnames_lwr <- base::colnames(x = shp_df) %>%
stringr::str_to_lower(string = .) %>%
tibble::enframe(x = ., name = NULL, value = "orig_varname")
# Join on the new column name from our dictionary
new_colnames_df <- orig_colnames_lwr %>%
dplyr::left_join(x = ., y = geomac_dict,
by = "orig_varname")
# TODO: Check that there are no NAs here
new_colnames <- new_colnames_df %>%
dplyr::select(new_varname) %>%
base::unlist() %>%
base::as.vector()
# Set the new column names for our imported data frame
base::colnames(shp_df) <- new_colnames
# Return the new imported shapefile, with revised column names
base::return(shp_df)
}
#' Format GEOMAC shapefiles given path
#'
#' @param shapefiles_path (character) : Path of the shapefile to be transformed
#' @param geomac_type : Can be either `geomac_fire_perimeters` or
#' `geomac_situation_report`. Different types filter
#' on different columns for the creation of the shapefile
#'
#' @return A shapefile object
#' @export
geomac_format_shapefiles <- function(shapefiles_path, geomac_type){
# ND: Creating an IF statement over here for geomac_type
if (geomac_type == 'geomac_fire_perimeters'){
outdir_mtda_yr_perims <- shapefiles_path %>%
dplyr::filter(ind_perims) %>%
dplyr::select(fpath, year, ddict_type,
ddict_xlsx_path, cols_rng)
geomac_tform <- outdir_mtda_yr_perims %>%
purrr::pmap(.l = ., .f = get_geomac_new_cnames)
} else if (geomac_type == 'geomac_situation_report'){
outdir_mtda_yr_sit_reps <- shapefiles_path %>%
dplyr::filter(ind_sit_reps) %>%
dplyr::select(fpath, year, ddict_type,
ddict_xlsx_path, cols_rng)
geomac_tform <- outdir_mtda_yr_sit_reps %>%
purrr::pmap(.l = ., .f = get_geomac_new_cnames)
}
base::return(geomac_tform)
}
#' Determine Sheet Prefix for Firepred Data Dictionary
#'
#' Before 11/7/2019, both fire perimeters and incident reports displayed
#' two sets of variable, depending on whether the data were recorded before
#' or after 2015.
#' As of 11/7/2019, fire perimeters have been updated so that the variable
#' names are consistent across years now. However, incident reports still
#' presents two different variable names depending on whether the year
#' was before or after 2015.
#' This function looks at which sets of variable names to look at based on
#' whether it's a perimeter or incident report file, which year the file is
#' and whether we are dealing with data before 11//7/2019 changes.
#' @param ind_perims (logical): Boolean indicator. If TRUE, the file is a perimeter file,
#' otherwise it is an incident report.
#' @param yrs (numeric): Vector with years which data need to be transformed
#' @param pre_pst_split_ind (logical): Boolean indicator. If TRUE, then variable names for perimeters
#' change before and after 2015 (as before 11/7/2019 changes),
#' so that the `*_pre15` sheets are used in the Firepred
#' data dictionary for before 2015 and the `*_pst15' for the
#' years afterwards. If FALSE (default since 11/7/2019 changes),
#' the `*_pst15' sheets are used across the years for perimeters.
#' @return A vector of `pre15` or `pst15` according to which Data Dictionary sheet should be considered.
#' @export
derive_year_sheet_ind <- function(ind_perims, year, pre_pst_split_ind){
ind_var <- base::ifelse(ind_perims & !pre_pst_split_ind, TRUE, FALSE)
out <- base::ifelse(ind_var,
'pst15',
dplyr::case_when(
year >=2007 & year <= 2015 ~ "pre15",
year > 2015 ~ "pst15"))
return(out)
}
#' Full Pipeline for Transforming GEOMAC Shapefiles
#'
#' This function will transform both geomac files, i.e.
#' \code{geomac_fire_perimeters, geomac_situation_report}.
#' @param ds_source (character) : data names. Default to "geomac"
#' @param dl_date (date) : Date in which the file were downloaded. This is
#' going to look for a folder in the `data/{ds_source}/` named
#' as this date
#' @param yrs (numeric): Vector with years which data need to be transformed
#' @param pre_pst_split_ind (logical): Boolean indicator. If TRUE, then variable names for perimeters
#' change before and after 2015 (as before 11/7/2019 changes),
#' so that the `*_pre15` sheets are used in the Firepred
#' data dictionary for before 2015 and the `*_pst15' for the
#' years afterwards. If FALSE (default since 11/7/2019 changes),
#' the `*_pst15' sheets are used across the years for perimeters.
#' @return A list with two shapefiles for loading, in which the key is the
#' mtbs_type
#' @export
geomac_transform <- function(dl_date, yrs, ds_source = "geomac", pre_pst_split_ind=FALSE){
GEOMAC_DDICT_XLSX_PATH <- here::here("data", "Fire-Prediction-Data-Dictionary.xlsx")
GEOMAC_DDICT_COLS_RNG <- "A:E"
# Identify perimeter and situation reports shapefiles
outdir_mtda_yr_shp <- yrs %>%
purrr::map_df(.x = ., .f =
~get_geomac_mtda_paths(
ds_source = ds_source,
dl_date = dl_date,
yr = .x)) %>%
dplyr::filter(ind_shp == 1, ind_perims | ind_sit_reps) %>%
dplyr::arrange(year) %>%
dplyr::mutate(geomac_type =
dplyr::if_else(ind_perims,
"perims",
"sitreps"),
pre_pst_type = derive_year_sheet_ind(
ind_perims, year, pre_pst_split_ind),
# base::ifelse(ind_perims,
# base::ifelse(pre_pst_split_ind,
# dplyr::case_when(
# year >=2007 & year <= 2015 ~ "pre15",
# year > 2015 ~ "pst15"),
# 'pst15'),
# base::ifelse(pre_pst_split_ind,
# dplyr::case_when(
# year >=2007 & year <= 2015 ~ "pre15",
# year > 2015 ~ "pst15"),
# dplyr::case_when(
# year >=2007 & year <= 2015 ~ "pre15",
# year > 2015 ~ "pst15"))),
ddict_type =
stringr::str_c("dim_geomac",
geomac_type,
pre_pst_type,
sep = "_"),
ddict_xlsx_path = GEOMAC_DDICT_XLSX_PATH,
cols_rng = GEOMAC_DDICT_COLS_RNG) %>%
dplyr::select(fpath, year, ddict_type,
ddict_xlsx_path, cols_rng,
ind_perims, ind_sit_reps)
# Return shapefiles
sources <- c('geomac_fire_perimeters', 'geomac_situation_report')
names(sources) <- sources
geomac_shapefiles <- purrr::map(sources,
function(geomac_type) {
shapefile_out <- geomac_format_shapefiles(
shapefiles_path = outdir_mtda_yr_shp,
geomac_type = geomac_type
)
return(shapefile_out)
})
base::return(geomac_shapefiles)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.