R/fGetSiteInfo.R

Defines functions fGetSiteInfo

Documented in fGetSiteInfo

#' This function retrieves metadata (lat, long, name, elevation, MAT, MAP, etc.) for a selected site within the FLUXNET2015, AmeriFlux, and NEON databases.
#' Here you provide a dataframe (dat) containing all of the required columns specified in args. In addition you will need to specify the site you're retrieting data for (and its respective database).
#' Likewise, you will need to specify column names for each of the required variables.

#' @export
#' @title Get metadata for selected site
#' @param dat dataframe, contains metadata for FLUXNET2015, AmeriFlux, and NEON sites (must contain the columns specified in arguments)
#' @param site character, specify the site you're retrieving metadata for
#' @param db character, specify the database you're retrieving metadata for


#' @param site_col character, specify the column name containing abbreviated site names
#' @param db_col character, specify the column name containing abbreviated database names
#' @param site_name_col character, specify the column name containing full site names
#' @param start_yr_col character, specify the column name containing years in which record started
#' @param end_yr_col character, specify the column name containing years in which record ended
#' @param exclude_yr_col character, specify the column name containing years to exclude from analysis
#' @param lat_col character, specify the column name containing latitude
#' @param long_col character, specify the column name containing longitude
#' @param country_col character, specify the column name containing the countries where each site is located
#' @param IGBP_col character, specify the column name containing the IGBP land classification
#' @param elev_col character, specify the column name containing elevation
#' @param MAT_col character, specify the column name containing mean annual temperature (C)
#' @param MAP_col character, specify the column name containing mean annual precipitation (mm)

#' @importFrom lutz tz_lookup_coords tz_list



# fGetSiteInfo(dat = site.meta,
#              site = site,
#              db = db,
#              site_col = "site_ID",
#              db_col = "database",
#              site_name_col = "site_Name",
#              start_yr_col = "start_year",
#              end_yr_col = "end_year",
#              exclude_yr_col = "exclude_years",
#              lat_col = "lat",
#              long_col = "long",
#              country_col = "country",
#              IGBP_col = "IGBP",
#              elev_col = "elev_orig",
#              MAT_col = "MAT_orig",
#              MAP_col = "MAP_orig")



fGetSiteInfo <- function(dat, site, db, site_col, db_col, site_name_col,
                         start_yr_col, end_yr_col, exclude_yr_col, lat_col, long_col,
                         country_col, IGBP_col, elev_col, MAT_col, MAP_col) {

  # Convert factor columns to character
  i <- sapply(dat, is.factor)
  dat[i] <- lapply(dat[i], as.character)


  # Extract vectors from each of the named columns:
  site.tmp <- dat[,colnames(dat) %in% site_col]
  db.tmp <- dat[,colnames(dat) %in% db_col]
  site_Name.tmp <- dat[,colnames(dat) %in% site_name_col]
  start_year.tmp <- dat[,colnames(dat) %in% start_yr_col]
  end_year.tmp <- dat[,colnames(dat) %in% end_yr_col]
  exclude_years.tmp <- dat[,colnames(dat) %in% exclude_yr_col]
  lat.tmp <- dat[,colnames(dat) %in% lat_col]
  long.tmp <- dat[,colnames(dat) %in% long_col]
  country.tmp <- dat[,colnames(dat) %in% country_col]
  IGBP.tmp <- dat[,colnames(dat) %in% IGBP_col]
  elev.tmp <- dat[,colnames(dat) %in% elev_col]
  MAT.tmp <- dat[,colnames(dat) %in% MAT_col]
  MAP.tmp <- dat[,colnames(dat) %in% MAP_col]


  # Build a new dataframe containing the vectors above (and remove '.tmp' from the colname)
  df <- data.frame(site.tmp, db.tmp, site_Name.tmp, start_year.tmp, end_year.tmp, exclude_years.tmp,
                   lat.tmp, long.tmp, country.tmp, IGBP.tmp, elev.tmp, MAT.tmp, MAP.tmp, stringsAsFactors = FALSE)

  colnames(df) <- sub('\\.[^.]+$', '', colnames(df))

  # Append timezone info to site metadata
  metadat <- df %>%
    mutate(no_years = end_year - start_year,
           tz_name = tz_lookup_coords(lat = lat, lon = long, warn=F)) %>%
    left_join(subset(tz_list(), is_dst == FALSE), by = "tz_name") %>%
    rename(tzone = zone, utc_off = utc_offset_h) %>%
    select(-c(is_dst)) %>%
    left_join(subset(tz_list(), is_dst == TRUE), by = "tz_name") %>%
    rename(utc_off.dst = utc_offset_h) %>%
    select(c(site, db, site_Name, start_year, end_year, no_years, exclude_years, country, lat, long, tz_name, tzone, utc_off, utc_off.dst,
             IGBP, elev, MAT, MAP))


  # Index the site list by your specified site and database
  site.index <- metadat$site == site & metadat$db == db


  # Grab metadata for your specified site
  site <- metadat$site[site.index]
  db <- metadat$db[site.index]
  info <- metadat$site_Name[site.index]
  start_year <- metadat$start_year[site.index]
  end_year <- metadat$end_year[site.index]
  no_years <- metadat$no_years[site.index]
  exclude_years <- metadat$exclude_years[site.index]
  IGBP <- metadat$IGBP[site.index]
  MAT <-  metadat$MAT[site.index]
  MAP <-  metadat$MAP[site.index]
  country <- metadat$country[site.index]
  lat <- metadat$lat[site.index]
  long <- metadat$long[site.index]
  UTC_offset <- metadat$utc_off[site.index]
  tz_name <- metadat$tz_name[site.index]

  site.info <- list('site'=site, 'db'=db, 'info'=info, 'start_year'=start_year, 'end_year'=end_year,
                    'no_years'=no_years, 'exclude_years'=exclude_years, 'IGBP'=IGBP, 'MAT'=MAT, 'MAP'=MAP,
                    'country'=country, 'lat'=lat, 'long'=long, 'UTC_offset'=UTC_offset, 'tz_name'=tz_name)
  return(site.info)



  # In case there are lutz/lubridate issues down the line...

  # dataset <- data.frame(
  #   datetimeUTC=c("2014-01-01 00:00 +0000","2014-01-01 00:00 +0000"),
  #   olson=c("Canada/Eastern", "Canada/Pacific"),
  #   stringsAsFactors=FALSE
  # )
  #
  # dataset$localtime <- with(dataset,
  #                           mapply(function(dt, ol)
  #                             format(as.POSIXct(dt, "%Y-%m-%d %H:%M %z", tz = ol), "%Y-%m-%d %H:%M %z"),
  #                             datetimeUTC, olson))
  #
  #
  #
  # OlsonNames.df <- data.frame(OlsonNames(), stringsAsFactors = FALSE)
  # names(OlsonNames.df) <- "tz_name"
  #
  # datetime_pattern <- "\\d{4}-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01]) \\d{2}:\\d{2} "
  #
  # OlsonNames.df <- OlsonNames.df %>%
  #   mutate(datetime_null = "2020-05-01 00:00 +0000",
  #          localtime = with(., mapply(function(dt, ol)
  #            format(as.POSIXct(dt, "%Y-%m-%d %H:%M %z", tz = ol), "%Y-%m-%d %H:%M %z"),
  #            datetime_null, tz_name)),
  #          UTC_off_char = gsub(pattern = datetime_pattern,
  #                              replacement = "",
  #                              localtime),
  #          sign = sign(as.numeric(UTC_off_char)),
  #          HHMM = regmatches(UTC_off_char, regexpr("\\d{4}", UTC_off_char)),
  #          HH = floor(as.numeric(HHMM)/1e2),
  #          MIN = as.numeric(HHMM) - HH*100,
  #          HH.dec = HH + MIN/60,
  #          UTC_offset = HH.dec * sign) %>%
  #   select(c(tz_name, UTC_offset))


}
ksmiff33/FluxSynthU documentation built on Dec. 15, 2020, 10:29 p.m.