# R/AppEEARS_unpack_QC.R
# Defines functions: AppEEARS_unpack_QC
# Documented in: AppEEARS_unpack_QC

#' Unpacks downloaded MODIS LAI data and QC information
#' @description This function unpacks downloaded MODIS LAI data from the lpdaac
#' AppEEARS download portal into a list with each item in the list representing
#' a single site. 
#'
#' @param zip_file The name of the zip file. For example, "myzip.zip"
#' @param zip_dir The directory the zip file is located in
#' @param request_sites A string of Site_IDs
#'
#' @return A named list of data.frames, one per site with usable LAI data.
#'   Each data.frame has columns ID, pos_time, Year, DOY, Lai, Lai_sd, and the
#'   unpacked FparLai QC flag columns. Sites with no usable data are dropped
#'   and reported to the user via a message.
#'
#' @export

#===============================================================================
#Function for unpacking the LAI data downloaded from AppEEARS
#Created 6/25/2020
#===============================================================================
AppEEARS_unpack_QC <- function(zip_file, zip_dir, request_sites){
  #Get the name of the request based on the .zip file (file name sans extension)
    request <- sub('\\..*', '', zip_file)

  #Unzip the AppEEARS file into a folder named after the request
    unzip(file.path(zip_dir, zip_file), exdir = file.path(zip_dir, request))

  #Identifying the file containing the results using pattern matching
    grx_exp <- glob2rx("*-results.csv")
    folder_files <- list.files(file.path(zip_dir, request))
    results_fn <- folder_files[grep(grx_exp, folder_files)][1]

  #Fail early with a clear message if the expected results file is absent
  #(otherwise results_fn is NA and the strsplit below fails cryptically)
    if(is.na(results_fn)){
      stop("No '*-results.csv' file found in ", file.path(zip_dir, request),
        call. = FALSE)
    } #End if statement

  #Identify the MODIS product used and info to paste in to read the file
    res_split <- strsplit(results_fn, "-")[[1]]
    prod_loc <- grep(glob2rx("MCD*"), res_split)

  #Reading in the results based on the MODIS product used
    raw <- data.frame(data.table::fread(file.path(zip_dir, request, results_fn)))

  #-------------------------------------------------
  #Performing QC, adding date, and selecting the final columns
  #-------------------------------------------------
    #Get a string needed to select column headings based on version
    #(product name + version, e.g. "MCD15A3H_006")
      heading <- paste(res_split[prod_loc:(prod_loc + 1)], collapse = "_")

    #Identify the QC column headings
      QC_cols <- paste0(heading, c("_FparLai_QC_MODLAND", "_FparLai_QC_Sensor", 
        "_FparLai_QC_DeadDetector", "_FparLai_QC_CloudState", "_FparLai_QC_SCF_QC"))
      
    #Strip the leading "...b" bit-string prefix from every QC value. lapply
    #keeps each column a character vector (apply would first coerce the
    #data.frame to a matrix)
      QC_unpacked <- lapply(raw[, QC_cols], function(x){gsub(".*b", "", x)})

    #Subset for the columns of interest and bind on the unpacked QC flags
      reduced <- data.frame(
        raw[, c("ID", "Date", paste0(heading, c("_Lai_500m", "_LaiStdDev_500m")))],
        QC_unpacked
      )
      
      colnames(reduced)[3:9] <- c("Lai", "Lai_sd", "FparLai_QC_MODLAND", "FparLai_QC_Sensor", 
        "FparLai_QC_DeadDetector", "FparLai_QC_CloudState", "FparLai_QC_SCF_QC")

    #Adding POSIX time column
      reduced$pos_time <- as.POSIXct(reduced[, "Date"], format = "%Y-%m-%d", tz = "UTC")

    #Adding in Year and DOY information
      reduced$Year <- as.numeric(strftime(reduced[, "pos_time"], format = "%Y", tz = "UTC"))
      reduced$DOY <- as.numeric(strftime(reduced[, "pos_time"], format = "%j", tz = "UTC"))

    #Selecting only the final information needed
      VOI <- reduced[, c("ID", "pos_time", "Year", "DOY", "Lai", "Lai_sd", "FparLai_QC_MODLAND", 
        "FparLai_QC_Sensor", "FparLai_QC_DeadDetector", "FparLai_QC_CloudState", "FparLai_QC_SCF_QC")]

  #-------------------------------------------------
  #Exporting the data for each site
  #-------------------------------------------------
    #Splitting the dataset up by site
      site_split <- split(VOI, VOI[, "ID"])
      
    #Returns NULL for sites whose records all have SCF_QC == "100"
    #NOTE(review): "100" presumably marks unusable/fill retrievals — confirm
    #against the MOD15 FparLai QC bit documentation
      remove_null <- function(Site){
        if(!all(Site[, "FparLai_QC_SCF_QC"] == "100")){
          return(Site)
        } #End if statement
      } #End remove_null
      
      split_filtered <- lapply(site_split, FUN = remove_null)

    #vapply (rather than sapply) guarantees a logical vector even when the
    #list is empty
      split_na_rm <- split_filtered[!vapply(split_filtered, is.null, logical(1))]

    #Assigning the proper Site ID. seq_along is safe when no sites remain
    #(1:length(split_na_rm) would iterate over c(1, 0))
      for(i in seq_along(split_na_rm)){
        #Getting the Site name (the MODIS request removes "_")
          site_name <- request_sites[gsub("[[:punct:]]", "", request_sites) %in%
            unique(split_na_rm[[i]][, "ID"])]

          split_na_rm[[i]][, "ID"] <- site_name

      } #End for loop

    #Data frame of Site_ID's with and without punctuation
      ID_DF <- setNames(data.frame(gsub("[[:punct:]]", "", request_sites), request_sites),
        c("no_punct", "Site_ID"))

    #Merging together the names
      ID_merge <- merge(setNames(data.frame(names(split_na_rm)), "no_punct"), ID_DF,
        by = "no_punct")

    #Add row names to serve as an index
      rownames(ID_merge) <- ID_merge[, "no_punct"]
      
    #Assign names (row-name indexing keeps the order of split_na_rm)
      names(split_na_rm) <- ID_merge[names(split_na_rm), "Site_ID"]      

  #Notify the user with a list of sites that did not have data
    missing <- request_sites[!(request_sites %in% names(split_na_rm))]

    if(length(missing) > 0){
      message(paste("The following sites did not have LAI data in this request:",
        paste(missing, collapse = ", ")))
    } #End if statement

  return(split_na_rm)

} #End AppEEARS_unpack_QC function
# psavoy/StreamLightUtils documentation built on April 5, 2022, 11:05 p.m.