R/fHHTimestampforREddyProc.R

Defines functions fHHTimestampforREddyProc

Documented in fHHTimestampforREddyProc

#' REddyProc is expecting input files formatted in a very particular way: https://www.bgc-jena.mpg.de/bgi/index.php/Services/REddyProcWebDataFormat.
#' Unforunately, when we parse out our data, it oftens violates this required format (esp on leapyears).
#' To get around this issue, these functions generate a list of dataframes for each year from 1980-2100 (plenty of time to work with).
#'
#' @export
#' @title Match half-hourly flux data to a 'perfect' timestamp (no gaps)
#' @param dat a dataframe containing half-hourly timestamps.
#' @param yr the current process year.
#' @param YY specify the column name containing the YEAR timestamp
#' @param DD specify the column name containing the DAY OF YEAR timestamp (e.g. 1-366)
#' @param HH specify the column name containing the HOUR timestamp (e.g. 0.5-23.5). NOTE: 0.5 == 30 minutes.


fHHTimestampforREddyProc <- function(dat, yr, YY, DD, HH){
  year.list <- list()
  for (i in 1980:2100) {
    Year <- rep(i, 17520)  # exactly 17520 rows
    DoY <- c(rep(1,47),rep(2:365, each=48),366)  # exactly 17520 rows
    Hour <- c(seq(0.5,23.5, by=0.5), rep(seq(0,23.5, by=0.5), times=364), 0)  # exactly 17520 rows

    year.list[[i]] <- as.data.frame(cbind(Year, DoY, Hour))
  }


  # Take the dataframe out from the list, save as df1
  df1 <- data.frame(year.list[yr])
  names(df1) <- c('YEAR', 'DOY', 'HOUR')

  # Rename the supplied column names
  colnames(dat)[colnames(dat) %in% YY] <- "YEAR"
  colnames(dat)[colnames(dat) %in% DD] <- "DOY"
  colnames(dat)[colnames(dat) %in% HH] <- "HOUR"

  # Use a left join to append the data from dat to df1
  df2 <- df1 %>%
    left_join(dat, by = c("YEAR", "DOY", "HOUR"))


  if (nrow(subset(dat, YEAR==yr)) != 17520) {
    df3 <- df2
  } else {
    # Finds the row number where Year = yr, DoY = 365, and Hour = 23.5, then we find data from the next row
    next.to.last.dat <- which(dat$YEAR == yr & dat$DOY == 365 & dat$HOUR == 23.5)

    # This finds the next row (which for our purposes is the final row for each year)
    missing.dat <- dat[next.to.last.dat + 1,]

    # Add a temporary flag to both the missing row, and the row you want to insert it into
    missing.dat$flag <- "temp"
    drops <- c("YEAR","DOY","HOUR")
    missing.dat <- missing.dat[ , !(names(missing.dat) %in% drops)]

    last.row <- df1[17520,]
    last.row$flag <- "temp"

    # Replaces the original timestamp with the REddyProc final row stamp, while retaining the original data
    fixed.row <- missing.dat %>%
      left_join(last.row, "flag") %>%
      select(-c("flag"))

    # Drops the last row from df2
    df2.drop.last <- df2[-17520,]

    # Adds the correct row and saves as new dataframe, df3
    df3 <- rbind(df2.drop.last, fixed.row)
  }

  # Convert names back to original
  colnames(df3)[colnames(df3) %in% "YEAR"] <- YY
  colnames(df3)[colnames(df3) %in% "DOY"] <- DD
  colnames(df3)[colnames(df3) %in% "HOUR"] <- HH

  return(df3)
}
ksmiff33/FluxSynthU documentation built on Dec. 15, 2020, 10:29 p.m.