R/rf1_simulated_data.R

Defines functions create.simulated.data DOY.to.date

#' Weather Data
#'
#' Data follow the ArboMAP format (except for the missing districtdate field).
#' Data are entirely simulated to represent anomalies drawn from a
#' normal distribution with mean of 0 and standard deviation of 1.
#' They have no actual relationship with the human cases or mosquito cases.
#' @docType data
#'
#' @source See file rf1_simulated_data.R
#'
"weather.data"

#' Human Data
#'
#' Simulated unrealistic human case data to test the Random
#' Forest model. All cases occur on the same date of the year.
#' Five years each with one case per district are simulated,
#' then approximately half the cases were dropped at random.
#'
#' @docType data
#' @source See file rf1_simulated_data.R
"human.data"

#' Mosquito Data
#'
#' Simulated mosquito data for mosquito pools. 
#' Assumed two trap-nights per year per county.
#' Positive pools were determined at random, with values drawn
#' from a uniform random distribution between 0 and 0.7.
#' Values below 0.5 were rounded to 0, values above were rounded
#' to 1.
#'
#' @docType data
#' @source See file rf1_simulated_data.R
"mosq.data"

#' Mosquito Results
#' 
#' Results generated by the Random Forest model using the simulated
#' mosquito data. Used in the unit tests to separate testing
#' of the model from testing extraction of the model outputs.
#'
#' @docType data
'mosquito.results'

#' Human Results
#' 
#' Results from the Random Forest model using the simulated human data.
#' Used in the unit tests to separate running the models from 
#' extracting the model outputs
#' 
#' @docType data
'human.results'


#' Convert day of year to date
#'
#' Translates a day-of-year index into a "YYYY-MM-DD" string by locating the
#' month that contains the requested day.
#'
#' @param doy A single numeric day of year, from 1 up to the number of days
#'   in \code{year} (365, or 366 when get.days() reports a leap year).
#' @param year The year the day belongs to. It is passed to get.days() and
#'   inserted unchanged into the returned string.
#' @return A character string of the form "YYYY-MM-DD".
#'
#' @noRd
#'
DOY.to.date = function(doy, year){

  # get.days() is defined elsewhere in the package; a value of 366 is
  # treated as marking a leap year
  days = get.days(year)

  # Month lengths, with February extended in leap years
  months = c(31,28,31,30,31,30,31,31,30,31,30,31)

  if (days == 366){ months[2] = 29  }

  # Reject values outside the year. Without this check a doy below 1 gave an
  # invalid date string and a doy past the end of the year failed with an
  # unrelated "missing value" error.
  if (length(doy) != 1 || !is.numeric(doy) || is.na(doy) || doy < 1 || doy > sum(months)){
    stop(sprintf("doy must be a single number between 1 and %s for year %s",
                 sum(months), year), call. = FALSE)
  }

  # Running total of days at the end of each month; the first month whose
  # total reaches doy is the month that contains it
  month.ends = cumsum(months)
  month = which(doy <= month.ends)[1]

  # Subtract the days that belong to the preceding months
  day = doy - (month.ends[month] - months[month])

  date = sprintf("%s-%02d-%02d", year, month, day)

  return(date)
}


# Create simulated data for purpose of testing code
#
# Builds three simulated data frames for four districts over 2011 - 2015 and
# saves each one with usethis::use_data(..., overwrite = TRUE):
#   human.data   - one case per district per year dated 8/1, keeping only the
#                  rows whose uniform random draw exceeds 0.5
#   mosq.data    - two pools per district per year; wnv_result is a
#                  uniform(0, 0.7) draw rounded to 0 or 1
#   weather.data - one row per district per day, with six weather variables
#                  drawn from a standard normal distribution
# Takes no arguments. The seed is fixed, so the order of the random draws
# below must stay as it is or the saved data will change.
create.simulated.data = function(){
  districts = c("district1", "district2", "district3", "district4")
  years = sort(rep(seq(2011,2015), length(districts)))
  n.years = length(unique(years))
  
  # Make data generation repeatable
  set.seed(20200304)
  
  # Create human data to have one case per year for five years, then subset randomly
  human.data = data.frame(district = rep(districts, n.years), date = sprintf("8/1/%s", years))
  human.index = stats::runif(nrow(human.data))
  human.data = human.data[human.index > 0.5, ]
  #**# Write to data file, and move this code somewhere else
  
  # Create mosquito data & randomly assign positive pools
  # col_date is half the length of district and is recycled by data.frame(),
  # which gives every district/year combination two rows
  mosq.data = data.frame(district = rep(districts, 2*n.years),
                         col_date = sprintf("8/1/%s", years),
                         wnv_result = 0,
                         pool_size = 50, species = 'Culex sp')
  
  pool.result = stats::runif(nrow(mosq.data), 0, 0.7) # make more negative pools than positive pools via rounding
  mosq.data$wnv_result = round(pool.result, 0) # round() is vectorized, no sapply needed
  
  # Create weather data
  # Note: districtdate is not used by the RF1 model, and so is not generated here.
  # A date column is derived from doy and year with DOY.to.date further below.
  doy = rep(seq(1,365), length(districts))
  year.len = length(doy)
  doy2 = rep(seq(1,366), length(districts))
  year.len2 = length(doy2)
  doy = c(doy, doy2, doy, doy, doy) # One for each year. Assumes 2011 - 2015, or at least a leap year in the second position
  
  year = c(rep(2011, year.len), rep(2012, year.len2), rep(2013, year.len), rep(2014, year.len), rep(2015, year.len))
  
  # Districts are sorted so that each district's block lines up with one full run of doy
  district.p1 = sort(rep(districts, 365))
  district.p2 = sort(rep(districts, 366))
  district.vec = c(district.p1, district.p2, district.p1, district.p1, district.p1)
  n.obs = length(district.vec)
  
  #**# Assumes variables are standardized anomalies (saves me having to think of plausible values)
  tminc = stats::rnorm(n.obs)
  tmeanc = stats::rnorm(n.obs)
  tmaxc = stats::rnorm(n.obs)
  pr = stats::rnorm(n.obs)
  rmean = stats::rnorm(n.obs)
  vpd = stats::rnorm(n.obs)
  
  # One DOY.to.date call per row, pairing each doy with its year
  date = mapply(DOY.to.date, doy, year)
  
  weather.data = data.frame(district = district.vec, doy = doy, year = year,
                            tminc = tminc, tmeanc = tmeanc, tmaxc = tmaxc,
                            pr = pr, rmean = rmean, vpd = vpd, date = date)
  
  # Save the three data sets as package data, replacing any existing copies
  usethis::use_data(human.data, overwrite = TRUE)
  usethis::use_data(mosq.data, overwrite = TRUE)
  usethis::use_data(weather.data, overwrite = TRUE)
  
}
akeyel/rf1 documentation built on Dec. 28, 2020, 4:48 a.m.