# R/GetData.R

# Defines functions CreateLatestDoneFile GetDataInfo GetData

# Documented in CreateLatestDoneFile GetData GetDataInfo

#' Create the "done" marker file for a processed data file
#'
#' Creates an empty file named `done_<f>.txt` inside `data_clean`. The call
#' to `file.create()` is wrapped in a silent `try()`, so an error raised
#' while creating the file is not propagated to the caller.
#'
#' @param data_clean Directory in which the marker file is created.
#' @param f Identifier inserted into the marker file name.
#' @return The value of the silent `try()` around `file.create()`.
#' @import fhi
#' @export CreateLatestDoneFile
CreateLatestDoneFile <- function(data_clean = fd::path("data_clean"), f) {
  marker_name <- sprintf("done_%s.txt", f)
  marker_path <- file.path(data_clean, marker_name)
  try(file.create(marker_path), silent = TRUE)
}

#' Find the latest processed and the latest available raw data file
#'
#' Scans `folder_clean` for marker files named `done_<id>.txt` and
#' `folder_raw` for raw files named `FHIDOD2_<id>.txt`, and reports the
#' largest `<id>` (by string comparison) found in each folder.
#'
#' @param folder_raw Directory containing the `FHIDOD2_<id>.txt` raw files.
#' @param folder_clean Directory containing the `done_<id>.txt` marker files.
#' @return A list with elements `fDone` (latest done id, or `""` when no
#'   marker file exists), `f` (latest raw id), `dateData` (`f` parsed with
#'   format `%Y%m%d`) and `dateDataMinusOneWeek` (`dateData` minus 7 days).
#' @import data.table
#' @export GetDataInfo
GetDataInfo <- function(
                        folder_raw = fd::path("data_raw"),
                        folder_clean = fd::path("data_clean")) {
  # Anchor the patterns and escape the dot so that only files of the exact
  # shape "<prefix><id>.txt" are considered (e.g. "*.txt.bak" is ignored).
  done_ids <- list.files(folder_clean, pattern = "^done_.*\\.txt$")
  done_ids <- sub("^done_", "", done_ids)
  done_ids <- sub("\\.txt$", "", done_ids)
  fDone <- if (length(done_ids) > 0) max(done_ids) else ""

  raw_ids <- list.files(folder_raw, pattern = "^FHIDOD2_.*\\.txt$")
  raw_ids <- sub("^FHIDOD2_", "", raw_ids)
  raw_ids <- sub("\\.txt$", "", raw_ids)
  # Without any raw file there is no id to take the maximum of; fail with an
  # explicit message instead of passing an empty vector on to max()/as.Date().
  if (length(raw_ids) == 0) {
    stop("No FHIDOD2_<id>.txt file found in: ", folder_raw, call. = FALSE)
  }
  f <- max(raw_ids)

  dateData <- as.Date(f, format = "%Y%m%d")
  dateDataMinusOneWeek <- dateData - 7

  return(list(
    fDone = fDone,
    f = f,
    dateData = dateData,
    dateDataMinusOneWeek = dateDataMinusOneWeek
  ))
}


#' Load and prepare the latest raw data file
#'
#' Terminates the R session via `quit(status = 0)` when there is nothing to
#' do: either the latest raw id equals the latest processed id (and
#' `forceRun` is `FALSE`), or `fhi::file_stable()` reports the raw file as
#' not stable. Otherwise reads `FHIDOD2_<f>.txt` from `folder_raw` and
#' derives date, age, location and ISO week/year columns.
#'
#' @param folder_raw Directory containing the `FHIDOD2_<f>.txt` raw file.
#' @param fDone Id of the latest processed file (see `GetDataInfo()`).
#' @param f Id of the raw file to load.
#' @param forceRun If `TRUE`, load the file even when `fDone` equals `f`.
#' @return A `data.table` holding the raw columns (minus `FYLKE`), the
#'   columns brought in by `fd::norway_county_merging()` (minus
#'   `county_code_original` and `weighting`, with `county_code_current`
#'   renamed to `location_code`), and the derived columns `DoD`, `DoR`,
#'   `DoB`, `age`, `year`, `ageCat`, `deathWeek` and `deathYear`.
#' @import data.table
#' @export GetData
GetData <- function(
                    folder_raw = fd::path("data_raw"),
                    fDone,
                    f,
                    forceRun = FALSE) {
  if (fDone == f && !forceRun) {
    cat(sprintf("%s/%s/R/NORMOMO No new data", Sys.time(), Sys.getenv("COMPUTER")), "\n")
    quit(save = "no", status = 0)
  }

  raw_file <- file.path(folder_raw, paste0("FHIDOD2_", f, ".txt"))
  if (!fhi::file_stable(raw_file)) {
    cat(sprintf("%s/%s/R/NORMOMO Unstable data file", Sys.time(), Sys.getenv("COMPUTER")), "\n")
    quit(save = "no", status = 0)
  }

  cat(sprintf("%s/%s/R/NORMOMO Stable data file", Sys.time(), Sys.getenv("COMPUTER")), "\n")

  masterData <- fread(raw_file)

  # Parse the yyyymmdd columns into Date columns.
  masterData[, DoD := as.Date(as.character(DODS_DATO), format = "%Y%m%d")]
  masterData[, DoR := as.Date(as.character(ENDR_DATO), format = "%Y%m%d")]
  masterData[, DoB := as.Date(as.character(FDATO_YYYYMMDD), format = "%Y%m%d")]
  masterData[, age := floor(as.numeric(difftime(DoD, DoB, units = "days")) / 365.25)]

  # Missing DoR is replaced by the day after DoD. The order of the next two
  # statements matters: imputed values are also subject to the shift below.
  masterData[is.na(DoR), DoR := DoD + 1]
  # NOTE(review): DoR on or after 2015-09-03 is shifted by one day; the
  # reason for this cut-off is not visible in this file.
  masterData[DoR >= as.Date("2015-09-03"), DoR := DoR + 1]

  masterData[, year := as.numeric(stringr::str_sub(DODS_DATO, 1, 4))]
  masterData[, county_code := paste0("county", formatC(FYLKE, width = 2, flag = "0"))]

  # One input row may match several rows of the merging table
  # (allow.cartesian); each resulting row carries a `weighting`.
  masterData <- merge(
    masterData,
    fd::norway_county_merging(),
    by.x = c("county_code", "year"),
    by.y = c("county_code_original", "year"),
    all.x = TRUE,
    allow.cartesian = TRUE
  )
  # Rows without a match get weight 1 and are therefore always kept.
  masterData[is.na(weighting), weighting := 1]

  # Keep each row with probability `weighting`. seq_len() (rather than 1:.N)
  # keeps this valid for a table with zero rows.
  masterData[, x := seq_len(.N)]
  masterData[, keep := sample(c(TRUE, FALSE), 1, replace = TRUE, prob = c(weighting, 1 - weighting)), by = x]
  masterData <- masterData[keep == TRUE]

  # Drop helper columns that are no longer needed.
  masterData[, c("county_code", "FYLKE", "weighting", "x", "keep") := NULL]

  setnames(masterData, "county_code_current", "location_code")

  masterData[, ageCat := cut(age, c(0, 4, 14, 64, 200), include.lowest = TRUE)]
  masterData[, ageCat := car::recode(as.character(ageCat), "'[0,4]'='0to4';'(4,14]'='5to14';'(14,64]'='15to64';'(64,200]'='65P'")]
  masterData[, deathWeek := fhi::isoweek_n(DoD)]
  masterData[, deathYear := fhi::isoyear_n(DoD)]

  return(masterData)
}
# folkehelseinstituttet/dashboards_normomo documentation built on March 20, 2020, 4:16 p.m.