R/import_export.R

Defines functions import_feather import_sav import_txt import_tsv import_csv2 import_csv import_xlsx import_rds import export_html export_png export_pdf export_feather export_sav export_txt export_tsv export_csv2 export_csv export_xlsx export_rds export plot_export_result file_can_be_overwritten rownames_1st_column doc_requirement parse_file_location import_exec export_exec

Documented in export export_csv export_csv2 export_feather export_html export_pdf export_png export_rds export_sav export_tsv export_txt export_xlsx import import_csv import_csv2 import_feather import_rds import_sav import_tsv import_txt import_xlsx

# ===================================================================== #
#  An R package by Certe:                                               #
#  https://github.com/certe-medical-epidemiology                        #
#                                                                       #
#  Licensed as GPL-v2.0.                                                #
#                                                                       #
#  Developed at non-profit organisation Certe Medical Diagnostics &     #
#  Advice, department of Medical Epidemiology.                          #
#                                                                       #
#  This R package is free software; you can freely use and distribute   #
#  it for both personal and commercial purposes under the terms of the  #
#  GNU General Public License version 2.0 (GNU GPL-2), as published by  #
#  the Free Software Foundation.                                        #
#                                                                       #
#  We created this package for both routine data analysis and academic  #
#  research and it was publicly released in the hope that it will be    #
#  useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY.              #
# ===================================================================== #

# Helper functions --------------------------------------------------------

export_exec <- function(object,
                        needed_extension,
                        filename,
                        filename_deparse,
                        project_number,
                        overwrite,
                        fn = NULL,
                        ...) {
  if (!is.data.frame(object) && !is.list(object) && needed_extension %unlike% "xls" ) {
    # maybe it's a name of an object? Try to get it:
    object <- tryCatch(eval(parse(text = object)),
                       error = function(e) NULL)
    if (!is.data.frame(object)) {
      stop("'object' must be a data.frame", call. = FALSE)
    }
  }
  if (is.null(needed_extension)) {
    needed_extension <- ""
  }
  filename_deparse <- gsub('"', "", filename_deparse, fixed = TRUE)
  if (is.null(filename)) {
    filename <- filename_deparse
  }
  needed_extension <- tolower(needed_extension)
  filename <- parse_file_location(filename,
                                  needed_extension = needed_extension,
                                  project_number = project_number)
  filename_old <- paste0(filename, ".certetoolbox_export")
  if (!file_can_be_overwritten(overwrite, filename)) {
    return(invisible(object))
  }
  
  needed_extension <- needed_extension[1L]
  if (needed_extension == "") {
    if (filename %unlike% "[.][a-z0-9_-]+$") {
      warning("No (valid) file extension set.", call. = FALSE)
    }
    # custom method
    fn(object, filename, ...)
  } else if (needed_extension == "rds") {
    # R format
    base::saveRDS(object, file = filename, ...)
  } else if (needed_extension == "sav") {
    # SPSS format
    haven::write_sav(object, path = filename, ...)
  } else if (needed_extension == "feather") {
    # Apache's Feather format
    object <- rownames_1st_column(object)
    arrow::write_feather(x = object, sink = filename, ...)
  # } else if (needed_extension == "parquet") {
  #   # Apache's Parquet format
  #   object <- rownames_1st_column(object)
  #   arrow::write_parquet()
  #   arrow::write_parquet(x = object, sink = filename, ...)
  } else if (needed_extension == "xlsx") {
    xl_object <- object
    # Excel format
    if (!inherits(xl_object, class(wb_workbook()))) {
      # not yet an openxlsx2 object (but rather e.g. a data frame)
      xl_object <- suppressMessages(as_excel(xl_object, project_number = project_number, ...))
    }
    suppressMessages(save_excel(xl = xl_object, filename = filename, overwrite = TRUE))
    if (is.list(object)) {
      object <- object[[1]]
    }
  } else {
    # flat data file
    object <- rownames_1st_column(object)
    if (needed_extension %in% c("csv", "tsv", "txt")) {
      # arguments such as 'sep' etc. are passed into '...':
      utils::write.table(object, file = filename, ...)
    } else {
      stop("Unknown extension method: ", needed_extension, call. = FALSE)
    }
  }
  if (file.exists(filename)) {
    message(paste0("Exported data set (",
                   format2(NROW(object)), pkg_env$cross_icon, format2(NCOL(object)),
                   ") to '",
                   tools::file_path_as_absolute(filename), 
                   "' (", size_humanreadable(file.size(filename)), ")."))
  } else {
    warning("Error while exporting to `", filename, "`.", call. = FALSE)
    # revert the old existing file (see the file_can_be_overwritten() function)
    if (file.exists(filename_old)) {
      file.copy(filename_old, filename, copy.mode = TRUE, copy.date = TRUE)
    }
  }
  if (file.exists(filename_old)) {
    try(file.remove2(filename_old), silent = TRUE)
  }
  invisible(structure(object, filename = tools::file_path_as_absolute(filename)))
}

#' @importFrom readr read_delim locale problems
#' @importFrom dplyr select
#' @importFrom certeprojects project_get_file
import_exec <- function(filename,
                        filename_deparse,
                        extension,
                        project_number,
                        auto_transform,
                        ...) {
  extension <- tolower(extension[1L])
  
  if (!is.character(filename)) {
    filename <- filename_deparse
  }
  
  filename_url <- NULL
  if (filename %like% "^(http|https|ftp|sftp|ftps|ssh)://") {
    # download the file first
    filename_url <- filename
    if (filename_url %like% "git(hub|lab)[.]com/.*/blob/") {
      # get GitHub/GitLab raw URL
      filename_url <- gsub("/blob/", "/raw/", filename_url, fixed = TRUE)
    }
    if (extension == "") {
      extension <- tools::file_ext(basename(filename_url))
    }
    filename <- tempfile(pattern = "import_", fileext = paste0(".", extension))
    if (.Platform$OS.type == "windows") {
      utils::download.file(url = filename_url, destfile = filename, mode = "wb")
    } else {
      utils::download.file(url = filename_url, destfile = filename)
    }
    if (!file.exists(filename)) {
      stop("Failed to download: ", filename_url, call. = FALSE)
    } else {
      message("Downloaded file: ", size_humanreadable(file.size(filename), decimal.mark = "."), " (", file.size(filename), " bytes)")
    }
  }
  
  filename <- gsub('\\', '/', filename, fixed = TRUE)
  if (filename %unlike% "[.][a-zA-Z0-9]{1,7}$") {
    # does not have extension yet
    filename <- paste0(filename, ".", gsub("^[.]", "", extension))
  }
  if (!file.exists(filename) && !is.null(project_number)) {
    # try project file using the 'certeprojects' package
    filename <- project_get_file(filename, project_number = project_number)
  }
  
  if (!file.exists(filename)) {
    stop("File not found: ", filename, call. = FALSE)
  }
  
  if (extension == "rds") {
    # R format
    df <- base::readRDS(file = filename)
  } else if (extension == "rda") {
    env <- new.env()
    load(filename, envir = env)
    df <- get(names(env), envir = env)
    rm(env)
  } else if (extension %in% c("csv", "tsv", "txt")) {
    # flat files
    df <- read_delim(file = filename,
                     guess_max = 20000,
                     delim = list(...)$sep,
                     na = list(...)$na,
                     progress = interactive(),
                     skip = list(...)$skip,
                     locale = locale(date_names = list(...)$datenames,
                                     date_format = list(...)$dateformat,
                                     time_format = list(...)$timeformat,
                                     tz = list(...)$timezone,
                                     decimal_mark = list(...)$decimal.mark,
                                     grouping_mark = list(...)$big.mark,
                                     encoding = list(...)$encoding),
                     show_col_types = FALSE)
    probs <- problems(df)
    if (nrow(probs) > 0) {
      warning("Contents of `readr::problems()`:\n", paste0(format(probs), collapse = "\n"), call. = FALSE)
    }
    if (isTRUE(auto_transform)) {
      df <- auto_transform(df, decimal.mark = ".", big.mark = "")
      auto_transform <- FALSE
    }
  } else if (extension == "sav") {
    # SPSS format
    df <- haven::as_factor(haven::read_sav(file = filename))
  } else if (extension %like% "xlsx?") {
    # Excel format
    df <- readxl::read_excel(path = filename,
                             guess_max = 200000,
                             sheet = list(...)$sheet,
                             range = list(...)$range,
                             na = list(...)$na,
                             skip = list(...)$skip)
  # } else if (extension %in% c("feather", "parquet")) {
  } else if (extension == "feather") {
    # Apache's Feather and Parquet format
    fun <- eval(parse(text = paste0("read_", extension)),
                envir = asNamespace("arrow"))
    df <- fun(file = filename,
              # always include first column, might be rownames
              col_select = c(1, list(...)$col_select),
              as_data_frame = TRUE)
    if (colnames(df)[1] != "rownames" && !1 %in% list(...)$col_select) {
      # 1st column is not rownames and 1st column was not in original selection, so:
      df <- df |> select(-1)
    }
    
  } else {
    # use rio::import which pretty much understands any file type
    check_is_installed("rio")
    df <- rio::import(file = filename, ...)
  }
  
  if (!inherits(df, "sf")) {
    # force plain data.frame if type is not map data
    df <- as.data.frame(df, stringsAsFactors = FALSE)
  }
  
  # if row names were saved as first col, set back to row names
  if (colnames(df)[1L] %like% "row.?names?") {
    rownames(df) <- df[, 1, drop = TRUE]
    df <- select(df, -1)
    message("Row names restored from first column.")
  }
  
  if (isTRUE(auto_transform)) {
    df <- auto_transform(df, ...)
  }
  
  if (interactive()) {
    if (is.null(filename_url)) {
      file_src <- tools::file_path_as_absolute(filename)
    } else {
      file_src <- filename_url
      # try to remove downloaded file
      try(file.remove2(filename), silent = TRUE)
    }
    message(
      paste0(
        "Imported data set (",
        format2(NROW(df)), pkg_env$cross_icon, format2(NCOL(df)), ") from '",
        file_src, "'"
      )
    )
  }
  
  df
}

#' @importFrom certeprojects project_set_file
parse_file_location <- function(filename, needed_extension, project_number) {
  if (is.null(project_number) || is.na(project_number) || isFALSE(project_number) || project_number %in% c(0, "")) {
    project_number <- NULL
  }
  needed_extension <- gsub("^[.]", "", needed_extension[1L])
  if (filename == ".") {
    filename <- "tbl"
  }
  if (needed_extension != "" & filename %unlike% paste0("[.](", paste0(needed_extension, collapse = "|"), ")$")) {
    filename <- paste0(filename, ".", needed_extension[1L])
  }
  if (!is.null(project_number) && filename %unlike% paste0("p", project_number, "|[A-Z]:/")) {
    # has no valid location yet, so include project number
    filename_proj <- project_set_file(filename, project_number = project_number)
    if (!is.na(filename_proj)) {
      filename <- filename_proj
    }
  }
  # remove invalid characters
  filename <- gsub("[?|<>|*]+", "", filename)
  filename
}

doc_requirement <- function(filetype, fn, pkg) {
  fn <- paste0("[", fn, "()]", collapse = " or ")
  paste0(ifelse(fn %like% "import", "Importing", "Exporting to"), " ", 
         filetype, " using ", fn, " requires the ", paste0("`", pkg, "`", collapse = " and "), 
         " package", ifelse(length(pkg) > 1, "s", ""), " to be installed")
}

#' @importFrom tibble rownames_to_column
rownames_1st_column <- function(object) {
  if (!all(rownames(object) == as.character(seq_len(NROW(object))))) {
    object <- rownames_to_column(object, var = "rownames")
    message("Note: Row names added as first column 'rownames'")
  }
  object
}

#' @importFrom certestyle format2
file_can_be_overwritten <- function(overwrite, filename) {
  if (!file.exists(filename)) {
    return(TRUE)
  }
  if (is.logical(overwrite)) {
    return(overwrite)
  }
  
  file_info <- file.info(filename)
  file_text <- paste0("Created: ", format2(file_info$ctime, "yyyy-mm-dd HH:MM:ss"), "\n",
                      "Changed: ", format2(file_info$mtime, "yyyy-mm-dd HH:MM:ss"), "\n",
                      "Size: ", size_humanreadable(file_info$size, decimal.mark = "."))
  if (base::interactive()) {
    q_text <- paste0("The file '", filename, "' already exists:\n",
                     file_text, "\n\n",
                     "Overwrite this file?")
    q <- tryCatch(rstudioapi::showQuestion(title = paste("File", basename(filename), "exists"),
                                           message = q_text,
                                           ok = "Yes",
                                           cancel = "No"),
                  error = function(e) NULL)
    if (is.null(q)) {
      q <- utils::askYesNo(q_text, default = TRUE, prompts = c("Yes", "No", "Cancel"))
    }
    if (isTRUE(q)) {
      # so the file exists - create a backup so it can be reverted back if export fails
      file.copy(from = filename, to = paste0(filename, ".certetoolbox_export"))
      # and remove the existing file
      try(file.remove2(filename), silent = TRUE)
    }
    return(isTRUE(q))
  } else {
    # non-interactive mode, make a permanent copy
    filename_new <- gsub("[.]([a-zA-Z0-9_-]+)$", paste0("_", format2(now(), "yyyymmdd-HHMMSS"), ".\\1"), filename)
    file.copy(from = filename, to = filename_new)
    # and remove the existing file
    try(file.remove2(filename), silent = TRUE)
    message("Original file ", filename, " existed, this file was renamed to ", filename_new, " before overwriting the original file.")
    return(TRUE)
  }
}

plot_export_result <- function(filename) {
  filename_old <- paste0(filename, ".certetoolbox_export")
  if (file.exists(filename)) {
    message(paste0("Exported plot to '",
                   tools::file_path_as_absolute(filename),
                   "' (", size_humanreadable(file.size(filename)), ")."))
  } else {
    # revert the old existing file (see the file_can_be_overwritten() function)
    if (file.exists(filename_old)) {
      file.copy(filename_old, filename, copy.mode = TRUE, copy.date = TRUE)
    }
    warning("Error while saving `", filename, "`.", call. = FALSE)
  }
  
  try(file.remove2(filename_old), silent = TRUE)
  if (file.exists(filename)) {
    return(invisible(tools::file_path_as_absolute(filename)))
  } else {
    return(invisible(NULL))
  }
}


# Export functions --------------------------------------------------------

#' Export Data Sets and Plots
#' 
#' These functions can be used to export data sets and plots. They invisibly return the object itself again, allowing for usage in pipes (except for the plot-exporting functions [export_pdf()], [export_png()] and [export_html()]). The functions work closely together with the `certeprojects` package to support Microsoft Planner project numbers.
#' @param object,plot the \R object to export
#' @param fn a manual export function, such as `haven::write_sas` to write SAS files. This function has to have the object as first argument and the future file location as second argument.
#' @param filename the full path of the exported file
#' @param project_number a Microsoft Planner project number
#' @param overwrite a [logical] value to indicate if an existing file must be overwritten. In [interactive mode][base::interactive()], this will be asked if the file exists. In non-interactive mode, this has a special default behaviour: the original file will be copied to `filename_datetime.ext` before overwriting the file. Exporting with existing files is always non-destructive: if exporting fails, the original, existing file will not be altered.
#' @param ... arguments passed on to methods
#' @details The [export()] function can export to any file format, also with a manually set export function when passed on to the `fn` argument. This function `fn` has to have the object as first argument and the future file location as second argument. If `fn` is left blank, the `export_*` function will be used based on the filename.
#' @rdname export
#' @seealso [import()]
#' @export
#' @examples 
#' library(dplyr, warn.conflicts = FALSE)
#' 
#' # export to two files: 'whole_file.rds' and 'first_ten_rows.xlsx'
#' starwars |>
#'   export_rds("whole_file") |>
#'   slice(1:10) |>
#'   export_xlsx("first_ten_rows")
#'   
#' # the above is equal to:
#' # starwars |>
#' #   export("whole_file.rds") |>
#' #   slice(1:10) |>
#' #   export("first_ten_rows.xlsx")
#' 
#' 
#' # Apache's Feather format is column-based
#' # and allow for cross-language specific and fast file reading
#' starwars |> export_feather()
#' import("starwars.feather",
#'        col_select = starts_with("h")) |> 
#'   head()
#'   
#' 
#' # (cleanup)
#' file.remove("whole_file.rds")
#' file.remove("first_ten_rows.xlsx")
#' file.remove("starwars.feather")
export <- function(object,
                   filename = NULL,
                   project_number = project_get_current_id(ask = FALSE),
                   overwrite = NULL,
                   fn = NULL,
                   ...) {
  
  if (!is.null(fn)) {
    # export using manual function
    if (is.character(fn)) {
      fn <- tryCatch(eval(parse(text = fn)), error = function(e) NA_character_)
    }
    if (!is.function(fn)) {
      stop("`fn` must be a function")
    }
    export_exec(object = object,
                needed_extension = NULL,
                filename = filename,
                filename_deparse = deparse(substitute(filename)),
                project_number = project_number,
                overwrite = overwrite,
                fn = fn,
                ...)
  } else {
    if (is.null(filename)) {
      filename <- deparse(substitute(filename))
    }
    if (filename %like% "[.]rds$") {
      export_rds(object = object,
                 filename = filename,
                 project_number = project_number,
                 overwrite = overwrite,
                 ...)
    } else if (filename %like% "[.]csv$") {
      export_csv(object = object,
                 filename = filename,
                 project_number = project_number,
                 overwrite = overwrite,
                 ...)
    } else if (filename %like% "[.]tsv$") {
      export_tsv(object = object,
                 filename = filename,
                 project_number = project_number,
                 overwrite = overwrite,
                 ...)
    } else if (filename %like% "[.]txt$") {
      export_txt(object = object,
                 filename = filename,
                 project_number = project_number,
                 overwrite = overwrite,
                 ...)
    } else if (filename %like% "[.]xlsx?$") {
      export_xlsx(object = object,
                  filename = filename,
                  project_number = project_number,
                  overwrite = overwrite,
                  ...)
    } else if (filename %like% "[.]sav$") {
      export_sav(object = object,
                 filename = filename,
                 project_number = project_number,
                 overwrite = overwrite,
                 ...)
    } else if (filename %like% "[.]feather$") {
      export_feather(object = object,
                     filename = filename,
                     project_number = project_number,
                     overwrite = overwrite,
                     ...)
    # } else if (filename %like% "[.]parquet$") {
    #   export_parquet(object = object,
    #                  filename = filename,
    #                  project_number = project_number,
    #                  overwrite = overwrite,
    #                  ...)
    } else if (filename %like% "[.]pdf$") {
      export_pdf(plot = object,
                 filename = filename,
                 project_number = project_number,
                 overwrite = overwrite,
                 ...)
    } else if (filename %like% "[.]png$") {
      export_png(plot = object,
                 filename = filename,
                 project_number = project_number,
                 overwrite = overwrite,
                 ...)
    } else if (filename %like% "[.]html$") {
      export_html(plot = object,
                  filename = filename,
                  project_number = project_number,
                  overwrite = overwrite,
                  ...)
    } else {
      stop("Unknown file format for export: ", filename, call. = FALSE)
    }
  }
}

#' @rdname export
#' @details RDS files as created using [export_rds()] are compatible with R3 and R4.
#' @export
export_rds <- function(object,
                       filename = NULL,
                       project_number = project_get_current_id(ask = FALSE),
                       overwrite = NULL,
                       ...) {
  export_exec(object, "rds",
              filename = filename,
              filename_deparse = deparse(substitute(object)),
              project_number = project_number,
              overwrite = overwrite,
              compress = "gzip",
              ascii = FALSE,
              version = 2)
}

#' @rdname export
#' @inheritParams as_excel
#' @details The [export_xlsx()] and [export_excel()] functions use [`save_excel(as_excel(...))`][as_excel()] internally. **IMPORTANT**: these two functions can accept more than one [data.frame]. When naming the data sets, the names will become sheet names in the resulting Excel file. For a complete visual overview of supported table styles, see [as_excel()]. If the last value in `...` is a [character] of length 1 and `filename` is `NULL`, this value is assumed to be the filename.
#' @export
export_xlsx <- function(...,
                        filename = NULL,
                        project_number = project_get_current_id(ask = FALSE),
                        overwrite = NULL,
                        sheet_names = NULL,
                        autofilter = TRUE,
                        rows_zebra = TRUE,
                        cols_zebra = FALSE,
                        freeze_top_row = TRUE,
                        table_style = "TableStyleMedium2",
                        align = "center") {
  object <- list(...)
  if (length(object) > 1) {
    object_name <- "."
    # check if second value is filename
    if (is.null(filename) && is.character(object[[2]]) && length(object[[2]]) == 1) {
      filename <- object[[2]]
      object <- object[-2]
    }
    # check if third value is project number
    if (length(object) > 1 && is.null(project_number) && is.numeric(object[[length(object)]]) && length(object[[length(object)]]) == 1) {
      project_number <- object[[length(object)]]
      object <- object[-length(object)]
    }
  } else if (length(object) == 1) {
    object_name <- tryCatch(paste0(trimws(deparse(substitute(...))), collapse = ""), error = function(x) ".")
  }
  export_exec(object = object, "xlsx",
              filename = filename,
              filename_deparse = object_name,
              project_number = project_number,
              overwrite = overwrite,
              sheet_names = sheet_names,
              autofilter = autofilter,
              rows_zebra = rows_zebra,
              cols_zebra = cols_zebra,
              freeze_top_row = freeze_top_row,
              table_style = table_style,
              align = align)
}

#' @rdname export
#' @export
export_excel <- export_xlsx

#' @rdname export
#' @param na replacement character for empty values (default: `""`)
#' @details For [export_csv()], [export_csv2()] and [export_tsv()], files will be saved in UTF-8 encoding and `NA` values will be exported as `""` at default. Like other `*.csv` and `*.csv2` functions, csv is comma (`,`) separated and csv2 is semicolon (`;`) separated.
#' @export
export_csv <- function(object,
                       filename = NULL,
                       project_number = project_get_current_id(ask = FALSE),
                       overwrite = NULL,
                       na = "",
                       ...) {
  export_exec(object, "csv",
              filename = filename,
              filename_deparse = deparse(substitute(object)),
              project_number = project_number,
              overwrite = overwrite,
              append = FALSE,
              quote = TRUE,
              sep = ",",
              eol = "\n",
              na = na,
              dec = ".",
              row.names = FALSE,
              col.names = TRUE,
              qmethod = "double",
              fileEncoding = "UTF-8")
}

#' @rdname export
#' @export
export_csv2 <- function(object,
                        filename = NULL,
                        project_number = project_get_current_id(ask = FALSE),
                        overwrite = NULL,
                        na = "",
                        ...) {
  export_exec(object, "csv",
              filename = filename,
              filename_deparse = deparse(substitute(object)),
              project_number = project_number,
              overwrite = overwrite,
              append = FALSE,
              quote = TRUE,
              sep = ";",
              eol = "\n",
              na = na,
              dec = ",",
              row.names = FALSE,
              col.names = TRUE,
              qmethod = "double",
              fileEncoding = "UTF-8")
}

#' @rdname export
#' @export
export_tsv <- function(object,
                       filename = NULL,
                       project_number = project_get_current_id(ask = FALSE),
                       overwrite = NULL,
                       na = "",
                       ...) {
  export_exec(object, "tsv",
              filename = filename,
              filename_deparse = deparse(substitute(object)),
              project_number = project_number,
              overwrite = overwrite,
              append = FALSE,
              quote = TRUE,
              sep = "\t",
              eol = "\n",
              na = na,
              dec = ".",
              row.names = FALSE,
              col.names = TRUE,
              qmethod = "double",
              fileEncoding = "UTF-8")
}

#' @rdname export
#' @param sep separator for values in a row (default: tab)
#' @details The [export_txt()] function exports to a tab-separated file.
#' @export
export_txt <- function(object,
                       filename = NULL,
                       project_number = project_get_current_id(ask = FALSE),
                       overwrite = NULL,
                       sep = "\t",
                       na = "",
                       ...) {
  export_exec(object, "txt",
              filename = filename,
              filename_deparse = deparse(substitute(object)),
              project_number = project_number,
              overwrite = overwrite,
              append = FALSE,
              quote = TRUE,
              sep = sep,
              eol = "\n",
              na = na,
              dec = ".",
              row.names = FALSE,
              col.names = TRUE,
              qmethod = "double",
              fileEncoding = "UTF-8")
}

#' @rdname export
#' @details `r doc_requirement("an SPSS file", c("export_sav", "export_spss"), "haven")`.
#' @export
export_sav <- function(object,
                       filename = NULL,
                       project_number = project_get_current_id(ask = FALSE),
                       overwrite = NULL,
                       ...) {
  check_is_installed("haven")
  export_exec(object, "sav",
              filename = filename,
              filename_deparse = deparse(substitute(object)),
              project_number = project_number,
              overwrite = overwrite,
              compress = FALSE)
}

#' @rdname export
#' @export
export_spss <- export_sav 

#' @rdname export
#' @details `r doc_requirement("a Feather file", "export_feather", "arrow")`. [Apache Feather](https://arrow.apache.org/docs/python/feather.html) provides efficient binary columnar serialization for data sets, enabling easy sharing data across data analysis languages (such as between Python and R).
#' @export
export_feather <- function(object,
                           filename = NULL,
                           project_number = project_get_current_id(ask = FALSE),
                           overwrite = NULL,
                           ...) {
  check_is_installed("arrow")
  export_exec(object, "feather",
              filename = filename,
              filename_deparse = deparse(substitute(object)),
              project_number = project_number,
              overwrite = overwrite,
              ...)
}

#' #' @rdname export
#' #' @details `r doc_requirement("a Parquet file", "export_parquet", "arrow")`. [Apache Parquet](https://parquet.apache.org) is an open source, column-oriented data file format designed for efficient data storage and retrieval.
#' #' @export
#' export_parquet <- function(object,
#'                            filename = NULL,
#'                            project_number = project_get_current_id(ask = FALSE),
#'                            overwrite = NULL,
#'                            ...) {
#'   check_is_installed("arrow")
#'   export_exec(object, "parquet",
#'               filename = filename,
#'               filename_deparse = deparse(substitute(object)),
#'               project_number = project_number,
#'               overwrite = overwrite,
#'               ...)
#' }

#' @rdname export
#' @param size paper size, defaults to A5. Can be A0 to A7.
#' @param portrait portrait mode, defaults to `FALSE` (i.e., landscape mode)
#' @details `r doc_requirement("a PDF file", "export_pdf", "ggplot2")`. If the filename is left blank in [export_pdf()], [export_png()] or [export_html()], the title of `plot` will be used if it's available and the `certeplot2` package is installed, and a timestamp otherwise. **NOTE:** All export functions invisibly return `object` again, but the plotting functions invisibly return the file path
#' @importFrom certestyle format2
#' @export
export_pdf <- function(plot,
                       filename = NULL,
                       project_number = project_get_current_id(ask = FALSE),
                       overwrite = NULL,
                       size = "A5",
                       portrait = FALSE,
                       ...) {
  check_is_installed("ggplot2")
  if ("certeplot2" %in% rownames(utils::installed.packages())) {
    get_plot_title <- certeplot2::get_plot_title
  } else {
    get_plot_title <- NULL
  }
  
  if (is.null(filename) && !is.null(get_plot_title)) {
    filename <- get_plot_title(plot)
    if (is.na(filename)) {
      filename <- format2(now(), "yyyy-mm-dd-HHMMSS")
    }
    filename <- paste0(filename, ".pdf")
  } else if (is.null(filename)) {
    filename <- format2(now(), "yyyy-mm-dd-HHMMSS")
  }
  filename <- parse_file_location(filename,
                                  needed_extension = "pdf",
                                  project_number = project_number)
  if (!file_can_be_overwritten(overwrite, filename)) {
    return(invisible(NULL))
  }
  
  a0_height <- sqrt(sqrt(2)) * 1000 # x1000 for millimetres
  a1_height <- a0_height / sqrt(2)
  a2_height <- a1_height / sqrt(2)
  a3_height <- a2_height / sqrt(2)
  a4_height <- a3_height / sqrt(2)
  a5_height <- a4_height / sqrt(2)
  a6_height <- a5_height / sqrt(2)
  a7_height <- a6_height / sqrt(2)
  if (tolower(size) == "a0") {
    height <- a0_height
    width <- a0_height / sqrt(2)
  } else if (tolower(size) == "a1") {
    height <- a1_height
    width <- a1_height / sqrt(2)
  } else if (tolower(size) == "a2") {
    height <- a2_height
    width <- a2_height / sqrt(2)
  } else if (tolower(size) == "a3") {
    height <- a3_height
    width <- a3_height / sqrt(2)
  } else if (tolower(size) == "a4") {
    height <- a4_height
    width <- a4_height / sqrt(2)
  } else if (tolower(size) == "a5") {
    height <- a5_height
    width <- a5_height / sqrt(2)
  } else if (tolower(size) == "a6") {
    height <- a6_height
    width <- a6_height / sqrt(2)
  } else if (tolower(size) == "a7") {
    height <- a7_height
    width <- a7_height / sqrt(2)
  } else {
    message("No valid value for size - using A4 as paper format.")
    height <- a4_height
    width <- a4_height / sqrt(2)
  }
  
  if (portrait == FALSE) {
    height.bak <- height
    height <- width
    width <- height.bak
  }
  
  ggplot2::ggsave(filename = filename,
                  device = grDevices::cairo_pdf,
                  width = width,
                  height = height,
                  units = "mm",
                  plot = plot,
                  ...)
  
  plot_export_result(filename)
}

#' @rdname export
#' @param width required width of the PNG file in pixels
#' @param height required height of the PNG file in pixels
#' @param dpi plot resolution, defaults to DPI set in `showtext` package
#' @details `r doc_requirement("a PNG file", "export_png", c("ggplot2", "showtext"))`.
#' @importFrom certestyle format2
#' @export
export_png <- function(plot,
                       filename = NULL,
                       project_number = project_get_current_id(ask = FALSE),
                       overwrite = NULL,
                       width = 1000,
                       height = 800,
                       dpi = NULL,
                       ...) {
  
  check_is_installed(c("ggplot2", "showtext"))
  if ("certeplot2" %in% rownames(utils::installed.packages())) {
    get_plot_title <- certeplot2::get_plot_title
  } else {
    get_plot_title <- NULL
  }
  
  if (is.null(filename) && !is.null(get_plot_title)) {
    filename <- get_plot_title(plot)
    if (is.na(filename)) {
      filename <- format2(now(), "yyyy-mm-dd-HHMMSS")
    }
    filename <- paste0(filename, ".png")
  } else if (is.null(filename)) {
    filename <- format2(now(), "yyyy-mm-dd-HHMMSS")
  }
  filename <- parse_file_location(filename,
                                  needed_extension = "png",
                                  project_number = project_number)
  if (!file_can_be_overwritten(overwrite, filename)) {
    return(invisible(NULL))
  }
  
  if (is.null(dpi)) {
    dpi <- showtext::showtext_opts()$dpi
  }
  dpi_old <- showtext::showtext_opts()$dpi
  showtext::showtext_opts(dpi = dpi)
  
  suppressWarnings(
    ggplot2::ggsave(filename = filename,
                    dpi = dpi,
                    width = width,
                    height = height,
                    units = "px",
                    plot = plot,
                    ...)
  )
  
  showtext::showtext_opts(dpi = dpi_old)
  
  plot_export_result(filename)
}

#' @rdname export
#' @details `r doc_requirement("an HTML file", "export_html", c("ggplot2", "htmltools"))`. The arguments put in `...` will be passed on to [plotly::layout()] if `plot` is not yet a Plotly object (but rather a `ggplot2` object), which of course then requires the `plotly` package to be installed as well.
#' @importFrom certestyle format2
#' @export
export_html <- function(plot,
                        filename = NULL,
                        project_number = project_get_current_id(ask = FALSE),
                        overwrite = NULL,
                        ...) {
  check_is_installed(c("ggplot2", "htmltools"))
  if ("certeplot2" %in% rownames(utils::installed.packages())) {
    get_plot_title <- certeplot2::get_plot_title
  } else {
    get_plot_title <- NULL
  }
  
  if (is.null(filename) && !is.null(get_plot_title)) {
    filename <- get_plot_title(plot)
    if (is.na(filename)) {
      filename <- format2(now(), "yyyy-mm-dd-HHMMSS")
    }
    filename <- paste0(filename, ".html")
  } else if (is.null(filename)) {
    filename <- format2(now(), "yyyy-mm-dd-HHMMSS")
  }
  filename <- parse_file_location(filename,
                                  needed_extension = "html",
                                  project_number = project_number)
  if (!file_can_be_overwritten(overwrite, filename)) {
    return(invisible(NULL))
  }
  
  if (ggplot2::is.ggplot(plot)) {
    # transform to plotly first
    check_is_installed("plotly")
    plot <- plotly::layout(plotly::ggplotly(plot), ...)
  }
  
  suppressWarnings(
    htmltools::save_html(plot,
                         file = filename,
                         lang = "nl",
                         libdir = "library_do_not_delete")
  )
  
  plot_export_result(filename)
}


# Import functions --------------------------------------------------------

#' Import Data Sets
#' 
#' These functions can be used to import data, from local or remote paths, or from the internet. They work closely with the `certeprojects` package to support Microsoft Planner project numbers. To support row names and older R versions, `import_*()` functions return plain [data.frame]s, not e.g. [tibble][tibble::tibble()]s.
#' @param filename the full path of the file to be imported, will be parsed to a [character], can also be a remote location (from http/https/ftp/ssh, GitHub/GitLab)
#' @param auto_transform transform the imported data with [auto_transform()]
#' @param project_number a Microsoft Planner project number
#' @param ... arguments passed on to methods
#' @details `r doc_requirement("any unlisted filetype", "import", "rio")`.
#' @rdname import
#' @seealso [export()]
#' @export
#' @examples 
#' export_csv(iris)
#' import_csv("iris") |> head()
#' 
#' # the above is equal to:
#' # export(iris, "iris.csv")
#' # import("iris.csv") |> head()
#' 
#' 
#' # row names are also supported
#' export_csv(mtcars)
#' import_csv("mtcars") |> head()
#' 
#' 
#' # Apache's Feather format is column-based
#' # and allow for specific and fast file reading
#' library(dplyr, warn.conflicts = FALSE)
#' starwars |> export_feather()
#' import("starwars.feather",
#'        col_select = starts_with("h")) |> 
#'   head()
#'   
#' 
#' # (cleanup)
#' file.remove("iris.csv")
#' file.remove("mtcars.csv")
#' file.remove("starwars.feather")
import <- function(filename,
                   project_number = project_get_current_id(ask = FALSE),
                   auto_transform = TRUE,
                   encoding = "UTF-8",
                   ...) {
  if (!is.character(filename)) {
    filename <- deparse(substitute(filename))
  }
  if (filename %like% "[.]rds$") {
    import_rds(filename = filename,
               project_number = project_number,
               ...)
  } else if (filename %like% "[.]csv$" && (is.null(list(...)$sep) || identical(list(...)$sep, ","))) {
    import_csv(filename = filename,
               project_number = project_number,
               auto_transform = auto_transform,
               ...)
  } else if (filename %like% "[.]csv$" && identical(list(...)$sep, ";")) {
    import_csv2(filename = filename,
                project_number = project_number,
                auto_transform = auto_transform,
                ...)
  } else if (filename %like% "[.]tsv$") {
    import_tsv(filename = filename,
               project_number = project_number,
               auto_transform = auto_transform,
               ...)
  } else if (filename %like% "[.]txt$") {
    import_txt(filename = filename,
               project_number = project_number,
               auto_transform = auto_transform,
               ...)
  } else if (filename %like% "[.]xlsx?$") {
    import_xlsx(filename = filename,
                project_number = project_number,
                auto_transform = auto_transform,
                ...)
  } else if (filename %like% "[.]sav$") {
    import_sav(filename = filename,
               project_number = project_number,
               auto_transform = auto_transform,
               ...)
  } else if (filename %like% "[.]feather$") {
    import_feather(filename = filename,
                   project_number = project_number,
                   ...)
  # } else if (filename %like% "[.]parquet$") {
  #   import_parquet(filename = filename,
  #                  project_number = project_number,
  #                  ...)
  } else {
    import_exec(filename,
                extension = "",
                project_number = project_number,
                auto_transform = auto_transform,
                ...)
  }
}

#' @rdname import
#' @export
import_rds <- function(filename,
                       project_number = project_get_current_id(ask = FALSE),
                       ...) {
  import_exec(filename,
              filename_deparse = deparse(substitute(filename)),
              extension = "rds",
              project_number = project_number,
              auto_transform = FALSE)
}

#' @rdname import
#' @param sheet Excel sheet to import, defaults to first sheet
#' @param range a cell range to read from, allows typical Excel ranges such as "B3:D87" and "Budget!B2:G14"
#' @param skip number of first rows to skip
#' @inheritParams auto_transform
#' @details `r doc_requirement("an Excel file", c("import_xlsx", "import_excel"), "readxl")`.
#' @importFrom cleaner format_datetime
#' @export
import_xlsx <- function(filename,
                        project_number = project_get_current_id(ask = FALSE),
                        sheet = 1,
                        range = NULL,
                        auto_transform = TRUE,
                        datenames = "nl",
                        dateformat = "yyyy-mm-dd",
                        timeformat = "HH:MM",
                        decimal.mark = dec_mark(),
                        big.mark = "",
                        timezone = "UTC",
                        na = c("", "NULL", "NA", "<NA>"),
                        skip = 0,
                        ...) {
  check_is_installed("readxl")
  if (length(sheet) != 1) {
    stop("'sheet' must be a single number or name, since only one sheet can be imported at a time", call. = FALSE)
  }
  import_exec(filename,
              filename_deparse = deparse(substitute(filename)),
              extension = ifelse(filename %like% "[.]xls$", "xls", "xlsx"),
              project_number = project_number,
              sheet = sheet,
              range = range,
              auto_transform = auto_transform,
              datenames = datenames,
              dateformat = format_datetime(dateformat),
              timeformat = format_datetime(timeformat),
              decimal.mark = decimal.mark,
              big.mark = big.mark,
              timezone = timezone,
              na = na,
              skip = skip)
}

#' @rdname import
#' @export
import_excel <- import_xlsx

#' @rdname import
#' @param encoding Default encoding. This only affects how the file is read.
#' @importFrom cleaner format_datetime
#' @export
import_csv <- function(filename,
                       project_number = project_get_current_id(ask = FALSE),
                       auto_transform = TRUE,
                       datenames = "nl",
                       dateformat = "yyyy-mm-dd",
                       timeformat = "HH:MM",
                       decimal.mark = ".",
                       big.mark = "",
                       timezone = "UTC",
                       na = c("", "NULL", "NA", "<NA>"),
                       skip = 0,
                       encoding = "UTF-8",
                       ...) {
  import_exec(filename,
              filename_deparse = deparse(substitute(filename)),
              extension = "csv",
              sep = ",",
              project_number = project_number,
              auto_transform = auto_transform,
              datenames = datenames,
              dateformat = format_datetime(dateformat),
              timeformat = format_datetime(timeformat),
              decimal.mark = decimal.mark,
              big.mark = big.mark,
              timezone = timezone,
              na = na,
              skip = skip,
              encoding = encoding)
}

#' @rdname import
#' @importFrom cleaner format_datetime
#' @export
import_csv2 <- function(filename,
                        project_number = project_get_current_id(ask = FALSE),
                        auto_transform = TRUE,
                        datenames = "nl",
                        dateformat = "yyyy-mm-dd",
                        timeformat = "HH:MM",
                        decimal.mark = ",",
                        big.mark = "",
                        timezone = "UTC",
                        na = c("", "NULL", "NA", "<NA>"),
                        skip = 0,
                        encoding = "UTF-8",
                        ...) {
  import_exec(filename,
              filename_deparse = deparse(substitute(filename)),
              extension = "csv",
              sep = ";",
              project_number = project_number,
              auto_transform = auto_transform,
              datenames = datenames,
              dateformat = format_datetime(dateformat),
              timeformat = format_datetime(timeformat),
              decimal.mark = decimal.mark,
              big.mark = big.mark,
              timezone = timezone,
              na = na,
              skip = skip,
              encoding = encoding)
}

#' @rdname import
#' @importFrom cleaner format_datetime
#' @export
import_tsv <- function(filename,
                       project_number = project_get_current_id(ask = FALSE),
                       auto_transform = TRUE,
                       datenames = "nl",
                       dateformat = "yyyy-mm-dd",
                       timeformat = "HH:MM",
                       decimal.mark = ".",
                       big.mark = "",
                       timezone = "UTC",
                       na = c("", "NULL", "NA", "<NA>"),
                       skip = 0,
                       encoding = "UTF-8",
                       ...) {
  import_exec(filename,
              filename_deparse = deparse(substitute(filename)),
              extension = "tsv",
              sep = "\t",
              project_number = project_number,
              auto_transform = auto_transform,
              datenames = datenames,
              dateformat = format_datetime(dateformat),
              timeformat = format_datetime(timeformat),
              decimal.mark = decimal.mark,
              big.mark = big.mark,
              timezone = timezone,
              na = na,
              skip = skip,
              encoding = encoding)
}

#' @rdname import
#' @importFrom cleaner format_datetime
#' @export
import_txt <- function(filename,
                       project_number = project_get_current_id(ask = FALSE),
                       auto_transform = TRUE,
                       sep = "\t",
                       datenames = "nl",
                       dateformat = "yyyy-mm-dd",
                       timeformat = "HH:MM",
                       decimal.mark = ",",
                       big.mark = "",
                       timezone = "UTC",
                       na = c("", "NULL", "NA", "<NA>"),
                       skip = 0,
                       encoding = "UTF-8",
                       ...) {
  import_exec(filename,
              filename_deparse = deparse(substitute(filename)),
              extension = "txt",
              sep = sep,
              project_number = project_number,
              auto_transform = auto_transform,
              datenames = datenames,
              dateformat = format_datetime(dateformat),
              timeformat = format_datetime(timeformat),
              decimal.mark = decimal.mark,
              big.mark = big.mark,
              timezone = timezone,
              na = na,
              skip = skip,
              encoding = encoding)
}

#' @rdname import
#' @details `r doc_requirement("an SPSS file", c("import_sav", "import_spss"), "haven")`.
#' @export
import_sav <- function(filename,
                       project_number = project_get_current_id(ask = FALSE),
                       auto_transform = TRUE,
                       datenames = "en",
                       dateformat = "yyyy-mm-dd",
                       timeformat = "HH:MM",
                       decimal.mark = ".",
                       big.mark = "",
                       timezone = "UTC",
                       na = c("", "NULL", "NA", "<NA>"),
                       ...) {
  check_is_installed("haven")
  import_exec(filename,
              filename_deparse = deparse(substitute(filename)),
              extension = "sav",
              project_number = project_number,
              auto_transform = auto_transform,
              datenames = datenames,
              dateformat = dateformat,
              timeformat = timeformat,
              decimal.mark = decimal.mark,
              big.mark = big.mark,
              timezone = timezone,
              na = na)
}

#' @rdname import
#' @export
import_spss <- import_sav

#' @rdname import
#' @details `r doc_requirement("a Feather file", "import_feather", "arrow")`. [Apache Feather](https://arrow.apache.org/docs/python/feather.html) provides efficient binary columnar serialization for data sets, enabling easy sharing data across data analysis languages (such as between Python and R). Use the `col_select` argument (which supports the [tidyselect language][tidyselect::language]) for specific data selection to improve importing speed.
#' @param col_select columns to select, supports the [tidyselect language][tidyselect::language])
#' @importFrom dplyr everything
#' @export
import_feather <- function(filename,
                           project_number = project_get_current_id(ask = FALSE),
                           col_select = everything(),
                           ...) {
  check_is_installed("arrow")
  import_exec(filename,
              filename_deparse = deparse(substitute(filename)),
              extension = "feather",
              project_number = project_number,
              auto_transform = FALSE,
              col_select = col_select)
}

#' #' @rdname import
#' #' @details `r doc_requirement("a Parquet file", "import_parquet", "arrow")`. [Apache Parquet](https://parquet.apache.org) is an open source, column-oriented data file format designed for efficient data storage and retrieval. Use the `col_select` argument (which supports the [tidyselect language][tidyselect::language]) for specific data selection to improve importing speed.
#' #' @importFrom dplyr everything
#' #' @export
#' import_parquet <- function(filename,
#'                            project_number = project_get_current_id(ask = FALSE),
#'                            col_select = everything(),
#'                            ...) {
#'   check_is_installed("arrow")
#'   import_exec(filename,
#'               filename_deparse = deparse(substitute(filename)),
#'               extension = "parquet",
#'               project_number = project_number,
#'               auto_transform = FALSE,
#'               col_select = col_select)
#' }
certe-medical-epidemiology/certetoolbox documentation built on April 17, 2025, 3:24 a.m.