Nothing
# Copyright 2024 Observational Health Data Sciences and Informatics
#
# This file is part of CohortGenerator
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#' Used to read a .csv file
#'
#' @description
#' This function is used to centralize the function for reading
#' .csv files across the HADES ecosystem. This function will automatically
#' convert from snake_case in the file to camelCase in the data.frame returned
#' as is the standard described in:
#' https://ohdsi.github.io/Hades/codeStyle.html#Interfacing_between_R_and_SQL
#'
#' @param file The .csv file to read.
#' @param warnOnCaseMismatch When TRUE, raise a warning if column headings
#' in the .csv are not in snake_case format
#'
#' @param colTypes Corresponds to the `col_types` in the `readr::read_csv` function.
#' One of `NULL`, a [readr::cols()] specification, or
#' a string. See `vignette("readr")` for more details.
#'
#' If `NULL`, all column types will be inferred from `guess_max` rows of the
#' input, interspersed throughout the file. This is convenient (and fast),
#' but not robust. If the guessed types are wrong, you'll need to increase
#' `guess_max` or supply the correct types yourself.
#'
#' Column specifications created by [list()] or [cols()] must contain
#' one column specification for each column.
#'
#' Alternatively, you can use a compact string representation where each
#' character represents one column:
#' - c = character
#' - i = integer
#' - n = number
#' - d = double
#' - l = logical
#' - f = factor
#' - D = date
#' - T = date time
#' - t = time
#' - ? = guess
#' - _ or - = skip
#'
#' By default, reading a file without a column specification will print a
#' message showing what `readr` guessed they were. To remove this message,
#' set `show_col_types = FALSE` or set `options(readr.show_col_types = FALSE)`.
#'
#' @return
#' A tibble with the .csv contents
#'
#' @export
readCsv <- function(file, warnOnCaseMismatch = TRUE, colTypes = readr::cols()) {
  # Load the file; at this point the headers are exactly as stored on disk.
  contents <- .readCsv(file = file, colTypes = colTypes)
  headers <- colnames(contents)

  # One flag per header: TRUE when the header is snake_case.
  snakeCaseFlags <- isSnakeCase(headers)

  # Optionally report every header that is not snake_case.
  if (!all(snakeCaseFlags) && warnOnCaseMismatch) {
    offendingHeaders <- paste(headers[!snakeCaseFlags], collapse = ", ")
    warning(paste("The following columns were not in snake case format:", offendingHeaders))
  }

  # Only snake_case headers are converted to camelCase; all other headers
  # are left exactly as they were read.
  snakeCaseIdx <- which(snakeCaseFlags)
  names(contents)[snakeCaseIdx] <- SqlRender::snakeCaseToCamelCase(names(contents)[snakeCaseIdx])

  invisible(contents)
}
#' Internal read CSV file when control over column formatting is required.
#'
#' @description
#' This function is internal to the package
#'
#' @param file The .csv file to read.
#' @param colTypes Passed to the `col_types` argument of `readr::read_csv`.
#'
#' @return
#' Returns the file contents invisibly.
#'
#' @noRd
#' @keywords internal
.readCsv <- function(file, colTypes = readr::cols()) {
  # Delegate to readr, forwarding the column specification unchanged and
  # explicitly turning off readr's lazy reading.
  contents <- readr::read_csv(file = file, col_types = colTypes, lazy = FALSE)

  # Hand the result back without auto-printing it.
  invisible(contents)
}
#' Used to write a .csv file
#'
#' @description
#' This function is used to centralize the function for writing
#' .csv files across the HADES ecosystem. This function will automatically
#' convert from camelCase in the data.frame to snake_case column names
#' in the resulting .csv file as is the standard
#' described in:
#' https://ohdsi.github.io/Hades/codeStyle.html#Interfacing_between_R_and_SQL
#'
#' This function may also raise warnings if the data is stored in a format
#' that will not work with the HADES standard for uploading to a results database.
#' Specifically, file names should be in snake_case format, all column headings
#' should be in snake_case format, and where possible the file name should not be plural.
#' See \code{isFormattedForDatabaseUpload} for a helper function to check a
#' data.frame for rules on the column names
#'
#' @param x A data frame or tibble to write to disk.
#'
#' @param file The .csv file to write.
#'
#' @param append When TRUE, append the values of x to an existing file.
#'
#' @param warnOnCaseMismatch When TRUE, raise a warning if columns in the
#' data.frame are NOT in camelCase format.
#'
#' @param warnOnFileNameCaseMismatch When TRUE, raise a warning if the file
#' name specified is not in snake_case format.
#'
#' @param warnOnUploadRuleViolations When TRUE, this function will provide
#' warning messages that may indicate if the data is stored in a format in the
#' .csv that may cause problems when uploading to a database.
#'
#' @return
#' Returns the input x invisibly.
#'
#' @export
writeCsv <- function(x, file, append = FALSE, warnOnCaseMismatch = TRUE, warnOnFileNameCaseMismatch = TRUE, warnOnUploadRuleViolations = TRUE) {
  # Optionally warn about column names that are not camelCase before they
  # are converted.
  columnNames <- colnames(x)
  columnNamesInCamelCaseFormat <- isCamelCase(columnNames)
  if (!all(columnNamesInCamelCaseFormat) && warnOnCaseMismatch) {
    problemColumns <- columnNames[!columnNamesInCamelCaseFormat]
    problemColumnsWarning <- paste(problemColumns, collapse = ", ")
    warning(paste("The following columns were not in camel case format:", problemColumnsWarning))
  }

  # The conversion is applied to every column name, whether or not it passed
  # the camelCase check above.
  colnames(x) <- SqlRender::camelCaseToSnakeCase(colnames(x))

  if (warnOnUploadRuleViolations) {
    # Check the data.frame to ensure it is in the proper format for upload
    isFormattedForDatabaseUpload(x)

    # Check if the file name is in lower snake case format. Note that this
    # check only runs when warnOnUploadRuleViolations is TRUE.
    # Only a trailing, literal ".csv" extension is removed: the "." is escaped
    # and the pattern is anchored so that names such as "xcsv.csv" are not
    # reduced to an empty string or otherwise mangled before the check.
    fileName <- sub(pattern = "\\.csv$", replacement = "", x = basename(file))
    if (!isSnakeCase(fileName) && warnOnFileNameCaseMismatch) {
      warning(paste("The filename:", basename(file), "is not in snake case format"))
    }
  }

  # Write the file; the value of .writeCsv (x with its converted column
  # names, invisibly) is returned to the caller.
  .writeCsv(x = x, file = file, append = append)
}
#' Internal write.csv file when control over column formatting is required.
#'
#' @description
#' This function is internal to the package since the \code{exportCohortStatsTables}
#' requires additional control over the column formatting which requires that
#' we bypass the \code{writeCsv} function since it will automatically convert
#' from camelCase to snake_case.
#'
#' @param x A data frame or tibble to write to disk.
#'
#' @param file The .csv file to write.
#'
#' @param append When TRUE, append the values of x to an existing file.
#'
#' @return
#' Returns the input x invisibly.
#'
#' @noRd
#' @keywords internal
.writeCsv <- function(x, file, append = FALSE) {
  # Column names are written exactly as given; no case conversion happens here.
  readr::write_csv(x = x, file = file, append = append)

  # Return the data that was written, without auto-printing it.
  invisible(x)
}
#' Is the data.frame formatted for uploading to a database?
#'
#' @description
#' This function is used to check a data.frame to ensure all
#' column names are in snake case format.
#'
#' @param x A data frame
#' @param warn When TRUE, display a warning if any columns are not in snake
#' case format
#'
#' @return
#' Returns TRUE if all columns are snake case format. If warn == TRUE,
#' the function will emit a warning on the column names that are not in snake
#' case format.
#'
#' @export
isFormattedForDatabaseUpload <- function(x, warn = TRUE) {
  checkmate::assert_data_frame(x)
  columnNames <- colnames(x)
  columnNamesInSnakeCaseFormat <- isSnakeCase(columnNames)
  allInSnakeCaseFormat <- all(columnNamesInSnakeCaseFormat)

  if (!allInSnakeCaseFormat && warn) {
    problemColumns <- columnNames[!columnNamesInSnakeCaseFormat]

    # Are the problem columns in camelCase? If so, we can use SqlRender
    # to provide a suggestion. Names that are not camelCase are echoed back
    # unchanged. Done as a vectorized assignment rather than an index loop
    # that grows the result one element at a time.
    columnNameSuggestions <- problemColumns
    problemIsCamelCase <- isCamelCase(problemColumns)
    columnNameSuggestions[problemIsCamelCase] <- SqlRender::camelCaseToSnakeCase(problemColumns[problemIsCamelCase])

    problemColumnsWarning <- paste(problemColumns, collapse = ", ")
    problemColumnsSuggestions <- paste(columnNameSuggestions, collapse = ", ")
    warning(paste("The following data.frame column names are not in snake case format:", problemColumnsWarning, "\n Consider revising the column names to:", problemColumnsSuggestions))
  }

  # TRUE only when every column name is snake_case, regardless of `warn`.
  return(allInSnakeCaseFormat)
}
#' Used to check if a string is in snake case
#'
#' @description
#' This function is used to check if a string conforms to
#' the snake case format.
#'
#' @param x The string to evaluate
#'
#' @return
#' TRUE if the string is in snake case
#'
#' @export
isSnakeCase <- function(x) {
  # Whole-string match: one or more groups of lower-case letters/digits,
  # with consecutive groups separated by a single underscore.
  snakeCasePattern <- "^[a-z0-9]+(?:_[a-z0-9]+)*$"
  matchesSnakeCase <- grepl(pattern = snakeCasePattern, x = x)
  return(matchesSnakeCase)
}
#' Used to check if a string is in lower camel case
#'
#' @description
#' This function is used to check if a string conforms to
#' the lower camel case format.
#'
#' @param x The string to evaluate
#'
#' @return
#' TRUE if the string is in lower camel case
#'
#' @export
isCamelCase <- function(x) {
  # Whole-string match: starts with lower-case letters, contains only letters
  # and digits, and has at least one lower-case letter or digit after the
  # leading letter (so a single letter on its own does not match).
  camelCasePattern <- "^[a-z]+([A-Z0-9]*[a-z0-9]+[A-Za-z0-9]*)$"
  matchesCamelCase <- grepl(pattern = camelCasePattern, x = x)
  return(matchesCamelCase)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.