#' Read CSMAR data
#'
#' \code{read_csmar_data()} reads a data file exported from CSMAR into R. It is
#' a wrapper around \code{read_csv}, \code{read_tsv} and \code{read_excel}; the
#' reader is chosen from the file extension of \code{path} (\code{.csv},
#' \code{.txt}, or \code{.xls}/\code{.xlsx}).
#'
#' For Excel files, every file in the directory of \code{path} whose name is
#' the base name of \code{path} optionally followed by \code{-} and/or digits
#' (e.g. \code{data.xls}, \code{data1.xls}, \code{data-2.xls}) is read with the
#' same arguments, and the results are combined by rows. \code{.csv} and
#' \code{.txt} files are read on their own.
#'
#' @param path Path to a raw data file. Must be a single existing file.
#' @param skip Integer. The number of lines of the data file to skip before
#'   beginning to read data. Usually 1 for 'Excel Format Create R Data' from
#'   CSMAR. When \code{col_names = "Chinese"} and \code{skip} is 0, one line
#'   is skipped so that the original header row is not read as data.
#' @param col_names \code{"Chinese"} to use Chinese column names from the
#'   external txt description file: the last file in the directory of
#'   \code{path} whose name contains \code{[DES]} is read as UTF-8 and the
#'   text enclosed in square brackets on each line is used as a column name.
#'   Otherwise the same as in \code{read_csv} and \code{read_excel}.
#' @param col_types The same as in \code{read_csv} and \code{read_excel}.
#'   When column names are given as a character vector, names belonging to
#'   skipped columns (\code{"skip"} for Excel with Chinese names, \code{"_"}
#'   in a compact string for csv/txt) are dropped from \code{col_names}.
#'
#' @return A tibble.
#' @export
#'
#' @seealso \code{\link[readr]{read_csv}} and \code{\link[readxl]{read_excel}}.
#'
#' @examples
#' \dontrun{
#' # A single csv export, using the header row for column names
#' dat <- read_csmar_data("TRD_Dalyr.csv")
#'
#' # A sequence of Excel exports, named from the [DES] description file
#' dat <- read_csmar_data("TRD_Dalyr.xls", col_names = "Chinese")
#' }
read_csmar_data <- function(path, skip = 0, col_names = TRUE, col_types = NULL) {
  stopifnot(is.character(path), length(path) == 1L, file.exists(path))

  data_dir <- dirname(path)
  file_name <- basename(path)
  ext <- tools::file_ext(file_name)

  # identical() instead of `==`: `col_names` may be a character vector of
  # custom names, for which `==` would give a condition of length > 1.
  if (identical(col_names, "Chinese")) {
    # The names come from the description file, so the header row of the data
    # file has to be skipped instead of being read as data.
    if (skip == 0) {
      skip <- 1
    }

    des_files <- dir(data_dir, pattern = "\\[DES\\]")
    if (length(des_files) == 0L) {
      stop(
        "no '[DES]' description file found in '", data_dir, "'",
        call. = FALSE
      )
    }
    # NOTE(review): with several description files in one directory the last
    # one (alphabetically) is used, whichever data file it belongs to.
    des_lines <- readLines(
      file.path(data_dir, des_files[length(des_files)]),
      encoding = "UTF-8"
    )
    col_names <- sub("^.+\\[(.+)\\].+$", "\\1", des_lines)

    # Drop the names of columns that read_excel is told to skip.
    if (is.character(col_types)) {
      col_names <- col_names[col_types != "skip"]
    }
  }

  if (ext %in% c("xls", "xlsx")) {
    # Collect the whole sequence of exports: <stem>, <stem>1, <stem>-2, ...
    # The stem is matched literally (not as a regex) so that file names
    # containing regex metacharacters or extra dots are handled correctly.
    stem <- tools::file_path_sans_ext(file_name)
    candidates <- dir(data_dir)
    suffix <- substring(candidates, nchar(stem) + 1L)
    is_part <- startsWith(candidates, stem) &
      grepl(paste0("^-?[0-9]*\\.", ext, "$"), suffix)

    parts <- lapply(
      file.path(data_dir, candidates[is_part]),
      readxl::read_excel,
      skip = skip, col_names = col_names, col_types = col_types
    )
    dplyr::bind_rows(parts)
  } else if (ext %in% c("csv", "txt")) {
    # Drop the names of columns skipped by a compact col_types string. Only
    # meaningful for a character vector of names: subsetting the logical
    # `TRUE`/`FALSE` would turn it into a vector padded with NA.
    if (is.character(col_types) && is.character(col_names)) {
      col_names <- col_names[unlist(strsplit(col_types, "")) != "_"]
    }
    reader <- if (ext == "csv") readr::read_csv else readr::read_tsv
    reader(path, skip = skip, col_names = col_names, col_types = col_types)
  } else {
    stop("unknown file type")
  }
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.