#'
#' @title write_rcsv
#' @description an extension of the csv file format
#' @details write out an rcsv file, an extension of csv, with column format details
#' stored in a header for more consistent reading into R
#'
#' @param input a data.table or data.frame object to write out
#' @param file file path to which the rscv will be written
#' @param strings.convert global switching of storage conversion types
#' @param strings.as.factor.ints logical, convert strings to factors,
#' and write to file as integer. May save space on disk
#' @param factors.as.ints logical, write factor columns to file as
#' integer, with "levels" stored in a header object. May save space on disk
#' @param dates.as.ints logical, write date columns to file as
#' integer, with "tz" stored in a header object, and origin assumed to be
#' "1970-01-01". May save space on disk
#' @param posix.as.num logical, write POSIXct columns to file as
#' numeric, with "tz" stored in a header object, and origin assumed to be
#' "1970-01-01 00:00:00". May save space on disk
#' @param times.as.num logical, write `chron::times` columns to file as
#' numeric. Allows for sub-second precision to be stored, as opposed to
#' converting times to character, therefore only storing precision to seconds
#' @param ITimes.as.ints logical, write `ITime` columns to file as
#' integers. May save space on disk
#' @param logical.convert one of "int"/TRUE to store as integers, "short" to shorten
#' to "T" or "F", or "long"/FALSE to leave unchanged. May save space on disk
#' @param notes single line of text. Add notes to the dataset. Will be displayed in
#' the console when data is impoted by `read_rcsv`, and stored in the imported data frame
#' as an attribute "notes". By default, any existing "notes" attribute is stored here.
#'
#'
#' @import data.table
#' @importFrom chron times
#' @importFrom lubridate force_tz
#'
#' @export
write_rcsv <- function( input,
file,
strings.convert = FALSE,
strings.as.factor.ints = strings.convert,
factors.as.ints = strings.convert,
dates.as.ints = strings.convert,
posix.as.num = strings.convert,
times.as.num = strings.convert,
ITimes.as.ints = strings.convert,
logical.convert = strings.convert,
notes = attr( input, "notes" ) ) {
logical.as.int <- FALSE
# make sure the input object qualifies as a data frame
if( !"data.frame" %chin% class( input ) ) {
convert.DT.attempt <- try( data.table::setDT( input ), silent = TRUE )
if( class( convert.DT.attempt ) == "try-error" ) {
stop( "Object for writing could not be converted to a data frame." )
} else {
warning( "Object has been coerced to data frame for writing as rcsv." )
}
} else {
data.table::setDT( input )
}
# # make a copy of the input object. This is inefficient in terms of memory,
# # but will prevent data.table making in-place changes to the object.
# input <- data.table::copy( table )
# find the column names
column.names <- names( input )
column.names.header <- paste0( "colname:", column.names )
# find the current class of each column
column.classes <- lapply( input, class )
column.classes <- sapply( column.classes, "[", 1L )
column.classes.header <- paste0( "colclass:", column.classes )
# create text header to start the output file.
# this will be used when reading the file back into R
header <- paste( column.names.header, column.classes.header, sep = "},{" )
if( strings.as.factor.ints ) {
char.cols <- which( column.classes == "character" )
for( col in char.cols ) {
input[ , ( col ) := factor( .SD[[col]] ) ]
}
levels.char.cols <- lapply(
X = input[ , char.cols, with = FALSE ],
FUN = levels
)
levels.char.cols <- sapply( levels.char.cols, paste, collapse = "," )
levels.char.cols <- paste0( "levels:", levels.char.cols )
for( col in char.cols ) {
input[ , ( col ) := as.integer( .SD[[col]] ) ]
}
header[ char.cols ] <- paste( header[ char.cols ],
levels.char.cols,
"from:factorints",
sep = "},{" )
}
# add a shortened form logical if requested
if( "logical" %chin% column.classes ) {
logical.cols <- which( column.classes == "logical" )
if( isTRUE( logical.convert ) ||
{ !is.logical( logical.convert ) &&
logical.convert %chin% c( "int", "integer", "number", "numeric", "num" ) }
) {
logical.as.int <- TRUE
header[ logical.cols ] <- paste( header[ logical.cols ],
"from:integer",
sep = "},{" )
} else if( logical.convert == FALSE || logical.convert %chin% c( "long", "lng" ) ) {
header[ logical.cols ] <- paste( header[ logical.cols ],
"from:long",
sep = "},{" )
} else if( logical.convert %chin% c( "short", "shrt" ) ) {
for( col in logical.cols ) {
input[ , ( col ) := substr( as.character( .SD[[col]] ), 0, 1 ) ]
}
header[ logical.cols ] <- paste( header[ logical.cols ],
"from:short",
sep = "},{" )
} else {
stop(
sprintf( "%s is not a valid input for `logical.convert`", logical.convert )
)
}
}
# add a levels parameter to any factor columns
if( "factor" %chin% column.classes ) {
factor.cols <- which( column.classes == "factor" )
header.factor.cols <- lapply(
X = input[ , factor.cols, with = FALSE ],
FUN = levels
)
header.factor.cols <- sapply( header.factor.cols, paste, collapse = "," )
header.factor.cols <- paste0( "levels:", header.factor.cols )
if( factors.as.ints ) {
for( col in factor.cols ) {
input[ , ( col ) := as.integer( .SD[[col]] ) ]
}
header[ factor.cols ] <- paste( header[ factor.cols ],
header.factor.cols,
"from:integer",
sep = "},{" )
} else {
header[ factor.cols ] <- paste( header[ factor.cols ],
header.factor.cols,
"from:string",
sep = "},{" )
}
}
# add timezone to any POSIXct columns
if( "POSIXct" %chin% column.classes ) {
posix.cols <- which( column.classes == "POSIXct" )
header.posix.cols.tz <- vapply(
X = input[ , posix.cols, with = FALSE ],
FUN = attr,
FUN.VALUE = NA_character_,
which = "tzone"
)
header.posix.cols.tz[ header.posix.cols.tz == "" ] <- "none"
header.posix.cols.tz <- paste0( "tz:", header.posix.cols.tz )
header.posix.cols.offset <- vapply(
X = input[ , posix.cols, with = FALSE ],
FUN = tz_offset,
FUN.VALUE = NA_real_
)
header.posix.cols.offset <- paste0( "tzoffset:", header.posix.cols.offset )
if( posix.as.num ) {
for( col in posix.cols ) {
input[ , ( col ) := as.integer( .SD[[col]] ) ]
}
header[ posix.cols ] <- paste( header[ posix.cols ],
header.posix.cols.tz,
header.posix.cols.offset,
"from:integer",
sep = "},{" )
} else {
header[ posix.cols ] <- paste( header[ posix.cols ],
header.posix.cols.tz,
header.posix.cols.offset,
"from:string",
sep = "},{" )
# fool data.table to avoid automatic timezone shifting
for( col in posix.cols ) {
input[ , ( col ) := lubridate::force_tz( .SD[[col]], tzone = "UTC" ) ]
}
}
}
# convert any columns of `times` class to character or numeric before writing
if( "times" %chin% column.classes ) {
times.cols <- which( column.classes == "times" )
if( times.as.num ) {
for( col in times.cols ) {
input[ , ( col ) := as.numeric( .SD[[col]] ) ]
}
header[ times.cols ] <- paste( header[ times.cols ],
"from:numeric",
sep = "},{" )
} else {
for( col in times.cols ) {
# convert to ITime so that fwrite writes it out properly
input[ , ( col ) := as_char_time( .SD[[col]] ) ]
}
header[ times.cols ] <- paste( header[ times.cols ],
"from:string",
sep = "},{" )
}
}
# convert any columns of `ITime` class to character or integer before writing
if( "ITime" %chin% column.classes ) {
ITime.cols <- which( column.classes == "ITime" )
if( ITimes.as.ints ) {
for( col in ITime.cols ) {
input[ , ( col ) := as.integer( .SD[[col]] ) ]
}
header[ ITime.cols ] <- paste( header[ ITime.cols ],
"from:integer",
sep = "},{" )
} else {
for( col in ITime.cols ) {
# input[ , ( col ) := as.character( .SD[[col]] ) ]
}
header[ ITime.cols ] <- paste( header[ ITime.cols ],
"from:string",
sep = "},{" )
}
}
if( "Date" %chin% column.classes ) {
date.cols <- which( column.classes == "Date" )
if( dates.as.ints ) {
for( col in date.cols ) {
input[ , ( col ) := as.integer( .SD[[col]] ) ]
}
header[ date.cols ] <- paste( header[ date.cols ],
"from:integer",
sep = "},{" )
} else {
for( col in date.cols ) {
# input[ , ( col ) := as.character( .SD[[col]] ) ]
}
header[ date.cols ] <- paste( header[ date.cols ],
"from:string",
sep = "},{" )
}
}
# also add a `notes` row
if( is.null( notes ) || length( notes ) == 0L ) {
notes <- NULL
} else if( sum( grepl( "\n", notes ) ) > 0 ) {
# warning( "`notes` is being coerced to a single line" )
# notes <- gsub( "\n", " ", notes )
notes <- unlist( strsplit( notes, "\n" ) )
}
if( !is.null( notes ) ) {
notes <- paste0( "notes:", notes )
}
# define how many lines are to be used in the header, and how many in the body
head.lines <- ncol( input ) + 1L + length( notes )
body.rows <- nrow( input )
# put that into a header line
if( !is.null( notes ) ) {
head.line.readlines <- paste(
"rcsvHeader",
paste0( "headlines:", head.lines ),
paste0( "noteslines:", length( notes ) ),
paste0( "colreflines:", ncol( input ) ),
paste0( "tablerows:", body.rows ),
sep = "},{"
)
header <- paste0( "#{",
c( head.line.readlines,
notes,
paste0( "colref:", seq_along( input ), "},{", header ) ),
"}" )
} else {
head.line.readlines <- paste(
"rcsvHeader",
paste0( "headlines:", head.lines ),
paste0( "colreflines:", ncol( input ) ),
paste0( "tablerows:", body.rows ),
sep = "},{"
)
header <- paste0( "#{",
c( head.line.readlines,
paste0( "colref:", seq_along( input ), "},{", header ) ),
"}" )
}
# write the header to the output file
cat( header,
sep = "\n",
file = file, append = FALSE
)
# now write the data out, appending below the head line
data.table::fwrite( input,
file = file,
append = TRUE,
sep = ",", sep2 = c( "", "|", "" ),
dateTimeAs = "ISO",
logicalAsInt = logical.as.int,
col.names = TRUE,
showProgress = TRUE
)
if( !is.null( notes ) ) {
cat( "Notes: ", paste( sub( "^notes:", "", notes ), collapse = "\n\t" ), "\n" )
}
return( invisible( TRUE ) )
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.