R/ufovectors.R

Defines functions test sqlite_table_columns ufo_sql_table ufo_sql_column ufo_psql ufo_bind ufo_write_protect ufo_character_bz2 ufo_raw_bz2 ufo_logical_bz2 ufo_complex_bz2 ufo_numeric_bz2 ufo_integer_bz2 ufo_store_bin ufo_character ufo_raw ufo_logical ufo_complex ufo_numeric ufo_integer ufo_vector ufo_csv ufo_character_mmap ufo_matrix_bin ufo_vector_bin ufo_matrix_raw_bin ufo_matrix_logical_bin ufo_matrix_complex_bin ufo_matrix_numeric_bin ufo_matrix_integer_bin ufo_raw_bin ufo_logical_bin ufo_complex_bin ufo_numeric_bin ufo_integer_bin ufo_numeric_seq ufo_integer_seq ufo_operators_is_loaded maybe_add_class .expect_type .expect_exactly_one .check_path ufo_set_debug_mode

ufo_set_debug_mode <- function(debug=TRUE) {
  invisible(.Call(UFO_C_vectors_set_debug_mode, .expect_exactly_one(.expect_type(debug, "logical"))))
}

.check_path <- function(path) {
    name <- substitute(path)
    if (typeof(path) != "character") {
        stop(paste0("Path specified by `", name, "` is of type", typeof(path),
        ", must be a character vector."))
    }
    if (!file.exists(path)) {
        stop(paste0("File '", path, "' (specified by `", name, "`) does not exist."))
    }
    if (!file_test("-f", path)) {
        stop(paste0("File '", path, "' (specified by `", name, "`) exists but is not a file."))
    }
    if (0 != file.access(path, 4)) { # 0: existence, 1: execute, 2: write, 4: read
        stop(paste0("File '", path, "' (specified by `", name, "`) exists but is not readable."))
    }
    if (0 != file.access(path, 2)) { # 0: existence, 1: execute, 2: write, 4: read
        warning(paste0("File '", path, "' (specified by `", name, "`) exists but is not writeable."))
    }
    path
}

.expect_exactly_one <- function(vector, name=substitute(vector)) {
  if (length(vector) > 1) {
    warning(paste0("`", name, "` ",
                   "is a vector containing multiple values, ",
                   "picking the first one, ignoring the rest"))
  }
  if (length(vector) == 0) {
    stop(paste0("`", name, "` ",
                "is a zero-length vector, ",
                "but it should be a vector containing a single value"))
  }
  vector
}

.expect_type <- function(vector, expected_type, name=substitute(vector)) {
    if (typeof(vector) != expected_type) {
        stop(paste0("`", name, "` ",
                    "is a vector of type `", typeof(vector), "` ",
                    "but a vector of type `", expected_type, "` was found"))
    }
    vector
}

maybe_add_class <- function(vector, add_class) {
  if ((missing(add_class) && (ufo_operators_is_loaded()))
       || (!missing(add_class) && isTRUE(add_class))) {
      class(vector) <- c("ufo", class(vector))
  }
  return(vector)
}

ufo_operators_is_loaded <- function() {
  any(names(sessionInfo()$otherPkgs) == "ufooperators")
}

ufo_integer_seq <- function(from, to, by = 1, read_only = FALSE, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_intsxp_seq,
                    as.integer(from), as.integer(to), as.integer(by),
                    as.logical(.expect_exactly_one(read_only)),
                    as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}

ufo_numeric_seq <- function(from, to, by = 1, read_only = FALSE, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_realsxp_seq,
                    as.integer(from), as.integer(to), as.integer(by),
                    as.logical(.expect_exactly_one(read_only)),
                    as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}

ufo_integer_bin <- function(path, read_only = FALSE, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_vectors_intsxp_bin,
                    path.expand(.check_path(.expect_exactly_one(path))),
                    as.logical(.expect_exactly_one(read_only)),
                    as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}

ufo_numeric_bin <- function(path, read_only = FALSE, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_vectors_realsxp_bin,
                    path.expand(.check_path(.expect_exactly_one(path))),
                    as.logical(.expect_exactly_one(read_only)),
                    as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}

ufo_complex_bin <- function(path, read_only = FALSE, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_vectors_cplxsxp_bin,
                    path.expand(.check_path(.expect_exactly_one(path))),
                    as.logical(.expect_exactly_one(read_only)),
                    as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}

ufo_logical_bin <- function(path, read_only = FALSE, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_vectors_lglsxp_bin,
                    path.expand(.check_path(.expect_exactly_one(path))),
                    as.logical(.expect_exactly_one(read_only)),
                    as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}

ufo_raw_bin <- function(path, read_only = FALSE, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_vectors_rawsxp_bin,
                    path.expand(.check_path(.expect_exactly_one(path))),
                    as.logical(.expect_exactly_one(read_only)),
                    as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}

ufo_matrix_integer_bin <- function(path, rows, cols, read_only = FALSE, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_matrix_intsxp_bin,
                    path.expand(.check_path(.expect_exactly_one(path))),
                    as.integer(.expect_exactly_one(rows)),
                    as.integer(.expect_exactly_one(cols)),
                    as.logical(.expect_exactly_one(read_only)),
                    as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}

ufo_matrix_numeric_bin <- function(path, rows, cols, read_only = FALSE, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_matrix_realsxp_bin,
                    path.expand(.check_path(.expect_exactly_one(path))),
                    as.integer(.expect_exactly_one(rows)),
                    as.integer(.expect_exactly_one(cols)),
                    as.logical(.expect_exactly_one(read_only)),
                    as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}

ufo_matrix_complex_bin <- function(path, rows, cols, read_only = FALSE, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_matrix_cplxsxp_bin,
                  path.expand(.check_path(.expect_exactly_one(path))),
                  as.integer(.expect_exactly_one(rows)),
                  as.integer(.expect_exactly_one(cols)),
                  as.logical(.expect_exactly_one(read_only)),
                  as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}

ufo_matrix_logical_bin <- function(path, rows, cols, read_only = FALSE, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_matrix_lglsxp_bin,
                  path.expand(.check_path(.expect_exactly_one(path))),
                  as.integer(.expect_exactly_one(rows)),
                  as.integer(.expect_exactly_one(cols)),

                  as.logical(.expect_exactly_one(read_only)),
                  as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}

ufo_matrix_raw_bin <- function(path, rows, cols, read_only = FALSE, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_matrix_rawsxp_bin,
                    path.expand(.check_path(.expect_exactly_one(path))),
                    as.integer(.expect_exactly_one(rows)),
                    as.integer(.expect_exactly_one(cols)),
                    as.logical(.expect_exactly_one(read_only)),
                    as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}

ufo_vector_bin <- function(type, path, read_only = FALSE, min_load_count = 0, add_class) {
  if (missing(type)) stop("Missing vector type.")

  if (type == "integer") return(ufo_integer_bin(path, read_only, min_load_count, add_class))
  if (type == "numeric" || type == "double") return(ufo_numeric_bin(path, read_only, min_load_count, add_class))
  if (type == "complex") return(ufo_complex_bin(path, read_only, min_load_count, add_class))
  if (type == "logical") return(ufo_logical_bin(path, read_only, min_load_count, add_class))
  if (type == "raw")     return(ufo_raw_bin    (path, read_only, min_load_count, add_class))

  stop(paste0("Unknown UFO vector type: ", type))
}

ufo_matrix_bin <- function(type, path, rows, cols, read_only = FALSE, min_load_count = 0, add_class) {
  if (missing(type)) stop("Missing matrix type.")

  if (type == "integer") return(ufo_matrix_integer_bin(path), rows, cols, read_only, min_load_count, add_class)
  if (type == "numeric" || type == "double") return(ufo_matrix_numeric_bin(path), rows, cols, read_only, min_load_count, add_class)
  if (type == "complex") return(ufo_matrix_complex_bin(path), rows, cols, read_only, min_load_count, add_class)
  if (type == "logical") return(ufo_matrix_logical_bin(path), rows, cols, read_only, min_load_count, add_class)
  if (type == "raw")     return(ufo_matrix_raw_bin(path),     rows, cols, read_only, min_load_count, add_class)

  stop(paste0("Unknown UFO matrix type: ", type))
}

ufo_character_mmap <- function(paths, offset, extent, writeback = FALSE, fill = " ", min_load_count = 0, add_class) {
    open_indexed_files <- function(paths, offset, extent) {
        if (all(extent == 0)) 
            return(character(0))

        if (length(offset) != length(extent)) 
            stop("length of 'offset' [", length(offset), "] ",
                "must equal length of 'extent' [",
                length(extent), "]")

        paths <- normalizePath(paths, mustWork = FALSE)
        if (!all(file.exists(paths))) {
            # data <- rep(list(" "), length(extent))e
            # filemode <- force(filemode)
            result <- file.create(paths)
            if (!all(result))
                stop("error creating file(s)")
        }
    }
    open_indexed_files(paths, offset, extent)

    if (length(paths) != length(extent) && length(paths) != 1)
        stop("Paths can be of the same length as extent (",
             extent, ") or of length 1, but it is: ", length(paths))

    maybe_add_class(.Call("strsxp_mmap",
              as.character(paths),
              offset,
              extent,              
              as.character(.expect_exactly_one(fill)),
              !as.logical(.expect_exactly_one(writeback)),
              as.integer(.expect_exactly_one(min_load_count))),
      add_class)
}

ufo_csv <- function(path, read_only = FALSE, min_load_count = 0, check_names=T, header=T, 
                    record_row_offsets_at_interval=1000, initial_buffer_size=32, col_names, 
                    add_class=T) {

  .expect_exactly_one(min_load_count)
  .expect_exactly_one(header)
  .expect_exactly_one(check_names)
  .expect_exactly_one(header)
  .expect_exactly_one(record_row_offsets_at_interval)
  .expect_exactly_one(initial_buffer_size)

  df <- .Call(UFO_C_csv,
              path.expand(.check_path(.expect_exactly_one(path))),                                      # SEXP/*STRSXP*/
              as.logical(.expect_exactly_one(read_only)),                                               # SEXP/*LGLP*/
              as.integer(.expect_exactly_one(min_load_count)),                                          # SEXP/*INTSXP*/
              as.logical(.expect_exactly_one(header)),                                                  # SEXP/*LGLSXP*/
              as.integer(.expect_exactly_one(record_row_offsets_at_interval)),                          # SEXP/*INTSXP*/
              as.integer(.expect_exactly_one(initial_buffer_size)),                                     # SEXP/*INTSXP*/
              as.logical(.expect_exactly_one(add_class)))                                               # SEXP/*LGLSXP*/

  if (!missing(col_names)) {
    names(df) <- col_names
  }
  else if (!header && missing(col_names)) {
    names(df) <- sapply(seq_len(df), function(i) paste0("V", i))
  }
  if(check_names) {
    names(df) <- make.names(names(df), unique=T)
  }

  # handled internally, because it was misbehaving
  # if (add_class) for(col_name in names(df)) {
  #   attr(df[[col_name]], "class") <- "ufo"
  # }

  df
}
# todo row.names

ufo_vector <- function(mode = "logical", length = 0, populate_with_NAs = FALSE, min_load_count = 0, add_class) {
  allowed_vector_types <- c("integer", "double", "logical", "complex", "raw", "character")
  if(!mode %in% allowed_vector_types) {
    stop("Vector mode ", mode, " is not supported by UFOs.")
  }

  constructor <- if (mode == "integer") UFO_C_intsxp_empty
    else if (mode == "double" || mode == "numeric") UFO_C_realsxp_empty
    else if (mode == "logical") UFO_C_lglsxp_empty
    else if (mode == "complex") UFO_C_cplxsxp_empty
    else if (mode == "raw") UFO_C_rawsxp_empty
    else if (mode == "character" || mode == "string") UFO_C_strsxp_empty
    else stop("Vector mode ", mode, " is not supported by UFOs.")

  maybe_add_class(.Call(constructor, 
                   as.numeric(length),
                   as.logical(populate_with_NAs),
                   as.integer(.expect_exactly_one(min_load_count))), 
            add_class)
}

ufo_integer <- function(size, populate_with_NAs = FALSE, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_intsxp_empty,
                  as.numeric(size),
                  as.logical(populate_with_NAs),
                  as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}

ufo_numeric <- function(size, populate_with_NAs = FALSE, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_realsxp_empty,
                  as.numeric(size),
                  as.logical(populate_with_NAs),
                  as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}

ufo_complex <- function(size, populate_with_NAs = FALSE, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_cplxsxp_empty,
                  as.numeric(size),
                  as.logical(populate_with_NAs),
                  as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}

ufo_logical <- function(size, populate_with_NAs = FALSE, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_lglsxp_empty,
                  as.numeric(size),
                  as.logical(populate_with_NAs),
                  as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}

ufo_raw <- function(size, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_rawsxp_empty,
                  as.numeric(size),
                  as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}

ufo_character <- function(size, populate_with_NAs = FALSE, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_strsxp_empty,
                  as.numeric(size),
                  as.logical(populate_with_NAs),
                  as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}

ufo_store_bin <- function(path, vector) {
   invisible(.Call(UFO_C_store_bin, .check_path(.expect_exactly_one(path)), vector))
}

ufo_integer_bz2   <- function(path, read_only = FALSE, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_intsxp_bzip2,
                    path.expand(.check_path(.expect_exactly_one(path))),
                    as.logical(.expect_exactly_one(read_only)),
                    as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}
ufo_numeric_bz2   <- function(path, read_only = FALSE, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_realsxp_bzip2,
                    path.expand(.check_path(.expect_exactly_one(path))),
                    as.logical(.expect_exactly_one(read_only)),
                    as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}
ufo_complex_bz2   <- function(path, read_only = FALSE, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_cplxsxp_bzip2,
                    path.expand(.check_path(.expect_exactly_one(path))),
                    as.logical(.expect_exactly_one(read_only)),
                    as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}
ufo_logical_bz2   <- function(path, read_only = FALSE, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_lglsxp_bzip2,
                    path.expand(.check_path(.expect_exactly_one(path))),
                    as.logical(.expect_exactly_one(read_only)),
                    as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}
ufo_raw_bz2       <- function(path, read_only = FALSE, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_rawsxp_bzip2,
                    path.expand(.check_path(.expect_exactly_one(path))),
                    as.logical(.expect_exactly_one(read_only)),
                    as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}
ufo_character_bz2 <- function(path, read_only = FALSE, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_strsxp_bzip2,
                    path.expand(.check_path(.expect_exactly_one(path))),
                    as.logical(.expect_exactly_one(read_only)),
                    as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}

ufo_write_protect <- function(vector, read_only = FALSE, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_write_protect,
                    vector,
                    as.logical(.expect_exactly_one(read_only)),
                    as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}

ufo_bind <- function(..., read_only = FALSE, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_bind,
                    list(...),
                    as.logical(.expect_exactly_one(read_only)),
                    as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}

ufo_psql <- function(db, table, column, read_only = FALSE, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_psql,
                    as.character(.expect_exactly_one(db)),
                    as.character(.expect_exactly_one(table)),
                    as.character(.expect_exactly_one(column)),
                    as.logical(.expect_exactly_one(read_only)),
                    as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}

ufo_sql_column <- function(db, table, column, writeback = FALSE, read_only = FALSE, min_load_count = 0, add_class) {
  maybe_add_class(.Call(UFO_C_sqlite_column,
                    as.character(.expect_exactly_one(db)),
                    as.character(.expect_exactly_one(table)),
                    as.character(.expect_exactly_one(column)),
                    as.logical(.expect_exactly_one(writeback)),
                    as.logical(.expect_exactly_one(read_only)),
                    as.integer(.expect_exactly_one(min_load_count))),
             add_class)
}

#' Creates a UFO object representing a table from an SQL database. 
#' @param db database connection information
#' @param table the name of the table in the database
#' @param driver a string describing the database driver, one of: SQLite
#' @param read_only sets the vector to be write-protected by the OS
#'                  (optional, false by default).
#' @param chunk_length the minimum number of elements loaded at once,
#'                     will always be rounded up to a full memory page
#'                     (optional, a page by default).
#' @param ... other ufo or db configuration options
#' @return a list containig ufo vectors lazily populated with the values
#'         of individual columns in the specified table 
#' @export
ufo_sql_table <- function(db, table, ...,  writeback = FALSE, driver = "SQLite") {

    columns <- NULL

    if (driver == "SQLite" || driver == "SQLITE" || driver == "sqlite") {
        columns <- sqlite_table_columns(db, table, ...)
    } else {
        stop(paste0("Unsupported database driver: ", driver, ". Use one of: SQLite"))
    }

    result <- lapply(columns, function(column) {
      ufo_sql_column(db = db, table = table, column, writeback = writeback, ...)
    })
    names(result) <- columns
    result
}

sqlite_table_columns <- function(db, table, ...)  {
    connection <- do.call(DBI::dbConnect, c(drv = RSQLite::SQLite(), db, ...))
    result <- DBI::dbSendQuery(connection, paste0("PRAGMA table_info(", DBI::dbQuoteIdentifier(connection, table), ")"))
    columns <- DBI::dbFetch(result)
    DBI::dbClearResult(result)
    print(columns)
    columns$name
}

test <- function() {
  .Call(UFO_C_test)
}


# ufo_integer_seq   <- function(from, to, by, read_only, min_load_count, ...)
# ufo_numeric_seq   <- function(from, to, by, read_only, min_load_count, ...)

# ufo_integer_bin   <- function(path, read_only, min_load_count, ...)
# ufo_numeric_bin   <- function(path, read_only, min_load_count, ...)
# ufo_complex_bin   <- function(path, read_only, min_load_count, ...)
# ufo_logical_bin   <- function(path, read_only, min_load_count, ...)
# ufo_raw_bin       <- function(path, read_only, min_load_count, ...)    

# ufo_integer       <- function(size, populate_with_NAs, min_load_count, ...)
# ufo_numeric       <- function(size, populate_with_NAs, min_load_count, ...)
# ufo_complex       <- function(size, populate_with_NAs, min_load_count, ...)
# ufo_logical       <- function(size, populate_with_NAs, min_load_count, ...)
# ufo_raw           <- function(size,                    min_load_count, ...)
# ufo_character     <- function(size, populate_with_NAs, min_load_count, ...)

# ufo_bind          <- function(..., read_only, min_load_count)
# ufo_write_protect <- function(vector, read_only, min_load_count, ...)
# ufo_psql          <- function(db, table, column, read_only, min_load_count, ...)
# ufo_csv           <- function(path, read_only, min_load_count, check_names, header, ...)
ufo-org/ufo-r-vectors documentation built on Oct. 2, 2022, 11:09 p.m.