R/read_sparse.R
In readsparse: Read and Write Sparse Matrices in 'SVMLight' and 'LibSVM' Formats

Documented in read.sparse write.sparse

#' @importFrom methods as new .hasSlot
#' @importFrom Rcpp evalCpp
#' @importClassesFrom Matrix RsparseMatrix dgRMatrix ngRMatrix
#' @useDynLib readsparse, .registration=TRUE

check.bool <- function(var, name) {
    if (NROW(var) != 1)
        stop(sprintf("Error: %s must be a single boolean/logical.", name))
    if (typeof(var) != "logical")
        var <- as.logical(var)
    if (is.na(var))
        stop(sprintf("%s cannot be NA.", name))
    return(var)
}

check.int <- function(var, name) {
    if (NROW(var) != 1)
        stop(sprintf("Error: %s must be a single non-negative integer.", name))
    if (typeof(var) != "integer")
        var <- as.integer(var)
    if (is.na(var))
        stop(sprintf("%s cannot be NA.", name))
    if (is.infinite(var))
        stop(sprintf("%s cannot be infinite.", name))
    if (var < 0L)
        stop(sprintf("%s must be non-negative."))
    return(var)
}

cast.df <- function(df) {
    if (inherits(df, c("tibble", "data.table")))
        df <- as.data.frame(df)
    if (inherits(df, "data.frame"))
        df <- as.matrix(df)
    return(df)
}

cast.dense.vec <- function(X) {
    if (inherits(X, c("numeric", "integer", "float32"))) {
        if (typeof(X) != "double")
            X <- as.numeric(X)
        X <- matrix(X, nrow=1L)
    }
    return(X)
}

cast.sparse.vec <- function(X) {
    if (inherits(X, "sparseVector")) {
        X.csr <- new("dgRMatrix")
        X.csr@Dim <- c(1L, X@length)
        X.csr@p <- c(0L, length(X@x))
        X.csr@j <- X@i - 1L
        if (.hasSlot(X, "x")) {
            X.csr@x <- as.numeric(X@x)
        } else {
            X.csr@x <- rep(1., length(X@i))
        }
        X <- X.csr
    }
    return(X)
}

check.for.overflow <- function(lst) {
    if (length(lst) == 1L) {
        if ((lst$err == 1) && ("err" %in% names(lst)))
            stop("Number of rows exceeds INT_MAX.")
        else if (lst$err == 2)
            stop("Number of columns exceeds INT_MAX.")
        else if (lst$err == 3)
            stop("Number of classes exceeds INT_MAX.")
    }
}

process.file.name <- function(fname) {
    fname <- path.expand(fname)
    if (take_as_utf8())
        fname <- enc2utf8(fname)
    # else if (Encoding(fname) != "unknown")
    #     fname <- enc2native(fname)
    return(fname)
}

#' @title Read Sparse Matrix from Text File
#' @description Read a labelled sparse CSR matrix in text format as used by libraries
#' such as SVMLight, LibSVM, ThunderSVM, LibFM, xLearn, XGBoost, LightGBM, and more.
#' 
#' The format is as follows:
#' 
#' \code{<label(s)> <column>:<value> <column>:<value> ...}
#' 
#' with one line per observation/row.
#' 
#' Example line (row):
#' 
#' \code{1 1:1.234 3:20}
#' 
#' This line denotes a row with label (target variable) equal to 1, a value
#' for the first column of 1.234, a value of zero for the second column (which is
#' missing), and a value of 20 for the third column.
#' 
#' The labels might be decimal (for regression), and each row might contain more
#' than one label (must be integers in this case), separated by commas \bold{without}
#' spaces inbetween - e.g.:
#' 
#' \code{1,5,10 1:1.234 3:20}
#' 
#' This line indicates a row with labels 1, 5, and 10 (for multi-class classification).
#' If the line has no labels, it should still include a space before the features.
#' 
#' 
#' The rows might additionally contain a `qid` parameter as used in ranking algorithms,
#' which should always lay inbetween the labels and the features and must be an integer - e.g.:
#' 
#' \code{1 qid:2 1:1.234 3:20}
#' 
#' 
#' The file might optionally contain a header as the first line with metadata
#' (number of rows, number of columns, number of classes). Presence of a header will be
#' automatically detected, and is recommended to include it for speed purposes. Datasets
#' from the extreme classification repository (see references) usually include such a header.
#' 
#' Lines might include comments, which start after a `#` character. Lines consisting
#' of only a `#` will be ignored. When reading from a file, such file might have a
#' BOM (information about encoding uses in Windows sytems), which will be automatically
#' skipped.
#' 
#' @details Note that this function:\itemize{
#' \item Will not make any checks for negative column indices.
#' \item Has a precision of C type `int` for column indices and integer labels
#' (the maximum value that this type can hold can be checked in `.Machine$integer.max`).
#' \item Will fill missing labels with NAs when passing `multilabel=FALSE`.
#' \item Will fill with zeros (empty values) the lines that are empty (that is,
#' they generate a row in the data), but will ignore (that is, will not generate
#' a row in the data) the lines that start with `#`.
#' }
#' 
#' Be aware that the data is represented as a CSR matrix with index pointer of
#' class C `int`, thus the number of rows/columns/non-zero-elements cannot exceed
#' `.Machine$integer.max`.
#' 
#' On 64-bit Windows systems, if compiling the library with a compiler other than MinGW
#' or MSVC, it will not be able to read files larger than 2GB. This should not be a concern
#' if installing it from CRAN or from R itself, as the Windows version at the time
#' of writing can only be compiled with MinGW.
#' 
#' If the file contains a header, and this header denotes a larger number of columns
#' or of labels than the largest index in the data, the resulting object will have
#' this dimension set according to the header. The third entry in the header (number
#' of classes/labels) will be ignored when passing `multilabel=FALSE`.
#' 
#' The function uses different code paths when reading from a file or from a string,
#' and there might be slight differences between the obtained results from them.
#' For example, reading from a file might produce the desired output if the file
#' uses tabs as separators instead of spaces (not supported by most other software
#' and not standard), whereas reading from a string will not.
#' If any such difference is encountered, please submit a bug report in the
#' package's GitHub page.
#' @param file Either a file path from which the data will be read, or a string
#' (`character` variable) containing the text from which the data will be read.
#' In the latter case, must pass `from_string=TRUE`.
#' @param multilabel Whether the input file can have multiple labels per observation.
#' If passing `multilabel=FALSE` and it turns out to have multiple labels, will only
#' take the first one for each row. If the labels are non-integers or have decimal point,
#' the results will be invalid.
#' @param has_qid Whether the input file has `qid` field (used for ranking). If passing
#' `FALSE` and the file does turns out to have `qid`, the features will not be read for any
#' observations.
#' @param integer_labels Whether to output the observation labels as integers.
#' @param index1 Whether the input file uses numeration starting at 1 for the column
#' numbers (and for the label numbers when passing `multilabel=TRUE`). This is usually
#' the case for files downloaded from the repositories in the references. The function
#' will check for whether any of the column indices is zero, and will ignore this
#' option if so (i.e. will assume it is `FALSE`).
#' @param sort_indices Whether to sort the indices of the columns after reading the data.
#' These should already be sorted in the files from the repositories in the references.
#' @param ignore_zeros Whether to avoid adding features which have a value of zero.
#' If the zeros are caused due to numerical rounding in the software that wrote the
#' input file, they can be post-processed by passing `ignore_zeros=FALSE` and then
#' something like `X@x[X@x == 0] = 1e-8`.
#' @param min_cols Minimum number of columns that the output `X` object should have,
#' in case some columns are all missing in the input data.
#' @param min_classes Minimum number of columns that the output `y` object should have,
#' in case some columns are all missing in the input data. Only used when passing
#' `multilabel=TRUE`.
#' @param limit_nrows Maximum number of rows to read from the data. If there are more
#' than this number of rows, it will only read the first 'limit_nrows' rows.
#' If passing zero (the default), there will be no row limit.
#' @param use_altrep Whether to use R's ALTREP system to return C++ vector objects
#' without generating extra data copies. If passing `FALSE`, each piece of data will
#' be copied into a an R-allocated vector and returned as such.
#' 
#' Passing `TRUE` is faster and uses less memory (as there are no redundant data copies),
#' but these ALTREP'd objects can potentially result in some functions/methods running
#' slower on them than on R objects (for example, manually sub-setting the vectors in
#' the S4 Matrix classes that are returned can potentially be slower by some microseconds).
#' @param from_string Whether to read the data from a string variable instead of a file.
#' If passing `from_string=TRUE`, then `file` is assumed to be a variable with the
#' data contents on it.
#' @return A list with the following entries:\itemize{
#' \item `X`: the features, as a CSR Matrix from package `Matrix` (class `dgRMatrix`).
#' \item `y`: the labels. If passing `multilabel=FALSE` (the default), will be a vector
#' (class `numeric` when passing `integer_labels=FALSE`, class `integer` when passing
#' `integer_labels=TRUE`), otherwise will be a binary CSR Matrix (class `ngRMatrix`).
#' \item `qid`: the query IDs used for ranking, as an integer vector.
#' This entry will \bold{only} be present when passing `has_qid=TRUE`.
#' }
#' These can be easily transformed to other sparse matrix types through e.g.
#' `X <- as(X, "CsparseMatrix")`.
#' @seealso \link{write.sparse}
#' @export
#' @examples
#' \dontrun{
#' library(Matrix)
#' library(readsparse)
#' 
#' ### Example input file
#' "1 2:1.21 5:2.05
#' -1 1:0.45 3:0.001 4:-10" -> coded.matrix
#' 
#' r <- read.sparse(coded.matrix, from_string=TRUE)
#' print(r)
#' 
#' ### Convert it back to text
#' recoded.matrix <- write.sparse(file=NULL, X=r$X, y=r$y, to_string=TRUE)
#' cat(recoded.matrix)
#' 
#' ### Example with real file I/O
#' ## generate a random sparse matrix and labels
#' set.seed(1)
#' X <- rsparsematrix(nrow=5, ncol=10, nnz=8)
#' y <- rnorm(5)
#' 
#' ## save into a text file
#' temp_file <- file.path(tempdir(), "matrix.txt")
#' write.sparse(temp_file, X, y, integer_labels=FALSE)
#' 
#' ## inspect the text file
#' cat(paste(readLines(temp_file), collapse="\n"))
#' 
#' ## read it back
#' r <- read.sparse(temp_file)
#' print(r)
#' 
#' ### (Note that columns with all-zeros are discarded,
#' ###  this behavior can be avoided with 'add_header=TRUE')
#' }
#' @references Datasets in this format can be found here:\itemize{
#' \item LibSVM Data: \url{https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/}
#' \item Extreme Classification Repository: \url{http://manikvarma.org/downloads/XC/XMLRepository.html}
#' }
#' 
#' The format is also described at the SVMLight webpage: \url{http://svmlight.joachims.org}.
read.sparse <- function(file, multilabel=FALSE, has_qid=FALSE, integer_labels=FALSE,
                        index1=TRUE, sort_indices=TRUE, ignore_zeros=TRUE,
                        min_cols=0L, min_classes=0L, limit_nrows=0L,
                        use_altrep=TRUE, from_string=FALSE) {
    multilabel      <-  check.bool(multilabel, "multilabel")
    has_qid         <-  check.bool(has_qid, "has_qid")
    integer_labels  <-  check.bool(integer_labels, "integer_labels")
    index1          <-  check.bool(index1, "index1")
    sort_indices    <-  check.bool(sort_indices, "sort_indices")
    ignore_zeros    <-  check.bool(ignore_zeros, "ignore_zeros")
    from_string     <-  check.bool(from_string, "from_string")
    use_altrep      <-  check.bool(use_altrep, "use_altrep")

    min_cols        <-  check.int(min_cols, "min_cols")
    min_classes     <-  check.int(min_classes, "min_classes")
    limit_nrows     <-  check.int(limit_nrows, "limit_nrows")
    
    if (inherits(file, "connection")) {
        file <- paste(readLines(file), collapse="\n")
        from_string <- TRUE
    }
    
    if (!from_string) {

        file <- process.file.name(file)
        
        if (multilabel) {
            read_func <- read_multi_label_R
        } else {
            read_func <- read_single_label_R
        }
        
    } else {
        if (typeof(file) != "character")
            stop("Must pass a character/string variable as input.")
        
        if (multilabel) {
            read_func <- read_multi_label_from_str_R
        } else {
            read_func <- read_single_label_from_str_R
        }
    }
    
    r <- read_func(
        file,
        ignore_zeros,
        sort_indices,
        index1,
        !has_qid,
        limit_nrows,
        use_altrep
    )
    
    if (!length(r))
        stop("Error: could not read file successfully.")
    check.for.overflow(r)
    
    r$ncols     <-  as.integer(max(c(r$ncols, min_cols)))
    r$nclasses  <-  as.integer(max(c(r$nclasses, min_classes)))
    
    features      <-  new("dgRMatrix")
    features@Dim  <-  c(r$nrows, r$ncols)
    features@p    <-  r$indptr
    features@j    <-  r$indices
    features@x    <-  r$values
    
    if (multilabel) {
        labels      <-  new("ngRMatrix")
        labels@Dim  <-  c(r$nrows, r$nclasses)
        labels@p    <-  r$indptr_lab
        labels@j    <-  r$indices_lab
    } else {
        labels <- r$labels
        if (integer_labels)
            labels <- as.integer(labels)
    }

    if (!has_qid && length(r$indptr) > 1L &&
        r$indptr[1L] == r$indptr[length(r$indptr)]) {
        warning("Data has empty features. Perhaps the file has 'qid' field?")
    }
    
    if (!has_qid)
        return(list(X = features, y = labels))
    else
        return(list(X = features, y = labels, qid = r$qid))
}

#' @title Write Sparse Matrix in Text Format
#' @description Write a labelled sparse matrix into text format as used by software
#' such as SVMLight, LibSVM, ThunderSVM, LibFM, xLearn, XGBoost, LightGBM, and others - i.e.:
#' 
#' \code{<labels(s)> <column:value> <column:value> ...}
#' 
#' For more information about the format and usage examples, see \link{read.sparse}.
#' 
#' Can write labels for regression, classification (binary, multi-class, and multi-label),
#' and ranking (with `qid`), but note that most software that makes use of this data
#' format supports only regression and binary classification.
#' @details Be aware that writing sparse matrices to text is not a lossless operation
#' - that is, some information might be lost due to numeric precision, and metadata such
#' as row and column names will not be saved. It is recommended to use `saveRDS` or similar
#' for saving data between R sessions, or to use binary formats for passing between
#' different software such as R->Python.
#' 
#' The option `ignore_zeros` is implemented heuristically, by comparing
#' `abs(x) >= 10^(-decimal_places)/2`, which might not match exactly with
#' the rounding that is done implicitly in string conversions in the libc/libc++
#' functions - thus there might still be some corner cases of all-zeros written into
#' features if the (absolute) values are very close to the rounding threshold.
#' 
#' While R uses C `double` type for numeric values, most of the software that is able to
#' take input data in this format uses `float` type, which has less precision.
#' 
#' The function uses different code paths when writing to a file or to a string,
#' and there might be slight differences between the generated texts from them.
#' If any such difference is encountered, please submit a bug report in the
#' package's GitHub page.
#' @param file Output file path into which to write the data.
#' Will be ignored when passing `to_string=TRUE`.
#' @param X Sparse data to write. Can be a sparse matrix from package `Matrix`
#' (classes: `dgRMatrix`, `dgTMatrix`, `dgCMatrix`, `ngRMatrix`, `ngTMatrix`, `ngCMatrix`)
#' or from package `SparseM` (classes: `matrix.csr`, `matrix.coo`, `matrix.csc`),
#' or a dense matrix of all numeric values, passed either as a `matrix` or as a `data.frame`.
#' 
#' If `X` is a vector (classes `numeric`, `integer`, `dsparseVector`), will be assumed to
#' be a row vector and will thus write one row only.
#' 
#' Note that the data will be casted to `dgRMatrix` in any case.
#' @param y Labels for the data. Can be passed as a vector (`integer` or `numeric`)
#' if each observation has one label, or as a sparse or dense matrix (same format as `X`)
#' if each observation can have more than 1 label. In the latter case, only the non-missing
#' column indices will be written, while the values are ignored.
#' @param qid Secondary label information used for ranking algorithms. Must be an integer vector
#' if passed. Note that not all software supports this.
#' @param integer_labels Whether to write the labels as integers. If passing `FALSE`, they will
#' have a decimal point regardless of whether they are integers or not. If the file is meant
#' to be used for a classification algorithm, one should pass `TRUE` here (the default).
#' For multilabel classification, the labels will always be written as integers.
#' @param index1 Whether the column and label indices (if multi-label) should have numeration
#' starting at 1. Most software assumes this is `TRUE`.
#' @param sort_indices Whether to sort the indices of `X` (and of `y` if multi-label) before
#' writing the data. Note that this will cause in-place modifications if either `X` or `y`
#' are passed as CSR matrices from the `Matrix` package.
#' @param ignore_zeros Whether to ignore (not write) features with a value of zero
#' after rounding to the specified decimal places.
#' @param add_header Whether to add a header with metadata as the first line (number of rows,
#' number of columns, number of classes). If passing `integer_label=FALSE` and `y` is a
#' vector, will write zero as the number of labels. This is not supported by most software.
#' @param decimal_places Number of decimal places to use for numeric values. All values
#' will have exactly this number of places after the decimal point. Be aware that values
#' are rounded and might turn to zeros (will be skipped by default) if they are too small
#' (one can do something like
#' `X@x <- ifelse(X@x >= 0, pmin(X@x, 1e-8), pmax(X@x, -1e-8))` to avoid this).
#' @param append Whether to append text at the end of the file instead of overwriting or
#' creating a new file. Ignored when passing `to_string=TRUE`.
#' @param to_string Whether to write the result into a string (which will be returned
#' from the function) instead of into a file.
#' @return If passing `to_string=FALSE` (the default), will not return anything
#' (`invisible(NULL)`). If passing `to_string=TRUE`, will return a `character`
#' variable with the data contents written into it.
#' @seealso \link{read.sparse}
#' @export
write.sparse <- function(file, X, y, qid=NULL, integer_labels=TRUE,
                         index1=TRUE, sort_indices=TRUE, ignore_zeros=TRUE,
                         add_header=FALSE, decimal_places=8L,
                         append=FALSE, to_string=FALSE) {
    if (is.null(X) || is.null(y))
        stop("Must pass 'X' and 'y'.")
    if (NROW(y) != NROW(X))
        stop("'X' and 'y' must have the same number of rows.")
    if (!to_string) {
        if ((typeof(file) != "character") || NROW(file) == 0L)
            stop("'file' must be a character variable containing a file path.")
    }
    if (NROW(decimal_places) != 1L)
        stop("'decimal_places' must be a single integer.")
    decimal_places <- as.integer(decimal_places)
    if (is.na(decimal_places) || is.infinite(decimal_places) || (decimal_places < 0L))
        stop("Invalid 'decimal_places'.")
    if (decimal_places > 20L) {
        warning("'decimal_places' is greater than 20.")
    }
    
    
    integer_labels  <-  check.bool(integer_labels, "integer_labels")
    index1          <-  check.bool(index1, "index1")
    sort_indices    <-  check.bool(sort_indices, "sort_indices")
    ignore_zeros    <-  check.bool(ignore_zeros, "ignore_zeros")
    add_header      <-  check.bool(add_header, "add_header")
    
    X <- cast.df(X)
    y <- cast.df(y)
    
    X <- cast.dense.vec(X)
    X <- cast.sparse.vec(X)
    if (nrow(X) == 1L) {
        y <- cast.sparse.vec(y)
    } else if (inherits(y, "dsparseVector")) {
        y <- as.numeric(y)
    }


    has_MatrixExtra <- requireNamespace("MatrixExtra", quietly=TRUE)

    ### Note: this check should be made right here due to potential
    ### in-place modifications of the data which would render the input unusable
    ### TODO: this here could benefit from MatrixExtra
    if (sort_indices) {
        
        if (has_MatrixExtra) {
            if (inherits(X, "RsparseMatrix")) {
                X <- MatrixExtra::as.csr.matrix(X)
                X <- MatrixExtra::sort_sparse_indices(X, copy=TRUE)
                sort_indices <- FALSE
            }
        } else {
            if (inherits(y, "RsparseMatrix") && !inherits(y, "ngRMatrix")) {
                y@j <- deepcopy_int(y@j)
            }

            if (inherits(X, "RsparseMatrix") && !inherits(X, "dgRMatrix")) {
                X@j <- deepcopy_int(X@j)
                if (inherits(X, "lsparseMatrix")) {
                    X@x <- deepcopy_log(X@x)
                } else if (inherits(X, "dsparseMatrix")) {
                    X@x <- deepcopy_num(X@x)
                }
            }
        }
    }

    if (has_MatrixExtra) {
        X <- MatrixExtra::as.csr.matrix(X)
    } else {
        if (!inherits(X, "RsparseMatrix"))
            X <- as (X, "RsparseMatrix")
        if (!inherits(X, "generalMatrix"))
            X <- as(X, "generalMatrix")
        if (!inherits(X, "dgRMatrix")) {
            ### Note: casting from ngRMatrix to dgRMatrix is broken in Matrix==1.3.0
            X.csr        <- new("dgRMatrix")
            X.csr@Dim    <-  X@Dim
            X.csr@p      <-  X@p
            X.csr@j      <-  X@j
            if (.hasSlot(X, "x"))
                X.csr@x  <-  as.numeric(X@x)
            else
                X.csr@x  <-  rep(1., length(X@j))
            X <- X.csr
        }
    }
    
    allowed_y_multi <- c("matrix", "matrix.coo", "matrix.csr", "matrix.csc",
                         "dgRMatrix", "dgCMatrix", "dgTMatrix",
                         "ngRMatrix", "ngCMatrix", "ngTMatrix",
                         "lgRMatrix", "lgCMatrix", "lgTMatrix")
    allowed_y_single <- c("integer", "numeric", "factor", "float32")
    allowed_y <- c(allowed_y_multi, allowed_y_single)
    if (!inherits(y, allowed_y))
        stop(sprintf("Invalid 'y' - allowed types: %s", paste0(allowed_y, collapse=",")))
    if (inherits(y, allowed_y_multi)) {
        y_is_multi <- TRUE
        if (!inherits(y, "RsparseMatrix")) {
            if (has_MatrixExtra) {
                y <- MatrixExtra::as.csr.matrix(y, binary=TRUE)
            } else {
                y <- as(y, "RsparseMatrix")
            }
            if (!inherits(y, "dgRMatrix"))
                y <- as(y, "generalMatrix")
        }
    } else {
        if (inherits(y, "float32") && ncol(y) != 1L)
            stop("'float32' objects not supported for multi-label 'y'.")
        y_is_multi <- FALSE
        if (integer_labels) {
            if (typeof(y) != "integer")
                y <- as.integer(y)
        } else {
            if (typeof(y) != "double")
                y <- as.numeric(y)
        }
    }
    
    if (!NROW(qid)) {
        qid <- integer()
    } else {
        if (NROW(qid) != NROW(X))
            stop("'X' and 'qid' must have the same number of rows.")
        if (typeof(qid) != "integer")
            qid <- as.integer(qid)
    }
    
    if (integer_labels) {
        if (add_header) {
            if (y_is_multi) {
                n_classes <- ncol(y)
            } else if (any(is.infinite(y)) || all(is.na(y))) {
                n_classes <- 0L
            } else if (any(y < 0)) {
                n_classes <- length(unique(y))
            } else {
                n_classes <- max(y, na.rm=TRUE)
                if (0L %in% y)
                    n_classes <- n_classes + 1L
                n_classes <- max(n_classes, 1L)
                if (is.na(n_classes) || is.infinite(n_classes))
                    nclasses <- 0L
                if (typeof(n_classes) != "integer")
                    n_classes <- as.integer(n_classes)
            }
        } else
            n_classes <- 0L
    } else {
        n_classes <- 0L
    }
    
    if (!to_string) {

        file <- process.file.name(file)
        
        if (add_header && append && file.exists(file))
            warning("Warning: adding header to existing file with 'append=TRUE'.")
        
        if (y_is_multi) {
            success <- write_multi_label_R(
                file,
                X@p,
                X@j,
                X@x,
                y@p,
                y@j,
                qid,
                ncol(X),
                ncol(y),
                ignore_zeros,
                sort_indices,
                index1,
                add_header,
                decimal_places,
                append
            )
        } else {
            if (integer_labels) {
                success <- write_single_label_integer_R(
                    file,
                    X@p,
                    X@j,
                    X@x,
                    y,
                    qid,
                    ncol(X),
                    n_classes,
                    ignore_zeros,
                    sort_indices,
                    index1,
                    add_header,
                    decimal_places,
                    append
                )
            } else {
                n_classes <- 0L
                success <- write_single_label_numeric_R(
                    file,
                    X@p,
                    X@j,
                    X@x,
                    y,
                    qid,
                    ncol(X),
                    n_classes,
                    ignore_zeros,
                    sort_indices,
                    index1,
                    add_header,
                    decimal_places,
                    append
                )
            }
        }
        if (!success)
            stop("Error: file write failed.")
        return(invisible(NULL))
    } else {
        if (y_is_multi) {
            res <- write_multi_label_to_str_R(
                X@p,
                X@j,
                X@x,
                y@p,
                y@j,
                qid,
                ncol(X),
                ncol(y),
                ignore_zeros,
                sort_indices,
                index1,
                add_header,
                decimal_places
            )
        } else {
            if (integer_labels) {
                res <- write_single_label_integer_to_str_R(
                    X@p,
                    X@j,
                    X@x,
                    y,
                    qid,
                    ncol(X),
                    n_classes,
                    ignore_zeros,
                    sort_indices,
                    index1,
                    add_header,
                    decimal_places
                )
            } else {
                n_classes <- 0L
                res <- write_single_label_numeric_to_str_R(
                    X@p,
                    X@j,
                    X@x,
                    y,
                    qid,
                    ncol(X),
                    n_classes,
                    ignore_zeros,
                    sort_indices,
                    index1,
                    add_header,
                    decimal_places
                )
            }
        }
        
        if (!length(res))
            stop("Error: string write failed.")
        return(res$str)
    }
}
Any scripts or data that you put into this service are public.
readsparse documentation built on April 12, 2025, 1:30 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
readsparse
Read and Write Sparse Matrices in 'SVMLight' and 'LibSVM' Formats

R/read_sparse.R
In readsparse: Read and Write Sparse Matrices in 'SVMLight' and 'LibSVM' Formats

Defines functions write.sparse read.sparse process.file.name check.for.overflow cast.sparse.vec cast.dense.vec cast.df check.int check.bool

Documented in read.sparse write.sparse

Try the readsparse package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

readsparse Read and Write Sparse Matrices in 'SVMLight' and 'LibSVM' Formats

R/read_sparse.R In readsparse: Read and Write Sparse Matrices in 'SVMLight' and 'LibSVM' Formats

Defines functions write.sparse read.sparse process.file.name check.for.overflow cast.sparse.vec cast.dense.vec cast.df check.int check.bool

Documented in read.sparse write.sparse

Try the readsparse package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

readsparse
Read and Write Sparse Matrices in 'SVMLight' and 'LibSVM' Formats

R/read_sparse.R
In readsparse: Read and Write Sparse Matrices in 'SVMLight' and 'LibSVM' Formats