R/read_pheno.R

Defines functions read_phenocovar read_pheno

Documented in read_pheno

#' Read phenotype data
#'
#' Read phenotype data from a CSV file (and, optionally, phenotype
#' covariate data from a separate CSV file). The CSV files may be
#' contained in zip files, separately or together.
#'
#' @param file Character string with path to the phenotype data file
#' (or a zip file containing both the phenotype and phenotype
#' covariate files).
#' @param phenocovarfile Character string with path to the phenotype
#' covariate file. This can be a separate CSV or zip file; if a zip
#' file, it must contain exactly one CSV file. Alternatively, if the
#' `file` argument indicates a zip file that contains two files
#' (phenotypes and phenotype covariates), then this
#' `phenocovarfile` argument must indicate the base name for the
#' phenotype covariate file.
#' @param sep the field separator character
#' @param na.strings a character vector of strings which are to be
#' interpreted as `NA` values.
#' @param comment.char A character vector of length one containing a
#' single character to denote comments within the CSV files.
#' @param transpose If TRUE, the phenotype data will be transposed. The
#' phenotype covariate information is **never** transposed.
#' @param quiet If `FALSE`, print progress messages.
#'
#' @return Either a matrix of phenotype data, or a list containing
#' `pheno` (phenotype matrix) and `phenocovar` (phenotype
#' covariate matrix).
#'
#' @export
#' @keywords IO
#' @seealso [read_cross2()],
#' sample data files at <https://kbroman.org/qtl2/pages/sampledata.html>
#' and <https://github.com/rqtl/qtl2data>
#'
#' @examples
#' \dontrun{
#' file <- paste0("https://raw.githubusercontent.com/rqtl/",
#'                "qtl2data/main/Gough/gough_pheno.csv")
#' phe <- read_pheno(file)
#'
#' phecovfile <- paste0("https://raw.githubusercontent.com/rqtl/",
#'                      "qtl2data/main/Gough/gough_phenocovar.csv")
#' phe_list <- read_pheno(file, phecovfile)
#' }
read_pheno <-
    function(file, phenocovarfile=NULL, sep=",", na.strings=c("-", "NA"),
             comment.char="#", transpose=FALSE, quiet=TRUE)
{
    # handle zip file, which may contain phenocovarfile
    if(grepl("\\.zip$", file)) { # zip file
        dir <- tempdir()
        if(is_web_file(file)) {
            tmpfile <- tempfile()
            if(!quiet) message(" - downloading ", file, "\n       to ", tmpfile)
            utils::download.file(file, tmpfile, quiet=TRUE)
            file <- tmpfile
            on.exit(unlink(tmpfile))
        }

        if(!quiet) message(" - unzipping ", file, "\n       to ", dir)
        file <- path.expand(file)
        stop_if_no_file(file)
        unzipped_files <- utils::unzip(file, exdir=dir)
        basenames <- basename(unzipped_files)

        on.exit({ # clean up when done
            if(!quiet) message(" - cleaning up")
            unlink(unzipped_files)
        }, add=TRUE)

        other_files <- unzipped_files
        if(!is.null(phenocovarfile) && any(basenames == phenocovarfile)) {
            # look for phenocovarfile in the unzipped files
            if(sum(basenames==phenocovarfile) > 1)
                stop("Multiple copies of ", phenocovarfile, " in zip file")
            other_files <- unzipped_files[basenames != phenocovarfile]
            phenocovarfile <- unzipped_files[basenames==phenocovarfile]
        }

        # halt if more than one file left
        if(length(other_files) > 1) {
            stop("Unclear which file to read: ",
                 paste(basename(other_files), collapse=" "))
        }

        # redefine file with new name
        file <- other_files
    }

    # directory containing the data
    file <- path.expand(file)

    # read file
    stop_if_no_file(file)
    if(!quiet) message(" - Reading ", basename(file))
    pheno <- fread_csv_numer(file, sep=sep, na.strings=na.strings,
                             comment.char=comment.char, transpose=transpose)
    pheno <- as.matrix(pheno)

    # read phenocovarfile
    if(!is.null(phenocovarfile)) {
        if(!quiet) message(" - Reading ", basename(phenocovarfile))
        phenocovar <- read_phenocovar(phenocovarfile, sep=sep, na.strings=na.strings,
                                      comment.char=comment.char, quiet=quiet)
        return(list(pheno=pheno, phenocovar=phenocovar))
    }

    pheno
}

# read phenocovar file (to handle case where it's also zipped)
read_phenocovar <-
    function(file, phenocovarfile=NULL, sep=",", na.strings=c("-", "NA"),
             comment.char="#", quiet=TRUE)
{
    # handle zip file, which may contain phenocovarfile
    if(grepl("\\.zip$", file)) { # zip file
        dir <- tempdir()
        if(is_web_file(file)) {
            tmpfile <- tempfile()
            if(!quiet) message(" - downloading ", file, "\n       to ", tmpfile)
            utils::download.file(file, tmpfile, quiet=TRUE)
            file <- tmpfile
            on.exit(unlink(tmpfile))
        }

        if(!quiet) message(" - unzipping ", file, "\n       to ", dir)
        file <- path.expand(file)
        stop_if_no_file(file)
        unzipped_files <- utils::unzip(file, exdir=dir)

        on.exit({ # clean up when done
            if(!quiet) message(" - cleaning up")
            unlink(unzipped_files)
        }, add=TRUE)

        if(length(unzipped_files) > 1) {
            stop("Unclear which file to read: ",
                 paste(basename(unzipped_files), collapse=" "))
        }

        # redefine file with new name
        file <- unzipped_files
    }

    # directory containing the data
    file <- path.expand(file)

    # read file
    stop_if_no_file(file)
    if(!quiet) message(" - Reading ", basename(file))
    fread_csv(file, sep=sep, na.strings=na.strings,
              comment.char=comment.char, transpose=FALSE)
}
rqtl/qtl2 documentation built on March 20, 2024, 6:35 p.m.