Nothing
#' Write genotype and sample data into a Plink BED/BIM/FAM file set.
#'
#' This function writes a genotype matrix (`X`) and its associated locus (`bim`) and individual (`fam`) data tables into three Plink files in BED, BIM, and FAM formats, respectively.
#' This function is a wrapper around the more basic functions
#' [write_bed()],
#' [write_bim()],
#' [write_fam()],
#' but additionally tests that the data dimensions agree (or stops with an error).
#' Also checks that the genotype row and column names agree with the bim and fam tables if they are all present.
#' In addition, if `bim = NULL` or `fam = NULL`, these are auto-generated using
#' [make_bim()] and
#' [make_fam()],
#' which is useful behavior for simulated data.
#' Lastly, the phenotype can be provided as a separate argument and incorporated automatically if `fam = NULL` (a common scenario for simulated genotypes and traits).
#' Below suppose there are `m` loci and `n` individuals.
#'
#' @param file Output file path, without extensions (each of .bed, .bim, .fam extensions will be added automatically as needed).
#' @param X The `m`-by-`n` genotype matrix.
#' @param bim The tibble or data.frame containing locus information.
#' It must contain `m` rows and these columns: `chr`, `id`, `posg`, `pos`, `ref`, `alt`.
#' If `NULL` (default), it will be quietly auto-generated.
#' @param fam The tibble or data.frame containing individual information.
#' It must contain `n` rows and these columns: `fam`, `id`, `pat`, `mat`, `sex`, `pheno`.
#' If `NULL` (default), it will be quietly auto-generated.
#' @param pheno The phenotype to write into the FAM file assuming `fam = NULL`.
#' This must be a length-`n` vector.
#' This will be ignored (with a warning) if `fam` is provided.
#' @param verbose If `TRUE` (default) function reports the paths of the files being written (after autocompleting the extensions).
#' @param append If `TRUE`, appends loci onto the BED and BIM files (default `FALSE`).
#' In this mode, all individuals must be present in each write (only loci are appended); the FAM file is not overwritten if present, but is required at every write for internal validations.
#' If the FAM file already exists, it is not checked to agree with the FAM table provided.
#' PHEN file is always unchanged and ignored if `append = TRUE`.
#' @param write_phen If `TRUE` and `append = FALSE`, writes a .phen file too from the `fam` data provided or auto-generated (using [write_phen()]).
#' Default `FALSE`.
#'
#' @return Invisibly, a named list with items in this order: `X` (genotype matrix), `bim` (tibble), `fam` (tibble).
#' This is most useful when either BIM or FAM tables were auto-generated.
#'
#' @examples
#' # to write existing data `X`, `bim`, `fam` into files "data.bed", "data.bim", and "data.fam",
#' # run like this:
#' # write_plink("data", X, bim = bim, fam = fam)
#'
#' # The following example is more detailed but also more awkward
#' # because (only for these examples) the package must create the file in a *temporary* location
#'
#' # here is an example for a simulation
#'
#' # create 10 random genotypes
#' X <- rbinom(10, 2, 0.5)
#' # replace 3 random genotypes with missing values
#' X[sample(10, 3)] <- NA
#' # turn into 5x2 matrix
#' X <- matrix(X, nrow = 5, ncol = 2)
#'
#' # simulate a trait for two individuals
#' pheno <- rnorm(2)
#'
#' # write this data to BED/BIM/FAM files
#' # output path without extension
#' file_out <- tempfile('delete-me-example')
#' # here all of the BIM and FAM columns except `pheno` are autogenerated
#' write_plink(file_out, X, pheno = pheno)
#'
#' # delete all three outputs when done
#' delete_files_plink( file_out )
#'
#' @seealso
#' [write_bed()],
#' [write_bim()],
#' [write_fam()],
#' [make_bim()],
#' [make_fam()].
#'
#' Plink BED/BIM/FAM format reference:
#' <https://www.cog-genomics.org/plink/1.9/formats>
#'
#' @export
write_plink <- function(
file,
X,
bim = NULL,
fam = NULL,
pheno = NULL,
verbose = TRUE,
append = FALSE,
write_phen = FALSE
) {
# die if things are missing
if (missing(file))
stop('Output file path is required!')
if (missing(X))
stop('Genotype matrix (X) is required!')
if ( append && write_phen )
warning( 'Will not write PHEN file with `append = TRUE`!' )
# check dimensions or create autocomplete data, as needed
# get X dimensions (tested over and over)
m <- nrow(X)
n <- ncol(X)
# BIM checks
if (is.null(bim)) {
# generate completely autocompleted BIM if it's missing in input
# dimensions consistency is automatically ensured
bim <- make_bim(n = m)
} else {
# require that dimensions match
if (nrow(bim) != m)
stop('Num loci in X (', m, ') and bim (', nrow(bim), ') disagree!')
# check that names match, if present
if ( !is.null( rownames(X) ) ) {
if ( any( rownames(X) != bim$id ))
stop('Row names in genotype matrix do not agree with BIM `id` column!')
}
}
# FAM checks
if (is.null(fam)) {
# generate completely autocompleted FAM if it's missing in input
# dimensions consistency is automatically ensured
# everything is made up here
fam <- make_fam(n = n)
if (!is.null(pheno)) {
# we provided phenotype only, check it, then add it to fam
# require that dimensions match
if (length(pheno) != n)
stop('Num ind in X (', n, ') and pheno (', length(pheno), ') disagree!')
# overwrite default phenotype with desired value
fam$pheno <- pheno
}
} else {
# require that dimensions match
if (nrow(fam) != n)
stop('Num ind in X (', n, ') and fam (', nrow(fam), ') disagree!')
# check that names match, if present
if ( !is.null( colnames(X) ) ) {
if ( any( colnames(X) != fam$id ))
stop('Column names in genotype matrix do not agree with FAM `id` column!')
}
# warn if we also specified pheno, as it will be ignored
if (!is.null(pheno))
warning('`pheno` will be ignored, since `fam` was not NULL!')
}
# now everything has been checked and/or created
# time to write file!
write_bed(file, X, verbose = verbose, append = append)
write_bim(file, bim, verbose = verbose, append = append)
if ( !append || !file.exists( add_ext( file, 'fam') ) )
write_fam(file, fam, verbose = verbose)
if ( !append && write_phen )
write_phen( file, fam, verbose = verbose )
# invisibly return the data passed and/or created
return(
invisible(
list(
X = X,
bim = bim,
fam = fam
)
)
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.