#' @title Convert files to .bed
#'
#' @description
#' Calls PLINK to convert .ped and .map files to the .bed format, reducing
#' file size significantly.
#'
#' @details
#' Both the .map and .ped files generated by the simulation need to be present
#' with the same name in the same directory. The conversion combines the .ped
#' and .map into three new files, being of format .bed, .bim and .fam. The .bed
#' file is the one actually holding genotypes, but all three files are used by
#' PLINK in regressions.\cr
#' Be aware that while .ped files can be viewed in most text editors
#' (as long as their size allows it), .bed files cannot.\cr
#' The input files are only removed when PLINK exits with status 0 and the
#' .bed file exists afterwards. Otherwise a warning is raised and the input
#' files are left untouched.
#'
#' @param ped_file path and name of the file to be converted, excluding file
#' extension.
#' @param bed_file path of the .bed file to be written, excluding file
#' extension.
#' @param del a single \code{TRUE} or \code{FALSE}. If \code{TRUE}, deletes
#' the .ped and .map files belonging to \code{ped_file} after a successful
#' conversion.
#' @param plink_path \code{TRUE} if user has added PLINK to the system
#' variable "PATH". Otherwise a string specifying the path to the folder
#' containing plink.exe.
#'
#' @return Does not return anything, but writes the converted \code{ped_file}
#' to disk at the path \code{bed_file}.
#'
#' @export
convert_geno_file <- function(ped_file, bed_file = ped_file, del = TRUE,
                              plink_path = TRUE) {
  ped_path <- paste0(ped_file, ".ped")
  map_path <- paste0(ped_file, ".map")
  stopifnot(
    "ped_file needs to be a valid file without extension" =
      (file.exists(ped_path) && file.exists(map_path)),
    "bed_file needs to be a valid file without extension" =
      (tools::file_ext(bed_file) == ""),
    # A bare is.logical() would let NA or longer vectors through to `if`.
    "del needs to be either TRUE or FALSE" = (isTRUE(del) || isFALSE(del)),
    "plink_path needs to be a valid path to plink" =
      (isTRUE(plink_path) ||
        (is.character(plink_path) && length(plink_path) == 1L &&
          file.exists(paste0(plink_path, "/plink.exe"))))
  )

  # When PLINK is not on PATH, prefix the command so that the shell can find
  # the executable in the user-supplied folder.
  if (isTRUE(plink_path)) {
    tmp_path <- ""
  } else {
    tmp_path <- paste0("SET PATH=", quote_path(plink_path), "; &&")
  }

  status <- shell(paste(
    tmp_path, "plink --file", quote_path(ped_file),
    "--make-bed --out", quote_path(bed_file)
  ))

  # Never delete the inputs unless the conversion demonstrably succeeded;
  # otherwise a failed PLINK run would destroy the only copy of the data.
  converted <- isTRUE(status == 0) && file.exists(paste0(bed_file, ".bed"))
  if (!converted) {
    warning(
      "PLINK conversion did not complete successfully (exit status ",
      status, "); input files were not deleted.",
      call. = FALSE
    )
    return(invisible(NULL))
  }

  if (del) {
    unlink(ped_path)
    unlink(map_path)
  }
}
#' @title Run association analysis
#' @md
#' @description Calls PLINK to run an association analysis on specified data.
#'
#' @details
#' PLINK uses the \code{pheno_name} column to determine which kind of analysis
#' to run on the data.
#' * Use "pheno" for a 1 degree of freedom chi-square test with the case-control
#' representation of the phenotype.
#' * Use "line_pheno" for a linear regression using the case-control
#' representation of the phenotype.
#' * Use "LTFH_pheno" for a linear regression using the LTFH phenotype. Requires
#' that \code{pheno_file} was created/edited by \code{assign_ltfh_phenotype()}.
#' * Use "GWAX_pheno" for a linear regression using the GWAX phenotype. Requires
#' that \code{pheno_file} was created/edited by \code{assign_GWAX_phenotype()}.
#' * "child_liab" and "child_lg" are also accepted, provided a column with
#' that name exists in \code{pheno_file}.
#'
#' Any other value of \code{pheno_name} results in an error.
#'
#' @param geno_file string specifying path to genotypes file, including file
#' extension (.ped or .bed). The companion files (.map for .ped; .bim and
#' .fam for .bed) must exist next to it with the same base name.
#' @param pheno_file string specifying path to phenotypes file, including file
#' extension. Must be a .txt file.
#' @param pheno_name column name of phenotype to be used in analysis.
#' @param out_dir directory to save results in, including "/" or "\\\\" at the
#' end.
#' @param bed a single logical indicating whether or not the genotypes file is
#' a .bed file. \code{FALSE} if the file is .ped.
#' @param plink_path \code{TRUE} if user has added PLINK to the system
#' variable "PATH". Otherwise a string specifying the path to the folder
#' containing plink.exe.
#'
#' @return Invisibly returns the exit status reported by \code{shell()}; a
#' warning is raised when it is not 0. PLINK writes its results to disk in
#' \code{out_dir}. The name of the written file depends on \code{pheno_name}:
#' * pheno: output filename is "GWAS".
#' * line_pheno: output filename is "linear".
#' * LTFH_pheno: output filename is "LTFH".
#' * GWAX_pheno: output filename is "GWAX".
#' * child_liab: output filename is "LIAB".
#' * child_lg: output filename is "LG".
#'
#' The file format of the written file and the columns contained in it depends
#' on which phenotype is used for the analysis.
#'
#' @import tools
#'
#' @export
analysis_association <- function(geno_file, pheno_file, pheno_name, out_dir,
                                 bed = TRUE, plink_path = TRUE) {
  # Build companion file names from the extension-less path. Substituting
  # ".ped"/".bed" with sub() treats "." as a regex wildcard and replaces the
  # first match anywhere in the path, not necessarily the extension.
  geno_ext <- tools::file_ext(geno_file)
  geno_base <- tools::file_path_sans_ext(geno_file)

  stopifnot(
    "geno_file needs to be a valid file" =
      (file.exists(geno_file) &&
        ((geno_ext == "ped" &&
          file.exists(paste0(geno_base, ".map"))) ||
          (geno_ext == "bed" &&
            file.exists(paste0(geno_base, ".bim")) &&
            file.exists(paste0(geno_base, ".fam"))))),
    "pheno_file needs to be a valid file" =
      (file.exists(pheno_file) &&
        tools::file_ext(pheno_file) == "txt"),
    # nrows = 0 returns the column names without loading the data rows.
    "pheno_name does not exist in 'pheno_file'" =
      (pheno_name %in% colnames(data.table::fread(pheno_file, nrows = 0))),
    "out_dir needs to be a valid directory ending with '\\\\' or '/'" =
      (dir.exists(out_dir) && grepl("[/\\\\]$", out_dir)),
    # A bare is.logical() would let NA or longer vectors through to `if`.
    "bed needs to be either TRUE or FALSE" = (isTRUE(bed) || isFALSE(bed)),
    "plink_path needs to be a valid path to plink" =
      (isTRUE(plink_path) ||
        (is.character(plink_path) && length(plink_path) == 1L &&
          file.exists(paste0(plink_path, "/plink.exe"))))
  )

  # Map the phenotype column to the base name of the PLINK output file.
  out_name <- switch(pheno_name,
    pheno = "GWAS",
    line_pheno = "linear",
    LTFH_pheno = "LTFH",
    GWAX_pheno = "GWAX",
    child_liab = "LIAB",
    child_lg = "LG",
    stop("'pheno_name' didn't match expected input.")
  )
  out_path <- paste0(out_dir, out_name)

  # When PLINK is not on PATH, prefix the command so that the shell can find
  # the executable in the user-supplied folder.
  if (isTRUE(plink_path)) {
    tmp_path <- ""
  } else {
    tmp_path <- paste0("SET PATH=", quote_path(plink_path), "; &&")
  }

  # PLINK takes the file set's base name; the flag selects binary or text.
  file_type <- if (bed) "--bfile" else "--file"

  plink_command <- paste(
    tmp_path,
    "plink", file_type, quote_path(geno_base),
    "--pheno", quote_path(pheno_file),
    "--pheno-name", pheno_name,
    "--out", quote_path(out_path),
    "--assoc"
  )

  status <- shell(cmd = plink_command)
  if (!isTRUE(status == 0)) {
    warning(
      "PLINK exited with non-zero status ", status,
      "; results in '", out_path, "' may be missing or incomplete.",
      call. = FALSE
    )
  }
  invisible(status)
}
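# NOTE: leftover text from a web page embed widget, not R code; kept as a
# comment so the file parses.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.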