# R/read_merlin.R

#' Read MERLIN \code{fam.dat}
#'
#' Read the \code{fam.dat} file generated by LDG for MERLIN.
#'
#' The \code{fam.dat} file contains two columns: symbol and description.
#' This file is required to describe the final M + 1 columns in the
#' \code{fam.ped} file, where M is the number of markers used in the analysis
#' and the last column is the affectedness status (the first five columns are
#' always the same - FamID, IndID, PatID, MatID and Gender). Therefore the
#' \code{fam.dat} file will contain M + 1 rows.
#'
#' Both columns are always read as character. Without this,
#' \code{read.table()} would guess the column types, so a symbol column made up
#' solely of values such as \code{T} or \code{F} would silently become logical,
#' and descriptions that look like numbers (e.g. \code{001}) would lose their
#' original spelling.
#'
#' @param fname The file name to read.
#'
#' @return A \code{data.frame} with two character columns (\code{Symbol} and
#'         \code{Description}) and M + 1 rows, where M is the number of markers
#'         used in the analysis and the last row describes the phenotype of
#'         interest.
#' @export
#'
#' @examples
#' \dontrun{
#' read_merlin_dat("merlin_13_famA.dat")
#' }
read_merlin_dat <- function(fname) {
  utils::read.table(
    fname,
    header = FALSE,
    col.names = c("Symbol", "Description"),
    # Keep the file's text verbatim instead of letting type.convert() guess.
    colClasses = "character",
    stringsAsFactors = FALSE
  )
}

#' Read MERLIN \code{fam.map}
#'
#' Load the \code{fam.map} file that LDG writes for MERLIN.
#'
#' A \code{fam.map} file has a header line followed by one row per marker, with
#' three columns: chromosome, marker name, and marker position in
#' centimorgans. Column names are taken from the header line and column types
#' are guessed by \code{read.table()}.
#'
#' @param fname The file name to read.
#'
#' @return A \code{data.frame} with three columns (chromosome, name and
#'         position) and M rows, where M is the number of markers used in the
#'         analysis.
#' @export
#'
#' @examples
#' \dontrun{
#' read_merlin_map("merlin_13_famA.map")
#' }
read_merlin_map <- function(fname) {
  marker_map <- utils::read.table(
    file = fname,
    header = TRUE,
    stringsAsFactors = FALSE
  )
  marker_map
}

#' Read MERLIN \code{fam.ped}
#'
#' Read the \code{fam.ped} file generated by LDG for MERLIN.
#'
#' The \code{fam.ped} file contains N rows and 5 + M + 1 columns, where N is
#' the number of samples, M is the number of markers and the last column
#' is the affectedness status (the first five columns are always
#' the same - FamID, IndID, PatID, MatID and Gender). The file is read without
#' a header line.
#'
#' If every column is numeric the result is a numeric matrix. If at least one
#' column is character (for example a non-numeric family ID or genotypes such
#' as \code{1/2}), the result is a character matrix in which the numeric
#' columns are converted with \code{as.character()}, so values are not padded
#' with spaces (\code{"1"} rather than \code{" 1"}).
#'
#' @param fname The file name to read.
#'
#' @return A \code{matrix} with N rows and 5 + M + 1 columns
#' (refer to the details section).
#'
#' @export
#'
#' @examples
#' \dontrun{
#' read_merlin_ped("merlin_13_famA.ped")
#' }
read_merlin_ped <- function(fname) {
  ped <- utils::read.table(fname, header = FALSE, stringsAsFactors = FALSE)

  # as.matrix() on a data frame with any character column runs format() over
  # the remaining columns, padding them to a common width (" 1", "10").
  # Converting those columns ourselves keeps each value free of padding.
  is_chr <- vapply(ped, is.character, logical(1))
  if (any(is_chr)) {
    for (j in which(!is_chr)) {
      ped[[j]] <- as.character(ped[[j]])
    }
  }

  as.matrix(ped)
}


#' Read HapMap2 Annotation File
#'
#' Load the LINKDATAGEN annotation file for MPS and Illumina SNP chip data
#' (rev.807 Dec2013, based on the b37 build), keeping only the chromosome,
#' physical position and genetic map position columns.
#'
#' The file is parsed as tab-separated text with \code{readr::read_tsv()};
#' text following a \code{#} is treated as a comment. Only the columns named
#' \code{Chrom}, \code{physical_position_build37} and
#' \code{deCODE_genetic_map_position} are read, and they keep those names in
#' the result.
#'
#' @param fname The file name to read.
#'
#' @return A data frame with 3 columns: \code{Chrom} (character),
#'         \code{physical_position_build37} (integer) and
#'         \code{deCODE_genetic_map_position} (double). The rev.807 annotation
#'         file yields 4,031,388 rows.
#'
#' @examples
#' \dontrun{
#' annot <- read_annot("annotHapMap2U.txt.gz")
#' }
#'
#' @export
read_annot <- function(fname) {
  # Column specification: anything not listed here is skipped.
  wanted_cols <- readr::cols_only(
    Chrom = readr::col_character(),
    physical_position_build37 = readr::col_integer(),
    deCODE_genetic_map_position = readr::col_double()
  )

  readr::read_tsv(file = fname, col_types = wanted_cols, comment = "#")
}
# bahlolab/linkrvis documentation built on May 11, 2019, 5:25 p.m.