# R/data.R

#' Synthetic dataset containing single nucleotide polymorphisms (SNP)
#'
#' The \code{raw.data} matrix is a simulated dataset consisting of 3,000
#' independent SNPs and 1,004 individuals: 1,000 individuals belonging to one
#' of 5 populations (200 individuals each) plus 4 outlying individuals. The
#' matrix \code{raw.data} contains the numbers 0, 1, and 2, representing SNPs
#' in additive coding. The pairwise genetic distances between populations are
#' listed below (see Balding and Nichols, 1995):
#' \tabular{cccccc}{
#'      \tab pop1   \tab pop2   \tab  pop3  \tab  pop4  \tab  pop5  \cr
#' pop1 \tab        \tab 0.0040 \tab 0.0059 \tab 0.0085 \tab 0.0101 \cr
#' pop2 \tab 0.0040 \tab        \tab 0.0055 \tab 0.0082 \tab 0.0099 \cr
#' pop3 \tab 0.0059 \tab 0.0055 \tab        \tab 0.0104 \tab 0.0119 \cr
#' pop4 \tab 0.0085 \tab 0.0082 \tab 0.0104 \tab        \tab 0.0139 \cr
#' pop5 \tab 0.0101 \tab 0.0099 \tab 0.0119 \tab 0.0139 \tab
#' }
#'
#' @name raw.data
#' @docType data
#' @format A matrix with 3,000 columns and 1,004 rows
#' @seealso \code{\link{label}} and \code{\link{PC}}
#' @usage data(ipcaps_example)
#' @keywords raw.data
#' @md
#' @references
#' Balding, D.J., and Nichols, R.A. (1995). A method for quantifying
#' differentiation between populations at multi-allelic loci and its
#' implications for investigating identity and paternity. Genetica 96, 3-12.
"raw.data"


#' Marker information of the synthetic single nucleotide polymorphism (SNP)
#' dataset \code{raw.data}
#'
#' This dataset contains a data frame of 3,000 rows and 6 columns of marker
#' information which is related to the dataset \code{raw.data}. Each row
#' represents a marker. The columns are chr, ID, GD, position, allele1, and
#' allele2. The columns are the same as in the BIM file, see more details at:
#' \url{http://zzz.bwh.harvard.edu/plink/data.shtml}.
#'
#' @name snp.info
#' @docType data
#' @format A data frame with 6 columns and 3,000 rows
#' @seealso \code{\link{raw.data}} and \code{\link{label}}
#' @usage data(ipcaps_example)
#' @keywords snp.info
"snp.info"


#' Sample information of the synthetic single nucleotide polymorphism (SNP)
#' dataset \code{raw.data}
#'
#' This dataset contains a data frame of 1,004 rows and 6 columns of sample
#' information which is related to the dataset \code{raw.data}. Each row
#' represents a sample. The columns are FamID, IndID, PatID, MatID, sex, and
#' phenotype. The columns are the same as in the FAM file, see more details at:
#' \url{http://zzz.bwh.harvard.edu/plink/data.shtml}.
#'
#' @name ind.info
#' @docType data
#' @format A data frame with 6 columns and 1,004 rows
#' @seealso \code{\link{raw.data}} and \code{\link{snp.info}}
#' @usage data(ipcaps_example)
#' @keywords ind.info
"ind.info"



#' Synthetic dataset containing population labels for the dataset
#' \code{raw.data}
#'
#' This dataset contains a character vector of 1,004 elements giving, for each
#' of the 1,004 individuals, the label of the population to which the
#' individual belongs. The five populations and the outliers are labeled as
#' "pop1", "pop2", "pop3", "pop4", "pop5", and "outlier".
#'
#' @name label
#' @docType data
#' @format A vector with 1,004 elements.
#' @seealso \code{\link{raw.data}} and \code{\link{PC}}
#' @usage data(ipcaps_example)
#' @keywords label
"label"


#' Synthetic dataset containing the top 10 principal components (PC) from the
#' dataset \code{raw.data}
#'
#' This dataset contains a numeric matrix of 1,004 rows and 10 columns holding
#' the top 10 PCs calculated from the dataset \code{raw.data}. The PCs were
#' calculated using linear principal component analysis (PCA), see more details
#' at \code{KRIS::cal.pc.linear}.
#'
#' @name PC
#' @docType data
#' @format A matrix with 10 columns and 1,004 rows
#' @seealso \code{\link{raw.data}} and \code{\link{label}}
#' @usage data(ipcaps_example_PC10)
#' @keywords PC
"PC"

NULL
# kridsadakorn/ipcaps.bioc documentation built on Jan. 22, 2020, 11:18 p.m.