Nothing
# Mega2R: Mega2 for R.
#
# Copyright 2017-2019, University of Pittsburgh. All Rights Reserved.
#
# Contributors to Mega2R: Robert V. Baron and Daniel E. Weeks.
#
# This file is part of the Mega2R program, which is free software; you
# can redistribute it and/or modify it under the terms of the GNU
# General Public License as published by the Free Software Foundation,
# either version 2 of the License, or (at your option) any later
# version.
#
# Mega2R is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# For further information contact:
# Daniel E. Weeks
# e-mail: weeks@pitt.edu
#
# ===========================================================================
#library(famSKATRC)
#library(kinship2)
#' load Mega2 SQLite database and perform initialization for famSKATRC usage
#'
#' @description
#' This populates the \bold{R} data frames with the specified \bold{Mega2} SQLite database. It initializes
#' the fam(ily) table and makes sure the person entries are unique. Finally, it generates a kinship matrix
#' from the family data. It also stores a weighting for the common and rare variant that may be used
#' later if NULL is specified as a weight in \emph{Mega2famSKATRC}. The common weighting is the function
#' \emph{dbeta}(maf, 1, 25). The rare weighting is the function \emph{dbeta}(maf, 0.5, 0.5).
#'
#' @param db specifies the path of a \bold{Mega2} SQLite database containing study data.
#'
#' @param verbose TRUE indicates that diagnostic printouts should be enabled.
#' This value is saved in the returned environment.
#'
#' @param ALPHA TRUE indicates that two runs of famSKAT_RC should be enabled.
#' One with ALPHA numeric ID's and one with numeric IDs ... this is temporary.
#' The default is FALSE.
#'
#' @param ... fed to \emph{dbmega2_import()}; should be bpPosMap= to select from the maps of
#' base pairs, if the default is not desired.
#'
#' @return "environment" containing data frames from an SQLite database and some computed values.
#'
#' @importFrom kinship2 kinship
#' @importFrom stats dbeta
#' @export
#'
#' @note
#' \emph{init_famSKATRC} creates a new data frame, \emph{envir$phe}, containing phenotype observations.
#' In addition, it initializes a matrix to aid
#' in translating a genotype allele matrix to a genotype count matrix.
#'
#' It also initializes the data frame \emph{envir$famSKATRC_results} to zero rows.
#'
#' @seealso \code{\link{Mega2famSKATRC}}
#'
#' @examples
#' db = system.file("exdata", "seqsimm.db", package="Mega2R")
#' ENV = init_famSKATRC(db, verbose = FALSE)
#' ls(ENV)
#'
init_famSKATRC = function (db = NULL, verbose = FALSE, ALPHA = FALSE, ...) {
# id, fullkins, acc
if (is.null(db))
stop("You must specify a database argument!\n", call. = FALSE)
envir = dbmega2_import(db, verbose = verbose, ...)
fam = mkfam(envir = envir)
setfam(fam, envir = envir) # also updates unified_genotype_table
fam = uniqueFamMember(envir = envir)
setfam(fam, envir = envir) # also updates unified_genotype_table
envir$phe = mkphenotype(envir)
envir$phe[envir$phe == 0] = NA
envir$famSKATRC_results = data.frame(chr = character(0), gene = character(0),
nvariants = numeric(0), start = integer(0), end = integer(0),
stringsAsFactors = FALSE)
sample = nrow(fam)
envir$KIN = kinship(1:sample, match(fam$Father, fam$PerPre, nomatch=0),
match(fam$Mother, fam$PerPre, nomatch=0))
envir$ALPHA = ALPHA
if (ALPHA)
envir$KINL = kinship(fam$PerPre, fam$Father, fam$Mother)
envir$wuweights_r = function(maf) {ifelse(maf > 0, dbeta(maf, 1, 25), 0) }
envir$wuweights_c = function(maf) {ifelse(maf > 0, dbeta(maf, 0.5, 0.5), 0) }
return (envir)
}
#' execute the CRAN famSKAT_RC function on a subset of the gene transcripts
#'
#' @description
#' If the \emph{gene} argument is NULL, execute the famSKAT_RC function on the first \emph{gs}
#' gene transcripts (default is gs = 1:100).
#' Update the \emph{envir$famSKATRC_results} data frame with the results.
#' Otherwise, \emph{gene} is a string vector of genes to process. The special value '*' stands
#' for all the known genes.
#'
#' @param gs a subrange of the default transcripts (refRanges) over which to calculate
#' the \emph{DOfamSKATRC} function.
#'
#' @param genes a list of genes over which to calculate the \emph{DOfamSKATRC} function.
#' The value, "*", means use all the transcripts in the selected Bioconductor database.
#' If genes is NULL, the gs range of the internal \emph{refRanges} will be used.
#'
#' @param envir 'environment' containing SQLite database and other globals.
#'
#' @param ... extra arguments that are acceptable to famSKAT_RC. These are listed with the
#' \code{\link{DOfamSKATRC}} function.
#'
#' @return
#' The data frame with the results is stored in the environment and named \emph{famSKATRC_results},
#' viz. envir$famSKATRC_results
#'
#' @export
#'
#' @note
#' A helper function
#' \code{SKAT3arg} is defined for the 3 argument callback function which in turn calls
#' \code{DOfamSKATRC} with the appropriate arguments (including those specific to the
#' \code{Mega2famSKATRC} function).
#'
#' @seealso \code{\link{init_famSKATRC}}, \code{\link{DOfamSKATRC}}
#'
#' @examples
#' db = system.file("exdata", "seqsimm.db", package="Mega2R")
#' ENV = init_famSKATRC(db, verbose = FALSE)
#' ENV$verbose = FALSE
#' ENV$famSKATRC_results = ENV$famSKATRC_results[0, ]
#' Mega2famSKATRC(gs=50:60, envir=ENV, pheno=3)
#'
#' \donttest{
#' # donttestcheck: try this below if there is time
#' Mega2famSKATRC(genes=c("CEP104"), envir=ENV, pheno=3 )
#' }
#'
#' ENV$famSKATRC_results
#'
#Mega2famSKATRC = function (gs = 1:100, genes=NULL, envir = ENV,
# pheno=3, id=NULL, covariates=NULL,
# sqrtweights_c=NULL, sqrtweights_r=NULL,
# binomialimpute=TRUE, acc=1e-6, maf=0.05, phi=c(0,0.2,0.5,0.9) )
Mega2famSKATRC = function (gs = 1:100, genes=NULL, envir = ENV, ...) {
# PHENO genotypes covariates sqrtweights_c sqrtweights_r binomialimpute maf phi acc
if (missing(envir)) envir = get("ENV", parent.frame(), inherits = TRUE)
famSKAT3arg = function(markers_arg, range_arg, envir) {
DOfamSKATRC(markers_arg, range_arg, envir, ...)
}
if (is.null(genes))
applyFnToRanges(famSKAT3arg, envir$refRanges[gs, ], envir$refIndices, envir = envir)
else
applyFnToGenes(famSKAT3arg, genes, envir = envir)
}
#' DofamSKATRC call back function
#'
#' @description
#' Convert the genotypesraw() allele patterns of 0x10001, 0x10002 (or 0x20001), 0x20002, 0
#" from the genotype matrix
#' to the numbers 0, 1, 2, 9 for each marker. (Reverse, the order iff allele "1" has the
#' minor allele frequency.) Ignore markers that have no variants.
#' Finally, invoke \code{famSKAT_RC} with the converted genotype matrix.
#' Save information about the range and the p.value calculated by \code{famSKAT_RC}
#' in \emph{envir$famSKATRC_results}.
#' If you want to change the argument values to this function they should be changed instead
#' when calling the \code{Mega2famSKATRC} function.
#'
#' @param markers_arg a data.frame with the following 5 observations:
#' \describe{
#' \item{locus_link}{is the ordinal ranking of this marker among all loci}
#' \item{locus_link_fill}{is the position of corresponding genotype data in the
#' \emph{unified_genotype_table}}
#' \item{MarkerName}{is the text name of the marker}
#' \item{chromosome}{is the integer chromosome number}
#' \item{position}{is the integer base pair position of marker}
#' }
#'
#' @param range_arg one row of a ranges_arg. The latter is a data frame of at least three
#' integer columns. The columns indicate a range:
#' a chromosome number, a start base pair value, and an end base pair value.
#'
#' @param envir 'environment' containing SQLite database and other globals especially the
#' phenotype_table, \code{phe}.
#'
#' @param pheno is an index into the phenotypes_table to select the phenotype. Missing phenotypes
#' are represented by NA.
#'
#' @param covariates a matrix of covariates for the phenotype.
#'
#' @param id a vector of individuals to be included in the test, a subset of the family members.
#' If NULL is given, all members will be used.
#'
#' @param binomialimpute if TRUE, impute missing genotypes using a binomial distribution.
#'
#' @param sqrtweights_c weight function for common variants, if NULL use weight set in init_famSKAT
#'
#' @param sqrtweights_r weight function for rare variants, if NULL use weight set in init_famSKAT.
#'
#' @param maf threshold used to separate rare from common variants.
#'
#' @param phi a vector of ratios ratios; each indicates the contribution of rare variants.
#'
#' @param acc accuracy used in Davies approximation.
#'
#' @importFrom famSKATRC famSKAT_RC
#' @return None
#'
#' @export
#'
#' @seealso \code{\link{init_famSKATRC}}, \code{\link{Mega2famSKATRC}}
#'
#' @note
#' This function accumulates output in the data frame, \emph{envir$famSKATRC_results}. It will
#' print out the lines as they are generated if \emph{envir$verbose} is TRUE. It does not write
#' the data frame to a file. You must save the data frame.
#' You also must initialize the data frame when necessary.
#'
#' @examples
#' db = system.file("exdata", "seqsimm.db", package="Mega2R")
#' ENV = init_famSKATRC(db, verbose = TRUE)
#' ENV$famSKATRC_results = ENV$famSKATRC_results[0, ]
#' Mega2famSKATRC(gs=1:1, envir=ENV, pheno=3)
#' # this sets one of the many arguments for DOfamSKATRC
#' # but basically prepares the ENV for the direct use of DOfamSKATRC (below).
#'
#' \donttest{
#' # donttestcheck: try this below instead if there is time
#' Mega2famSKATRC(genes=c("CEP104"), envir=ENV, pheno=3 )
#' }
#'
#' # DOfamSKATRC is called within Mega2famSKATRC. init_famSKATRC and Mega2famSKATRC need to be
#' # called to set up the environment for famSKAT_RC to run. BUT, you should ignore DOfamSKATRC
#' # and use Mega2famSKATRC instead.
#' #
#' applyFnToRanges(DOfamSKATRC, ENV$refRanges[50:60, ], ENV$refIndices, envir=ENV)
#' # this will use all the default argument values for DOfamSKATRC
#'
#
DOfamSKATRC = function(markers_arg, range_arg, envir,
pheno=3, id=NULL, covariates=NULL,
sqrtweights_c=NULL, sqrtweights_r=NULL,
binomialimpute=TRUE, acc=1e-6,
maf=.05, phi=c(0, .2, .5, .9) ) {
if (is.null(range_arg))
stop("DOfamSKATRC: range is not defined.", calls. = FALSE)
if (pheno > ncol(envir$phe)) stop("pheno index exceeds phenotype array")
geno = computeDosage(markers_arg, range_arg, envir)
nsnps = ncol(geno)
if (nsnps == 0) return (NULL)
if (nsnps == 1) {
message("Only 1 marker found in range ",
range_arg[envir$refCol[4]], ": chr", range_arg[envir$refCol[1]], " ",
range_arg[envir$refCol[2]], " ", range_arg[envir$refCol[3]])
return (NULL)
}
# The code below is 2 to 3 times faster than that above but is all in Rcpp. The above
# code makes it clearer that markers that have no variant are ignored and that the first
# allele is the one with the most occurrences, and that there must be at least two markers.
# tim2 = system.time ({
# genocol = getgenotypesdos(markers_arg, envir)
# genodos = genocol$geno[ ,1:genocol$ncol]
## genodos = matrix(genocol$geno, nrow=nrow(genocol$geno), ncol=genocol$ncol)
# })
# if (kk != genocol$ncol) {
# message("kk vs ncol")
# }
# if (sum(geno-genodos) != 0) {
# message("geno vs genodos")
# }
chr = as.character(range_arg[,envir$refCol[1]])
gene = as.character(range_arg[,envir$refCol[4]])
start = range_arg[,envir$refCol[2]]
end = range_arg[,envir$refCol[3]]
if (is.null(id)) {
id = 1:length(envir$fam$PerPre)
if (envir$ALPHA)
idL = envir$fam$PerPre
}
if (is.null(sqrtweights_c)) sqrtweights_c = envir$wuweights_c
if (is.null(sqrtweights_r)) sqrtweights_r = envir$wuweights_r
argn = list(PHENO=envir$phe[,pheno], genotypes=geno, id=id, fullkins=envir$KIN,
covariates=covariates, sqrtweights_c=sqrtweights_c, sqrtweights_r=sqrtweights_r,
binomialimpute=binomialimpute, acc=acc, maf=maf, phi=phi)
tim = system.time ({
skat = do.call(famSKAT_RC, argn)
})
if (envir$ALPHA) {
##
argn = list(PHENO=envir$phe[,pheno], genotypes=geno, id=idL, fullkins=envir$KINL,
covariates=covariates, sqrtweights_c=sqrtweights_c, sqrtweights_r=sqrtweights_r,
binomialimpute=binomialimpute, acc=acc, maf=maf, phi=phi)
timL = system.time ({
skatL = do.call(famSKAT_RC, argn)
})
result = c(list(), chr=chr, gene=gene, nvariants=nsnps, start=start, end=end,
tim[1:3], famSKATRC=skat, timL[1:3], skatL=skatL)
##
} else
result = c(list(), chr=chr, gene=gene, nvariants=nsnps, start=start, end=end,
tim[1:3], famSKATRC=skat)
lastp1 = nrow(envir$famSKATRC_results) + 1
if (lastp1 == 1) {
envir$famSKATRC_results = do.call(data.frame, c(result, stringsAsFactors = FALSE))
} else {
envir$famSKATRC_results[lastp1,] = result
}
if (envir$verbose) {
print(envir$famSKATRC_results[lastp1, ])
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.