Nothing
##' Main function of the RVPedigree package
##'
##' The RVPedigree function is the main function of the RVPedigree
##' used package.
##'
##' Under the hood this function calls \code{\link{ASKAT.region}},
##' \code{\link{NormalizedASKAT.region}},
##' \code{\link{VCC1.region}}, \code{\link{VCC2.region}} or
##' \code{\link{VCC3.region}}, depending on the \code{method}
##' parameter specified by the user.
##' @title RVPedigree main function
##' @inheritParams read.haplo
##' @param method character, selects the method to use for the
##' association testing. Can be one of the following:
##' \itemize{
##' \item \code{"ASKAT"} (default)
##' \item \code{"NASKAT"}, normalized ASKAT
##' \item \code{"VCC1"}, VC-C1
##' \item \code{"VCC2"}, VC-C2
##' \item \code{"VCC3"}, VC-C3
##' }
##' @param y vector of phenotype data (one entry per individual), of
##' length \eqn{n}.
##' @param X matrix of covariates including intercept (dimension:
##' \eqn{n \times p}, with \eqn{p} the number of covariates)
##' @param Phi Relationship matrix (i.e. twice the kinship matrix); an
##' \eqn{n \times n} square symmetric positive-definite matrix.
##' @param regions a data frame with details of the genomic regions in
##' which the association test specified by the \code{method}
##' parameter should be run. The data frame should have one row
##' per region and (at least) four columns with the following
##' names:
##' \itemize{
##' \item \code{Name}: Name of the region (e.g. \code{Gene 01})
##' \item \code{Chr}: Chromosome on which the region is located.
##' \item \code{StartPos}: The base pair coordinate at which the
##' region starts
##' \item \code{EndPos}: The base pair coordinate at which the
##' region ends.
##' }
##' Any other columns will be ignored.
##' @param weights optional numeric vector of genotype weights. If
##' this option is not specified, the beta distribution is used
##' for weighting the variants, with each weight given by
##' \eqn{w_i = dbeta(f_i, 1, 25)^2}, with \eqn{f_i} the minor
##' allele frequency (MAF) of variant \eqn{i}. This default is the
##' same as used by the
##' \href{https://cran.r-project.org/package=SKAT}{\code{SKAT}
##' package}. This vector is used as the diagonal of the
##' \eqn{m \times m} matrix \eqn{W}, with \eqn{m} the number of
##' variants.
##' @param Nperm (integer) The number of permutations to be done to
##' calculate the empirical p-value if the VCC2 or VCC3 method is
##' used. For other methods this parameter is ignored (default:
##' 100).
##' @param pvalThreshold (numeric) Threshold for the association
##' p-value. Regions with a p-value below this threshold will not
##' be present in the output data frame (default: 0.1).
##' @param VCC3afterVCC1 (logical) Boolean value that indicates
##' whether the VC-C3 method should automatically be run on the
##' variants passing the p-value threshold set using the
##' \code{pvalThreshold} parameter (default: FALSE).
##' @param Ncores (integer) Number of processor (CPU) cores to be used
##' in parallel when doing running the association analysis. If
##' the number of regions is larger than the number of cores, then
##' each region gets to use maximum one core. If the number of
##' cores is larger than the number of regions and the VCC2 or
##' VCC3 methods are selected, the remaining cores are distributed
##' among the regions to parallelize the permutations used to
##' determine the p-value (default: 1).
##' @return A data frame containing results of the association test
##' specified by the \code{method} parameter for each region in
##' the data frame specified by the \code{regions} parameter. The
##' output data frame contains the following columns:
##' \itemize{
##' \item \code{Score.Test}: the score of the given association test
##' \item \code{P.value}: the p-value of the association test
##' \item \code{N.Markers}: the number of markers in the region
##' \item \code{regionname}: Name of the regions/genes on which you
##' are running the association tests
##' }
##' Note that regions that do not contain any genetic variants
##' will be removed from the output.
##' @author Lennart C. Karssen
##' @export
RVPedigree <- function(method="ASKAT",
y=NULL,
X=NULL,
Phi=NULL,
filename=NULL,
type="bed",
regions=NULL,
weights=NULL,
Nperm=100,
pvalThreshold=0.1,
VCC3afterVCC1=FALSE,
Ncores=1)
{
## Parameter checks
check_method(method)
y <- check_pheno(y)
check_covariates(X, y)
check_relmatrix(Phi)
check_files(filename, type)
check_regions(regions)
check_weights(weights)
check_Ncores(Ncores)
check_VCC3afterVCC1(VCC3afterVCC1, method)
## Compute the eigen vectors and eigen values of the relationship
## matrix.
Phi.eig <- eigen(Phi)
U <- Phi.eig$vectors
S <- Phi.eig$values
## Determine the name of the map file based on the file type.
switch(type,
bed={
mapfile <- sub(".bed$", ".bim", filename)
},
ped={
mapfile <- sub(".ped$", ".map", filename)
}
)
map <- readMapFile(mapfile)
## Compute the null model (excl. genotypes) first and only once
message("Estimating null model...")
switch(method,
ASKAT={
H0 <- Estim.H0.ASKAT(y=y, X=X, S=S, U=U)
},
NASKAT={
H0 <- Estim.H0.NormalizedASKAT(y=y, X=X, S=S, U=U)
},
VCC1={
H0 <- Estim.H0.VCC(y=y, X=X, S=S, U=U)
},
VCC2={
H0 <- Estim.H0.VCC(y=y, X=X, S=S, U=U)
},
VCC3={
H0 <- Estim.H0.VCC(y=y, X=X, S=S, U=U)
}
)
message("Null model estimated.")
## If we have more regions than cores, don't parallelise the
## permutations in the pvalue.VCC2/3 functions. If there are more
## CPU cores than regions the left over CPU cores are distributed
## amongst the regions. See notes in Bitbucket Issue #14.
Nregions <- nrow(regions)
if(Nregions >= Ncores)
{
Ncores.pvalue <- 1
Ncores.regions <- Ncores
} else {
Ncores.regions <- Nregions
Ncores.leftover <- Ncores - Ncores.regions
Ncores.pvalue <- 1 + Ncores.leftover %/% Ncores.regions
}
if (Ncores > 1)
{
message("Using ", Ncores.regions,
" cores to analyse the regions in parallel")
if (method == "VCC2" | method == "VCC3")
message("Using ", Ncores.pvalue,
" cores per region for p-value permutations")
}
## Set up parallel environment for distributing the analysis over
## multiple regions. By default Ncores=1, so no problems are
## expected.
registerDoParallel(Ncores.regions)
message("Starting association analysis of the ", Nregions,
" regions...")
## ## Allocate an empty result data frame to which we can rbind the
## ## outcomes of each region
## results <- data.frame()
region <- 0
results <- foreach(region=1:Nregions, .combine='rbind', .verbose=FALSE) %dopar% {
regionname <- regions[region, "Name"]
chr <- regions[region, "Chr"]
startpos <- regions[region, "StartPos"]
endpos <- regions[region, "EndPos"]
switch(method,
ASKAT={
region.results <- ASKAT.region(y=y,
X=X,
Phi=Phi,
type=type,
filename=filename,
map=map,
chr=chr,
startpos=startpos,
endpos=endpos,
regionname=regionname,
U=U,
S=S,
RH.Null=H0,
weights=weights
)
if (!is.na(region.results$P.value) &
region.results$P.value <= pvalThreshold) {
region.results
}
},
NASKAT={
region.results <- NormalizedASKAT.region(y=y,
X=X,
Phi=Phi,
type=type,
filename=filename,
map=map,
chr=chr,
startpos=startpos,
endpos=endpos,
regionname=regionname,
U=U,
S=S,
RH.Null=H0,
weights=weights
)
if (!is.na(region.results$P.value) &
region.results$P.value <= pvalThreshold) {
region.results
}
},
VCC1={
region.results <- VCC1.region(y=y,
X=X,
Phi=Phi,
type=type,
filename=filename,
map=map,
chr=chr,
startpos=startpos,
endpos=endpos,
regionname=regionname,
U=U,
S=S,
RH.Null=H0,
weights=weights
)
if (!is.na(region.results$P.value) &
region.results$P.value <= pvalThreshold) {
region.results
}
},
VCC2={
region.results <- VCC2.region(y=y,
X=X,
Phi=Phi,
type=type,
filename=filename,
map=map,
chr=chr,
startpos=startpos,
endpos=endpos,
regionname=regionname,
U=U,
S=S,
RH.Null=H0,
weights=weights,
Nperm=Nperm,
Ncores=Ncores.pvalue
)
if (!is.na(region.results$P.value) &
region.results$P.value <= pvalThreshold) {
region.results
}
},
VCC3={
region.results <- VCC3.region(y=y,
X=X,
Phi=Phi,
type=type,
filename=filename,
map=map,
chr=chr,
startpos=startpos,
endpos=endpos,
regionname=regionname,
U=U,
S=S,
RH.Null=H0,
weights=weights,
Nperm=Nperm,
Ncores=Ncores.pvalue
)
if (!is.na(region.results$P.value) &
region.results$P.value <= pvalThreshold) {
region.results
}
},
{
# This part is executed if the method parameter
# doesn't match any of the above.
stop("Unkown method specified; the method parameter ",
"should be one of: 'ASKAT', 'NASKAT', 'VCC1', ",
"'VCC2', or 'VCC3'" )
}
) # End of switch over methods
} # End of foreach over regions
## If the VCC3afterVCC1 parameter is TRUE, automatically run
## VCC3 on the output regions of VCC1.
if (method == "VCC1" & VCC3afterVCC1) {
message("'VCC3afterVCC1' option set, starting VCC3 run...")
rowsBelowThres <- which(rownames(results) %in% regions$Name)
newRegions <- regions[rowsBelowThres, ]
results <- RVPedigree(method="VCC3",
y=y,
X=X,
Phi=Phi,
type=type,
filename=filename,
regions=newRegions,
weights=weights,
Nperm=Nperm,
pvalThreshold=pvalThreshold,
VCC3afterVCC1=FALSE,
Ncores=Ncores
)
}
return(results)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.