#'@include ASEset-class.R
NULL
#' genotype filter methods
#'
#' useful genotype filters
#'
#' hetFilt returns TRUE if the samples is heterozygote, based on stored genotype information
#' present in the phase data.
#'
#' @name ASEset-filters
#' @rdname ASEset-filters
#' @aliases hetFilt hetFilt,ASEset-method
#' @docType methods
#' @param x ASEset object
#' @param source 'genotype' or 'alleleCounts'
#' @param ... internal param
#' @author Jesper R. Gadin, Lasse Folkersen
#' @keywords filter
#' @examples
#'
#' #load example data
#' data(ASEset)
#' a <- ASEset
#'
#' genotype(a) <- inferGenotypes(a)
#' hets <- hetFilt(a)
#'
NULL
#' @rdname ASEset-filters
#' @export
setGeneric("hetFilt", function(x, ...){
standardGeneric("hetFilt")
})
#' @rdname ASEset-filters
#' @export
setMethod("hetFilt", signature(x = "ASEset"),
function(x, source="genotype", ...)
{
if(source=="genotype"){
.heterozygozityFromPhaseArray(phase(x , return.class="array"))
}else if(source=="alleleCounts"){
stop("not implemented")
}else{
stop("source must be 'genotype' or 'alleleCounts' ")
}
}
)
### -------------------------------------------------------------------------
### helpers for hetFilt
###
.heterozygozityFromPhaseArray <- function(x){
!(x[,,1] == x[,,2])
}
#' multi-allelic filter methods
#'
#' filter on multiallelic snps
#'
#' based on the allele counts for all four variants A, T, G and C and returns true
#' if there is counts enough suggesting a third or more alleles. The sensitivity can
#' be specified using 'threshold.count.sample' and 'threshold.frequency'.
#'
#' @name multiAllelicFilt
#' @rdname multiAllelicFilt
#' @aliases multiAllelicFilt multiAllelicFilt,ASEset-method
#' @docType methods
#' @param x \code{ASEset} object
#' @param strand strand to infer from
#' @param filterOver 'eachSample' or 'allSamples'
#' @param threshold.count.sample least amount of counts to try to infer allele
#' @param threshold.frequency least fraction to classify (see details)
#' @param ... internal param
#' @author Jesper R. Gadin, Lasse Folkersen
#' @keywords filter
#' @examples
#'
#' #load example data
#' data(ASEset)
#' a <- ASEset
#'
#' multiAllelicFilt(a)
#'
NULL
#' @rdname multiAllelicFilt
#' @export
setGeneric("multiAllelicFilt", function(x, ...){
standardGeneric("multiAllelicFilt")
})
#' @rdname multiAllelicFilt
#' @export
setMethod("multiAllelicFilt", signature(x = "ASEset"),
function(x, strand="*", threshold.count.sample=10, threshold.frequency=0.10,
filterOver="eachSample"){
#find SNP types
fr <- frequency(x, strand=strand,
threshold.count.sample=threshold.count.sample,
return.class="array")
#apply over alleles
alleles <- apply(fr,c(1,2), function(x){
sum(x >= threshold.frequency)
})
if(filterOver=="allSamples"){
apply(alleles,1, function(x){
any(x > 2 ,na.rm=TRUE)
})
}else if(filterOver=="eachSample"){
alleles > 2
}
})
#' minFreqFilt methods
#'
#' filter on minFreqFilt snps
#'
#' Description info here
#'
#' @name minFreqFilt
#' @rdname minFreqFilt
#' @aliases minfreqFilt minFreqFilt,ASEset-method
#' @docType methods
#' @param x \code{ASEset} object
#' @param strand strand to infer from
#' @param sum 'each' or 'all'
#' @param threshold.frequency least fraction to classify (see details)
#' @param replace.with only option 'zero'
#' @param return.class 'ASEset', 'array' or 'matrix'
#' @param ... internal param
#' @author Jesper R. Gadin, Lasse Folkersen
#' @keywords filter
#' @examples
#'
#' #load example data
#' data(ASEset)
#' a <- ASEset
#'
#' minFreqFilt(a)
#'
NULL
#' @rdname minFreqFilt
#' @export
setGeneric("minFreqFilt", function(x, ...){
standardGeneric("minFreqFilt")
})
#' @rdname minFreqFilt
#' @export
setMethod("minFreqFilt", signature(x = "ASEset"),
function(x, strand="*", threshold.frequency=0.10, replace.with="zero",
return.class="ASEset", sum="all" )
{
thr <- threshold.frequency
fr <- frequency(x, strand=strand, threshold.count.sample=1,
return.class="array")
#create tfmat
if(sum=="each"){
#check if ref and alt allele are present
if(!refExist(x)){stop("ref(x) is empty")}
if(!altExist(x)){stop("alt(x) is empty")}
#check if ref and alt are character, otherwise give a warning and
#coerce to character
ref <- .verboseCoerceToCharacter(ref(x))
alt <- .verboseCoerceToCharacter(alt(x))
tfmat <- .toKeepMatrixMinFreqFilterEach(fr,ref,alt,ncol(x),thr,x@variants)
}else if(sum=="all"){
tfmat <- .Na2False(.toKeepMatrixMinFreqFilterAll(fr, thr))
}
#return object
if(return.class=="ASEset"){
#will only filter out the selected strand, and while the unknown strand is
#based on the plus and minus we have to filter all of them, to have the back
#free. In the future the assays(x)[["countsUnknown"]] will be dropped so it
#always calculated on the fly as the sum of the plus and minus strand
tfarray <- .expandMatrixToArray(tfmat, length(x@variants))
if(strand=="*"){
if(.unknownStrandCountsExists(x)) alleleCounts(x, strand="*", return.class="array")[!tfarray] <- 0
if(.minusStrandCountsExists(x)) alleleCounts(x, strand="-", return.class="array")[!tfarray] <- 0
if(.plusStrandCountsExists(x)) alleleCounts(x, strand="+", return.class="array")[!tfarray] <- 0
}else if(strand=="+" | strand=="-"){
alleleCounts(x, strand=strand, return.class="array")[!tfarray] <- 0
}
return(x)
}
if(return.class=="array") return(.expandMatrixToArray(tfmat, length(x@variants)))
if(return.class=="matrix") return(tfmat)
})
### -------------------------------------------------------------------------
### helpers for minFreqFilt
###
#get the replacement matrix for "all"
.toKeepMatrixMinFreqFilterAll <- function(fr, thr){
tf <- fr >= thr
apply(tf, c(1,2), function(x){sum(x)>=2})
}
#get the replacement matrix for "each"
.toKeepMatrixMinFreqFilterEach <- function(fr,ref,alt,nc,thr,var){
#check that overall dimensions are fine
if(!length(nc)==1) stop("dimensions do not agree in .altAlleleFreqThreshold")
if(any(!dim(fr) == c(length(ref), nc, length(var))) |
!(length(ref) == length(alt)) |
!(length(nc) == 1) |
!(length(thr) == 1)
) stop("dimensions do not agree in .altAlleleFreqThreshold")
#take out count values only for ref allele
ref.ar <- .arrayFromAlleleVector(var, ref, nc )
alt.ar <- .arrayFromAlleleVector(var, alt, nc )
#use on counts
ref.fr <- .subsetArrayToMatrix(fr, ref.ar)
alt.fr <- .subsetArrayToMatrix(fr, alt.ar)
#at least eg. 0.1 in each group
.Na2False(ref.fr > thr) & .Na2False(alt.fr > thr)
#tfFilt <- .Na2False(ref.fr > thr) & .Na2False(alt.fr > thr)
#take out these pairs and look at them (manually control that all is fine)
#this is atm not tested in a unit test, but should be covered by
#another test
#matrix(c(ref.fr[tfFilt], alt.fr[tfFilt]), ncol=2)
#matrix(c(ref.fr[!tfFilt], alt.fr[!tfFilt]), ncol=2)
#extend the matrix to 3d for the subset/replacement now done outside this unit
#.expandMatrixToArray(tfFilt, length(var))
}
#' minCountFilt methods
#'
#' filter on minCountFilt snps
#'
#' Description info here
#'
#' @name minCountFilt
#' @rdname minCountFilt
#' @aliases minCountFilt minCountFilt,ASEset-method
#' @docType methods
#' @param x \code{ASEset} object
#' @param strand strand to infer from
#' @param sum 'each' or 'all'
#' @param threshold.counts cutoff for read counts (see details)
#' @param replace.with only option 'zero'
#' @param return.class 'ASEset', 'array' or 'matrix'
#' @param ... internal param
#' @author Jesper R. Gadin, Lasse Folkersen
#' @keywords filter
#' @examples
#'
#' #load example data
#' data(ASEset)
#' a <- ASEset
#'
#' minCountFilt(a)
#'
NULL
#' @rdname minCountFilt
#' @export
setGeneric("minCountFilt", function(x, ...){
standardGeneric("minCountFilt")
})
#' @rdname minCountFilt
#' @export
setMethod("minCountFilt", signature(x = "ASEset"),
function(x, strand="*", threshold.counts=1,
sum="all", replace.with="zero", return.class="ASEset"){
#set shorter name
thr <- threshold.counts
#extract alleleCounts
ac <- alleleCounts(x, strand=strand, return.class="array")
#create tfmat
if(sum=="all") tfmat <- .toKeepMatrixMinCountFilterAll(ac, thr)
else if(sum=="each"){
#check if ref and alt allele are present
if(!refExist(x)){stop("ref(x) is empty")}
if(!altExist(x)){stop("alt(x) is empty")}
#check if ref and alt are character, otherwise give a warning and
#coerce to character
ref <- .verboseCoerceToCharacter(ref(x))
alt <- .verboseCoerceToCharacter(alt(x))
tfmat <- .toKeepMatrixMinCountFilterEach(ac,ref,alt,ncol(x),thr,x@variants)
}
#return object
if(return.class=="ASEset"){
#will only filter out the selected strand, and while the unknown strand is
#based on the plus and minus we have to filter all of them, to have the back
#free. In the future the assays(x)[["countsUnknown"]] will be dropped so it
#always calculated on the fly as the sum of the plus and minus strand
tfarray <- .expandMatrixToArray(tfmat, length(x@variants))
if(strand=="*"){
if(.unknownStrandCountsExists(x)) alleleCounts(x, strand="*", return.class="array")[!tfarray] <- 0
if(.minusStrandCountsExists(x)) alleleCounts(x, strand="-", return.class="array")[!tfarray] <- 0
if(.plusStrandCountsExists(x)) alleleCounts(x, strand="+", return.class="array")[!tfarray] <- 0
}else if(strand=="+" | strand=="-"){
alleleCounts(x, strand=strand, return.class="array")[!tfarray] <- 0
}
return(x)
}
if(return.class=="array") return(.expandMatrixToArray(tfmat, length(x@variants)))
if(return.class=="matrix") return(tfmat)
})
### -------------------------------------------------------------------------
### helpers for minCountFilt
###
#get the replacement matrix for "all"
.toKeepMatrixMinCountFilterAll <- function(ac, thr){
apply(ac, c(1,2), function(x,thr){sum(x)> thr},thr=thr)
}
#get the replacement matrix for "each"
.toKeepMatrixMinCountFilterEach <- function(ac,ref,alt,nc,thr,var){
#check that overall dimensions are fine
if(!length(nc)==1) stop("dimensions do not agree in .toKeepArrayMinCountFilter")
if(any(!dim(ac) == c(length(ref), nc, length(var))) |
!(length(ref) == length(alt)) |
!(length(nc) == 1) |
!(length(thr) == 1)
) stop("dimensions do not agree in .toKeepArrayMinCountFilter")
#take out count values only for ref allele
ref.ar <- .arrayFromAlleleVector(var, ref, nc )
alt.ar <- .arrayFromAlleleVector(var, alt, nc )
#use on counts
ref.ac <- .subsetArrayToMatrix(ac, ref.ar)
alt.ac <- .subsetArrayToMatrix(ac, alt.ar)
#at least 30 in both groups
(ref.ac > thr) & (alt.ac > thr)
#take out these pairs and look at them (manually control that all is fine)
#this is atm not tested in a unit test, but should be covered by
#another test
#matrix(c(ref.ac[tfFilt], alt.ac[tfFilt]), ncol=2)
#matrix(c(ref.ac[!tfFilt], alt.ac[!tfFilt]), ncol=2)
#extend the matrix to 3d for the subset/replacement now done outside this unit
#.expandMatrixToArray(tfFilt, length(var))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.