Nothing
# helpers
#1. get allele based p and chi-square values for all the samples
get.pvals<-function(x,df,p.cal){
snp1<-unlist(df[x,-c(1:4)])
y<-data.frame(stringr::str_split_fixed(snp1,",",n=2L))
y[,1]<-as.integer(y[,1]);y[,2]<-as.integer(y[,2])
rr1<-y[,2]/rowSums(y,na.rm = TRUE)
nonhet<-which(rr1 == 0L | rr1 == 1L | is.na(rr1)==TRUE)
if(length(nonhet)>0){snp1het<-y[-which(rr1 == 0L | rr1 == 1L | is.na(rr1)==TRUE),]} else {snp1het<-y}
homalt<-sum(rr1==1L,na.rm=TRUE)
homref<-sum(rr1==0L,na.rm=TRUE)
NHet<-nrow(na.omit(snp1het))
Nsamp <- NHet+homalt+homref
if(NHet>3L){
propHet<-NHet/length(na.omit(rr1))
medRatio<-median(proportions(as.matrix(snp1het),margin = 1)[,2],na.rm = TRUE)
p.sum<-p.cal[x,2]
p.05<-0.5
p.all<-p.cal[x,1]
n<-unname(rowSums(snp1het,na.rm = TRUE))
chi.het<-sum((((n*p.sum)-snp1het[,2])^2L/n*p.sum)+(((n*(1L-p.sum))-snp1het[,1])^2L/n*(1L-p.sum)),na.rm = TRUE)
chi.het.sum<-chi.het
chi.05<-sum((((n*p.05)-snp1het[,2])^2L/n*p.05)+(((n*(1L-p.05))-snp1het[,1])^2L/n*(1L-p.05)),na.rm = TRUE)
chi.05.sum<-chi.05
chi.all<-sum((((n*p.all)-snp1het[,2])^2L/n*p.all)+(((n*(1L-p.all))-snp1het[,1])^2L/n*(1L-p.all)),na.rm = TRUE)
chi.all.sum<-chi.all
z <- (n*p.sum-snp1het[,2])/sqrt(n*p.sum*(1L-p.sum))
z.het.sum<-sum(z,na.rm = TRUE)
z<-pnorm(z.het.sum,0,sqrt(NHet))
z.05 <- (n*p.05-snp1het[,2])/sqrt(n*p.05*(1L-p.05))
z.05.sum<-sum(z.05,na.rm = TRUE)
z.05<-pnorm(z.05.sum,0,sqrt(NHet))
z.all<- (n*p.all-snp1het[,2])/sqrt(n*p.all*(1L-p.all))
z.all.sum<-sum(z.all,na.rm = TRUE)
z.all<-pnorm(z.all.sum,0,sqrt(NHet))
ll<-data.frame(NHet=NHet,propHet,medRatio,NHomRef=homref,NHomAlt=homalt,propHomAlt=homalt/Nsamp,Nsamp,
pAll=p.all,pHet=p.sum,fis=1L-(NHet/(2L*(homref+(NHet/2L))*(homalt+(NHet/2L)))),
z.het=ifelse(z>0.5, (1L-z)*2L, z*2L),
z.05=ifelse(z.05>0.5, (1L-z.05)*2L, z.05*2L),
z.all=ifelse(z.all>0.5, (1L-z.all)*2L, z.all*2L),
chi.het=pchisq(chi.het,NHet-1L,lower.tail=F),
chi.05=pchisq(chi.05,NHet-1L,lower.tail = F),
chi.all=pchisq(chi.all,NHet-1L,lower.tail = F),
z.het.sum,z.05.sum,z.all.sum,chi.het.sum,chi.05.sum,chi.all.sum)
} else {
ll<-NA
}
return(ll)
}
#' Get allele information for duplicate detection
#'
#' The function to calculate allele median ratios, proportion of heterozygotes
#' and allele probability values under different assumptions (see details),
#' and their chi-square significance values for duplicate detection
#'
#' @param X allele depth table generated from the function
#' \code{hetTgen} (non-normalized)
#' @param x.norm a data frame of normalized allele coverage, output of
#' \code{cpm.normal}. If not provided, calculated using \code{X}.
#' @param Fis numeric. Inbreeding coefficient calculated using \code{h.zygosity()} function
#' @param method character. method to be used for normalization
#' (see \code{cpm.normal} details). Default \code{TMM}
#' @param logratioTrim numeric. percentage value (0 - 1) of variation to be
#' trimmed in log transformation
#' @param sumTrim numeric. amount of trim to use on the combined absolute
#' levels (\dQuote{A} values) for method \code{TMM}
#' @param Weighting logical, whether to compute (asymptotic binomial precision)
#' weights
#' @param Acutoff numeric, cutoff on \dQuote{A} values to use before trimming
#' @param plot.allele.cov logical, plot comparative plots of allele depth
#' coverage in homozygotes and heterozygotes
#' @param verbose logical, whether to print progress
#' @param parallel logical. whether to parallelize the process
#' @param \dots further arguments to be passed to \code{plot}
#'
#' @importFrom stats pchisq pnorm na.omit
#' @importFrom parallel parApply detectCores parLapply stopCluster
#'
#' @details
#' Allele information generated here are individual SNP based and presents the
#' proportion of heterozygotes, number of samples, and deviation of allele
#' detection from a 1:1 ratio of reference and alternative alleles.
#' The significance of the deviation is tested with Z-score test
#' \eqn{Z = \frac{ \frac{N}{2}-N_A}{ \sigma_{x}}},
#' and chi-square test (see references for more details on the method).
#'
#' @return Returns a data frame of median allele ratio, proportion of
#' heterozygotes, number of heterozygotes, and allele probability at different
#' assumptions with their chi-square significance
#'
#' @author Piyal Karunarathne, Pascal Milesi, Klaus Schliep
#'
#' @references
#' \itemize{
#' \item{McKinney, G. J., Waples, R. K., Seeb, L. W., & Seeb, J. E. (2017).
#' Paralogs are revealed by proportion of heterozygotes and deviations in read
#' ratios in genotyping by sequencing data from natural populations.
#' Molecular Ecology Resources, 17(4)}
#' \item{Karunarathne et al. 2022 (to be added)}
#' }
#'
#' @examples
#' \dontrun{data(ADtable)
#' hz<-h.zygosity(vcf,verbose=FALSE)
#' Fis<-mean(hz$Fis,na.rm = TRUE)
#' AI<-allele.info(ADtable,x.norm=ADnorm,Fis=Fis)}
#'
#' @export
allele.info<-function(X,x.norm=NULL,Fis,method=c("MedR","QN","pca","TMM","TMMex"),logratioTrim = 0.3,sumTrim = 0.05,Weighting = TRUE,Acutoff = -1e+10,plot.allele.cov=TRUE,verbose = TRUE,parallel=FALSE,...){
method=match.arg(method)
if(is.null(x.norm)){
x.norm<-cpm.normal(X,method=method,logratioTrim=logratioTrim,sumTrim = sumTrim,Weighting = Weighting,Acutoff = Acutoff,verbose = verbose)
}
if(!inherits(x.norm,"list")){x.norm<-list(AD=x.norm)}
if(inherits(x.norm,"list")){x.norm<-x.norm$AD}
if(parallel){
numCores<-detectCores()-1
cl<-makeCluster(cl)
p.cal<-parApply(cl=cl,x.norm[,-c(1:4)],1,function(snp1){
if(is.character(unname(unlist(snp1[1])))){
y<-data.frame(stringr::str_split_fixed(snp1,",",n=2L))
y[,1]<-as.integer(y[,1])
y[,2]<-as.integer(y[,2])} else {y<-snp1}
rs<-rowSums(y)
rs[rs==0]<-NA
cv<-sd(unlist(rs),na.rm = TRUE)/mean(unlist(rs),na.rm = TRUE)
rr1<-y[,2]/rs
snp1het<-y[-which(rr1 == 0 | rr1 == 1 | is.na(rr1)==TRUE),]
homalt<-sum(rr1==1,na.rm=TRUE)
homref<-sum(rr1==0,na.rm=TRUE)
covrefhomo<-sum(y[c(rr1 == 0,na.rm = TRUE),],na.rm = TRUE)
covalthomo<-sum(y[c(rr1 == 1,na.rm = TRUE),],na.rm = TRUE)
covalt<-sum(y[,2],na.rm = TRUE)
covref<-sum(y[,1],na.rm = TRUE)
NHet<-nrow(snp1het)
if(NHet>3){
p.all<-(covalt/(NHet+(2*homalt)))/((covalt/(NHet+(2*homalt)))+(covref/(NHet+(2*homref))))
p.sum<-sum(snp1het[,2])/sum(snp1het)
ll <-data.frame(p.all,p.sum,mean.a.homo=covalthomo/(2*homalt),mean.r.homo=covrefhomo/(2*homref),mean.a.het=sum(snp1het[,2])/NHet,mean.r.het=sum(snp1het[,1])/NHet,cv=cv)
} else {
ll<-NA
}
return(ll)
})
} else {
if(verbose){
message("calculating probability values of alleles")
p.cal<-apply_pb(x.norm[,-c(1:4)],1,function(snp1){
if(is.character(unname(unlist(snp1[1])))){
y<-data.frame(stringr::str_split_fixed(snp1,",",n=2L))
y[,1]<-as.integer(y[,1])
y[,2]<-as.integer(y[,2])} else {y<-snp1}
rs<-rowSums(y)
rs[rs==0L]<-NA
cv<-sd(unlist(rs),na.rm = TRUE)/mean(unlist(rs),na.rm = TRUE)
rr1<-y[,2]/rs
snp1het<-y[-which(rr1 == 0 | rr1 == 1 | is.na(rr1)==TRUE),]
homalt<-sum(rr1==1,na.rm=TRUE)
homref<-sum(rr1==0,na.rm=TRUE)
covrefhomo<-sum(y[c(rr1 == 0,na.rm = TRUE),],na.rm = TRUE)
covalthomo<-sum(y[c(rr1 == 1,na.rm = TRUE),],na.rm = TRUE)
covalt<-sum(y[,2],na.rm = TRUE)
covref<-sum(y[,1],na.rm = TRUE)
NHet<-nrow(snp1het)
if(NHet>3){
p.all<-(covalt/(NHet+(2*homalt)))/((covalt/(NHet+(2*homalt)))+(covref/(NHet+(2*homref))))
p.sum<-sum(snp1het[,2])/sum(snp1het)
ll <-data.frame(p.all,p.sum,mean.a.homo=covalthomo/(2*homalt),mean.r.homo=covrefhomo/(2*homref),mean.a.het=sum(snp1het[,2])/NHet,mean.r.het=sum(snp1het[,1])/NHet,cv=cv)
} else {
ll<-NA
}
return(ll)
})
} else {
p.cal<-apply(x.norm[,-c(1:4)],1,function(snp1){
if(is.character(unname(unlist(snp1[1])))){
y<-data.frame(stringr::str_split_fixed(snp1,",",n=2L))
y[,1]<-as.integer(y[,1])
y[,2]<-as.integer(y[,2])} else {y<-snp1}
rs<-rowSums(y)
rs[rs==0]<-NA
cv<-sd(unlist(rs),na.rm = TRUE)/mean(unlist(rs),na.rm = TRUE)
rr1<-y[,2]/rs
snp1het<-y[-which(rr1 == 0 | rr1 == 1 | is.na(rr1)==TRUE),]
homalt<-sum(rr1==1,na.rm=TRUE)
homref<-sum(rr1==0,na.rm=TRUE)
covrefhomo<-sum(y[c(rr1 == 0,na.rm = TRUE),],na.rm = TRUE)
covalthomo<-sum(y[c(rr1 == 1,na.rm = TRUE),],na.rm = TRUE)
covalt<-sum(y[,2],na.rm = TRUE)
covref<-sum(y[,1],na.rm = TRUE)
NHet<-nrow(snp1het)
if(NHet>3){
p.all<-(covalt/(NHet+(2*homalt)))/((covalt/(NHet+(2*homalt)))+(covref/(NHet+(2*homref))))
p.sum<-sum(snp1het[,2])/sum(snp1het)
ll <-data.frame(p.all,p.sum,mean.a.homo=covalthomo/(2*homalt),mean.r.homo=covrefhomo/(2*homref),mean.a.het=sum(snp1het[,2])/NHet,mean.r.het=sum(snp1het[,1])/NHet,cv=cv)
} else {
ll<-NA
}
return(ll)
})
}
}
if(is.list(p.cal)){
p.cal<-do.call(rbind,p.cal)
} else {
p.cal<-t(p.cal)
}
p.cal[p.cal=="NaN"]<-0
if(plot.allele.cov){
p.list<-list(...)
if(is.null(p.list$pch)) p.list$pch=19
if(is.null(p.list$cex)) p.list$cex=0.6
if(is.null(p.list$col)) p.list$col<-makeTransparent(colorspace::heat_hcl(12,h=c(0,-100),c=c(40,80),l=c(75,40),power=1)[11])
if(is.null(p.list$lcol)) p.list$lcol="tomato"
par(mfrow=c(2,2))
par(mar=c(4,5,2,2))
plot(p.cal$mean.a.homo,p.cal$mean.r.homo,pch=p.list$pch,cex=p.list$cex,
col=p.list$col,xlab="Mean coverage of \nalt. allele in homozygotes",
ylab="Mean coverage of \n ref. allele in homozygotes",cex.lab=0.8)
abline(0,1,col=p.list$lcol)
plot(p.cal$mean.a.het,p.cal$mean.r.het,pch=p.list$pch,cex=p.list$cex,
col=p.list$col,xlab="Mean coverage of \nalt. allele in heterozygotes",
ylab="Mean coverage of \n ref. allele in heterozygotes",cex.lab=0.8)
abline(0,1,col=p.list$lcol)
plot(p.cal$mean.a.het,p.cal$mean.a.homo,pch=p.list$pch,cex=p.list$cex,
col=p.list$col,xlab="Mean coverage of \nalt. allele in heterozygotes",
ylab="Mean coverage of \n alt. allele in homozygotes",cex.lab=0.8)
abline(0,1,col=p.list$lcol)
plot(p.cal$mean.r.het,p.cal$mean.r.homo,pch=p.list$pch,cex=p.list$cex,
col=p.list$col,xlab="Mean coverage of \nref. allele in heterozygotes",
ylab="Mean coverage of \n ref. allele in homozygotes",cex.lab=0.8)
abline(0,1,col=p.list$lcol)
par(mfrow=c(1,1))
}
if(parallel){
pvals<-parLapply(1:nrow(X),get.pvals,df=X,p.cal=p.cal,cl=cl)
stopCluster(cl)
} else {
if(verbose){
message("calculating chi-square significance")
pvals<-lapply_pb(1:nrow(X),get.pvals,df=X,p.cal=p.cal)
} else {
pvals<-lapply(1:nrow(X),get.pvals,df=X,p.cal=p.cal)
}
}
pvals<-do.call(rbind,pvals)
pvals<-cbind(X[,1:3],pvals)
pvals<-na.omit(pvals)
ht<-sig.hets(pvals,Fis,plot = FALSE, verbose = verbose)
pvals<-data.frame(pvals,eH.pval=ht[,"eH.pval"],eH.delta=ht[,"eH.delta"],cv=na.omit(p.cal[,"cv"]))
return(pvals)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.