#' Kinship estimation
#'
#' @description Estimates kinships for all combinations of pairs of individuals.
#'
#' @param genotypematrix A matrix encoding genotypes with columns of individuals and rows of SNP sites. (0=homozygotic reference, 1=heterozygotic, 2=homozygotic alternative).
#'
#' @param ploidy
#' A data frame with two columns.
#' First column is id (same as column names of genotype matrix).
#' Second column is ploidy (1 or 2).
#' Guess ploidy if not provided.
#'
#' @param skipKIMGENS
#' Binary variable for skipping KIMGENS kinship estimates or not.
#' F if not provided. See 'Details.'
#'
#' @param KIMGENSThreshold
#' A kinship threshold for defining "relatives" in KIMGENS kinship. 0.1 if not provided. See 'Details.'
#'
#' @return A data frame encoding kinship and IBS0 for every pair of individuals
#' @details
#' This functions takes two step:
#' The first step use exKING-robust (including KING-robust (Manichaikul et al. 2010)) to estimate kinship between two diploids and kinship between a haploid and a diploid.
#' The second step use individuals related to the two individuals of interest as "references" to estimate kinship.
#' The median of kinships from relatives of either individuals are averaged to estimate the correct kinship
#' Relatives are defined by if the kinship estimates from the first step are larger than KIMGENSThreshold.
#' Skipping this step will skip haploid-haploid kinship estimate.
#'
#' @references
#' Wang, Y-W and Ané, C. KIMGENS: A novel method to estimate kinship in organisms with mixed haploid diploid genetic systems robust to population structure. Bioinformatics 38(11):3044-3050.
#'
#' @export
#'
#' @examples
#' ancestrygenomatrix<-PopulationSim(1000,c(0.05,0.15,0.25))
#' ancestry<-matrix(c(6,2,0.3,2,6,0.3),nrow=3)
#' pedgeno<-HapdipPedigreeSim(ancestrygenomatrix,pedigree[1:18,],ancestry)
#' kinship(pedgeno)
kinship<-function(genotypematrix,ploidy=NA,skipKIMGENS=F,KIMGENSThreshold=0.1){
HeterozygosityCheck<-apply(genotypematrix,2,function(x)1-sum(x==1)/sum(!is.na(x))) ## check if there are highly homozygous samples
if(any(HeterozygosityCheck>0.9)){
warning(paste0("Sample(s) ",
paste(names(HeterozygosityCheck)[which(HeterozygosityCheck>0.9)],collapse = ", "),
" has heterozygosity < 10%! Are they haploids?\n",
"Continuing but please check if the genotype matrix is coded correctly."))
}
if(any(is.na(ploidy))){ ## infer ploidy
temp<-ifelse(apply(genotypematrix,2,function(x)any(x==2,na.rm = T)),2,1)
ploidy<-data.frame(id=names(temp),ploidy=temp)
}else if(!all(ploidy$id%in%colnames(genotypematrix))){ ##check if everyone has ploidy coded
stop("check ploidy!")
}
kinshipmatrix<-as.data.frame(t(combn(colnames(genotypematrix),2)))
tempkin<-t(apply(kinshipmatrix,1,function(x)
if(all(ploidy$ploidy[ploidy$id%in%x[1:2]]==2)){
return(dipking(genotypematrix[,x[1]],genotypematrix[,x[2]]))
}else if(all(ploidy$ploidy[ploidy$id%in%x[1:2]]==1)){
return(c(kinship=NA,IBS0=NA))
}else if(ploidy$ploidy[ploidy$id%in%x[2]]==1&ploidy$ploidy[ploidy$id%in%x[1]]==2){
return(hapdipking(genotypematrix[,x[1]],genotypematrix[,x[2]]))
}else if(ploidy$ploidy[ploidy$id%in%x[1]]==1&ploidy$ploidy[ploidy$id%in%x[2]]==2){
return(hapdipking(genotypematrix[,x[2]],genotypematrix[,x[1]]))
}else{
return(c(kinship=NA,IBS0=NA))
}))
colnames(tempkin)<-c("kinshipexKING","IBS0exKING")
kinshipmatrix<-cbind(kinshipmatrix,tempkin)
if(!skipKIMGENS){
kinshipmatrix$kinshipKIMGENS<-NA
kinshipmatrix$IBS0KIMGENS<-NA
for(pair in 1:nrow(kinshipmatrix)){
svMisc::progress(pair,nrow(kinshipmatrix))
inds<-c(kinshipmatrix$V1[pair],kinshipmatrix$V2[pair])
submatrix<-lapply(inds,function(x)kinshipmatrix[kinshipmatrix$V1==x|kinshipmatrix$V2==x,])
related<-lapply(submatrix,function(x)which(x$kinshipexKING>KIMGENSThreshold))
related<-mapply(function(x,y)unique(c(y$V1[x][y$V1[x]%in%ploidy$id[ploidy$ploidy==2]],
y$V2[x][y$V2[x]%in%ploidy$id[ploidy$ploidy==2]])),related,submatrix)
related<-mapply(function(x,y){if(ploidy$ploidy[ploidy$id==y]==2){unique(c(x,y))}else{x}},related,inds,SIMPLIFY = F)
if(any(sapply(related,length)!=0)){
if(all(ploidy$ploidy[ploidy$id%in%inds]==1)){
kinshipmatrix[pair,c("kinshipKIMGENS","IBS0KIMGENS")]<-rowMeans(sapply(related,function(x){
hapking(genotypematrix[,inds[1]],genotypematrix[,inds[2]],genotypematrix[,x])}),na.rm = T)
}else if(all(ploidy$ploidy[ploidy$id%in%inds]==2)){
kinshipmatrix[pair,c("kinshipKIMGENS","IBS0KIMGENS")]<-rowMeans(sapply(related,function(x){
dipking(genotypematrix[,inds[1]],genotypematrix[,inds[2]],genotypematrix[,x])}))
}else{
if(ploidy$ploidy[ploidy$id%in%inds[1]]==1){
kinshipmatrix[pair,c("kinshipKIMGENS","IBS0KIMGENS")]<-rowMeans(sapply(related,function(x){
hapdipking(genotypematrix[,inds[2]],genotypematrix[,inds[1]],genotypematrix[,x])}))
}else{
kinshipmatrix[pair,c("kinshipKIMGENS","IBS0KIMGENS")]<-rowMeans(sapply(related,function(x){
hapdipking(genotypematrix[,inds[1]],genotypematrix[,inds[2]],genotypematrix[,x])}))
}
}
}else{
kinshipmatrix[pair,c("kinshipKIMGENS","IBS0KIMGENS")]<-NA
}
}
}
colnames(kinshipmatrix)[c(1:2)]<-c("ind1","ind2")
return(kinshipmatrix)
}
#' Diploid-diploid kinship estimation
#'
#' @description Estimate kinship for a pair of diploid individuals.
#'
#' @param dip1,dip2
#' Vectors encoding the genotype of diploid individuals (0,1,2).
#'
#' @param dipref
#' A vector encoding the genotype of the reference diploid individual (0,1,2).
#' Perform exKING-robust if none provided.
#'
#' @export
#'
#' @examples
#' ##Calculate kinship of a pair of full siblings
#' dip<-rbinom(1000,2,0.5)
#' hap<-rbinom(1000,1,0.5)
#' ### without reference(exKING-robust)
#' dipking(anastomosis(meiosis(dip),hap),anastomosis(meiosis(dip),hap1))
#' ### with reference(KIMGENS)
#' dipking(anastomosis(meiosis(dip),hap),anastomosis(meiosis(dip),hap1),dipref=dip1)
dipking<-function(dip1,dip2,dipref=NA){
if(all(is.na(dipref))){
set<-dip1%in%c(0,1,2)&dip2%in%c(0,1,2)
dip1<-dip1[set]
dip2<-dip2[set]
N1<-sum(dip1==1)
N2<-sum(dip2==1)
AaAa<-sum(dip1==1&dip2==1)
AAaa<-sum((dip1==0&dip2==2)|(dip1==2&dip2==0))
return(c(kinship=1/2-(sum(N1,N2)/min(N1,N2))/4+(AaAa-2*AAaa)/2/min(N1,N2),
IBS0=AAaa/min(N1,N2)))
}else{
if(length(dipref)==0){
return(c(NA,NA))
}else{
if(!is.data.frame(dipref)){
dipref<-data.frame(dipref)
}
results<-apply(dipref,2,function(x){
set<-dip1%in%c(0,1,2)&dip2%in%c(0,1,2)&x%in%c(0,1,2)
dip1<-dip1[set]
dip2<-dip2[set]
x<-x[set]
N<-sum(x==1)
N1<-sum(dip1==1)
N2<-sum(dip2==1)
AaAa<-sum(dip1==1&dip2==1)
AAaa<-sum((dip1==0&dip2==2)|(dip1==2&dip2==0))
return(c(kinship=1/2-1/4*((4*AAaa-2*AaAa+N1+N2)/N),IBS0=AAaa/N))})
kinship<-median(results[1,],na.rm = T)
IBS0<-median(results[2,which(abs(results[1,] - kinship) == min(abs(results[1,] - kinship)))],na.rm = T)
return(c(kinship=kinship,IBS0=IBS0))
}
}
}
#' Haploid-diploid kinship estimation
#'
#' @description Estimate kinship between a haploid and a diploid individuals.
#'
#' @param dip
#' A vector encoding the genotype of a diploid individual (0,1,2).
#'
#' @param hap
#' A vector encoding the genotype of a haploid individual (0,1).
#'
#' @param dipref
#' A vector encoding the genotype of the reference diploid individual (0,1,2).
#' Perform exKING-robust hap-dip kinship estimation if none provided.
#'
#' @export
#'
#' @examples
#' ##Calculate kinship of a pair of full siblings
#' dip<-rbinom(1000,2,0.5)
#' hap<-rbinom(1000,1,0.5)
#' ### without reference(exKING-robust)
#' hapdipking(anastomosis(meiosis(dip),hap),meiosis(dip))
#' ### with reference(KIMGENS)
#' hapdipking(anastomosis(meiosis(dip),hap),meiosis(dip),dipref=dip)
hapdipking<-function(dip,hap,dipref=NA){
if(all(is.na(dipref))){
set<-dip%in%c(0,1,2)&hap%in%c(0,1)
dip<-dip[set]
hap<-hap[set]
N<-sum(dip==1)
AAa<-sum((dip==0&hap==1)|(dip==2&hap==0))
return(c(kinship=1/2-AAa/N,IBS0=AAa/N))
}else{
if(length(dipref)==0){
return(c(NA,NA))
}else{
if(!is.data.frame(dipref)){
dipref<-data.frame(dipref)
}
results<-apply(dipref,2,function(x){
set<-dip%in%c(0,1,2)&hap%in%c(0,1)&x%in%c(0,1,2)
dip<-dip[set]
hap<-hap[set]
x<-x[set]
N<-sum(x==1)
AAa<-sum((dip==0&hap==1)|(dip==2&hap==0))
return(c(kinship=1/2-AAa/N,IBS0=AAa/N))})
kinship<-median(results[1,],na.rm=T)
IBS0<-median(results[2,which(abs(results[1,] - kinship) == min(abs(results[1,] - kinship)))],na.rm=T)
return(c(kinship=kinship,IBS0=IBS0))
}
}
}
#' Haploid-haploid kinship estimation
#'
#' @description Estimate kinship for a pair of haploid individuals.
#'
#' @param hap1,hap2
#' Vectors encoding the genotype of haploid individuals (0,1).
#'
#' @param dipref
#' A vector encoding the genotype of the reference diploid individual (0,1,2).
#' Required!
#'
#' @export
#'
#' @examples
#' ##Calculate kinship of a pair of full siblings
#' dip<-rbinom(1000,2,0.5)
#' ### No exKING-robust
#' hapking(meiosis(dip),meiosis(dip),dipref=dip)
hapking<-function(hap1,hap2,dipref){
if(length(dipref)==0){
return(c(NA,NA))
}else{
if(!is.data.frame(dipref)){
dipref<-data.frame(dipref)
}
results<-apply(dipref,2,function(x){
set<-hap1%in%c(0,1)&hap2%in%c(0,1)&x%in%c(0,1,2)
hap1<-hap1[set]
hap2<-hap2[set]
x<-x[set]
N<-sum(x==1)
Aa<-sum((hap1==0&hap2==1)|(hap1==1&hap2==0))
return(c(kinship=1-Aa/N,IBS0=Aa/N))})
kinship<-median(results[1,],na.rm=T)
IBS0<-median(results[2,which(abs(results[1,] - kinship) == min(abs(results[1,] - kinship)))],na.rm=T)
return(c(kinship=kinship,IBS0=IBS0))
}
}
#' KIMGENS threshold choosing helper
#'
#' @description Output number of reference given a certain threshold
#'
#' @param kins A data frame generated by kinship
#'
#' @param ploidy
#' A data frame with two columns.
#' First column is id (same as column names of genotype matrix).
#' Second column is ploidy (1 or 2).
#' Guess ploidy if not provided.
#'
#' @param thresholds
#' A vector of proposed kinship thresholds for defining "relatives" in KIMGENS kinship.
#'
#' @return A violin plot
#' @details
#' This functions helps user to decide the threshold for KIMGENS.
#' By examining the output violin plot users can identify how many references
#' can be used for each individual given a threshold.
#' We recommend choosing a threshold resulting in on average 4 to 5 references
#' and no individuals having 0 references.
#'
#' @export
#'
#' @examples
#' ancestrygenomatrix<-PopulationSim(1000,c(0.05,0.15,0.25))
#' ancestry<-matrix(c(6,2,0.3,2,6,0.3),nrow=3)
#' pedgeno<-HapdipPedigreeSim(ancestrygenomatrix,pedigree[1:18,],ancestry)
#' kins<-kinship(pedgeno,skipKIMGENS=T)
#' ploidy<-data.frame(id=pedigree$id,ploidy=ifelse(pedigree$sex=="F",2,1))
#' KIMGENSThresholdChooser(kins,ploidy)
KIMGENSThresholdChooser<-function(kins,ploidy,thresholds=c(0,0.05,0.1,0.15,0.2,0.25,0.3,0.35,0.4,0.45)){
tempkins<-kins
tempkins$ind1<-kins$ind2
tempkins$ind2<-kins$ind1
tempkins<-rbind(kins,tempkins)
unmeltkin<-reshape2::dcast(tempkins,ind1~ind2,value.var = "kinshipexKING")
rownames<-unmeltkin$ind1
unmeltkin<-apply(unmeltkin[,-1],2,as.numeric)
row.names(unmeltkin)<-rownames
diploids<-which(row.names(unmeltkin)%in%ploidy$id[ploidy$ploidy==2])
unmeltkin[(diploids-1)*nrow(unmeltkin)+diploids]<-0.5
ksize<-sapply(thresholds,function(x){colSums(unmeltkin>x,na.rm = T)})
colnames(ksize)<-thresholds
return(ggplot2::ggplot(reshape2::melt(ksize), ggplot2::aes(x=as.character(Var2), y=value)) +
ggplot2::geom_violin(draw_quantiles = c(0.25, 0.5, 0.75))+
ggplot2::theme_minimal()+ggplot2::labs(x="thresholds", y="number of references"))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.