Nothing
#' @title Haplotyping of a dataset composed by several families.
#' @description By analyzing all possible combinations of a parent-offspring pedigree in which parents may be missing (missParProb>0), as long as one child was genotyped, it is possible an unequivocal reconstruction of many parental haplotypes. When neither parent was genotyped (missParProb==1), also it is possible to reconstruct at least two parental haplotypes in certain cases. Regarding offspring haplotypes, if both parents are completely genotyped (missParProb==0), in majority of cases partial offspring haplotypes may be successfully obtained (missOffProb>0).
#' @param data Data containing non-genetic and genetic information of families (or PED file path).
#' @param NAsymbol Icon which will be placed in the NA values of the haplotypes.
#' @param alleSep Icon which will be used as separator of the haplotype alleles.
#' @param invisibleOutput Data are not shown by default.
#' @param dataSummary A summary of the data is shown by default.
#' @return Re-imputed alleles and haplotypes for each loaded family.
#' @import abind
#' @import stats
#' @import utils
#' @export alleHaplotyper
#' @references Medina-Rodriguez, N. Santana A. et al. (2014) alleHap: an efficient algorithm to reconstruct zero-recombinant haplotypes from parent-offspring pedigrees. BMC Bioinformatics, 15, A6 (S-3).
#' @examples
#'
#' ## Haplotype reconstruction for 3 families without missing data.
#' simulatedFams <- alleSimulator(3,3,6)
#' (famsAlls <- simulatedFams[[1]]) # Original data
#' famsList <- alleHaplotyper(famsAlls) # List containing families' alleles and haplotypes
#' famsList$reImputedAlls # Re-imputed alleles
#' famsList$haplotypes # Reconstructed haplotypes
#'
#' ## Haplotype reconstruction of a family containing missing data in a parent.
#' infoFam <- data.frame(famID="FAM002",indID=1:6,patID=c(0,0,1,1,1,1),
#' matID=c(0,0,2,2,2,2),sex=c(1,2,1,2,1,2),phenot=c(2,1,1,2,1,2))
#' Mkrs <- rbind(c(1,4,2,5,3,6),rep(NA,6),c(1,7,2,3,3,2),
#' c(4,7,5,3,6,2),c(1,1,2,2,3,3),c(1,4,2,5,3,6))
#' colnames(Mkrs) <- c("Mk1_1","Mk1_2","Mk2_1","Mk2_2","Mk3_1","Mk3_2")
#' (family <- cbind(infoFam,Mkrs)) # Original data
#' famList <- alleHaplotyper(family) # List containing family's alleles and haplotypes
#' famList$reImputedAlls # Re-imputed alleles
#' famList$haplotypes # Reconstructed haplotypes
#'
#' ## Haplotype reconstruction from a PED file
#' pedFamPath <- file.path(find.package("alleHap"), "examples", "example3.ped") # PED file path
#' pedFamAlls <- alleLoader(pedFamPath,dataSummary=FALSE)
#' pedFamList <- alleHaplotyper(pedFamAlls)
#' pedFamAlls # Original data
#' pedFamList$reImputedAlls # Re-imputed alleles
#' pedFamList$haplotypes # Reconstructed haplotypes
#'
alleHaplotyper=function(data,NAsymbol="?",alleSep="",invisibleOutput=TRUE,dataSummary=TRUE){
############################################# I. INTERNAL FUNCTIONS ###########################################
{
# Haplotyping per family (main function)
famHaplotyper=function(family){
#=== INTERNAL FUNCTIONS ===#
as.familyArray=function(Mkrs){
famSize <- nrow(Mkrs)
nOffs <- famSize-2
nMarkers <- ncol(Mkrs)/2
outArray <- array(t(Mkrs),dim=c(2,nMarkers,famSize),
dimnames=list(haplo=c("hap1","hap2"),marker=paste("Mkr",1:nMarkers,sep=""),
subject=c("father","mother",paste("child",1:nOffs,sep=""))))
return(outArray)
}
as.familyMatrix=function(familyArray){
outArray <- t(array(familyArray,dim=c(2*dim(familyArray)[2],dim(familyArray)[3])))
return(outArray)
}
excludeCase5Mkrs=function(fam,case5){
fam$nMarkers=fam$nMarkers-length(case5$mrk)
fam$imputedAlls=fam$imputedAlls[,-case5$mrkCols]
fam$allelesNumber=fam$allelesNumber[-case5$mrk]
fam$reImpAlls=fam$reImpAlls[,-case5$mrk,,drop=FALSE]
return(fam)
}
findHaps=function(fam){
### Internal Functions ###
canBeEqual=function(hap1,hap2){
compareAlls=function(v1,v2) all(v1==v2,na.rm=TRUE)|all(v1==v2[2:1],na.rm=TRUE)
n=dim(hap1)[2]
comps=vector("logical",n)
for (j in 1:n) comps[j]=compareAlls(hap1[,j],hap2[,j])
return(all(comps))
}
identifyChildHaps=function(childIndex){
child=fam$reImpAlls[,,childIndex+2]
compatible=which(sapply(possibleHapPairs,canBeEqual,child))
if (length(compatible)==1) idHaps=hapPairOrigin[compatible,]
else if (length(compatible)>1){
idHaps=c(0,0)
hpo=hapPairOrigin[compatible,]
for (j in 1:2){
ph=unique(hpo[,j])
if (length(ph)==1) idHaps[j]=ph
}
}
else if (length(compatible)==0) idHaps=c(NA,NA) ## RECOMBINATION EVENT !!!!
idHaps
}
updateGeno=function(genotype,genotypeIDS,markersToUpdate,identifiedAlleles,rowToUpdate){
for (j in 1:length(markersToUpdate)){
allele=identifiedAlleles[j]
mk=markersToUpdate[j]
if (is.na(genotype[rowToUpdate,mk])){
if (is.na(genotype[3-rowToUpdate,mk])){
genotype[rowToUpdate,mk]=allele
genotypeIDS[rowToUpdate,mk]=1
} # Case 10
else if (genotype[3-rowToUpdate,mk]==allele){
if (genotypeIDS[3-rowToUpdate,mk]==0){
genotype[,mk]=genotype[2:1,mk]
genotypeIDS[rowToUpdate,mk]=1
} # Case 6
else{
genotype[rowToUpdate,mk]=allele
genotypeIDS[rowToUpdate,mk]=1
} # Case 7
} # Cases 6,7
else{
if (genotypeIDS[3-rowToUpdate,mk]==0){
genotype[rowToUpdate,mk]=allele
genotypeIDS[1:2,mk]=1
} # Case 8
else{
genotype[rowToUpdate,mk]=allele
genotypeIDS[rowToUpdate,mk]=1
} # Case 9
} # Cases 8,9
} # Cases 6,7,8,9,10
else{ # when !is.na(genotype[rowToUpdate,mk])
if (is.na(genotype[3-rowToUpdate,mk])){
if (genotype[rowToUpdate,mk]==allele){
genotypeIDS[rowToUpdate,mk]=1
} # Case 1
else {
genotype[,mk]=genotype[2:1,mk]
genotype[rowToUpdate,mk]=allele
genotypeIDS[1:2,mk]=1
} # Case 5
} # Cases 1,5
else if (genotype[rowToUpdate,mk]==allele){
genotypeIDS[1:2,mk]=1
} # Case 2
else{
if (genotype[3-rowToUpdate,mk]==allele){
genotype[,mk]=genotype[2:1,mk]
genotypeIDS[1:2,mk]=1
} # Case 3
else{
return(list(genotype=NA,genotypeIDS=NA, errorMkr=mk))
} # Case 4
} # Cases 3,4
} # Cases 1,2,3,4,5
}
return(list(genotype=genotype,genotypeIDS=genotypeIDS))
}
updateFamHaps=function(childIndex,fam){
child=fam$reImpAlls[,,childIndex+2]
childIDS=fam$IDS[,,childIndex+2]
parents=fam$reImpAlls[,,1:2]
parentsIDS=fam$IDS[,,1:2]
idh=fam$idHaps[childIndex,]
for (i in 1:2){
difids=childIDS[i,]-parentsIDS[idh[i],,i]
par2ch=which(difids==-1) # alleles identified in parent but not in child
ch2par=which(difids==1) # alleles identified in child but not in parent
if (length(par2ch)>0){
updatedChild=updateGeno(child,childIDS,par2ch,parents[idh[i],par2ch,i],rowToUpdate=i)
if (exists("errorMkr",updatedChild)){
fam$hapIncidences=paste("Genotyping error or recombination event in marker",
updatedChild$mk,"in subject",childIndex+2)
return(fam)
}
fam$reImpAlls[,,childIndex+2]=updatedChild$genotype
fam$IDS[,,childIndex+2]=updatedChild$genotypeIDS
}
if (length(ch2par)>0){
updatedParent=updateGeno(parents[,,i],parentsIDS[,,i],ch2par,child[i,ch2par],rowToUpdate=idh[i])
if (exists("errorMkr",updatedParent)){
fam$hapIncidences=paste("Genotyping error or recombination event in marker",updatedParent$mk,"in subject",i)
return(fam)
}
fam$reImpAlls[,,i]=updatedParent$genotype
fam$IDS[,,i]=updatedParent$genotypeIDS
idhaps0=which(apply(fam$idHaps,1,function(x) any(x==0)))
if (length(idhaps0)>0){
parentsHaps=fam$reImpAlls[,,1:2]
parentsHaps[fam$IDS[,,1:2]==0]=NA
possibleHapPairs=list(t(parentsHaps[1,,]),rbind(parentsHaps[1,,1],parentsHaps[2,,2]),
rbind(parentsHaps[2,,1],parentsHaps[1,,2]),t(parentsHaps[2,,]))
for (k in idhaps0) fam$idHaps[k,]=identifyChildHaps(k)
}
}
}
return(fam)
}
### Initialization ###
hapPairOrigin=matrix(rbind(c(1,1),c(1,2),c(2,1),c(2,2)),ncol=2)
parentsHaps=fam$reImpAlls[,,1:2]
parentsHaps[fam$IDS[,,1:2]==0]=NA
possibleHapPairs=list(t(parentsHaps[1,,]),rbind(parentsHaps[1,,1],parentsHaps[2,,2]),
rbind(parentsHaps[2,,1],parentsHaps[1,,2]),t(parentsHaps[2,,]))
fam$idHaps=matrix(0,nrow=fam$famSize-2,ncol=2)
for (k in 1:(fam$famSize-2)) fam$idHaps[k,]=identifyChildHaps(k)
if (any(is.na(fam$idHaps))){
idhapsNA=which(apply(fam$idHaps,1,function(x) any(is.na(x))))
fam$hapIncidences=paste("Genotyping error or recombination event in subject(s)",idhapsNA,collapse=",")
fam$haps=writeFamilyHaps(fam)
}
else {
### Haplotyping ###
sumIDS0=sum(fam$IDS)
repeat{
for (k in 1:(fam$famSize-2)) {
fam=updateFamHaps(k,fam)
if (exists("hapIncidences",fam)){
fam$haps <- writeFamilyHaps(fam)
return(fam)
}
}
sumIDS1=sum(fam$IDS)
if (sumIDS1==sumIDS0) break else sumIDS0=sumIDS1
}
fam$haps=writeFamilyHaps(fam)
}
return(fam)
}
includeCase5Mkrs=function(fam,fam0,case5,NAsymbol="?"){
fam0$imputedAlls[,-case5$mrkCols]=fam$imputedAlls
fam0$reImpAlls[,-case5$mrk,]=fam$reImpAlls
fam0$HMZ=matrix(0,nrow=fam0$famSize,ncol=fam0$nMarkers)
fam0$HMZ[,-case5$mrk]=fam$HMZ
fam0$HMZ[,case5$mrk]=t(apply(fam0$reImpAlls[,case5$mrk,,drop=FALSE],3,function(x) apply(x,2,function(a) as.integer(a[1]==a[2]))))
fam0$IDS=array(0,dim=c(2,fam0$nMarkers,fam0$famSize))
fam0$IDS[,-case5$mrk,]=fam$IDS
fam0$idHaps=fam$idHaps
fam0$haps=writeFamilyHaps(fam0)
nf0=names(fam0)
nf=names(fam)
nv=nf[which(!nf%in%nf0)]
for (k in nv) fam0[[k]]=fam[[k]]
return(fam0)
}
inheritedHap=function(hap,parentHaps,hapIDS,pHapsIDS){
# This function determines which one of parentHaps is (or can be) equal to 'hap'; it returns 0
# if both parentHaps are compatible with 'hap'
if(is.null(dim(parentHaps))){
dim(hap) <- dim(hapIDS) <- c(1,1)
dim(parentHaps) <- dim(pHapsIDS) <- c(2,1)
}
canBeSameHap=vector("logical",2)
for (i in 1:2){
pos=which(hapIDS+pHapsIDS[i,]==2)
if (length(pos)==0) canBeSameHap[i]=TRUE
else canBeSameHap[i]=all(hap[pos]==parentHaps[i,pos])
if (length(pos)==ncol(pHapsIDS)&canBeSameHap[i]) return(i)
}
inhrtdHap=which(canBeSameHap)
if (length(inhrtdHap)==2) inhrtdHap=0
else if (length(inhrtdHap)==0) inhrtdHap=3
return(inhrtdHap)
}
initializeIDS=function(fam){
reImpAlls <- as.familyArray(fam$imputedAlls)
### HMZ initialization
HMZ <- t(apply(reImpAlls,3,function(x) apply(x,2,function(a) as.integer(a[1]==a[2]))))
if (nrow(HMZ)==1) HMZ <- t(HMZ)
### IDS initialization
IDS=with(fam,array(NA,dim=c(2,nMarkers,famSize),
dimnames=list(haplo=c("hap1","hap2"),marker=paste("Mkr",1:nMarkers,sep=""),
subject=c("father","mother",paste("child",1:(famSize-2),sep="")))))
IDS[1,,] = IDS[2,,] = t(HMZ)
IDS[is.na(IDS)]<-0
### IDS in children
for (i in 3:fam$famSize)
for (j in 1:fam$nMarkers){
NAchild <- which(is.na(reImpAlls[,j,i]))
NAfather <- which(is.na(reImpAlls[,j,1]))
NAmother <- which(is.na(reImpAlls[,j,2]))
# A child has only one NA value if the other value has been imputed from an homozygous parent or
# maybe after a pass by the three children algorithm, which may have re-imputed only one allele.
if (length(NAchild)==1) {
hompar <- which(HMZ[1:2,j]==1)
chAl <- reImpAlls[3-NAchild,j,i]
proc <- which(c(chAl %in% reImpAlls[,j,1],chAl %in% reImpAlls[,j,2]))
if (length(proc)==1){
if (NAchild==proc){
reImpAlls[,j,i]=reImpAlls[2:1,j,i]
NAchild=3-NAchild
}
IDS[proc,j,i]=1
} else if (length(proc)==2){
if (length(hompar)==1){
if (NAchild==hompar) {
reImpAlls[,j,i]=reImpAlls[2:1,j,i]
NAchild=3-NAchild
}
IDS[hompar,j,i]=1
}
}
if (length(hompar)==1){
parAl=reImpAlls[1,j,hompar]
if (chAl!=parAl){
reImpAlls[NAchild,j,i]=parAl
IDS[NAchild,j,i]=1
}
} else if (length(hompar)==2){
reImpAlls[,j,i]=reImpAlls[1,j,1:2]
IDS[,j,i]=1
}
}
else if (length(NAchild)==0){ # Child without missing alleles
if (length(NAfather)+length(NAmother)==0&HMZ[i,j]==0){ # Parents without missing alleles
fromWhichParent<-list(fromFather=which(reImpAlls[,j,i]%in%reImpAlls[,j,1]),
fromMother=which(reImpAlls[,j,i]%in%reImpAlls[,j,2]))
ia=lapply(fromWhichParent,length)
if (ia[[1]]+ia[[2]]<4){
ia1=which(ia==1)[1]
if (fromWhichParent[[ia1]]!=ia1) reImpAlls[,j,i]=reImpAlls[2:1,j,i]
IDS[,j,i]=1
}
} else if (length(NAfather)>0&length(NAmother)==0){ # Missing alleles only in father
fromFather=which(!reImpAlls[,j,i]%in%reImpAlls[,j,2])
if (length(fromFather)==1){
if (fromFather==2) reImpAlls[,j,i]=reImpAlls[2:1,j,i]
IDS[,j,i]=1
}
} else if (length(NAfather)==0&length(NAmother)>0){ # Missing alleles only in mother
fromMother=which(!reImpAlls[,j,i]%in%reImpAlls[,j,1])
if (length(fromMother)==1){
if (fromMother==1) reImpAlls[,j,i]=reImpAlls[2:1,j,i]
IDS[,j,i]=1
}
}
}
}
### IDS in parents
# IDS in parents is initialized from the child with more IDS==1
for (i in 1:2){
if (fam$famSize==3) keyCh<-1
else{
nrIDS1=if (fam$nMarkers==1) IDS[i,,-(1:2)] else colSums(IDS[i,,-(1:2)],na.rm=TRUE) ## MODIFICADO 20/8/2015
keyCh<-which.max(nrIDS1) ## MODIFICADO 20/8/2015
}
keyMks=which(IDS[i,,keyCh+2]==1)
for (j in keyMks){
pap=which(match(reImpAlls[,j,i],reImpAlls[i,j,keyCh+2])==1)
if (!is.na(pap[1])) if (pap[1]==2) reImpAlls[,j,i]=reImpAlls[2:1,j,i]
IDS[which(!is.na(reImpAlls[,j,i])),j,i]=1
}
}
# Which parents allele has been inherited by child
idHaps<-array(0,dim=c(fam$famSize,2))
for (i in 3:fam$famSize)
for (j in 1:2)
idHaps[i,j]=inheritedHap(reImpAlls[j,,i],reImpAlls[,,j],IDS[j,,i],IDS[,,j])
### New variables attached
fam$reImpAlls<-reImpAlls
fam$HMZ <- HMZ
fam$IDS <- IDS
fam$idHaps <- idHaps
return(fam)
}
makeHapsFromThreeChildren=function(fam){
makeFamilyFromChildren=function(fam){
findHapsfrom3or4Children=function(children,parentsInfo=NULL,NAsymbol="?"){
### Internal Functions
canBeEqual=function(hapPair1,hapPair2){
# Compares two haplotype pairs
for (j in 1:ncol(hapPair1)){
equalMrk=(all(hapPair1[,j]==hapPair2[,j],na.rm=TRUE))|
(all(hapPair1[,j]==hapPair2[,j][2:1],na.rm=TRUE))
if (!equalMrk) return(FALSE)
}
return(TRUE)
}
commonHap=function(twoChildren){
# Finds the possible common haplotypes between two children (stored in the array 'twoChildren').
commonAllelles <- vector("list")
nMarkers <- dim(twoChildren)[2]
for (j in 1:nMarkers) commonAllelles[[j]] <- intersect(twoChildren[,j,1],twoChildren[,j,2])
commonHap <- expand.grid(commonAllelles,KEEP.OUT.ATTRS=FALSE)
names(commonHap) <- paste("Mkr",1:ncol(commonHap),sep="")
return(commonHap)
}
complementHap=function(hap,child){
# Determines the complementary haplotype for a given one.
# This function supose that "hap" is placed in "child", and it finds the other haplotype.
comp <- child[1,]
flip <- (comp==hap)
comp[flip] <- child[2,flip]
return(comp)
}
findParHaps=function(chldHaps){
# Function to determine the parental haplotypes from the haplotypes found in children:
foundHaps=t(apply(chldHaps,3,function(x) apply(x,1,paste,collapse="")))
nHaps=length(unique(as.vector(foundHaps))) # Num de haplotipos distintos
hmzCh=sum(foundHaps[,1]==foundHaps[,2]) # num de hijos homocigotos
if(nHaps==3&hmzCh==0)
return(list(sortedParents=NULL, sortedChildren=chldHaps,
hapIncidences <- "Multiple compatible haplotypes in parents",
alignedParents=FALSE))
hapIncidences=""
parentOrdered=all(apply(foundHaps,2,function(x) length(unique(x)))<3)
if (!parentOrdered){
i=1
repeat{
foundHaps[i,]=foundHaps[i,2:1]
parentOrdered=all(apply(foundHaps,2,function(x) length(unique(x)))<3)
if (!parentOrdered){
foundHaps[i,]=foundHaps[i,2:1]
i=i+1
if (i==4) {
parents=array(NA,dim=c(2,nMrk,2))
hapIncidences <- "Genotyping error, recombination event or inheritance from non-declared parent"
break
}
}
else{
chldHaps[,,i]=chldHaps[2:1,,i]
break
}
}
}
if (hapIncidences==""){
parents=abind(t(unique(chldHaps[1,,],MARGIN=2)), t(unique(chldHaps[2,,],MARGIN=2)),along=3)
parents=abind(sortByRows(parents[,,1]),sortByRows(parents[,,2]),along=3)
if (is.null(parentsInfo)) alignedParents=FALSE else {
eqPar=matrix(NA,2,2)
for (i in 1:2)
for (j in 1:2)
eqPar[i,j]=canBeEqual(parents[,,i],parentsInfo[,,j])
if (any(rowSums(eqPar)==0)|any(colSums(eqPar)==0)) {
alignedParents=FALSE
parents[,,1]<-parents[,,2]<-NA
hapIncidences <- "Parental information is not compatible with haplotypes found in children"
} else{
weq=which(!eqPar,arr.ind=TRUE)
if (length(weq)==0){
alignedParents=FALSE
} else{
alignedParents=TRUE
if (weq[1,1]==weq[1,2]){
parents=parents[,,2:1]
chldHaps=chldHaps[2:1,,]
}
}
}
}
}
return(list(sortedParents=parents, sortedChildren=chldHaps,
hapIncidences=hapIncidences, alignedParents=alignedParents))
}
identifyFourthChild=function(famHaps,fourthChild){
# Function for identifying haplotypes in fourth child
c4possibleHaps=list(t(famHaps$sortedParents[1,,1:2]),t(famHaps$sortedParents[2,,1:2]),
rbind(famHaps$sortedParents[1,,1],famHaps$sortedParents[2,,2]),
rbind(famHaps$sortedParents[2,,1],famHaps$sortedParents[1,,2]))
c4possibleHapsSorted=lapply(c4possibleHaps,function(ph) apply(ph,2,sort))
fourthChild=apply(fourthChild,2,sort)
c4Haps=which(sapply(c4possibleHapsSorted,function(x) all(x==fourthChild | x==fourthChild[2:1,])))
if (length(c4Haps)>0) famHaps$sortedChildren=abind(famHaps$sortedChildren, c4possibleHaps[[c4Haps]],along=3)
else{
famHaps$sortedChildren=abind(famHaps$sortedChildren, fourthChild,along=3)
famHaps$hapIncidences="Haplotypes in one child are not compatible with the haplotypes found in the rest of the offspring"
}
return(famHaps)
}
hapInChild=function(hap,child){
# Determines if the haplotype 'hap' is located in a given child.
# In such a case returns the children haplotypes including 'hap' and its complementary haplotype.
# If a haplotype is not found, the function returns "NULL".
if (is.null(dim(child))) child=matrix(child,ncol=1)
isin=(t(child)==hap)
if (any(rowSums(isin)==0)) return(NULL)
s=which(!isin[,1])
child[,s]=child[2:1,s]
return(matrix(child,2))
}
sortByRows=function(arr) {
# Function for sorting a matrix row by row
return(arr[do.call(order, lapply(1:NCOL(arr), function(i) arr[, i])),])
}
### Initialization
nMrk <- dim(children)[2]
nchld <- dim(children)[3]
if (nchld==4) {
fourthChild=children[,,4]
children=children[,,1:3]
}
if (nchld<3){
return(list(sortedParents=array(NA,dim=c(2,nMrk,2)),sortedChildren=children,alignedParents=FALSE,
hapIncidences="Less than three children. Haplotypes can not be generated at this stage",
childrenIDS=array(0,dim=c(2,nMrk,nchld)), parentsIDS=array(0,dim=c(2,nMrk,2))))
}
### Haplotype finding from three unique children without missing values nor recombination.
commonHaps <- vector("list")
for (i in 1:3) commonHaps[[i]] <- commonHap(children[,,-i])
nch <- sapply(commonHaps,nrow)
refH <- which(nch>0)
chHaps <- list()
for (ref in refH) {
chld=(1:3)[-ref]
# The complemmentary haplotypes are added in each child
possibleHaps <- array(rbind(t(commonHaps[[ref]]), apply(commonHaps[[ref]],1,complementHap,children[,,chld[1]]),
apply(commonHaps[[ref]],1,complementHap,children[,,chld[2]])),dim=c(nMrk,3,nch[ref]))
for (ih in 1:nch[ref]){
identifiedHaps <- t(possibleHaps[,,ih])
uHaps <- unique(identifiedHaps)
for (j in 1:nrow(uHaps)) {
hapsChRef <- hapInChild(uHaps[j,],children[,,ref])
if (!is.null(hapsChRef)){
nHaps <- nrow(unique(rbind(uHaps,hapsChRef)))
if (nHaps<=4){
chh <- rbind(identifiedHaps[c(1,2,1,3),],hapsChRef)
chh <- chh[order(2*c(sort(rep((1:3)[-ref],2)),ref,ref)-c(1,0)),]
nh=nrow(unique(chh)) # Number of different haplotypes in the three children
ndh=nrow(unique(chh[duplicated(chh),])) # Num. of haplotypes which are repeated
if (!(nh==4&ndh==1)) {
chHaps[[length(chHaps)+1]]<-array(cbind(sortByRows(chh[1:2,]),sortByRows(chh[3:4,]),
sortByRows(chh [5:6,])),dim=c(2,nMrk,3))
}
}
}
}
}
}
chHaps <- unique(chHaps)
### Identification and reconstruction of the posible parental haplotypes
famHaps=lapply(chHaps,findParHaps)
noIncidences=which(sapply(famHaps,function(x) nchar(x$hapIncidences))==0)
# If there is only one posible haplotype structure in children
famHaps <- if (length(noIncidences)==0) famHaps[1] else famHaps[noIncidences]
# If there is a fourth child (diferent from the rest of the offspring)
if (nchld>3){
famHaps <- lapply(famHaps,identifyFourthChild,fourthChild)
noIncidences=which(sapply(famHaps,function(x) nchar(x$hapIncidences))==0)
famHaps <- if (length(noIncidences)==0) famHaps[1] else famHaps[noIncidences]
}
### IDS calculation
if (length(famHaps)>1) return(NULL) else famHaps=famHaps[[1]]
ids <- if (famHaps$hapIncidences=="") 1 else 0
famHaps$childrenIDS=array(ids,dim=c(2,nMrk,nchld))
famHaps$parentsIDS=array(ids,dim=c(2,nMrk,2))
### Function output
return(famHaps)
}
isCase5=function(mrk){
# Indicates if a marker is in case 5
globAlNr=length(unique(as.vector(mrk[!is.na(mrk)]))) # Global number of alleles in marker
indivAlNr=apply(mrk,3,function(x) if(any(is.na(x))) 2 else length(unique(x)))
return((globAlNr==2) & all(indivAlNr==2))
}
selChMk=function(ndx,children){
alleMCh=alleMat[ndx,]
noNAMks=which(colSums(alleMCh)==0)
if (length(noNAMks)>0){
c5=which(sapply(noNAMks,function(k) isCase5(children[,k,ndx,drop=FALSE])))
if (length(c5)>0) noNAMks=noNAMks[-c5]
}
if (length(noNAMks)>1){
sch=array(apply(children[,noNAMks,ndx,drop=FALSE],3, function(M) apply(M,2,sort)),dim=c(2,length(noNAMks),3))
idupCh=duplicated(sch,MARGIN=3)
uch=ndx[!idupCh] # unique complete Children
if (length(uch)>=3) return(list(children=uch,markers=noNAMks))
}
}
updateParentsIDS=function(fam){
## IDS in parents is initialized from the child with more IDS==1
for (i in 1:2){
fam$IDS[,,i]=0
nrIDS1 <- if (fam$nMarkers==1) fam$IDS[i,,-(1:2)] else colSums(fam$IDS[i,,-(1:2)],na.rm=TRUE)
keyCh <- which.max(nrIDS1)
keyMks <- which(fam$IDS[i,,keyCh+2]==1)
for (j in keyMks){
chAl <- fam$reImpAlls[i,j,keyCh+2]
newa <- which(!chAl%in%fam$reImpAlls[,j,i])
if(length(newa)>0){
pos <- which(is.na(fam$reImpAlls[,j,i]))[1:length(newa)]
fam$reImpAlls[pos,j,i]=chAl[newa]
}
pap=which(match(fam$reImpAlls[,j,i],fam$reImpAlls[i,j,keyCh+2])==1)
if (!is.na(pap[1])) if (pap[1]==2) fam$reImpAlls[,j,i]=fam$reImpAlls[2:1,j,i]
fam$IDS[which(!is.na(fam$reImpAlls[,j,i])),j,i]=1
}
}
# Which parents allele has been inherited by child
fam$idHaps<-array(0,dim=c(fam$famSize,2))
for (i in 3:fam$famSize)
for (j in 1:2)
fam$idHaps[i,j]=inheritedHap(fam$reImpAlls[j,,i],fam$reImpAlls[,,j],fam$IDS[j,,i],fam$IDS[,,j])
return(fam)
}
compareNA=function(v1,v2) {
same <- (v1 == v2) | (is.na(v1) & is.na(v2))
same[is.na(same)] <- FALSE
return(same)
}
find2Haps=function(mat){
canBeEqual=function(v1,v2) all(v1==v2,na.rm=TRUE)
nr=nrow(mat)
differents=NULL
for (i in 1:(nr-1)){
for(j in 2:nr)
if (!canBeEqual(mat[i,],mat[j,])){
differents=c(i,j)
break
}
if (!is.null(differents)) break
}
if (is.null(differents)){
nna=apply(mat,1,function(x) length(which(is.na(x))))
return(rbind(mat[which.min(nna),],rep(NA,ncol(mat))))
} else{
hap=rbind(mat[i,],mat[j,])
hta=(1:nr)[-c(i,j)]
repeat{
natot0=length(which(is.na(hap)))
for (k in hta){
if(canBeEqual(mat[k,],hap[1,])&!canBeEqual(mat[k,],hap[2,])){
h1na=intersect(which(is.na(hap[1,])),which(!is.na(mat[k,])))
hap[1,h1na]=mat[k,h1na]
hta=hta[-which(hta==k)]
}
else if (!canBeEqual(mat[k,],hap[1,])&canBeEqual(mat[k,],hap[2,])){
h2na=intersect(which(is.na(hap[2,])),which(!is.na(mat[k,])))
hap[2,h2na]=mat[k,h2na]
hta=hta[-which(hta==k)]
}
}
natot1=length(which(is.na(hap)))
if (natot0==natot1) break
}
return(hap)
}
}
tryParentAlign=function(fam,selectedMarkers,parents,idsPr,children,idsCh){
changeMarkers=function(fam,selectedMarkers,parents,idsPr,children,idsCh,parentsOrder=1:2){
fam$reImpAlls[,selectedMarkers,]=abind(parents[,,parentsOrder],children[parentsOrder,,],along=3)
fam$IDS[,selectedMarkers,]=abind(idsPr[,,parentsOrder],idsCh[parentsOrder,,],along=3)
return(fam)
}
f1=f2=fam
f1=changeMarkers(fam,selectedMarkers,parents,idsPr,children,idsCh)
f2=changeMarkers(fam,selectedMarkers,parents,idsPr,children,idsCh,2:1)
f1=updateParentsIDS(f1)
f2=updateParentsIDS(f2)
compat1=!any(f1$idHaps==3)
compat2=!any(f2$idHaps==3)
if (compat1&!compat2){
fam=findHaps(f1)
} else if (compat2&!compat1){
fam=findHaps(f2)
} else if (compat1&compat2){
fam1=findHaps(f1)
fam2=findHaps(f2)
for (k in 1:nChildren){
nam=which(apply(fam$reImpAlls[,selectedMarkers,k+2],2,function(x) any(is.na(x))))
if (length(nam)>0){
for (nn in nam){
nalle=which(is.na(fam$reImpAlls[,selectedMarkers[nn],k+2]))
nallefam1=fam1$reImpAlls[,selectedMarkers[nn],k+2]
nallefam2=fam2$reImpAlls[,selectedMarkers[nn],k+2]
commonAll=nallefam1[which(nallefam1%in%nallefam2)]
commonAll=commonAll[!is.na(commonAll)]
if (length(nalle)==2){
if (length(commonAll)==2) fam$reImpAlls[,selectedMarkers[nn],k+2]=commonAll
else if (length(commonAll)==1) fam$reImpAlls[1,selectedMarkers[nn],k+2]=commonAll
}
else{ #length(nalle)==1
if (length(commonAll)==1){
if (commonAll!=fam$reImpAlls[3-nalle,selectedMarkers[nn],k+2])
fam$reImpAlls[nalle,selectedMarkers[nn],k+2]=commonAll
} else if (length(commonAll)==2){
if (commonAll[1]==commonAll[2]) fam$reImpAlls[nalle,selectedMarkers[nn],k+2]=commonAll[1]
else{
wo=which(commonAll!=fam$reImpAlls[3-nalle,selectedMarkers[nn],k+2])
fam$reImpAlls[nalle,selectedMarkers[nn],k+2]=commonAll[wo]
}
}
}
if(!any(is.na(fam$reImpAlls[,selectedMarkers[nn],k+2])))
if (fam$reImpAlls[1,selectedMarkers[nn],k+2]==fam$reImpAlls[2,selectedMarkers[nn],k+2])
fam$IDS[,selectedMarkers[nn],k+2]=c(1,1)
}
}
}
fam$alignedParents=FALSE
} else {
fam$hapIncidences <- "Genotyping error, recombination event or inheritance from non-declared parent"
}
return(fam)
}
if (!exists("alignedParents",fam)){
if (all(is.na(fam$reImpAlls[,,1:2]))) fam$alignedParents=FALSE
else{
idsM1=ifelse(apply(fam$IDS[,,-(1:2)],2,sum)>0,TRUE,FALSE) # Markers with ids=1 in some sib
c5=apply(fam$reImpAlls[,,-(1:2)],2,function(x){
x=array(x,dim=c(2,1,fam$famSize-2))
isCase5(x)}
)
# Determination of which haps come from father and which from mother (which markers can be used for alignment of parents)
alinMk=which(!apply(fam$reImpAlls[,,1:2],2,function(M) all(compareNA(M[,1],M[,2])))&idsM1&!c5)
fam$alignedParents=if (length(alinMk)>0) TRUE else FALSE
fam$alignmentMks=if (fam$alignedParents) list(alinMk=alinMk,reImpAlls=fam$reImpAlls[,alinMk,,drop=FALSE],
IDS=fam$IDS[,alinMk,,drop=FALSE]) else NULL
}
}
nChildren=fam$famSize-2
alleMat=t(apply(fam$reImpAlls[,,-c(1,2)],3,function(x) apply(x,2,function(x) any(is.na(x))))+0)
# The sum by rows indicates the number of missing markers on each child
combChildren=combn(nChildren,3) # Search for marker combinations for three full children (at least)
scm=apply(combChildren,2,selChMk,fam$reImpAlls[,,-c(1,2)])
if (!is.null(scm)) scm=scm[-which(sapply(scm,is.null))]
nmk=sapply(scm,function(cm){length(cm$markers)})
if (all(nmk<=1)) return(fam)
sl=order(nmk,decreasing = TRUE)
sl=sl[nmk[sl]>1]
selSet=NULL
selMks=NULL
for (k in sl){
nwm=which(!scm[[k]]$markers%in%selMks)
if (length(nwm)>0){
selSet=c(selSet,k)
selMks=c(selMks,scm[[k]]$markers[nwm])
}
}
for (s in selSet) if(any(is.na(fam$reImpAlls[,scm[s][[1]]$markers,]))){
selected=scm[s][[1]]
idsCh=fam$IDS[,selected$markers,-(1:2)]
children=fam$reImpAlls[,selected$markers,-c(1,2)]
partialFam=findHapsfrom3or4Children(fam$reImpAlls[,selected$markers,selected$children+2],
parentsInfo=fam$reImpAlls[,selected$markers,1:2])
if (!is.null(partialFam$sortedParents)){
children[,,selected$children]=partialFam$sortedChildren
idsCh[,,selected$children]=partialFam$childrenIDS
if (length(selected$children)<nChildren){
unSelectedChildren=(1:(fam$famSize-2))[-selected$children]
uHMZ=t(apply(children[,,unSelectedChildren],3,
function(x) apply(x,2,function(a) as.integer(a[1]==a[2]))))
if (nrow(uHMZ)==1) uHMZ=t(uHMZ)
uHMZ[is.na(uHMZ)]=0
for (u in 1:length(unSelectedChildren)) idsCh[,,unSelectedChildren[u]]=rbind(uHMZ[u,],uHMZ[u,])
partFam=list(reImpAlls=abind(partialFam$sortedParents,children,along=3),
IDS=abind(partialFam$parentsIDS,idsCh,along=3))
partFam$famSize=fam$famSize
partFam=findHaps(partFam)
children[,,-selected$children]=partFam$reImpAlls[,,unSelectedChildren+2]
idsCh[,,-selected$children]=partFam$IDS[,,unSelectedChildren+2]
}
if (partialFam$alignedParents){
fam$reImpAlls[,selected$markers,1:2]=partialFam$sortedParents
fam$reImpAlls[,selected$markers,-(1:2)]=children
fam$IDS[,selected$markers,-(1:2)]=idsCh
fam=updateParentsIDS(fam)
} else {
if (!is.null(fam$alignmentMks))
fam=tryParentAlign(fam,selected$markers,partialFam$sortedParents,partialFam$parentsIDS,children,idsCh)
if (is.null(fam$alignmentMks)|!fam$alignedParents){
oldCh=fam$reImpAlls[,,-(1:2)]
oldCh[fam$IDS[,,-(1:2)]==0]=NA
newCh=children
newCh[idsCh==0]=NA
newCh1<-newCh2<-oldCh
newCh1[,selected$markers,]=newCh
newCh2[,selected$markers,]=newCh[2:1,,]
# Checking of those markers with ids=1 which are common in oldCh and newCh
ids1old=t(apply(fam$IDS[,selected$markers,-(1:2)],3,function(M) ifelse(colSums(M)>0,1,0)))
ids1new=t(apply(idsCh,3,function(M) ifelse(colSums(M)>0,1,0)))
ids1comm=ifelse(ids1old+ids1new==2,1,0)
# If any is homozygous, it can be useful to the alignment
ids1comm=which(ids1comm==1,arr.ind=TRUE)
het=apply(ids1comm,1,function(v) children[1,v[2],v[1]]!=children[2,v[2],v[1]])
hetMk=which(het)
if (length(hetMk)>0){
hcm=ids1comm[hetMk[1],]
child0=fam$reImpAlls[,selected$markers,hcm[1]+2]
gmk0=child0[,hcm[2]]
gmk=children[,hcm[2],hcm[1]]
if (all(gmk==gmk0)) {
fam$reImpAlls[,selected$markers,-(1:2)]=children
fam$IDS[,selected$markers,-(1:2)]=idsCh
fam$reImpAlls[,selected$markers,(1:2)]=partialFam$sortedParents
} else{
fam$reImpAlls[,selected$markers,-(1:2)]=children[2:1,,]
fam$IDS[,selected$markers,-(1:2)]=idsCh[2:1,,]
fam$reImpAlls[,selected$markers,(1:2)]=partialFam$sortedParents[,,2:1]
}
fam=updateParentsIDS(fam)
}
else if (all(compareNA(newCh1,newCh2[2:1,,]))){
# If there are homozygous and there are not common ones which lead to problems
fam$reImpAlls[,selected$markers,1:2]=partialFam$sortedParents
fam$reImpAlls[,selected$markers,-(1:2)]=children
fam$IDS[,selected$markers,-(1:2)]=idsCh
unSelectedMarkers=(1:ncol(fam$IDS))[-selected$markers]
for (u in unSelectedMarkers){
cma=intersect(fam$reImpAlls[,u,1],fam$reImpAlls[,u,2])
if (length(cma)==1) cma=c(cma,NA) else if (length(cma)==0) cma=c(NA,NA)
fam$reImpAlls[,u,1]<-fam$reImpAlls[,u,2]<-cma
}
fam$alignedParents=FALSE # This means that parents will be assigned haplotypes
# but it is not known which haps really correspond to father and which correspond to mother.
fam=updateParentsIDS(fam)
}
else{
cpr=compareNA(newCh1,newCh2[2:1,,])
fam$reImpAlls[,selected$markers,-(1:2)]=children
fam$IDS[,selected$markers,-(1:2)]=idsCh
fam$IDS[,,-(1:2)]=(cpr+0)*fam$IDS[,,-(1:2)]
fch=fam$reImpAlls[,,-(1:2)]
ids0=fam$IDS[,,-(1:2)]==0
fch[ids0]=NA
posParHaps=list(unique(t(fch[1,,])),unique(t(fch[2,,]))) # Possible parental haplotypes
phaps=lapply(posParHaps,find2Haps)
for (i in 1:2) fam$reImpAlls[,,i]=phaps[[i]]
fam=updateParentsIDS(fam)
}
}
}
fam=findHaps(fam)
}
}
if (!fam$alignedParents){
fam$unAlignedParentsHaps=fam$reImpAlls[,,1:2]
dimnames(fam$unAlignedParentsHaps)[[3]]=c("Parent1","Parent2")
cnf=list(fam$reImpAlls[,,1]==fam$reImpAlls[,,2],
fam$reImpAlls[,,1]==fam$reImpAlls[2:1,,2])
fid=fam$IDS[,,1]*fam$IDS[,,2]
cnf=lapply(cnf,function(id) {id[is.na(id)]=FALSE; id*fid})
idsC=cnf[[which.max(sapply(cnf,sum))]]
phParAlls=fam$reImpAlls[,,1]
phParAlls[!idsC]=NA
fam$reImpAlls[,,1:2]=phParAlls
fam$IDS[,,1:2]=idsC
fam$haps=writeFamilyHaps(fam)
}
return(fam)
}
na0=sum(fam$IDS)
repeat{
fam <- tryCatch(makeFamilyFromChildren(fam),error=function(e) {
fam$incidences="Some error in processing data"
return(fam)
})
na1=sum(fam$IDS)
if (na1==na0) break
na0=na1
}
return(fam)
}
makeHapsFromTwoChildren=function(fam){
# Function to identify in which case are the parent-offspring haplotypes
identifyCase=function(fam,sortedAlleles,childrenPair){
group=fam$reImpAlls[,sortedAlleles,c(1,2,childrenPair+2),drop=FALSE]
haps=matrix(NA,nrow=4,ncol=2)
for (i in 1:4) for (j in 1:2) haps[i,j]=writeHap(group[j,,i],1)
haps=matrix(haps,ncol=2)
colnames(haps)=c("hap1","hap2")
commChHap=intersect(haps[3,],haps[4,]) # Do the children have one common haplotype?
ncch=length(commChHap)
nChHaps=length(unique(as.vector(haps[3:4,]))) # How many unique haplotypes have the children?
commParHap=intersect(haps[1,],haps[2,]) # Do the parents have one common haplotype?
ncph=length(commParHap)
nParHaps=length(unique(as.vector(haps[1:2,]))) # How many unique haplotypes have the parents?
caso=0
if (ncph==0){
if (nChHaps==2&nParHaps==3){
if (haps[1,1]==haps[1,2]) caso=3
else if(haps[2,1]==haps[2,2]) caso=4
} else if (nChHaps==3){
if (nParHaps==4){
if (commChHap%in%haps[1,]) caso=1
else caso=2
}
}
} else if (ncph==1){
uch=unique(as.vector(haps[3:4,]))
fcomm=length(intersect(uch,haps[1,]))
mcomm=length(intersect(uch,haps[2,]))
if (fcomm==2&mcomm==1&haps[2,1]!=haps[2,2]) caso=6
else if (mcomm==2&fcomm==1&haps[1,1]!=haps[1,2]) caso=5
}
return(caso)
}
# Identification of such markers containing IDS=1 and missing parental values
sortedParentsMarkers=which(apply(fam$IDS[,,1:2],2,sum)==4)
if (is.null(sortedParentsMarkers)) return(fam)
# Markers with two heterozygous children (at least) and both parents with missing data
fam$HMZ=t(apply(fam$reImpAlls,3,function(x) apply(x,2,function(a) as.integer(a[1]==a[2]))))
twoHetChMkr=which(apply(fam$HMZ[-(1:2),],2,function(x) length(which(x==0)))>1)
naParents=which(apply(fam$HMZ[1:2,],2,function(x) if (any(is.na(x))) TRUE else FALSE))
mk2ch=intersect(twoHetChMkr,naParents) # Markers where the makeHapsFromTwoChildren can be applied
# Search for those sibs pairs containing one common allele and two alleles from each other
imputedMarkers=NULL
for (mk in mk2ch){
chPairs=combn(which(fam$HMZ[-(1:2),mk]==0),2)
selec=which(apply(chPairs,2,function(cp){
alleles=fam$reImpAlls[,mk,cp+2,drop=FALSE]
(length(unique(as.vector(alleles)))==3)&(length(intersect(alleles[,,1],alleles[,,2]))==1)
}))
if (length(selec)>0){
chp=chPairs[,selec,drop=FALSE] # Heterozygous children pairs in this marker with a common allele
for (s in 1:length(selec)){
sortedAlleles=sortedParentsMarkers[which(apply(fam$IDS[,sortedParentsMarkers,chp[,s]+2],2,sum)==4)]
if (!is.null(sortedAlleles)){
case=identifyCase(fam,sortedAlleles,chp[,s])
if (case>0){
chAlle=fam$reImpAlls[,mk,chp[,s]+2,drop=FALSE]
commAllele=intersect(chAlle[,,1],chAlle[,,2])
if (case%in%c(1,4,5)) ref=2 else ref=1
for (i in 1:2) if (chAlle[ref,,i]==commAllele) chAlle[,,i]=chAlle[2:1,,i]
fam$reImpAlls[,mk,chp[,s]+2]=chAlle
fam$IDS[,mk,chp[,s]+2]=1
fam=findHaps(fam)
imputedMarkers=c(imputedMarkers,mk)
break
}
}
}
}
}
return(fam)
}
whichCase5Mkrs=function(fam){ # Identifies those markers in case 5 from fam$markers matrix
isCase5=function(mrkPos){
mrk=fam$imputedAlls[,mrkPos]
if (all(is.na(mrk))) return(TRUE)
globAlNr=length(unique(as.vector(mrk[!is.na(mrk)]))) # Global number of alleles in marker
indivAlNr=apply(mrk,1,function(x) if(any(is.na(x))) 2 else length(unique(x)))
if ((globAlNr==2) & all(indivAlNr==2)) return(TRUE) else return(FALSE)
}
mrksPos=matrix(1:ncol(fam$imputedAlls),nrow=2)
case5Mks=which(apply(mrksPos,2,isCase5))
return(list(mrk=case5Mks,mrkCols=mrksPos[,case5Mks]))
}
writeHap=function(hap,ids){
hap[ids==0]=NAsymbol
return(paste(hap,collapse=alleSep))
}
writeFamilyHaps=function(fam){
haps=matrix(NA,nrow=fam$famSize,ncol=2)
for (i in 1:fam$famSize)
for (j in 1:2)
haps[i,j]=writeHap(fam$reImpAlls[j,,i],fam$IDS[j,,i])
haps=matrix(haps,ncol=2)
colnames(haps)=c("hap1","hap2")
return(haps)
}
#=== INITIALIZATION ===#
fam <- list(infoFam=family[,1:6],imputedAlls=family[,-(1:6)],
nMarkers=(ncol(family)-6)/2, famSize=nrow(family))
fam$reImpAlls <- as.familyArray(fam$imputedAlls)
#=== CASE 5 CHECKING ===#
case5Mks <- whichCase5Mkrs(fam)
if (length(case5Mks$mrk)>0){
fam0 <- fam
fam <- excludeCase5Mkrs(fam,case5Mks)
}
#=== HAPLOTYPING ===#
if (fam$nMarkers==0){
haps=paste(rep(NAsymbol,fam0$nMarkers),collapse=alleSep)
haps=matrix(haps,ncol=2,nrow=fam0$famSize)
colnames(haps)=c("hap1","hap2")
reImputedAlls=as.matrix(fam0$imputedAlls)
dimnames(reImputedAlls)=NULL
outFam=list(reImputedAlls=reImputedAlls, haplotypes=cbind(fam$infoFam,haps),
IDS=array(0,dim=c(fam0$famSize,fam0$nMarkers*2)),
haploLog=data.frame(famID=fam0$infoFam[1,1],hapIncidences="Not enough informative markers"),
missingParentsLog=NULL)
}
else if (fam$nMarkers==1){
fam=initializeIDS(fam)
fam$haps=writeFamilyHaps(fam)
if (length(case5Mks$mrk)>0) fam=includeCase5Mkrs(fam,fam0,case5Mks)
outFam=list(reImputedAlls=as.familyMatrix(fam$reImpAlls), haplotypes=cbind(fam$infoFam,fam$haps),
IDS=as.familyMatrix(fam$IDS), haploLog=NULL, missingParentsLog=NULL)
}
else {
fam=initializeIDS(fam)
fam=findHaps(fam)
missingParentsMarkers=apply(fam$reImpAlls[,,1:2],2,function(x) any(is.na(x)))
if (any(missingParentsMarkers)) if (fam$famSize>4) fam=makeHapsFromThreeChildren(fam) # Tries to solve for three or more children
missingParentsMarkers=apply(fam$reImpAlls[,,1:2],2,function(x) any(is.na(x)))
if (any(missingParentsMarkers)) if (fam$famSize>3) fam=makeHapsFromTwoChildren(fam) # Tries to solve for two children
# Two children function should be applied after the three children function,
# since it solves some cases that the other may have left behind
if (length(case5Mks$mrk)>0) fam=includeCase5Mkrs(fam,fam0,case5Mks)
haploLog <- if (exists("hapIncidences",fam)) data.frame(famID=fam$infoFam[1,1], hapIncidences=fam$hapIncidences) else NULL
if (exists("alignedParents",fam)){
if (!fam$alignedParents&!is.null(fam$unAlignedParentsHaps)){
if (length(case5Mks$mrk)>0) {
uaph=array(NA,dim=dim(fam$reImpAlls[,,1:2]))
uaph[,-case5Mks$mrk,]=fam$unAlignedParentsHaps
fam$unAlignedParentsHaps=uaph
}
missingParentsLog <- data.frame(famID=fam$infoFam[1:2,1],indID=c("Parent1","Parent2"),
as.familyMatrix(fam$unAlignedParentsHaps))
} else missingParentsLog <- NULL
} else missingParentsLog <- NULL
outFam=list(reImputedAlls=as.familyMatrix(fam$reImpAlls), haplotypes=cbind(fam$infoFam,fam$haps),
IDS=as.familyMatrix(fam$IDS), haploLog=haploLog, missingParentsLog=missingParentsLog)
}
return(outFam)
}
# Haplotyping multiple families
famsHaplotyper=function(fams){
#=== INITIALIZATION ===#
mrksPos <- matrix(1:(ncol(fams$imputedMkrs)-6),nrow=2)
nMrks <- ncol(mrksPos)
idFams <- unique(fams$imputedMkrs$famID)
fams$reImputedAlls <- NULL
fams$IDS <- NULL
fams$haplotypes <- NULL
fams$haploLog <- NULL
fams$missingParentsLog <- NULL
fams$Vdata <- NULL
#=== FAMILIES SCAN ===#
for (f in idFams) {
family <- subset(fams$imputedMkrs,fams$imputedMkrs$famID==f)
fam <- famHaplotyper(family)
fams$reImputedAlls <- rbind(fams$reImputedAlls,fam$reImputedAlls)
fams$IDS <- rbind(fams$IDS,fam$IDS)
fams$haplotypes <- rbind(fams$haplotypes,fam$haplotypes)
if (!is.null(fam$haploLog)) fams$haploLog=rbind(fams$haploLog,fam$haploLog)
if (!is.null(fam$missingParentsLog))
fams$missingParentsLog=rbind(fams$missingParentsLog,fam$missingParentsLog)
idsCount=c(rowSums(fam$IDS[,mrksPos[1,]]),rowSums(fam$IDS[,mrksPos[2,]]))
nHaps=2*nrow(family)
nFullHaps=length(which(idsCount==nMrks))
nEmptyHaps=length(which(idsCount==0))
nPartialHaps=nHaps-nFullHaps-nEmptyHaps
initialMissingAlls=length(which(is.na(family[,-(1:6)])))
nImputedAlls=initialMissingAlls-length(which(is.na(fam$reImputedAlls)))
fams$Vdata <- rbind(fams$Vdata,
data.frame(famID=family$famID[1],nHaps=nHaps,nFullHaps=nFullHaps,
nEmptyHaps=nEmptyHaps,nPartialHaps=nPartialHaps,
initialMissingAlls,nImputedAlls=nImputedAlls))
}
#=== FAMILIES BOUND ===#
fams$reImputedAlls <- cbind(fams$imputedMkrs[,1:6],fams$reImputedAlls)
colnames(fams$IDS) <- names(fams$reImputedAlls)[-(1:6)] <- paste("Mk",gl(nMrks,2),"_",gl(2,1,nMrks*2),sep="")
fams$haplotypes <- data.frame(fams$haplotypes)
return(fams)
}
# Data Summary
summarizeData=function(fams){
nAlls <- nrow(fams$IDS)*ncol(fams$IDS) # Total number of alleles
pPhasAlls <- sum(fams$IDS)/nAlls # Proportion of IDentified/Sorted alleles
pNonPhasAlls <- 1-pPhasAlls # Proportion of non-IDentified/Sorted alleles
nHapsStats <- colSums(fams$Vdata[,-1]) # Haplotyping Stats
nHaps <- nrow(fams$haplotypes)*2 # Number of haplotypes
pFullHaps <- nHapsStats["nFullHaps"]/nHaps # Proportion of full reconstructed haplotypes
pEmptyHaps <- nHapsStats["nEmptyHaps"]/nHaps # Proportion of empty haplotypes
pPartialHaps <- nHapsStats["nPartialHaps"]/nHaps # Proportion partially reconstructed haplotypes
newImputedAlleles <- nHapsStats["nImputedAlls"] # New imputed alleles
totalImputedAlleles <- nHapsStats["nImputedAlls"]+fams$imputationSummary$nImputedAlls
reImputationRate <- as.vector(totalImputedAlleles/fams$imputationSummary$nMissAlls)
if (is.na(reImputationRate)) reImputationRate <- 0
fams$haplotypingSummary <- data.frame(nAlls,pPhasAlls,pNonPhasAlls,nHaps,pFullHaps,pEmptyHaps,pPartialHaps,
newImputedAlleles,reImputationRate,haplotypingTime) # Haplotyping summary
fams[["Vdata"]] <- NULL; row.names(fams[["haplotypingSummary"]]) <- NULL
return(fams)
}
}
############################################## II. RE-IMPUTATION ##############################################
{
# Preliminary imputation (marker by marker)
fams <- alleImputer(data,invisibleOutput=invisibleOutput, dataSummary=dataSummary)
}
############################################### III. HAPLOTYPING ##############################################
{
# Haplotyping and time computing
haplotypingTime <- system.time(fams <- famsHaplotyper(fams))[[3]]
}
############################################### IV. DATA SUMMARY #############################################
{
fams <- summarizeData(fams)
}
############################################### V. DATA STORING #############################################
{
if (is.character(data)) {
baseName <- file_path_sans_ext(data)
outName1 <- paste(baseName,"reImputed.ped",sep="_")
outName2 <- paste(baseName,"haplotypes.txt",sep="_")
write.table(fams$reImputedAlls,sep=" ",quote=FALSE,file=outName1)
if (any(fams$hapIncidences[,-1]!="")) {
outName3 <- paste(baseName,"hapIncidences.txt",sep="_")
write.table(fams$hapIncidences,sep=" ",quote=FALSE,file=outName3)
}
}
}
############################################## VI. FUNCTION OUTPUT ###########################################
{
if (dataSummary==TRUE) {
### Data summary printing
cat("\n========= HAPLOTYPING SUMMARY ==========")
cat(paste("\nRe-imputation rate:",round(fams$haplotypingSummary$reImputationRate,4)))
cat(paste("\nProportion of phased alleles:",round(fams$haplotypingSummary$pPhasAlls,4)))
cat(paste("\nProportion of non-phased alleles:",round(fams$haplotypingSummary$pNonPhasAlls,4)))
cat(paste("\nProportion of missing haplotypes:",round(fams$haplotypingSummary$pEmptyHaps,4)))
cat(paste("\nProportion of partial haplotypes:",round(fams$haplotypingSummary$pPartialHaps,4)))
cat(paste("\nProportion of full haplotypes:",round(fams$haplotypingSummary$pFullHaps,4)))
cat(paste("\nHaplotyping time:",round(fams$haplotypingSummary$haplotypingTime,4)))
cat("\n========================================\n")
### Haplotyping message printing
if (!is.null(fams$haploLog)) {
cat("Incidences were detected. Some haplotypes could not be generated properly.")
}
### Storage path
if (is.character(data)) {
cat("\nGenerated data have been stored in:")
cat(paste("\n",outName1,"\n",sep="")); cat(outName2)
if (any(fams$hapIncidences[,2]!="")) cat(paste("\n",outName3,sep=""))
}
}
### Cleaning empty haplotype Incidences
if (is.null(fams$haploLog)) fams$haploLog <- NULL
### Returning (or not) the output
if (invisibleOutput) return(invisible(fams)) else return(fams)
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.