R/classparsimnet.r
In haplotypes: Manipulating DNA Sequences and Estimating Unambiguous Haplotype Network with Statistical Parsimony

Documented in calcclust clustersG ext.nodes fillna geodist.corr has.cycle parsnet recdistG ren.dupli steplimit tryconnect tryconnectG xhaps

### class Parsimnet, functions and methods
##march 2015, cnrakt
##function .TempletonProb is taken from package pegas version 0.6, authors Emmanuel Paradis, Klaus Schliep.
##Some internal structures of pieplot is taken from package network with modifications, author Carter T. Butts.

#################May 2016, cnrakt#################

#fixing a bug/update, May 2016
#internal function: calculate steplimit

steplimit<-function(seqlength,prob=.95)
{
    
    # .TempletonProb is taken from package pegas version 0.6, authors Emmanuel Paradis, Klaus Schliep.
    .TempletonProb <- function(j, S, b = 2, r = 1)
    {
        br <- b * r
        P <- numeric(max(j))
        L_jm <- function(q, j, m) {
            jm1 <- j - 1
            qonbr <- q/br
            (2*q)^jm1 * (1 - q)^(2*m + 1) * (1 - qonbr) *
            (2 - q*(br + 1)/br)^jm1 *
            (1 - 2*q*(1 - qonbr))
        }
        for (i in seq_along(P)) {
            M <- S - i
            denom <- integrate(L_jm, 0, 1, j = i, m = M)$value
            ## eq.7 from Templeton et al. 1992:
            out <- integrate(function(q) q*L_jm(q, j = i, m = M), 0, 1)$value/denom
            P[i] <- 1 - out
        }
        cumprod(P)[j]
    }
    
    i<-1
    test<-1
    testvec<-NULL
    while(test>=prob&&i<seqlength)
    {
        test<-.TempletonProb(i,seqlength)
        if(is.nan(test)) break()
        i<-i+1
        testvec<-c(testvec,test)
        
    }
    testvec<-testvec[-(i-1)]
    return(testvec)
}


#internal function: tryconnect

tryconnect<-function(dmat,newhapnam,step,prevp,nexp,prevpair,nhap)
{
    
    if(step>1)
    {
        
        dmat<- fillna(dmat,prevpair,prevp,nexp,step)
        temproxhapsobj<-xhaps(dmat,prevpair,prevp,nexp,step,newhapnam,rownames=c(prevp,nexp))
        temprodmat<-temproxhapsobj$d
        temproprevpair<-temproxhapsobj$prevpair
        tempronewhapnam<-nrow(temprodmat)
        
        
        net<-list(d=temprodmat,newhapnam=tempronewhapnam,step=step,prevp=prevp,nexp=nexp,prevpair=temproprevpair)
        
    } else {
        
        net<-list(d=dmat,newhapnam=newhapnam,step=step,prevp=prevp,nexp=nexp,prevpair=prevpair)
        
        
    }
    
    
    return(net)
    
}


#internal function: fillna

fillna<-function(dmat,prevpair,prevp,nexp,step)
{
    nas<-is.na(dmat[nexp,])
    nas[nexp]<-FALSE
    filna<-dmat[prevp,nas]
    filna[is.na(filna)]<-0
    dmat[nas,nexp]<-dmat[nexp,nas]<-filna+step
    
    return(dmat)
    
}

#bug fixed in calculating , May 2016
#internal function: xhap

xhaps<-function(dmat,prevpair,prevp,nexp,step,newhapnam,rownames)

{
    
    prevpair<-c(prevpair,nexp)
    prevpair<-prevpair[order(prevpair)]
    
    convec<-dmat[prevp,]
    convec[nexp]<-step
    diffvec<-rep(1,length(convec))
    diffvec[prevp]<-1
    diffvec[nexp]<--1
    convec[prevp]<-0
    tempprevpair<-prevpair
    eskiprevpair<-prevpair
    rownam<-rownames
    
    
    komsi<-numeric(0)
    
    for(s in 1:(step-1))
    {
        
        if(s>1) komsi<-(s-1):1
        addp<-matrix(c(convec+diffvec*s,komsi),1,)
        navec<-matrix(rep(NA,nrow(dmat)),1,)
        addpm<-matrix(addp[,tempprevpair],1,)
        navec[,tempprevpair]<-addpm
        rownames(navec)<-paste(rownam[1],rownam[2],sep="_")
        dmattemp<-rbind(dmat,navec)
        diffmatrix<- t(-dmattemp[newhapnam+1,]+t(dmattemp[-c(newhapnam+1),]))
        reppoi<-apply(abs(diffmatrix),1,sum,na.rm=TRUE)
        newhapnam<-newhapnam+1
        rownames(addp)<-paste(rownam[1],rownam[2],sep="_")
        rownames(addpm)<-rownames(addp)
        dmat<-rbind(dmat,navec)
        dmat<-cbind(dmat,t(cbind(navec,NA)))
        tempprevpair<-c(tempprevpair,newhapnam)
        
        
    }
    
    list(d=abs(dmat),prevpair=tempprevpair,newhapnam=newhapnam)
    
}



#update/fixing a bug
#internal function:calcclust

calcclust<-function(dmat,step=1)
{
    diag(dmat)<-0
    pairs<-which(dmat<=step,arr.ind=TRUE)
    nr<-nrow(dmat)
    komsilist<-vector("list",nr)
    
    for(i in 1:nr) komsilist[[i]]<-pairs[pairs[,2]==i,1]
    
    
    iter<-TRUE
    
    while(iter)
    {
        
        
        prevkomsilist<-komsilist
        
        for(i in 1:nr)
        {
            k<-komsilist[[i]]
            unk<-c()
            for(t in k) unk<-c(unk, komsilist[[t]])
            komsilist[[i]]<-unique(unk)
        }
        
        nr<-length(komsilist)
        for(i in nr:1)
        {
            k<-komsilist[[i]]
            unk<-c()
            for(t in k) unk<-c(unk, komsilist[[t]])
            komsilist[[i]]<-unique(unk)
        }
        
        
        iter<- !identical(prevkomsilist,komsilist)
        
    }
    
    komsilist<-unique(komsilist)
    l<-length(komsilist)
    for(i in 1:l) komsilist[[i]]<-sort(komsilist[[i]])
    komsilist<-unique(komsilist)
    l<-length(komsilist)
    
    mat<-matrix(NA,nr,2)
    for(i in 1: l)
    {
        mat[komsilist[[i]],1]<-i
        mat[komsilist[[i]],2]<-komsilist[[i]]
        
    }
    
    return(mat)
}




#internal function: recdistG

recdistG<-function(d,dmat,clustmat)
{
    
    nhap<-nrow(d)
    tempdmat<-dmat
    tempdmat[1:nhap,1:nhap]<-d
    clusters<-unique(clustmat[,1])
    
    for(i in clusters)
    {
        ind<-clustmat[clustmat[,1]==i,2]
        tempdmat[ind,ind]<-dmat[ind,ind]
    }
    return(tempdmat)
    
}


#fixing a bug in tryconnectG,  Recycling array of length 1 in vector-array arithmetic is deprecated warning.
#fixing a bug,  feb 2018
#internal function: tryconnectG
tryconnectG<-function(dmat,prevset,nexpset,nhap)
{
    scorevec<-vector("numeric",0)
    parsimscorevec<-vector("numeric",0)
    nexpvec<-vector("numeric",0)
    prevpvec<-vector("numeric",0)
    xvec<-vector("numeric",0)
    jumplist<-vector("list",0)
    
    
    for(i in 1:length(prevset))
    {
        
        prevp<-prevset[i]
        
        for(j in 1:length(nexpset))
        {
            
            nexp<-nexpset[j]
            nexpvec<-c(nexpvec,nexp)
            prevpvec<-c(prevpvec,prevp)
            
            if(prevp<=nhap)
            {
                if(nexp<=nhap)
                {
                    
                    x<-dmat[prevp,nexp]
                    jump<-dmat
                    
                    
                    jump[nexp,prevset]<-jump[prevset,nexp]<-jump[prevp,prevset]+x
                    jump[nexp,prevp]<-jump[prevp,nexp]<-x
                    if(length(nexpset)>1)
                    {
                        remmat<-matrix(jump[nexp,prevset],length(nexpset[-j]),length(prevset),byrow=TRUE)+jump[nexpset[-j],nexp]
                        jump[nexpset[-j],prevset]<-remmat
                        jump[prevset,nexpset[-j]]<-t(remmat)
                    }
                    test<-jump[nexpset[nexpset<=nhap], prevset[prevset<=nhap]]-dmat[nexpset[nexpset<=nhap], prevset[prevset<=nhap]]
                    score<-sum(test)
                    parssc<-x+sum(jump==1,na.rm=TRUE)/2
                    
                    if(any(test<0,na.rm=TRUE))
                    {
                        jump[nexpset, prevset]<-jump[prevset,nexpset]<-jump[nexpset, prevset]-min(test)
                        score<-sum(test-min(test))
                        x<-x-min(test)
                        parssc<-x+sum(jump==1,na.rm=TRUE)/2
                        
                        
                    }
                    parsimscorevec<-c(parsimscorevec,parssc)
                    scorevec<-c(scorevec,score)
                    xvec<-c(xvec,x)
                    jumplist<-c(jumplist,list(jump[nexpset, prevset]))
                }
                
                if(nexp>nhap)
                {
                    
                    minkomsi<-which.min(dmat[nexp,nexpset[nexpset<=nhap]])
                    komsi<-nexpset[nexpset<=nhap][minkomsi]
                    x<- dmat[prevp,komsi]-dmat[nexp,komsi]
                    jump<-dmat
                    jump[nexp,prevset]<-jump[prevset,nexp]<-jump[prevp,prevset]+x
                    
                    
                    jump[nexp,prevp]<-jump[prevp,nexp]<-x
                    if(length(nexpset)>1)
                    {
                        remmat<-matrix(jump[nexp,prevset],length(nexpset[-j]),length(prevset),byrow=TRUE)+jump[nexpset[-j],nexp]
                        jump[nexpset[-j],prevset]<-remmat
                        jump[prevset,nexpset[-j]]<-t(remmat)
                    }
                    test<-jump[nexpset[nexpset<=nhap], prevset[prevset<=nhap]]-dmat[nexpset[nexpset<=nhap], prevset[prevset<=nhap]]
                    score<-sum(test)
                    parssc<-x+sum(jump==1,na.rm=TRUE)/2
                    
                    if(any(test<0,na.rm=TRUE))
                    {
                        jump[nexpset, prevset]<-jump[prevset,nexpset]<-jump[nexpset, prevset]-min(test)
                        score<-sum(test-min(test))
                        x<-x-min(test)
                        parssc<-x+sum(jump==1,na.rm=TRUE)/2
                        
                        
                    }
                    parsimscorevec<-c(parsimscorevec,parssc)
                    scorevec<-c(scorevec,score)
                    xvec<-c(xvec,x)
                    jumplist<-c(jumplist,list(jump[nexpset, prevset]))
                }
            }
            
            
            if(prevp>nhap)
            {
                if(nexp<=nhap)
                {
                    minkomsi<-which.min(dmat[prevp,prevset[prevset<=nhap]])
                    komsi<-prevset[prevset<=nhap][minkomsi]
                    x<- dmat[nexp,komsi]-dmat[prevp,komsi]
                    jump<-dmat
                    jump[nexp,prevset]<-jump[prevset,nexp]<-jump[prevp,prevset]+x
                    
                    
                    
                    jump[nexp,prevp]<-jump[prevp,nexp]<-x
                    if(length(nexpset)>1)
                    {
                        remmat<-matrix(jump[nexp,prevset],length(nexpset[-j]),length(prevset),byrow=TRUE)+jump[nexpset[-j],nexp]
                        jump[nexpset[-j],prevset]<-remmat
                        jump[prevset,nexpset[-j]]<-t(remmat)
                    }
                    test<-jump[nexpset[nexpset<=nhap], prevset[prevset<=nhap]]-dmat[nexpset[nexpset<=nhap], prevset[prevset<=nhap]]
                    score<-sum(test)
                    parssc<-x+sum(jump==1,na.rm=TRUE)/2
                    
                    if(any(test<0,na.rm=TRUE))
                    {
                        jump[nexpset, prevset]<-jump[prevset,nexpset]<-jump[nexpset, prevset]-min(test)
                        score<-sum(test-min(test))
                        x<-x-min(test)
                        parssc<-x+sum(jump==1,na.rm=TRUE)/2
                        
                    }
                    parsimscorevec<-c(parsimscorevec,parssc)
                    scorevec<-c(scorevec,score)
                    xvec<-c(xvec,x)
                    jumplist<-c(jumplist,list(jump[nexpset, prevset]))
                }
                
                
                
                
                if(nexp>nhap)
                {
                    minkomsiP<-which.min(dmat[prevp,prevset[prevset<=nhap]])
                    komsiP<-prevset[prevset<=nhap][minkomsiP]
                    minkomsiN<-which.min(dmat[nexp,nexpset[nexpset<=nhap]])
                    komsiN<-nexpset[nexpset<=nhap][minkomsiN]
                    x<- dmat[komsiN,komsiP] - dmat[prevp,komsiP]-dmat[nexp,komsiN]
                    jump<-dmat
                    jump[nexp,prevset]<-jump[prevset,nexp]<-jump[prevp,prevset]+x
                    
                    
                    
                    jump[nexp,prevp]<-jump[prevp,nexp]<-x
                    if(length(nexpset)>1)
                    {
                        remmat<-matrix(jump[nexp,prevset],length(nexpset[-j]),length(prevset),byrow=TRUE)+jump[nexpset[-j],nexp]
                        jump[nexpset[-j],prevset]<-remmat
                        jump[prevset,nexpset[-j]]<-t(remmat)
                    }
                    test<-jump[nexpset[nexpset<=nhap], prevset[prevset<=nhap]]-dmat[nexpset[nexpset<=nhap], prevset[prevset<=nhap]]
                    score<-sum(test)
                    parssc<-x+sum(jump==1,na.rm=TRUE)/2
                    
                    if(any(test<0,na.rm=TRUE))
                    {
                        jump[nexpset, prevset]<-jump[prevset,nexpset]<-jump[nexpset, prevset]-min(test)
                        score<-sum(test-min(test))
                        x<-x-min(test)
                        parssc<-x+sum(jump==1,na.rm=TRUE)/2
                        
                    }
                    parsimscorevec<-c(parsimscorevec,parssc)
                    scorevec<-c(scorevec,score)
                    xvec<-c(xvec,x)
                    jumplist<-c(jumplist,list(jump[nexpset, prevset]))
                    
                }
                
            }
            
            
        }
        
        
    }
    
    minnetlen<-min(parsimscorevec)
    
    maxpar<-which(parsimscorevec==minnetlen)
    best<-maxpar[which.min(scorevec[maxpar])]
    
    return(list(scorevec=scorevec[best],parsimvec=parsimscorevec[best],prevpvec=prevpvec[best],nexpvec=nexpvec[best],xvec=xvec[best],jump=jumplist[best]))
    
}


#bug fixed in calculating maximum connection limit,it was removed from clustersG function May 2016
#internal function: clustersG

clustersG<-function(d,nhap)
{
    test<-geodist.corr(d)-d
    test[is.na(test)]<-Inf
    if(length(test)>0) if(min(test,na.rm=TRUE)<0) stop("inconsistency in distance matrix, missing data in DNA sequences may cause problem when using algorithmic method. Consider using heuristic method.")
    
    dmat<-d
    clustmat<- calcclust(dmat,1)
    clusters<-unique(clustmat[,1])
    if(length(clusters)==1) loop<-FALSE else loop<-TRUE
    lind<-0
    while(loop)
    {
        
        
        
        dmat<-recdistG(d,dmat,clustmat)
        tempdmat<-dmat
        newhapnam<-nrow(dmat)
        clusters<-unique(clustmat[,1])
        clustcombn<-combn(clusters,2)
        scorevec<-vector("numeric",0)
        parsimvec<-vector("numeric",0)
        prevpvec<-vector("numeric",0)
        nexpvec<-vector("numeric",0)
        xvec<-vector("numeric",0)
        nexpsetlist<-vector("list",0)
        prevsetlist<-vector("list",0)
        jumplist<-vector("list",0)
        
        for(k in 1:ncol(clustcombn))
        
        {
            ind<-clustcombn[,k]
            size1<-sum(clustmat[,1]==ind[1])
            size2<-sum(clustmat[,1]==ind[2])
            if(size1>size2)
            {
                clA<-clustmat[clustmat[,1]==ind[2],2]
                clB<-clustmat[clustmat[,1]==ind[1],2]
            }
            
            if(size1<=size2)
            {
                clA<-clustmat[clustmat[,1]==ind[1],2]
                clB<-clustmat[clustmat[,1]==ind[2],2]
            }
            
            nexpset<-clA
            prevset<-clB
            
            trG.obj<-tryconnectG(dmat,prevset,nexpset,nhap)
            scorevec<-c(scorevec,trG.obj$scorevec)
            parsimvec<-c(parsimvec,trG.obj$parsimvec)
            prevpvec<-c(prevpvec,trG.obj$prevpvec)
            nexpvec<-c(nexpvec,trG.obj$nexpvec)
            xvec<-c(xvec,trG.obj$xvec)
            nexpsetlist<-c(nexpsetlist,list(nexpset))
            prevsetlist<-c(prevsetlist,list(prevset))
            jumplist<-c(jumplist,trG.obj$jump)
            
        }
        
        
        
        minnetlen<-min(parsimvec)
        maxpar<-which(parsimvec==minnetlen)
        best<-maxpar[which.min(scorevec[maxpar])]
        prevset<-prevsetlist[[best]]
        nexpset<-nexpsetlist[[best]]
        prevp<-prevpvec[best]
        nexp<-nexpvec[best]
        x<-xvec[best]
        jump<-jumplist[[best]]
        
        
        tempdmat[nexpset,prevset]<-jump
        tempdmat[prevset,nexpset]<-t(jump)
        
        
        if(x>1)
        {
            
            tempnet<-tryconnect(tempdmat,newhapnam,step=x,prevp=prevp,nexp,c(nexp,prevp),nhap)
            tempdmat<-tempnet$d
            newhaps<-(nrow(dmat)+1):nrow(tempdmat)
            
            remprevpset<-prevset[!prevset==prevp]
            
            if(length(remprevpset)>0)
            {
                
                addh<-tempdmat[prevp,remprevpset]
                addxh<-tempdmat[prevp,newhaps]
                recdmat<-matrix(addxh,length(addh),length(addxh),byrow=TRUE)+addh
                
                tempdmat[remprevpset,newhaps]<-recdmat
                tempdmat[newhaps,remprevpset]<-t(recdmat)
            }
            remnexpset<-nexpset[!nexpset==nexp]
            
            if(length(remnexpset)>0)
            {
                
                addh<-tempdmat[nexp,remnexpset]
                addxh<-tempdmat[nexp,newhaps]
                recdmat<-matrix(addxh,length(addh),length(addxh),byrow=TRUE)+addh
                
                tempdmat[remnexpset,newhaps]<-recdmat
                tempdmat[newhaps,remnexpset]<-t(recdmat)
            }
        }
        
        
        dmat<-tempdmat
        clustmat<- calcclust(dmat,1)
        
        
        if(length(unique(clustmat[,1]))==1) loop<-FALSE
        
        
    }
    
    return(dmat)
}




#bug fixed in calculating slot d, May 2016
#internal function: parsnet
#prob can be NULL
parsnet<-function(d,seqlength,prob=.95)
{
    
    d<-as.matrix(d)
    diag(d)<-NA
    rwnd<-rownames(d)
    if(is.null(rwnd)) rownames(d)<-1:nrow(d)
    nhaps<-vector("numeric",0)
    nhap<-nrow(d)
    if(!is.null(prob))
    {
        tempprobs<-steplimit(seqlength,prob)
        conlim<-length(tempprobs)
        clusters<-calcclust(d,step=conlim)
        
        if(all(clusters[,1]==1))
        {
            
            dmat<-clustersG(d,nhap)
            dmat<-geodist.corr(dmat)
            nm<-paste("net",1,sep="")
            grouplist<-list(1:nrow(d))
            dmatlist<-list(dmat)
            names(grouplist)<-nm
            names(dmatlist)<-nm
            numnet<-length(dmatlist)
            for(i in 1:numnet) rownames(dmatlist[[i]])<- ren.dupli(rownames(dmatlist[[i]]))
            
            return(list(d=dmatlist,tempProbs=tempprobs,conlimit=conlim,prob=prob,nhap=nhap,rowindex=grouplist))
        } else {
            
            cl<-unique(clusters[,1])
            dmatlist<-vector("list",length(cl))
            grouplist<-vector("list",length(cl))
            for(i in 1: length(cl))
            {
                group<-clusters[clusters[,1]==cl[i],2]
                dma<-d[group,group,drop=FALSE]
                dmat<-clustersG(dma,length(group))
                dmat<-geodist.corr(dmat)
                grouplist[[i]]<-group
                dmatlist[[i]]<-dmat
                nhaps<-c(nhaps,length(group[group<=nhap]))
            }
            nm<-paste("net",1:length(cl),sep="")
            names(grouplist)<-nm
            names(dmatlist)<-nm
            numnet<-length(dmatlist)
            for(i in 1:numnet) rownames(dmatlist[[i]])<- ren.dupli(rownames(dmatlist[[i]]))
            
            return(list(d=dmatlist,tempProbs=tempprobs,conlimit=conlim,prob=prob,nhap=nhaps,rowindex=grouplist))
        }
    } else {
        
        
        tempprobs<-numeric(0)
        conlim<-Inf
        dmat<-clustersG(d,nhap)
        dmat<-geodist.corr(dmat)
        nm<-paste("net",1,sep="")
        grouplist<-list(1:nhap)
        dmatlist<-list(dmat)
        names(grouplist)<-nm
        names(dmatlist)<-nm
        
        numnet<-length(dmatlist)
        for(i in 1:numnet) rownames(dmatlist[[i]])<- ren.dupli(rownames(dmatlist[[i]]))
        
        
        return(list(d=dmatlist,tempProbs=tempprobs,conlimit=conlim,prob=0,nhap=nhap,rowindex=grouplist))
        
    }
    
}






#class Parsimnet

setClass(Class="Parsimnet", representation=representation(d="list",tempProbs="numeric",conlimit="numeric",prob="numeric",nhap="numeric",rowindex="list"))



#Set initialize Class Haplotype
setMethod("initialize", "Parsimnet", function(.Object,d=list(),tempProbs=numeric(0),conlimit=numeric(0),prob=numeric(0), nhap=numeric(0),rowindex=list())
{
    .Object@d<-d
    .Object@tempProbs<-tempProbs
    .Object@conlimit<-conlimit
    .Object@prob<-prob
    .Object@nhap<-nhap
    .Object@rowindex<-rowindex
    .Object
})



#fixing a bug
#Show method for Dna objects
setMethod("show","Parsimnet", function(object)
{
    
    cat("*** S4 Object of Class Parsimnet ***\n\n")
    cat("\nNumber of networks: \n")
    cat(length(object@d),"\n")
    cat("\nCalculated maximum connection steps at:",100*object@prob,"%:","\n")
    cat(object@conlimit,"\n")
    cat("\nNumber of haplotypes in each network: \n")
    cat(object@nhap,"\n")
    cat("\nNumber of intermediates in each network: \n")
    if(length(object@d)) cat(sapply(object@d,nrow)-object@nhap,"\n")
    cat("\nScore of each network (number of vertices minus one): \n")
    if(length(object@d)) cat(length(object),"\n")
    cat("\n\nslots of an object Parsimnet:\n", slotNames(object))
    
    
})


#new method 2016, cnrakt
#ren.dupli
ren.dupli<-function(x)
{
    
    rn<-x
    tb<-table(rn)
    if(any(tb>1))
    {
        dupl<-which(tb>1)
        
        for(t in 1:length(dupl))
        {
            namdup<-names(tb[dupl[t]])
            
            rn[rn==namdup][-1]<-paste(namdup,"_",2:tb[dupl[t]],sep="")
            
        }
        
        #warning("Duplicate names are renamed.")
    }
    
    rn
}


#re-run the codes after fixing bugs/update
#Generic parsimnet

setGeneric (
name= "parsimnet",
def=function(x,...)standardGeneric("parsimnet")
)


#parsimnet method for Dna objects
#Notes: ... additional arguments to phangorn's

setMethod(f="parsimnet", signature= "Dna", definition=function(x,indels="sic",prob=.95)
{
    
    

    if(length(prob)>1||(prob<0.01|prob>0.99)&&(!is.null(prob))) stop("probability must be NULL or a numeric vector of length 1, in the range [0.01,0.99]")
    h<-haplotypes::haplotype(x,indels=indels)
    d<-h@d
    
    
    
        netw<-parsnet(d,x@seqlengths[1],prob=prob)
        
        return(new("Parsimnet",d=netw$d,tempProbs=netw$tempProbs,conlimit=netw$conlimit,prob=netw$prob, nhap=netw$nhap,rowindex=netw$rowindex))
        
    
    
}
)


#parsimnet method for matrix objects

setMethod(f="parsimnet", signature= "matrix", definition=function(x,seqlength,prob=.95)
{
    
    if(length(prob)>1||(prob<0.01|prob>0.99)&&(!is.null(prob))) stop("probability must be NULL or a numeric vector of length 1, in the range [0.01,0.99]")
    
    h<-haplotypes::haplotype(x)
    d<-h@d
    netw<-parsnet(d,seqlength=seqlength,prob=prob)
    new("Parsimnet",d=netw$d,tempProbs=netw$tempProbs,conlimit=netw$conlimit,prob=netw$prob,nhap=netw$nhap,rowindex=netw$rowindex)
    
}
)



#parsimnet method for dist objects

setMethod(f="parsimnet", signature= "dist", definition=function(x,seqlength,prob=.95)
{
    if(length(prob)>1||(prob<0.01|prob>0.99)&&(!is.null(prob))) stop("probability must be NULL or a numeric vector of length 1, in the range [0.01,0.99]")
    h<-haplotypes::haplotype(x)
    d<-h@d
    netw<-parsnet(d,seqlength=seqlength,prob=prob)
    new("Parsimnet",d=netw$d,tempProbs=netw$tempProbs,conlimit=netw$conlimit,prob=netw$prob,nhap=netw$nhap,rowindex=netw$rowindex)
}
)




#plot method for Parsimnet objects

setMethod(f="plot", signature(x = "Parsimnet", y = "missing"), definition=function(x,y,net=1,inter.labels=FALSE,...)
{
    
    dots <- list(...)
    arg<-names(dots)
    netnum<-length(x@d)
    i<-net
    netd<-x@d
    nm<-names(netd)[i]
    net<-netd[[i]]
    nhap<-x@nhap[i]
    rn<-rownames(net)
    net[net!=1]<-0
    if(nrow(net)==1) net[is.na(net)]<-1
    g<-network(net)
    if(!inter.labels&&(length(rn)>nhap)) rn[(nhap+1):length(rn)]<-NA
    if(!any(arg=="label")) dots$label<-rn
    if(!any(arg=="usearrows")) dots$usearrows <- FALSE
    if(nrow(net)>1) { if(!any(arg=="mode")) dots$mode<-"kamadakawai" } else   if(!any(arg=="mode")) dots$mode<-"circle"
    if(!any(arg=="pad")) dots$pad <- 1
    if(!any(arg=="label.cex")) dots$label.cex<-0.75
    if(!any(arg=="vertex.cex")) dots$vertex.cex <- c(rep(0.8,nhap),rep(0.5,nrow(net)-nhap))
    if(!any(arg=="vertex.col"))dots$vertex.col<-c(rep(2,nhap),rep(4,nrow(net)-nhap))
    if(!any(arg=="main")) dots$main<-nm
    args <- c(list(x = g),dots)
    do.call(plot, args)
    
}
)

#bug in col, fixed, may 2021
#new function: pieplot
#Generic pieplot

setGeneric (
name= "pieplot",
def=function(x,y,...)standardGeneric("pieplot")
)



setMethod(f="pieplot", signature= c("Parsimnet","Haplotype"), definition=function(x,y,net=1,factors, coord = NULL,inter.labels=FALSE,interactive=FALSE, rex=1,...)
{
    
    #Some internal structures of pieplot is taken from package network, author Carter T. Butts.
    
    p<-x
    h<-y
    dots <- list(...)
    arg<-names(dots)
    
    ns<-sum(h@freq)
    g<-grouping(h,factors)$hapmat
    lev<-ncol(g)
    hi<-p@rowindex[[net]]
    g<-g[hi,]
    rmc<-(apply(g,2,sum)>0)
    g<-g[, rmc]
    
    nhap<-p@nhap[net]
    
    d<- p@d[[net]]
    rn<-rownames(d)
    rownames(d)<-rn
    colnames(d)<-rn
    nm<-names(p@d[net])
    
    d[d!=1]<-0
    if(nrow(d)==1) net[is.na(d)]<-1
    
    d[lower.tri(d)]<-0
    con<-which(d==1,arr.ind=TRUE)
    
    nw<-network::network(d)
    
    if(!any(arg=="col")) dots$col<- rainbow(lev)
    dots$col<- dots$col[rmc]
    if(!inter.labels&&(length(rn)>nhap)) rn[(nhap+1):length(rn)]<-NA
    if(!any(arg=="label")) dots$label<-rn
    if(nrow(d)>1) { if(!any(arg=="mode")) dots$mode<-"fruchtermanreingold" } else   if(!any(arg=="mode")) dots$mode<-"circle"
    if(!any(arg=="pad")) dots$pad <- 0.5
    if(!any(arg=="label.cex")) dots$label.cex<-1
    if(!any(arg=="edge.col")) dots$edge.col<-1
    if(!any(arg=="edge.lwd")) dots$edge.lwd<-1
    if(!any(arg=="edge.lty")) dots$edge.lty<-1
    if(!any(arg=="vertex.col")) dots$vertex.col<-1
    if(!any(arg=="label.col")) dots$label.col<-1
    if(!any(arg=="label.pos")) dots$label.pos<-0
    if(!any(arg=="label.pad")) dots$label.pad<-1
    if(!any(arg=="displaylabels")) dots$displaylabels <-TRUE
    if(!any(arg=="xlab")) dots$xlab = ""
    if(!any(arg=="ylab")) dots$ylab = ""
    if(!any(arg=="vertex.sides")) dots$vertex.sides<-50
    if(!any(arg=="edges")) dots$edges<-200
    
    if(!any(arg=="radius"))
    {
        rad<-apply(g,1,sum)*rep(0.8,nhap)
        dots$radius<- rex*rad/max(rad)
    }
    if(!any(arg=="vertex.cex"))
    {
        
        if(nrow(d)-nhap) dots$vertex.cex <- c(rep(0.5,nrow(d)-nhap))*min(dots$radius)
    }
    
    dots$radius<-c(dots$radius,dots$vertex.cex)
    
    
    if(is.null(coord))
    {
        if(dots$mode=="circle") coords<-network::network.layout.circle(nw=nw, layout.par=dots$layout.par)
        if(dots$mode=="fruchtermanreingold") coords<-network::network.layout.fruchtermanreingold(nw=nw, layout.par=dots$layout.par)
        if(dots$mode=="kamadakawai") coords<-network::network.layout.kamadakawai(nw=nw, layout.par=dots$layout.par)
    } else coords<-coord
    
    cx <- coords[, 1]
    cy <- coords[, 2]
    
    xlim<-range(cx)
    ylim<-range(cy)
    xlim<-xlim+c(-max(dots$radius)/2-dots$pad ,max(dots$radius)/2+dots$pad)
    ylim<-ylim+c(-max(dots$radius)/2-dots$pad ,max(dots$radius)/2+dots$pad)
    plot(0, 0, xlim = xlim, ylim = ylim, type = "n", xlab =  dots$xlab, ylab =  dots$ylab, asp = 1, axes = FALSE,main=nm)
    
    x1<-cx[con[,1]];x2<-cx[con[,2]]
    y1<-cy[con[,1]];y2<-cy[con[,2]]
    
    for(i in 1:length(x1)) lines(c(x1[i],x2[i]), c(y1[i],y2[i]),col=dots$edge.col,lwd=dots$edge.lwd, lty= dots$edge.lty)
    
    
    par.args<-names(par())
    plot.network.default.args<-names(formals(network::plot.network.default))
    args1<-union(par.args, plot.network.default.args)
    
    floating.pie.args<-names(formals(plotrix::floating.pie))
    polygon.args<-names(formals(graphics::polygon))
    args2<-union(floating.pie.args, polygon.args)
    
    arg1_2<-setdiff(args1, args2)
    #localfloating.pie<-function()
    
    localfloating.pie<-function(bos) bos
    
    eval(parse(text=paste("localfloating.pie<-","function", "(","...,", paste( arg1_2,collapse=",",sep=" "),")","{","plotrix::floating.pie(...)","}",collapse="")))
    
    rds<-dots$radius
    cls<-dots$col
    dots2<-dots[is.na(match(names(dots),c("radius","col")))]
    
    for(i in 1: nhap)
    {
        wg<-g[i,]>0
        args.flp <- c(list(xpos = cx[i],ypos=cy[i],x=g[i,][wg], radius=rds[i],col= cls[wg],startpos=0),c(dots2,list(main=nm)))
        do.call(localfloating.pie,  args.flp)
    }
    
    
    if(nrow(d)-nhap) network::network.vertex(cx[-c(1:nhap)], cy[-c(1:nhap)], radius=dots$vertex.cex , sides = dots$vertex.sides, border = 1, col = dots$vertex.col, lty = 1, rot = 0, lwd = 1)
    
    
    
    
    if (dots$displaylabels & (!all(dots$label == ""))) {
        
        if (dots$label.pos == 0) {
            xhat <- yhat <- rhat <- rep(0, nrow(d))
            xoff <- cx - mean(cx)
            yoff <- cy - mean(cy)
            roff <- sqrt(xoff^2 + yoff^2)
            for (i in (1:nrow(d))) {
                ij <- unique(c(d[d[, 2] == i & d[, 1] != i, 1],
                d[d[, 1] == i & d[, 2] != i, 2]))
                ij.n <- length(ij)
                if (ij.n > 0) {
                    for (j in ij) {
                        dx <- cx[i] - cx[j]
                        dy <- cy[i] - cy[j]
                        dr <- sqrt(dx^2 + dy^2)
                        xhat[i] <- xhat[i] + dx/dr
                        yhat[i] <- yhat[i] + dy/dr
                    }
                    xhat[i] <- xhat[i]/ij.n
                    yhat[i] <- yhat[i]/ij.n
                    rhat[i] <- sqrt(xhat[i]^2 + yhat[i]^2)
                    if (!is.nan(rhat[i]) && rhat[i] != 0) {
                        xhat[i] <- xhat[i]/rhat[i]
                        yhat[i] <- yhat[i]/rhat[i]
                    }
                    else {
                        xhat[i] <- xoff[i]/roff[i]
                        yhat[i] <- yoff[i]/roff[i]
                    }
                }
                else {
                    xhat[i] <- xoff[i]/roff[i]
                    yhat[i] <- yoff[i]/roff[i]
                }
                if (is.nan(xhat[i]) || xhat[i] == 0)
                xhat[i] <- 0.01
                if (is.nan(yhat[i]) || yhat[i] == 0)
                yhat[i] <- 0.01
            }
            
            xhat <- xhat
            yhat <- yhat
        }
        else if (dots$label.pos < 5) {
            xhat <- switch(dots$label.pos, 0, -1, 0, 1)
            yhat <- switch(dots$label.pos, -1, 0, 1, 0)
        }
        else if (dots$label.pos == 6) {
            xoff <- cx - mean(cx)
            yoff <- cy- mean(cy)
            roff <- sqrt(xoff^2 + yoff^2)
            xhat <- xoff/roff
            yhat <- yoff/roff
        }
        else {
            xhat <- 0
            yhat <- 0
        }
        os <- par()$cxy * mean(dots$label.cex, na.rm = TRUE)
        lw <- strwidth(dots$label, cex = dots$label.cex)/2
        lh <- strheight(dots$label, cex = dots$label.cex)/2
        
        text(cx + xhat * rds + (lh * dots$label.pad + lw) * ((xhat > 0) - (xhat < 0)), cy + yhat * rds + (lh * dots$label.pad + lh) * ((yhat > 0) - (yhat < 0)), dots$label, cex = dots$label.cex, col = dots$label.col, offset = 0)
    }
    
    
    
    if(interactive)
    
    {
        "%iin%" <- function(x, int) (x >= int[1]) & (x <= int[2])
        if (interactive && ((length(cx) > 0))) {
            os <- c(0.2, 0.4) * par()$cxy
            textloc <- c(min(cx) - dots$pad, max(cy) + dots$pad )
            tm <- "Select a vertex to move, or click \"Finished\" to end."
            tmh <- strheight(tm)
            tmw <- strwidth(tm)
            text(textloc[1], textloc[2], tm, adj = c(0, 0.5))
            fm <- "Finished"
            finx <- c(textloc[1], textloc[1] + strwidth(fm))
            finy <- c(textloc[2] - 3 * tmh - strheight(fm)/2, textloc[2] -
            3 * tmh + strheight(fm)/2)
            finbx <- finx + c(-os[1], os[1])
            finby <- finy + c(-os[2], os[2])
            rect(finbx[1], finby[1], finbx[2], finby[2], col = "white")
            text(finx[1], mean(finy), fm, adj = c(0, 0.5))
            clickpos <- unlist(locator(1))
            if ((clickpos[1] %iin% finbx) && (clickpos[2] %iin% finby)) {
                cl <- match.call()
                cl$interactive <- FALSE
                cl$coord <- cbind(cx, cy)
                return(eval.parent(cl))
            }
            else {
                clickdis <- sqrt((clickpos[1] - cx)^2 + (clickpos[2] -
                cy)^2)
                selvert <- match(min(clickdis), clickdis)
                if (all(dots$label == ""))
                dots$label <- 1:nrow(d)
                rect(textloc[1], textloc[2] - tmh/2, textloc[1] +
                tmw, textloc[2] + tmh/2, border = "white", col = "white")
                tm <- "Where should I move this vertex?"
                tmh <- strheight(tm)
                tmw <- strwidth(tm)
                text(textloc[1], textloc[2], tm, adj = c(0, 0.5))
                fm <- paste("Vertex", dots$label[selvert], "selected")
                finx <- c(textloc[1], textloc[1] + strwidth(fm))
                finy <- c(textloc[2] - 3 * tmh - strheight(fm)/2,
                textloc[2] - 3 * tmh + strheight(fm)/2)
                finbx <- finx + c(-os[1], os[1])
                finby <- finy + c(-os[2], os[2])
                rect(finbx[1], finby[1], finbx[2], finby[2], col = "white")
                text(finx[1], mean(finy), fm, adj = c(0, 0.5))
                clickpos <- unlist(locator(1))
                cx[selvert] <- clickpos[1]
                cy[selvert] <- clickpos[2]
                cl <- match.call()
                cl$coord <- cbind(cx, cy)
                return(eval.parent(cl))
                
                
            }
        }
        
        
    }
    invisible(cbind(cx, cy))
}
)



#new function: pielegend
#Generic pielegend

setGeneric (
name= "pielegend",
def=function(p,h,...)standardGeneric("pielegend")
)


setMethod(f="pielegend", signature= c("Parsimnet","Haplotype"), definition=function(p,h,net=1,factors,...)
{
    dots <- list(...)
    arg<-names(dots)
    ns<-sum(h@freq)
    g<-grouping(h,factors)$hapmat
    lev<-ncol(g)
    hi<-p@rowindex[[net]]
    g<-g[hi,]
    rmc<-(apply(g,2,sum)>0)
    fac<-levels(factor(factors))[rmc]
    
    if(!any(arg=="col")) dots$col<- rainbow(lev)
    dots$col<- dots$col[rmc]
    
    if(!any(arg=="fill")) dots$fill <-dots$col
    if(!any(arg=="legend")) dots$legend<-fac
    if(!any(arg=="x")&&!any(arg=="y")) dots$x<-"topright"
    do.call(graphics::legend, dots)
    
})



#Update
#length method for Parsimnet objects

setMethod(f="length", signature= "Parsimnet", definition=function(x)
{
    
    sapply(x@d,nrow)-1
    
}
)

#new method September 2016, cnrakt
#names method for Parsimnet objects

setMethod("names","Parsimnet", function(x)
{
    names(x@d)
})

#new method September 2016, cnrakt
#names replace method for Dna objects

setReplaceMethod("names","Parsimnet", function(x,value)
{
    if(is.numeric(value)) value<-as.character(value)
    names(x@d)<-value
    x
})
#new method September 2016, cnrakt
#rownames method for Parsimnet objects

setMethod("rownames","Parsimnet", function(x)
{
    n<-length(x@d)
    rownameslist<-vector("list",n)
    for(i in 1:n)
    {
        rownameslist[[i]]<-rownames(x@d[[i]])
        
    }
    rownameslist
})

#new method September 2016, cnrakt
#rownames replace method for Dna objects

setReplaceMethod("rownames","Parsimnet", function(x,value)
{
    
    n<-length(x@d)
    for(i in 1:n)
    {
        rn<-as.character(value[[i]])
        rownames(x@d[[i]])<-rn
        colnames(x@d[[i]])<-rn
    }
    
    return(x)
})


#Coerce Parsimnet object to a list

setMethod("as.list","Parsimnet", function(x)
{
    l<-list(x@d,x@tempProbs, x@conlimit,x@prob,x@nhap,x@rowindex)
    names(l)<-c("d","tempProbs","conlimit","prob","nhap","rowindex")
    l
})



#as.network method for parsimnet objects
#setOldClass("as.network")
setMethod(f="as.network", signature= c("Parsimnet"), definition=function(x,net=1,...)
{
    
    net<-x@d[[net]]
    rn<-rownames(net)
    nhap<-x@nhap[net]
    net[net!=1]<-0
    if(nrow(net)==1) net[is.na(net)]<-1
    g<-network(net,...)
    g
})




#as.networx
#setOldClass("as.networx")
setMethod(f="as.networx", signature= c("Parsimnet"), definition=function(x,net=1,...)
{
    
    nhap<-x@nhap[net]
    d<-x@d[[net]][1:nhap,1:nhap]
    rn<-rownames(d)
    n<-NJ(d)
    spp<-as.splits(n,...)
    netp<-as.networx(spp)
    netp
})



#new function
#internal function geodist.corr (for fixing a bug in parsimnet)
geodist.corr<-function(dmat,...)
{
    net<-dmat
    net[net!=1]<-0
    if(nrow(net)==1) net[is.na(net)]<-1
    g<-geodist(network(net),...)$gdist
    rownames(g)<-rownames(dmat)
    colnames(g)<-colnames(dmat)
    g
}



#new internal function
#internal function has.cycle
has.cycle<-function(d)
{
    
    d<-as.matrix(d)
    dupd<-!duplicated(d)
    d<-d[dupd,dupd]
    d[d!=1]<-0
    d[lower.tri(d)]<-0
    return(1+sum(d)-nrow(d))
    
}



#new internal function
#internal function:  ext.nodes
ext.nodes<-function(d)
{
    d<-as.matrix(d)
    dupd<-!duplicated(d)
    if(sum(dupd)<3) return(rownames(d))
    d<-d[dupd,dupd]
    d[d!=1]<-0
    lab<-(rownames(d))
    r.s<-rowSums(d)
    extseqslabs<-match(lab[r.s==1],rownames(d))
    return(rownames(d)[extseqslabs])
}




#new function: homopoly
#Generic homopoly

setGeneric (
name= "homopoly",
def=function(x,...)standardGeneric("homopoly")
)



setMethod(f="homopoly", signature= c("Dna"), definition=function(x,indels="sic",...)
{
    
    indmet<- c("sic","5th","missing")
    matchmet <- pmatch(indels, indmet)
    if (is.na(matchmet))
    stop("invalid indel coding method", paste("", indels))
    indels<-indmet[matchmet]
    x<-haplotypes::as.dna(haplotypes::haplotype(x,indels=indels))
    dat<-haplotypes::as.phyDat(x,indels=indels)
    p<-parsimnet(x,indels=indels,prob=NULL,...)
    tree<-ape::nj(p@d[[1]][1:p@nhap,1:p@nhap])
    ci<-phangorn::CI(tree,dat,sitewise=TRUE)
    homop<-which(ci<1)
    
    ind<-homop[homop>ncol(x)]
    
    if(length(ind))
    {
        indn<-ind-ncol(x)
        ind<-ci[ind]
        names(ind)<-indn
    }
    
    su<-homop[homop<=ncol(x)]
    sun<-su
    su<-ci[su]
    names(su)<-sun
    
    
    
    return(c(indels=list(ind), subs=list(su)))
    
    
})
Any scripts or data that you put into this service are public.
haplotypes documentation built on Nov. 5, 2025, 5:36 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
haplotypes
Manipulating DNA Sequences and Estimating Unambiguous Haplotype Network with Statistical Parsimony

R/classparsimnet.r
In haplotypes: Manipulating DNA Sequences and Estimating Unambiguous Haplotype Network with Statistical Parsimony

Defines functions ext.nodes has.cycle geodist.corr ren.dupli parsnet clustersG tryconnectG recdistG calcclust xhaps fillna tryconnect steplimit

Documented in calcclust clustersG ext.nodes fillna geodist.corr has.cycle parsnet recdistG ren.dupli steplimit tryconnect tryconnectG xhaps

Try the haplotypes package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

haplotypes Manipulating DNA Sequences and Estimating Unambiguous Haplotype Network with Statistical Parsimony

R/classparsimnet.r In haplotypes: Manipulating DNA Sequences and Estimating Unambiguous Haplotype Network with Statistical Parsimony

Defines functions ext.nodes has.cycle geodist.corr ren.dupli parsnet clustersG tryconnectG recdistG calcclust xhaps fillna tryconnect steplimit

Documented in calcclust clustersG ext.nodes fillna geodist.corr has.cycle parsnet recdistG ren.dupli steplimit tryconnect tryconnectG xhaps

Try the haplotypes package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

haplotypes
Manipulating DNA Sequences and Estimating Unambiguous Haplotype Network with Statistical Parsimony

R/classparsimnet.r
In haplotypes: Manipulating DNA Sequences and Estimating Unambiguous Haplotype Network with Statistical Parsimony