R/genotypeProbTable_bis.R

Defines functions genotypeProbTable_bis

Documented in genotypeProbTable_bis

#' @title genotypeProbTable_bis
#' @description function to calculate the probability of genotypes given the MOI
#' @param bbn1 bayesian network
#' @param resQQ list of results from the inference
#' @param bplot plot results
#' @param numMarkers  number of markers
#' @param freq allele frequencies
#' @importFrom graphics abline barplot layout legend par
#' @importFrom stats aggregate ecdf
#' @return matrix of genotype probabilities
#' @export
genotypeProbTable_bis <- function(bbn1,resQQ,bplot=FALSE,numMarkers=4, freq){
  sysName<-unlist(lapply(strsplit(names(resQQ),"_",fixed=TRUE),function(x){return(x[2])}))
  #PRUEBA BORRAR
  
  # (3.1) analizo diferencias entre probabilidades ALELICAS y GENOTIPICAS
  lprobPconj <- lprobMconj <- lprobP <- lprobM <- lprobG<-list()
 
  for(i in seq_along(bbn1$alelFreq)){
    ## ATENCION: tengo que comparar por separado la linea paterna y la materna!! 
    #matcheo filas de ambas tablas para comparar
    iop<-match(resQQ[[i]][,1],names(freq[[sysName[i]]]))
    iom<-match(resQQ[[i+(length(resQQ)/2)]][,1],names(freq[[sysName[i]]]))
    
    # (3.1.1) probabilidades conjuntas P(G,E) y condicionales  P(G|E) 
    pPopFreqP <- freq[[sysName[i]]][iop] #prob poblacional
    pBNetP    <- pBNetP1   <- resQQ[[i]];       # pbNet1P1:prob conjunta
    pBNetP[,"prob"]<-pBNetP[,"prob"]/sum(pBNetP[,"prob"]) #prob condicional
    
    pPopFreqM <- freq[[sysName[i]]][iom]              #prob poblacional
    pBNetM    <- pBNetM1   <- resQQ[[i+(length(resQQ)/2)]];   # pbNet1M1:prob conjunta
    pBNetM[,"prob"]<-pBNetM[,"prob"]/sum(pBNetM[,"prob"])   #prob condicional
    
    # probabilidades condicionales
    pBNp<-cbind(bnetP=pBNetP[,"prob"],pop=pPopFreqP)
    pBNm<-cbind(bnetM=pBNetM[,"prob"],pop=pPopFreqM)
    rownames(pBNp)<-names(pPopFreqP)
    rownames(pBNm)<-names(pPopFreqM)
    
    #probabilidades conjuntas
    pBNp1<-cbind(bnetP=pBNetP1[,"prob"],pop=pPopFreqP)
    pBNm1<-cbind(bnetM=pBNetM1[,"prob"],pop=pPopFreqM)
    rownames(pBNp1)<-names(pPopFreqP)
    rownames(pBNm1)<-names(pPopFreqM)
    
    
    # (3.1.2) tabla de probabilidades linea paterna y linea materna
    #notar que solo se reportan probabilidades que tienen pBNet>0
    #esto no deberia afectar el calculo de la divergencia KL (px log10(py/px) se anula si px=0 | py=0)
    lprobP[[sysName[i]]] <- pBNp
    lprobM[[sysName[i]]] <- pBNm
    
    lprobPconj[[sysName[i]]] <- pBNp1
    lprobMconj[[sysName[i]]] <- pBNm1
    
    # (3.2) prob de genotipos fBNet 
    #este codigo lo reemplace por el debajo para lidiar con un par de
    #bugs: 1) cuando las tablas de probabilidades tenian una sola fila
    #         se perdia el nombre con info del alelo
    #      2) al ramar el data.frame final podia suceder que eaux tenga diferente numero de
    #         filas que aux2 y se reciclara silenciosamente (!!!!)
    if(FALSE){
      aux    <- genotypeProbs(pBNp[,1],pBNm[,1])
      
      # prob de genotipos BNet USANDO probabilidad Conjunta 
      # (lo hago para chequear la cuenta de la KLdiv)
      auxConj <- genotypeProbs(pBNp1[,1],pBNm1[,1])
      
      ## prob de genotipos poblacionales
      aux2    <- genotypeProbs(bbn1$alelFreq[[sysName[[i]]]],bbn1$alelFreq[[sysName[[i]]]])
      
      #lprobG[[sysName[i]]]<-data.frame(geno=rownames(aux2),pop=aux2,bnet=aux,geno_conj=auxConj)
      lprobG[[sysName[i]]]<-data.frame(geno=names(aux2),pop=aux2,bnet=aux,geno_conj=auxConj)
    }else{
      # (3.2) prob de genotipos fBNet 
      #aux    <- genotypeProbs(pBNp[,1],pBNm[,1])
      pp <- pBNp[,1];names(pp) <- rownames(pBNp)
      mm <- pBNm[,1];names(mm) <- rownames(pBNm)
      aux    <- genotypeProbs(pp,mm)
      
      # prob de genotipos BNet USANDO probabilidad Conjunta 
      # (lo hago para chequear la cuenta de la KLdiv)
      #auxConj <- genotypeProbs(pBNp1[,1],pBNm1[,1])
      pp <- pBNp1[,1];names(pp) <- rownames(pBNp1)
      mm <- pBNm1[,1];names(mm) <- rownames(pBNm1)
      auxConj <- genotypeProbs(pp,mm)
      
      ## prob de genotipos poblacionales
      aux2    <- genotypeProbs(bbn1$alelFreq[[sysName[[i]]]],bbn1$alelFreq[[sysName[[i]]]])
      
      #lprobG[[sysName[i]]]<-data.frame(geno=rownames(aux2),pop=aux2,bnet=aux,geno_conj=auxConj)
      
      #corrected bug 2023/05/19
      vbnet <- vconj <- aux2 * 0 
      vbnet[names(aux)]     <- aux
      vconj[names(auxConj)] <- auxConj
      lprobG[[sysName[i]]]  <- data.frame(geno=names(aux2),pop=aux2,bnet=vbnet,geno_conj=vconj)
      
    }
    
    
    if(bplot & i<=numMarkers){     #nolint  
      barplot(t(lprobP[[sysName[i]]]),beside=TRUE,col=c("pink","gray"),main=paste("P:",sysName[i]))
      barplot(t(lprobM[[sysName[i]]]),beside=TRUE,col=c("pink","gray"),main=paste("M:",sysName[i]))
      plot(aux,aux2,log10="xy",xlab="p_geno bnet",ylab="p_geno pop")
      abline(0,1,col="grey",lty=2)
      
      legend("topleft",c(paste("KL(p_bnet,p_pop):",signif(KLd(aux,aux2),2)),
                         paste("KL(p_pop,p_bnet):",signif(KLd(aux2,aux),2))),cex=0.8,bty="n")
    }
  }
  return(list(lprobG=lprobG,lprobP=lprobP,lprobM=lprobM))
}

Try the forensIT package in your browser

Any scripts or data that you put into this service are public.

forensIT documentation built on April 4, 2025, 12:22 a.m.