R/calcTippet.R

Defines functions calcTippet

Documented in calcTippet

#' @title calcTippet
#' @author Oyvind Bleka
#' @description Performing tippet analysis for a given model
#' @details The procedure is a wrapper for the numerical integration function.
#' @param tipIdx Index in condOrder which are replaced with a random man
#' @param mlefitHp A fitted object returned from contLikMLE (under Hp)
#' @param mlefitHd A fitted object returned from contLikMLE (under Hd)
#' @param niter Number of non-contributor iterations 
#' @param type type of Tippet analysis to conduct (MLE or INT)
#' @param LRobs The user can send an observed LR to superimpose to the plot
#' @param MLEopt MLE options (must include nDone, steptol, delta, difftol, seed is optional)
#' @param INTopt INT options (must include reltol, maxEval, dev)
#' @param seed Seed used for reproducibility. Note that seed can also be a variable in MLEopt option 
#' @param verbose Whether printing simulation progress. Default is TRUE
#' @return returned non-contributor LR values
#' @export 

calcTippet = function(tipIdx, mlefitHp, mlefitHd, niter=100, type="MLE", LRobs=NULL, MLEopt=NULL, INTopt=NULL, seed = NULL, verbose=TRUE)  {
  if(!is.null(seed)) set.seed(seed)
  if(!type%in%c("MLE","INT")) stop("Type not supported!")
  
  #Obtain model variants
  maxThreads = mlefitHp$maxThreads
  if(is.null(LRobs) && type=="MLE") LRobs = (mlefitHp$fit$loglik-mlefitHd$fit$loglik)/log(10) #obtain observed LR for MLE (log10 scale)
  
  #Obtain calculation options (Note that MLE method is always done first)
  if(is.null(MLEopt)) MLEopt = list(nDone=3,steptol=1e-4,delta=1,difftol=0.01,seed=NULL)
  if(type=="INT" && is.null(INTopt)) INTopt = list(reltol=0.001, maxEval = 0, dev=3)  
  
  #Helpfunction to calculate the likelihood  
  calcLogLik = function(mod) { #noticed modified reference object
    
    #ALways calculate MLE based first      
    mle <- calcMLE(mod$nC,mod$samples,mod$popFreq,refDataRM,mod$condOrder,mod$knownRef, mod$kit, mod$DEG, mod$BWS, mod$FWS, 
                   mod$AT, mod$prC, mod$lambda, mod$fst, mod$knownRel, mod$ibd, mod$minF, mod$normalize, 
                   priorBWS = mod$priorBWS, priorFWS = mod$priorFWS, verbose=FALSE, maxThreads=maxThreads,
                   delta=MLEopt$delta, difftol=MLEopt$difftol,seed=MLEopt$seed,steptol=MLEopt$steptol, adjQbp=mod$adjQbp) 
      
    if(type=="MLE") {
      return(mle$fit$loglik)
      
      #Otherwise continue calculating 
    } else if(type=="INT") {
      lims = euroformix::getParamLimits(mle,dev=INTopt$dev)
      int <- calcINT(mod$nC,mod$samples,mod$popFreq, lims$lower, lims$upper, refDataRM, mod$condOrder,mod$knownRef, mod$kit, 
                     mod$DEG, mod$BWS, mod$FWS,  mod$AT, mod$prC, mod$lambda, mod$fst, mod$knownRel, mod$ibd, 
                     mod$minF, mod$normalize, mod$priorBWS, mod$priorFWS, verbose=FALSE, maxThreads=maxThreads,
                     reltol = INTopt$reltol, maxEval = INTopt$maxEval, scale = lims$scale, adjQbp=mod$adjQbp ) 
      
      return(mle$fit$loglik)
    }
  }
  
  #Obtain Frequency information to Randomize non-contributors (provided under Hd)
  locs <- mlefitHd$prepareC$markerNames #loci to evaluate
  fst = setNames(mlefitHd$prepareC$fst,locs) #obtain fst per marker
  popFreq = mlefitHd$model$popFreq
  refData = mlefitHd$model$refData

  refKnownIdxHd = unique(c(which(mlefitHd$model$condOrder>0),mlefitHp$model$knownRef)) #index of known  individuals under Hd (contributors and known non-contributors)
  #refKnownIdxHp = setdiff(which(mlefitHp$model$condOrder>0),tipIdx) #index of known individuals under Hp (exlude tippet index=POI)
  refRelIdx = mlefitHd$model$knownRel #index of related individual
  ibd = mlefitHd$model$ibd
  calcUnderHd = any(fst>0) #check if re-calculation under Hd is needed
  if(!calcUnderHd && !is.null(ibd) && ibd[1]<1 && length(refRelIdx)==1 && refRelIdx==tipIdx) calcUnderHd = TRUE #must recalculate under Hd if POI is the related individual
  logLikHd = mlefitHd$fit$loglik #extract likelihood value under Hd
  
  # CALCULATE OUTCOME LIST OF RANDOM MAN (RM), with corresponding probabilities
  Glist <- list() 
  for(loc in locs) {
    refDataLoc = refData[[loc]] #keep a copy (assume correct structure already)
    if(!loc%in%names(refData)) refDataLoc = lapply(refData, function(x) x[[loc]]) #other structure
    freqAll = popFreq[[loc]] #get frequencies
    
    #Need to include missing alleles (avoid bug from v4.0.8):
    alleles = c(unlist(refDataLoc[refKnownIdxHd]),unlist(refDataLoc[refRelIdx])) #get alleles
    newA = alleles[!alleles%in%names(freqAll)] #new alleles
    if(length(newA)>0) {
      newA = setNames(rep(mlefitHd$model$minF,length(newA)),newA)
      freqAll = c(freqAll,newA)
    }
    if(mlefitHd$model$normalize) freqAll = freqAll/sum(freqAll)
    Glist[[loc]] = euroformix::calcGjoint(freq=freqAll,nU=1,fst=fst[loc],refK=unlist(refDataLoc[refKnownIdxHd]),refR=unlist(refDataLoc[refRelIdx]),ibd=ibd)
  } 
  #NOTICE THE KNOWN REFERENCE(S) GIVEN AS UNDER HD  
  if(verbose) print(paste0("Calculating ",niter," LR values...")) 
  
  #Initiate PROGRESS BAR:
  progcount = 1  #counter
  progbar <- txtProgressBar(min = 0, max = niter, style = 3) #create progress bar
  logLikHpRM <- logLikHdRM <- LRRM <- NULL
  for(m in seq_len(niter)) { #for each random individual from the population
    
    refDataRM = refData #copy reference data to include genotype of random profile
    #Draw random profile
    for(loc in locs) {
      GenoLoc = Glist[[loc]]
      randIdx = sample( seq_along(GenoLoc$Gprob),1,prob=GenoLoc$Gprob) #get random index
      GenoSim = GenoLoc$G[randIdx,] #Obtain random genotype
      
      #Insert to correct structure
      if(loc%in%names(refData)) {
        refDataRM[[loc]][[tipIdx]] = GenoSim #include
      } else {
        refDataRM[[tipIdx]][[loc]] = GenoSim #include
      }
    }

    #CALCULATE UNDER Hp (and possibly under Hd)    
    logLikHp = calcLogLik(mlefitHp$model) #calculate under Hp
    if(calcUnderHd) logLikHd = calcLogLik(mod=mlefitHd$model) #calculate under Hd    

    #Store:    
    logLikHpRM[m] = logLikHp
    logLikHdRM[m] = logLikHd
    LRRM[m] = (logLikHp-logLikHd)/log(10) #convert to log10

    #Plot to figure after each x iterations:
    if(verbose && m%%(niter/10)==0)  plotTippet(LRRM[1:m],LRobs,mtxt=paste0(type,"-based"))

    progcount <- progcount + 1
    setTxtProgressBar(progbar,progcount) #update progress bar
  } #for each tippet
  stat = plotTippet(LRRM,lr0=LRobs,returnStatsOnly=TRUE)  #finally calculate statistics
  ret = list(LRobs=LRobs, LRRM=LRRM, logLikHpRM=logLikHpRM, logLikHdRM=logLikHdRM, type=type, stat=stat)  
  return(ret)
}
oyvble/euroformix documentation built on Aug. 25, 2023, 11:14 a.m.