R/objectives.R

Defines functions probabilities.function empty.alleles relatedness.factors genotype.factors likelihood.constructs.per.locus create.likelihood.per.locus penalties create.likelihood.vectors create.likelihood create.likelihood.log

Documented in create.likelihood create.likelihood.log create.likelihood.vectors penalties

probabilities.function <- function(hypothesis, cons, doR=FALSE) {
  # Creates a probability function.
  #
  # The probability function computes all probabilities associated with a
  # specific set of genotypes. 
  #
  # In practice, this function makes is easy to substitute C vs R
  # implementations, as well as specialize the C implementations. 

  if((!hypothesis$doDropin) && !doR) {
    probabilities.no.dropin.C <- function(res, vDoseDropout, csp, unc, ...) {
        if(length(res) != ncol(vDoseDropout))
          stop("output vector and vDoseDropout have incompatible sizes.")
        if(length(csp) != nrow(vDoseDropout))
          stop("csp and vDoseDropout have incompatible sizes.")
        if(length(unc) != nrow(vDoseDropout))
          stop("unc and vDoseDropout have incompatible sizes.")
        if(any(dim(vDoseDropout) != dim(cons$zeroAll)))
          stop("expected vDoseDropout and zeroAll to match.")
        .Call(.cpp.probabilitiesNoDropin, res, vDoseDropout, !csp & !unc, csp,
              cons$zeroAll, PACKAGE="likeLTD") 
    }
    return(probabilities.no.dropin.C) 
  } else if(hypothesis$doDropin && !doR) {
    probabilities.with.dropin.C <- function(res, vDoseDropout, csp, unc, rate) {
       if(length(res) != ncol(vDoseDropout))
         stop("output vector and vDoseDropout have incompatible sizes.")
       if(length(csp) != nrow(vDoseDropout))
         stop("csp and vDoseDropout have incompatible sizes.")
       if(length(unc) != nrow(vDoseDropout))
         stop("unc and vDoseDropout have incompatible sizes.")
       if(any(dim(vDoseDropout) != dim(cons$zeroAll)))
         stop("expected vDoseDropout and zeroAll to match.")
       if(any(dim(cons$freqMat) != dim(cons$zeroAll)))
         stop("expected vDoseDropout and zeroAll to match.")
       .Call(.cpp.probabilitiesWithDropin, res, vDoseDropout, !csp & !unc, csp,
             cons$zeroAll, cons$freqMat, rate, PACKAGE="likeLTD")
     }
    return(probabilities.with.dropin.C)
  }
  # Otherwise, return an R function. 
  function(res, vDoseDropout, csp, unc, rate) {
     res <- res                                             *
            selective.col.prod(!csp & !unc, vDoseDropout)   *
            selective.col.prod(csp, 1 - vDoseDropout)  
    if(hypothesis$doDropin) 
      res <- res                                                           *
             selective.col.prod(csp & cons$zeroAll, rate * cons$freqMat)   *
             selective.col.prod(!csp & !unc & cons$zeroAll,
                                1 - rate * cons$freqMat)
    res
  }
}


empty.alleles = function(genotypes, dropoutPresence, nUnknowns) {
  # Mask over genotype matrix indicating empty/null elements.
  #
  # Parameters:
  #   genotypes: Matrix of indices into dosage matrix.
  #   dropoutPresence: Profiles with dropout.
  # Returns: A matrix indicating which alleles will be NA in dosage.
  if(!is.matrix(dropoutPresence)) stop("Expected matrix in input.")
  if(ncol(genotypes) == 0) return(matrix(nrow=nrow(dropoutPresence), ncol=0))
  if(nrow(dropoutPresence) != 0) knownZero = rowSums(dropoutPresence) == 0
  else knownZero = array(FALSE, ncol(genotypes))
  if(nUnknowns == 0) 
    return(matrix(knownZero, nrow=length(knownZero), ncol=ncol(genotypes)))
  # Performs following call in C:
  # iota = 1:length(knownZero)
  # apply(genotypes, 2, function(n) (!iota %in% n) & knownZero) 
  # A new object is returned. 
  .Call(.cpp.emptyAlleles, genotypes, knownZero, PACKAGE="likeLTD")
}

relatedness.factors <- function(input, genotypes, alleleDb, queriedProfile,
                                relatedness=c(0, 0)) {
  # Creates relatedness factor 
  #
  # The relatedness factor takes into account the ancestry of the queried
  # subject and modifies allele frequencies accordingly.

  if(any(abs(relatedness) > 1e-8)) {
    indices = c(which(rownames(alleleDb) %in% queriedProfile[[1]][[1]]), 
                which(rownames(alleleDb) %in% queriedProfile[[1]][[2]]) )
    frequencies = alleleDb[indices, 1]
    # treat all hom rares as het
    rareIndex = which(rownames(alleleDb)=="rares"|rownames(alleleDb)=="-1")
    if(length(rareIndex)==0) rareIndex = as.integer(0)    

    .Call(.cpp.relatednessFactors, input, relatedness, genotypes, indices,
          frequencies, rareIndex, PACKAGE="likeLTD")
  }
  input 
  # defactored = function(n) {
  #   result = 1.0 - sum(relatedness)
  #   hasFirst = n %in% indices[[1]]
  #   hasSecond = n %in% indices[[2]]
  #   if(any(hasFirst)) {
  #     factor = relatedness[[1]] * 0.5 / alleleDb[indices[[1]], 1]
  #     if(!all(hasFirst)) factor = factor * 0.5
  #     result = result + factor
  #   }
  #   if(any(hasSecond)) {
  #     factor = relatedness[[1]] * 0.5 / alleleDb[indices[[2]], 1]
  #     if(!all(hasSecond)) factor = factor * 0.5
  #     result = result + factor
  #   }
  #   if(any(hasFirst) & any(hasSecond)) {
  #     factor = relatedness[[2]] / alleleDb[indices[[1]], 1] /
  #              alleleDb[indices[[2]], 1] 
  #     if(indices[[1]] != indices[[2]]) factor = factor * 0.5
  #     result = result + factor
  #   }
  #   return(result)
  # }
  # apply(genotypes[1:2, ], 2, defactored) 
}

genotype.factors <- function(genotypes, alleleDb, nUnknowns, doDropin,
                             queriedProfile, relatedness=c(0, 0)) {
  # Fraction cum heterozygote factors.
  #

  if(nUnknowns == 0 & !doDropin) {
    het = 1
    fractions <- matrix(alleleDb[genotypes, 1], ncol=ncol(genotypes))
    result <- apply(fractions, 2, prod) * het
  }
  else {
    # The following is performed directly in C. 
    # n = nrow(genotypes) / 2
    # odd = 2*1:n - 1; even = 2*1:n
    # het <- 1 + matrix(genotypes[odd, ] < genotypes[even, ], nrow=length(odd))
    # het <- apply(het, 2, prod)
    # fractions <- matrix(alleleDb[genotypes, 1], ncol=ncol(genotypes))
    # result <- apply(fractions, 2, prod) * het
    result <- .Call(.cpp.fractionsAndHet, genotypes, alleleDb[, 1],
                    PACKAGE="likeLTD")
  }

  relatedness.factors(result, genotypes, alleleDb, queriedProfile, relatedness)
}

likelihood.constructs.per.locus = function(hypothesis) {
  # Creates the locus-specific data needed by each likehood function.
  #
  # Parameters:
  #   hypothesis: A hypothesis, for instance one returned by
  #               prosecution.hypothesis(...) or defence.hypothesis(...)
  alleles = rownames(hypothesis$alleleDb)
  if(is.null(alleles)) stop("Could not figure out alleles names.")
  alleles.vector = function(n) alleles %in% unlist(n)
  cspPresence     = apply(hypothesis$cspProfile, 1, alleles.vector)
  dropoutPresence = apply(hypothesis$dropoutProfs, 1, alleles.vector)
  uncPresence     = apply(hypothesis$uncProf, 1, alleles.vector)
  if(!is.matrix(cspPresence))
    cspPresence = matrix(ncol=0, nrow=length(alleles))
  if(!is.matrix(dropoutPresence))
    dropoutPresence = matrix(ncol=0, nrow=length(alleles))
  if(!is.matrix(uncPresence))
    uncPresence = matrix(ncol=0, nrow=length(alleles))

  missingReps = apply(hypothesis$cspProfile, 1, is.na)

  genotypes <- compatible.genotypes(cspPresence, dropoutPresence, alleles,
                                    hypothesis$nUnknowns, hypothesis$doDropin,
                                    missingReps)
  zeroAll = empty.alleles(genotypes, dropoutPresence, hypothesis$nUnknowns) 
  # Only take into account relatedness between Q and X unedr Hd 
  if(hypothesis$hypothesis=="defence")
	{
  	factors = genotype.factors(genotypes, hypothesis$alleleDb,
                             hypothesis$nUnknowns, hypothesis$doDropin,
                             hypothesis$queriedProfile,
                             hypothesis$relatedness) 
	} else {
  	factors = genotype.factors(genotypes, hypothesis$alleleDb,
                             hypothesis$nUnknowns, hypothesis$doDropin,
                             hypothesis$queriedProfile,
                             c(0,0))
	}

  list(cspPresence=cspPresence, dropoutPresence=dropoutPresence,
       uncPresence=uncPresence, missingReps=missingReps,
       genotypes=genotypes, zeroAll=zeroAll, factors=factors,
       freqMat=hypothesis$alleleDb[, 1])
}

create.likelihood.per.locus <- function(hypothesis, addAttr=FALSE, likeMatrix = FALSE) {
  # Creates a likelyhood function for a given hypothesis and locus
  #
  # A hypothesis is given by the number of unknown contributors, whether to model
  # dropin, so on and so forth.

  cons = likelihood.constructs.per.locus(hypothesis)
  doR = !is.null(hypothesis$doR) && hypothesis$doR == TRUE
  probabilities = probabilities.function(hypothesis, cons, doR=doR)

  result.function <- function(locusAdjustment, power, dropout,
                              degradation=NULL, rcont=NULL, dropin=NULL, ...) {
    # Likelyhood function for a given hypothesis and locus
    #
    # This function is specific to the hypothesis for which it was created.
    # It hides everything except the nuisance parameters over which to
    # optimize.
    #
    # Parameters:
    #   locusAdjustment: a scalar floating point value.
    #   power: a scalar floating point value.
    #   dropout: the dropout rate for each replicate.
    #   degradation: relative degradation from each profiled individual in this
    #                hypothesis
    #   rcont: relative contribution from each profiled individual and unknown
    #          contributor in this hypothesis If it is one less than that, then
    #          an additional 1 is inserted at a position given by
    #          hypothesis$refIndiv. In general, this means either the queried
    #          individual (if subject to dropout and prosecution hypothesis) or
    #          the first individual subject to droput is the reference individual.
    #   ...: Any other parameter, e.g. for penalty functions. 
    #        These parameters are ignored here.
    # Returns: A scalar value giving the likelihood for this locus and
    #          hypothesis
    if(is.null(degradation)) degradation = c()
    if(is.null(rcont)) rcont = c()
    if(length(rcont) + 1 == hypothesis$nUnknowns
                            + ncol(cons$dropoutPresence)) {
      if(hypothesis$refIndiv == 1) rcont = c(1, rcont)
      else if(hypothesis$refIndiv > length(rcont)) rcont = c(rcont, 1)
      else rcont = c(rcont[1:hypothesis$refIndiv-1], 1,
                     rcont[hypothesis$refIndiv:length(rcont)])
    }
    if(length(rcont) != hypothesis$nUnknowns + ncol(cons$dropoutPresence))
      stop(sprintf("rcont should be %d long.",
                   hypothesis$nUnknowns + ncol(cons$dropoutPresence)))
    if(length(degradation) != hypothesis$nUnknowns +
                              ncol(cons$dropoutPresence))
      stop(sprintf("degradation should be %d long.",
                   hypothesis$nUnknowns + ncol(cons$dropoutPresence)))
    if(length(dropout) != length(cons$missingReps))
      stop(sprintf("dropout should be %d long.", ncol(cons$dropoutPresence)))
    if(any(rcont < 0)) stop("found negative relative contribution.")
    if(any(degradation < 0)) stop("found negative degradation parameter.")
    if(hypothesis$doDropin && is.null(dropin)) 
      stop("Model requires missing argument 'dropin'")
    else if(is.null(dropin)) dropin = 0
    if(hypothesis$doDropin && dropin < 0) 
      stop("Dropin rate should be between 0 and 1 (included).")
    if(power >= 0)
      stop("Power parameter cannot be positive or null.")
    if(any(dropout < 0) || any(dropout > 1)) 
      stop("Dropout rates must be between 0 and 1 (included).")
    if(length(locusAdjustment) != 1)
      stop("locusAdjustment should be a scalar")
    if(locusAdjustment < 0) stop("locusAdjustment must be positive.")

    allEPG <- all.epg.per.locus(rcont, degradation, cons$dropoutPresence,
                                hypothesis$alleleDb[, 2], cons$genotypes,
                                hypothesis$nUnknowns > 0)
    # Goes to C to do exponentiation. This is quite expensive an operation.
    # Doing it in C is only interesting for larger matrices and when compiled
    # with OpenMP. 
    # allEPG = (allEPG * locusAdjustment)^power
    # Nothing is returned by this function. Works directly on allEPG. 
    .Call(.cpp.powerAdjustment, allEPG, cons$zeroAll, locusAdjustment,
          power, PACKAGE="likeLTD")
          
    # If any allEPG are infinite change to zero
    # so unlikely that probability might as well be zero
    allEPG[which(is.infinite(allEPG))] = 0

    # res: (Partial) Likelihood per allele.
    res = array(1, length(cons$factors))
    # Loop over replicates.
    for(i in 1:length(cons$missingReps)) {
      if(!cons$missingReps[i]) {
        csp = cons$cspPresence[, i]
        unc = cons$uncPresence[, i]

        # Goes to C to do fraction. Avoids intermediate step.
        # For smaller matrices, the C code seems a bit slower. For larger
        # matrices, it can be twice as fast. 
        # vDoseDropout = allEPG * dropout[i]
        # vDoseDropout = vDoseDropout / (vDoseDropout + 1 - dropout[i])
        vDoseDropout <- .Call(.cpp.doseFraction, allEPG, cons$zeroAll, dropout[[i]],
                              PACKAGE="likeLTD")
        # When there is no reference individual the dropin rate is just the dropin probability
	      if(nrow(hypothesis$dropoutProfs)+hypothesis$nUnknowns==0) 
		      {		
		      dirate = dropin
		      } else {
		      dirate = dropin * (1 - dropout[i])
		      }
        res <- probabilities(res, vDoseDropout, csp, unc,
                             dirate)
      } # End of if(any(csp)) 
    } # End of loop over replicates.

    # Figure out likelihood for good and return.
    if(likeMatrix==FALSE)
	{
    	return(sum(res * cons$factors))
	} else {
	return(res * cons$factors)
	}
  }

  if(addAttr) {
    attr(result.function, "hypothesis") <- hypothesis
    attr(result.function, "constructs") <- cons
  }
  result.function
}


# Penalties to apply to the likelihood.
# Documentation is in man directory.
penalties <- function(locusAdjustment, power, dropout, degradation=NULL,
                      rcont=NULL, dropin=NULL, locusAdjPenalty=50,
                      dropinPenalty=2, degradationPenalty=50, bemn=-4.35,
                      besd=0.38, ...) {
  result = 1
  # Normalizes by number of loci so product of penalties same as in old code.
  # Some penalties are per locus and other for all locus. Hence this bit of run
  # around.
  normalization = 1.0 / max(length(locusAdjustment), 1)

  if(!missing(dropin) & !is.null(dropin))
    result = result * exp(-dropin * dropinPenalty * normalization)
  if(!missing(degradation) & !is.null(degradation))
    result = result * exp(-sum(degradation) * degradationPenalty 
                           * normalization )
  if(!missing(power) & !is.null(power))
    result = result * dnorm(power, bemn, besd) ^ normalization
  if(!missing(locusAdjustment) & !is.null(locusAdjustment))
    result = result * dgamma(as.vector(unlist(locusAdjustment)),
                             locusAdjPenalty, locusAdjPenalty)

  return(result)
}

# Creates a likelihood function from the input hypothesis
# Documentation is in man directory.
create.likelihood.vectors <- function(hypothesis, addAttr=FALSE, likeMatrix=FALSE, ...) {

  hypothesis = add.args.to.hypothesis(hypothesis, ...)
  sanity.check(hypothesis) # makes sure hypothesis has right type.
  locusCentric = transform.to.locus.centric(hypothesis)
  functions <- mapply(create.likelihood.per.locus, locusCentric,
                      MoreArgs=list(addAttr=addAttr, likeMatrix=likeMatrix))

  if(is.null(hypothesis$locusAdjPenalty)) hypothesis$locusAdjPenalty = 50 
  if(is.null(hypothesis$dropinPenalty)) hypothesis$dropinPenalty = 2 
  if(is.null(hypothesis$degradationPenalty)) hypothesis$degradationPenalty = 50 
  if(is.null(hypothesis$bemn)) hypothesis$bemn = -4.35
  if(is.null(hypothesis$besd)) hypothesis$besd = 0.38 

  likelihood.vectors <- function(locusAdjustment, power, dropout,
                                 degradation=NULL, rcont=NULL, dropin=NULL,
                                 locusAdjPenalty=hypothesis$locusAdjPenalty, 
				 dropinPenalty=hypothesis$dropinPenalty,
                                 degradationPenalty=hypothesis$degradationPenalty, 
				 bemn=hypothesis$bemn,
                                 besd=hypothesis$besd, ...) {
    # Call each and every function in the array.
    arguments = list(power=power, dropout=dropout,
                     degradation=degradation, rcont=rcont,
                     dropinPenalty=dropinPenalty,
                     degradationPenalty=degradationPenalty, bemn=bemn,
                     besd=besd, dropin=dropin)
    callme <- function(objective, adj) {
      args = append(arguments, list(locusAdjustment=adj))
      do.call(objective, args)
    }
    if(length(locusAdjustment) == 1)
      locusAdjustment = rep(locusAdjustment, length(functions))
    if(setequal(names(locusAdjustment), colnames(hypothesis$cspProfile)))
      locusAdjustment <- locusAdjustment[colnames(hypothesis$cspProfile)]
    objectives = mapply(callme, functions, locusAdjustment)
    arguments = append(arguments, list(locusAdjustment=locusAdjustment))
    arguments = append(arguments, list(...))
    pens <- do.call(penalties, arguments)
    list(objectives=objectives, penalties=pens)
  }
  if(addAttr) {
    attr(likelihood.vectors, "hypothesis") <- hypothesis
    attr(likelihood.vectors, "functions") <- functions
  }
  return(likelihood.vectors)
}


# Creates a likelihood function from the input hypothesis
# Documentation is in man directory.
create.likelihood <- function(hypothesis, addAttr=FALSE, ...) {

  vecfunc <- create.likelihood.vectors(hypothesis, addAttr, ...)

  likelihood.scalar <- function(...) { 
    result <- vecfunc(...) 
    prod(result$objectives * result$penalties)
  }

  attributes(likelihood.scalar) <- attributes(vecfunc)
  return(likelihood.scalar)
}

# Creates a likelihood function from the input hypothesis
# Documentation is in man directory.
create.likelihood.log <- function(hypothesis, addAttr=FALSE, ...) {

  vecfunc <- create.likelihood.vectors(hypothesis, addAttr, ...)
  likelihood.log <- function(...) { 
    result <- vecfunc(...) 
    sum(log(result$objectives)) + sum(log(result$penalties))
  }
  attributes(likelihood.log) <- attributes(vecfunc)
  return(likelihood.log)
}

Try the likeLTD package in your browser

Any scripts or data that you put into this service are public.

likeLTD documentation built on May 1, 2019, 7:58 p.m.