R/functions.R

gene.drop.fn <- function(g1,g2){
  if( is.na(g1) | is.na(g2) ){
    goff <- NA
  }else if( g1 == 0 && g2 == 1 ){
    goff <- sample( x = 0:1, size = 1, prob = rep(0.5,2) )
  }else if( g1 == 1 && g2 == 0 ){
    goff <- sample( x = 0:1, size = 1, prob = rep(0.5,2) )
  }else if( g1 == 1 && g2 == 1 ){
    goff <- sample( x = 0:2, size = 1, prob = c(0.25,0.5,0.25) )
  }else if( g1 == 0 && g2 == 0 ){
    goff <- 0
  }else if( g1 == 2 && g2 == 0 ){
    goff <- 1
  }else if( g1 == 0 && g2 == 2 ){
    goff <- 1
  }else if( g1 == 1 && g2 == 2 ){
    goff <- sample( x = 1:2, size = 1, prob = rep(0.5,2) )
  }else if( g1 == 2 && g2 == 1 ){
    goff <- sample( x = 1:2, size = 1, prob = rep(0.5,2) )
  }else if( g1 == 2 && g2 == 2 ){
    goff <- 2
  }else{
    goff <- "error"
  }
  return(goff)
}

GeneDropSim.fn <- function(trio.list, id, dt.vec, fd.indices, carriers=dt.vec, n = 1e3, k = 10, nf = 1){
  n.bail <- k*n; i <- 1;
  share.vec <- logical(n.bail); occur.vec <- logical(n.bail);
  geno.vec = rep(NA,length(id))
  names(geno.vec) = id
  geno.vec[fd.indices] = 0

  while( sum(occur.vec) < n & i <= n.bail ){
      founder <- sample(fd.indices,nf,replace = FALSE)
      geno.vec[founder] <- 1
      geno.vec.sim = geno.vec
      for (j in 1:length(trio.list))
        geno.vec.sim <- GeneDrop(trio.list[[j]], geno.vec.sim)
      if( any(is.na(geno.vec.sim))) stop("GeneDrop returned NA genotype.")
      share.vec[i] <- all( geno.vec.sim[carriers]==1 ) & all( geno.vec.sim[setdiff(dt.vec,carriers)]==0 )      
      occur.vec[i] <- any( geno.vec.sim[dt.vec]==1 )
      geno.vec[founder] <- 0
      i <- i + 1
    }
  return(sum(share.vec)/sum(occur.vec)) # note that these vectors have length n.bail
}

GeneDropSim.allsubsets.fn <- function(trio.list, id, dt.vec, fd.indices, carriers=dt.vec, n = 1e3, k = 10, nf = 1){
  n.bail <- k*n; i <- 1;
  share.vec <- character(n.bail); occur.vec <- logical(n.bail);
  geno.vec = rep(NA,length(id))
  names(geno.vec) = id
  geno.vec[fd.indices] = 0
  carriers = sort(carriers)

  while( sum(occur.vec) < n & i <= n.bail ){
      founder <- sample(fd.indices,nf,replace = FALSE)
      geno.vec[founder] <- 1
      geno.vec.sim = geno.vec
      for (j in 1:length(trio.list))
        geno.vec.sim <- GeneDrop(trio.list[[j]], geno.vec.sim)
      if( any(is.na(geno.vec.sim))) stop("GeneDrop returned NA genotype.")
      if (all( geno.vec.sim[setdiff(dt.vec,carriers)]==0 ) & any( geno.vec.sim[dt.vec]>0 )) share.vec[i] <- paste(carriers[geno.vec.sim[carriers]>0],collapse=";")     
      occur.vec[i] <- any( geno.vec.sim[dt.vec]>0 )
      geno.vec[founder] <- 0
      i <- i + 1
    }
    
  return(table(share.vec[share.vec!=""])/sum(occur.vec)) # note that these vectors have length n.bail
}

GeneDropSimExcessSharing.fn <- function(trio.list, id, dt.vec, fd.indices, phihat, RVfreq, carriers=dt.vec, ord=5, n = 1e3, k = 10)
{

#if (ord > 5) stop ("Order of the polynomial approximation cannot exceed 5.")
if (ord <= 0) stop ("Order of the polynomial approximation must be at least 1.")
if (any(phihat < 0)) stop ("Mean kinship between founders must be non-negative.")

n.bail <- k*n; 
geno.vec = rep(NA,length(id))
names(geno.vec) = id
geno.vec[fd.indices] = 0
  
nrep = length(phihat)
papprox = pMC = PFU.vec = rep(NA,nrep)
nf = length(fd.indices)
  
# Vector of phia
phi.vec = compute.phi.vec(nf,2*nf-ord)

# Loop over the estimates of phi in the vector phihat 
for (r in 1:nrep)
  {
  theta = max(infer.theta(phihat[r],phi.vec))
  # Debugging code
  # print (theta)
  if (theta>=0)
  {
  
  i <- 1;
  share.vec <- logical(n.bail); occur.vec <- logical(n.bail);

  # Distribution of number of duplicated alleles
  distri = theta^(0:ord)/factorial(0:ord)
  distri = distri/sum(distri)

  while( sum(occur.vec) < n & i <= n.bail )
      {
      # Define vector of indices of the allele copies in the founders
      copyindex = 1:(2*nf)
      
      # Sample number of duplicated alleles
      nduplic = sample(0:ord,1,prob=distri)
      
  # Debugging code
  # print (nduplic)

     # If no frequency is specified for the RV, it implies a single copy of the RV
     if (missing(RVfreq))
       RVcopies = 1
     # Else the number of copies of the RV is sampled from a truncated binomial distribution from 1 to 2*nf-duplic
     else 
       {
       distri.RVcopies = dbinom(1:(2*nf-nduplic),2*nf-nduplic,RVfreq)/(1-dbinom(0,2*nf-nduplic,RVfreq))
       RVcopies = sample(1:(2*nf-nduplic),prob=distri.RVcopies)
       }
  
      # Sample RV among the alleles
      RV = sample.int(2*nf-nduplic,RVcopies)

      # If RV is among the duplicated alleles, sample two gene copies for the RV
      if (RV[1] <= nduplic)
        RVi = sample.int(length(copyindex),2,replace = FALSE)
      # Else sample a single gene copy for the RV
      else RVi = sample.int(length(copyindex),1)
      founder <- rep(fd.indices,2)[copyindex[RVi]]
      copyindex = copyindex[-RVi]
      
      # If more than one copy of the RV is introduced
      if (length(RV) > 1)
      {
      for (k in 2:length(RV))
        {  
        # If RV is among the duplicated alleles, sample two founders who introduce it
        if (RV[k] <= nduplic)
          RVi = sample.int(length(copyindex),2,replace = FALSE)
        # Else sample a single founder introducing the RV
        else RVi = sample.int(length(copyindex),1)
        founder <- c(founder,rep(fd.indices,2)[copyindex[RVi]])
        copyindex = copyindex[-RVi]        
        }
      }
      tab.founder = table(founder)
      founder = unique(founder)
      # Debugging code
      #print (tab.founder)
      geno.vec[founder] = tab.founder

      geno.vec.sim = geno.vec
      for (j in 1:length(trio.list))
        geno.vec.sim <- GeneDrop(trio.list[[j]], geno.vec.sim)
      if( any(is.na(geno.vec.sim))) stop("GeneDrop returned NA genotype.")
      share.vec[i] <- all( geno.vec.sim[carriers]>0 & all( geno.vec.sim[setdiff(dt.vec,carriers)]==0 ) )
      occur.vec[i] <- any( geno.vec.sim[dt.vec]>0 )
      geno.vec[founder] <- 0
      # Debugging code
      #print (geno.vec)
      i <- i + 1
    }
    pMC[r] = sum(share.vec)/sum(occur.vec) # note that these vectors have length n.bail
    }
    else warning(paste("Negative mean value for the truncated Poisson approximation to the distribution for the number of alleles distinct by descent among the founders when phihat = ",phihat[r]," Try increasing ord."))
  }
  return(pMC)
}

get.fd.indices <- function(ped){
  dv = kindepth(ped)
  which(dv==0)
  }
  
syounkin/RVsharing documentation built on May 31, 2019, 12:47 a.m.