R/familyStructure.R

familyStructure <-
function (i, cumind=cumind, m.carrier=1, interaction=TRUE, depend=1, parms=c(0.016, 3), base.dist="Weibull", frailty.dist="gamma",
            vbeta=c(-1.13, 2.35, 0.5), dominant.m=TRUE, dominant.s= TRUE, 
            variation="secondgene", allelefreq= c(0.02, 0.2), mrate=0, 
            probandage=c(45,1.5), agemin=20, agemax=100)
{
    
tmpdata<-numeric()
indID<-c(cumind+1,cumind+2)
motherID<-c(0,0) 
fatherID<-c(0,0) 
gender<-c(1,0) ## 0--Female, 1--Male
proband<-c(0,0) ## 0--Non-proband,  1-- proband
generation<-c(1,1) ## generation number, 0 in second generation means married from outside
relation <- c(4,4)  #1=proband, 2=sib, 3=child, 4=parent, 5=sibchild, 6=hasband, 7=sibspouse  
# SecNum <- 2, we can fix the number of sibs in second generation (eg., 2 sibs)

## number of sibs in second generation be 2 to 5 
## truncated negative binomial (>=2 and <=5) with prob=sibprob=0.4551, and rr=1
SecNum <-sample(c(2,3,4,5), 1, replace=TRUE,prob=c(0.4991, 0.2720, 0.1482, 0.0807)) 
    
## generate gender by prob=0.5      
    
tmpgender<-sample(c(1,0), SecNum, replace=TRUE, prob=c(0.5,0.5))
    
SecNum<-length(tmpgender)
    
if (SecNum > 0) { ## at least one sample in second generation
      
  #tmpproband <- 1 # sample(SecNum,1,replace=TRUE)
      
  NumMem<-2*SecNum+cumind+2
      
  for (j in 1:SecNum) {
        
    if (j==1){ # first one is assigned as the proband
      proband <- c(proband, c(1,0))
      relation <- c(relation, c(1,6))
    }
    else {
      proband<-c(proband, c(0,0))
      relation <- c(relation, c(2,7))
    }    
    indID<- c(indID, 2*j+cumind+1, 2*j+cumind+2 )
    fatherID<-c(fatherID, c(cumind+1,0))
    motherID<-c(motherID, c(cumind+2,0))
        
    if (tmpgender[j]==0) gender<-c(gender, c(1,0)) 
    else gender<-c(gender, c(0,1))
     
    generation<-c(generation, c(2,0)) 
        
    ThiNum<-sample(c(2,3,4,5), 1, replace=TRUE, prob=c(0.4991, 0.2720, 0.1482, 0.0807)) 
      
    for (k in 1:ThiNum) {
      proband<-c(proband,0)
      indID<- c(indID, NumMem+k)
      
      if(j==1) relation <- c(relation, 3)
      else relation <- c(relation, 5)
      
      if (gender[indID==(2*j+cumind+1)]==0) {
        fatherID<-c(fatherID, 2*j+cumind+2)
        motherID<-c(motherID, 2*j+cumind+1)
        } 
      else {
        fatherID<-c(fatherID, 2*j+cumind+1)
        motherID<-c(motherID, 2*j+cumind+2)
        }
      ## generate gender by prob=0.5
      gender<-c(gender,sample(c(1, 0), 1,replace=TRUE, prob=c(0.5, 0.5)))
      generation<-c(generation, 3)           
      }#Close for for(k in 1:ThiNum)
        NumMem<-NumMem+ThiNum  
        
    } #Close for  for (j in 1:SecNum)
      
  nIndi <- length(indID)     
  famID<-rep(i, nIndi)    
  ageonset<-rep(0, nIndi)      ## age at onset
  censorage<-rep(0, nIndi)     ## current age
  status<-rep(0, nIndi)        ## disease staus
  affected <- rep(0, nIndi)	
  disgene.m <- rep(0, nIndi)	
  disgene.s <- rep(0, nIndi)	
  ParentsG.m <- rep(0, nIndi)  ## parents genotype; it can be 1 to 9; 0 if founders
  pos <- c(1:nIndi) 
  
  
  #------------------------------------------------------------------------------
  # Familial correlation will be generated by gamma frailty, log-normal frailty, or secondgene
      
  ## Log-normal frailty (logX~Normal with mean=0, variance=depend)
  if (is.null(frailty.dist)) alpha=0
  else if (frailty.dist=="lognormal") alpha <- rnorm(1, mean=0, sd=1/sqrt(depend))
  ## Gamma Frailty (with mean 1, variance=depend)
  else if (frailty.dist=="gamma") alpha <- log(rgamma(1, shape=depend, scale=1/depend))
  else stop("unrecognized frailty distribution")
  

  #------------------------------------------------------------------------------
  ###  Generating the current age
  # First generation with default mean 65 and sd=2.5; Second gen with default mean 45 and sd=2.5; third gene has mean age difference of 20 with their parents and sd=1
  #Probands age generated with truncated normal
  
  prob.age <- rtruncnorm(1,a=agemin, b=agemax, mean=probandage[1], sd=probandage[2])    
  
  genepos<-pos[generation==1]
  censorage[genepos]<- rnorm(length(genepos), mean=prob.age+20, sd=1.5)
  min.page1<- min(censorage[genepos])
#  if(min.page1 < agemin) stop("agemin is set too large.")
  genepos<-pos[generation==0] #spouse 
  censorage[genepos]<- rnorm(length(genepos), mean=probandage[1], sd=probandage[2])
  genepos<-pos[generation==2] 
   
  for (j in 1:length(genepos)) {
    if(j==1) censorage[genepos[1]] <- prob.age
    else censorage[genepos[j]] <- rtruncnorm(1,a=agemin,b=min.page1-14, mean=prob.age[1]-j-1, sd=1.5)
    sonpos<-pos[fatherID==indID[genepos[j]] | motherID==indID[genepos[j]]]
    min.page2 <- min(censorage[indID==fatherID[sonpos[1]] | indID==motherID[sonpos[1]]])
    #if(min.page2 < agemin) stop("agemin is too large.")
    for (k in 1:length(sonpos)) {
      #censorage[sonpos[k]]<- rtruncnorm(1,a=agemin, b=min.page2, mean=min.page2-20-k, sd=1.5)
      censorage[sonpos[k]]<- rnorm(1,mean=min.page2-20-k, sd=1.5)      
      }#Close for for(k in 1:length(sonpos))
    }#Close for for (j in 1:length(genepos))
      
  #------------------------------------------------------------------------------
            
  if(length(allelefreq)==1) allelefreq <- c(allelefreq,0) 
  AAq <- allelefreq^2
  Aaq <- 2*allelefreq*(1-allelefreq)
  aaq <- (1-allelefreq)^2
  G <- cbind(AAq, Aaq, aaq)
      
  #----------------------------------------------------------------------------
  ## For generating genotypes, we first generate the proband's genotype and based on that, we generate other family members' genotypes
      
  # Generate proband's genotype given its age at onset and gender
  prob.age <- censorage[proband==1] # proband's current age
  prob.sex <- gender[proband==1]
  pGene <- fgene(base.dist, frailty.dist, depend=depend, affage=prob.age-agemin, affsex=prob.sex, interaction=interaction, variation=variation, parms=parms, vbeta=vbeta, alpha=alpha, pg=0, m.carrier=m.carrier, dominant.m=dominant.m, aq = allelefreq)
  #------------------------------------------------------------------------------ 
     
  if(variation=="secondgene") ngene <- c(1,2)
  else  ngene <- 1
      
  for(g in ngene){
      if( m.carrier==1 & g==1) {
        if(dominant.m) gg <- 1:2
        else gg <- 1
        prob.G <- sample(gg, 1, replace=TRUE, prob=pGene[1,gg])
        }#Close for if( m.carrier==1 & g==1)
      else prob.G <- sample(c(1,2,3),1, replace=TRUE, prob=pGene[g,])
      #generate proband's parents' genotype based on proband's genotype  
      G1 <- parents.g(prob.G, g=g, allelefreq=allelefreq) 

      nsibs <- sum(proband==0 & generation==2)
      sib.G <- kids.g(nsibs, G1) # generate kids' genotype given the proband's genotype
  
    if(g==1){
      # ParentsG.m[generation==2] <- 3*(G1[1]-1) + G1[2] 
      # ParentsG.m = parents' genotype; value can be 1 to 9.
      # let AA==1, Aa==2, aa==3, 
      # 9 possible combinations of parents are 1=(1,1), 2=(1,2), 3=(1,3), 4=(2,1), 5=(2,2), 6=(2,3), 7=(3,1), 8=(3,2), 9=(3,3)
      # disegene.m = geno of marjor gene as 1(AA), 2(Aa), or 3(aa)
      disgene.m[ generation==1 ] <- G1 
      disgene.m[ proband==1 ] <- prob.G
      disgene.m[ proband==0 & generation==2 ] <- sib.G	
      }	
    else{
      # disegene.s = genotype of second gene as 1(AA), 2(Aa), or 3(aa)
      disgene.s[generation==1 ] <- G1
      disgene.s[proband==1] <- prob.G
      disgene.s[proband==0 & generation==2 ] <- sib.G	
      }	 
    }#close for for(g in ngene)
      
  #------------------------------------------------------------------------------
  # genotypes of first and second gene for generation=0 which is founder by random selection among AA, Aa, aa 
  disgene.m[generation==0] <- sample(c(1,2,3), sum(generation==0), replace=TRUE, prob=G[1,] )
  
  if(variation=="secondgene"){
    disgene.s[generation==0] <- sample(c(1,2,3), sum(generation==0), replace=TRUE, prob=G[2,] )
    }
      
  # genotypes of third generation
  for(i in indID[generation==3]){
    m.g <- disgene.m[indID==motherID[indID==i]]
    f.g <- disgene.m[indID==fatherID[indID==i]]
        
    disgene.m[indID==i] <- kids.g(1, c(m.g, f.g) )

    if(variation=="secondgene"){
      disgene.s[indID==i] <- kids.g(1, c(disgene.s[indID==motherID[indID==i]], disgene.s[indID==fatherID[indID==i]]))
      }
    }#Close for for(i in indID[generation==3])
      
  ## We dichotomize 3 genotypes into two (carrier or not) depending on the model 
  ## dominant.m=1 if major gene is based on dominant model, 0 for recessive model )
  ## dominant.s=1 if second gene is based on dominant model, 0 for recessive model )
      
  if(dominant.m)  majorgene <- ifelse(disgene.m==3, 0,1)
  else majorgene <- ifelse(disgene.m==1,1,0)
      
  if(variation=="secondgene"){
    if(dominant.s) secondgene <- ifelse(disgene.s==3,0,1)
    else   secondgene <- ifelse(disgene.s==1 , 1, 0)
      }
  else secondgene <- rep(0, nIndi)
      
  #------------------------------------------------------------------------------
  ## simulate the age at onset for the family member: each generation may have different frailty, so simulated seperately.
      

  if(variation=="secondgene"){
    if(interaction) x <- cbind(gender, majorgene, gender*majorgene, secondgene)
    else x <- cbind(gender, majorgene, secondgene)
  }
  else if(interaction) x <- cbind(gender, majorgene, gender*majorgene)
  else   x <- cbind(gender, majorgene)
  
  xbeta <- c(x %*% vbeta)
  
  # gen3 = generation 1,2,3   
  gen3 <- ifelse(generation==2 | generation==0, 2, ifelse(generation==1, 1,3))
  affected <- (proband==1)

  #------------------------------------------------------------------------------
  if(variation == "frailty"){
#  if(TRUE){
    uni <- runif(nIndi, 0, 1)
    ageonset <- apply(cbind(xbeta,uni), 1, inv.surv, base.dist=base.dist, parms=parms, alpha=alpha)
  }
  else{
  ## generating ageonsets for probands
   genepos <- pos[proband==1]
   xvbeta <- xbeta[genepos]
   affage <- censorage[genepos]
   affage.min <- ifelse(affage>agemin, affage-agemin, 0)
   uni <- runif(length(genepos), 0,1)
   ageonset[genepos] <- apply(cbind(xvbeta,affage.min, uni), 1, inv.survp, base.dist=base.dist, parms=parms, alpha=alpha)
  
  ## generate ageonset for non-affected ones
   genepos <- pos[proband==0]
   xvbeta <- xbeta[genepos]
   uni <- runif(length(genepos), 0, 1)
   ageonset[genepos] <- apply(cbind(xvbeta,uni), 1, inv.surv, base.dist=base.dist, parms=parms, alpha=alpha)
  }
  
  ageonset <- ageonset+agemin
  currentage <- ifelse(censorage > agemax, agemax, censorage)
  time <- pmin(currentage, ageonset)   
  status <- ifelse(currentage >= ageonset, 1, 0)
            
  # Generating missing genotypes 
  # mgene is genotype with missing genotype NA
  mgene <-  majorgene 
  mm <- round((length(mgene)-1)*mrate)  # no. of missing 
  mgene[is.element(indID, sample(indID[!proband], mm)) ] <- NA

  fsize <- length(famID) # family size
  naff <- sum(status) # number of affected in the family
  tmpdata<-cbind(
  famID, indID, gender, motherID, fatherID, proband, generation,
  majorgene=disgene.m, secondgene=disgene.s, ageonset, currentage, 
  time, status, mgene, relation, fsize, naff)
  }#Close for if (SecNum > 0)
    
return(tmpdata)

}

Try the FamEvent package in your browser

Any scripts or data that you put into this service are public.

FamEvent documentation built on Nov. 17, 2022, 5:06 p.m.