R/familyStructure_c.R
In FamEvent: Family Age-at-Onset Data Simulation and Penetrance Estimation

familyStructure_c <-
function (i, cumind=cumind, m.carrier=1, interaction=c(FALSE, FALSE), depend=c(1,2), 
          parms=list(c(0.016, 3),c(0.016, 3)), 
          base.dist=c("Weibull","Weibull"), frailty.dist="gamma",
          vbeta=list(c(-1.13, 2.35), c(-1.13, 2.35)), dominant.m=TRUE, dominant.s= TRUE, 
          variation="secondgene", allelefreq= c(0.02, 0.2), mrate=0, 
          probandage=c(45,1.5), agemin=20, agemax=100)
{
tmpdata<-numeric()
indID<-c(cumind+1,cumind+2)
motherID<-c(0,0) 
fatherID<-c(0,0) 
gender<-c(1,0) ## 0--Female, 1--Male
proband<-c(0,0) ## 0--Non-proband,  1-- proband
generation<-c(1,1) ## generation number, 0 in second generation means married from outside
relation <- c(4,4)  #1=proband, 2=sib, 3=child, 4=parent, 5=sibchild, 6=hasband, 7=sibspouse  
# SecNum <- 2, we can fix the number of sibs in second generation (eg., 2 sibs)

## number of sibs in second generation be 2 to 5 
## truncated negative binomial (>=2 and <=5) with prob=sibprob=0.4551, and rr=1
SecNum <-sample(c(2,3,4,5), 1, replace=TRUE,prob=c(0.4991, 0.2720, 0.1482, 0.0807)) 
    
## generate gender by prob=0.5      
    
tmpgender<-sample(c(1,0), SecNum, replace=TRUE, prob=c(0.5,0.5))
    
SecNum<-length(tmpgender)
    
if (SecNum > 0) { ## at least one sample in second generation
      
  #tmpproband <- 1 # sample(SecNum,1,replace=TRUE)
      
  NumMem<-2*SecNum+cumind+2
      
  for (j in 1:SecNum) {
        
    if (j==1){ # first one is assigned as the proband
      proband <- c(proband, c(1,0))
      relation <- c(relation, c(1,6))
    }
    else {
      proband<-c(proband, c(0,0))
      relation <- c(relation, c(2,7))
    }    
    indID<- c(indID, 2*j+cumind+1, 2*j+cumind+2 )
    fatherID<-c(fatherID, c(cumind+1,0))
    motherID<-c(motherID, c(cumind+2,0))
        
    if (tmpgender[j]==0) gender<-c(gender, c(1,0)) 
    else gender<-c(gender, c(0,1))
     
    generation<-c(generation, c(2,0)) 
        
    ThiNum<-sample(c(2,3,4,5), 1, replace=TRUE, prob=c(0.4991, 0.2720, 0.1482, 0.0807)) 
    # Thi_num  <- sample(c(0, 1, 2), 1, replace = TRUE, prob = c(0.1089, 0.3420, 0.5491))
    
    for (k in 1:ThiNum) {
      proband<-c(proband,0)
      indID<- c(indID, NumMem+k)
      
      if(j==1) relation <- c(relation, 3)
      else relation <- c(relation, 5)
      
      if (gender[indID==(2*j+cumind+1)]==0) {
        fatherID<-c(fatherID, 2*j+cumind+2)
        motherID<-c(motherID, 2*j+cumind+1)
        } 
      else {
        fatherID<-c(fatherID, 2*j+cumind+1)
        motherID<-c(motherID, 2*j+cumind+2)
        }
      ## generate gender by prob=0.5
      gender<-c(gender,sample(c(1, 0), 1,replace=TRUE, prob=c(0.5, 0.5)))
      generation<-c(generation, 3)           
      }#Close for for(k in 1:ThiNum)
        NumMem<-NumMem+ThiNum  
        
    } #Close for  for (j in 1:SecNum)
      
  nIndi <- length(indID)     
  famID<-rep(i, nIndi)    
  ageonset<-rep(0, nIndi)      ## age at onset
  censorage<-rep(0, nIndi)     ## current age
  status<-rep(0, nIndi)        ## disease staus
  affected <- rep(0, nIndi)	
  disgene.m <- rep(0, nIndi)	
  disgene.s <- rep(0, nIndi)	
  ParentsG.m <- rep(0, nIndi)  ## parents genotype; it can be 1 to 9; 0 if founders
  pos <- c(1:nIndi) 
  
  
  #------------------------------------------------------------------------------
  # Familial correlation will be generated by gamma frailty, log-normal frailty, or secondgene
      
  ## Log-normal frailty (logX~Normal with mean=0, variance=1/depend)
  if (frailty.dist=="none" || frailty.dist=="secondgene" || is.null(frailty.dist)) z1 <- z2 <- 0
  else if (frailty.dist=="lognormal") {
    z1 <- rnorm(1, mean=0, sd=1/sqrt(depend[1]))
    z2 <- rnorm(1, mean=0, sd=1/sqrt(depend[2]))
  }
  ## Gamma Frailty (with mean 1, variance=depend)
  else if (frailty.dist=="gamma"){
    z1 <- log(rgamma(1, shape=depend[1], scale=1/depend[1]))
    z2 <- log(rgamma(1, shape=depend[2], scale=1/depend[2]))
    # k1 = depend[1], k2 = depend[2]
    # zj~Gamma(kj, 1/kj); var(zj)=1/kj; Kendall's tau=1/(1+2*kj)
    
  } 
  else if (frailty.dist=="cgamma"){
    z0 <- rgamma(1, shape = depend[3], scale = 1/depend[3])
    z1 <- log(rgamma(1, shape = depend[1], scale = 1/(depend[1] + depend[3])) + z0 * depend[3]/(depend[1] + depend[3]))
    z2 <- log(rgamma(1, shape = depend[2], scale = 1/(depend[2] + depend[3])) + z0 * depend[3]/(depend[2] + depend[3]))
    # k1=depend[1], k2=depend[2], k0=depend[3]
    # zj~Gamma(k0+kj, 1/(k0+kj)); var(zj)=1/(k0+kj); Kendall's tau=1/(1+2*(k0+kj))
  } 
  else if (frailty.dist=="clognormal") {
    # var(z1)=1/depend[1]; var(z2)=1/depend[2], corr(z1, z2)=depend[3]
    cov <- depend[3]/sqrt(depend[1]*depend[2])
    z <- mvrnorm(1, mu = c(0,0), Sigma = matrix(c(1/depend[1], cov, cov,1/depend[2]), ncol=2))
    z1 <- z[1]
    z2 <- z[2]
  }
  else stop("unrecognized frailty distribution")
  
  alpha <- c(z1, z2)  # frailties in log scale # alpha=c(0,0) if variation=="none" or "secondgene"

  #------------------------------------------------------------------------------
  ###  Generating the current age
  # First generation with default mean 65 and sd=2.5; Second gen with default mean 45 and sd=2.5; third gene has mean age difference of 20 with their parents and sd=1
  #Probands age generated with truncated normal
  
  prob.age <- rtruncnorm(1,a=agemin, b=agemax, mean=probandage[1], sd=probandage[2])    
  
  genepos<-pos[generation==1]
  censorage[genepos]<- rnorm(length(genepos), mean=prob.age+20, sd=1.5)
  min.page1<- min(censorage[genepos])
#  if(min.page1 < agemin) stop("agemin is set too large.")
  genepos<-pos[generation==0] #spouse 
  censorage[genepos]<- rnorm(length(genepos), mean=probandage[1], sd=probandage[2])
  genepos<-pos[generation==2] 
  
  for (j in 1:length(genepos)) {
    if(j==1) censorage[genepos[1]] <- prob.age
    else censorage[genepos[j]] <- rtruncnorm(1,a=agemin,b=min.page1-14, mean=prob.age[1]-j-1, sd=1.5)
    sonpos<-pos[fatherID==indID[genepos[j]] | motherID==indID[genepos[j]]]
    min.page2 <- min(censorage[indID==fatherID[sonpos[1]] | indID==motherID[sonpos[1]]])
    #if(min.page2 < agemin) stop("agemin is too large.")
    for (k in 1:length(sonpos)) {
      #censorage[sonpos[k]]<- rtruncnorm(1,a=agemin, b=min.page2, mean=min.page2-20-k, sd=1.5)
      censorage[sonpos[k]]<- rnorm(1,mean=min.page2-20-k, sd=1.5)      
      }#Close for for(k in 1:length(sonpos))
    }#Close for for (j in 1:length(genepos))
      
  #------------------------------------------------------------------------------
  
  if(length(allelefreq)==1) allelefreq <- c(allelefreq,0) 
  AAq <- allelefreq^2
  Aaq <- 2*allelefreq*(1-allelefreq)
  aaq <- (1-allelefreq)^2
  G <- cbind(AAq, Aaq, aaq)
      
  #----------------------------------------------------------------------------
  ## For generating genotypes, we first generate the proband's genotype and based on that, we generate other family members' genotypes
  # Generate proband's genotype given its age at onset and gender
  prob.age <- censorage[proband==1] # proband's current age
  prob.sex <- gender[proband==1]
  pGene <- fgene_c(base.dist, frailty.dist, depend=depend, affage=prob.age-agemin, affsex=prob.sex, interaction=interaction, variation=variation, parms=parms, vbeta=vbeta, alpha=alpha, pg=0, m.carrier=m.carrier, dominant.m=dominant.m, aq = allelefreq)
  #------------------------------------------------------------------------------ 

  if(variation=="secondgene") ngene <- c(1,2)
  else  ngene <- 1
  
  for(g in ngene){
      if( m.carrier==1 & g==1) {
        if(dominant.m) gg <- 1:2
        else gg <- 1
        prob.G <- sample(gg, 1, replace=TRUE, prob=pGene[1,gg])
        }#Close for if( m.carrier==1 & g==1)
      else prob.G <- sample(c(1,2,3),1, replace=TRUE, prob=pGene[g,])
      #generate proband's parents' genotype based on proband's genotype  
      G1 <- parents.g(prob.G, g=g, allelefreq=allelefreq) 

      nsibs <- sum(proband==0 & generation==2)
      sib.G <- kids.g(nsibs, G1) # generate kids' genotype given the proband's genotype
  
    if(g==1){
      # ParentsG.m[generation==2] <- 3*(G1[1]-1) + G1[2] 
      # ParentsG.m = parents' genotype; value can be 1 to 9.
      # let AA==1, Aa==2, aa==3, 
      # 9 possible combinations of parents are 1=(1,1), 2=(1,2), 3=(1,3), 4=(2,1), 5=(2,2), 6=(2,3), 7=(3,1), 8=(3,2), 9=(3,3)
      # disegene.m = geno of marjor gene as 1(AA), 2(Aa), or 3(aa)
      disgene.m[ generation==1 ] <- G1 
      disgene.m[ proband==1 ] <- prob.G
      disgene.m[ proband==0 & generation==2 ] <- sib.G	
      }	
    else{
      # disegene.s = genotype of second gene as 1(AA), 2(Aa), or 3(aa)
      disgene.s[generation==1 ] <- G1
      disgene.s[proband==1] <- prob.G
      disgene.s[proband==0 & generation==2 ] <- sib.G	
      }	 
    }#close for for(g in ngene)
      
  #------------------------------------------------------------------------------
  # genotypes of first and second gene for generation=0 which is founder by random selection among AA, Aa, aa 
  disgene.m[generation==0] <- sample(c(1,2,3), sum(generation==0), replace=TRUE, prob=G[1,] )
  
  if(variation=="secondgene"){
    disgene.s[generation==0] <- sample(c(1,2,3), sum(generation==0), replace=TRUE, prob=G[2,] )
    }
      
  
  # genotypes of third generation
  for(i in indID[generation==3]){
    m.g <- disgene.m[indID==motherID[indID==i]]
    f.g <- disgene.m[indID==fatherID[indID==i]]
        
    disgene.m[indID==i] <- kids.g(1, c(m.g, f.g) )

    if(variation=="secondgene"){
      disgene.s[indID==i] <- kids.g(1, c(disgene.s[indID==motherID[indID==i]], disgene.s[indID==fatherID[indID==i]]))
      }
    }#Close for for(i in indID[generation==3])
      
  ## We dichotomize 3 genotypes into two (carrier or not) depending on the model 
  ## dominant.m=1 if major gene is based on dominant model, 0 for recessive model )
  ## dominant.s=1 if second gene is based on dominant model, 0 for recessive model )
      
  if(dominant.m)  majorgene <- ifelse(disgene.m==3, 0,1)
  else majorgene <- ifelse(disgene.m==1,1,0)
      
  if(variation=="secondgene"){
    if(dominant.s) secondgene <- ifelse(disgene.s==3,0,1)
    else   secondgene <- ifelse(disgene.s==1, 1, 0)
      }
  else secondgene <- rep(0, nIndi)
      
  #------------------------------------------------------------------------------
  ## simulate the age at onset for the family member: each generation may have different frailty, so simulated seperately.
      
 
  if(variation=="secondgene"){
    if(interaction[1]) x1 <- cbind(gender, majorgene, gender*majorgene, secondgene)
    else x1 <- cbind(gender, majorgene, secondgene)
    if(interaction[2]) x2 <- cbind(gender, majorgene, gender*majorgene, secondgene)
    else x2 <- cbind(gender, majorgene, secondgene)
  }
  else{ 
    if(interaction[1]) x1 <- cbind(gender, majorgene, gender*majorgene)
    else   x1 <- cbind(gender, majorgene)
    if(interaction[2]) x2 <- cbind(gender, majorgene, gender*majorgene)
    else   x2 <- cbind(gender, majorgene)
  }
  
  xbeta1 <- c(x1 %*% vbeta[[1]])
  xbeta2 <- c(x2 %*% vbeta[[2]])
  
  # gen3 = generation 1,2,3   
  gen3 <- ifelse(generation==2 | generation==0, 2, ifelse(generation==1, 1,3))
  affected <- (proband==1)

  #------------------------------------------------------------------------------
    uni <- runif(nIndi, 0, 1)
  
    ageonset <- apply(cbind(xbeta1, xbeta2, uni), 1, inv_surv_c, base.dist=base.dist, parms=parms, alpha=alpha)
    h1 <- hazards(dist=base.dist[1], ageonset, parms[[1]], cuts=NULL)*exp(alpha[1]+xbeta1)
    h2 <- hazards(dist=base.dist[2], ageonset, parms[[2]], cuts=NULL)*exp(alpha[2]+xbeta2)
    true_status <- ifelse(runif(nIndi) < h1/(h1 + h2), 1, 2) 

  ageonset <- ageonset+agemin
  currentage <- ifelse(censorage > agemax, agemax, censorage)
  time <- pmin(currentage, ageonset)   
  status <- ifelse(currentage >= ageonset, true_status, 0)
            
  # Generating missing genotypes 
  # mgene is genotype with missing genotype NA
  mgene <-  majorgene 
  mm <- round((length(mgene)-1)*mrate)  # no. of missing 
  mgene[is.element(indID, sample(indID[!proband], mm)) ] <- NA

  fsize <- length(famID) # family size
  naff <- sum(status%in%c(1,2)) # number of affected in the family
  df1 <- sum(status==1)
  df2 <- sum(status==2)
  tmpdata<-cbind(
  famID, indID, gender, motherID, fatherID, proband, generation,
  majorgene=disgene.m, secondgene=disgene.s, ageonset, currentage, 
  time, status, true_status, mgene, relation, fsize, naff, df1, df2)
  }#Close for if (SecNum > 0)
    
return(tmpdata)

}
Any scripts or data that you put into this service are public.
FamEvent documentation built on July 3, 2024, 5:07 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
FamEvent
Family Age-at-Onset Data Simulation and Penetrance Estimation

R/familyStructure_c.R
In FamEvent: Family Age-at-Onset Data Simulation and Penetrance Estimation

Try the FamEvent package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

FamEvent Family Age-at-Onset Data Simulation and Penetrance Estimation

R/familyStructure_c.R In FamEvent: Family Age-at-Onset Data Simulation and Penetrance Estimation

Try the FamEvent package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

FamEvent
Family Age-at-Onset Data Simulation and Penetrance Estimation

R/familyStructure_c.R
In FamEvent: Family Age-at-Onset Data Simulation and Penetrance Estimation