Nothing
familyStructure_c <-
function (i, cumind=cumind, m.carrier=1, interaction=c(FALSE, FALSE), depend=c(1,2),
parms=list(c(0.016, 3),c(0.016, 3)),
base.dist=c("Weibull","Weibull"), frailty.dist="gamma",
vbeta=list(c(-1.13, 2.35), c(-1.13, 2.35)), dominant.m=TRUE, dominant.s= TRUE,
variation="secondgene", allelefreq= c(0.02, 0.2), mrate=0,
probandage=c(45,1.5), agemin=20, agemax=100)
{
tmpdata<-numeric()
indID<-c(cumind+1,cumind+2)
motherID<-c(0,0)
fatherID<-c(0,0)
gender<-c(1,0) ## 0--Female, 1--Male
proband<-c(0,0) ## 0--Non-proband, 1-- proband
generation<-c(1,1) ## generation number, 0 in second generation means married from outside
relation <- c(4,4) #1=proband, 2=sib, 3=child, 4=parent, 5=sibchild, 6=hasband, 7=sibspouse
# SecNum <- 2, we can fix the number of sibs in second generation (eg., 2 sibs)
## number of sibs in second generation be 2 to 5
## truncated negative binomial (>=2 and <=5) with prob=sibprob=0.4551, and rr=1
SecNum <-sample(c(2,3,4,5), 1, replace=TRUE,prob=c(0.4991, 0.2720, 0.1482, 0.0807))
## generate gender by prob=0.5
tmpgender<-sample(c(1,0), SecNum, replace=TRUE, prob=c(0.5,0.5))
SecNum<-length(tmpgender)
if (SecNum > 0) { ## at least one sample in second generation
#tmpproband <- 1 # sample(SecNum,1,replace=TRUE)
NumMem<-2*SecNum+cumind+2
for (j in 1:SecNum) {
if (j==1){ # first one is assigned as the proband
proband <- c(proband, c(1,0))
relation <- c(relation, c(1,6))
}
else {
proband<-c(proband, c(0,0))
relation <- c(relation, c(2,7))
}
indID<- c(indID, 2*j+cumind+1, 2*j+cumind+2 )
fatherID<-c(fatherID, c(cumind+1,0))
motherID<-c(motherID, c(cumind+2,0))
if (tmpgender[j]==0) gender<-c(gender, c(1,0))
else gender<-c(gender, c(0,1))
generation<-c(generation, c(2,0))
ThiNum<-sample(c(2,3,4,5), 1, replace=TRUE, prob=c(0.4991, 0.2720, 0.1482, 0.0807))
# Thi_num <- sample(c(0, 1, 2), 1, replace = TRUE, prob = c(0.1089, 0.3420, 0.5491))
for (k in 1:ThiNum) {
proband<-c(proband,0)
indID<- c(indID, NumMem+k)
if(j==1) relation <- c(relation, 3)
else relation <- c(relation, 5)
if (gender[indID==(2*j+cumind+1)]==0) {
fatherID<-c(fatherID, 2*j+cumind+2)
motherID<-c(motherID, 2*j+cumind+1)
}
else {
fatherID<-c(fatherID, 2*j+cumind+1)
motherID<-c(motherID, 2*j+cumind+2)
}
## generate gender by prob=0.5
gender<-c(gender,sample(c(1, 0), 1,replace=TRUE, prob=c(0.5, 0.5)))
generation<-c(generation, 3)
}#Close for for(k in 1:ThiNum)
NumMem<-NumMem+ThiNum
} #Close for for (j in 1:SecNum)
nIndi <- length(indID)
famID<-rep(i, nIndi)
ageonset<-rep(0, nIndi) ## age at onset
censorage<-rep(0, nIndi) ## current age
status<-rep(0, nIndi) ## disease staus
affected <- rep(0, nIndi)
disgene.m <- rep(0, nIndi)
disgene.s <- rep(0, nIndi)
ParentsG.m <- rep(0, nIndi) ## parents genotype; it can be 1 to 9; 0 if founders
pos <- c(1:nIndi)
#------------------------------------------------------------------------------
# Familial correlation will be generated by gamma frailty, log-normal frailty, or secondgene
## Log-normal frailty (logX~Normal with mean=0, variance=1/depend)
if (frailty.dist=="none" || frailty.dist=="secondgene" || is.null(frailty.dist)) z1 <- z2 <- 0
else if (frailty.dist=="lognormal") {
z1 <- rnorm(1, mean=0, sd=1/sqrt(depend[1]))
z2 <- rnorm(1, mean=0, sd=1/sqrt(depend[2]))
}
## Gamma Frailty (with mean 1, variance=depend)
else if (frailty.dist=="gamma"){
z1 <- log(rgamma(1, shape=depend[1], scale=1/depend[1]))
z2 <- log(rgamma(1, shape=depend[2], scale=1/depend[2]))
# k1 = depend[1], k2 = depend[2]
# zj~Gamma(kj, 1/kj); var(zj)=1/kj; Kendall's tau=1/(1+2*kj)
}
else if (frailty.dist=="cgamma"){
z0 <- rgamma(1, shape = depend[3], scale = 1/depend[3])
z1 <- log(rgamma(1, shape = depend[1], scale = 1/(depend[1] + depend[3])) + z0 * depend[3]/(depend[1] + depend[3]))
z2 <- log(rgamma(1, shape = depend[2], scale = 1/(depend[2] + depend[3])) + z0 * depend[3]/(depend[2] + depend[3]))
# k1=depend[1], k2=depend[2], k0=depend[3]
# zj~Gamma(k0+kj, 1/(k0+kj)); var(zj)=1/(k0+kj); Kendall's tau=1/(1+2*(k0+kj))
}
else if (frailty.dist=="clognormal") {
# var(z1)=1/depend[1]; var(z2)=1/depend[2], corr(z1, z2)=depend[3]
cov <- depend[3]/sqrt(depend[1]*depend[2])
z <- mvrnorm(1, mu = c(0,0), Sigma = matrix(c(1/depend[1], cov, cov,1/depend[2]), ncol=2))
z1 <- z[1]
z2 <- z[2]
}
else stop("unrecognized frailty distribution")
alpha <- c(z1, z2) # frailties in log scale # alpha=c(0,0) if variation=="none" or "secondgene"
#------------------------------------------------------------------------------
### Generating the current age
# First generation with default mean 65 and sd=2.5; Second gen with default mean 45 and sd=2.5; third gene has mean age difference of 20 with their parents and sd=1
#Probands age generated with truncated normal
prob.age <- rtruncnorm(1,a=agemin, b=agemax, mean=probandage[1], sd=probandage[2])
genepos<-pos[generation==1]
censorage[genepos]<- rnorm(length(genepos), mean=prob.age+20, sd=1.5)
min.page1<- min(censorage[genepos])
# if(min.page1 < agemin) stop("agemin is set too large.")
genepos<-pos[generation==0] #spouse
censorage[genepos]<- rnorm(length(genepos), mean=probandage[1], sd=probandage[2])
genepos<-pos[generation==2]
for (j in 1:length(genepos)) {
if(j==1) censorage[genepos[1]] <- prob.age
else censorage[genepos[j]] <- rtruncnorm(1,a=agemin,b=min.page1-14, mean=prob.age[1]-j-1, sd=1.5)
sonpos<-pos[fatherID==indID[genepos[j]] | motherID==indID[genepos[j]]]
min.page2 <- min(censorage[indID==fatherID[sonpos[1]] | indID==motherID[sonpos[1]]])
#if(min.page2 < agemin) stop("agemin is too large.")
for (k in 1:length(sonpos)) {
#censorage[sonpos[k]]<- rtruncnorm(1,a=agemin, b=min.page2, mean=min.page2-20-k, sd=1.5)
censorage[sonpos[k]]<- rnorm(1,mean=min.page2-20-k, sd=1.5)
}#Close for for(k in 1:length(sonpos))
}#Close for for (j in 1:length(genepos))
#------------------------------------------------------------------------------
if(length(allelefreq)==1) allelefreq <- c(allelefreq,0)
AAq <- allelefreq^2
Aaq <- 2*allelefreq*(1-allelefreq)
aaq <- (1-allelefreq)^2
G <- cbind(AAq, Aaq, aaq)
#----------------------------------------------------------------------------
## For generating genotypes, we first generate the proband's genotype and based on that, we generate other family members' genotypes
# Generate proband's genotype given its age at onset and gender
prob.age <- censorage[proband==1] # proband's current age
prob.sex <- gender[proband==1]
pGene <- fgene_c(base.dist, frailty.dist, depend=depend, affage=prob.age-agemin, affsex=prob.sex, interaction=interaction, variation=variation, parms=parms, vbeta=vbeta, alpha=alpha, pg=0, m.carrier=m.carrier, dominant.m=dominant.m, aq = allelefreq)
#------------------------------------------------------------------------------
if(variation=="secondgene") ngene <- c(1,2)
else ngene <- 1
for(g in ngene){
if( m.carrier==1 & g==1) {
if(dominant.m) gg <- 1:2
else gg <- 1
prob.G <- sample(gg, 1, replace=TRUE, prob=pGene[1,gg])
}#Close for if( m.carrier==1 & g==1)
else prob.G <- sample(c(1,2,3),1, replace=TRUE, prob=pGene[g,])
#generate proband's parents' genotype based on proband's genotype
G1 <- parents.g(prob.G, g=g, allelefreq=allelefreq)
nsibs <- sum(proband==0 & generation==2)
sib.G <- kids.g(nsibs, G1) # generate kids' genotype given the proband's genotype
if(g==1){
# ParentsG.m[generation==2] <- 3*(G1[1]-1) + G1[2]
# ParentsG.m = parents' genotype; value can be 1 to 9.
# let AA==1, Aa==2, aa==3,
# 9 possible combinations of parents are 1=(1,1), 2=(1,2), 3=(1,3), 4=(2,1), 5=(2,2), 6=(2,3), 7=(3,1), 8=(3,2), 9=(3,3)
# disegene.m = geno of marjor gene as 1(AA), 2(Aa), or 3(aa)
disgene.m[ generation==1 ] <- G1
disgene.m[ proband==1 ] <- prob.G
disgene.m[ proband==0 & generation==2 ] <- sib.G
}
else{
# disegene.s = genotype of second gene as 1(AA), 2(Aa), or 3(aa)
disgene.s[generation==1 ] <- G1
disgene.s[proband==1] <- prob.G
disgene.s[proband==0 & generation==2 ] <- sib.G
}
}#close for for(g in ngene)
#------------------------------------------------------------------------------
# genotypes of first and second gene for generation=0 which is founder by random selection among AA, Aa, aa
disgene.m[generation==0] <- sample(c(1,2,3), sum(generation==0), replace=TRUE, prob=G[1,] )
if(variation=="secondgene"){
disgene.s[generation==0] <- sample(c(1,2,3), sum(generation==0), replace=TRUE, prob=G[2,] )
}
# genotypes of third generation
for(i in indID[generation==3]){
m.g <- disgene.m[indID==motherID[indID==i]]
f.g <- disgene.m[indID==fatherID[indID==i]]
disgene.m[indID==i] <- kids.g(1, c(m.g, f.g) )
if(variation=="secondgene"){
disgene.s[indID==i] <- kids.g(1, c(disgene.s[indID==motherID[indID==i]], disgene.s[indID==fatherID[indID==i]]))
}
}#Close for for(i in indID[generation==3])
## We dichotomize 3 genotypes into two (carrier or not) depending on the model
## dominant.m=1 if major gene is based on dominant model, 0 for recessive model )
## dominant.s=1 if second gene is based on dominant model, 0 for recessive model )
if(dominant.m) majorgene <- ifelse(disgene.m==3, 0,1)
else majorgene <- ifelse(disgene.m==1,1,0)
if(variation=="secondgene"){
if(dominant.s) secondgene <- ifelse(disgene.s==3,0,1)
else secondgene <- ifelse(disgene.s==1, 1, 0)
}
else secondgene <- rep(0, nIndi)
#------------------------------------------------------------------------------
## simulate the age at onset for the family member: each generation may have different frailty, so simulated seperately.
if(variation=="secondgene"){
if(interaction[1]) x1 <- cbind(gender, majorgene, gender*majorgene, secondgene)
else x1 <- cbind(gender, majorgene, secondgene)
if(interaction[2]) x2 <- cbind(gender, majorgene, gender*majorgene, secondgene)
else x2 <- cbind(gender, majorgene, secondgene)
}
else{
if(interaction[1]) x1 <- cbind(gender, majorgene, gender*majorgene)
else x1 <- cbind(gender, majorgene)
if(interaction[2]) x2 <- cbind(gender, majorgene, gender*majorgene)
else x2 <- cbind(gender, majorgene)
}
xbeta1 <- c(x1 %*% vbeta[[1]])
xbeta2 <- c(x2 %*% vbeta[[2]])
# gen3 = generation 1,2,3
gen3 <- ifelse(generation==2 | generation==0, 2, ifelse(generation==1, 1,3))
affected <- (proband==1)
#------------------------------------------------------------------------------
uni <- runif(nIndi, 0, 1)
ageonset <- apply(cbind(xbeta1, xbeta2, uni), 1, inv_surv_c, base.dist=base.dist, parms=parms, alpha=alpha)
h1 <- hazards(dist=base.dist[1], ageonset, parms[[1]], cuts=NULL)*exp(alpha[1]+xbeta1)
h2 <- hazards(dist=base.dist[2], ageonset, parms[[2]], cuts=NULL)*exp(alpha[2]+xbeta2)
true_status <- ifelse(runif(nIndi) < h1/(h1 + h2), 1, 2)
ageonset <- ageonset+agemin
currentage <- ifelse(censorage > agemax, agemax, censorage)
time <- pmin(currentage, ageonset)
status <- ifelse(currentage >= ageonset, true_status, 0)
# Generating missing genotypes
# mgene is genotype with missing genotype NA
mgene <- majorgene
mm <- round((length(mgene)-1)*mrate) # no. of missing
mgene[is.element(indID, sample(indID[!proband], mm)) ] <- NA
fsize <- length(famID) # family size
naff <- sum(status%in%c(1,2)) # number of affected in the family
df1 <- sum(status==1)
df2 <- sum(status==2)
tmpdata<-cbind(
famID, indID, gender, motherID, fatherID, proband, generation,
majorgene=disgene.m, secondgene=disgene.s, ageonset, currentage,
time, status, true_status, mgene, relation, fsize, naff, df1, df2)
}#Close for if (SecNum > 0)
return(tmpdata)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.