Nothing
familyStructure <-
function (i, cumind = cumind, m.carrier = 1, variation = "none", interaction = FALSE,
add.x = FALSE, x.dist = NULL, x.parms = NULL, depend = NULL,
base.dist = "Weibull", frailty.dist = NULL, base.parms = c(0.016, 3),
vbeta = c(1, 1), allelefreq = 0.02, dominant.m = TRUE, dominant.s = TRUE,
mrate = 0, probandage = c(45, 2), agemin = 20, agemax = 100)
{
tmpdata <- numeric()
indID <- c(cumind+1,cumind+2)
motherID <- c(0,0)
fatherID <- c(0,0)
gender <- c(1,0) ## 0--Female, 1--Male
proband <- c(0,0) ## 0--Non-proband, 1-- proband
generation <- c(1,1) ## generation number, 0 in second generation means married from outside
relation <- c(4,4) #1=proband, 2=sib, 3=child, 4=parent, 5=sibchild, 6=hasband, 7=sibspouse
# SecNum <- 2, we can fix the number of sibs in second generation (eg., 2 sibs)
## number of sibs in second generation be 2 to 5
## truncated negative binomial (>=2 and <=5) with prob=sibprob=0.4551, and rr=1
SecNum <-sample(c(2,3,4,5), 1, replace=TRUE,prob=c(0.4991, 0.2720, 0.1482, 0.0807))
## generate gender by prob=0.5
tmpgender<-sample(c(1,0), SecNum, replace=TRUE, prob=c(0.5,0.5))
SecNum<-length(tmpgender)
if (SecNum > 0) { ## at least one sample in second generation
#tmpproband <- 1 # sample(SecNum,1,replace=TRUE)
NumMem<-2*SecNum+cumind+2
for (j in 1:SecNum) {
if (j==1){ # first one is assigned as the proband
proband <- c(proband, c(1,0))
relation <- c(relation, c(1,6))
}
else {
proband<-c(proband, c(0,0))
relation <- c(relation, c(2,7))
}
indID<- c(indID, 2*j+cumind+1, 2*j+cumind+2 )
fatherID<-c(fatherID, c(cumind+1,0))
motherID<-c(motherID, c(cumind+2,0))
if (tmpgender[j]==0) gender<-c(gender, c(1,0))
else gender<-c(gender, c(0,1))
generation<-c(generation, c(2,0))
ThiNum<-sample(c(2,3,4,5), 1, replace=TRUE, prob=c(0.4991, 0.2720, 0.1482, 0.0807))
for (k in 1:ThiNum) {
proband<-c(proband,0)
indID<- c(indID, NumMem+k)
if(j==1) relation <- c(relation, 3)
else relation <- c(relation, 5)
if (gender[indID==(2*j+cumind+1)]==0) {
fatherID<-c(fatherID, 2*j+cumind+2)
motherID<-c(motherID, 2*j+cumind+1)
}
else {
fatherID<-c(fatherID, 2*j+cumind+1)
motherID<-c(motherID, 2*j+cumind+2)
}
## generate gender by prob=0.5
gender<-c(gender,sample(c(1, 0), 1,replace=TRUE, prob=c(0.5, 0.5)))
generation<-c(generation, 3)
}#Close for for(k in 1:ThiNum)
NumMem<-NumMem+ThiNum
} #Close for for (j in 1:SecNum)
nIndi <- length(indID)
famID <- rep(i, nIndi)
ageonset <- rep(0, nIndi) ## age at onset
censorage <- rep(0, nIndi) ## current age
status <- rep(0, nIndi) ## disease staus
affected <- rep(0, nIndi)
disgene.m <- rep(0, nIndi)
disgene.s <- rep(0, nIndi)
ParentsG.m <- rep(0, nIndi) ## parents genotype; it can be 1 to 9; 0 if founders
pos <- c(1:nIndi)
#------------------------------------------------------------------------------
# Familial correlation will be generated by gamma frailty, log-normal frailty, or secondgene
## Log-normal frailty (logX~Normal with mean=0, variance=1/depend)
if (is.null(frailty.dist)) alpha=0
else if (frailty.dist=="lognormal") alpha <- rnorm(1, mean=0, sd=1/sqrt(depend))
## Gamma Frailty (with mean 1, variance=1/depend)
else if (frailty.dist=="gamma") alpha <- log(rgamma(1, shape=depend, scale=1/depend))
else stop("unrecognized frailty distribution")
#------------------------------------------------------------------------------
# variation is kinship
## library(kinship2)
if (variation == 'kinship'){
sex <- ifelse(gender == 1, 'male', 'female')
mped <- pedigree(indID, fatherID, motherID, sex = sex, famid = famID)
kmat <- as.matrix(kinship(mped))
alpha <- mvrnorm(1, mu = rep(0,nrow(kmat)), Sigma = 2*kmat/depend) # covariance matrix variance = 1/depend
}
#------------------------------------------------------------------------------
### Generating the current age
# First generation with default mean 65 and sd=2.5; Second gen with default mean 45 and sd=2.5; third gene has mean age difference of 20 with their parents and sd=1
#Probands age generated with truncated normal
prob.age <- rtruncnorm(1,a=agemin, b=agemax, mean=probandage[1], sd=probandage[2])
genepos<-pos[generation==1]
censorage[genepos]<- rnorm(length(genepos), mean=prob.age+20, sd=1.5)
min.page1<- min(censorage[genepos])
# if(min.page1 < agemin) stop("agemin is set too large.")
genepos<-pos[generation==0] #spouse
censorage[genepos]<- rnorm(length(genepos), mean=probandage[1], sd=probandage[2])
genepos<-pos[generation==2]
for (j in 1:length(genepos)) {
if(j==1) censorage[genepos[1]] <- prob.age
else censorage[genepos[j]] <- rtruncnorm(1,a=agemin,b=min.page1-14, mean=prob.age[1]-j-1, sd=1.5)
sonpos<-pos[fatherID==indID[genepos[j]] | motherID==indID[genepos[j]]]
min.page2 <- min(censorage[indID==fatherID[sonpos[1]] | indID==motherID[sonpos[1]]])
#if(min.page2 < agemin) stop("agemin is too large.")
for (k in 1:length(sonpos)) {
#censorage[sonpos[k]]<- rtruncnorm(1,a=agemin, b=min.page2, mean=min.page2-20-k, sd=1.5)
censorage[sonpos[k]]<- rnorm(1,mean=min.page2-20-k, sd=1.5)
}#Close for for(k in 1:length(sonpos))
}#Close for for (j in 1:length(genepos))
#------------------------------------------------------------------------------
# generate additional covariate
if(add.x){
if(x.dist =="normal") newx <- rnorm(nIndi, mean=x.parms[1], sd=x.parms[2])
else if(x.dist =="binomial") newx <- rbinom(nIndi, size = x.parms[1], prob = x.parms[2])
}
#------------------------------------------------------------------------------
if(length(allelefreq)==1) allelefreq <- c(allelefreq,0)
AAq <- allelefreq^2
Aaq <- 2*allelefreq*(1-allelefreq)
aaq <- (1-allelefreq)^2
G <- cbind(AAq, Aaq, aaq)
#----------------------------------------------------------------------------
## For generating genotypes, we first generate the proband's genotype and based on that, we generate other family members' genotypes
# Generate proband's genotype given its age at onset and gender
prob.age <- censorage[proband==1] # proband's current age
prob.sex <- gender[proband==1]
if(add.x) prob.x <- newx[proband==1]
if(variation == "kinship"){
prob.alpha <- alpha[proband == 1]
} else prob.alpha = alpha
if(add.x) pGene <- fgeneZX(base.dist, frailty.dist, depend=depend, affage=prob.age-agemin,
affsex=prob.sex, affx = prob.x, interaction=interaction, variation=variation,
base.parms=base.parms, vbeta=vbeta, alpha=prob.alpha, pg=c(0,0), m.carrier=m.carrier,
dominant.m=dominant.m, aq = allelefreq)
else pGene <- fgeneZ(base.dist, frailty.dist, depend=depend, affage=prob.age-agemin,
affsex=prob.sex, interaction=interaction, variation=variation,
base.parms=base.parms, vbeta=vbeta, alpha=prob.alpha, pg=c(0,0), m.carrier=m.carrier,
dominant.m=dominant.m, aq = allelefreq)
#------------------------------------------------------------------------------
if(variation=="secondgene") ngene <- c(1,2)
else ngene <- 1
for(g in ngene){
if( m.carrier==1 & g==1) {
if(dominant.m) gg <- 1:2
else gg <- 1
prob.G <- sample(gg, 1, replace=TRUE, prob=pGene[1,gg])
}#Close for if( m.carrier==1 & g==1)
else prob.G <- sample(c(1,2,3),1, replace=TRUE, prob=pGene[g,])
#generate proband's parents' genotype based on proband's genotype
G1 <- parents.g(prob.G, g=g, allelefreq=allelefreq)
nsibs <- sum(proband==0 & generation==2)
sib.G <- kids.g(nsibs, G1) # generate kids' genotype given the proband's genotype
if(g==1){
# ParentsG.m[generation==2] <- 3*(G1[1]-1) + G1[2]
# ParentsG.m = parents' genotype; value can be 1 to 9.
# let AA==1, Aa==2, aa==3,
# 9 possible combinations of parents are 1=(1,1), 2=(1,2), 3=(1,3), 4=(2,1), 5=(2,2), 6=(2,3), 7=(3,1), 8=(3,2), 9=(3,3)
# disegene.m = geno of marjor gene as 1(AA), 2(Aa), or 3(aa)
disgene.m[ generation==1 ] <- G1
disgene.m[ proband==1 ] <- prob.G
disgene.m[ proband==0 & generation==2 ] <- sib.G
}
else{
# disegene.s = genotype of second gene as 1(AA), 2(Aa), or 3(aa)
disgene.s[generation==1 ] <- G1
disgene.s[proband==1] <- prob.G
disgene.s[proband==0 & generation==2 ] <- sib.G
}
}#close for for(g in ngene)
#------------------------------------------------------------------------------
# genotypes of first and second gene for generation=0 which is founder by random selection among AA, Aa, aa
disgene.m[generation==0] <- sample(c(1,2,3), sum(generation==0), replace=TRUE, prob=G[1,] )
if(variation=="secondgene"){
disgene.s[generation==0] <- sample(c(1,2,3), sum(generation==0), replace=TRUE, prob=G[2,] )
}
# genotypes of third generation
for(i in indID[generation==3]){
m.g <- disgene.m[indID==motherID[indID==i]]
f.g <- disgene.m[indID==fatherID[indID==i]]
disgene.m[indID==i] <- kids.g(1, c(m.g, f.g) )
if(variation=="secondgene"){
disgene.s[indID==i] <- kids.g(1, c(disgene.s[indID==motherID[indID==i]], disgene.s[indID==fatherID[indID==i]]))
}
}#Close for for(i in indID[generation==3])
## We dichotomize 3 genotypes into two (carrier or not) depending on the model
## dominant.m=1 if major gene is based on dominant model, 0 for recessive model )
## dominant.s=1 if second gene is based on dominant model, 0 for recessive model )
if(dominant.m) majorgene <- ifelse(disgene.m==3, 0,1)
else majorgene <- ifelse(disgene.m==1,1,0)
if(variation=="secondgene"){
if(dominant.s) secondgene <- ifelse(disgene.s==3,0,1)
else secondgene <- ifelse(disgene.s==1 , 1, 0)
}
else secondgene <- rep(0, nIndi)
#------------------------------------------------------------------------------
## simulate the age at onset for the family member: each generation may have different frailty, so simulated seperately.
## x matrix and xbeta
x <- cbind(gender, majorgene)
if(interaction) x <- cbind(gender, majorgene, gender*majorgene)
if(variation == "secondgene") x <- cbind(x, secondgene)
if(add.x) x <- cbind(x, newx)
xbeta <- c(x %*% vbeta)
# gen3 = generation 1,2,3
gen3 <- ifelse(generation==2 | generation==0, 2, ifelse(generation==1, 1,3))
affected <- (proband==1)
#------------------------------------------------------------------------------
if(variation == "frailty" | variation == "kinship"){
uni <- runif(nIndi, 0, 1)
ageonset <- apply(cbind(xbeta,alpha, uni), 1, inv2.surv, base.dist=base.dist, parms=base.parms)
} else{
## generating ageonsets for probands
genepos <- pos[proband==1]
xvbeta <- xbeta[genepos]
affage <- censorage[genepos]
affage.min <- ifelse(affage>agemin, affage-agemin, 0)
uni <- runif(length(genepos), 0,1)
ageonset[genepos] <- apply(cbind(xvbeta,affage.min, uni), 1, inv.survp, base.dist=base.dist, parms=base.parms, alpha=alpha)
## generate ageonset for non-affected ones
genepos <- pos[proband==0]
xvbeta <- xbeta[genepos]
uni <- runif(length(genepos), 0, 1)
ageonset[genepos] <- apply(cbind(xvbeta,uni), 1, inv.surv, base.dist=base.dist, parms=base.parms, alpha=alpha)
}
ageonset <- ageonset+agemin
currentage <- ifelse(censorage > agemax, agemax, censorage)
time <- pmin(currentage, ageonset)
status <- ifelse(currentage >= ageonset, 1, 0)
# Generating missing genotypes
# mgene is genotype with missing genotype NA
mgene <- majorgene
mm <- round((length(mgene)-1)*mrate) # no. of missing
mgene[is.element(indID, sample(indID[!proband], mm)) ] <- NA
fsize <- length(famID) # family size
naff <- sum(status) # number of affected in the family
if(add.x){
tmpdata<-cbind(
famID, indID, gender, motherID, fatherID, proband, generation,
majorgene=disgene.m, secondgene=disgene.s, ageonset, currentage,
time, status, mgene, newx, relation, fsize, naff)
}
else
tmpdata <- cbind(
famID, indID, gender, motherID, fatherID, proband, generation,
majorgene=disgene.m, secondgene=disgene.s, ageonset, currentage,
time, status, mgene, relation, fsize, naff)
}#Close for if (SecNum > 0)
return(tmpdata)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.