Nothing
#' Process Generations for Pedigree Simulation
#'
#' This function iterates through generations in a pedigree simulation, assigning IDs,
#' creating data frames, determining sexes, and managing pairing within each generation.
#'
#' @inheritParams simulatePedigree
#' @inheritParams createGenDataFrame
#' @return A data frame representing the simulated pedigree, including columns for family ID (`fam`),
buildWithinGenerations <- function(sizeGens, marR, sexR, Ngen) {
for (i in 1:Ngen) {
idGen <- as.numeric(paste(100, i, 1:sizeGens[i], sep = ""))
# idGen <- ifelse(i==1,
# paste(i,"-",1:sizeGens[i]),
# paste(i,"-",sizeGens[i-1]:sizeGens[i]))
### For each generation, create a separate dataframe
df_Ngen <- createGenDataFrame(
sizeGens = sizeGens,
genIndex = i,
idGen = idGen
)
### Let's deal with the sex in each generation first
df_Ngen$sex <- determineSex(idGen = idGen, sexR = sexR)
# print(paste("tiger",i))
# The first generation
if (i == 1) {
df_Ngen$spID[1] <- df_Ngen$id[2]
df_Ngen$spID[2] <- df_Ngen$id[1]
df_Ngen$sex[1] <- "F"
df_Ngen$sex[2] <- "M"
}
## Connect male and female into couples in each generations
marR_crt <- (1 + marR) / 2
usedFemaleIds <- numeric()
usedMaleIds <- numeric()
# reserve the single persons
if (i != 1 && i != Ngen) {
nMerriedFemale <- round(sum(df_Ngen$sex == "F") * marR_crt)
nMerriedMale <- round(sum(df_Ngen$sex == "M") * marR_crt)
# make sure there are same numbers of married males and females
if (nMerriedFemale >= nMerriedMale) {
nMerriedFemale <- nMerriedMale
} else {
nMerriedMale <- nMerriedFemale
}
# get the number of single males and females
nSingleFemale <- sum(df_Ngen$sex == "F") - nMerriedFemale
nSingleMale <- sum(df_Ngen$sex == "M") - nMerriedMale
# sample single ids from male ids and female ids
usedFemaleIds <- sample(df_Ngen$id[df_Ngen$sex == "F"], nSingleFemale)
## print(c("Used F", usedFemaleIds))
usedMaleIds <- sample(df_Ngen$id[df_Ngen$sex == "M"], nSingleMale)
## print(c("Used M", usedMaleIds))
usedIds <- c(usedFemaleIds, usedMaleIds)
# Create spouses
for (j in seq_len(nrow(df_Ngen))) {
if (df_Ngen$id[j] %in% usedIds) {
next
} else {
# idx <- j+1
if (df_Ngen$sex[j] == "F") {
for (k in seq_len(nrow(df_Ngen))) {
idr <- df_Ngen$id[k]
tgt <- (!(idr %in% usedIds)) & df_Ngen$sex[k] == "M"
# tgt <- ifelse(is.na(tgt),FALSE,TRUE)
if (tgt) {
df_Ngen$spID[j] <- df_Ngen$id[k]
df_Ngen$spID[k] <- df_Ngen$id[j]
usedIds <- c(usedIds, df_Ngen$id[j], df_Ngen$id[k])
break
} else {
next
}
}
} else {
for (k in seq_len(nrow(df_Ngen))) {
idr <- df_Ngen$id[k]
tgt <- (!(idr %in% usedIds)) & df_Ngen$sex[k] == "F"
# tgt <- ifelse(is.na(tgt),FALSE,TRUE)
if (tgt) {
df_Ngen$spID[j] <- df_Ngen$id[k]
df_Ngen$spID[k] <- df_Ngen$id[j]
usedIds <- c(usedIds, df_Ngen$id[j], df_Ngen$id[k])
break
} else {
next
}
}
}
}
# print(usedIds)
}
}
if (i == 1) {
df_Fam <- df_Ngen
} else {
df_Fam <- rbind(df_Fam, df_Ngen)
}
}
return(df_Fam)
}
#' Process Generation Connections
#'
#' This function processes connections between each two generations in a pedigree simulation.
#' It marks individuals as parents, sons, or daughters based on their generational position and relationships.
#' The function also handles the assignment of couple IDs, manages single and coupled individuals,
#' and establishes parent-offspring links across generations.
#' @param df_Fam A data frame containing the simulated pedigree information up to the current generation.
#' Must include columns for family ID, individual ID, generation number, spouse ID (spID),
#' and sex. This data frame is updated in place to include flags for parental status (ifparent),
#' son status (ifson), and daughter status (ifdau), as well as couple IDs.
#' @inheritParams simulatePedigree
#' @inheritParams createGenDataFrame
#'
#' @details
#' The function iterates through each generation, starting from the second, to establish connections based on mating and parentage.
#' For the first generation, it sets the parental status directly. For subsequent generations, it calculates the number of couples,
#' the expected number of offspring, and assigns offspring to parents. It handles gender-based assignments for sons and daughters,
#' and deals with the nuances of single individuals and couple formation. The function relies on external functions `assignCoupleIds`
#' and `adjustKidsPerCouple` to handle specific tasks related to couple ID assignment and offspring number adjustments, respectively.
#'
#' @return The function updates the `df_Fam` data frame in place, adding or modifying columns related to parental and offspring status,
#' as well as assigning unique couple IDs. It does not return a value explicitly.
#'
buildBetweenGenerations <- function(df_Fam, Ngen, sizeGens, verbose, marR, sexR, kpc, rd_kpc) {
df_Fam$ifparent <- FALSE
df_Fam$ifson <- FALSE
df_Fam$ifdau <- FALSE
for (i in 1:Ngen) {
# generation 1 doesn't need any mother and father
if (i == 1) {
df_Ngen <- df_Fam[df_Fam$gen == i, ]
df_Ngen$ifparent <- TRUE
df_Ngen$ifson <- FALSE
df_Ngen$ifdau <- FALSE
df_Fam[df_Fam$gen == i, ] <- df_Ngen
} else {
# calculate the number of couples in the i-1 th generation
N_couples <- (sizeGens[i - 1] - sum(is.na(df_Fam$spID[df_Fam$gen == i - 1]))) * 0.5
# calculate the number of members in the i th generation that have a link to the couples in the i-1 th generation
N_LinkedMem <- N_couples * kpc
# decompose the linked members into females and males respectively
N_LinkedFemale <- round(N_LinkedMem * (1 - sexR))
N_LinkedMale <- N_LinkedMem - N_LinkedFemale
# Create a pool for used male children and female children respectively
usedFemaleIds <- numeric()
usedMaleIds <- numeric()
usedIds <- c(usedFemaleIds, usedMaleIds)
# get the df for the i the generation
df_Ngen <- df_Fam[df_Fam$gen == i, ]
df_Ngen$ifparent <- FALSE
df_Ngen$ifson <- FALSE
df_Ngen$ifdau <- FALSE
df_Ngen$coupleId <- NA_character_
df_Ngen <- df_Ngen[sample(nrow(df_Ngen)), ]
# Start to connect children with mother and father
#
if (verbose) {
print(
"Step 2.1: mark a group of potential sons and daughters in the i th generation"
)
}
# try to rewrite the code
# count the number of couples in the i th gen
countCouple <- (nrow(df_Ngen) - sum(is.na(df_Ngen$spID))) * .5
# Now, assign couple IDs for the current generation
df_Ngen <- assignCoupleIds(df_Ngen)
# get the number of linked female and male children after excluding the single children
# get a vector of single person id in the ith generation
IdSingle <- df_Ngen$id[is.na(df_Ngen$spID)]
SingleF <- sum(df_Ngen$sex == "F" & is.na(df_Ngen$spID))
CoupleF <- N_LinkedFemale - SingleF
SingleM <- sum(df_Ngen$sex == "M" & is.na(df_Ngen$spID))
CoupleM <- N_LinkedMale - SingleM
df_Fam[df_Fam$gen == i, ] <- markPotentialChildren(df_Ngen = df_Ngen, i = i, Ngen = Ngen, sizeGens = sizeGens, CoupleF = CoupleF)
if (verbose) {
print(
"Step 2.2: mark a group of potential parents in the i-1 th generation"
)
}
df_Ngen <- df_Fam[df_Fam$gen == i - 1, ]
df_Ngen$ifparent <- FALSE
df_Ngen$ifson <- FALSE
df_Ngen$ifdau <- FALSE
df_Ngen <- df_Ngen[sample(nrow(df_Ngen)), ]
# Create a pool for the used parents
usedParentIds <- numeric()
for (k in 1:sizeGens[i - 1]) {
# first check if the number of married couples surpass the marriage rate
if (sum(df_Ngen$ifparent) / nrow(df_Ngen) >= marR) {
break
} else {
# check if the id is used and if the member has married
if (!(df_Ngen$id[k] %in% usedParentIds) & !is.na(df_Ngen$spID[k])) {
df_Ngen$ifparent[k] <- TRUE
df_Ngen$ifparent[df_Ngen$spID == df_Ngen$id[k]] <- TRUE
usedParentIds <- c(usedParentIds, df_Ngen$id[k], df_Ngen$spID[k])
} else {
next
}
}
}
df_Ngen <- df_Ngen[order(as.numeric(rownames(df_Ngen))), , drop = FALSE]
df_Fam[df_Fam$gen == i - 1, ] <- df_Ngen
if (verbose) {
print(
"Step 2.3: connect the i and i-1 th generation"
)
}
if (i == 1) {
next
} else {
# get the df for i and i-1 th generations
df_Ngen <- df_Fam[df_Fam$gen %in% c(i, i - 1), ]
sizeI <- sizeGens[i - 1]
sizeII <- sizeGens[i]
# create a vector with ordered ids that should be connected to a parent
# print(df_Ngen)
IdSon <- df_Ngen$id[df_Ngen$ifson == TRUE & df_Ngen$gen == i]
# print(IdSon)
IdDau <- df_Ngen$id[df_Ngen$ifdau == TRUE & df_Ngen$gen == i]
# print(IdDau)
IdOfp <- evenInsert(IdSon, IdDau)
# generate link kids to the couples
random_numbers <- adjustKidsPerCouple(nMates = sum(df_Ngen$ifparent) / 2, kpc = kpc, rd_kpc = rd_kpc)
# cat("final random numbers",random_numbers, "\n")
# cat("mean",sum(random_numbers)/length(random_numbers), "\n")
# create two vectors for maId and paId; replicate the ids to match the same length as IdOfp
IdMa <- numeric()
IdPa <- numeric()
usedIds <- numeric()
idx <- 1
for (l in 1:sizeI) {
# check if the id is used
if (!df_Ngen$id[l] %in% usedIds) {
# check if the member can be a parent
if (df_Ngen$ifparent[l] == TRUE && df_Ngen$sex[l] == "F") {
usedIds <- c(usedIds, df_Ngen$id[l], df_Ngen$spID[l])
IdMa <- c(IdMa, rep(df_Ngen$id[l], random_numbers[idx]))
IdPa <- c(IdPa, rep(df_Ngen$spID[l], random_numbers[idx]))
idx <- idx + 1
} else if (df_Ngen$ifparent[l] == TRUE && df_Ngen$sex[l] == "M") {
usedIds <- c(usedIds, df_Ngen$id[l], df_Ngen$spID[l])
IdPa <- c(IdPa, rep(df_Ngen$id[l], random_numbers[idx]))
IdMa <- c(IdMa, rep(df_Ngen$spID[l], random_numbers[idx]))
idx <- idx + 1
} else {
next
}
} else {
next
}
}
# the length of IdMa and IdPa can be longer than the vector of offspring, so truncated it
### making sure sampling out the single people instead of couples
if (length(IdPa) - length(IdOfp) > 0) {
if (verbose) {
print("length of IdPa", length(IdPa), "\n")
}
IdRm <- sample.int(length(IdPa), size = length(IdPa) - length(IdOfp))
IdPa <- IdPa[-IdRm]
IdMa <- IdMa[-IdRm]
} else if (length(IdPa) - length(IdOfp) < 0) {
# cat("length of IdOfp", length(IdOfp), "\n")
# cat("length of IdPa", length(IdPa), "\n")
# cat("length of IdSingle", length(IdMa), "\n")
IdRm <- resample(IdSingle, size = length(IdOfp) - length(IdPa))
IdOfp <- IdOfp[!(IdOfp %in% IdRm)]
}
# if (length(IdMa)- length(IdOfp) > 0){
# IdRm <- sample.int(length(IdMa),size =length(IdMa)-length(IdOfp))
# IdPa <- IdPa[-IdRm]
# IdMa <- IdMa[-IdRm]
# }else if (length(IdMa)-length(IdOfp) < 0) {
# IdRm <- sample.int(length(IdOfp),size =length(IdOfp)-length(IdMa))
# IdOfp <- IdOfp[-IdRm]
# }
# print(matrix(c(IdPa, IdMa), ncol = 2))
# print(IdPa)
# print(IdOfp)
# put the IdMa and IdPa into the dfFam with correspondent OfpId
for (m in seq_along(IdOfp)) {
df_Ngen[df_Ngen$id == IdOfp[m], "pat"] <- IdPa[m]
df_Ngen[df_Ngen$id == IdOfp[m], "mat"] <- IdMa[m]
}
# print(df_Ngen)
df_Fam[df_Fam$gen == i, ] <- df_Ngen[df_Ngen$gen == i, ]
df_Fam[df_Fam$gen == i - 1, ] <- df_Ngen[df_Ngen$gen == i - 1, ]
}
}
}
return(df_Fam)
}
#' Simulate Pedigrees
#' This function simulates "balanced" pedigrees based on a group of parameters:
#' 1) k - Kids per couple;
#' 2) G - Number of generations;
#' 3) p - Proportion of males in offspring;
#' 4) r - Mating rate.
#'
#' @importFrom stats runif
#' @param kpc Number of kids per couple. An integer >= 2 that determines how many kids each fertilized mated couple will have in the pedigree. Default value is 3. Returns an error when kpc equals 1.
#' @param Ngen Number of generations. An integer >= 2 that determines how many generations the simulated pedigree will have. The first generation is always a fertilized couple. The last generation has no mated individuals.
#' @param sexR Sex ratio of offspring. A numeric value ranging from 0 to 1 that determines the proportion of males in all offspring in this pedigree. For instance, 0.4 means 40 percent of the offspring will be male.
#' @param marR Mating rate. A numeric value ranging from 0 to 1 which determines the proportion of mated (fertilized) couples in the pedigree within each generation. For instance, marR = 0.5 suggests 50 percent of the offspring in a specific generation will be mated and have their offspring.
#' @param rd_kpc logical. If TRUE, the number of kids per mate will be randomly generated from a poisson distribution with mean kpc. If FALSE, the number of kids per mate will be fixed at kpc.
#' @param balancedSex Not fully developed yet. Always \code{TRUE} in the current version.
#' @param balancedMar Not fully developed yet. Always \code{TRUE} in the current version.
#' @param verbose logical If TRUE, print progress through stages of algorithm
#' @return A \code{data.frame} with each row representing a simulated individual. The columns are as follows:
#' \itemize{
#' \item{fam: The family id of each simulated individual. It is 'fam1' in a single simulated pedigree.}
#' \item{ID: The unique personal ID of each simulated individual. The first digit is the fam id; the fourth digit is the generation the individual is in; the following digits represent the order of the individual within his/her pedigree. For example, 100411 suggests this individual has a family id of 1, is in the 4th generation, and is the 11th individual in the 4th generation.}
#' \item{gen: The generation the simulated individual is in.}
#' \item{dadID: Personal ID of the individual's father.}
#' \item{momID: Personal ID of the individual's mother.}
#' \item{spID: Personal ID of the individual's mate.}
#' \item{sex: Biological sex of the individual. F - female; M - male.}
#' }
#' @export
simulatePedigree <- function(kpc = 3,
Ngen = 4,
sexR = .5,
marR = 2 / 3,
rd_kpc = FALSE,
balancedSex = TRUE,
balancedMar = TRUE,
verbose = FALSE) {
# SexRatio: ratio of male over female in the offspring setting; used in the between generation combinations
# SexRatio <- sexR / (1 - sexR)
# Calculate the expected family size in each generations
sizeGens <- allGens(kpc = kpc, Ngen = Ngen, marR = marR)
# famSizeIndex <- 1:sum(sizeGens)
if (verbose) {
print(
"Step 1: Let's build the connection within each generation first"
)
}
df_Fam <- buildWithinGenerations(
sizeGens = sizeGens,
Ngen = Ngen, sexR = sexR, marR = marR
)
if (verbose) {
print(
"Step 2: Let's try to build connection between each two generations"
)
}
df_Fam <- buildBetweenGenerations(
df_Fam = df_Fam, Ngen = Ngen,
sizeGens = sizeGens, verbose = verbose, marR = marR, sexR = sexR, kpc = kpc, rd_kpc = rd_kpc
)
df_Fam <- df_Fam[, 1:7]
df_Fam <- df_Fam[!(is.na(df_Fam$pat) & is.na(df_Fam$mat) & is.na(df_Fam$spID)), ]
colnames(df_Fam)[c(2, 4, 5)] <- c("ID", "dadID", "momID")
# connect the detached members
df_Fam[is.na(df_Fam$momID) & is.na(df_Fam$dadID) & df_Fam$gen > 1, ]
# if the sex rate is .5, make there is a 50% chance to change male to female and female to male
# doesn't seem to produce the expected results, sometimes leads to moms being classified as dads
# if (sexR == .5 & runif(1) > .5) {
# df_Fam$sex[df_Fam$sex == "M"] <- "F1"
# df_Fam$sex[df_Fam$sex == "F"] <- "M"
# df_Fam$sex[df_Fam$sex == "F1"] <- "F"
# }
# print(df_Fam)
return(df_Fam)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.