R/epinetr_utils.R

Defines functions loadGeno estEffects fitRnd normalisedrnorm selectSeeds getSeeds getEpi geno2hap hap2geno calcAF updatePedigree calcEpiScale testPop

Documented in loadGeno

# Test to see if object is of class 'Population'
testPop <- function(pop) {
  if (!is(pop, "Population")) {
    stop("pop must be of class Population")
  }
}


# Calculate scaling factor for epistasis
calcEpiScale <- function(pop) {
  epiVals <- getEpi(pop)

  pop$epiScale <- sqrt(pop$VarG - pop$VarA) / sd(epiVals)
  pop$epiOffset <- -mean(epiVals) * pop$epiScale

  return(pop)
}


# Update the pedigree data frame with new phenotypic components
updatePedigree <- function(pop) {
  # Evaluate all phenotypic components across population
  pheno <- getPheno(pop)

  # Ensure that epistasis is orthogonal to additive effects
  if (pop$H2 > pop$h2 && !is.null(pop$epiNet) && pop$h2 > 0 && !is.null(pop$additive)) {
    pop <- getSeeds(pop)
    pop$epiOffset <- 0
    pop$epiScale <- 1
    epi <- getEpi(pop)

    pop$epiScale <- sqrt(pop$VarG - pop$VarA)/sd(epi)
    epi <- epi * pop$epiScale
    pop$epiOffset <- -mean(epi)
    epi <- epi + pop$epiOffset
    pheno[, 2] <- epi
    pheno[, 4] <- rowSums(pheno[, 1:3])
  }

  if (pop$H2 < 1) {
    pheno[, 3] <- fitRnd(pheno[, 1] + pheno[, 2], normalisedrnorm,
      length(pheno[, 1]), sqrt(pop$VarE))
    pheno[, 4] <- rowSums(pheno[, 1:3])
  }

  # Store total phenotypic value for each member of the population
  if (pop$select == "TGV") {
    pop$phenotype <- pheno[, 1] + pheno[, 2]
  } else if (pop$select == "EBV") {
    pop$phenotype <- pheno[, 5]
  } else {
    pop$phenotype <- pheno[, 4]
  }

  sex <- pop$isMale
  sex[pop$isMale] <- "M"
  sex[!pop$isMale] <- "F"

  pop$ped <- data.frame(ID = pop$ID, Sire = rep(0, pop$popSize), Dam = rep(0,
    pop$popSize), Additive = pheno[, 1], Epistatic = pheno[, 2], Environmental = pheno[,
    3], Phenotype = pheno[, 4], EBV = pheno[, 5], Sex = sex)

  return(pop)
}


# Calculate allele frequencies from haplotypes
calcAF <- function(hap) {
  return(.colMeans(hap[[1]] + hap[[2]], m = nrow(hap[[1]]), n = ncol(hap[[1]])) / 2)
}


# Transform a set of haplotypes into a genotypes matrix
hap2geno <- function(hap) {
  geno <- rbind(hap[[1]], hap[[2]])
  geno <- matrix(geno, nrow = nrow(geno) / 2, ncol = ncol(geno) * 2)
  return(geno)
}


# Transform a genotypes matrix into a set of haplotypes
geno2hap <- function(geno) {
  hap <- list()

  geno <- matrix(geno, nrow = nrow(geno) * 2, ncol = ncol(geno) / 2)
  hap[[1]] <- geno[1:(nrow(geno) / 2), ]
  hap[[2]] <- geno[(nrow(geno) / 2 + 1):nrow(geno), ]

  return(hap)
}


# Save the state of the random number generator
# saveRNG <- function() {
#   if (exists(".Random.seed", .GlobalEnv)) {
#     oldseed <- .GlobalEnv$.Random.seed
#   } else {
#     oldseed <- NULL
#   }
#
#   return(oldseed)
# }


# Restore the state of the random number generator
# restoreRNG <- function(oldseed) {
#   if (!is.null(oldseed)) {
#     .GlobalEnv$.Random.seed <- oldseed
#   } else {
#     rm(".Random.seed", envir = .GlobalEnv)
#   }
# }


# Get the epistasis for each individual
getEpi <- function(pop, geno = NULL) {
  epi <- getEpistasis(pop, scale = FALSE, geno = geno)

  # Get dimension of indices
  dimensions <- dim(epi)

  # Get epistatic values per individual
  epi <- .rowSums(epi, dimensions[1], dimensions[2])

  return(epi)
}


# Select seeds to minimise covariance with additive effects
getSeeds <- function(pop) {
  geno <- pop$hap[[1]][, pop$qtl] + pop$hap[[2]][, pop$qtl]
  add <- (geno %*% t(t(pop$additive)))[, 1]
  n <- ncol(pop$epiNet$Incidence)
  retval <- GA::ga(type = "real-valued", fitness = selectSeeds, pop, geno, add, lower = rep(10^5, n), upper = rep(10^8, n), run = 15)
  pop$epiNet$Seeds <- floor(retval@solution[1, ])

  return(pop)
}


# Objective function for selecting seeds to minimise covariance
selectSeeds <- function(x, pop, geno, add) {
  x <- floor(x)
  pop$epiScale <- 1
  pop$epiOffset <- 0
  pop$epiNet$Seeds <- floor(x)
  epi <- getEpi(pop, geno)

  return(-abs(cov(add, epi)))
}


# Returns a set of normalised values with a mean of 0 and a standard
# deviation of sdev.
normalisedrnorm <- function(n, sdev) {
  env <- rnorm(n)
  env <- (env - mean(env)) / sd(env) * sdev

  return(env)
}


# Returns a random set of values taken from a normal distribution,
# with a mean of 0 and a standard deviation of sdev.  Covariance
# with a second vector, vec, will be minimised within a tolerance
# given, with iter.max iterations.
fitRnd <- function(vec, fun, ..., tolerance = 0.005, iter.max = 1000) {
  best <- fun(...)

  if (abs(cov(vec, best)) > 0) {
    for (i in 1:iter.max) {
      vec2 <- fun(...)

      if (abs(cov(vec, vec2)) < abs(cov(vec, best))) {
        best <- vec2
      }

      if (abs(cov(vec, best)) <= tolerance) {
        break
      }
    }
  }

  return(best)
}


estEffects <- function(pop) {
  if (!pop$h2Estuser)
    message("Estimating heritability...")

  # Generate the GRM
  M <- t(pop$hap[[1]] + pop$hap[[2]])
  p <- rowMeans(M)/2
  M <- M - 1
  P <- 2 * (p - 0.5)
  Z <- M - P
  ZtZ <- crossprod(Z)
  d <- 2 * sum(p * (1 - p))
  G <- ZtZ/d

  if (!pop$h2Estuser) {
    # Get eigenvalues and eigenvectors
    SVD <- eigen(G, symmetric = TRUE)

    # Get h2 estimate
    y <- (pop$phenotype - mean(pop$phenotype))/sd(pop$phenotype)
    loglike <- function(x) {
      ll1 <- (length(y) - 1) * log(x + 1)
      ll2 <- sum(log(x * SVD$values + 1))
      ll3 <- (x + 1) * sum(as.numeric(crossprod(y, SVD$vectors)^2)/(x *
        SVD$values + 1))
      return(-ll1 + ll2 + ll3)
    }
    grad <- function(x) {
      dll1 <- (length(y) - 1)/(x + 1)
      dll2 <- sum(SVD$values/(x * SVD$values + 1) + ((as.numeric(crossprod(y,
        SVD$vectors))/(x * SVD$values + 1))^2) * (1 - SVD$values))
      return(-dll1 + dll2)
    }
    tmp <- constrOptim(1, loglike, grad, matrix(1, 1, 1), 0, method = "BFGS")$par
    pop$h2Est <- tmp/(tmp + 1)
  }

  # Get SNP effect estimates
  message("Estimating SNP effects...")
  diag(G) <- diag(G) + 0.001
  lambda <- (1 - pop$h2Est)/pop$h2Est
  Ginv <- solve(G)
  Gil <- Ginv * lambda
  diag(Gil) <- diag(Gil) + 1
  Gil <- solve(Gil)
  ebv <- Gil %*% pop$phenotype
  pop$addEst <- (1/d * Z %*% Ginv %*% ebv)[, 1]

  return(pop)
}


#' Load epinetr genotype file.
#'
#' Load genotypes from a previous epinetr session.
#'
#' When outputting all genotypes during an epinetr simulation run, the genotypes
#' will be written to a serialised format unique to epinetr. The \code{loadGeno}
#' function will load these genotypes into memory as a single matrix object.
#'
#' @param filename the filename for the epinetr genotypes file.
#'
#' @return a numeric matrix holding the genotypes
#'
#' @author Dion Detterer, Paul Kwan, Cedric Gondro
#'
#' @export
#'
#' @examples
#' # Load genotype file
#' filename <- system.file("extdata", "geno.epi", package = "epinetr")
#' geno <- loadGeno(filename)
#'
#' # Use genotypes as basis for new population
#' pop <- Population(
#'   map = map100snp, QTL = 20, genotypes = geno,
#'   broadH2 = 0.8, narrowh2 = 0.6, traitVar = 40
#' )
#' @seealso \code{\link{Population}}
loadGeno <- function(filename) {
  geno <- getSerialMat(filename)

  if (is.null(geno)) {
    stop("Could not open file")
  }

  return(geno)
}

Try the epinetr package in your browser

Any scripts or data that you put into this service are public.

epinetr documentation built on March 18, 2022, 7:01 p.m.