R/rfitness.R
In OncoSimulR: Forward Genetic Simulation of Cancer Progression with Epistasis

Documented in rfitness

## Copyright 2013, 2014, 2015, 2016, 2017 Ramon Diaz-Uriarte

## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program.  If not, see <http://www.gnu.org/licenses/>.


create_eq_ref <- function(g) {
    ## "random" gives more prob. to genotypes with
    ## number of mutated genes close to g/2.
    ## This gives equal prob to having the reference
    ## be of any of the possible number of mutated genes.
    nm <- sample(g, 1)
    ref <- c(rep(1, nm), rep(0, g - nm))
    sample(ref)
}



get_magellan_binaries <-
    function(names_binaries = c("fl_statistics", "fl_generate", "fl_genchains"))
{
    rarch <- Sys.getenv('R_ARCH')
    nn_names_binaries <- names_binaries
    if(.Platform$OS.type == "windows")
        names_binaries <- paste0(names_binaries, ".exe")
    if(nzchar(rarch)) {
        rarch <- sub("^/", "", rarch)
        magellan_binaries <-  system.file(package = "OncoSimulR", "exec",
                                          rarch, names_binaries)
    } else {
        magellan_binaries <-  system.file(package = "OncoSimulR", "exec",
                                          names_binaries)
    }
    names(magellan_binaries) <- nn_names_binaries
    return(magellan_binaries)
}

## pkg.env <- new.env()
## ## The next will not install with error
## ## ERROR: hard-coded installation path:
## please report to the package maintainer and use ‘--no-staged-install’
## pkg.env <- c(pkg.env, get_magellan_binaries())



fl_statistics_binary <- function() get_magellan_binaries("fl_statistics")
fl_generate_binary <- function() get_magellan_binaries("fl_generate")


rfitness <- function(g, c= 0.5,
                     sd = 1,
                     mu = 1,
                     reference = "random", ## "random", "max", or the vector,
                                     ## e.g., rep(1, g). If random, a
                                     ## random genotype is chosen as
                     ## reference. If "max" this is rep(1, g)
                     scale = NULL, ## a two-element vector: min and max
                     wt_is_1 = c("subtract", "divide", "force", "no"),
                     ## wt_is_1 = TRUE, ## wt has fitness 1
                     log = FALSE, ## log only makes sense if all values >
                                 ## 0. scale with min > 0, and/or set
                                 ## wt_is_1 = divide
                     min_accessible_genotypes = NULL,
                     accessible_th = 0,
                     truncate_at_0 = TRUE,
                     K = 1,
                     r = TRUE,
                     i = 0, # Ising, cost incompatibility
                     I = -1, # Ising, sd for "i"
                     circular = FALSE, # Ising, circular arrangement
                     e = 0, # Eggbox, +/- e
                     E = -1, # Eggbox, noise on "e"
                     H = -1, # HoC stdev
                     s = 0.1, # mean multiplivative
                     S = -1, # SD multiplicative
                     d = 0, # disminishing/increasing for multiplicative
                     o = 0, # mean optimum
                     O = -1, # sd optimum
                     p = 0, # mean production for non 0 allele (optimum)
                     P = -1, # sd for p
                    
                     model = c("RMF", "Additive", 
                               "NK", "Ising", "Eggbox", "Full")) {
    ## Like Franke et al., 2011 and others of Krug. Very similar to Greene
    ## and Crona, 2014. And this allows moving from HoC to purely additive
    ## changing c and sd.

    ## FIXME future: do this with order too?
    ##    - do not generate a matrix of genotypes but a matrix of ordered genot.
    wt_is_1 = match.arg(wt_is_1)
    model = match.arg(model)
    if(is_null_na(g)) stop("Number of genes argument (g) is null or NA")
    m <- generate_matrix_genotypes(g)
    done <- FALSE
    ## attempts <- 0 ## for debugging/tracking purposes
    while(!done) {
        if(model == "RMF") {
            ## attempts <- attempts + 1
            f_r <- rnorm(nrow(m), mean = mu, sd = sd)
            if(inherits(reference, "character") && length(reference) == 1) {
                if(reference == "random") {
                    referenceI <- m[sample(nrow(m), 1), ]
                } else if(reference == "max") {
                    referenceI <- rep(1, g)
                } else if(reference == "random2") {
                    referenceI <- create_eq_ref(g)
                }
            } else {
                referenceI <- reference
            }
            d_reference <- apply(m, 1, function(x) sum(abs(x - referenceI)))
            f_det <- -c * d_reference
            ## f_det <- rowSums(m) * slope/nrow(m) ## this is Greene and Krona
            fi <- f_r + f_det
        } else if (model == "Additive") {
      ## get fitness effect for mutations in each gene
      mutants <-rep(1,g)
            ## FIXME: Why not just?
            ## f_single_mut <- rnorm(g, mean = mu, sd = sd)
            f_single_mut <- sapply(mutants, FUN = function(x) 
                                            rnorm(x, mean = mu, sd = sd))
      ## find which gene is mutated 
      m2 <- m == 1
      ## Sum the fitness effect of that mutation to generate a vector fi with
      ## the fitness for each mutant condition
      fi <- apply(m2, MARGIN = 1, FUN = function (x) sum(x*f_single_mut))
      ## remove unnecessary variables
      rm (f_single_mut, m2)
        } else if(model == "NK") {
            if(K >= g) stop("It makes no sense to have K >= g")
            argsnk <- paste0("-K ", K,
                             ifelse(r, " -r ", " "),
                             g, " 2")
            fl1 <- system2(fl_generate_binary(), args = argsnk, stdout = TRUE)[-1]
        } else if (model == "Ising") {
      argsIsing <- paste0("-i ", i, " -I ", I ,
                          ifelse(circular, " -c ", " "),
                          g, " 2")
      fl1 <- system2(fl_generate_binary(), args = argsIsing, stdout = TRUE)[-1]
    } else if (model == "Eggbox") {
      argsEgg <- paste0("-e ", e, " -E ", E," ", g, " 2")
      fl1 <- system2(fl_generate_binary(), args = argsEgg, stdout = TRUE)[-1]
    } else if (model == "Full") {
      if(K >= g) stop("It makes no sense to have K >= g")
      argsFull <- paste0("-K ", K, ifelse(r, " -r ", " "),
                         "-i ", i, " -I ", I , ifelse(circular, " -c ", " "),
                         "-e ", e, " -E ", E, " ",
                         "-H ", H, " ",
                         "-s ", s, " -S ", S, " -d ", d, " ",
                         "-o ", o, " -O ", O, " -p ", p, " -P ", P, " ",
                         g, " 2")
      fl1 <- system2(fl_generate_binary(), args = argsFull, stdout = TRUE)[-1]
    }
    if (model == "Eggbox" || model == "Ising" || model == "Full" || model == "NK") {
            fl1 <- matrix(
                as.numeric(unlist(strsplit(paste(fl1, collapse = " "), " "))),
                ncol = g + 1, byrow = TRUE)
            m1 <- fl1[, 1:g]
            fi <- fl1[, g + 1]

            ## For scaling, etc, all that matters, if anything, is the wildtype

            ## We could order by doing this
            ## But I am not 100% sure this will always be the same as
            ## generate_matrix_genotypes
            ## oo <- do.call(order,
            ##               c(list(muts),
            ##                 as.list(data.frame(m1[, rev(1:ncol(m1))]))))
            ## m2 <- m1[oo, ]
            ## Or create an id and order by it?

            ## When we get to 20 genes, this is slow, about 18 seconds each
            ## apply. Matching is fast (< 0.5 seconds)
            gtstring <- apply(m, 1, function(x) paste0(x, collapse = ""))
            gtstring2 <- apply(m1, 1, function(x) paste0(x, collapse = ""))
            oo <- match(gtstring, gtstring2)
            fi <- fi[oo]
            ## make sure no left overs
            rm(gtstring, gtstring2, oo, fl1, m1)

            ## Had we not ordered, do this!!!
            ## Which one is WT?
            ## muts <- rowSums(m1)
            ## w_wt <- which(muts == 0)
            ## if(w_wt != 1) {
            ##     f_a <- fi[1]
            ##     fi[1] <- fi[w_wt]
            ##     fi[w_wt] <- f_a
            ##     rm(f_a)
            ## }
            ## m[] <- m1
            ## rm(m1)
            ## rm(fl1)
        }

        if(!is.null(scale)) {
            fi <- (fi - min(fi))/(max(fi) - min(fi))
            fi <- scale[1] + fi * (scale[2] - scale[1])
        }
        if(wt_is_1 == "divide") {
            ## We need to shift to avoid ratios involving negative numbers and
            ## we need to avoid having any fitness at 0, specially the wt.  If
            ## any negative value, add the min, and shift by the magnitude of
            ## the min to avoid any at 0.

            ## If you use scale and wt_is_1, this will move the scale. It is
            ## not possible to obtain a linear transformation that will keep
            ## the min and max of the scale, and wt at 1.
            min_fi <- min(fi)
            if(min_fi < 0)
                fi <- fi + 2 * abs(min(fi))
            fi <- fi/fi[1]
        } else if (wt_is_1 == "subtract") {
            fi <- fi - fi[1] + 1.0
        } else if(wt_is_1 == "force") {
            fi[1] <- 1.0
            if(!is.null(scale)) {
                if( (1 > scale[2]) || (1 < scale[1]))
                    warning("Using wt_is_1 = force and scale, but scale does ",
                            "not include 1")
            }
        }
        if(log) {
            ## If you want logs, you certainly do not want
            ## the log of a negative number
            fi[fi < 0] <- 0
            if(wt_is_1 == "no") {
                fi <- log(fi)
            } else {
                ## by decree, fitness of wt is 1. So shift everything
                fi <- log(fi) + 1
            }
            ## former expression, but it was more confusing
            ## fi <- log(fi/fi[1]) + 1
            
        }
        if(truncate_at_0) {
            ## yes, truncate but add noise to prevent identical
            fi[fi <= 0] <- runif(sum(fi <= 0),
                                 min = 1e-10,
                                 max = 1e-9)
        }
        m <- cbind(m, Fitness = fi)
        if(!is_null_na(min_accessible_genotypes)) {
            ## num_accessible_genotypes <- count_accessible_g(m, accessible_th)
            ## Recall accessibleGenotypes includes the wt, if accessible.
            num_accessible_genotypes <- length(wrap_accessibleGenotypes(m, accessible_th)) - 1
            ## message("\n     num_accessible_genotypes = ", num_accessible_genotypes, "\n")
            if(num_accessible_genotypes >= min_accessible_genotypes) {
                done <- TRUE
                attributes(m) <- c(attributes(m),
                                   accessible_genotypes = num_accessible_genotypes,
                                   accessible_th = accessible_th)
            } else {
                ## Cannot start again with a fitness column
                m <- m[, -ncol(m), drop = FALSE]
            }
        } else {
            done <- TRUE
        }
    }
    ## message("\n number of attempts = ", attempts, "\n")
    class(m) <- c(class(m), "genotype_fitness_matrix")
    return(m)
}










## rfitness <- function(g, c= 0.5,
##                      sd = 1,
##                      mu = 1,
##                      reference = "random", ## "random", "max", or the vector,
##                                      ## e.g., rep(1, g). If random, a
##                                      ## random genotype is chosen as
##                                      ## reference. If "max" this is rep(1, g)
##                      scale = NULL, ## a two-element vector: min and max
##                      wt_is_1 = c("subtract", "divide", "force", "no"),
##                      ## wt_is_1 = TRUE, ## wt has fitness 1
##                      log = FALSE, ## log only makes sense if all values >
##                                  ## 0. scale with min > 0, and/or set
##                                  ## wt_is_1 = divide
##                      min_accessible_genotypes = NULL,
##                      accessible_th = 0,
##                      truncate_at_0 = TRUE) {
##     ## Like Franke et al., 2011 and others of Krug. Very similar to Greene
##     ## and Crona, 2014. And this allows moving from HoC to purely additive
##     ## changing c and sd.

##     ## FIXME future: do this with order too?
##     ##    - do not generate a matrix of genotypes but a matrix of ordered genot.
##     wt_is_1 = match.arg(wt_is_1)
##     if(is_null_na(g)) stop("Number of genes argument (g) is null or NA")
##     m <- generate_matrix_genotypes(g)
##     done <- FALSE
##     ## attempts <- 0 ## for debugging/tracking purposes
##     while(!done) {
##         ## attempts <- attempts + 1
##         f_r <- rnorm(nrow(m), mean = mu, sd = sd)
##         if(inherits(reference, "character") && length(reference) == 1) {
##             if(reference == "random") {
##                 referenceI <- m[sample(nrow(m), 1), ]
##             } else if(reference == "max") {
##                 referenceI <- rep(1, g)
##             } else if(reference == "random2") {
##                 referenceI <- create_eq_ref(g)
##             }
##         } else {
##             referenceI <- reference
##             }
##         d_reference <- apply(m, 1, function(x) sum(abs(x - referenceI)))
##         f_det <- -c * d_reference
##         ## f_det <- rowSums(m) * slope/nrow(m) ## this is Greene and Krona
##         fi <- f_r + f_det

##         if(!is.null(scale)) {
##             fi <- (fi - min(fi))/(max(fi) - min(fi))
##             fi <- scale[1] + fi * (scale[2] - scale[1])
##         }
##         if(wt_is_1 == "divide") {
##             ## We need to shift to avoid ratios involving negative numbers and
##             ## we need to avoid having any fitness at 0, specially the wt.  If
##             ## any negative value, add the min, and shift by the magnitude of
##             ## the min to avoid any at 0.

##             ## If you use scale and wt_is_1, this will move the scale. It is
##             ## not possible to obtain a linear transformation that will keep
##             ## the min and max of the scale, and wt at 1.
##             min_fi <- min(fi)
##             if(min_fi < 0)
##                 fi <- fi + 2 * abs(min(fi))
##             fi <- fi/fi[1]
##         } else if (wt_is_1 == "subtract") {
##             fi <- fi - fi[1] + 1.0
##         } else if(wt_is_1 == "force") {
##             fi[1] <- 1.0
##             if(!is.null(scale)) {
##                 if( (1 > scale[2]) || (1 < scale[1]))
##                     warning("Using wt_is_1 = force and scale, but scale does ",
##                             "not include 1")
##             }
##         }
##         if(truncate_at_0) {
##             fi[fi <= 0] <- 1e-9
##         }
##         if(log) {
##             fi <- log(fi/fi[1]) + 1
##         }
##         m <- cbind(m, Fitness = fi)
##         if(!is_null_na(min_accessible_genotypes)) {
##             ## num_accessible_genotypes <- count_accessible_g(m, accessible_th)
##             ## Recall accessibleGenotypes includes the wt, if accessible.
##             num_accessible_genotypes <- length(wrap_accessibleGenotypes(m, accessible_th)) - 1
##             ## message("\n     num_accessible_genotypes = ", num_accessible_genotypes, "\n")
##             if(num_accessible_genotypes >= min_accessible_genotypes) {
##                 done <- TRUE
##                 attributes(m) <- c(attributes(m),
##                                    accessible_genotypes = num_accessible_genotypes,
##                                    accessible_th = accessible_th)
##             } else {
##                 ## Cannot start again with a fitness column
##                 m <- m[, -ncol(m), drop = FALSE]
##             }
##         } else {
##             done <- TRUE
##         }
##     }
##     ## message("\n number of attempts = ", attempts, "\n")
##     class(m) <- c(class(m), "genotype_fitness_matrix")
##     return(m)
## }
Any scripts or data that you put into this service are public.
OncoSimulR documentation built on Nov. 8, 2020, 8:31 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
OncoSimulR
Forward Genetic Simulation of Cancer Progression with Epistasis

R/rfitness.R
In OncoSimulR: Forward Genetic Simulation of Cancer Progression with Epistasis

Defines functions rfitness fl_generate_binary fl_statistics_binary get_magellan_binaries create_eq_ref

Documented in rfitness

Try the OncoSimulR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

OncoSimulR Forward Genetic Simulation of Cancer Progression with Epistasis

R/rfitness.R In OncoSimulR: Forward Genetic Simulation of Cancer Progression with Epistasis

Defines functions rfitness fl_generate_binary fl_statistics_binary get_magellan_binaries create_eq_ref

Documented in rfitness

Try the OncoSimulR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

OncoSimulR
Forward Genetic Simulation of Cancer Progression with Epistasis

R/rfitness.R
In OncoSimulR: Forward Genetic Simulation of Cancer Progression with Epistasis