R/simulation.R

# pkg.env <- new.env()

###set = function(x) { pkg.env$x = x }

###get = function() { pkg.env$x }




#' Holds data necessary for a simulation.
#' @export
SIM = new.env()



#' Holds general package options
#' @export
pkg.opts = new.env()
pkg.opts$recombination = 1


#' Set recombination model to either poisson (no interference) or chi-square.
#'
#'
#' @param model Either "poisson" or "chisq"
#'
#' @examples
#'
#'
#' generateUniformGeneticMap()
#'
#' do_plots = 0
#'
#' setRecombinationModel("chisq")
#' if(do_plots == 1)
#'  hist(generateRecombinationDistances(100000),n=200)
#'
#' setRecombinationModel("poisson")
#' if(do_plots == 1)
#'  hist(generateRecombinationDistances(100000),n=200)
#'
#' @export
setRecombinationModel = function(model) {

    if(model == "poisson") { pkg.opts$recombination = 0 }
    else
        if(model == "chisq") { pkg.opts$recombination = 1 }
        else
        { stop("model should be poisson or chisq")}


}






#' Starts and initializes the data structures required for a simulation. A VCF file
#' should be read beforehand with the function readVCF.
#'
#' @param vcf Input vcf file of a region (can be .gz). Must contain phased data.
#' @param totalNumberOfIndividuals Maximum Number of individuals to allocate memory for. Set it above the number of individuals you want to simulate.
#' @param subset A subset of individual IDs to use for simulation
#' @param randomdata If 1, disregards the genotypes in the vcf file and generates independent markers that are not in LD.
#' @param typeOfGeneticMap Specify whether to download a genetic map for this chromosome
#'
#' @examples
#' library("sim1000G")
#' library(gplots)
#'
#' examples_dir = system.file("examples", package = "sim1000G")
#' vcf_file = file.path(examples_dir, "region.vcf.gz")
#'
#' vcf = readVCF( vcf_file, maxNumberOfVariants = 100)
#'
#'
#' genetic_map_of_region = system.file(
#'    "examples",
#'    "chr4-geneticmap.txt",
#'    package = "sim1000G"
#' )
#'
#' readGeneticMapFromFile(genetic_map_of_region)
#'
#' pdf(file=tempfile())
#' plotRegionalGeneticMap(vcf$vcf[,2]+1)
#' dev.off()
#'
#' startSimulation(vcf, totalNumberOfIndividuals = 200)
#'
#' @export
startSimulation = function(vcf, totalNumberOfIndividuals = 2000,
                           subset = NA, randomdata = 0,
                           typeOfGeneticMap = "download"
                           ) {

        cat("[#####...] Creating SIM object\n");

        # Original haplotypes from 1000 genomes / other data


        if(class(subset) == "logical" ) {


            SIM$population_gt1 = vcf$gt1
            SIM$population_gt2 = vcf$gt2
            SIM$individual_ids = vcf$individual_ids

        } else {


            s = which(vcf$individual_ids %in% subset)

            SIM$population_gt1 = vcf$gt1[,s]
            SIM$population_gt2 = vcf$gt2[,s]
            SIM$individual_ids = vcf$individual_ids[s]

            cat("Using", ncol(SIM$population_gt1), " individuals in simulation\n")


        }



        if(randomdata) {

            #str(SIM$population_gt1)
            # individuals in columns

            x = SIM$population_gt1*0

            for(i in 1:nrow(x)) x[i,] = rbinom( ncol(x), 1 , 0.25)

            SIM$population_gt1 = x
            SIM$population_gt2 = x

            # SIM$haplodata = haplodata(  t( cbind(SIM$population_gt1, SIM$population_gt2) ) )
            #    ldplot(SIM$haplodata$cor,ld.type="r")

        }




        # Generate haplodata object

        haplomatrix = t( cbind(SIM$population_gt1, SIM$population_gt2) )

        #dim(haplomatrix)

        meanv = apply(haplomatrix,2,mean)
        #print( range(meanv) )

        non_polymorphic = which( meanv == 0 | meanv == 1 )
        polymorphic = which( meanv > 0  & meanv < 1 )

        if(length(non_polymorphic) >  0 )  {
            cat("Warning: Some variants are not polymorphic. (n=" , non_polymorphic , ")\n");
        }



        SIM$non_polymorphic = non_polymorphic
        SIM$polymorphic = polymorphic


        SIM$haplodata = haplodata( haplomatrix[,polymorphic]  )
        cat("[#####...] Haplodata object created\n");



        # Variant information

        SIM$varinfo = vcf$vcf[,1:8]
        SIM$bp = vcf$vcf[,2]





        if(length(ls(geneticMap) ) == 0) {

            cat("Downloading genetic map for chromosome " , vcf$vcf[1,1],"\n" )

            readGeneticMap(vcf$vcf[1,1])


            #stop("ERROR: Genetic map has not been read yet\n");
        } else {

         s = range(geneticMap$bp)

         vcf_chrom = sub("^chr","",vcf$vcf[,1])
         gm_chrom = sub("chr","",geneticMap$chr[1])

         if( sum ( SIM$bp > s[2] | SIM$bp < s[1] ) > 0 ) {
            stop("Error: Genetic map does not match the region being simulated\n");

         }

         if(gm_chrom[1] != vcf_chrom[1]) {
            cat(gm_chrom,vcf_chrom[1],"\n");

            stop("Error: mismatch between chromosomes in genetic map and vcf ");
         }



        }



        SIM$cm = approx( geneticMap$bp, geneticMap$cm, SIM$bp )$y

        SIM$N_markers = nrow(vcf$gt1)

        SIM$pool = NA
        SIM$npool = 0

        SIM$total_individuals = totalNumberOfIndividuals
        SIM$individuals_generated = 0

        SIM$gt1 = matrix(nrow = SIM$total_individuals, ncol = SIM$N_markers, -1)
        SIM$gt2 = matrix(nrow = SIM$total_individuals, ncol = SIM$N_markers, -1)


        SIM$origin1 = matrix(nrow = SIM$total_individuals, ncol = SIM$N_markers, -1)
        SIM$origin2 = matrix(nrow = SIM$total_individuals, ncol = SIM$N_markers, -1)


        #SIM$cm = seq(0, 3200, l=dim(SIM$gt1)[2] )
        SIM$npool = 0
        SIM$last_ancestral_index = 10


}




SIM$refreshPool = function() {

    SIM$npool = 500
    SIM$pool = haplosim2(SIM$npool, SIM$haplodata, summary = F)$data

    s = sample(1: SIM$npool, SIM$npool)
    #SIM$pool = SIM$pool[s,]

}


SIM$generateNewHaplotypes = function(n = -1) {

    if(SIM$npool < 2) {

       # cat("Generate new haplotype pool..\n");

        t <- system.time( SIM$refreshPool() )
        print(t)



    }

    nvar = nrow(SIM$population_gt1)

    gt1 = rep(0, nvar)
    gt2 = rep(0, nvar)


    gt1[SIM$polymorphic] = SIM$pool[SIM$npool,]
    gt2[SIM$polymorphic] = SIM$pool[SIM$npool-1,]


    GT = list(gt1 = gt1 , gt2 = gt2 )

    #GT = list(gt1 =  SIM$pool[SIM$npool,],  gt2 = SIM$pool[SIM$npool-1,] )


    SIM$npool = SIM$npool - 2

    SIM$last_ancestral_index = SIM$last_ancestral_index  + 1

    return(GT)

}


SIM$addUnrelatedIndividual = function() {


    #    e1 = environment()
    #    print(parent.env(e1))
    #    print(ls(  parent.env(e1)   ))




    newGenotypes = SIM$generateNewHaplotypes()

    if(SIM$individuals_generated >= SIM$total_individuals) {

        stop("No more space for saving new individual genotypes. You can increase the parameter maximumNumberOfIndividuals when calling function startSimulation.")

    }


    SIM$individuals_generated = SIM$individuals_generated + 1
    j = SIM$individuals_generated


    # cat("N less than 0 is:", sum(newGenotypes$gt1<0)  , sum (newGenotypes$gt2<0)  , "id=", j,"\n")

    # cat("Adding individual ",j, " from pool\n");


    SIM$gt1[j,] = newGenotypes$gt1
    SIM$gt2[j,] = newGenotypes$gt2

    SIM$origin1[j,] = SIM$last_ancestral_index
    SIM$origin2[j,] = -SIM$last_ancestral_index


    return(j)
}





SIM$addUnrelatedIndividualWithHaplotype = function(new_haplotype, ancestral_index) {


    #    e1 = environment()
    #    print(parent.env(e1))
    #    print(ls(  parent.env(e1)   ))



    newGenotypes = SIM$generateNewHaplotypes()

    if(length(new_haplotype) !=  length(newGenotypes$gt1)) {

        stop("new_haplotype length is wrong");
    }



    if(SIM$individuals_generated >= SIM$total_individuals) {

        stop("No more space for saving new individual genotypes. You can increase the parameter maximumNumberOfIndividuals when calling function startSimulation.")

    }


    SIM$individuals_generated = SIM$individuals_generated + 1
    j = SIM$individuals_generated

    # cat("Adding individual ",j, " from pool\n");


    SIM$gt1[j,] = newGenotypes$gt1
    SIM$gt2[j,] = newGenotypes$gt2

    SIM$origin1[j,] = SIM$last_ancestral_index
    SIM$origin2[j,] = -SIM$last_ancestral_index


    SIM$gt2[j,] = new_haplotype
    SIM$origin2[j,] = - ancestral_index



    return(j)
}











SIM$addIndividualFromGenotypes = function(gt1,gt2) {

    if(SIM$individuals_generated >= SIM$total_individuals) {

        stop("No more space for saving new individual genotypes. You can increase the parameter maximumNumberOfIndividuals when calling function startSimulation.")

    }

    SIM$individuals_generated = SIM$individuals_generated + 1
    j = SIM$individuals_generated

    # cat("Adding individual ",j, " from specified genotypes\n");

    SIM$gt1[j,] = gt1
    SIM$gt2[j,] = gt2


    return(j)
}




debug_flag = 0


SIM$mate = function(i, j) {

    # For now, we hope i,j are of different sex

    recomb1 = generateSingleRecombinationVector(SIM$cm)
    recomb2 = generateSingleRecombinationVector(SIM$cm)


    # FATHER1 = SIM$gt1[i,]
    # FATHER2 = SIM$gt2[i,]
    # MOTHER1 = SIM$gt1[i,]
    # MOTHER2 = SIM$gt2[i,]



    gt1 =  recomb1 * SIM$gt1[i,]  +  (1-recomb1) * SIM$gt2[i,]
    gt2 =  recomb2 * SIM$gt1[j,]  +  (1-recomb2) * SIM$gt2[j,]



    #gt1 = SIM$gt1[i,]
    #gt1[recomb1==1] = SIM$gt2[i,][recomb1==1]

    #gt2 = SIM$gt1[j,]
    #gt2[recomb1==1] = SIM$gt2[j,][recomb1==1]

    #cat("Mate: ",i," " ,j,"\n");

    #p1 = paste(SIM$gt1[i,],collapse="")
    #p2 = paste(SIM$gt2[i,],collapse="")
    #p3 = paste(gt1,collapse="")

    #cat("R1:", paste(recomb1,collapse=""),"\n",sep="")
    #cat("R2:", paste(recomb2,collapse=""),"\n",sep="")

    #cat(p1,"\n",p2,"\n",p3,"\n",sep="")

    SWAP = runif(1) < 0.5
    if(SWAP) { t = gt1; gt1 = gt2; gt2 = t; }


    index = SIM$addIndividualFromGenotypes(gt1, gt2)


    #gt1 =  recomb1 * SIM$gt1[i,]  +  (1-recomb1) * SIM$gt2[i,]
    #gt2 =  recomb2 * SIM$gt1[j,]  +  (1-recomb2) * SIM$gt2[j,]

    if(!SWAP) {
        SIM$origin1[index,] = recomb1 * SIM$origin1[i,] + (1-recomb1) * SIM$origin2[i,];
        SIM$origin2[index,] = recomb2 * SIM$origin1[j,] + (1-recomb2) * SIM$origin2[j,];
    } else {
        SIM$origin2[index,] = recomb1 * SIM$origin1[i,] + (1-recomb1) * SIM$origin2[i,];
        SIM$origin1[index,] = recomb2 * SIM$origin1[j,] + (1-recomb2) * SIM$origin2[j,];
    }

    # last_gt <<- gt1 + gt2

    return (index)

}



SIM$reset = function() {
    SIM$pool = NA
    SIM$npool = 0

    SIM$individuals_generated = 0
}




#' Removes all individuals that have been simulated and resets the simulator.
#'
#'
#' @return nothing
#'
#' @examples
#'
#' resetSimulation()
#'
#' @export
resetSimulation = function() {

    SIM$reset()

}


#' Generates variant data for n unrelated individuals
#'
#'
#' @param N how many individuals to generate
#'
#' @return IDs of the generated individuals
#'
#' @examples
#'
#' library("sim1000G")
#'
#' examples_dir = system.file("examples", package = "sim1000G")
#' vcf_file = file.path(examples_dir, "region.vcf.gz")
#' vcf = readVCF( vcf_file, maxNumberOfVariants = 100 , min_maf = 0.12)
#'
#' genetic_map_of_region =
#'    system.file("examples",
#'      "chr4-geneticmap.txt",
#'      package = "sim1000G")
#'
#' readGeneticMapFromFile(genetic_map_of_region)
#'
#' startSimulation(vcf, totalNumberOfIndividuals = 1200)
#' ids = generateUnrelatedIndividuals(20)
#'
#' # See also the documentation on our github page
#'
#' @export
generateUnrelatedIndividuals = function(N=1) {

    id = c()
    for(i in 1:N) id[i] = SIM$addUnrelatedIndividual()


    return( id )

}


















#' Simulates genotypes for 1 family with 1 offspring
#'
#'
#'
#' @param family_id What will be the family_id (for example: 100)
#'
#' @return family structure object
#'
#' @examples
#'
#' library("sim1000G")
#'
#' examples_dir = system.file("examples", package = "sim1000G")
#' vcf_file = file.path(examples_dir, "region.vcf.gz")
#' vcf = readVCF( vcf_file, maxNumberOfVariants = 100 ,
#'    min_maf = 0.12 ,max_maf = NA)
#'
#' genetic_map_of_region = system.file("examples",
#'    "chr4-geneticmap.txt",
#'    package = "sim1000G")
#' readGeneticMapFromFile(genetic_map_of_region)
#'
#' startSimulation(vcf, totalNumberOfIndividuals = 1200)
#' fam1 = newNuclearFamily(1)
#' fam2 = newNuclearFamily(2)
#'
#' # See also the documentation on our github page
#'
#' @export
newNuclearFamily = function(family_id) {

    fam = data.frame(family_id = family_id  ,
                     id = c(1,2,3) ,
                     father = c(0,0,1) ,
                     mother = c(0,0,2) ,
                     sex = c(1,2,1),
                     generation = c(1,1,2)
    )


    j1 = SIM$addUnrelatedIndividual()
    j2 = SIM$addUnrelatedIndividual()
    j3 = SIM$mate(j1,j2)


    fam$gtindex = c(j1,j2,j3)

    fam
}

#' Simulates genotypes for 1 family with n offspring
#'
#'
#'
#' @param family_id What will be the family_id (for example: 100)
#' @param noffspring Number of offsprings that this family will have
#'
#' @return family structure object
#'
#' @examples
#'
#' ped_line = newFamilyWithOffspring(10,3)
#'
#'
#' @export
newFamilyWithOffspring = function(family_id, noffspring = 2) {

    fam = data.frame(fid = family_id  ,
                     id = c(1:2) ,
                     father = c(0,0),
                     mother = c(0,0),
                     sex = c(1,2),
                     generation = c(1,1)
    )


    j1 = SIM$addUnrelatedIndividual()
    j2 = SIM$addUnrelatedIndividual()

    fam$gtindex = c(j1,j2)

    for(i in 1:noffspring) {
        j3 = SIM$mate(j1,j2)
        newFamLine = c(family_id, i+10, 1,2, sample(c(1,2), 1) ,2 , j3)
        fam = rbind(fam, newFamLine)
    }

    return (fam)
}









#' Generates genotype data for a family of 3 generations
#'
#'
#'
#' @param familyid What will be the family_id (for example: 100)
#' @param noffspring2 Number of offspring in generation 2
#' @param noffspring3 Number of offspring in generation 3 (vector of length noffspring2)
#'
#' @return family structure object
#'
#' @export
#'
#' @examples
#'
#' library("sim1000G")
#'
#' examples_dir = system.file("examples", package = "sim1000G")
#' vcf_file = file.path(examples_dir, "region.vcf.gz")
#' vcf = readVCF( vcf_file, maxNumberOfVariants = 100 ,
#'                min_maf = 0.12 ,max_maf = NA)
#'
#' generateUniformGeneticMap()
#'
#' startSimulation(vcf, totalNumberOfIndividuals = 200)
#'
#' ped_line = newFamily3generations(12, 3, c(3,3,2) )
newFamily3generations = function(familyid,
                                 noffspring2 = 2,
                                 noffspring3 = c(1,1)
                                 ) {

    if(length(noffspring3) == noffspring2){

        fam = data.frame(fid = familyid  ,
                         id = c(1:2) ,
                         father = c(0,0),
                         mother = c(0,0),
                         sex = c(1,2),
                         generation = c(1,1)
        )


        j1 = SIM$addUnrelatedIndividual()
        j2 = SIM$addUnrelatedIndividual()

        fam$gtindex = c(j1,j2)

        id2 <- 2
        for(i in 1:noffspring2) {
            id2 <- id2 + 1
            ids <- id2 + 1
            j3 = SIM$mate(j1,j2)
            gender2 <- sample(c(1,2), 1)
            genders <- ifelse(gender2 == 1, 2, 1)
            newFamLine = c(familyid, id2, 1,2, gender2,2,  j3)
            fam = rbind(fam, newFamLine)

            if(length(noffspring3[i] > 0)){
                j4 <- SIM$addUnrelatedIndividual()
                newFamLine = c(familyid, ids, 0,0, genders ,2,  j4)
                fam = rbind(fam, newFamLine)

                id3 <- ids
                for(j in 1:noffspring3[i]){
                    id3 <- id3 + 1
                    j5 <- SIM$mate(j3, j4)
                    father_id <- ifelse(gender2 == 1, id2, ids)
                    mother_id <- ifelse(gender2 == 2, id2, ids)
                    newFamLine = c(familyid, id3, father_id, mother_id, sample(c(1, 2), 1) ,3,  j5)
                    fam = rbind(fam, newFamLine)


                }
                id2 <- id3
            }
        }

        return (fam)
    }  else  {
        print(noffspring2)
        print(noffspring3)
        stop("The vector for number of offspring in the third generation (possibly zeros) must be equal to the number of offspring in the second generation")
    }
}






#' Computes pairwise IBD1/2 for a specific pair of individuals
#'
#'
#'
#' @param i Index of first individual
#' @param j Index of second individual
#'
#' @return Mean IBD1 and IBD2 as computed from shared haplotypes
#'
#' @examples
#'
#' library("sim1000G")
#'
#' examples_dir = system.file("examples", package = "sim1000G")
#' vcf_file = file.path(examples_dir, "region.vcf.gz")
#'
#' vcf = readVCF( vcf_file, maxNumberOfVariants = 100 ,
#'                min_maf = 0.12 ,max_maf = NA)
#'
#' generateUniformGeneticMap()
#'
#' startSimulation(vcf, totalNumberOfIndividuals = 200)
#'
#' ped1 = newNuclearFamily(1)
#'
#' v = computePairIBD12(1, 3)
#'
#' cat("IBD1 of pair = ", v[1], "\n");
#' cat("IBD2 of pair = ", v[2], "\n");
#'
#'
#' @export
computePairIBD12 = function(i,j) {

    q1 = SIM$origin1[i,]
    q2 = SIM$origin2[i,]

    r1 = SIM$origin1[j,]
    r2 = SIM$origin2[j,]


    # table(q1,q2)

    IBD1H1 = q1 == r1 | q1 == r2
    IBD1H2 = q2 == r1 | q2 == r2


    IBD12 = mean(IBD1H1 | IBD1H2)
    IBD2 = mean(   (q1 == r1 & q2 == r2 )  | (q1 == r2 & q2 == r1) )


    c(IBD1=IBD12-IBD2, IBD2=IBD2)
}






#' Computes pairwise IBD1 for a specific pair of individuals.
#' See function computePairIBD12 for description.
#'
#'
#'
#' @param i Index of first individual
#' @param j Index of second individual
#'
#' @return Mean IBD1 as computed from shared haplotypes
#'
#' @examples
#'
#' library("sim1000G")
#'
#' examples_dir = system.file("examples", package = "sim1000G")
#' vcf_file = file.path(examples_dir, "region.vcf.gz")
#' vcf = readVCF( vcf_file, maxNumberOfVariants = 100 ,
#'                min_maf = 0.12 ,max_maf = NA)
#'
#' # For realistic data use the function downloadGeneticMap
#' generateUniformGeneticMap()
#'
#' startSimulation(vcf, totalNumberOfIndividuals = 200)
#'
#' ped1 = newNuclearFamily(1)
#'
#' v = computePairIBD1(1, 3)
#'
#' cat("IBD1 of pair = ", v, "\n");
#'
#' @export
computePairIBD1 = function(i,j) {

    q1 = SIM$origin1[i,]
    q2 = SIM$origin2[i,]

    r1 = SIM$origin1[j,]
    r2 = SIM$origin2[j,]



    IBD1H1 = q1 == r1 | q1 == r2
    IBD1H2 = q2 == r1 | q2 == r2

    IBD12 = mean(IBD1H1 | IBD1H2)
    IBD2 = mean(   (q1 == r1 & q2 == r2 )  | (q1 == r2 & q2 == r1) )


    IBD1=IBD12-IBD2

    IBD1

}



#' Computes pairwise IBD2 for a specific pair of individuals
#'
#'
#'
#' @param i Index of first individual
#' @param j Index of second individual
#'
#' @return Mean IBD2 as computed from shared haplotypes
#'
#'
#' @examples
#'
#' library("sim1000G")
#'
#' examples_dir = system.file("examples", package = "sim1000G")
#' vcf_file = file.path(examples_dir, "region.vcf.gz")
#' vcf = readVCF( vcf_file, maxNumberOfVariants = 100 ,
#'                min_maf = 0.12 ,max_maf = NA)
#'
#' # For realistic data use the function downloadGeneticMap
#' generateUniformGeneticMap()
#'
#' startSimulation(vcf, totalNumberOfIndividuals = 200)
#'
#' ped1 = newNuclearFamily(1)
#'
#' v = computePairIBD2(1, 3)
#'
#' cat("IBD2 of pair = ", v, "\n");
#'
#' @export
computePairIBD2 = function(i,j) {

  q1 = SIM$origin1[i,]
  q2 = SIM$origin2[i,]

  r1 = SIM$origin1[j,]
  r2 = SIM$origin2[j,]

   IBD2 = mean(   (q1 == r1 & q2 == r2 )  | (q1 == r2 & q2 == r1) )


  IBD2

}















#' Utility function that prints a matrix. Useful for IBD12 matrices.
#'
#'
#' @param m Matrix to be printed
#'
#' @examples
#'
#' printMatrix (  matrix(runif(16), nrow=4) )
#' @export
#'
printMatrix = function(m) {
    cat("      " , " " , sprintf("[%4d] ", 1:nrow(m) )  , "\n")

    for(i in 1:nrow(m)) {

        cat(sprintf("[%4d]",i) , " " , sprintf(" %.3f ", m[i,]) , "\n")
    }

}




#' Retrieve a matrix of simulated genotypes for a specific set of individual IDs
#'
#'
#' @param ids Vector of ids of individuals to retrieve.
#'
#' @examples
#'
#' library("sim1000G")
#'
#' examples_dir = system.file("examples", package = "sim1000G")
#' vcf_file = file.path(examples_dir, "region.vcf.gz")
#' vcf = readVCF( vcf_file, maxNumberOfVariants = 100 ,
#'                min_maf = 0.12 ,max_maf = NA)
#'
#' # For realistic data use the function downloadGeneticMap
#' generateUniformGeneticMap()
#'
#' startSimulation(vcf, totalNumberOfIndividuals = 200)
#'
#' ped1 = newNuclearFamily(1)
#'
#' retrieveGenotypes(ped1$gtindex)
#'
#' @export
#'
retrieveGenotypes = function(ids) {

    ids = as.numeric(ids)
    m = SIM$gt1[ids,] + SIM$gt2[ids,]

    rownames(m) = ids

    m

}




saved_SIM = new.env()


#' Save the data for a simulation for later use. When simulating multiple populations it
#' allows saving and restoring of simulation data for each population.
#'
#' @param id Name the simulation will be saved as.
#'
#' @examples
#'
#'
#'
#' examples_dir = system.file("examples", package = "sim1000G")
#'
#' vcf_file = file.path(examples_dir, "region.vcf.gz")
#' vcf = readVCF( vcf_file, maxNumberOfVariants = 100 ,
#'                min_maf = 0.12 ,max_maf = NA)
#'
#'
#' # For realistic data use the functions downloadGeneticMap
#' generateUniformGeneticMap()
#'
#' startSimulation(vcf, totalNumberOfIndividuals = 200)
#'
#' ped1 = newNuclearFamily(1)
#'
#' saveSimulation("sim1")
#'
#' @export
#'
saveSimulation = function(id) {
    saved_SIM[[id]] = as.list(SIM)
    SIM = new.env()


}







#' Load some previously saved simulation data by function saveSimulation
#'
#' @param id Name the simulation to load which was previously saved by saveSimulation
#'
#' @examples
#'
#'
#' examples_dir = system.file("examples", package = "sim1000G")
#' vcf_file = file.path(examples_dir, "region.vcf.gz")
#'
#' vcf = readVCF( vcf_file, maxNumberOfVariants = 100 ,
#'            min_maf = 0.12 ,max_maf = NA)
#'
#' # For a realistic genetic map
#' # use the function readGeneticMap
#' generateUniformGeneticMap()
#'
#' startSimulation(vcf, totalNumberOfIndividuals = 200)
#'
#' ped1 = newNuclearFamily(1)
#'
#' saveSimulation("sim1")
#'
#' vcf = readVCF( vcf_file, maxNumberOfVariants = 100 ,
#'                min_maf = 0.02 ,max_maf = 0.5)
#'
#' startSimulation(vcf, totalNumberOfIndividuals = 200)
#' saveSimulation("sim2")
#'
#' loadSimulation("sim1")
#'
#'
#'
#' @export
#'
loadSimulation = function(id) {

    print(names(saved_SIM))

    x = (saved_SIM[[id]])

    for(i in names(x)) SIM[[i]] = x[[i]]

    cat(" N=" , length(SIM$individual_ids), " individuals in origin simulation pool.\n")

    SIM$npool = 0

}

Try the sim1000G package in your browser

Any scripts or data that you put into this service are public.

sim1000G documentation built on June 10, 2019, 1:01 a.m.