R/generate_hmm_seq.R

Defines functions generate_hmm_seq

Documented in generate_hmm_seq

#' Function to generate a DNA sequence, given a HMM and the length of the sequence to be generated.
#'
#' By Coghlan (2011)
#'
#' @param transitionmatrix transitionmatrix
#' @param emissionmatrix emissionmatrix
#' @param initialprobs initialprobs
#' @param seqlength seqlength
#'
#' @export

generate_hmm_seq <- function(transitionmatrix,
                             emissionmatrix,
                             initialprobs,
                             seqlength){

  nucleotides     <- c("A", "C", "G", "T")   # Define the alphabet of nucleotides
  states          <- c("AT-rich", "GC-rich") # Define the names of the states
  mysequence      <- character()             # Create a vector for storing the new sequence
  mystates        <- character()

  # Create a vector for storing the state that each position in the new sequence
  # was generated by
  # Choose the state for the first position in the sequence:
  firststate      <- sample(states, 1,
                            replace=TRUE,
                            prob=initialprobs)

  # Get the probabilities of the current nucleotide, given that we are in the state "firststate":
  probabilities   <- emissionmatrix[firststate,]

  # Choose the nucleotide for the first position in the sequence:
  firstnucleotide <- sample(nucleotides, 1,
                            replace =TRUE,
                            prob=probabilities)

  mysequence[1]   <- firstnucleotide         # Store the nucleotide for the first position of the sequence
  mystates[1]     <- firststate              # Store the state that the first position in the sequence was generated by

  for (i in 2:seqlength)
  {
    prevstate    <- mystates[i-1]           # Get the state that the previous nucleotide in the sequence was generated by
    # Get the probabilities of the current state, given that the previous nucleotide was generated by state "prevstate"
    stateprobs   <- transitionmatrix[prevstate,]

    # Choose the state for the ith position in the sequence:
    state        <- sample(states, 1,
                           replace  =TRUE,
                           prob=stateprobs)

    # Get the probabilities of the current nucleotide, given that we are in the state "state":
    probabilities <- emissionmatrix[state,]

    # Choose the nucleotide for the ith position in the sequence:
    nucleotide   <- sample(nucleotides, 1,
                           replace  = TRUE,
                           prob=probabilities)

    mysequence[i] <- nucleotide             # Store the nucleotide for the current position of the sequence
    mystates[i]  <- state                   # Store the state that the current position in the sequence was generated by
  }

  for (i in 1:length(mysequence))
  {
    nucleotide   <- mysequence[i]
    state        <- mystates[i]
    print(paste("Position", i, ", State", state, ", Nucleotide = ", nucleotide))
  }
}
brouwern/compbio4all documentation built on Dec. 19, 2021, 11:47 a.m.