learnEM: Expectation-Maximization algorithm to estimate the model...

Description Usage Arguments Details Value References See Also Examples

Description

Expectation-Maximization algorithm to estimate the model parameters based on a single or multiple observed sequences.

Usage

1
learnEM(hmm, sequences, iter = 100, delta = 1e-05, pseudo = 0, print = TRUE )

Arguments

hmm

a list with the necessary variables to define a hidden Markov model.

sequences

sequences of observations to be used as training set. HMM and PHMM use a matrix. GHMM uses a 3D array.

iter

a value that sets the maximum number of iterations to run.

delta

a value set to be the minimum error considered as a convergence criteria.

pseudo

a value set to consider pseudo-counts.

print

a logical value to print the error at each iteration.

Details

This function can be used for univariate or multivariate distributions. HMM and PHMM use a matrix with different sequences as rows and consecutive observations in the columns. GHMM uses an array with the variables as rows, consecutive observations in the columns and different sequences as slices.

Value

A "list" that contains the estimated hidden Markov model parameters.

References

Cited references are listed on the RcppHMM manual page.

See Also

generateObservations , verifyModel

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
## Values for a hidden Markov model with categorical observations
# Set the model parameters
n <- c("First","Second")
m <- c("A","T","C","G")
A <- matrix(c(0.8,0.2,
              0.1,0.9),
            nrow = 2,
            byrow = TRUE)

B <- matrix(c(0.2, 0.2, 0.3, 0.3,
              0.4, 0.4, 0.1, 0.1),
            nrow = 2,
            byrow = TRUE)

Pi <- c(0.5, 0.5)

params <- list( "Model" = "HMM",
                "StateNames" = n,
                "ObservationNames" = m,
                "A" = A,
                "B" = B,
                "Pi" = Pi)

HMM <- verifyModel(params)

# Data simulation
set.seed(100)
length <- 100
seqs <- 10

# Multiple sequences to be used as training set
observationSequences<- c()
for(i in 1:seqs){
  Y <- generateObservations(HMM , length)$Y
  observationSequences <- rbind(observationSequences , Y)
}

# New model random initialization
# Model to be trained
set.seed(1000)
newModel <- initHMM(2,4) 
n = c("X1","X2")
m = c("A","T","C","G")
newModel <- setNames(newModel,
                     list( "StateNames" = n,
                           "ObservationNames" = m) )


  newModel <- learnEM(newModel,
                      observationSequences,
                      iter= 50, 
                      delta = 1E-5,
                      pseudo = 3,
                      print = TRUE)


print(newModel)   

## Values for a hidden Markov model with discrete observations

n <- c("Low","Normal","High")

A <- matrix(c(0.5, 0.3,0.2,
              0.2, 0.6, 0.2,
              0.1, 0.3, 0.6),
            ncol=length(n), byrow=TRUE)

B <- c(2600,  # First distribution with mean 2600
       2700,  # Second distribution with mean 2700
       2800)  # Third distribution with mean 2800

Pi <- rep(1/length(n), length(n))

HMM.discrete <- verifyModel(list("Model"="PHMM", "StateNames" = n, "A" = A, "B" = B, "Pi" = Pi))

# Data simulation
set.seed(100)
length <- 100
seqs <- 50

# Multiple sequences to be evaluated
observationSequences<- c()
for(i in 1:seqs){
  Y <- generateObservations(HMM.discrete , length)$Y
  observationSequences <- rbind(observationSequences , Y)
}

dim(observationSequences)
# New model random initialization
# Model to be trained
set.seed(1000)
newModel <- initPHMM(3) 

  newModel <- learnEM(newModel,
                      observationSequences,
                      iter= 50, 
                      delta = 1E-5,
                      print = FALSE)


print(newModel)   


## Values for a hidden Markov model with continuous observations                          
# Number of hidden states = 3
# Univariate gaussian mixture model

N = c("Low","Normal", "High")
A <- matrix(c(0.5, 0.3,0.2,
              0.2, 0.6, 0.2,
              0.1, 0.3, 0.6),
            ncol= length(N), byrow = TRUE)

Mu <- matrix(c(0, 50, 100), ncol = length(N))
Sigma <- array(c(144, 400, 100), dim = c(1,1,length(N)))
Pi <- rep(1/length(N), length(N))

HMM.cont.univariate <- verifyModel(list( "Model"="GHMM", 
                              "StateNames" = N,
                              "A" = A, 
                              "Mu" = Mu, 
                              "Sigma" = Sigma, 
                              "Pi" = Pi))

# Data simulation
set.seed(100)
length <- 100
seqs <- 50

# Multiple sequences to be evaluated
observationSequences<- array(0, dim = c(1, length, seqs) )
for(i in 1:seqs){
  Y <- generateObservations(HMM.cont.univariate , length)$Y
  observationSequences[,,i] <- Y
}

dim(observationSequences)

# New model random initialization
# Model to be trained
set.seed(1000)
newModel <- initGHMM(3) 

  newModel <- learnEM(newModel,
                      observationSequences,
                      iter= 50, 
                      delta = 1E-5,
                      print = FALSE)


print(newModel)   


## Values for a hidden Markov model with continuous observations                          
# Number of hidden states = 2
# Multivariate gaussian mixture model
# Observed vector with dimensionality of 3
N = c("X1","X2")
M <- 3

# Same number of dimensions
Sigma <- array(0, dim =c(M,M,length(N)))
Sigma[,,1] <- matrix(c(1.0,0.8,0.8,
                       0.8,1.0,0.8,
                       0.8,0.8,1.0), ncol = M,  
                     byrow = TRUE)
Sigma[,,2] <- matrix(c(1.0,0.4,0.6,
                       0.4,1.0,0.8,
                       0.6,0.8,1.0), ncol = M,
                     byrow = TRUE)
Mu <- matrix(c(0, 5, 
               10, 0, 
               5, 10), 
             nrow = M, 
             byrow = TRUE)

A <- matrix(c(0.6,0.4,
              0.3, 0.7), 
            ncol = length(N),
            byrow = TRUE)
Pi <- c(0.5, 0.5)

HMM.cont.multi <- verifyModel(list( "Model" = "GHMM",
                              "StateNames" = N,
                              "A" = A, 
                              "Mu" = Mu, 
                              "Sigma" = Sigma, 
                              "Pi" = Pi))

# Data simulation
set.seed(100)
length <- 100
seqs <- 50

# Multiple sequences to be evaluated
observationSequences<- array(0, dim = c(M, length, seqs) )
for(i in 1:seqs){
  Y <- generateObservations(HMM.cont.multi , length)$Y
  observationSequences[,,i] <- Y
}

dim(observationSequences)

# New model random initialization
# Model to be trained
set.seed(1000)
newModel <- initGHMM(2, M) 

  newModel <- learnEM(newModel,
                      observationSequences,
                      iter= 50, 
                      delta = 1E-5,
                      print = FALSE)


print(newModel)   

Example output

Attaching package: 'RcppHMM'

The following object is masked from 'package:stats':

    setNames

Iteration: 1 Error: 129.177
Iteration: 2 Error: 0.271401
Iteration: 3 Error: 0.307328
Iteration: 4 Error: 0.336967
Iteration: 5 Error: 0.36252
Iteration: 6 Error: 0.383634
Iteration: 7 Error: 0.398634
Iteration: 8 Error: 0.405395
Iteration: 9 Error: 0.402151
Iteration: 10 Error: 0.388198
Iteration: 11 Error: 0.364291
Iteration: 12 Error: 0.332601
Iteration: 13 Error: 0.296216
Iteration: 14 Error: 0.258419
Iteration: 15 Error: 0.222025
Iteration: 16 Error: 0.188993
Iteration: 17 Error: 0.160352
Iteration: 18 Error: 0.136362
Iteration: 19 Error: 0.116758
Iteration: 20 Error: 0.100999
Iteration: 21 Error: 0.0884441
Iteration: 22 Error: 0.078471
Iteration: 23 Error: 0.0705292
Iteration: 24 Error: 0.0641612
Iteration: 25 Error: 0.059001
Iteration: 26 Error: 0.0547626
Iteration: 27 Error: 0.0512265
Iteration: 28 Error: 0.0482258
Iteration: 29 Error: 0.0456345
Iteration: 30 Error: 0.0433578
Iteration: 31 Error: 0.0413249
Iteration: 32 Error: 0.0394828
Iteration: 33 Error: 0.0377917
Iteration: 34 Error: 0.0362222
Iteration: 35 Error: 0.0347522
Iteration: 36 Error: 0.0333654
Iteration: 37 Error: 0.0320496
Iteration: 38 Error: 0.0307955
Iteration: 39 Error: 0.0295963
Iteration: 40 Error: 0.0284468
Iteration: 41 Error: 0.0273428
Iteration: 42 Error: 0.0262814
Iteration: 43 Error: 0.0252601
Iteration: 44 Error: 0.0242767
Iteration: 45 Error: 0.0233296
Iteration: 46 Error: 0.0224174
Iteration: 47 Error: 0.0215388
Iteration: 48 Error: 0.0206925
Iteration: 49 Error: 0.0198775
Iteration: 50 Error: 0.0190928
Finished at Iteration: 50 with Error: 0.0190928
$Model
[1] "HMM"

$StateNames
[1] "X1" "X2"

$ObservationNames
[1] "A" "T" "C" "G"

$A
          [,1]      [,2]
[1,] 0.8553389 0.1446611
[2,] 0.2342260 0.7657740

$B
          [,1]      [,2]       [,3]      [,4]
[1,] 0.4236472 0.3853220 0.08365655 0.1073742
[2,] 0.1861825 0.2436424 0.28485828 0.2853168

$Pi
[1] 0.414988 0.585012

[1]  50 100
Finished at Iteration: 5 with Error: 1.45519e-11
$Model
[1] "PHMM"

$StateNames
[1] "x1" "x2" "x3"

$A
             [,1] [,2]         [,3]
[1,] 4.599936e-78    1 1.148525e-99
[2,] 4.316816e-72    1 1.957702e-92
[3,] 1.740518e-78    1 1.420892e-99

$B
[1] 2460.561 2707.668 2460.502

$Pi
[1] 5.134025e-74 1.000000e+00 1.002407e-94

[1]   1 100  50
Finished at Iteration: 50 with Error: 0.435508
$Model
[1] "GHMM"

$StateNames
[1] "x1" "x2" "x3"

$A
           [,1]       [,2]       [,3]
[1,] 0.47316765 0.49967593 0.02715642
[2,] 0.00115568 0.08904449 0.90979983
[3,] 0.07579572 0.06890702 0.85529726

$Mu
          [,1]     [,2]     [,3]
[1,] -8.518939 8.837019 70.45303

$Sigma
, , 1

         [,1]
[1,] 58.76265

, , 2

         [,1]
[1,] 63.95226

, , 3

         [,1]
[1,] 946.4277


$Pi
          [,1]      [,2]      [,3]
[1,] 0.1399854 0.1735725 0.6864421

[1]   3 100  50
Finished at Iteration: 5 with Error: 3.63798e-11
$Model
[1] "GHMM"

$StateNames
[1] "x1" "x2"

$A
          [,1]      [,2]
[1,] 0.5941620 0.4058380
[2,] 0.3000708 0.6999292

$Mu
             [,1]        [,2]
[1,] -0.001520297  4.95184050
[2,] 10.001335156 -0.05034776
[3,]  5.001407674  9.99553504

$Sigma
, , 1

          [,1]      [,2]      [,3]
[1,] 2.2745687 0.7055973 0.4284549
[2,] 0.7055973 0.4306873 0.1374672
[3,] 0.4284549 0.1374672 0.2868994

, , 2

          [,1]      [,2]      [,3]
[1,] 1.5675345 0.7764713 0.3144361
[2,] 0.7764713 1.2159926 0.3046068
[3,] 0.3144361 0.3046068 0.2514231


$Pi
     [,1] [,2]
[1,] 0.58 0.42

RcppHMM documentation built on May 2, 2019, 8:56 a.m.