Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
warning = FALSE,
collapse = TRUE,
comment = "#>"
)
## ----setup, echo=FALSE--------------------------------------------------------
library(localScore)
## -----------------------------------------------------------------------------
score.values <- -3:2 # Possible score values
score.probability <- c(0.4, 0.0, 0.1, 0.0, 0.2, 0.3) # Associated score probabilities
score.expectation <- sum(score.values * score.probability) # Score expectation
print(score.expectation)
n <- 1000 # sequence size
score.sequence <- sample(score.values, n, replace = TRUE, prob = score.probability)
## -----------------------------------------------------------------------------
score.lindley <- lindley(score.sequence)
oldpar <- par(no.readonly = TRUE)
par(mfrow = c(2,1))
plot(score.sequence, typ = 'l', main = "Sequence score")
plot(score.lindley, typ = 's', main = "Associated Lindley process")
par(oldpar)
## -----------------------------------------------------------------------------
segments.sequence <- localScoreC(score.sequence)
localScore.sequence <- segments.sequence$localScore # Local score and position of the segment
print(localScore.sequence)
subLocalScore.sequence <- segments.sequence$suboptimalSegmentScores # suboptimal local scores and positions
print(head(subLocalScore.sequence))
## -----------------------------------------------------------------------------
# Exact p-value (computational limitation, see help(daudin))
daudin(localScore.sequence["value"],
sequence_length = n,
score_probabilities = score.probability,
sequence_min = min(score.values),
sequence_max = max(score.values))
# Karlin and Dembo approximation (n big)
karlin(localScore.sequence["value"],
sequence_length = n,
score_probabilities = score.probability,
sequence_min = min(score.values),
sequence_max = max(score.values))
# Improved Karlin and Dembo approximation (computational limitation, see help(mcc))
mcc(localScore.sequence["value"],
sequence_length = n,
score_probabilities = score.probability,
sequence_min = min(score.values),
sequence_max = max(score.values))
## -----------------------------------------------------------------------------
transitionMatrix <- matrix(c(0.2, 0.3, 0.5,
0.3, 0.4, 0.3,
0.2, 0.4, 0.4), byrow = TRUE, ncol = 3)
score.values <- c(-3, -1, 2)
row.names(transitionMatrix) <- score.values
score.stationary.distribution <- stationary_distribution(transitionMatrix)
score.expectation <- sum(score.values * score.stationary.distribution)
print(score.expectation)
# Generating example markov sequence of score:
n <- 10000
score.sequence <- transmatrix2sequence(matrix = transitionMatrix,
length = n,
score = score.values)
head(score.sequence)
## -----------------------------------------------------------------------------
segments.sequence <- localScoreC(score.sequence)
localScore.sequence <- segments.sequence$localScore # Local score and position of the segment
print(localScore.sequence)
subLocalScore.sequence <- segments.sequence$suboptimalSegmentScores # suboptimal local scores and positions
print(head(subLocalScore.sequence))
## -----------------------------------------------------------------------------
# Exact p-value (computational limitation, see help(exact_mc))
exact_mc(local_score = localScore.sequence["value"],
m = transitionMatrix,
sequence_length = n,
prob0 = score.stationary.distribution)
## -----------------------------------------------------------------------------
# Some sort of drifting markov sequence generator
sequence.generator <- function(n, P1, P2, score_values) {
nstate <- dim(P1)[1]
sequence.sim <- rep(NA, n)
sequence.sim[1] <- sample(1:nstate, 1, prob = stationary_distribution(P1))
for (i in 2:n) {
P <- (n - i) / (n - 1) * P1 + (i - 1) / (n - 1) * P2
sequence.sim[i] <- sample(1:nstate, 1, prob = P[sequence.sim[i - 1],])
}
return(score_values[sequence.sim])
}
P1 <- matrix(c(0.2, 0.3, 0.5,
0.3, 0.4, 0.3,
0.2, 0.4, 0.4), byrow = TRUE, ncol = 3)
P2 <- matrix(c(0.2, 0.1, 0.7,
0.6, 0.4, 0.0,
0.8, 0.2, 0.2), byrow = TRUE, ncol = 3)
score.values <- c(-4, -1, 2)
n <- 500
score.sequence <- sequence.generator(n, P1, P2, score.values)
head(score.sequence)
mean(score.sequence) # Expected to be strictly negative
## -----------------------------------------------------------------------------
score.lindley <- lindley(score.sequence)
x <- 1:n
lw1 <- loess(score.sequence ~ x)
oldpar <- par(no.readonly = TRUE)
par(mfrow = c(2,1))
{{{plot(score.sequence, typ = 'l', col = "grey", main = "Sequence score")
lines(x, lw1$fitted[x], col = "red")}}}
plot(score.lindley, typ = 's', main = "Associated Lindley process")
par(oldpar)
## -----------------------------------------------------------------------------
segments.sequence <- localScoreC(score.sequence)
localScore.sequence <- segments.sequence$localScore # Local score and position of the segment
print(localScore.sequence)
subLocalScore.sequence <- segments.sequence$suboptimalSegmentScores # suboptimal local scores and positions
print(head(subLocalScore.sequence))
## -----------------------------------------------------------------------------
p.value <- monteCarlo(local_score = localScore.sequence["value"],
FUN = function(n, P1, P2, score_values) {
return(sequence.generator(n, P1, P2, score_values))
},
n = n, P1 = P1, P2 = P2, score_values = score.values, #generative function parameters
plot = TRUE,
numSim = 1000 # low number here to compute the vignette in a "short" time
)
print(p.value)
## -----------------------------------------------------------------------------
transitionMatrix <- matrix(c(0.2, 0.3, 0.5,
0.3, 0.4, 0.3,
0.2, 0.4, 0.4), byrow = TRUE, ncol = 3)
score.values <- c(-3, -1, 2)
row.names(transitionMatrix) <- score.values
score.stationary.distribution <- stationary_distribution(transitionMatrix)
score.expectation <- sum(score.values * score.stationary.distribution)
print(score.expectation)
# Generating example markov sequence of score:
n <- 10000
score.sequence <- transmatrix2sequence(matrix = transitionMatrix,
length = n,
score = score.values)
## -----------------------------------------------------------------------------
segments.sequence <- localScoreC(score.sequence)
subLocalScore.sequence <- segments.sequence$suboptimalSegmentScores
## -----------------------------------------------------------------------------
iVisualExcursion <- 1 #first strictly positive excursion
iSegment <- subLocalScore.sequence[iVisualExcursion,]
print(iSegment)
# determining i in the sense of Karlin and Dembo (1990) (i >= iVisualExcursion)
i <- sum(segments.sequence$RecordTime <= iSegment$begin)
print(i)
proba_theoretical_ith_excursion_markov(iSegment$value, score.values, transitionMatrix, score.values, i = i)
## -----------------------------------------------------------------------------
library(localScore)
help(localScore)
ls(pos = 2)
mySeq <- sample(-2:2, 100, replace = TRUE, prob = c(0.5, 0.3, 0.05, 0.1, 0.05))
mySeq
scoreSequenceExpectation <- sum(c(-2:2)*c(0.5, 0.3, 0.05, 0.1, 0.05))
scoreSequenceExpectation
localScoreC(mySeq)
## -----------------------------------------------------------------------------
library(localScore)
mySeq <- sample(c(-3,2,0,1,5), 100, replace = TRUE, prob = c(0.5, 0.3, 0.05, 0.1, 0.05))
head(mySeq)
localScoreC(mySeq)
## -----------------------------------------------------------------------------
seq1 <- c(1,-2,3,1,-1,2)
seq2 <- c(1,-2,3,1,-1,2,-1,1)
localScoreC(seq1)
localScoreC(seq2)
## -----------------------------------------------------------------------------
score_reels <- c(-1, -0.5, 0, 0.5, 1)
proba_score_reels <- c(0.2, 0.3, 0.1, 0.2, 0.2)
sample_from_model <- function(score.sple, proba.sple, length.sple) {
sample(score.sple,
size = length.sple, prob = proba.sple, replace = TRUE
)
}
seq.essai <- sample_from_model(score.sple = score_reels, proba.sple = proba_score_reels,
length.sple = 10)
localScoreC(seq.essai)
## -----------------------------------------------------------------------------
# Loading a fasta protein
data(Seq219)
Seq219
# or using your own fasta sequence
#MySeqAA_P49755 <- as.character(read.table(file="P49755.fasta",skip=1)[,1])
#MySeqAA_P49755
# Loading a scoring function
data(HydroScore)
?HydroScore
# or using your own scoring function
# HydroScoreKyte<- loadScoreFromFile("Kyte1982.txt")
# Transforming the amino acid sequence into a score sequence with the score function
# in HydroScore file
SeqScore_P49755 <- CharSequence2ScoreSequence(Seq219, HydroScore)
head(SeqScore_P49755)
length(SeqScore_P49755)
# Computing the local score
localScoreC(SeqScore_P49755)
## ----fig.width=7.2, fig.height=4----------------------------------------------
monteCarlo(local_score = 10, FUN = function(x) {
return(sample(x = x, size = length(x), replace = TRUE))
}, x = SeqScore_P49755)
## -----------------------------------------------------------------------------
# Example
score_reels <- c(-1, -0.5, 0, 0.5, 1)
proba_score_reels <- c(0.2, 0.3, 0.1, 0.2, 0.2)
sample_from_model <- function(score.sple, proba.sple, length.sple) {
sample(score.sple,
size = length.sple, prob = proba.sple, replace = TRUE
)
}
monteCarlo(5.5,
FUN = sample_from_model, plot = TRUE, score.sple = score_reels, proba.sple = proba_score_reels,
length.sple = 100, numSim = 1000
)
## ----fig.width=7.2, fig.height=4----------------------------------------------
fu <- function(n, size, prob, shift) {
rbinom(size = size, n = n, prob = prob) + shift
}
karlinMonteCarlo(12,
FUN = fu, n = 10000, size = 8, prob = 0.2, shift = -2,
sequence_length = 1000000, simulated_sequence_length = 10000
)
## ----fig.width=7.2, fig.height=4----------------------------------------------
score_reels <- c(-1.5, -0.5, 0, 0.5, 1.5)
proba_score_reels <- c(0.2, 0.3, 0.1, 0.2, 0.2)
fu <- function(score.sple, proba.sple, length.sple) {
sample(score.sple,
size = length.sple, prob = proba.sple, replace = TRUE
)
}
karlinMonteCarlo(85.5,
FUN = fu, score.sple = score_reels, proba.sple = proba_score_reels,
length.sple = 10000, numSim = 10000, sequence_length = 100000,
simulated_sequence_length = 10000
)
## -----------------------------------------------------------------------------
daudin(
local_score = 15, sequence_length = 500, score_probabilities =
c(0.2, 0.3, 0.3, 0.0, 0.1, 0.1), sequence_min = -3, sequence_max = 2
)
## -----------------------------------------------------------------------------
score_reels <- c(-1, -0.5, 0, 0.5, 1)
proba_score_reels <- c(0.2, 0.3, 0.1, 0.2, 0.2)
sample_from_model <- function(score.sple, proba.sple, length.sple) {
sample(score.sple,
size = length.sple, prob = proba.sple, replace = TRUE
)
}
seq.essai <- sample_from_model(score.sple = score_reels, proba.sple = proba_score_reels,
length.sple = 100)
localScoreC(seq.essai)
C <- 10 # homothetic coefficient
localScoreC(as.integer(C*seq.essai))
## -----------------------------------------------------------------------------
RealScores2IntegerScores(score_reels, proba_score_reels, coef = C)
M.s.r <- RealScores2IntegerScores(score_reels, proba_score_reels, coef = C)$ExtendedIntegerScore
M.s.prob <- RealScores2IntegerScores(score_reels, proba_score_reels, coef = C)$ProbExtendedIntegerScore
M.SL <- localScoreC(as.integer(C * seq.essai))$localScore[1]
M.SL
pval.E <- daudin(
local_score = M.SL, sequence_length = length(seq.essai), score_probabilities = M.s.prob,
sequence_min = -10, sequence_max = 10
)
pval.E
## -----------------------------------------------------------------------------
SL.real <- localScoreC(seq.essai)$localScore[1]
SL.real
pval.MC <- monteCarlo(
local_score = SL.real, FUN = sample_from_model,
score.sple = score_reels, proba.sple = proba_score_reels,
length.sple = length(seq.essai), plot = TRUE, numSim = 10000
)
pval.MC
## -----------------------------------------------------------------------------
score.v <- -2:1
score.p <- c(0.3, 0.2, 0.2, 0.3)
sum(score.v*score.p)
karlin(
local_score = 14, sequence_length = 100000, sequence_min = -2, sequence_max = 1,
score_probabilities = c(0.3, 0.2, 0.2, 0.3)
)
karlin(
local_score = 14, sequence_length = 1000, sequence_min = -2, sequence_max = 1,
score_probabilities = c(0.3, 0.2, 0.2, 0.3)
)
## -----------------------------------------------------------------------------
# With missing score values
karlin(
local_score = 14, sequence_length = 1000, sequence_min = -3, sequence_max = 1,
score_probabilities = c(0.3, 0.2, 0.0, 0.2, 0.3)
)
## -----------------------------------------------------------------------------
mcc(
local_score = 14, sequence_length = 1000, sequence_min = -3, sequence_max = 2,
score_probabilities = c(0.2, 0.3, 0.3, 0.0, 0.1, 0.1)
)
daudin(
local_score = 14, sequence_length = 1000, score_probabilities =
c(0.2, 0.3, 0.3, 0.0, 0.1, 0.1), sequence_min = -3, sequence_max = 2
)
karlin(
local_score = 14, sequence_length = 1000, sequence_min = -3, sequence_max = 2,
score_probabilities = c(0.2, 0.3, 0.3, 0.0, 0.1, 0.1)
)
## -----------------------------------------------------------------------------
automatic_analysis(sequences = list("x1" = c(1,-2,2,3,-2, 3, -3, -3, -2)), model = "iid")
## -----------------------------------------------------------------------------
score <- c(-2, -1, 0, 1, 2)
proba_score <- c(0.2, 0.3, 0.1, 0.2, 0.2)
sum(score*proba_score)
sample_from_model <- function(score.sple, proba.sple, length.sple) {
sample(score.sple,
size = length.sple, prob = proba.sple, replace = TRUE
)
}
seq.essai <- sample_from_model(score.sple = score, proba.sple = proba_score, length.sple = 5000)
MyAnalysis <- automatic_analysis(
sequences = list("x1" = seq.essai),
distribution = proba_score, score_extremes = c(-2, 2), model = "iid"
)$x1
MyAnalysis$"p-value"
MyAnalysis$"method applied"
MyAnalysis$localScore$localScore
## -----------------------------------------------------------------------------
score_reels <- c(-1, -0.5, 0, 0.5, 1)
proba_score_reels <- c(0.2, 0.3, 0.1, 0.2, 0.2)
sample_from_model <- function(score.sple, proba.sple, length.sple) {
sample(score.sple,
size = length.sple, prob = proba.sple, replace = TRUE
)
}
seq.essai <- sample_from_model(score.sple = score_reels, proba.sple = proba_score_reels, length.sple = 1000)
# Homothetie
C <- 10
RealScores2IntegerScores(score_reels,proba_score_reels, coef=C)
M.s.r <- RealScores2IntegerScores(score_reels, proba_score_reels, coef = C)$ExtendedIntegerScore
M.s.prob <- RealScores2IntegerScores(score_reels, proba_score_reels, coef = C)$ProbExtendedIntegerScore
# The analysis
MyAnalysis <- automatic_analysis(
sequences = list("x1" = as.integer(C * seq.essai)), model = "iid",
distribution = M.s.prob, score_extremes = range(M.s.r)
)
MyAnalysis$x1$"p-value"
MyAnalysis$x1$"method applied"
# Without the homothety, the function gives a wrong result
# MyAnalysis2 <- automatic_analysis(sequences = list("x1" = seq.essai), model = "iid")
# MyAnalysis2$x1$"p-value"
# MyAnalysis2$x1$"method applied"
## -----------------------------------------------------------------------------
scoreValues <- c(-2, -1, 2)
mTransition <- matrix(c(0.2, 0.3, 0.5, 0.3, 0.4, 0.3, 0.2, 0.4, 0.4), byrow = TRUE, ncol = 3)
initialProb <- stationary_distribution(mTransition)
exact_mc(local_score = 50, m = mTransition, sequence_length = 100,
score_values = scoreValues, prob0 = initialProb)
## -----------------------------------------------------------------------------
scoreValues <- c(-2, -1, 2)
mTransition <- matrix(c(0.2, 0.3, 0.5, 0.3, 0.4, 0.3, 0.2, 0.4, 0.4), byrow = TRUE, ncol = 3)
rownames(mTransition) <- scoreValues
initialProb <- stationary_distribution(mTransition)
exact_mc(local_score = 50, m = mTransition, sequence_length = 100)
## -----------------------------------------------------------------------------
MyTransMat <-
matrix(c(
0.3, 0.1, 0.1, 0.1, 0.4, 0.2, 0.2, 0.1, 0.2, 0.3, 0.3, 0.4, 0.1, 0.1, 0.1, 0.3, 0.3, 0.1, 0.0, 0.3,
0.1, 0.1, 0.2, 0.3, 0.3
), ncol = 5, byrow = TRUE)
MySeq.CM <- transmatrix2sequence(matrix = MyTransMat, length = 150, score = -2:2)
MySeq.CM
AA.CM <- automatic_analysis(sequences = list("x1" = MySeq.CM), model = "markov")
AA.CM
## -----------------------------------------------------------------------------
Ls.CM <- AA.CM$x1$localScore[[1]][1]
monteCarlo(
local_score = Ls.CM,
FUN = transmatrix2sequence, matrix = MyTransMat,
length=150, score = -2:2,
plot = FALSE, numSim = 10000
)
## ----fig.width=7.2------------------------------------------------------------
set.seed(1)
mySeq <- sample(c(-3,2,0,1,5), 100, replace = TRUE, prob = c(0.5, 0.3, 0.05, 0.1, 0.05))
lindley(mySeq)
oldpar <- par(no.readonly = TRUE)
par(mfrow = c(2,1))
plot(lindley(mySeq), type = "s")
plot(mySeq,typ = 'l')
par(oldpar)
## -----------------------------------------------------------------------------
set.seed(1)
mySeq <- sample(c(-3,2,0,1,5), 100, replace = TRUE, prob = c(0.5, 0.3, 0.05, 0.1, 0.05))
mySeq
recordTimes(mySeq)
## -----------------------------------------------------------------------------
seq1 <- sample(7:8, size = 10, replace = TRUE)
seq2 <- sample(2:3, size = 15, replace = TRUE)
l <- list(seq1, seq2)
r <- scoreSequences2probabilityVector(l)
r
length(r)
## -----------------------------------------------------------------------------
data(Seq219)
data(HydroScore)
SeqScore <- CharSequence2ScoreSequence(Seq219, HydroScore)
n <- length(SeqScore)
n
## -----------------------------------------------------------------------------
LS <- localScoreC(SeqScore)$localScore[1]
LS
## -----------------------------------------------------------------------------
prob <- scoreSequences2probabilityVector(list(SeqScore))
prob
## -----------------------------------------------------------------------------
time.daudin <- system.time(
res.daudin <- daudin(
local_score = LS, sequence_length = n,
score_probabilities = prob,
sequence_min = min(SeqScore),
sequence_max = max(SeqScore)
)
)
res.daudin
## -----------------------------------------------------------------------------
time.karlin <- system.time(
res.karlin <- karlin(
local_score = LS, sequence_length = n,
score_probabilities = prob,
sequence_min = min(SeqScore),
sequence_max = max(SeqScore)
)
)
res.karlin
## -----------------------------------------------------------------------------
time.mcc <- system.time(
res.mcc <- mcc(
local_score = LS, sequence_length = n,
score_probabilities = prob,
sequence_min = min(SeqScore),
sequence_max = max(SeqScore)
)
)
res.mcc
## -----------------------------------------------------------------------------
time.MonteCarlo1 <- system.time(
res.MonteCarlo1 <- monteCarlo(
local_score = LS,
FUN = function(x) {
return(sample(
x = x, size = length(x),
replace = TRUE
))
},
x = SeqScore, numSim = 200
)
)
res.MonteCarlo1
## -----------------------------------------------------------------------------
time.MonteCarlo2 <- system.time(
res.MonteCarlo2 <- monteCarlo(
local_score = LS,
FUN = function(x) {
return(sample(
x = x, size = length(x),
replace = TRUE
))
},
x = SeqScore, numSim = 10000
)
)
res.MonteCarlo2
## -----------------------------------------------------------------------------
res.pval <- c(
Daudin = res.daudin, Karlin = res.karlin, MCC = res.mcc,
MonteCarlo1 = res.MonteCarlo1, MonteCarlo1 = res.MonteCarlo2
)
names(res.pval) <- c("Exact", "Approximation", "Improved appx", "MonteCarlo1", "MonteCarlo2")
res.pval
## -----------------------------------------------------------------------------
rbind(time.daudin, time.karlin, time.mcc,time.MonteCarlo1, time.MonteCarlo2)
## -----------------------------------------------------------------------------
data(Seq31)
SeqScore.Short <- CharSequence2ScoreSequence(Seq31, HydroScore)
n.short <- length(SeqScore.Short)
n.short
## -----------------------------------------------------------------------------
SeqScore.S <- SeqScore.Short
LS.S <- localScoreC(SeqScore.S)$localScore[1]
prob.S <- scoreSequences2probabilityVector(list(SeqScore.S))
LS.S
prob.S
## -----------------------------------------------------------------------------
time.daudin <- system.time(
res.daudin. <- daudin(
local_score = LS.S, sequence_length = n.short,
score_probabilities = prob.S,
sequence_min = min(SeqScore.S),
sequence_max = max(SeqScore.S)
)
)
time.karlin <- system.time(
res.karlin <- try(karlin(
local_score = LS.S, sequence_length = n.short,
score_probabilities = prob.S,
sequence_min = min(SeqScore.S),
sequence_max = max(SeqScore.S)
))
)
time.mcc <- system.time(
res.mcc <- try(mcc(
local_score = LS.S, sequence_length = n.short,
score_probabilities = prob.S,
sequence_min = min(SeqScore.S),
sequence_max = max(SeqScore.S)
))
)
time.karlinMonteCarlo <- system.time(
res.karlinMonteCarlo <-
karlinMonteCarlo(
local_score = LS.S, plot = FALSE,
sequence_length = n.short,
simulated_sequence_length = 1000,
FUN = sample, x = min(SeqScore.S):max(SeqScore.S),
size = 1000, prob = prob.S, replace = TRUE,
numSim = 10000
)
)
time.MonteCarlo <- system.time(
res.MonteCarlo <- monteCarlo(
local_score = LS.S, plot = FALSE,
FUN = function(x) {
return(sample(
x = x, size = length(x),
replace = TRUE
))
},
x = SeqScore.S, numSim = 10000
)
)
## -----------------------------------------------------------------------------
res.pval <- c(Daudin = res.daudin, MonteCarlo = res.MonteCarlo)
names(res.pval) <- c("Daudin", "MonteCarlo")
res.pval
rbind(time.daudin, time.MonteCarlo)
## -----------------------------------------------------------------------------
set.seed(1)
prob.bis <- dnorm(-5:5, mean = -0.5, sd = 1)
prob.bis <- prob.bis / sum(prob.bis)
names(prob.bis) <- -5:5
# Score Expectation
sum((-5:5)*prob.bis)
time.mcc <- system.time(
res.mcc <- mcc(
local_score = LS.S, sequence_length = n.short,
score_probabilities = prob.bis,
sequence_min = min(SeqScore.S),
sequence_max = max(SeqScore.S)
)
)
time.daudin <- system.time(
res.daudin <- daudin(
local_score = LS.S, sequence_length = n.short,
score_probabilities = prob.bis,
sequence_min = min(SeqScore.S),
sequence_max = max(SeqScore.S)
)
)
simu <- function(n, p) {
return(sample(x = -5:5, size = n, replace = TRUE, prob = p))
}
time.MonteCarlo <- system.time(
res.MonteCarlo <-
monteCarlo(
local_score = LS.S, plot = FALSE,
FUN = simu, n.short, prob.bis, numSim = 100000
)
)
## -----------------------------------------------------------------------------
res.pval <- c(MCC=res.mcc,Daudin = res.daudin, MonteCarlo = res.MonteCarlo)
names(res.pval) <- c("MCC", "Daudin", "MonteCarlo")
res.pval
rbind(time.mcc,time.daudin, time.MonteCarlo)
## -----------------------------------------------------------------------------
data(Seq1093)
SeqScore.Long <- CharSequence2ScoreSequence(Seq1093, HydroScore)
n.Long <- length(SeqScore.Long)
n.Long
SeqScore.Long <- CharSequence2ScoreSequence(Seq1093, HydroScore)
LS.L <- localScoreC(SeqScore.Long)$localScore[1]
LS.L
prob.L <- scoreSequences2probabilityVector(list(SeqScore.Long))
prob.L
sum(prob.L*as.numeric(names(prob.L)))
## ----echo=FALSE---------------------------------------------------------------
time.daudin.L <- system.time(
res.daudin.L <- daudin(
local_score = LS.L, sequence_length = n.Long,
score_probabilities = prob.L,
sequence_min = min(SeqScore.Long),
sequence_max = max(SeqScore.Long)
)
)
time.karlin.L <- system.time(
res.karlin.L <- karlin(
local_score = LS.L, sequence_length = n.Long,
score_probabilities = prob.L,
sequence_min = min(SeqScore.Long),
sequence_max = max(SeqScore.Long)
)
)
time.mcc.L <- system.time(
res.mcc.L <- mcc(
local_score = LS.L, sequence_length = n.Long,
score_probabilities = prob.L,
sequence_min = min(SeqScore.Long),
sequence_max = max(SeqScore.Long)
)
)
time.MonteCarlo.L <- system.time(
res.MonteCarlo.L <-
monteCarlo(
local_score = LS.L,
FUN = sample, x = min(SeqScore.Long):max(SeqScore.Long),
size = n.Long, prob = prob.L, replace = TRUE, plot = FALSE,
numSim = 10000
)
)
time.karlinMonteCarlo.L <- system.time(
res.karlinMonteCarlo.L <-
karlinMonteCarlo(
local_score = LS.L,
sequence_length = n.Long,
simulated_sequence_length = 1000,
FUN = sample, x = min(SeqScore.Long):max(SeqScore.Long),
size = 1000, prob = prob.L, replace = TRUE,
numSim = 10000, plot = FALSE
)
)
## -----------------------------------------------------------------------------
res.pval.L <- c(res.daudin.L, res.mcc.L, res.karlin.L, res.karlinMonteCarlo.L$p_value,res.MonteCarlo.L)
names(res.pval.L) <- c("Daudin", "MCC", "Karlin", "KarlinMonteCarlo", "MonteCarlo")
res.pval.L
## -----------------------------------------------------------------------------
rbind(
time.daudin.L, time.karlin.L, time.mcc.L, time.karlinMonteCarlo.L,
time.MonteCarlo.L
)
## -----------------------------------------------------------------------------
MySeqsList <- list(Seq31, Seq219, Seq1093)
names(MySeqsList) <- c("Q09FU3.fasta", "P49755.fasta", "Q60519.fasta")
MySeqsScore <- lapply(MySeqsList, FUN = CharSequence2ScoreSequence, HydroScore)
AA <- automatic_analysis(MySeqsScore, model = "iid")
AA$Q09FU3.fasta
AA$Q09FU3.fasta$`method applied`
AA$Q60519.fasta$`method applied`
## -----------------------------------------------------------------------------
cbind(prob, prob.S, prob.L, "3 sequences" = scoreSequences2probabilityVector(MySeqsScore))
## -----------------------------------------------------------------------------
daudin.bis <- daudin(local_score = LS.S, sequence_length = n.short, score_probabilities = scoreSequences2probabilityVector(MySeqsScore), sequence_max = 5, sequence_min = -5)
daudin.bis
AA$Q09FU3.fasta$`p-value`
# automatic_analysis(sequences=list('MySeq.Short'=MySeq.Short), model='iid', distribution=proba.S)
## -----------------------------------------------------------------------------
library(localScore)
data(HydroScore)
data(SeqListSCOPe)
MySeqScoreList <- lapply(SeqListSCOPe, FUN = CharSequence2ScoreSequence, HydroScore)
head(MySeqScoreList)
AA <- automatic_analysis(sequences = MySeqScoreList, model = "iid")
AA[[1]]
# the p-value of the first 10 sequences
sapply(AA, function(x) {
x$`p-value`
})[1:10]
# the 20th smallest p-values
sort(sapply(AA, function(x) {
x$`p-value`
}))[1:20]
which(sapply(AA, function(x) {
x$`p-value`
}) < 0.05)
table(sapply(AA, function(x) {
x$`method`
}))
# The maximum sequence length equals 404 so it here normal that the exact method is used for all the 606 sequences of the data base
scoreSequences2probabilityVector(MySeqScoreList)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.