Nothing
## ============================
## Matrix of substitution costs
## ============================
seqcost <- function(seqdata, method, cval=NULL, with.missing=FALSE,
miss.cost=NULL, time.varying=FALSE, weighted=TRUE, transition="both", lag=1,
missing.trate=FALSE, state.prop=NULL, prop.weights=NULL, prop.type=list(),
proximities=FALSE, miss.cost.fixed=!missing.trate,
state.features=state.prop, feature.weights=prop.weights, feature.type = prop.type) {
if (!inherits(seqdata,"stslist")){
stop(" [!] data is NOT a sequence object, see seqdef function to create one")
}
metlist <- c("CONSTANT","TRATE", "FEATURES", "FUTURE", "INDELS", "INDELSLOG")
if (missing(method) || !method %in% metlist){
stop(" [!] method must be one of: ", paste(metlist,collapse=" "))
}
transitionlist <- c("previous", "next", "both")
if (!transition %in% transitionlist){
stop(" [!] transition must be one of: ", paste(transitionlist,collapse=" "))
}
ret <- list()
ret$indel <- 1
alphabet <- attr(seqdata,"alphabet")
cval4cond <- time.varying && method=="TRATE" && transition=="both"
if (is.null(cval)) {
cval <- ifelse(cval4cond, 4, 2)
}
if (is.null(miss.cost)) {
miss.cost <- cval
}
## Adding an entry for for missing state
if (with.missing) {
## if(!miss.cost.fixed && method %in% c("TRATE","FUTURE","INDELS","INDELSLOG") {
## message(" [>] Substitution costs for missing values derived from data")
## } else {
## message(" [>] Substitution costs for missing values set as ",miss.cost)
## }
alphabet <- c(alphabet, attr(seqdata,"nr"))
}
alphsize <- length(alphabet)
if (method=="CONSTANT") {
if (is.na(cval)){
stop("no value for the constant substitution-cost")
}
if (time.varying) {
time <- ncol(seqdata)
message(" [>] creating ",alphsize,"x",alphsize,"x",time,
" time varying substitution-cost matrix using ",
cval," as constant value")
costs <- array(cval, dim=c(alphsize, alphsize, time))
for (i in 1:time) {
diag(costs[,,i]) <- 0
}
}
else {
message(" [>] creating ",alphsize,"x",alphsize,
" substitution-cost matrix using ",cval," as constant value")
costs <- matrix(cval,nrow=alphsize,ncol=alphsize)
diag(costs) <- 0
}
}
if (method=="FUTURE") {
chisqdista <- function(mat){
cs <- colSums(mat)
if(any(cs==0)){
cs[cs==0] <- Inf
}
Pdot <- 1/cs
n <- nrow(mat)
dist <- matrix(0, nrow=n, ncol=n)
for(i in 1:n){
if(i<n){
for(j in (i+1):n){
dist[i,j] = sum(Pdot*(mat[i,]-mat[j,])**2)
dist[j,i] = dist[i,j]
}
}
}
return(sqrt(dist))
}
if(time.varying){
stop(" [!] time.varying substitution cost is not (yet) implemented for method FUTURE.")
}
message(" [>] creating substitution-cost matrix using common future...")
tr <- seqtrate(seqdata, time.varying=FALSE, weighted=weighted, lag=lag, with.missing = with.missing)
costs <- chisqdista(tr)
diag(costs) <- 0
ret$indel <- .5*max(costs)
}
## GR suggestion: use feature.weights, state.features, feature.type in place of
## prop.weights, state.prop and prop.type
if (method=="FEATURES") {
if(time.varying){
stop(" [!] time.varying substitution cost is not (yet) implemented for method FEATURES.")
}
if(is.null(state.features) || nrow(state.features)!=length(alphabet)){
stop(" [!] state.features should be a data.frame containing one row per state (possibly one for missing values).")
}
if(!requireNamespace("cluster")){
stop(" [!] cluster library is required to use FEATURES method.")
}
if(is.null(feature.weights)){
feature.weights <- rep(1, ncol(state.features))
}
costs <- as.matrix(daisy(state.features, metric="gower", weights=feature.weights, type=feature.type))
diag(costs) <- 0
ret$indel <- .5*max(costs)
}
if(method=="INDELS"||method=="INDELSLOG"){
if(time.varying){
stop(" [!] time.varying substitution cost is not (yet) implemented for INDELS and INDELSLOG.")
}
ww <- attr(seqdata, "weights")
if(is.null(ww) ||!weighted){
ww <- rep(1, nrow(seqdata))
}
indels <- as.numeric(prop.table(xtabs(rep(ww, ncol(seqdata))~unlist(seqdata)))[alphabet])
indels[is.na(indels)] <- 1
if(method =="INDELSLOG"){
indels <- log(2/(1+indels))
}else{
indels <- 1/indels
indels[is.infinite(indels)] <- .Machine$double.xmax
}
##ret <- list()
ret$indel <- indels
##ret$sm <- matrix(0, nrow=length(alphabet), ncol=length(alphabet))
costs <- matrix(0, nrow=length(alphabet), ncol=length(alphabet))
for(i in seq_along(alphabet)){
for(j in seq_along(alphabet)){
if(i!=j){
##ret$sm[i, j] <- indels[i]+indels[j]
costs[i, j] <- indels[i]+indels[j]
}
}
}
costs[is.infinite(ret$sm)] <- .Machine$double.xmax
##ret$sm[is.infinite(ret$sm)] <- .Machine$double.xmax
##return(ret)
}
if (method=="TRATE") {
if (time.varying) {
message(" [>] creating time varying substitution-cost matrix using transition rates ...")
tr <- seqtrate(seqdata, time.varying=TRUE, weighted=weighted, lag=lag, with.missing=with.missing)
tmat <- nrow(tr)
time <- ncol(seqdata)
costs <- array(0, dim=c(alphsize, alphsize, time))
## Function to compute the cost according to transition rates
tratecostBoth <- function(trate, time, state1, state2, debut, fin){
cost <- 0
if (!debut) { ## Premier état
cost <- cost - trate[state1,state2, time-1] - trate[state2, state1, time-1]
}
if (!fin) { ##Dernier Etat
cost <- cost - trate[state1,state2, time] - trate[state2, state1, time]
}
if (!debut && !fin) {
return(cost + cval)
}
else{
return(cval + 2*cost)
}
}
tratecostPrevious <- function(trate, time, state1, state2, debut, fin){
cost <- 0
if (!debut) { ## Premier état
cost <- cost - trate[state1,state2, time-1] - trate[state2, state1, time-1]
}
return(cval + cost)
}
tratecostNext <- function(trate, time, state1, state2, debut, fin){
cost <- 0
if (!fin) { ##Dernier Etat
cost <- cost - trate[state1,state2, time] - trate[state2, state1, time]
}
return(cval + cost)
}
if(transition=="previous") {
tratecost <- tratecostPrevious
}
else if (transition=="next") {
tratecost <- tratecostNext
}
else {
tratecost <- tratecostBoth
}
for (t in 1:time) {
for (i in 1:(tmat-1)) {
for (j in (i+1):tmat) {
cost <- max(0,tratecost(tr, t, i, j, t==1, t==time))
costs[i, j, t] <- cost
costs[j, i, t] <- cost
}
}
}
}
else {
message(" [>] creating substitution-cost matrix using transition rates ...")
tr <- seqtrate(seqdata, time.varying=FALSE, weighted=weighted, lag=lag, with.missing = with.missing)
tmat <- nrow(tr)
costs <- matrix(nrow=alphsize,ncol=alphsize)
diag(costs) <- 0
for (i in 1:(tmat-1)) {
for (j in (i+1):tmat) {
cost <- cval - tr[i,j] - tr[j,i]
costs[i,j] <- cost
costs[j,i] <- cost
}
}
ret$indel <- .5*max(costs)
}
}
##
## if (with.missing &&(method=="CONSTANT"||(method=="TRATE" && !missing.trate))) {
if (with.missing && miss.cost.fixed) {
if (time.varying) {
costs[alphsize,1:(alphsize-1),] <- miss.cost
costs[1:(alphsize-1),alphsize,] <- miss.cost
}
else {
costs[alphsize,1:(alphsize-1)] <- miss.cost
costs[1:(alphsize-1),alphsize] <- miss.cost
}
}
## Setting rows and columns labels
rclab <- paste(alphabet,"->",sep="")
if (time.varying) {
dimnames(costs) <- list(rclab, rclab, colnames(seqdata))
}
else {
dimnames(costs) <- list(rclab,rclab)
}
if(proximities){
costs <- 1-costs/max(costs)
}
#return(costs)
ret$sm <- costs
return(ret)
}
## alias for backward compatibility with seqsubm
seqsubm <- function(...){
return(seqcost(...)$sm)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.