Nothing
# implements a wrapper to distance (similarity) computation on
# collections of sequences. auto and cross distances can be
# computed (compare with dist in package proxy)
#
# note that 1) we can supply lists of vectors or vectors of
# character (strings)
# 2) operation weights are in the order of
# insertion/deletion, equality, and replacing
# 3) the first row/column of the matrix of alphabet
# weights are used for replacement with the empty
# symbol (space)
# 4) include NA, etc if exclude = NULL
# 5) but the C function returns NA if NAs are encounterd
# 6) use parallel mode only if y != NULL
#
# ceeboo 2006, 2008
sdists <- function(x,y=NULL, method="ow", weight=c(1,1,0,2),
exclude=c(NA,NaN,Inf,-Inf), pairwise = FALSE) {
METHODS <- c("ow","aw","awl")
code <- pmatch(method, METHODS)
if (is.na(code))
stop("invalid method")
if (code == -1)
stop("ambiguous method")
if (is.character(x))
x <- strsplit(x,"")
if (!is.list(x))
stop("'x' not a list")
if (!is.null(y)) {
if (is.character(y))
y <- strsplit(y,"")
if (!is.list(y))
stop("'y' not a list")
}
if (code >= 2) {
if (!is.matrix(weight))
stop("'weight' not a matrix")
if (dim(weight)[1] != dim(weight)[2])
stop("'weight' not square")
if (is.null(colnames(weight)))
stop("'weight' no colnames")
l <- colnames(weight)
}
else {
if (length(weight) < 4)
stop("'weight' invalid")
# determine common symbol set
l <- sort(unique(c(unlist(x),unlist(y),"")),na.last=TRUE)
}
x <- lapply(x,function(x)
factor(x,levels=l,exclude=if(is.integer(x))NA else exclude))
if (!is.null(y)) {
y <- lapply(y,function(x)
factor(x,levels=l,exclude=if(is.integer(x))NA else exclude))
if (pairwise && length(x) != length(y))
stop("'pairwise', lengths of 'x' and 'y' do not conform")
}
if (!is.double(weight))
storage.mode(weight) <- "double"
obj <- .Call(R_sdists,x,y,as.integer(code),weight,pairwise)
if (is.null(y))
obj <- structure(obj, Size=length(x), class="dist",
Diag=FALSE, Upper=FALSE,
Labels=names(x), method=method)
else if (!pairwise) {
rownames(obj) <- names(x)
colnames(obj) <- names(y)
}
obj
}
# as there is no unique space symbol available not 'excluding'
# NA has NA as result (see the C implementation).
#
# if graph = TRUE the vector of transcripts is transformed into
# graph data that can be supplied to 'segments', or 'grid.segments',
# etc. the dynmic programming table is returned as attribute
# 'table' and the traceback graph in attribute 'graph'.
sdists.trace <-
function(x,y, method="ow", weight=c(1,1,0,2), exclude=c(NA,NaN,Inf,-Inf), graph = FALSE, partial = FALSE) {
METHODS <- c("ow","aw","awl")
code <- pmatch(method, METHODS)
if (is.na(code))
stop("invalid method")
if (code == -1)
stop("ambiguous method")
if (is.character(x)) {
if (length(x) != 1)
stop("'x' not a scalar string")
x <- strsplit(x,"")[[1]]
}
if (is.factor(x))
x <- as.character(x)
if (!is.vector(x))
stop("'x' not a vector")
if (is.character(y)) {
if (length(y) != 1)
stop("'y' not a scalar string")
y <- strsplit(y,"")[[1]]
}
if (is.factor(y))
y <- as.character(y)
if (!is.vector(y))
stop("'y' not a vector")
if (code >= 2) {
if (partial)
stop("'partial' not implemented")
if (!is.matrix(weight))
stop("'weight' not a matrix")
if (is.null(colnames(weight)))
stop("'weight' no colnames")
l2 <- colnames(weight)
if (is.null(rownames(weight))) {
if (dim(weight)[1] != dim(weight)[2])
stop("'weight' not square")
l1 <- l2
} else
l1 <- rownames(weight)
}
else {
if (length(weight) < 4)
stop("'weight' invalid")
if (partial) {
if (length(weight) < 5)
weight <- c(weight, weight[1], 0)
if (length(weight) < 6)
weight <- c(weight, 0)
}
# determine symbol sets
l1 <- l2 <- sort(unique(c(x,y,"")),na.last=TRUE)
}
x <- factor(x,levels=l1,exclude=if(is.integer(x))NA else exclude)
y <- factor(y,levels=l2,exclude=if(is.integer(y))NA else exclude)
if (!is.double(weight))
storage.mode(weight) <- "double"
t <- .Call(R_sdists_transcript, x, y, as.integer(code), weight, graph)
if (is.na(t[1]))
return(t)
# reduce set of transcripts/paths
if (partial) {
z <- t
## reduce to maximum number of trailing inserts
k <- attr(regexpr("I+$", z), "match.length")
z <- z[k == max(k)]
## reduce to maximum number of matches
k <- sapply(lapply(strsplit(z, ""), table), "[", "M")
k <- which(k == max(k, na.rm = TRUE))
if (length(k))
z <- z[k]
## reduce to maximum number of leading inserts
k <- attr(regexpr("^I+", z), "match.length")
z <- z[k == max(k)]
attributes(z) <- attributes(t)
t <- z
}
if (graph) {
dimnames(attr(t, "table")) <-
list(x = c("", as.character(x)), y = c("", as.character(y)))
attr(t, "graph") <- .Call(R_sdists_graph, t)
names(attr(t, "graph")) <- c("x0", "y0", "x1", "y1", "weight")
names(attr(t, "pointer")) <- c("x0", "y0", "x1", "y1")
class(t) <- "sdists.graph"
return(t)
}
z <- lapply(t, function(t) .Call(R_sdists_align, x, y, t))
names(z) <- t
attr(z, "value") <- attr(t, "value")
attr(z, "partial") <- attr(t, "partial")
class(z) <- "sdists.trace"
z
}
### experimental plot function for
#
# idea from: http://home.uchicago.edu/~aabbott/
#
# in R 2.4.x we will fix yscale = c(ny, 0)
#
# label in grid.xaxis cannot contain "", i.e. does not
# produce output if it does.
#
# with pdf() it produces a garbage file that segfaults
# xpdf (but not acroread) :-(
#
# fixme: use another line type for prefixes or suffixes
# of local alignments.
#
# ceeboo 2006
plot.sdists.graph <-
function(x, circle.col = 1, graph.col = 2, circle.scale = c("mean", "max", "last", "text"), main = "", ...) {
circle.scale <- match.arg(circle.scale)
g <- attr(x, "graph")
b <- attr(x, "pointer")
t <- attr(x, "table")
nx <- dim(t)[2]
ny <- dim(t)[1]
if (circle.scale == "text")
fontsize <- 24 ## FIXME
else {
t <- t - min(t)
t <- t / switch(circle.scale, mean = mean(t),
max = max(t),
last = t[ny,nx])
}
cn <- colnames(t)
rn <- rownames(t)
# bug?fix
cn[cn == ""] <- " "
rn[rn == ""] <- " "
grid.newpage()
grid.text(y = 0.95, label = main,
gp = gpar(fontface = "bold"))
vp <- viewport(xscale = c(0, nx), yscale = c(0, ny),
width = nx / max(nx, ny) * 0.70,
height = ny / max(nx, ny) * 0.70)
pushViewport(vp)
grid.grill(h = seq(ny)-0.5, v = seq(nx)-0.5, default.units = "native")
grid.xaxis(at = seq(nx)-0.5, label = cn)
grid.yaxis(at = seq(ny)-0.5, label = rn)
if (circle.scale == "text")
mapply(grid.text,
label = t,
x = rep(1:nx, each = ny) - 1/2,
y = rep(1:ny, times = nx) - 1/2,
MoreArgs = list(
check.overlap = TRUE,
default.units = "native",
gp = gpar(col = "lightgrey", fontsize = fontsize))
)
else
grid.circle(x = rep(1:nx, each = ny) - 1/2,
y = rep(1:ny, times = nx) - 1/2,
r = t / 2,
default.units = "native",
gp = gpar(col = circle.col))
grid.segments(x0 = b$y0 + 1/2, y0 = b$x0 + 1/2,
x1 = b$y1 + 1/2, y1 = b$x1 + 1/2,
default.units = "native",
gp = gpar(lty = 3))
grid.segments(x0 = g$y0 + 1/2, y0 = g$x0 + 1/2,
x1 = g$y1 + 1/2, y1 = g$x1 + 1/2,
default.units = "native",
gp = gpar(col = graph.col, lwd = g$weight,
lty = (g$y1 > g$y0 & g$x1 > g$x0 &
cn[g$y1+1] == rn[g$x1+1]) + 1))
popViewport()
}
###
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.