similarity: Similarity measure
In AkselA/R-ymse: Ymse (Various)

Description Usage Arguments See Also Examples

Calculate the similarity between two character vectors based on a similarity matrix

1	similarity(x, y, sm = smat(x, y), sfun = sum, ...)

`x`	a character vector or a two-column data.frame/matrix
`y`	a character vector. Ignored if `x` is a data.frame/matrix
`sm`	a similarity matrix. By default a unit matrix
`sfun`	function used to summarise the elementwise similarities
`...`	further arguments passed to `sfun`

smat

# Simplest call: only the two vectors are supplied, so `sm` falls back to
# smat(x, y) and the elementwise similarities are combined with sum().
# The package describes this result as the Hamming distance.
# NOTE(review): if the default really is a unit (identity) matrix, the sum
# counts matching positions rather than mismatches -- confirm against smat().
similarity(x = c(1, 0, 1, 0), y = c(1, 1, 0, 0))


# A similarity matrix does not have to be symmetric.
bef <- rep(c(1, 2, 3), times = 3)
aft <- c(0, 2, 2, 1, 2, 2, 1, 1, 2)

# Asymmetric scoring: a decrease in value of 1 is considered more similar
# than an increase in value of 1. In the matrix, the entry one step below
# the diagonal (e.g. row "1", column "0") is 2, while the entry one step
# above it (e.g. row "0", column "1") is 0.
sm1 <- matrix(
  c(3, 0, 0, 0,
    2, 3, 0, 0,
    0, 2, 3, 0,
    0, 0, 2, 3),
  nrow = 4L, ncol = 4L, byrow = TRUE,
  dimnames = list(c("0", "1", "2", "3"), c("0", "1", "2", "3"))
)

# Symmetric counterpart: a step of 1 in either direction scores 1.
sm2 <- matrix(
  c(3, 1, 0, 0,
    1, 3, 1, 0,
    0, 1, 3, 1,
    0, 0, 1, 3),
  nrow = 4L, ncol = 4L, byrow = TRUE,
  dimnames = list(c("0", "1", "2", "3"), c("0", "1", "2", "3"))
)

# Same pair of vectors scored under each matrix.
similarity(bef, aft, sm = sm1)
similarity(bef, aft, sm = sm2)

# Load both example data sets in one go:
#   insulin - pre-aligned fragments of insulin genes
#   smt     - transition-transversion matrix
data(insulin, smt)

# pairwise() runs similarity() over all column pairs of `insulin`;
# `smt` and `sfun = mean` are handed on with the call.
pairwise(insulin, similarity, smt, sfun = mean)

# Made-up questionnaire answers: one column per respondent, one row per
# question, each answer being a mood label kept as a plain character string.
qu <- data.frame(
  Alice = c(
    "happy", "sad", "angry", "unsure",
    "happy", "sad", "happy", "angry"
  ),
  Bob = c(
    "happy", "sad", "angry", "angry",
    "happy", "angry", "angry", "sad"
  ),
  Charlie = c(
    "sad", "sad", "unsure", "unsure",
    "happy", "sad", "angry", "sad"
  ),
  stringsAsFactors = FALSE
)

# Similarity matrix giving the relative similitude of each pair of moods.
# The table is parsed from text; the header row has one field fewer than the
# data rows, so read.table() takes the first column as row names.
emsm <- as.matrix(
  read.table(
    header = TRUE,
    text = "
       happy  sad  angry unsure
 happy   5     0     1     1
   sad   0     5     2     1
 angry   1     2     4     2
unsure   1     1     2     3"
  )
)

# Compare every pair of respondents, with the scores divided by 5 and the
# elementwise similarities averaged.
pairwise(qu, similarity, sm = emsm / 5, sfun = mean)