# File util.R
#
# Elias Hernandis <eliashernandis@gmail.com>
# Title-cases a string: the string is split on single spaces and, for every
# resulting word, the first character is upper-cased and the remaining
# characters are lower-cased. The words are then re-joined with single spaces.
# Characters without a case (digits, punctuation, ...) are left unchanged.
#
# Arguments:
#   x  Character string to convert; only the first element of `x` is used.
# Returns a single character string.
titleize <- function(x) {
  words <- strsplit(x, " ")[[1]]
  initials <- toupper(substr(words, 1, 1))
  remainders <- tolower(substring(words, 2))
  paste0(initials, remainders, collapse = " ")
}
# Attempts to load `filename` as a tab-separated table with a header row and
# reports the outcome. Strings are kept as character vectors, both "NA" and
# empty fields are read as missing values, and column names are taken from
# the header verbatim (no syntactic-name mangling).
#
# Arguments:
#   filename  Path of the file to read.
#   fileType  Human-readable description of the file, used in error messages.
# Returns a list with three elements:
#   warning  Collected warning messages (this function never adds any).
#   error    Collected error messages; gains one entry if the file cannot be
#            parsed.
#   df       The parsed data frame, or NULL if reading failed.
# See the README for details.
commonChecks <- function(filename, fileType) {
  notes <- c()
  problems <- c()

  # Any failure while reading is reduced to NULL and reported below as a
  # user-facing message rather than propagated as an R error.
  df <- tryCatch(
    read.table(
      filename,
      header = TRUE,
      sep = "\t",
      stringsAsFactors = FALSE,
      na.strings = c("NA", ""),
      check.names = FALSE
    ),
    error = function(e) NULL
  )

  if (is.null(df)) {
    problems <- c(
      problems,
      paste("The", fileType, "you provided is not a well-formed tab-separated value (TSV) file. This can be avoided by generating the mixture file with a spreadsheet program instead of composing the file by hand.")
    )
  }

  list(warning = notes, error = problems, df = df)
}
# Computes the Levenshtein (edit) distance between two strings: the minimum
# number of single-character insertions, deletions and substitutions needed
# to turn `s1` into `s2`. It indicates how many mistakes it takes to mistype
# one string given that the other is the correct one.
#
# Arguments:
#   s1, s2  Single character strings to compare; either may be empty.
# Returns the distance as a single number. When one string is empty the
# distance is the number of characters in the other.
levenshteinDistance <- function(s1, s2) {
  t1 <- strsplit(s1, "")[[1]]
  t2 <- strsplit(s2, "")[[1]]
  m <- length(t1)
  n <- length(t2)

  # d[i + 1, j + 1] holds the distance between the first i characters of s1
  # and the first j characters of s2; the first row and column cover the
  # empty prefix.
  d <- matrix(0, m + 1, n + 1)
  d[, 1] <- 0:m
  d[1, ] <- 0:n

  # seq_len() gives an empty sequence for an empty string, whereas 1:0 would
  # iterate over c(1, 0) and compare against out-of-range (NA) characters.
  for (i in seq_len(m)) {
    for (j in seq_len(n)) {
      substCost <- if (t1[i] == t2[j]) 0 else 1
      d[i + 1, j + 1] <- min(
        d[i, j + 1] + 1,      # delete a character of s1
        d[i + 1, j] + 1,      # insert a character of s2
        d[i, j] + substCost   # substitute, or keep a matching character
      )
    }
  }

  d[m + 1, n + 1]
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.