# Chunk defaults used when knitting the README: merge source and output into
# one block, prefix output lines with "#>", and prefix figure files with
# "README-".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment  = "#>",
  fig.path = "README-"
)

# Load the package from the current directory, silencing load-time messages.
suppressMessages(devtools::load_all("."))
# Re-knit the README from its R Markdown source.
rmarkdown::render(input = "./README.Rmd")
With personal functions I like to reuse every time!
One such function is `proper`, which capitalizes a string.
# Mixed-case sample sentence passed to proper(); the result is auto-printed.
x <- "OnE oF sUcH iS a proPer function that capitalizes a string."
proper(x)
`my.colors()` and `my.symbols()` can be used to improve plot readability.
In this example, `draw.empty.plot` is also used to create an empty plot onto which the data points are then drawn.
xdata <- -10:10

# Empty canvas: x limits span the data, y limits are fixed at 0..23.
draw.empty.plot(xlim = range(xdata), ylim = c(0, 23))

# Draw 22 shifted parabolas, one per index, each using the symbol and colour
# that my.symbols() / my.colors() return for that index.
for (ix in seq_len(22)) {
  points(
    xdata,
    1/10 * xdata * xdata + ix,
    pch = my.symbols(ix),
    col = my.colors(ix),
    cex = .9
  )
}
# Survival-curve example built on the `flchain` dataset. Statements, in order:
#   - attach survival, plyr, ggfortify and gridExtra with their startup
#     messages suppressed, then load `flchain` via data();
#   - build `ydata` (time = futime, status = death) and `xdata` with four
#     columns: age, sex recoded to 3 for 'M' and 1 otherwise, sample.yr, kappa;
#   - call draw.kaplan() twice, each with a named length-4 vector that is 1 for
#     a single xdata column (Age, then Sex) -- presumably a coefficient vector
#     over the xdata columns; confirm against draw.kaplan();
#   - show both `$plot` results side by side with grid.arrange(ncol = 2).
# NOTE(review): this chunk was flattened onto one line, so it is ambiguous
# whether the four-covariate draw.kaplan() call after `#` is live code that
# followed a lone `#` separator or a commented-out line -- confirm against
# README.Rmd before reformatting.
suppressPackageStartupMessages(library(survival)) suppressPackageStartupMessages(library(plyr)) suppressPackageStartupMessages(library(ggfortify)) suppressPackageStartupMessages(library(gridExtra)) data(flchain) ydata <- data.frame( time = flchain$futime, status = flchain$death) xdata <- cbind(flchain$age, as.numeric(flchain$sex == 'M') * 2 + 1, flchain$sample.yr, flchain$kappa) page <- draw.kaplan(list(Age= c(1,0,0,0)), xdata = xdata, ydata = ydata)$plot psex <- draw.kaplan(list(Sex= c(0,1,0,0)), xdata = xdata, ydata = ydata)$plot grid.arrange(page, psex, ncol = 2) # draw.kaplan(list(Age= c(1,0,0,0), Sex = c(0,1,0,0), yr = c(0,0,1,0), kappa = c(0,0,0,1)), xdata = xdata, ydata = ydata)$plot
This is especially relevant for survival or binary outcomes with few cases of one category, which need to be well distributed among the train/test datasets or across cross-validation folds.
The example below sets aside 90% of the data for the training set. As the samples are already divided into two sets (`set1` and `set2`), it performs the 90% separation for each set and then joins the results (with option `join.all = T`).
# Two complementary logical index sets over the same 20 samples. TRUE/FALSE are
# spelled out because T and F are ordinary variables that can be reassigned.
set1 <- c(
  TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE,
  TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE
)
set2 <- !set1
cat('Set1\n', set1, '\n\nSet2\n', set2, '\n\nTraining / Test set using logical indices\n\n')
# Fixed seed so the split is reproducible.
set.seed(1985)
balanced.train.and.test(set1, set2, train.perc = .9)
#
# Same sets expressed as numeric positions instead of logical masks; the seed
# is reset to the same value before the second split.
set1 <- which(set1)
set2 <- which(set2)
cat('##### Same sets but using numeric indices\n\n', 'Set1\n', set1, '\n\nSet2\n', set2, '\n\nTraining / Test set using numeric indices\n')
set.seed(1985)
balanced.train.and.test(set1, set2, train.perc = .9)
#
# Two synthetic datasets that differ only in the third argument (.2 vs .75);
# presumably the base of the covariance decay -- confirm against
# gen.synth.xdata().
xdata1 <- gen.synth.xdata(10, 5, 0.2)
xdata2 <- gen.synth.xdata(10, 5, 0.75)
#
# First dataset: print the data, its covariance matrix, and the matrix plot.
cat('Using .2^|i-j| to generate co-variance matrix\n\n')
cat('X generated\n\n')
data.frame(xdata1)
cat('cov(X)\n\n')
data.frame(cov(xdata1))
draw.cov.matrix(xdata1) + ggplot2::ggtitle('X1 Covariance Matrix')
#
# Second dataset: same steps, but with the Pearson correlation instead of the
# covariance. The printed label says cor(X) to match the matrix that is
# actually shown (it previously read cov(X)).
cat('Using .75^|i-j| to generate co-variance matrix (plotting correlation)\n\n')
cat('X generated\n\n')
data.frame(xdata2)
cat('cor(X)\n\n')
data.frame(cor(xdata2, method = 'pearson'))
draw.cov.matrix(xdata2, fun = cor, method = 'pearson') + ggplot2::ggtitle('X2 Pearson Correlation Matrix')
Uses a cache to save and retrieve results. The cache is automatically created from the arguments and the source code of the function, so that if any of those change, the cache is regenerated.
Caution: Files are not deleted so the cache directory can become rather big.
# Two identical run.cache() calls; the final expression checks that both
# results are equal.
a <- run.cache(sum, 1, 2)
b <- run.cache(sum, 1, 2)
all(a == b)
# Same function and argument, but a different `seed` in each call; the final
# expression compares the two draws element-wise.
a <- run.cache(rnorm, 5, seed = 1985)
b <- run.cache(rnorm, 5, seed = 2000)
all(a == b)
RunCache was originally intended to be used to calculate big correlation matrices.
# Benchmark: correlate column 1 of a random matrix with every other column,
# comparing run.cache() (forced recalculation, cached, cached with a
# precomputed digest) against plain sapply() and a 4-core mclapply().
n.rows <- 1000
n.cols <- 50000
xdata <- matrix(rnorm(n.rows * n.cols), ncol = n.cols)
# making sure cache is saved
# NOTE: the function source text below is intentionally left exactly as
# written (and identical to my.fun), since the README states that function
# source feeds the cache.
.Last.value <- run.cache(sapply, 2:n.cols, function(ix) {cor(xdata[,1], xdata[,ix])})
runCache.digest <- list(verissimo::digest.cache(xdata))
my.fun <- function(ix) {cor(xdata[,1], xdata[,ix])}
microbenchmark::microbenchmark(
  # TRUE is spelled out: T is a plain variable that can be reassigned.
  runCache.non.cached   = run.cache(sapply, 2:n.cols, my.fun, show.message = FALSE, force.recalc = TRUE),
  runCache.cached       = run.cache(sapply, 2:n.cols, my.fun, show.message = FALSE),
  runCache.cached.speed = run.cache(sapply, 2:n.cols, my.fun, cache.digest = runCache.digest, show.message = FALSE),
  actual.function       = sapply(2:n.cols, my.fun),
  actual.4cores         = unlist(parallel::mclapply(2:n.cols, my.fun, mc.cores = 4)),
  times = 5
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.