# Getting started / FAQ for R
## Gene Leynes
## See also: http://www.statmethods.net/
## See also: http://rprogramming.net/how-tos/
## See also: http://manuals.bioinformatics.ucr.edu/home/programming-in-r
## See also: http://pirategrunt.com/
## See also:
## Table of Contents:
## Startup / System / Environment
## Packages
## Objects and Methods
## Website / XML Parsing
## Functions
## Flow Control
## STRINGS
## DATES
## Matching, Subsetting, and Sets
## FACTORS
## recode
## expand.grid
## Tables, Shingles, lapply
## Wide to tall using data.table
## Reshape using Pivot function
## Reshape using dcast.data.table
## Tables and apply functions
## REPORTING
## Graphics
#### Make a nice set of colors using color ramp
#### Modify a previous plot
#### Plot the "convex hull"
#### Plot Stars
#### all the point types
#### plot a color wheel using RGB
#### plot color wheels using heat, topo, terrain, and cm
#### Get HSV colors (where "blue" becomes "#0000FF")
#### Side by side boxplots
#### Identify points on a plot
#### Make two plots on the same canvas, Legend, Random walk, Tsay book cover
#### Add a background color to just the plot
## Lattice Graphics
#### Make xyplot plot in order
#### xy plot example with panel
## Simple fitting / statistics / plots
#### curve
#### spline
#### par - mfrow
#### bandplot
#### Boxplot example with gl
## Useful Functions
#### II
#### IM
#### clipper
#### clipped
#### NAsummary
## Examples
#### Example - Great plot example (bank dot plot example)
#### Example - Check R scripts for presence of a word pattern
#### Example - Make a .First function that does stuff
#### Example - Programming - Try catch
####
##-----------------------------------------------------------
## Update R
##-----------------------------------------------------------
## First, install the new version of R
## Second, make v0 the path to your old library and v1 the
## path to the new library
v0 = 'C:/Users/375492/Documents/R/R-3.1.0/library/'
v1 = 'C:/Program Files/R/R-3.1.1/library'
## These are the packages that didn't copy over:
list.files(v0)[!list.files(v0)%in%list.files(v1)]
## Now, just install geneorama and then the missing
## packages!
install.packages('devtools')
devtools::install_github("geneorama/geneorama")
library(geneorama)
loadinstall_libraries(list.files(v0)[!list.files(v0) %in% list.files(v1)])
##-----------------------------------------------------------
## Startup / System / Environment
##-----------------------------------------------------------
## Very useful help:
?Startup
## See the help file examples executed
example(svd)
## Change warnings options
options(warn=0) ## Default: stores warnings until function exit
options(warn=-1) ## No warnings
options(warn=1) ## Warnings print immediately
## View, getting, and setting environmental variables
names(Sys.getenv())
Sys.getenv('R_USER')
Sys.setenv(R_USER='C:\\Users\\gene.leynes')
Sys.setenv(R_USER='C:\\Documents and Settings\\Gene\\My Documents')
Sys.getenv('HOME')
Sys.getenv('R_HOME')
Sys.getenv("R_LIBS_USER")
## Typical start: (OLD)
setwd('~/Dropbox/R/[CurrentProject]')
source('~/Dropbox/R/00 Standard Functions/sourceDir.R')
sourceDir('~/Dropbox/R/00 Standard Functions', FALSE)
## Get R version number
R.Version()
## Change directory using a script using "getSrcFilename"
## Once you have saved this to a script, you can drag it into
## a running R window and it will change the working directory
## to the path of the saved script.
curpath <- dirname(getSrcFilename(function(){}, full=TRUE))
setwd(curpath)
cat('Now in ', curpath, '\n')
print(dir())
## Trick for loading the initial global environment
## 1) In a running instance of R create a function called .First
## function that creates everything you will want.
## 2) Save an image file (.RData file) with only .First
## 3) Everytime you use that .RData image your .First function
## will run first and create an environment that you specified.
## You can also use it to run an initialization, load a file,
## or start a process.
## SEE EXAMPLES FOR MORE COMPLEX EXAMPLE
## Delete everything so that you're only saving .First
rm(list=ls())
## Define your fuction
.First = function(){
## Create or load your desired objects
exampleFuncion <- function(x){x=x+1; x=x^2; return(x)}
## Save the object names in the local namespace
CurNames <- ls()
## Put each local object into the global environment
for(nm in CurNames){
assign(x=nm, value=get(nm), envir=.GlobalEnv)
}
## The .First function now exits. All local copies of
## created values are lost, but the global copies remain
## accessible to the user.
}
## Save .First somewhere
save.image('test.RData')
##-----------------------------------------------------------
## Packages
##-----------------------------------------------------------
## See also: http://manuals.bioinformatics.ucr.edu/home/programming-in-r#TOC-Building-R-Packages
## Detach package
detach(pos = match("package:graphics", search()))
## List all installed packages
rownames(installed.packages())
##list all commands in all packages
for(i in 1:length(search())) {print(ls(i))}
## Install a package from r-forge (or any other website)
install.packages("DierckxSpline",
repos = '"http://r-forge.r-project.org"')
## Download and install Geneorama from source
local({
installed_pacakges <- .packages(all.available = TRUE)
if(!'geneorama' %in% installed_pacakges){
## Get webfile and unzip to a temp directory
webfile <- 'http://geneorama.com/code/geneorama_1.1.tar.gz'
tmp <- tempdir()
tmpfile <- file.path(tmp, basename(webfile))
download.file(webfile, tmpfile)
install.packages(tmpfile, type = "source", repos = NULL)
unlink(tmpfile)
unlink(tmp)
}
})
## Copy out all the functions in a package
## Gene Leynes (4/27/11)
getFormals <- function(x){
Names <- names(formals(x))
Vals <- as.character(formals(x))
#Vals[Vals==''] <- "INPUT"
ret <- paste(Names,Vals,sep='=',collapse=', ')
ret <- gsub('\\=$', '', ret)
ret <- gsub('\\=,', ',', ret)
ret <- paste('(',ret,')', sep='')
ret
}
library(data.table)
funs <- ls(which(search()=="package:data.table"))
funargs <- sapply(funs, getFormals)
ret <- cbind(Functions = funs, Function_Arguments = funargs)
clipper(ret)
##-----------------------------------------------------------
## Objects and Methods
##-----------------------------------------------------------
## structure returns the given object with further attributes set.
structure(1:6, dim = 2:3)
## Information about an object
typeof(tempdf)
is.what(tempdf)
class(tempdf)
mode(tempdf)
## list all methods
methods(class="data.frame")
methods(class="systemfit")
methods(plot)
## Make a png file
png(file = "plot23000x23000.png", width = 23000, height = 23000, res = 2400)
print(myplot)
dev.off()
# Printing within a loop
# if using the GUI, turn off buffering,
# or use 'flush.console()' after 'cat'.
##------------------------------------------------------------------------------
## Website / XML Parsing
##------------------------------------------------------------------------------
##-----------------------------------------------------------
## xcode example
## http://dataissexy.wordpress.com/2013/06/01/10-second-rss-parsing-in-r-and-xpath/
##-----------------------------------------------------------
library(XML)
library(RCurl)
xml.url<-"http://dataissexy.wordpress.com/feed/"
rssdoc <- xmlParse(getURL(xml.url))
rsstitle <- xpathSApply(rssdoc, '//item/title', xmlValue)
rssdesc <- xpathSApply(rssdoc, '//item/description', xmlValue)
rssdate <- xpathSApply(rssdoc, '//item/pubDate', xmlValue)
##------------------------------------------------------------------------------
## Functions
##------------------------------------------------------------------------------
## Use "mapply" to re-use function arguments in subsequent calls
FunctionWithManyArguments <- function(x1, x2, x3, x4, x5, x6, x7, x8, x9){
ans <- x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9
return(ans)
}
FunctionWithManyArguments(1, 2, 3, 4, 5, 6, 7, 8, 9)
ArgsThatRarelyChange <- list(x3=3, x4=4, x5=5, x6=6, x7=7, x8=8, x9=9)
mapply(somefun, x1 = 1,x2 = 2, MoreArgs=ArgsThatRarelyChange)
## Check for function arguments (manually)
SomeFunction <- function(x,y){
if(nargs()!=length(formals(f)))
stop('Something is missing...')
}
SomeFunction(x=1:10)
## Check for function arguments (automatically)
SomeFunction <- function(x,y){
curfun <- deparse(match.call()[1])
curfun <- substr(curfun, 1, nchar(curfun) - 2)
if(length(formals(curfun)) != nargs())
stop('Something is missing...')
}
SomeFunction()
## Two ways to get the arguments of a function using match.call
ReturnArgs <- function(aa, bb){
mcall <- match.call()
for(i in 1:length(mcall)){
cat(i, '\n')
print(mcall[i])
cat(substr(mcall[i], 1, nchar(mcall[i])))
cat('\n\n')
}
invisible()
}
ReturnArgs(aa = 'onething', bb = 1:100)
# deparse substitute
PrintDeparsedVals <- function(aa, bb){
print(deparse(substitute(aa)))
print(deparse(substitute(bb)))
invisible()
}
PrintDeparsedVals(aa = 'onething', bb = 1:100)
##------------------------------------------------------------------------------
## Flow Control
##------------------------------------------------------------------------------
# else if
xx=c(0,1,2,3)
tester=function(x){
if(x==0){
print('zero')
} else if(x==1){
print('one')
} else {
print('more than one')
}
}
for (i in 1:length(xx)) tester(xx[i])
# stopifnot (displays error message if condition is false)
stopifnot(F)
##------------------------------------------------------------------------------
## STRINGS
##------------------------------------------------------------------------------
## Replace // with \
chartr('\\', '/', R.home())
## Find characters in a string
x <- deparse(mean)[2] # "UseMethod(\"mean\")"
gregexpr('u', deparse(mean)[2]) # -1
gregexpr('s', deparse(mean)[2]) # 2
gregexpr('\"', deparse(mean)[2]) # 11 16
## add a leading 0
sprintf("%02.0f", pi)
## Format a number to be a string with n digits
formatC(pi, digits=0, width=5, flag='0')
##------------------------------------------------------------------------------
## DATES
##------------------------------------------------------------------------------
# show sub second time
options(digits.secs=4)
# For details see
?DateTimeClasses
## Example date / times
set.seed(100)
datetime <- ISOdatetime(year=2012, month=10, day=29, hour=0, min=0, sec=0)
datetimes <- datetime + sort(runif(5, min=0, max=60 * 60 * 24 * 30))
datetimes
# [1] "2012-10-30 16:35:45 CDT" "2012-11-05 16:31:27 CST"
# [3] "2012-11-07 04:35:29 CST" "2012-11-12 00:21:19 CST"
# [5] "2012-11-14 12:40:19 CST"
unclass(datetimes)
# [1] 1351632945 1352154687 1352284530 1352701280 1352918420
data.frame(unclass(as.POSIXlt(datetimes)))
# sec min hour mday mon year wday yday isdst
# 1 45.12583 35 16 30 9 112 2 303 1
# 2 27.12267 31 16 5 10 112 1 309 0
# 3 29.75937 35 4 7 10 112 3 311 0
# 4 19.74392 21 0 12 10 112 1 316 0
# 5 19.74719 40 12 14 10 112 3 318 0
as.POSIXct(1472562988, origin = "1960-01-01") # local
as.POSIXct(1472562988, origin = ISOdatetime(1960,1,1,0,0,0)) # local
as.POSIXct(1472562988, origin = "1960-01-01", tz = "GMT") # UTC
# strftime is a wrapper for "fromat"
format(datetimes, "%H:%M:%OS3")
format(datetimes, "%Y-%m-%d")
# Output of strftime is char
# Use round to get dates, but convert to POSIXct; default format is lt
as.POSIXct(round(datetimes, "day"))
# strptime can convert char to date / time (sometimes)
strptime(c("02/27/92 23:03:20", "02/27/92 22:29:56"), "%m/%d/%y %H:%M:%S")
# Get WeekNumber from dates (add one?)
dates <- c("15-08-2005", "01-01-2005",
"15-01-2005", "08-08-2005", "08-08-2005")
as.factor(format(as.Date(dates,"%d-%m-%Y"),"%W"))
##------------------------------------------------------------------------------
## Matching and Subsetting, and Sets
##------------------------------------------------------------------------------
## Match and %in%
match('OH', state.abb) # 35
match(state.abb, 'OH') # NA,NA,...,NA,NA,1,NA,NA,NA,NA,...
'OH' %in% state.abb # TRUE
state.abb %in% 'OH' # FALSE FALSE FALSE.... TRUE FALSE FALSE....
# Subset
head(airquality)
head(subset(airquality, Temp > 80, select = c(Ozone, Temp)))
head(subset(airquality, Day == 1, select = -Temp))
head(subset(airquality, select = Ozone:Wind))
# Combine
require(gdata)
a <- matrix(rnorm(12),ncol=4,nrow=3)
b <- 1:4
combine(a, b)
combine(x=a, b)
combine(x=a, y=b)
combine(a, b, names=c("one","two"))
# set functions
?identical
(x <- c(sort(sample(1:20, 9)),NA))
(y <- c(sort(sample(3:23, 7)),NA))
union(x, y)
intersect(x, y)
setdiff(x, y)
setdiff(y, x)
setequal(x, y)
is.element(x, y)# length 10
is.element(y, x)# length 8
# all.equal
?identical()
d45 <- pi*(1/4 + 1:10)
all.equal(tan(d45), rep(1,10)) # TRUE, but
all (tan(d45) == rep(1,10)) # FALSE, since not exactly
all.equal(tan(d45), rep(1,10), tol=0) # to see difference
##==============================================================================
## FACTORS
## Create a sample string and convert it to factors and ordered factors
##==============================================================================
## Population of possibilities
pop_vec <- c('green', 'red', 'yellow', NA)
pop_vec_ordered <- c('green', 'yellow', 'red', NA)
## Sample from the population
set.seed(14)
ex_str <- sample(x = pop_vec, size = 20, replace = TRUE,
prob =c(.29, .29, .29, .13))
## Conversion examples
factor(x = ex_str, exclude = NA) ## Exclude NA (default behavior)
factor(x = ex_str, exclude = c(NA, 'red')) ## Exclude NA and "red"
factor(x = ex_str, exclude = NULL) ## Exclude nothing; use NA as a level
## Create factor vectors
ex_fac <- factor(x = ex_str, exclude = NULL, levels = pop_vec)
ex_ord <- factor(x = ex_str, exclude = NULL, levels = pop_vec_ordered, ordered=TRUE)
ex_ord
## Modify the levels of the example factor vectors:
## Rename all 1 level at a time:
## This should replace "green" with "good", but it also will forget the NA
## level! (created by the exclude = NULL part of the original call)
## levels(ex_fac_rename)[1] <- "good"
## Rename all 4 levels at once:
ex_fac_rename <- ex_fac ## Make a copy
ex_ord_rename <- ex_ord ## Make a copy
levels(ex_fac_rename) <- c('good', 'bad', 'ok', 'missing')
levels(ex_ord_rename) <- c('good', 'ok', 'bad', 'missing')
## Make missing slightly better than bad, but worse than ok
ex_ord_rename_relevel <- ex_ord_rename
ex_ord_rename_relevel <- ordered(ex_ord_rename_relevel,
c("good", "ok", "missing", "bad"))
## relevel doesn't work for this... relevel has a very narrow usage
## relevel(ex_ord_rename_relevel, c("good", "ok", "missing", "bad"))
data.frame(ex_str,
ex_fac,
ex_ord,
ex_ord_rename,
ex_ord_rename_relevel,
ex_fac_rename,
unclass(ex_fac),
unclass(ex_ord),
unclass(ex_ord_rename),
unclass(ex_ord_rename_relevel),
unclass(ex_fac_rename))[order(ex_ord), ]
##------------------------------------------------------------------------------
## recode
## See also: http://rprogramming.net/recode-data-in-r/
##------------------------------------------------------------------------------
(x <- rep(1:3,3))
## [1] 1 2 3 1 2 3 1 2 3
recode(x, "c(1,2)='A'; else='B'")
## [1] "A" "A" "B" "A" "A" "B" "A" "A" "B"
recode(x, "1:2='A'; 3='B'")
## [1] "A" "A" "B" "A" "A" "B" "A" "A" "B"
##------------------------------------------------------------------------------
## expand.grid
##------------------------------------------------------------------------------
jcetable <- expand.grid(
event = c('Wheezing at any time',
'Wheezing and breathless',
'Wheezing without a cold',
'Waking with tightness in the chest',
'Waking with shortness of breath',
'Waking with an attack of cough',
'Attack of asthma',
'Use of medication'),
method = c('Mail', 'Telephone'),
sex = c('Male', 'Female'),
what = c('Sensitivity', 'Specificity'))
##------------------------------------------------------------------------------
## Tables, Shingles, lapply
##------------------------------------------------------------------------------
# Nice way to summarize by groups
data(warpbreaks)
lapply(split(warpbreaks, warpbreaks$tension), summary)
# Make buckets of equal size
# see also: shingle
library(lattice)
equal.count(rnorm(100))
##------------------------------------------------------------------------------
## Wide to tall using data.table
##------------------------------------------------------------------------------
library(data.table)
dt <- data.table(uid=c("a","b"), var1=c(1,2), var2=c(100,200))
dt
melt(dt, id=c("uid"))
## Confusing way:
dt[, list(variable = names(.SD), value = unlist(.SD, use.names = F)), by = uid]
##------------------------------------------------------------------------------
## Reshape with dcast.data.table
##------------------------------------------------------------------------------
library(data.table)
dt <- data.table(id = c(1,1,2,2),
region = c("a","b","a","b"),
date = Sys.Date(),
count = c(6,7,8,9))
dt
dcast.data.table(data = dt,
formula = id ~ region + date)
dcast.data.table(data = dt,
formula = id ~ region + date,
value.var = "count")
dcast.data.table(data = dt,
formula = id ~ date,
value.var = "count")
dcast.data.table(data = dt,
formula = id ~ region,
value.var = "date")
## Reshape and aggregate by "sum"
dcast.data.table(data = dt,
fun.aggregate = sum,
formula = id ~ date,
value.var = "count")
##------------------------------------------------------------------------------
## Reshape using Pivot function
## http://stackoverflow.com/questions/5307313/fastest-tall-wide-pivoting-in-r
##------------------------------------------------------------------------------
tall <- data.frame(
int = rep(1:100, 100),
fac = rep( paste('v',1:100,sep=''), each = 100),
val = 1:1000) [-(1:5), ]
tall <- tall[sample(1:nrow(tall)), ]
pivot <- function(col, row, value) {
# browser() }
col = as.factor(col)
row = as.factor(row)
mat = array(
dim = c(nlevels(row), nlevels(col)),
dimnames = list(levels(row), levels(col)))
mat[(as.integer(col) - 1L) * nlevels(row) + as.integer(row)] = value
mat
}
wide <- with(tall, pivot(fac, int, val))
tall[1:10, 1:3]
wide[1:10, 1:10]
system.time( replicate(100, wide <- with(tall, tapply( val, list(int, fac), identity))))
system.time( replicate(100, wide <- with(tall, pivot(fac, int, val))))
##------------------------------------------------------------------------------
## Tables and apply functions
##------------------------------------------------------------------------------
## aggregate.table
## Depricated: Now use data.table
library(gdata)
g1 <- sample(letters[1:5], 1000, replace=TRUE)
g2 <- sample(LETTERS[1:3], 1000, replace=TRUE )
dat <- rnorm(1000)
stderr <- function(x) sqrt( var(x,na.rm=TRUE) / nobs(x) )
(means <- aggregate.table( dat, g1, g2, mean ))
(stderrs <- aggregate.table( dat, g1, g2, stderr ))
(ns <- aggregate.table( dat, g1, g2, nobs ))
## apply - some of the examples
## Depricated: Now use data.table
## Compute row and column sums for a matrix:
(x <- cbind(x1 = 3, x2 = c(4:1, 2:5)))
dimnames(x)[[1]] <- letters[1:8]
apply(x, 2, mean, trim = .2)
col.sums <- apply(x, 2, sum)
row.sums <- apply(x, 1, sum)
rbind(cbind(x, Rtot = row.sums), Ctot = c(col.sums, sum(col.sums)))
(ma <- matrix(c(1:4, 1, 6:8), nrow = 2))
apply(ma, 1, table) #--> a list of length 2
apply(ma, 1, stats::quantile)# 5 x n matrix with rownames
## tapply "Pivot Table"
tapply(airquality$Ozone, airquality$Month, length)
tapply(airquality$Ozone, airquality$Month, mean, na.rm = T)
## sapply
newdf$nUnique = sapply(sapply(df[,1:ncol(df)],unique),length)
## Example of mapply
# The function:
fx2 <- function (x,a) (x-a)^2+1
# Just for fun:
optimize(fx2,c(0,1),tol=.00001,a=1/3)
# A line where a is constant
xx <- seq(0, 1, .05)
aa <- rep(1/3, 21)
yy <- mapply(fx2, x = xx, a = aa)
plot(xx, yy, type='l')
# A line where a varies
xx <- seq(0, 1, .05)
aa <- seq(1/4, 5/12, (5/12-1/4)/(length(xx)-1))
yy <- mapply(fx2, x=xx, a=aa)
lines(xx, yy, col='blue')
##------------------------------------------------------------------------------
## REPORTING
##------------------------------------------------------------------------------
## http://stackoverflow.com/questions/10646665/how-to-convert-r-markdown-to-html-i-e-what-does-knit-html-do-in-rstudio-0-9
## Add this line to the Rmd file to see R Studio's messages
Sys.sleep(30)
# Create .md, .html, and .pdf files
# http://danieljhocking.wordpress.com/2013/09/25/knitting-beautiful-documents-in-rstudio/
knit("File.Rmd")
markdownToHTML('File.md', 'File.html', options=c("use_xhml"))
system("pandoc -s File.html -o File.pdf")
## Command to fix margins in latex (from blog post above)
\usepackage[vmargin=1in,hmargin=1in]{geometry}
# Create .md, .html, and .pdf files
# http://rprogramming.net/create-html-or-pdf-files-with-r-knitr-miktex-and-pandoc/
# Step 1: Install MiKTeX
# Step 2: Install Pandoc
# Step 3: Install Knitr and Markdown
# Step 4: Create a .Rmd File Containing Your Analysis
# Step 5: Create a .R File to Run the .Rmd File
# # Set working directory
# setwd("C:/Documents and Settings/name")
# # Load packages
# require(knitr)
# require(markdown)
# # Create .md, .html, and .pdf files
# knit("My_Analysis.Rmd")
# markdownToHTML('My_Analysis.md', 'My_Analysis.html', options=c("use_xhml"))
# system("pandoc -s My_Analysis.html -o My_Analysis.pdf")
# Step 6: Produce HTML and PDF Output Files with R
##------------------------------------------------------------------------------
## Graphics
##------------------------------------------------------------------------------
# Make a nice set of colors using color ramp
ramp <- colorRamp(c("red", "blue"))
rgb( ramp(seq(0, 1, length = 5)), max = 255)
plot(1:15, 1:15, pch=16, cex=10, col=
rgb( ramp(seq(0, 1, length = 15)), max = 255))
# Modify a previous plot
par(mfrow=c(3,1))
pars <- list()
for(i in 1:3){
set.seed(10)
plot(1:(10*i)*i, main=paste('examp', i))
pars[[i]] = par('usr')
}
for(i in 1:3){
par(mfg=c(i,1))
par(usr=pars[[i]])
lines(1:(10*i)*i, col=i+1)
}
# Plot the "convex hull"
plot(cars)
polygon( cars[chull(cars),], col="pink", lwd=3)
points(cars)
# Plot Stars
palette(rainbow(12, s = 0.6, v = 0.75))
stars(mtcars[, 1:7], len = 0.8, key.loc = c(12, 1.5),
main = "Motor Trend Cars", draw.segments = TRUE)
# all the point types
nn <- 1:50; plot(nn,nn,type='n');for(i in nn)points(i,i,pch=i);rm(i,nn)
nn <- 51:100; plot(nn,nn,type='n');for(i in nn)points(i,i,pch=i);rm(i,nn)
nn <- 101:150; plot(nn,nn,type='n');for(i in nn)points(i,i,pch=i);rm(i,nn)
# plot a color wheel using RGB
r <- 1:100/100; g <-.05; b <- .05
pie(rep(1,100), col=rgb(r,g,b))
# plot color wheels using heat, topo, terrain, and cm
par(mfrow=c(2,2), mar=c(0,0,1.5,0))
pie(rep(1,40),col=heat.colors(40)[1:40], main='heat.colors')
pie(rep(1,40),col=terrain.colors(40)[1:40], main='terrain.colors')
pie(rep(1,40),col=topo.colors(40)[1:40], main='topo.colors')
pie(rep(1,40),col=cm.colors(40)[1:40], main='cm.colors')
# Get HSV colors (where "blue" becomes "#0000FF")
(hc <- rgb2hsv(col2rgb("blue")))
(SomeHSV <- hsv(hc[1,1],hc[2,1],hc[3,1]))
pie(1, col = SomeHSV)
# Side by side boxplots
boxplot(len ~ dose, data = ToothGrowth,
boxwex = 0.25, at = 1:3 - 0.2,
subset = supp == "VC", col = "yellow",
main = "Guinea Pigs' Tooth Growth",
xlab = "Vitamin C dose mg",
ylab = "tooth length",
xlim = c(0.5, 3.5), ylim = c(0, 35), yaxs = "i")
boxplot(len ~ dose, data = ToothGrowth, add = TRUE,
boxwex = 0.25, at = 1:3 + 0.2,
subset = supp == "OJ", col = "orange")
legend(2, 9, c("Ascorbic acid", "Orange juice"),
fill = c("yellow", "orange"))
# Add a simple fitted line
plot(cars)
(z <- line(cars))
abline(coef(z))
# Identify points on a plot
plot(mtcars$mpg,mtcars$hp)
identify(mtcars$mpg,mtcars$hp,label=rownames(mtcars))
# Make two plots on the same canvas
# Legend
# Random walk
# Tsay book cover
set.seed(123456)
## White noise
e <- rnorm(500)
## Random walk
randwalk <- cumsum(e)
## Trend
trend <- 1:500
## Plots
par(mar=rep(5,4))
## deterministic trend + noise
plot.ts(0.5 * trend + e, lty=1, ylab='', xlab='')
## random walk with drift
lines(0.5 * trend + cumsum(e), lty=2)
## random walk (same scale)
lines(randwalk, lty=3, col='red')
par(new=T)
## random walk (own scale)
plot.ts(randwalk, lty=3, axes=FALSE, col='red')
axis(4, pretty(range(randwalk)))
legend(10, 18.7, legend=c('deterministic trend + noise (left)',
'random walk w/ drift (left)',
'random walk (left+right)'),
lty=c(1, 2, 3), col=c('black', 'black', 'red'))
# Add a background color to just the plot
# (Boxplot example)
dat <- data.frame(x=gl(10,20),y=rnorm(10*20))
plot(dat)
# ADD THE BACKGROUND
tmp <- par()$usr
rect(tmp[1], tmp[3], tmp[2], tmp[4], col = "lightyellow")
plot(dat, add=TRUE)
# Add a background color to just the plot
# (Non-boxplot example)
dat <- data.frame(x=gl(10,20),y=rnorm(10*20))
dat$x <- as.numeric(dat$x)
plot(dat, type='n')
# ADD THE BACKGROUND
tmp <- par("usr")
rect(tmp[1], tmp[3], tmp[2], tmp[4], col = "lightyellow")
points(dat)
##------------------------------------------------------------------------------
## Lattice Graphics
## also, see end for "great plot example"
##------------------------------------------------------------------------------
# Make xyplot plot in order
lattice.options(default.args = list(as.table = TRUE))
# xy plot example with panel
phi <- matrix(rnorm(19000), nrow=1000, ncol=19)
av <- .03 + .05 * phi
av <- t(apply(1+av, 1, cumprod))
spx <- (.03 + .05 * phi)^2
spx <- t(apply(1+spx, 1, cumprod))
#plot(av[1,],ylim=range(av));apply(av,1,lines)
#plot(spx[1,],ylim=range(spx));apply(spx,1,lines)
dat <- data.frame(
av=as.vector(av),
spx=as.vector(spx),
dif=as.vector(av-spx),
time = rep(2:20,each=1000))
lattice.options(default.args = list(as.table = TRUE))
xyplot(dif~spx|time, dat,
strip = strip.custom(strip.levels = TRUE),
scales = "free",
panel=function(x,y){
panel.xyplot(x,y)
panel.abline(h=c(0,1,1.5),lty=2)
}
)
##------------------------------------------------------------------------------
## Simple fitting / statistics / plots
##------------------------------------------------------------------------------
## curve
curve(x^3-3*x, -2, 2)
curve(x^2-2, add = TRUE, col = "violet")
## spline
n <- 9
x <- 1:n
y <- rnorm(n)
plot(x, y, main = paste("spline[fun](.) through", n, "points"))
lines(spline(x, y))
lines(spline(x, y, n = 201), col = 2)
## par - mfrow
par(mfrow = c(3,1))
for (i in 1:3) plot(matrix(rnorm(21), nrow=7))
## bandplot #2 (and lm)
x <- abs(rnorm(500))
y <- rnorm(500, mean=2*x, sd=2+2*x)
plot(x,y )
reg <- lm(y~x)
summary(reg)
bandplot(x,y)
bandplot(predict(reg),resid(reg))
## Boxplot Example with gl
g = factor(round(10*runif(10*100)))
x = rnorm(10*100)+sqrt(as.numeric(g))
xg = split(x,g)
boxplot(xg, col='lavender', notch=T, var.width=T)
##------------------------------------------------------------------------------
## Useful Functions
##------------------------------------------------------------------------------
II <- function(x) I(as.character(x))
IM <- function(x) as.data.frame(as.matrix(x))
clipper <- function(x){
#writeClipboard(x,format=1)
write.table(x, "clipboard", sep="\t", col.names=NA)
}
clipped = function(){
read.delim("clipboard")
}
NAsummary <- function(df){
newdf <- data.frame(Count = rep(nrow(df),ncol(df)))
rownames(newdf) = colnames(df)
newdf$nNA <- nrow(df)-sapply(df,function(x)length(na.omit(x)))
newdf$rNA <- newdf$nNA / newdf$Count
newdf$rNA <- trunc(newdf$rNA*10000)/10000
newdf$nUnique <- sapply(df, function(x)length(unique(x)))
newdf$rUnique <- newdf$nUnique / newdf$Count
newdf$rUnique <- trunc(newdf$rUnique*10000)/10000
return(newdf)
}
##------------------------------------------------------------------------------
## Data Frame VS. Matrix
##------------------------------------------------------------------------------
df <- data.frame(x=1:10, y=rnorm(10)) ;
mat <- matrix(c(x=1:10, y=rnorm(10)), ncol=2)
colnames(mat) = c('x','y')
df['x'] ; mat['x'] # Data frame vs. NA
df[,'x'] ; mat[,'x'] # Vector vs. Vector
df[['x']] ; mat[['x']] # Vector vs. subscript out of bounds
df$'x' ; mat$'x' # Vector vs. ERROR: operator is invalid for atomic vectors
df$x ; mat$x # Vector vs. ERROR: operator is invalid for atomic vectors
df$x[1] ; mat$x[1] # Vector vs. ERROR: operator is invalid for atomic vectors
df[[1,2]] ; mat[[1,2]] # Numeric Vec vs. Numeric Vec
df[1,2] ; mat[1,2] # Numeric Vec vs. NAMED Vec
##-----------------------------------------------------------
## Example - Great plot example (bank dot plot example)
##-----------------------------------------------------------
## xyplot
library(lattice)
library(grid)
### read the data
fname <- "http://addictedtor.free.fr/graphiques/data/150/data.txt"
d <- read.csv(file(fname))
### workaround so that lattice does not order bank names alphabetically
d$bank <- ordered(d$bank, levels=d$bank)
### setup the key
k <- simpleKey(c("Q2 2007", "January 20th 2009"))
k$points$fill <- c("lightblue", "lightgreen")
k$points$pch <- 21
k$points$col <- "black"
k$points$cex <- 1
list1=list(pch=21, fill = c("lightblue", "lightgreen"),
cex = 4, col = "black")
### create the plot
dotplot(bank ~ MV2007 + MV2009, data=d, horiz=T,
par.settings = list(superpose.symbol = list1),
xlab = "Market value ($Bn)", key=k,
panel = function(x, y, ...){
panel.dotplot(x, y, ...)
grid.text(
unit(x, "native"), unit(y, "native"),
label = x, gp = gpar(cex=.7))
})
##------------------------------------------------------------------------------
## Example - Check R scripts for presence of a word pattern
##------------------------------------------------------------------------------
ff <- list.files(recursive = TRUE, pattern = '\\.R$')
dat <- lapply(ff, readLines)
hasplot <- unlist(sapply(dat, function(x)
length(grep(x, pattern='plot')))) != 0
missingreset <- unlist(sapply(dat, function(x)
length(grep(x, pattern='resetGraph'))))==0
# Shows files that have "plot" but is missing "resetGraph"
ff[hasplot&missingreset]
##------------------------------------------------------------------------------
## Profile code
##------------------------------------------------------------------------------
# Profile some code:
nr <- 100
nc <- 10
yy <- c()
y <- matrix(rnorm(nr*nc), nrow=nr, ncol=nc)
Rprof("test.out")
readLines("test.out")
for (i in 1:nrow(y)){
yy[i] <- lm(y[i,] ~ I(1:nc))$coefficients[1]
}
Rprof(NULL)
summaryRprof("test.out")
file.remove("test.out")
rm(nr, nc, yy, y)
##-----------------------------------------------------------
## Make a .First function that does stuff
##-----------------------------------------------------------
## 5/20/12
.First=function(){
library(base)
library(stats)
library(mgcv)
library(graphics)
library(grDevices)
library(utils)
library(datasets)
library(methods)
library(PBSmodelling)
#if(!require(gdata)) stop()
#if(!require(zoo)) stop()
#if(!require(lattice)) stop()
#if(!require(quadprog)) stop()
# The following code is specific to the GUI stuff...
cat('Starting GUI\n')
#assign(x='DebugMode', value=1, envir=.GlobalEnv)
DebugMode <<- FALSE
options(stringsAsFactors=FALSE)
## LOAD FUNCTIONS
source('Functions/appLoadFunctions.R')
appLoadFunctions()
## SET CurrentMode MANUALLY AND CREATE GUI
CurrentMode <<- 1
loadGUI()
cat('Finished Loading Application\n')
}
save.image(file='loadgui.RData')
##------------------------------------------------------------------------------
## Example - Programming - Try catch
##------------------------------------------------------------------------------
## Try catch
tryCatch(expr = {1 + '1'},
error = function(x) return('it\'s ok, everybody makes mistakes'))
## Raise an Error:
e = simpleError("some annoying error")
## Example - try to write a file
WriteAttempt <- try(expr = write.table(x = x, file=fpname, append = F,
quote = FALSE, sep = ",", eol = "\n",
na = "", dec = ".", row.names = FALSE,
col.names = TRUE,
qmethod = c("escape", "double")),
silent=TRUE)
if("try-error" %in% class(WriteAttempt)){
write.table(x = x, file = fpnameTemp, append = F,
quote = FALSE, sep = ",", eol = "\n",
na = "", dec = ".", row.names = FALSE,
col.names = TRUE,
qmethod = c("escape", "double"))
openFile(fpnameTemp)
}else{
openFile(fpname)
}
## Example - try to install packages
libs <- c('mgcv','pppppppp')
xx <- sapply(X = libs,
FUN = function(x) try(expr = library(package = x,
character.only = TRUE),
silent = TRUE))
xx <- sapply(X = xx,
FUN = function(x) 'try-error' %in% class(x))
xx <- xx[xx]
sapply(X = names(xx),
FUN = function(x) install.packages(pkgs = x,
repos = 'http://cran.r-project.org'))
sapply(X = names(xx),
FUN = function(x) try(expr = library(package = x,
character.only = TRUE)))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.