library(knitr)

Getting started / FAQ for R

Author: Gene Leynes

Primary resources:

  • http://rmarkdown.rstudio.com/
  • http://stackoverflow.com/questions/tagged/R

    Other great websites:

  • http://www.statmethods.net/ Great place to learn general R concepts
  • http://rprogramming.net/how-tos/
  • http://manuals.bioinformatics.ucr.edu/home/programming-in-r
  • http://pirategrunt.com/
  • http://www.r-bloggers.com/
  • http://en.wikibooks.org/wiki/R_Programming/Bayesian_Methods

    Startup / System / Environment

    Typical initialization

    ##------------------------------------------------------------------------------
    ## INITIALIZE
    ##------------------------------------------------------------------------------
    
    ## Remove all objects; perform garbage collection
    rm(list=ls())
    gc(reset=TRUE)
    
    ## Check for dependencies
    if(!"geneorama" %in% rownames(installed.packages())){
        if(!"devtools" %in% rownames(installed.packages())){
            install.packages('devtools')
            }
        devtools::install_github('geneorama/geneorama')
        }
    
    ## Load libraries
    geneorama::detach_nonstandard_packages()
    # geneorama::loadinstall_libraries(c("geneorama", "knitr", "caret", "gbm", 
    #                                  "glmnet", "ROCR", "pROC", "plyr", "class", 
    #                                  "hmeasure", "randomForest", 
    #                                  "AppliedPredictiveModeling", "data.table", 
    #                                  "doParallel", "e1071", "rpart"))
    geneorama::loadinstall_libraries(c("geneorama", "knitr", "data.table"))
    

    Example code to transfer libraries from a new R installation

    ## First, install the new version of R
    
    ## After the new version of R is installed
    ## make v0 the path to your *old* library
    ## make v1 the path to the *new* library
    v0 = 'C:/Users/375492/Documents/R/R-3.1.0/library/'
    v1 = 'C:/Program Files/R/R-3.1.1/library'
    
    ## These are the packages that didn't copy over:
    list.files(v0)[!list.files(v0)%in%list.files(v1)]
    
    ## Now, just install geneorama and then the missing 
    ## packages!
    install.packages('devtools')
    devtools::install_github("geneorama/geneorama")
    library(geneorama)
    loadinstall_libraries(list.files(v0)[!list.files(v0) %in% list.files(v1)])
    

    Load data from Yahoo

    library(geneorama)
    loadinstall_libraries(c("fImport","data.table"))
    
    # dat <- yahooImport("AAPL", frequency = "daily", 
    #                  from = "2014-01-01", to = "2014-09-22")
    # dat@data@.Data[,"Adj.Close"]
    # as.IDate(as.POSIXct(dat@data@positions, origin = "1970-01-01"))
    
    YahooDownloader <- function(symbols, start, end, period = "daily"){
        # symbols <- c("IBM", "AAPL", "BAC")
        dat_list <- lapply(symbols, yahooImport, frequency = period, 
                           from = start, to = end)
        extraction_function <- function(LIST_DATA, DataColumn){
            dat_dt_list <- lapply(
                X = LIST_DATA, 
                FUN = function(x){
                    dt <- data.table(
                        date = as.IDate(as.POSIXct(x@data@positions, 
                                                   origin = "1970-01-01")),
                        V1 = x@data@.Data[,DataColumn])
                    setnames(dt, c("date", DataColumn))
                    setkey(dt, date)
                    dt
                    }
                )
            dat <- Reduce(function(u,v)u[v], dat_dt_list)
            setnames(dat, c("date", symbols))
            return(dat)
            }
        ## Loop over possible column names
        ## colnames(dat_list[[1]]@data@.Data)
        YAHOO_DATA_NAMES <- c("Open", "High", "Low", "Close","Volume","Adj.Close")
        dat <- lapply(YAHOO_DATA_NAMES, extraction_function, LIST_DATA=dat_list)
        names(dat) <- YAHOO_DATA_NAMES
        return(dat)
        }
    
    dat <- YahooDownloader(symbols <- c("IBM", "AAPL", "BAC"),
                           start = "2014-01-01", end = "2014-09-21", period="daily")
    dat_monthly <- YahooDownloader(symbols <- c("IBM", "AAPL", "BAC"),
                                   start = "2014-01-01", end = "2014-09-21", period="monthly")
    
    dat
    dat_monthly
    
    plot(AAPL~date, dat$Adj.Close, type='o')
    
    loadinstall_libraries("zoo")
    plot(zoo(dat$Adj.Close[,-1,with=F], order.by = dat$Adj.Close$date))
    

    Getting help

    ## See the help file examples executed (not run in knitr)
    example(svd)
    

    Some useful settings / options

    ## Very useful help for learning about settings:
    ?Startup
    
    ## Change warnings options
    options(warn=0)  ## Default: stores warnings until function exit
    options(warn=-1) ## No warnings
    options(warn=1)  ## Warnings print immediately
    

    Environment variables

    ## View and get environmental variables
    names(Sys.getenv())
    Sys.getenv('R_USER')
    Sys.getenv('HOME')
    Sys.getenv('R_HOME')
    Sys.getenv("R_LIBS_USER")
    
    ## Syntax for SETTTING variables:
    # Sys.setenv(R_USER='C:\\Users\\gene.leynes')
    # Sys.setenv(R_USER='C:\\Documents and Settings\\Gene\\My Documents')
    
    ## Get R version number
    R.Version()
    
    ## Use this when submitting help
    sessionInfo()
    

    Example of .First function (use with caution)

    ## Trick for loading the initial global environment
    ## 1) In a running instance of R create a function called .First 
    ##    function that creates everything you will want.
    ## 2) Save an image file (.RData file) with only .First
    ## 3) Everytime you use that .RData image your .First function
    ##    will run first and create an environment that you specified.
    ##    You can also use it to run an initialization, load a file,
    ##    or start a process.
    
    ## SEE EXAMPLES FOR MORE COMPLEX EXAMPLE
    
    ## Delete everything so that you're only saving .First
    rm(list=ls())
    ## Define your fuction
    .First = function(){
        ## Create or load your desired objects
        exampleFuncion <- function(x){x=x+1; x=x^2; return(x)}
        ## Save the object names in the local namespace
        CurObjects <- ls()
        ## Put each local object into the global environment
        for(nm in CurObjects){
            assign(x=nm, value=get(nm), envir=.GlobalEnv)
            }
        ## The .First function now exits. All local copies of
        ## created values are lost, but the global copies remain
        ## accessible to the user.
        }
    ## Save .First somewhere
    save.image('test.RData')
    

    Pacakges / namespaces

    ##-----------------------------------------------------------
    ## Packages
    ##-----------------------------------------------------------
    
    ## Detach package
    detach(pos = match("package:graphics", search()))
    
    ## List all installed packages
    rownames(installed.packages())
    
    ##list all commands in all packages
    for(i in 1:length(search())) {print(ls(i))}
    
    ## Install a package from r-forge (or any other website)
    install.packages("DierckxSpline", 
                     repos = '"http://r-forge.r-project.org"')
    
    ## Download and install Geneorama from source
    install.packages(tmpfile, type = "source", repos = NULL)
    
    ## Example to download and install from source
    ## This is geneorama, but you could download anything
    local({
        installed_pacakges =  .packages(all.available = TRUE)
    
        if(!'geneorama' %in% installed_pacakges){
            ## Get webfile and unzip to a temp directory
            webfile <- 'http://geneorama.com/code/geneorama_1.1.tar.gz'
            tmp <- tempdir()
            tmpfile <- file.path(tmp, basename(webfile))
            download.file(webfile, tmpfile)
            install.packages(tmpfile, type = "source", repos = NULL)
            unlink(tmpfile)
            unlink(tmp)
            }
        })
    
    ## Copy out all the functions in a package
    ## Gene Leynes (4/27/11)
    getFormals <- function(x){
        Names <- names(formals(x))
        Vals <- as.character(formals(x))
        #Vals[Vals==''] <- "INPUT"
        ret <- paste(Names,Vals,sep='=',collapse=', ')
        ret <- gsub('\\=$', '', ret)
        ret <- gsub('\\=,', ',', ret)
        ret <- paste('(',ret,')', sep='')
        ret
        }
    loadinstall_libraries("RODBC")
    funs <- ls(which(search()=="package:RODBC"))
    funargs <- sapply(funs, getFormals)
    ret <- cbind(Functions = funs, Function_Arguments = funargs)
    ret
    ## Copy to clipboard: clipper(ret)
    
    ## Make a vector of NA's whose class isn't logical
    NA_real_
    
    ## BAD
    df <- data.table(iris[1:10, ])
    df[ , Sepal.Total := NA]
    df[1:5, Sepal.Total := Sepal.Length + Sepal.Width]
    df
    ## GOOD
    df <- data.table(iris[1:10, ])
    df[ , Sepal.Total := NA_real_]
    df[1:5, Sepal.Total := Sepal.Length + Sepal.Width]
    df
    

    NOT YET ORGANIZED AS EXECUTIBLE:

    ##-----------------------------------------------------------
    ## Objects and Methods
    ##-----------------------------------------------------------
    
    ## structure returns the given object with further attributes set.
    structure(1:6, dim = 2:3)
    
    ## Information about an object
    typeof(tempdf)
    is.what(tempdf)
    class(tempdf)
    mode(tempdf)
    
    ## list all methods
    methods(class="data.frame")
    methods(class="systemfit")
    methods(plot)
    
    ## Make a png file
    png(file = "plot23000x23000.png", width = 23000, height = 23000, res = 2400)
    print(myplot)
    dev.off()
    
    # Printing within a loop
    # if using the GUI, turn off buffering, 
    # or use 'flush.console()' after 'cat'.
    
    
    
    ##------------------------------------------------------------------------------
    ## Website / XML Parsing
    ##------------------------------------------------------------------------------
    
    ##-----------------------------------------------------------
    ## xcode example
    ## http://dataissexy.wordpress.com/2013/06/01/10-second-rss-parsing-in-r-and-xpath/
    ##-----------------------------------------------------------
    library(XML)
    library(RCurl)
    
    xml.url<-"http://dataissexy.wordpress.com/feed/"
    rssdoc <- xmlParse(getURL(xml.url))
    rsstitle <- xpathSApply(rssdoc, '//item/title', xmlValue)
    rssdesc <- xpathSApply(rssdoc, '//item/description', xmlValue)
    rssdate <- xpathSApply(rssdoc, '//item/pubDate', xmlValue)
    
    
    
    ##------------------------------------------------------------------------------
    ## Functions
    ##------------------------------------------------------------------------------
    
    ## Use "mapply" to re-use function arguments in subsequent calls
    FunctionWithManyArguments <- function(x1, x2, x3, x4, x5, x6, x7, x8, x9){
        ans <- x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9
        return(ans)
        }
    FunctionWithManyArguments(1, 2, 3, 4, 5, 6, 7, 8, 9)
    ArgsThatRarelyChange <- list(x3=3, x4=4, x5=5, x6=6, x7=7, x8=8, x9=9)
    mapply(somefun, x1 = 1,x2 = 2, MoreArgs=ArgsThatRarelyChange)
    
    ## Check for function arguments (manually)
    SomeFunction <- function(x,y){
        if(nargs()!=length(formals(f)))
            stop('Something is missing...')
        }
    SomeFunction(x=1:10)
    
    ## Check for function arguments (automatically)
    SomeFunction <- function(x,y){
        curfun <- deparse(match.call()[1])
        curfun <- substr(curfun, 1, nchar(curfun) - 2)
        if(length(formals(curfun)) != nargs())
            stop('Something is missing...')
        }
    SomeFunction()
    
    ## Two ways to get the arguments of a function using match.call
    ReturnArgs <- function(aa, bb){
        mcall <- match.call()
        for(i in 1:length(mcall)){
            cat(i, '\n')
            print(mcall[i])
            cat(substr(mcall[i], 1, nchar(mcall[i])))
            cat('\n\n')
            }
        invisible()
        }
    ReturnArgs(aa = 'onething', bb = 1:100)
    
    # deparse substitute
    PrintDeparsedVals <- function(aa, bb){
        print(deparse(substitute(aa)))
        print(deparse(substitute(bb)))
        invisible()
        }
    PrintDeparsedVals(aa = 'onething', bb = 1:100)
    
    
    
    ##------------------------------------------------------------------------------
    ## Flow Control
    ##------------------------------------------------------------------------------
    # else if
    xx=c(0,1,2,3)
    tester=function(x){
        if(x==0){
            print('zero')
            } else if(x==1){
                print('one')
                } else {
                    print('more than one')
                    }
        }
    for (i in 1:length(xx)) tester(xx[i])
    
    # stopifnot (displays error message if condition is false)
    stopifnot(F)
    
    
    
    ##------------------------------------------------------------------------------
    ## STRINGS
    ##------------------------------------------------------------------------------
    
    ## Replace // with \
    chartr('\\', '/', R.home())
    
    ## Find characters in a string
    x <- deparse(mean)[2]           # "UseMethod(\"mean\")"
    gregexpr('u', deparse(mean)[2])  # -1
    gregexpr('s', deparse(mean)[2])  # 2
    gregexpr('\"', deparse(mean)[2]) # 11 16
    
    ## add a leading 0
    sprintf("%02.0f", pi)
    
    ## Format a number to be a string with n digits
    formatC(pi, digits=0, width=5, flag='0')
    
    ##------------------------------------------------------------------------------
    ## DATES
    ##------------------------------------------------------------------------------
    # show sub second time
    options(digits.secs=4)
    
    # For details see
    ?DateTimeClasses
    
    ## Example date / times
    set.seed(100)
    datetime <- ISOdatetime(year=2012, month=10, day=29, hour=0, min=0, sec=0)
    datetimes <- datetime + sort(runif(5, min=0, max=60 * 60 * 24 * 30))
    datetimes
    # [1] "2012-10-30 16:35:45 CDT" "2012-11-05 16:31:27 CST" 
    # [3] "2012-11-07 04:35:29 CST" "2012-11-12 00:21:19 CST" 
    # [5] "2012-11-14 12:40:19 CST"
    unclass(datetimes)
    # [1] 1351632945 1352154687 1352284530 1352701280 1352918420
    data.frame(unclass(as.POSIXlt(datetimes)))
    #        sec min hour mday mon year wday yday isdst
    # 1 45.12583  35   16   30   9  112    2  303     1
    # 2 27.12267  31   16    5  10  112    1  309     0
    # 3 29.75937  35    4    7  10  112    3  311     0
    # 4 19.74392  21    0   12  10  112    1  316     0
    # 5 19.74719  40   12   14  10  112    3  318     0
    
    as.POSIXct(1472562988, origin = "1960-01-01") # local
    as.POSIXct(1472562988, origin = ISOdatetime(1960,1,1,0,0,0)) # local
    as.POSIXct(1472562988, origin = "1960-01-01", tz = "GMT") # UTC
    
    # strftime is a wrapper for "fromat"
    format(datetimes, "%H:%M:%OS3")
    format(datetimes, "%Y-%m-%d")
    
    # Output of strftime is char
    # Use round to get dates, but convert to POSIXct; default format is lt
    as.POSIXct(round(datetimes, "day"))
    
    # strptime can convert char to date / time (sometimes)
    strptime(c("02/27/92 23:03:20", "02/27/92 22:29:56"), "%m/%d/%y %H:%M:%S")
    
    
    
    # Get WeekNumber from dates (add one?)
    dates <- c("15-08-2005", "01-01-2005", 
               "15-01-2005", "08-08-2005", "08-08-2005")
    as.factor(format(as.Date(dates,"%d-%m-%Y"),"%W"))
    
    
    
    ##------------------------------------------------------------------------------
    ## Matching and Subsetting, and Sets
    ##------------------------------------------------------------------------------
    ## Match and %in%
    match('OH', state.abb) # 35
    match(state.abb, 'OH') # NA,NA,...,NA,NA,1,NA,NA,NA,NA,...
    'OH' %in% state.abb # TRUE
    state.abb %in% 'OH' # FALSE FALSE FALSE.... TRUE FALSE FALSE....
    
    # Subset
    head(airquality)
    head(subset(airquality, Temp > 80, select = c(Ozone, Temp)))
    head(subset(airquality, Day == 1, select = -Temp))
    head(subset(airquality, select = Ozone:Wind))
    
    # Combine
    require(gdata)
    a  <-  matrix(rnorm(12),ncol=4,nrow=3)
    b  <-  1:4
    combine(a, b)
    combine(x=a, b)
    combine(x=a, y=b)
    combine(a, b, names=c("one","two"))
    
    # set functions
    ?identical
    (x <- c(sort(sample(1:20, 9)),NA))
    (y <- c(sort(sample(3:23, 7)),NA))
    union(x, y)
    intersect(x, y)
    setdiff(x, y)
    setdiff(y, x)
    setequal(x, y)
    is.element(x, y)# length 10
    is.element(y, x)# length  8
    
    # all.equal
    ?identical()
    d45 <- pi*(1/4 + 1:10)
    all.equal(tan(d45), rep(1,10))         # TRUE, but
    all      (tan(d45) == rep(1,10))       # FALSE, since not exactly
    all.equal(tan(d45), rep(1,10), tol=0)  # to see difference
    
    
    ##==============================================================================
    ## FACTORS
    ## Create a sample string and convert it to factors and ordered factors
    ##==============================================================================
    
    ## Population of possibilities
    pop_vec <- c('green', 'red', 'yellow', NA)
    pop_vec_ordered <- c('green', 'yellow', 'red', NA)
    
    ## Sample from the population
    set.seed(14)
    ex_str <- sample(x = pop_vec, size = 20, replace = TRUE, 
                     prob =c(.29, .29, .29, .13))
    
    ## Conversion examples
    factor(x = ex_str, exclude = NA)           ## Exclude NA (default behavior)
    factor(x = ex_str, exclude = c(NA, 'red')) ## Exclude NA and "red"
    factor(x = ex_str, exclude = NULL)         ## Exclude nothing; use NA as a level
    
    ## Create factor vectors
    ex_fac <- factor(x = ex_str, exclude = NULL, levels = pop_vec)
    ex_ord <- factor(x = ex_str, exclude = NULL, levels = pop_vec_ordered, ordered=TRUE)
    ex_ord
    
    ## Modify the levels of the example factor vectors:
    
    ## Rename all 1 level at a time:
    ## This should replace "green" with "good", but it also will forget the NA 
    ## level!  (created by the exclude = NULL part of the original call)
    ## levels(ex_fac_rename)[1] <- "good"
    
    ## Rename all 4 levels at once:
    ex_fac_rename <- ex_fac  ## Make a copy
    ex_ord_rename <- ex_ord  ## Make a copy
    levels(ex_fac_rename) <- c('good', 'bad', 'ok', 'missing')
    levels(ex_ord_rename) <- c('good', 'ok', 'bad', 'missing')
    
    ## Make missing slightly better than bad, but worse than ok
    ex_ord_rename_relevel <- ex_ord_rename
    ex_ord_rename_relevel <- ordered(ex_ord_rename_relevel, 
                                     c("good", "ok", "missing", "bad"))
    ## relevel doesn't work for this... relevel has a very narrow usage
    ## relevel(ex_ord_rename_relevel, c("good", "ok", "missing", "bad"))
    
    data.frame(ex_str, 
               ex_fac, 
               ex_ord,
               ex_ord_rename,
               ex_ord_rename_relevel,
               ex_fac_rename,
               unclass(ex_fac), 
               unclass(ex_ord),
               unclass(ex_ord_rename),
               unclass(ex_ord_rename_relevel),
               unclass(ex_fac_rename))[order(ex_ord), ]
    
    ##------------------------------------------------------------------------------
    ## recode
    ## See also: http://rprogramming.net/recode-data-in-r/
    ##------------------------------------------------------------------------------
    (x <- rep(1:3,3))
    ## [1] 1 2 3 1 2 3 1 2 3
    recode(x, "c(1,2)='A'; else='B'")
    ## [1] "A" "A" "B" "A" "A" "B" "A" "A" "B"
    recode(x, "1:2='A'; 3='B'")
    ## [1] "A" "A" "B" "A" "A" "B" "A" "A" "B"
    
    
    ##------------------------------------------------------------------------------
    ## expand.grid
    ##------------------------------------------------------------------------------
    jcetable <- expand.grid(
        event = c('Wheezing at any time',
                  'Wheezing and breathless',
                  'Wheezing without a cold',
                  'Waking with tightness in the chest',
                  'Waking with shortness of breath',
                  'Waking with an attack of cough',
                  'Attack of asthma',
                  'Use of medication'),
        method = c('Mail', 'Telephone'), 
        sex = c('Male', 'Female'),
        what = c('Sensitivity', 'Specificity'))
    
    
    ##------------------------------------------------------------------------------
    ## Tables, Shingles, ftable, lapply
    ##------------------------------------------------------------------------------
    # Nice way to summarize by groups
    data(warpbreaks)
    lapply(split(warpbreaks, warpbreaks$tension), summary)
    
    # Make buckets of equal size
    # see also: shingle
    library(lattice)
    equal.count(rnorm(100))
    
    # Nice way to format a table
    ftable(UCBAdmissions,
           row.vars =c('Dept'), 
           col.vars = c('Gender','Admit'))
    
    df <- data.frame(
        category = factor(sample(c(1,2,3),20,T),labels=c('a','b','c')),
        time = runif(20,1,5)%/%1,
        LbsEaten = ((rnorm(20)^2)*16) %/% 1 / 4)
    df
    ftable(df)
    
    ##------------------------------------------------------------------------------
    ## Wide to tall using data.table
    ##------------------------------------------------------------------------------
    library(data.table)
    library(reshape2)
    dt <- data.table(uid=c("a","b"), var1=c(1,2), var2=c(100,200))
    dt
    melt(dt, id=c("uid"))
    ## Confusing way:
    dt[, list(variable = names(.SD), value = unlist(.SD, use.names = F)), by = uid]
    detach(package:reshape2)
    detach(package:data.table)
    
    ##------------------------------------------------------------------------------
    ## Reshape with dcast.data.table
    ##------------------------------------------------------------------------------
    library(data.table)
    
    dt <- data.table(id = c(1,1,2,2), 
                     region = c("a","b","a","b"),
                     date = Sys.Date(),
                     count = c(6,7,8,9))
    dt
    dcast.data.table(data = dt, 
                     formula = id ~ region + date)
    dcast.data.table(data = dt, 
                     formula = id ~ region + date,
                     value.var = "count")
    dcast.data.table(data = dt,
                     formula = id ~ date,
                     value.var = "count")
    dcast.data.table(data = dt,
                     formula = id ~ region,
                     value.var = "date")
    
    ## Reshape and aggregate by "sum"
    dcast.data.table(data = dt,
                     fun.aggregate = sum,
                     formula = id ~ date,
                     value.var = "count")
    
    
    ##------------------------------------------------------------------------------
    ## Reshape using Pivot function
    ## http://stackoverflow.com/questions/5307313/fastest-tall-wide-pivoting-in-r
    ##------------------------------------------------------------------------------
    tall <- data.frame( 
        int = rep(1:100, 100),
        fac = rep( paste('v',1:100,sep=''), each = 100),
        val = 1:1000) [-(1:5), ]
    tall <- tall[sample(1:nrow(tall)), ]
    
    pivot  <-  function(col, row, value) {
        # browser() }
        col = as.factor(col)
        row = as.factor(row)
        mat = array(
            dim = c(nlevels(row), nlevels(col)), 
            dimnames = list(levels(row), levels(col)))
        mat[(as.integer(col) - 1L) * nlevels(row) + as.integer(row)] = value
        mat
        }
    
    wide <- with(tall, pivot(fac, int, val))
    tall[1:10, 1:3]
    wide[1:10, 1:10]
    system.time( replicate(100, wide <- with(tall, tapply( val, list(int, fac), identity))))
    system.time( replicate(100, wide <- with(tall, pivot(fac, int, val))))
    
    
    ##------------------------------------------------------------------------------
    ## Tables and apply functions 
    ##------------------------------------------------------------------------------
    
    ## aggregate.table
    ## Depricated: Now use data.table
    library(gdata)
    g1 <- sample(letters[1:5], 1000, replace=TRUE)
    g2 <- sample(LETTERS[1:3], 1000, replace=TRUE )
    dat <- rnorm(1000)
    stderr <- function(x) sqrt( var(x,na.rm=TRUE) / nobs(x) )
    
    (means   <- aggregate.table( dat, g1, g2, mean ))
    (stderrs <- aggregate.table( dat, g1, g2, stderr ))
    (ns      <- aggregate.table( dat, g1, g2, nobs ))
    
    ## apply - some of the examples
    ## Depricated: Now use data.table
    ## Compute row and column sums for a matrix:
    (x <- cbind(x1 = 3, x2 = c(4:1, 2:5)))
    dimnames(x)[[1]] <- letters[1:8]
    apply(x, 2, mean, trim = .2)
    col.sums <- apply(x, 2, sum)
    row.sums <- apply(x, 1, sum)
    rbind(cbind(x, Rtot = row.sums), Ctot = c(col.sums, sum(col.sums)))
    
    (ma <- matrix(c(1:4, 1, 6:8), nrow = 2))
    apply(ma, 1, table)  #--> a list of length 2
    apply(ma, 1, stats::quantile)# 5 x n matrix with rownames
    
    ## tapply "Pivot Table"
    tapply(airquality$Ozone, airquality$Month, length)
    tapply(airquality$Ozone, airquality$Month, mean, na.rm = T)
    
    ## sapply
    newdf$nUnique = sapply(sapply(df[,1:ncol(df)],unique),length) 
    
    ## Example of mapply
    # The function:
    fx2 <- function (x,a) (x-a)^2+1
    # Just for fun:
    optimize(fx2,c(0,1),tol=.00001,a=1/3)
    
    # A line where a is constant
    xx <- seq(0, 1, .05)
    aa <- rep(1/3, 21)
    yy <- mapply(fx2, x = xx, a = aa)
    plot(xx, yy, type='l')
    
    # A line where a varies
    xx <- seq(0, 1, .05)
    aa <- seq(1/4, 5/12, (5/12-1/4)/(length(xx)-1))
    yy <- mapply(fx2, x=xx, a=aa)
    lines(xx, yy, col='blue')
    
    
    ##------------------------------------------------------------------------------
    ## REPORTING
    ##------------------------------------------------------------------------------
    
    ## http://stackoverflow.com/questions/10646665/how-to-convert-r-markdown-to-html-i-e-what-does-knit-html-do-in-rstudio-0-9
    ## Add this line to the Rmd file to see R Studio's messages
    Sys.sleep(30)
    
    # Create .md, .html, and .pdf files
    # http://danieljhocking.wordpress.com/2013/09/25/knitting-beautiful-documents-in-rstudio/
    knit("File.Rmd")
    markdownToHTML('File.md', 'File.html', options=c("use_xhml"))
    system("pandoc -s File.html -o File.pdf")
    ## Command to fix margins in latex (from blog post above)
    \usepackage[vmargin=1in,hmargin=1in]{geometry}
    
    # Create .md, .html, and .pdf files
    # http://rprogramming.net/create-html-or-pdf-files-with-r-knitr-miktex-and-pandoc/
    # Step 1: Install MiKTeX
    # Step 2: Install Pandoc
    # Step 3: Install Knitr and Markdown
    # Step 4: Create a .Rmd File Containing Your Analysis
    # Step 5: Create a .R File to Run the .Rmd File
    # # Set working directory
    # setwd("C:/Documents and Settings/name")
    
    # # Load packages
    # require(knitr)
    # require(markdown)
    
    # # Create .md, .html, and .pdf files
    # knit("My_Analysis.Rmd")
    # markdownToHTML('My_Analysis.md', 'My_Analysis.html', options=c("use_xhml"))
    # system("pandoc -s My_Analysis.html -o My_Analysis.pdf")
    
    # Step 6: Produce HTML and PDF Output Files with R
    
    
    
    ##------------------------------------------------------------------------------
    ## Graphics
    ##------------------------------------------------------------------------------
    
    # Make a nice set of colors using color ramp
    ramp <- colorRamp(c("red", "blue"))
    rgb( ramp(seq(0, 1, length = 5)), max = 255)
    plot(1:15, 1:15, pch=16, cex=10, col=
            rgb( ramp(seq(0, 1, length = 15)), max = 255))
    
    
    # Modify a previous plot
    par(mfrow=c(3,1))
    pars <- list()
    for(i in 1:3){
        set.seed(10)
        plot(1:(10*i)*i, main=paste('examp', i))
        pars[[i]] = par('usr')
        }
    for(i in 1:3){
        par(mfg=c(i,1))
        par(usr=pars[[i]])
        lines(1:(10*i)*i, col=i+1)
        }
    
    # Plot the "convex hull"
    plot(cars)
    polygon( cars[chull(cars),], col="pink", lwd=3)
    points(cars)
    
    # Plot Stars
    palette(rainbow(12, s = 0.6, v = 0.75))
    stars(mtcars[, 1:7], len = 0.8, key.loc = c(12, 1.5),
          main = "Motor Trend Cars", draw.segments = TRUE)
    
    # all the point types
    nn <- 1:50;    plot(nn,nn,type='n');for(i in nn)points(i,i,pch=i);rm(i,nn)
    nn <- 51:100;  plot(nn,nn,type='n');for(i in nn)points(i,i,pch=i);rm(i,nn)
    nn <- 101:150; plot(nn,nn,type='n');for(i in nn)points(i,i,pch=i);rm(i,nn)
    
    # plot a color wheel using RGB
    r <- 1:100/100; g <-.05; b <- .05
    pie(rep(1,100), col=rgb(r,g,b))
    
    # plot color wheels using heat, topo, terrain, and cm
    par(mfrow=c(2,2), mar=c(0,0,1.5,0))
    pie(rep(1,40),col=heat.colors(40)[1:40], main='heat.colors')
    pie(rep(1,40),col=terrain.colors(40)[1:40], main='terrain.colors')
    pie(rep(1,40),col=topo.colors(40)[1:40], main='topo.colors')
    pie(rep(1,40),col=cm.colors(40)[1:40], main='cm.colors')
    
    # Get HSV colors (where "blue" becomes "#0000FF")
    (hc <- rgb2hsv(col2rgb("blue")))
    (SomeHSV <- hsv(hc[1,1],hc[2,1],hc[3,1]))
    pie(1, col = SomeHSV)
    
    # Side by side boxplots
    boxplot(len ~ dose, data = ToothGrowth,
            boxwex = 0.25, at = 1:3 - 0.2,
            subset = supp == "VC", col = "yellow",
            main = "Guinea Pigs' Tooth Growth",
            xlab = "Vitamin C dose mg",
            ylab = "tooth length",
            xlim = c(0.5, 3.5), ylim = c(0, 35), yaxs = "i")
    boxplot(len ~ dose, data = ToothGrowth, add = TRUE,
            boxwex = 0.25, at = 1:3 + 0.2,
            subset = supp == "OJ", col = "orange")
    legend(2, 9, c("Ascorbic acid", "Orange juice"),
           fill = c("yellow", "orange"))
    
    # Identify points on a plot
    plot(mtcars$mpg,mtcars$hp)
    identify(mtcars$mpg,mtcars$hp,label=rownames(mtcars))
    
    # Make two plots on the same canvas
    # Legend
    # Random walk 
    # Tsay book cover
    set.seed(123456)
    ## White noise
    e <- rnorm(500)
    ## Random walk
    randwalk <- cumsum(e)
    ## Trend
    trend <- 1:500
    ## Plots
    par(mar=rep(5,4))
    ## deterministic trend + noise
    plot.ts(0.5 * trend + e, lty=1, ylab='', xlab='')
    ## random walk with drift
    lines(0.5 * trend + cumsum(e), lty=2)
    ## random walk (same scale)
    lines(randwalk, lty=3, col='red')
    par(new=T)
    ## random walk (own scale)
    plot.ts(randwalk, lty=3, axes=FALSE, col='red')
    axis(4, pretty(range(randwalk)))
    legend(10, 18.7, legend=c('deterministic trend + noise (left)',
                              'random walk w/ drift (left)', 
                              'random walk (left+right)'),
           lty=c(1, 2, 3), col=c('black', 'black', 'red')) 
    
    
    # Add a background color to just the plot
    # (Boxplot example)
    dat <- data.frame(x=gl(10,20),y=rnorm(10*20))
    plot(dat)
    # ADD THE BACKGROUND
    tmp <- par()$usr
    rect(tmp[1], tmp[3], tmp[2], tmp[4], col = "lightyellow")
    plot(dat, add=TRUE)
    
    # Add a background color to just the plot
    # (Non-boxplot example)
    dat <- data.frame(x=gl(10,20),y=rnorm(10*20))
    dat$x <- as.numeric(dat$x)
    plot(dat, type='n')
    # ADD THE BACKGROUND
    tmp <- par("usr")
    rect(tmp[1], tmp[3], tmp[2], tmp[4], col = "lightyellow")
    points(dat)
    
    
    ##------------------------------------------------------------------------------
    ## Lattice Graphics
    ## also, see end for "great plot example"
    ##------------------------------------------------------------------------------
    
    # Make xyplot plot in order
    lattice.options(default.args = list(as.table = TRUE)) 
    
    # xy plot example with panel
    phi <- matrix(rnorm(19000), nrow=1000, ncol=19)
    av <- .03 + .05 * phi
    av <- t(apply(1+av, 1, cumprod))
    spx <- (.03 + .05 * phi)^2
    spx <- t(apply(1+spx, 1, cumprod))
    #plot(av[1,],ylim=range(av));apply(av,1,lines)
    #plot(spx[1,],ylim=range(spx));apply(spx,1,lines)
    dat <- data.frame(
        av=as.vector(av),
        spx=as.vector(spx),
        dif=as.vector(av-spx),
        time = rep(2:20,each=1000))
    
    lattice.options(default.args = list(as.table = TRUE)) 
    xyplot(dif~spx|time, dat, 
           strip = strip.custom(strip.levels = TRUE),
           scales = "free",
           panel=function(x,y){
            panel.xyplot(x,y)
            panel.abline(h=c(0,1,1.5),lty=2)
            }
           )
    
    
    
    ##------------------------------------------------------------------------------
    ## Simple fitting / statistics / plots
    ##------------------------------------------------------------------------------
    ## curve
    curve(x^3-3*x, -2, 2)
    curve(x^2-2, add = TRUE, col = "violet")
    
    ## spline
    n <- 9
    x <- 1:n
    y <- rnorm(n)
    plot(x, y, main = paste("spline[fun](.) through", n, "points"))
    lines(spline(x, y))
    lines(spline(x, y, n = 201), col = 2)
    
    ## par - mfrow
    par(mfrow = c(3,1))
    for (i in 1:3) plot(matrix(rnorm(21), nrow=7))
    
    ## bandplot #2 (and lm)
    x <- abs(rnorm(500))
    y <- rnorm(500, mean=2*x, sd=2+2*x)
    plot(x,y )
    reg <- lm(y~x)
    summary(reg)
    bandplot(x,y)
    bandplot(predict(reg),resid(reg))
    
    ## Boxplot Example with gl
    g = factor(round(10*runif(10*100)))
    x = rnorm(10*100)+sqrt(as.numeric(g))
    xg = split(x,g)
    boxplot(xg, col='lavender', notch=T, var.width=T)
    
    ##------------------------------------------------------------------------------
    ## Useful Functions
    ##------------------------------------------------------------------------------
    II <- function(x) I(as.character(x))
    IM <- function(x) as.data.frame(as.matrix(x))
    clipper <- function(x){
        #writeClipboard(x,format=1)
        write.table(x, "clipboard", sep="\t", col.names=NA)
        }
    
    clipped = function(){
        read.delim("clipboard")
        }
    NAsummary <- function(df){
        newdf <- data.frame(Count = rep(nrow(df),ncol(df)))
        rownames(newdf) = colnames(df)
        newdf$nNA <- nrow(df)-sapply(df,function(x)length(na.omit(x)))
        newdf$rNA <- newdf$nNA / newdf$Count
        newdf$rNA <- trunc(newdf$rNA*10000)/10000
        newdf$nUnique <- sapply(df, function(x)length(unique(x)))
        newdf$rUnique <- newdf$nUnique / newdf$Count
        newdf$rUnique <- trunc(newdf$rUnique*10000)/10000
        return(newdf)
        }
    
    
    ##------------------------------------------------------------------------------
    ## Data Frame VS. Matrix
    ##------------------------------------------------------------------------------
    df <- data.frame(x=1:10, y=rnorm(10))  ;  
    mat <- matrix(c(x=1:10, y=rnorm(10)), ncol=2)
    colnames(mat) = c('x','y')
    
    df['x']   ;  mat['x']    # Data frame   vs. NA
    df[,'x']  ;  mat[,'x']   # Vector    vs. Vector
    df[['x']] ;  mat[['x']]  # Vector    vs. subscript out of bounds
    df$'x'    ;  mat$'x'     # Vector    vs. ERROR: operator is invalid for atomic vectors
    df$x      ;  mat$x       # Vector    vs. ERROR: operator is invalid for atomic vectors
    df$x[1]   ;  mat$x[1]    # Vector    vs. ERROR: operator is invalid for atomic vectors
    df[[1,2]] ;  mat[[1,2]]  # Numeric Vec   vs. Numeric Vec
    df[1,2]   ;  mat[1,2]    # Numeric Vec  vs. NAMED Vec
    
    
    
    
    
    ##-----------------------------------------------------------
    ## Example - Great plot example (bank dot plot example)
    ##-----------------------------------------------------------
    ## xyplot
    library(lattice)
    library(grid)
    
    ### read the data
    fname <- "http://addictedtor.free.fr/graphiques/data/150/data.txt"
    d <- read.csv(file(fname))
    
    ### workaround so that lattice does not order bank names alphabetically
    d$bank <- ordered(d$bank, levels=d$bank)
    
    ### setup the key
    k <- simpleKey(c("Q2 2007",  "January 20th 2009"))
    k$points$fill <- c("lightblue", "lightgreen")
    k$points$pch <- 21
    k$points$col <- "black"
    k$points$cex <- 1
    
    list1=list(pch=21, fill = c("lightblue", "lightgreen"), 
               cex = 4, col = "black")
    
    ### create the plot
    dotplot(bank ~ MV2007 + MV2009, data=d, horiz=T,
            par.settings = list(superpose.symbol = list1),
            xlab = "Market value ($Bn)", key=k,
            panel = function(x, y, ...){
                panel.dotplot(x, y, ...)
                grid.text(
                    unit(x, "native"), unit(y, "native"),
                    label = x, gp = gpar(cex=.7))
                })
    
    
    
    ##------------------------------------------------------------------------------
    ## Example - Check R scripts for presence of a word pattern
    ##------------------------------------------------------------------------------
    ff <- list.files(recursive = TRUE, pattern = '\\.R$')
    dat <- lapply(ff, readLines)
    hasplot <- unlist(sapply(dat, function(x)
        length(grep(x, pattern='plot')))) != 0
    missingreset <- unlist(sapply(dat, function(x)
        length(grep(x, pattern='resetGraph'))))==0
    # Shows files that have "plot" but is missing "resetGraph"
    ff[hasplot&missingreset]
    
    
    ##------------------------------------------------------------------------------
    ## Profile code
    ##------------------------------------------------------------------------------
    
    # Profile some code:
    nr <- 100; nc<- 10; x <- 1:nc; yy <- c()
    y <- matrix(rnorm(nr*nc),nrow=nr,ncol=nc)
    
    Rprof("C:/Users/gene.leynes/Desktop/test.out")
    for (i in 1:nrow(y))yy[i]=lm(y[i,]~x)$coefficients[1]
    Rprof(NULL)
    summaryRprof("C:/Users/gene.leynes/Desktop/test.out")
    
    
    
    ##-----------------------------------------------------------
    ## Make a .First function that does stuff
    ##-----------------------------------------------------------
    ## 5/20/12
    .First=function(){
    
        library(base)
        library(stats)
        library(mgcv)
    
        library(graphics)
        library(grDevices)
        library(utils)
        library(datasets)
        library(methods)
        library(PBSmodelling)
        #if(!require(gdata)) stop()
        #if(!require(zoo)) stop()
        #if(!require(lattice)) stop()
        #if(!require(quadprog)) stop()
    
        # The following code is specific to the GUI stuff...
        cat('Starting GUI\n')
        #assign(x='DebugMode', value=1, envir=.GlobalEnv)
        DebugMode <<- FALSE
        options(stringsAsFactors=FALSE)
    
        ## LOAD FUNCTIONS
        source('Functions/appLoadFunctions.R')
        appLoadFunctions()
    
        ## SET CurrentMode MANUALLY AND CREATE GUI
        CurrentMode <<- 1
        loadGUI()
    
        cat('Finished Loading Application\n')
        }
    
    save.image(file='loadgui.RData')
    
    
    
    
    
    
    ##------------------------------------------------------------------------------
    ## Example - Programming - Try catch
    ##------------------------------------------------------------------------------
    
    ## Try catch
    tryCatch(expr = {1 + '1'}, 
             error = function(x) return('it\'s ok, everybody makes mistakes'))
    
    ## Raise an Error:
    e = simpleError("some annoying error")
    
    ## Example - try to write a file
    WriteAttempt <- try(expr = write.table(x = x, file=fpname, append = F, 
                                           quote = FALSE, sep = ",", eol = "\n",
                                           na = "", dec = ".", row.names = FALSE,
                                           col.names = TRUE, 
                                           qmethod = c("escape", "double")),
                        silent=TRUE)
    if("try-error" %in% class(WriteAttempt)){
        write.table(x = x, file = fpnameTemp, append = F,
                    quote = FALSE, sep = ",", eol = "\n",
                    na = "", dec = ".", row.names = FALSE,
                    col.names = TRUE,
                    qmethod = c("escape", "double"))
        openFile(fpnameTemp)
        }else{
            openFile(fpname)
            }
    
    ## Example - try to install packages
    libs <- c('mgcv','pppppppp')
    xx <- sapply(X = libs, 
                 FUN = function(x) try(expr = library(package = x, 
                                                     character.only = TRUE), 
                                      silent = TRUE))
    xx <- sapply(X = xx, 
                 FUN = function(x) 'try-error' %in% class(x))
    xx <- xx[xx]
    sapply(X = names(xx), 
           FUN = function(x) install.packages(pkgs = x, 
                                           repos = 'http://cran.r-project.org'))
    sapply(X = names(xx), 
           FUN = function(x) try(expr = library(package = x, 
                                             character.only = TRUE)))
    


    geneorama/geneorama documentation built on Oct. 17, 2020, 12:35 a.m.