knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
library(Rdlazer)
length
TRUE
, is actually a vector of length 1vector
: it can create any type (mode) of vector of any lengthlogical
, numeric
, character
FALSE
or 0
or ""
(empty string)is.character
, as.character
, etc.\newline
letters
\newline
c
# merge two character vectors of length one c('one', 'two')
# merge a character vector with a numeric one c('one', 1) # the numeric is (silently!) coerced to character as a vector can only hold a single type
# concatenate existing vectors (variables) v1 <- c('one', 'two') v2 <- c(1, 2) v3 <- c(FALSE, TRUE) v4 <- c(v1, v2, v3) v4
%in%
or is.element
to test if something is an element of a vector:'one' %in% v4
anyDuplicated
to test if that is the caseunique
to get unique elements of a vector\newline
head
or tail
, respectively (6 elements by default)sort
rev
rep
i1 : i2
seq
for more options for numeric sequencesseq_along(x)
to obtain a sequence of integers from 1 through length(x)
\newline
attributes
attr
NA
or ""
.romans
for a named vector of Roman numeralsnames(x)
names(x)
returns names of xnames(x) <- [vector of names]
sets names of x, much like assigning a variableattr(x, "names")
names
exists for convenience, as messing with names is such a common action# get names names(v2) #set names names(v2) <- c('first', 'second') names(v2) # remove names names(v2) <- NULL names(v2)
c
name = value
pairsv5 <- c('first' = 1, 'second' = 2, 'third' = 3) names(v5) # the names need not be quoted v5 <- c(first = 1, second = 2, third = 3)
as.vector
will strip all attributes, leaving a bare vector
n <- 1:6 # mathematical operation n + 1 # change case toupper(letters) # compare numbers n > 3
n1 <- 1:4 n2 <- 4:1 n1 n2 n1 + n2 n1 / n2
n3 <- rep(4, 4) n4 <- 1:2 n3 + n4 l1 <- c(TRUE, FALSE) paste(n3, l1)
\newline
s1 <- names(v5) s1 length(s1)
nchar(s1)
l1 <- toupper(letters) l2 <- LETTERS identical(l1, l2) l1 == l2 vna <- c(1, NA, 2, NA, 3, NA) is.na(vna) anyNA(vna)
[
operator\newline
vec[n]
v4[1]
ind <- c(1,3) v4[ind]
ind <- c(1, 1, 3, 4, 6, 6) v4[ind] # invalid index yields an NA v4[7] # fractions are rounded down v4[2.3] # this should be avoided regardless
v5['first'] ind <- c('first', 'second', 'first', 'first') v5[ind]
ind <- c(T, T, F, T, F, F) v4[ind]
ten <- 1:10 x <- letters[ten] x # too long an index produces NAs ind <- rep(T, 11) x[ind] # too short an index is recycled ind <- c(T, T, F) x[ind] ind <- TRUE x[ind]
v4 v4[-2] v4[-c(2,5)] v4[c(-2, -5)]
ind <- 1:3 * 2 # extract elements n[ind] # replace elements n[ind] <- NA n
n[7] <- 15 n
n[10] <- 25 n
matrix
m <- matrix(1:12, nrow = 3, ncol = 4) m
byrow
argument of matrix
t
t(m)
nrow
and ncol
, respectively\newline
dim
# get dimensions dim(m) # the first number is the number of rows and the second is the number of columns # the attribute can be modified at will, even removed dim(m) <- NULL m # note that the matrix is dismantled in the same way it is built by default: # column after column # dim can be set on any vector as long as the grid size matches its length dim(m) <- c(4,3) m # note that, again, the matrix is built in the default order
length(m)
m[1,3] m[1:2, 2:3]
m1 <- m[1, ] is.matrix(m1) is.vector(m1)
drop
argument to FALSE
:m2 <- m[1, , drop = FALSE] # mind the empty second dimension!
m[1] m[7]
\newline
rbind
and cbind
, respectively:rbind(m, 101:103) cbind(m, letters[1:4]) # note the coercion
c
would do no good here as it strips attributes other than namesc(m, m) cbind(m,m)
\newline
rownames(m) <- letters[1:4] colnames(m) <- LETTERS[1:3] m
m['a', 'B'] m['a', c('A', 'B'), drop = FALSE] m[c('a', 'b'), c('A', 'B')]
dimnames
rownames
and colnames
dimnames(m) rownames(m) colnames(m)
\newline
m[2,3] <- "hello" m # note the conversion to character
m[] <- "goodbye" m
array
dim
to a vector longer than 2a1 <- array(1:12, dim = c(2, 2, 3)) a1 a2 <- 1:12 dim(a2) <- c(2, 2, 3) a2 identical(a1, a2)
abind
from the abind packageiris3
for an example array\newline
a1[1, , ] a1[, , 3, drop = F]
\newline
TRUE
to is.list
and is.recursive
state.center
and ability.cov
for examples of lists\newline
list
c
, list
takes a number of objects and returns a list objectl1 <- list(1, "one", TRUE, v5) l1
c
or append
list
to add items to a list will take your list and make it an element of a higher order list
l2 <- list(l1, "surprize") l2 l3 <- c(l1, "surprize") l3
as.list
and do the reverse with unlist
l4 <- list(one = 1:3, two = 4:5, three = 7:8, `forbidden name` = 9) l4
\newline
# subset with numeric index l4[2] ind <- 2:3 l4[ind] # subset with names l4['three'] # subset with logical vector ind <- c(T, F, F) l4[ind] # the logical vector has been recycled # it's a list x <- l4[1] x is.list(x)
# numeric l4[[3]] # names l4[['forbidden name']] # it's not a list x <- l4[[3]] x is.list(x) is.numeric(x)
Note: a double bracket can also be used on a normal vector. The difference to a single bracket is that a double one discards names. It also accepts only a length 1 index. [[
is virtually never used with atomic vectors.
$
l4$one
l4$one l4$`forbidden name`
print
and, well, printedc(1, c(4, 7))
v6 <- c(1, c(4, 7))
there are three function calls in the above expression: one to <-
and two to c
sidenote:
| the arrow operator creates a binding rather than returning a value | it actually does return one, but invisibly | the value can be visualized by putting the expression in parentheses
(v6 <- c(1, c(4, 7)))
| this explains why we can do something like this:
a <- b <- 1:2 a b
head(c(paste(letters[1:3], LETTERS[8:5], c(1, 2)), 1:8), 7)
string <- 'This is a rather long character string, like a sentence, it even has a period at the end.' # get the five longest words from this sentence, in uppercase. s <- strsplit(string, ' ')[[1]] toupper(s[order(nchar(s), decreasing = T)][1:5])
iris
for an example of a data framenrow(iris) ncol(iris) dim(iris)
is.data.frame
rownames(iris) colnames(iris)
dimnames
in a matrix, rownames
and colnames
are independent attributesdimnames
does still work, though
dimnames(iris)
colnames
can be set to NULL
and be removedrownames
can be set to NULL
but will be replaced by a character vector of integers\newline
data.frame
function, enumerating the vectors that build columns# different lengths of columns throw an error data.frame(1:4, letters[1:4], c('some', 'strings', 'here')) data.frame(1:4, letters[1:4], c('some', 'strings', 'go', 'here')) # the automatic names are unhelpful d1 <- data.frame(first = 1:4, second = letters[1:4], 'third' = c('some', 'strings', 'go', 'here')) d1 # note names need not be quoted
as.data.frame
as.data.frame(m) dimnames(m) <- NULL as.data.frame(m) # these automatic names are better
l3 as.data.frame(l3) l4 as.data.frame(l4)
\newline
\newline
length
returns the number of columns rather than rows of a data framecolnames(iris) names(iris) identical(names(iris), colnames(iris))
\newline
head
starts coming in really handy, as it displays the first rowshead(iris)
str
, which displays the structure of an objectstr(iris)
(What's this "Factor" thing here, you ask? Hold that thought.)
[
operator and specify two dimensions# use numeric indices iris[1:5, 3:4] # use row/column names iris[c('1', '4', '9'), c('Petal.Length', 'Species')] # leave columns unspecified to get them all iris[1:5, ] # leave rows unspecified to get them all iris[, 4:5] # selecting a single column will yield a vector, not a data frame iris[, 'Species'] # unless you specify that dimensions not be dropped iris[, 'Species', drop = F] # use logical vectors - highly unorthodox but possible iris[c(TRUE, FALSE), c(TRUE, FALSE)] # note the recycling # again, selecting a single column drops dimensions iris[, c(F, F, T, F, F)] # unless forbidden iris[, c(F, F, T, F, F), drop = FALSE]
[
operator and only specify the column(s), like with a listiris[4:5] iris['Species'] # always yields a data frame, consistent with list behavior
[[
to get one column as a vector, like with a listiris[[5]] iris[['Species']] # always yields a vector iris[[5, drop = F]] # keeping dimensions is not applicable iris[[c('Species', 'Petal.Length')]] # only one item iris[[1:5]] iris[[1:2]] #madness
$
operator to extract one column as a vector, like with a listiris$Species
\newline
now for something specific to data frames
use a logical expression derived from the data frame to determine rows
iris[iris$Species == 'setosa', ]
# 1. the iris$Species column is compared to the string 'setosa' iris$Species == 'setosa' # this yields a logical vector EXACTLY the same length as iris$Species, i.e. the number of rows # 2. this vector is then used for extracting rows ind <- iris$Species == 'setosa' iris[ind, ]
iris[iris$Species == 'setosa' & iris$Sepal.Length >= 5, ]
iris[iris$Species != 'virginica' & iris$Sepal.Length >= 5, 'Petal.Width']
# suppose we want to omit some outliers from our calculations # we can replace some data points with NA iris[iris$Sepal.Length > 6 | iris$Sepal.Length < 5 & iris$Species == 'versicolor', 'Petal.Width'] <- NA iris
head(mtcars) # compute how much horse power (hp) a car has per cylinder (cyl) mtcars$hp / mtcars$cyl
mtcars$hp.per.cyl <- mtcars$hp / mtcars$cyl head(mtcars)
[[
mtcars[['hp.per.cyl']] <- mtcars[['hp']] / mtcars[['cyl']]
# rather than substituting NAs for numbers, # let's add a logical column that will flag certain observations as outliers iris$outlier <- ifelse(iris$Sepal.Length > 6 | iris$Sepal.Length < 5 & iris$Species == 'versicolor', TRUE, FALSE) iris
ifelse
is a very useful function; study it carefullystr(iris)
head(iris$Species) str(iris$Species)
levels
attributestate.region
for an example factorfactor
ch <- rep(c('c', 'b', 'a'), 3) f <- factor(ch) ch f
factor(1:3, labels = c('a', 'b', 'c')) factor(3:1, labels = c('a', 'b', 'c')) factor(1:3, labels = c('c', 'b', 'a')) factor(3:1, labels = c('c', 'b', 'a'))
as.factor
as.factor(ch)
is.factor
data.frame
has a stringsAsFactors
argument just for this purposench <- as.character(rnorm(10)) nch as.numeric(nch) nf <- as.factor(nch) nf as.numeric(nf)
as.numeric
just strips the levels
attribute\newline
Wherefore the factors?
Factors store categorical data. They used to be a very useful way to encode what are called discrete variables. Today much of what factors do can be done by character vectors just as well, but many procedures rely on factor input, so they are not going away any time soon.
It may seem superfluous now but we will get to places where factors are helpful and even necessary.
I promise.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.