# ch13: Print examples of chapter 13 of 'R for Dummies'. In rfordummies: Code Examples to Accompany the Book "R for Dummies"

 ch13 R Documentation

## Print examples of chapter 13 of 'R for Dummies'.

### Description

To print a listing of all examples of a chapter, use `ch13()`. To run all the examples of `ch13()`, use `example(ch13)`.

### Usage

```r
ch13()
```

### See Also

`toc`

Other Chapters: `ch01()`, `ch02()`, `ch03()`, `ch04()`, `ch05()`, `ch06()`, `ch07()`, `ch08()`, `ch09()`, `ch10()`, `ch11()`, `ch12()`, `ch14()`, `ch15()`, `ch16()`, `ch17()`, `ch18()`, `ch19()`, `ch20()`

### Examples

```r
if (interactive()) {
# Chapter 13 - Manipulating and Processing Data

# Deciding on the Most Appropriate Data Structure

# Creating Subsets of Your Data

## Understanding the three subset operators
## Understanding the five ways of specifying the subset

str(islands)             # inspect the structure of the islands vector
islands[]                # blank index: every element is returned
islands[c(8, 1, 1, 42)]  # positive integers: pick by position, repeats allowed
islands[-(3:46)]         # negative integers: drop positions 3 through 46
islands[islands < 20]    # logical vector: keep elements where the test is TRUE

## Subsetting data frames

str(iris)
iris[1:5, ]                               # rows 1 to 5, all columns
iris[, c("Sepal.Length", "Sepal.Width")]  # all rows, two columns chosen by name
# A single column selected with [ , ] is simplified to a plain vector ...
iris[, 'Sepal.Length']
# ... unless drop=FALSE is given, which keeps a one-column data frame.
iris[, 'Sepal.Length', drop=FALSE]
# Indexing without a comma treats the data frame as a list of columns and
# also returns a one-column data frame.
iris['Sepal.Length']
iris[1:5, c("Sepal.Length", "Sepal.Width")]  # rows and columns together

### Taking samples from data

# Ten draws, with replacement, from the values 1 to 6. No seed is set here, so
# the result differs from run to run.
sample(1:6, 10, replace=TRUE)

# After set.seed() the draws are reproducible. The two calls below are
# successive draws from the same random stream.
set.seed(1)
sample(1:6, 10, replace=TRUE)
sample(1:6, 10, replace=TRUE)

# Resetting the same seed replays the first of those draws.
set.seed(1)
sample(1:6, 10, replace=TRUE)

# Draw five distinct row numbers and use them to pull a random sample of rows.
# seq_len(n) is the safe spelling of 1:n: it is empty when n is 0, whereas
# 1:0 would give c(1, 0). For iris both forms produce the same draw.
set.seed(123)
index <- sample(seq_len(nrow(iris)), 5)
index
iris[index, ]

### Removing duplicate data

duplicated(c(1,2,1,3,1,4))  # TRUE marks a value already seen earlier
duplicated(iris)            # for a data frame, whole rows are compared
which(duplicated(iris))     # row positions of the duplicates
iris[!duplicated(iris), ]   # keep only the rows not flagged as duplicates

# Alternative: drop the duplicated rows by negative row number.
# NOTE: this form is only safe when at least one duplicate exists. With an
# empty index, iris[-index, ] selects zero rows instead of all of them; the
# logical form above has no such problem.
index <- which(duplicated(iris))
iris[-index, ]

### Removing rows with missing data

str(airquality)
complete.cases(airquality)  # TRUE for each row that contains no NA

# Two ways of keeping only the complete rows.
x <- airquality[complete.cases(airquality), ]
str(x)
# na.omit() additionally records which rows were dropped in an "na.action"
# attribute on the result.
x <- na.omit(airquality)

# Adding Calculated Fields to Data

## Doing arithmetic on columns of a data frame

# `$` extracts a column as a vector, so the division is element-wise per row.
# (The listing previously contained markdown-escaped `\$`, which is not
# valid R.)
x <- iris$Sepal.Length / iris$Sepal.Width

## Using with and within to improve code readability

# with() evaluates the expression with the columns of iris in scope, so the
# `iris$` prefix is not needed; the result is the same vector.
y <- with(iris, Sepal.Length / Sepal.Width)
identical(x, y)

# Two equivalent ways of storing the ratio as a new column. Both assign to a
# modified copy of iris in the current environment.
iris$ratio <- iris$Sepal.Length / iris$Sepal.Width
iris <- within(iris, ratio <- Sepal.Length / Sepal.Width)

## Creating subgroups or bins of data

### Using cut to create a fixed number of subgroups

frost <- state.x77[, "Frost"]       # the Frost column of state.x77
cut(frost, 3, include.lowest=TRUE)  # split the range of frost into 3 intervals

# Same bins, with readable labels in place of the interval notation.
cut(frost, 3, include.lowest=TRUE, labels=c("Low", "Med", "High"))

### Using table to count the number of observations

x <- cut(frost, 3, include.lowest=TRUE, labels=c("Low", "Med", "High"))
table(x)  # number of observations falling in each bin
x

# Combining and Merging Data Sets

## Creating sample data to illustrate merging

# The state names are the row names of state.x77; copy them into a regular
# Name column so they can serve as a merge key, then discard the row names.
# (The listing previously contained markdown-escaped `\$`, which is not
# valid R.)
all.states <- as.data.frame(state.x77)
all.states$Name <- rownames(state.x77)
rownames(all.states) <- NULL
str(all.states)

### Creating a subset of cold states

cold.states <- all.states[all.states$Frost>150, c("Name", "Frost")]
cold.states

### Creating a subset of large states

large.states <- all.states[all.states$Area>=100000, c("Name", "Area")]
large.states

## Using the merge() function

### Using merge to find the intersection of data

# With no `by` given, merge() joins on the columns the two data frames have
# in common (here only Name) and keeps just the rows present in both.
merge(cold.states, large.states)

### Understanding the different types of merge

# all=TRUE keeps every row from both inputs and fills the gaps with NA.
merge(cold.states, large.states, all=TRUE)

## Working with lookup tables

### Finding a match

# match() gives, for each cold state, its row position in large.states, or NA
# when the state does not appear there.
index <- match(cold.states$Name, large.states$Name)
index

# na.omit() drops the NAs so that only valid row positions are used.
large.states[na.omit(index), ]

### Making sense of %in%

# %in% answers the same question as a plain TRUE/FALSE vector; the third line
# spells out the equivalent match() expression.
index <- cold.states$Name %in% large.states$Name
index
!is.na(match(cold.states$Name,large.states$Name))
cold.states[index, ]

# Sorting and Ordering Data

# Put the region of each state next to the state.x77 statistics, then keep the
# first ten rows and first three columns as a small working example.
# (The listing previously contained markdown-escaped `\$`, which is not
# valid R.)
some.states <- data.frame(
  Region = state.region,
  state.x77)

some.states <- some.states[1:10, 1:3]
some.states

## Sorting vectors

### Sorting a vector in ascending order

sort(some.states$Population)

### Sorting a vector in decreasing order

sort(some.states$Population, decreasing=TRUE)

## Sorting data frames

### Getting the order

# order() returns the row positions that would sort the vector, not the
# sorted values themselves.
order.pop <- order(some.states$Population)
order.pop

some.states$Population[order.pop]

## Sorting a data frame in ascending order

# Indexing the rows with that permutation sorts the whole data frame.
some.states[order.pop, ]
order(some.states$Population)
order(some.states$Population, decreasing=TRUE)

some.states[order(some.states$Population, decreasing=TRUE), ]

### Sorting on more than one column

# Region is the primary key; Population breaks ties within a region.
index <- with(some.states, order(Region, Population))
some.states[index, ]

### Sorting multiple columns in mixed order
# xtfrm() turns the Region factor into numbers that sort the same way;
# negating them reverses the Region order while Population stays ascending.
index <- order(-xtfrm(some.states$Region), some.states$Population)
some.states[index, ]

# Traversing Your Data with the Apply Functions

## Using the apply() function to summarize arrays

str(Titanic)
apply(Titanic, 1, sum)        # totals for each level of the first dimension
apply(Titanic, 3, sum)        # totals for each level of the third dimension
apply(Titanic, c(3, 4), sum)  # two-way table: third by fourth dimension

## Using lapply() and sapply() to traverse a list or data frame

lapply(iris, class)  # one result per column, returned as a list
sapply(iris, class)  # the same results, simplified to a vector where possible
# Column means, with NA for non-numeric columns. is.numeric(x) is a single
# TRUE/FALSE, so a plain if/else is used rather than the vectorised ifelse();
# the result is identical.
sapply(iris, function(x) if (is.numeric(x)) mean(x) else NA)

## Using tapply() to create tabular summaries

# Mean sepal length per species, written with `$` and then with with().
# (The listing previously contained markdown-escaped `\$`, which is not
# valid R.)
tapply(iris$Sepal.Length, iris$Species, mean)
with(iris, tapply(Sepal.Length, Species, mean))

### Using tapply() to create higher-dimensional tables

str(mtcars)
# Recode the 0/1 am column as a labelled factor.
# NOTE: this object is named `cars`, so it masks the datasets::cars data set
# in the current environment.
cars <- within(mtcars,
  am <- factor(am, levels=0:1, labels=c("Automatic", "Manual"))
)

with(cars, tapply(mpg, am, mean))
# A list of grouping variables gives one table dimension per variable.
with(cars, tapply(mpg, list(gear, am), mean))

### Using aggregate()

# Same summary as the two-way tapply() above, returned as a data frame.
with(cars, aggregate(mpg, list(gear=gear, am=am), mean))

# Getting to Know the Formula Interface

# The same formula, mpg ~ gear + am, is passed to three different functions.
aggregate(mpg ~ gear + am, data=cars, mean)  # mean mpg per gear/am combination

aov(mpg ~ gear + am, data=cars)              # analysis-of-variance fit

library(lattice)
# NOTE(review): a lattice plot is drawn when the object returned by xyplot()
# is printed. This call is not the last expression inside the enclosing
# if block, so confirm the plot actually appears when the examples are run.
xyplot(mpg ~ gear + am, data=cars)

# Whipping Your Data into Shape

## Understanding data in long and wide format

## Getting started with the reshape2 package

## Not run: 
# NOTE(review): the "Not run" markers around this line are only comments, so
# pasting the listing into a session does execute install.packages(). Skip
# the line if reshape2 is already installed.
install.packages("reshape2")

## End(Not run)
library("reshape2")  # attach reshape2 for the melt() and dcast() calls below

# Example data in wide format: one row per game, with the venue and one
# numeric column for each of Granny, Geraldine and Gertrude.
goals <- data.frame(
  Game      = c("1st", "2nd", "3rd", "4th"),
  Venue     = c("Bruges", "Ghent", "Ghent", "Bruges"),
  Granny    = c(12, 4, 5, 6),
  Geraldine = c(5, 4, 2, 4),
  Gertrude  = c(11, 5, 6, 7)
)

## Melting data to long format

# NOTE(review): the result of the first call is overwritten on the next line;
# presumably it is kept to show melt() choosing the id variables on its own.
mgoals <- melt(goals)
# Explicit id columns: every other column is stacked into variable/value pairs.
mgoals <- melt(goals, id.vars=c("Game", "Venue"))
mgoals

## Casting data to wide format

# In each formula the left-hand variables define the rows and the right-hand
# variable defines the columns; sum is the function applied to the values
# that land in the same cell.
dcast(mgoals,  Venue + Game ~ variable, sum)
dcast(mgoals, variable ~ Venue , sum)
dcast(mgoals,  Venue ~ variable , sum)

dcast(mgoals,  Venue + variable ~ Game , sum)

library(ggplot2)
# One bar per level of `variable`, bar heights taken directly from `value`
# (stat="identity"), segments coloured by Game.
ggplot(mgoals, aes(x=variable, y=value, fill=Game)) + geom_bar(stat="identity")
}
```

rfordummies documentation built on March 18, 2022, 6:04 p.m.