View source: R/chapters-auto.R
ch13 | R Documentation |
To print a listing of all examples of a chapter, use ch13().
To run all the examples of ch13(), use example(ch13).
ch13()
toc
Other Chapters: ch01(), ch02(), ch03(), ch04(), ch05(), ch06(), ch07(), ch08(), ch09(), ch10(), ch11(), ch12(), ch14(), ch15(), ch16(), ch17(), ch18(), ch19(),
if (interactive()) { # C hapter 13 - Manipulating and Processing Data # Deciding on the Most Appropriate Data Structure # Creating Subsets of Your Data ## Understanding the three subset operators ## Understanding the five ways of specifying the subset str(islands) islands[] islands[c(8, 1, 1, 42)] islands[-(3:46)] islands[islands < 20] islands[c("Madagascar", "Cuba")] ## Subsetting data frames str(iris) iris[1:5, ] iris[, c("Sepal.Length", "Sepal.Width")] iris[, 'Sepal.Length'] iris[, 'Sepal.Length', drop=FALSE] iris['Sepal.Length'] iris[1:5, c("Sepal.Length", "Sepal.Width")] ### Taking samples from data sample(1:6, 10, replace=TRUE) set.seed(1) sample(1:6, 10, replace=TRUE) sample(1:6, 10, replace=TRUE) set.seed(1) sample(1:6, 10, replace=TRUE) set.seed(123) index <- sample(1:nrow(iris), 5) index iris[index, ] ### Removing duplicate data duplicated(c(1,2,1,3,1,4)) duplicated(iris) which(duplicated(iris)) iris[!duplicated(iris), ] index <- which(duplicated(iris)) iris[-index, ] ### Removing rows with missing data str(airquality) complete.cases(airquality) x <- airquality[complete.cases(airquality), ] str(x) x <- na.omit(airquality) # Adding Calculated Fields to Data ## Doing arithmetic on columns of a data frame x <- iris$Sepal.Length / iris$Sepal.Width head(x) ## Using with and within to improve code readability y <- with(iris, Sepal.Length / Sepal.Width) head(y) identical(x, y) iris$ratio <- iris$Sepal.Length / iris$Sepal.Width iris <- within(iris, ratio <- Sepal.Length / Sepal.Width) head(iris$ratio) ## Creating subgroups or bins of data ### Using cut to create a fixed number of subgroups head(state.x77) frost <- state.x77[, "Frost"] head(frost, 5) cut(frost, 3, include.lowest=TRUE) ### Adding labels to cut cut(frost, 3, include.lowest=TRUE, labels=c("Low", "Med", "High")) ### Using table to count the number of observations x <- cut(frost, 3, include.lowest=TRUE, labels=c("Low", "Med", "High")) table(x) x # Combining and Merging Data Sets ## Creating sample data 
to illustrate merging all.states <- as.data.frame(state.x77) all.states$Name <- rownames(state.x77) rownames(all.states) <- NULL str(all.states) ### Creating a subset of cold states cold.states <- all.states[all.states$Frost>150, c("Name", "Frost")] cold.states ### Creating a subset of large states large.states <- all.states[all.states$Area>=100000, c("Name", "Area")] large.states ## Using the merge() function ### Using merge to find the intersection of data merge(cold.states, large.states) ### Understanding the different types of merge merge(cold.states, large.states, all=TRUE) ## Working with lookup tables ### Finding a match index <- match(cold.states$Name, large.states$Name) index large.states[na.omit(index), ] ### Making sense of %in% index <- cold.states$Name %in% large.states$Name index !is.na(match(cold.states$Name,large.states$Name)) cold.states[index, ] # Sorting and Ordering Data some.states <- data.frame( Region = state.region, state.x77) some.states <- some.states[1:10, 1:3] some.states ## Sorting vectors ### Sorting a vector in ascending order sort(some.states$Population) ### Sorting a vector in decreasing order sort(some.states$Population, decreasing=TRUE) ## Sorting data frames ### Getting the order order.pop <- order(some.states$Population) order.pop some.states$Population[order.pop] ## Sorting a data frame in ascending order some.states[order.pop, ] order(some.states$Population) order(some.states$Population, decreasing=TRUE) some.states[order(some.states$Population, decreasing=TRUE), ] ### Sorting on more than one column index <- with(some.states, order(Region, Population)) some.states[index, ] ### Sorting multiple columns in mixed order index <- order(-xtfrm(some.states$Region), some.states$Population) some.states[index, ] # Traversing Your Data with the Apply Functions ## Using the apply() function to summarize arrays str(Titanic) apply(Titanic, 1, sum) apply(Titanic, 3, sum) apply(Titanic, c(3, 4), sum) ## Using lapply() and sapply() to 
traverse a list or data frame lapply(iris, class) sapply(iris, class) sapply(iris, function(x) ifelse(is.numeric(x), mean(x), NA)) ## Using tapply() to create tabular summaries tapply(iris$Sepal.Length, iris$Species, mean) with(iris, tapply(Sepal.Length, Species, mean)) ### Using tapply() to create higher-dimensional tables str(mtcars) cars <- within(mtcars, am <- factor(am, levels=0:1, labels=c("Automatic", "Manual")) ) with(cars, tapply(mpg, am, mean)) with(cars, tapply(mpg, list(gear, am), mean)) ### Using aggregate() with(cars, aggregate(mpg, list(gear=gear, am=am), mean)) # Getting to Know the Formula Interface aggregate(mpg ~ gear + am, data=cars, mean) aov(mpg ~ gear + am, data=cars) library(lattice) xyplot(mpg ~ gear + am, data=cars) # Whipping Your Data into Shape ## Understanding data in long and wide format ## Getting started with the reshape2 package ## Not run: install.packages("reshape2") ## End(Not run) library("reshape2") goals <- data.frame( Game = c("1st", "2nd", "3rd", "4th"), Venue = c("Bruges", "Ghent", "Ghent", "Bruges"), Granny = c(12, 4, 5, 6), Geraldine = c(5, 4, 2, 4), Gertrude = c(11, 5, 6, 7) ) ## Melting data to long format mgoals <- melt(goals) mgoals <- melt(goals, id.vars=c("Game", "Venue")) mgoals ## Casting data to wide format dcast(mgoals, Venue + Game ~ variable, sum) dcast(mgoals, variable ~ Venue , sum) dcast(mgoals, Venue ~ variable , sum) dcast(mgoals, Venue + variable ~ Game , sum) library(ggplot2) ggplot(mgoals, aes(x=variable, y=value, fill=Game)) + geom_bar(stat="identity") }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.