# Global setup for the slide deck: set the htmltools.dir.version option to
# FALSE before any chunk is rendered.
options(htmltools.dir.version = FALSE)

# Default chunk options: centred 5 x 5 figures, with warnings and messages
# kept out of the rendered output.
knitr::opts_chunk$set(
  fig.align = "center",
  fig.width = 5,
  fig.height = 5,
  warning = FALSE,
  message = FALSE
)
# Load xaringanthemer and define the two-colour theme used by these slides.
library(xaringanthemer)

duo_accent(
  # Colours
  primary_color = "ivory",
  secondary_color = "#310A31",
  # Fonts: headers and body text come from Google Fonts, code uses Fira Code
  # loaded from the stylesheet URL below.
  header_font_google = google_font("Roboto", "400"),
  text_font_google = google_font("Lato", "300"),
  code_font_family = "Fira Code",
  code_font_url = "https://cdn.rawgit.com/tonsky/FiraCode/1.204/distr/fira_code.css",
  # Text colours for headers and the title slide
  header_color = "#f54278",
  title_slide_text_color = "#354a66"
)
These slides make use of the chi_emps data set contained in the workshop package.
# Attach the workshop package, then load the chi_emps data set into the
# session.
library(workshop)
data(chi_emps)
# Show the dimensions (number of rows, number of columns) of chi_emps.
dim(chi_emps)
-- What are the names of the columns?
# List the column names of chi_emps.
names(chi_emps)
-- You can use summary or View as well to get more info.
What happens when you run the following?
# Print a summary of chi_emps.
summary(chi_emps)
What happens when you run the following?
# Print a summary of chi_emps.
summary(chi_emps)
and what about this?
# Open chi_emps in the spreadsheet-style data viewer (interactive use).
View(chi_emps)
For example, we can select the first 5 rows and the first 4 columns of the chi_emps data frame:
--
# Index by position: rows 1 to 5 before the comma, columns 1 to 4 after it.
chi_emps[1:5, 1:4]
However, it's often more practical to select columns by name. The following code: - keeps only the Name, Dept, and AnnSalary columns - displays the dimensions of the smaller data set --
# Select columns by name; the empty slot before the comma keeps every row.
chi2 <- chi_emps[, c("Name", "Dept", "AnnSalary")]

# Dimensions of the reduced data set.
dim(chi2)
-- Note that there was no row constraint in the first line as we were intending to keep all rows of the data.
The code below: - keeps only the rows where the salary is between $75408 and $97440 (inclusive) - displays the first few rows --
# Keep the rows of chi2 whose AnnSalary lies between 75408 and 97440
# (inclusive); the empty slot after the comma keeps every column.
#
# The comparison yields NA wherever AnnSalary is missing, and a logical row
# index containing NA makes `[` return an all-NA row for each of them.
# which() converts the mask to row positions and drops the NA entries, so
# only rows that really satisfy the condition are kept.
midsal <- chi2[
  which(chi2$AnnSalary >= 75408 & chi2$AnnSalary <= 97440),
]

# Display the first few matching rows.
head(midsal)
-- Note that there is no column constraint as we are keeping all the columns.
# Comparison and logical operators, paired position-by-position with a
# plain-language description of each.
Operator <- c("==", "!=", "<", "<=", ">", ">=", "|", "&", "!")
Meaning <- c(
  "equal",
  "not equal",
  "less than",
  "less than or equal to",
  "greater than",
  "greater than or equal to",
  "Or: at least one of the expressions is true",
  "And: Both expressions are true",
  "Not: the expression is not true"
)

# Combine the two vectors into a two-column data frame (columns named
# Operator and Meaning) and render it as a table on the slide.
optable <- data.frame(Operator, Meaning)
kableExtra::kable(optable)
How would we get a data set with only hourly paid employees?
# Keep only the rows whose SalHour value is "Hourly"; the empty slot after
# the comma keeps every column.
hourly <- chi_emps[chi_emps$SalHour == "Hourly", ]
How would we get employees that meet both of the following: - have an hourly rate of more than $20/hr - have typical hours of more than 25 --
# From the hourly subset, keep rows where HourlyRate exceeds 20 AND
# TypicalHours exceeds 25. The comparisons bind tighter than `&`, so no
# extra parentheses are needed.
df <- hourly[
  hourly$HourlyRate > 20 & hourly$TypicalHours > 25,
]

# Display the first few matching rows.
head(df)
How would we get employees that meet either of the following: - the Department is PUBLIC LIBRARY - the employee is Part-time (FullPart: P)
--
# Keep rows where Dept is "PUBLIC LIBRARY" OR FullPart is "P"; a row
# qualifies when at least one of the two conditions is true.
lib_part <- chi_emps[
  chi_emps$Dept == "PUBLIC LIBRARY" |
    chi_emps$FullPart == "P",
]

# Display the first few matching rows.
head(lib_part)
# Draw a histogram of the HourlyRate column, extracted with `$`.
hist(chi_emps$HourlyRate)
--
We have been using the `$` operator to extract single columns from the data.
We can also create new columns in the data set.
# Add a new sal_gt100 column to chi2 with `$<-`: TRUE where AnnSalary is
# above 100000, FALSE otherwise. The three ifelse() arguments are named to
# make their roles explicit.
chi2$sal_gt100 <- ifelse(
  test = chi2$AnnSalary > 100000,
  yes = TRUE,
  no = FALSE
)

# Display the first few rows, including the new column.
head(chi2)
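The ifelse function is very handy and has three arguments: the logical test to evaluate, the value to return where the test is TRUE, and the value to return where the test is FALSE.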
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.