## ---- echo = FALSE, message = FALSE--------------------------------------
options(useFancyQuotes="UTF-8")
knitr::opts_chunk$set(comment = NA, message = FALSE, warning = FALSE, prompt = TRUE)
## ------------------------------------------------------------------------
library(PDS)
## ----eval = FALSE--------------------------------------------------------
# system.file(package = "PDS")
## ------------------------------------------------------------------------
gapminder[1:5, 'incomeperperson'] # Show first five rows for incomeperperson
gapminder[1:5, 1:4] # Show the first five rows for variables 1 through 4
## ----eval = FALSE--------------------------------------------------------
# install.packages("dplyr")
## ----eval = FALSE--------------------------------------------------------
# if (packageVersion("devtools") < 1.6) {
# install.packages("devtools")
# }
# devtools::install_github("hadley/lazyeval")
# devtools::install_github("hadley/dplyr")
## ------------------------------------------------------------------------
library(dplyr)
nesarc <- tbl_df(NESARC) %>%
filter(!is.na(CHECK321) & !is.na(AGE) & CHECK321 ==1 & AGE <= 25)
dim(nesarc)
## ----label = "SUB1"------------------------------------------------------
NESARCsub1 <- tbl_df(NESARC) %>%
filter(S3AQ1A == 1 & CHECK321 == 1 & S3AQ3B1 == 1 & AGE <= 25)
dim(NESARCsub1)
## ----label = "SUB2"------------------------------------------------------
NESARCsub2 <- NESARC[NESARC$S3AQ1A == 1 & NESARC$CHECK321 == 1 &
NESARC$S3AQ3B1 == 1 & NESARC$AGE <= 25, ]
dim(NESARCsub2)
## ----label = "SUB3"------------------------------------------------------
NESARCsub3 <- subset(NESARC, subset = S3AQ1A == 1 & CHECK321 == 1 &
S3AQ3B1 == 1 & AGE <= 25)
dim(NESARCsub3)
## ----label = "RENAME"----------------------------------------------------
NESARC[1:5, 1:5] # Show first 5 rows and first 5 columns of NESARC
NESARCtbl <- tbl_df(NESARC) %>%
rename(UniqueID = IDNUM, EthanolConsumption = ETOTLCA2, Ethnicity = ETHRACE2A,
SmokingFrequency = S3AQ3B1, Age = AGE, MajorDepression = MAJORDEPLIFE, Sex = SEX,
DysthymiaLifetime = DYSLIFE, TobaccoDependence = TAB12MDX, DailyCigsSmoked = S3AQ3C1) %>%
select(UniqueID, EthanolConsumption, Ethnicity, SmokingFrequency, Age, MajorDepression,
DysthymiaLifetime, TobaccoDependence, DailyCigsSmoked, S3AQ1A, CHECK321,
SmokingFrequency, Sex)
NESARCtbl
## ----label = "CodeMissing"-----------------------------------------------
NESARCtbl$SmokingFrequency[NESARCtbl$SmokingFrequency == 9] <- NA
summary(NESARCtbl$SmokingFrequency) # Note that 9 still appears
NESARCtbl$SmokingFrequency <- factor(NESARCtbl$SmokingFrequency)[, drop = TRUE]
summary(NESARCtbl$SmokingFrequency) # Unused level no longer appears
## ----label = "Freq"------------------------------------------------------
NESARCtbl$SmokingFrequency <- factor(NESARCtbl$SmokingFrequency,
labels = c("Every Day", "5 to 6 Days/week",
"3 to 4 Days/week", "1 to 2 Days/week",
"2 to 3 Days/month", "Once a month or less"))
summary(NESARCtbl$SmokingFrequency)
xtabs(~SmokingFrequency, data = NESARCtbl) # Note how the NA's are not printed
## ----label = "ggSmoke"---------------------------------------------------
library(ggplot2)
ggplot(data = NESARCtbl, aes(x = SmokingFrequency)) +
geom_bar(fill = "lightgray") +
labs(x = "Smoking Frequency") +
theme_bw()
## ----label = "ggSmoke2", fig.width = 6, fig.height = 6-------------------
ggplot(data = na.omit(NESARCtbl[ , "SmokingFrequency", drop = FALSE]), aes(x = SmokingFrequency)) +
geom_bar(fill = "lightgray") +
labs(x = "Smoking Frequency") +
theme_bw() +
theme(axis.text.x = element_text(angle = 55, hjust = 1.0))
## ----label = "CollapseHS"------------------------------------------------
NESARC$HS_DEGREE <- factor(ifelse(NESARC$S1Q6A %in% c("1", "2", "3", "4", "5", "6", "7"),
"No", "Yes"))
summary(NESARC$HS_DEGREE)
## ----label = "CUT1"------------------------------------------------------
NESARC$AGEfac <- cut(NESARC$AGE, breaks = c(18, 30, 50, Inf),
labels = c("Young Adult", "Adult", "Older Adult"),
include.lowest = TRUE)
summary(NESARC$AGEfac)
## ----label = "CUT2"------------------------------------------------------
NESARC$S3AQ3C1fac <- cut(NESARC$S3AQ3C1, breaks = c(0, 5, 10, 15, 20, 100),
include.lowest = TRUE)
summary(NESARC$S3AQ3C1fac)
## ----label = "Agg1"------------------------------------------------------
NESARC$DepressLife <- factor(ifelse( (NESARC$MAJORDEPLIFE == 1 | NESARC$DYSLIFE == 1), "Yes", "No"))
summary(NESARC$DepressLife)
## ----label = "PANIC"-----------------------------------------------------
NESARC$PPpanic <- factor(ifelse( (NESARC$APANDX12 == 1 | NESARC$APANDXP12 == 1 |
NESARC$PANDX12 == 1 | NESARC$PANDXP12 == 1 ), "Yes", "No"))
summary(NESARC$PPpanic)
## ----label = "DEPsym"----------------------------------------------------
NESARC$AllDeprSymp <- factor(ifelse( (NESARC$S4AQ4A1 == 1 & NESARC$S4AQ4A2 == 1 &
NESARC$S4AQ4A3 == 1 & NESARC$S4AQ4A4 == 1 &
NESARC$S4AQ4A5 == 1 & NESARC$S4AQ4A6 == 1 &
NESARC$S4AQ4A7 == 1 & NESARC$S4AQ4A8 == 1 &
NESARC$S4AQ4A9 == 1 & NESARC$S4AQ4A10 == 1 &
NESARC$S4AQ4A11 == 1 & NESARC$S4AQ4A12 == 1 &
NESARC$S4AQ4A13 == 1 & NESARC$S4AQ4A14 == 1 &
NESARC$S4AQ4A15 == 1 & NESARC$S4AQ4A16 == 1 &
NESARC$S4AQ4A17 == 1 & NESARC$S4AQ4A18 == 1 &
NESARC$S4AQ4A19 == 1), "Yes", "No"))
summary(NESARC$AllDeprSymp)
## ----label = "CompositeFactor"-------------------------------------------
mysum <- function(x){sum(x == 1)}
myadd <- function(x){apply(x, 1, mysum)}
ndf <- NESARC %>%
select(contains("S4AQ4A"))
nDS <- myadd(ndf)
ndf <- cbind(ndf, nDS)
xtabs(~nDS, data = ndf)
## ----label = "Mutate"----------------------------------------------------
MINI <- tbl_df(NESARC) %>%
select(S1Q24FT, S1Q24IN, S1Q24LB, SEX) %>%
filter(S1Q24FT < 99, S1Q24IN < 99, S1Q24LB < 999) %>%
mutate(Inches = (S1Q24FT*12 + S1Q24IN),
Sex = factor(SEX, labels = c("Male", "Female"))) %>%
rename(Weight = S1Q24LB)
MINI
## ------------------------------------------------------------------------
summary(NESARCtbl$SmokingFrequency)
levels(NESARCtbl$SmokingFrequency)
table(as.numeric(NESARCtbl$SmokingFrequency))
## ------------------------------------------------------------------------
NESARCtbl$DaysSmoke <- as.numeric(NESARCtbl$SmokingFrequency)
NESARCtbl$DaysSmoke[NESARCtbl$DaysSmoke == 1] <- 30
NESARCtbl$DaysSmoke[NESARCtbl$DaysSmoke == 2] <- 4*5.5
NESARCtbl$DaysSmoke[NESARCtbl$DaysSmoke == 3] <- 4*3.5
NESARCtbl$DaysSmoke[NESARCtbl$DaysSmoke == 4] <- 4*1.5
NESARCtbl$DaysSmoke[NESARCtbl$DaysSmoke == 5] <- 2.5
NESARCtbl$DaysSmoke[NESARCtbl$DaysSmoke == 6] <- 1
# Using dplyr again
NESARCtbl <- NESARCtbl %>%
mutate(TotalCigsSmoked = DaysSmoke*DailyCigsSmoked)
proportions <- quantile(NESARCtbl$TotalCigsSmoked, na.rm = TRUE)
proportions
NESARCtbl$CigsSmokedFac <- cut(NESARCtbl$TotalCigsSmoked, breaks = proportions,
include.lowest = TRUE)
NESARCtbl
## ------------------------------------------------------------------------
NESARCtbl <- NESARCtbl %>%
filter(S3AQ1A == 1 & CHECK321 == 1 & SmokingFrequency == "Every Day" & Age <= 25)
dim(NESARCtbl)
str(NESARCtbl)
summary(NESARCtbl)
## ----label = "EDAquan"---------------------------------------------------
library(ggplot2)
ggplot(data = NESARCtbl, aes(x = EthanolConsumption) ) +
geom_histogram(binwidth = 1, fill = "pink") +
theme_bw() +
labs(x = "Ethanol Consumption")
ggplot(data = NESARCtbl, aes(x = EthanolConsumption) ) +
geom_density(fill = "pink") +
theme_bw() +
labs(x = "Ethanol Consumption")
## ----label = "EDAqual"---------------------------------------------------
ggplot(data = NESARCtbl, aes(x = Ethnicity)) +
geom_bar(fill = c("snow", "brown4", "red", "yellow", "tan"), color = "black") +
theme_bw()
## ------------------------------------------------------------------------
NESARCtbl$TobaccoDependence <- factor(NESARCtbl$TobaccoDependence,
labels = c("No Nicotine Dependence", "Nicotine Dependence"))
xtabs(~TobaccoDependence, data = NESARCtbl)
NESARCtbl$TobaccoDependence <- factor(NESARCtbl$TobaccoDependence,
levels = c("Nicotine Dependence", "No Nicotine Dependence"))
xtabs(~TobaccoDependence, data = NESARCtbl)
NESARCtbl$Ethnicity <- factor(NESARCtbl$Ethnicity,
labels = c("Caucasian", "African American",
"Native American", "Asian", "Hispanic"))
NESARCtbl$Sex <- factor(NESARCtbl$Sex, labels = c("Male", "Female"))
table(NESARCtbl$Sex)
NESARCtbl$Sex <- factor(NESARCtbl$Sex, levels = c("Female", "Male"))
table(NESARCtbl$Sex)
NESARCtbl$SmokingFrequency <- factor(NESARCtbl$SmokingFrequency,
levels = c("Once a month or less", "2 to 3 Days/month", "1 to 2 Days/week",
"3 to 4 Days/week", "5 to 6 Days/week", "Every Day"))
NESARCtbl$MajorDepression <- factor(NESARCtbl$MajorDepression,
labels = c("No Depression", "Yes Depression"))
ggplot(data = NESARCtbl, aes(x = Ethnicity)) +
geom_bar(fill = c("snow", "brown4", "red", "yellow", "tan"), color = "black") +
theme_bw() +
theme(axis.text.x = element_text(angle = 55, hjust = 1)) +
labs(x = "")
## ----label = "ggSmoke2A", fig.width = 6, fig.height = 4------------------
T1 <- xtabs(~ TobaccoDependence + MajorDepression, data = NESARCtbl)
T1
ggplot(data = NESARCtbl, aes(x = MajorDepression, fill = TobaccoDependence)) +
geom_bar() +
theme_bw()
T2 <- prop.table(T1, 2)
T2
ggplot(data = NESARCtbl, aes(x = MajorDepression, fill = TobaccoDependence)) +
geom_bar(position = "fill") +
labs(x = "", y = "Fraction",
title = "Fraction of young adult daily smokers\nwith and without nicotine addiction\nby depression status") +
theme_bw() +
scale_fill_manual(values = c("red", "pink"), name = "Tobacco Addiction Status")
###
ggplot(data = NESARCtbl, aes(x = MajorDepression, fill = TobaccoDependence)) +
geom_bar(position = "fill") +
labs(x = "", y = "Fraction",
title = "Fraction of young adult daily smokers\nwith and without nicotine addiction\nby depression status") +
theme_bw() +
scale_fill_manual(values = c("red", "pink"), name = "Tobacco Addiction Status") +
facet_grid(Sex ~ .)
## ----fig.width = 6.5, fig.height = 7.5-----------------------------------
ggplot(data = NESARCtbl, aes(x = MajorDepression, fill = TobaccoDependence)) +
geom_bar(position = "fill") +
labs(x = "", y = "Fraction",
title = "Fraction of young adult daily smokers\nwith and without nicotine addiction\nby depression status") +
theme_bw() +
scale_fill_manual(values = c("red", "pink"), name = "Tobacco Addiction Status") +
facet_grid(Ethnicity ~ Sex) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## ----label = "mergeAddHealth"--------------------------------------------
library(dplyr)
AddHealthW4 <- tbl_df(addhealth_public4) %>%
rename(AID = aid)
AddHealthW1and4 <- left_join(AddHealthW4, AddHealth)
dim(AddHealthW1and4)
## ---- echo = FALSE, results = 'asis'-------------------------------------
knitr::kable(NESARCtbl[1300:1305, c("EthanolConsumption", "Sex", "MajorDepression")],
align = c("c","c","c"), caption = "Three Selected Columns")
## ---- echo = FALSE, results = 'asis'-------------------------------------
knitr::kable(NESARCtbl[1300:1305, c("EthanolConsumption", "Sex", "MajorDepression")],
align = c("c","c","c"), caption = "Three Selected Columns", col.names = c("Ethanol Consuption", "Gender", "Depression"))
## ---- echo=FALSE, results='asis'-----------------------------------------
pander::pandoc.table(NESARCtbl[1300:1305, c("EthanolConsumption", "Sex", "MajorDepression")],
caption = "Three Selected Columns")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.