library(learnr)
library(tidyverse)
library(knitr)
library(here)
library(twitterwidget)
library(rlang)
library(ggrepel)
library(viridis)
library(gghighlight)
library(patchwork)
# Chunk defaults: do not echo code in the rendered output; centre figures.
knitr::opts_chunk$set(echo = FALSE, fig.align = "center")

# NOTE(review): every location below is an absolute path into one user's local
# checkout of bespokelearnr, so this setup only runs on that machine.
path <- "/Users/matthewhirschey/Dropbox/DUKE/PROJECTS/bespokeDS/bespokelearnr/inst/extdata"

# Main data frame and the companion table that is joined to it on `id`.
proteins <- readRDS(file.path(path, "bespoke_dataframe.Rds"))
proteins_join <- readRDS(file.path(path, "bespoke_dataframe_join.Rds"))
joined <- right_join(proteins, proteins_join, by = "id")

# bespoke.R picks up its input from `df_input`, so set it before sourcing.
df_input <- proteins

source("/Users/matthewhirschey/Dropbox/DUKE/PROJECTS/bespokeDS/bespokelearnr/inst/content/metadata.R")
source("/Users/matthewhirschey/Dropbox/DUKE/PROJECTS/bespokeDS/bespokelearnr/inst/content/bespoke.R")

Doctors make decisions based on symptoms

# Slide photo hosted on Unsplash (credited in the caption below the image).
url <- "https://source.unsplash.com/701-FJcjLAQ/500x500"
knitr::include_graphics(path = url)

Photo by National Cancer Institute on Unsplash

New digital healthcare era introduces new decision-making challenges {.build}

Volume
- Data collection & storage allows access to huge amounts of medical information

Ubiquity
- Data are available anywhere across geography, social, and economic classes

Latency
- Technology enables access to data without delay

How do you make better health care decisions? {.build}

Data-driven decision making!

# DIKW pyramid diagram from Wikimedia Commons.
url <- "https://upload.wikimedia.org/wikipedia/commons/0/06/DIKW_Pyramid.svg"
knitr::include_graphics(path = url)

https://en.wikipedia.org/wiki/DIKW_pyramid

Emerging field of Data Science

# Data-science Venn diagram image (Data_Science_VD.png) embedded from its remote URL.
url <- "https://images.squarespace-cdn.com/content/v1/5150aec6e4b0e340ec52710a/1364352051365-HZAS3CLBF7ABLE3F5OBY/ke17ZwdGBToddI8pDm48kB2M2-8_3EzuSSXvzQBRsa1Zw-zPPgdn4jUwVcJE1ZvWQUxwkmyExglNqGp0IvTJZUJFbgE-7XRK3dMEBRBhUpxPe_8B-x4gq2tfVez1FwLYYZXud0o-3jV-FAs7tmkMHY-a7GzQZKbHRGZboWC-fOc/Data_Science_VD.png?format=1500w"
knitr::include_graphics(path = url)

http://drewconway.com/zia/2013/3/26/the-data-science-venn-diagram

Venn Diagram of Data Science v2.0

# Second Venn diagram image (VennDiagram2.png) embedded from its remote URL.
url <- "https://3.bp.blogspot.com/-bvQxcwfqATQ/V-E_uTBc4VI/AAAAAAAAMGQ/Qa1Ntef-rs0E-mWx5pkVu-CPlREdvD0TwCLcB/s1600/VennDiagram2.png"
knitr::include_graphics(path = url)

Joel Grus via KDnuggets

Overall goal is Knowledge Generation

# DIKW pyramid diagram from Wikimedia Commons, shown again on this slide.
url <- "https://upload.wikimedia.org/wikipedia/commons/0/06/DIKW_Pyramid.svg"
knitr::include_graphics(path = url)

https://en.wikipedia.org/wiki/DIKW_pyramid

Several Approaches to Knowledge Generation {.build}

# Build the embedded-tweet widget once; the slide text inserts `tweet` inline.
tweet <- twitterwidget("1125268670324695041")

`r tweet`

World's most popular programming languages

# Bar chart of user counts per language/tool, ordered from most to fewest users.
# TODO: add a citation for the source of these figures.
excel <- tibble(
  name = c("Excel", "Java", "C", "C++", "Python"),
  num = c(100e6, 9e6, 6e6, 4e6, 3e6)
)
excel_plot <- ggplot(excel) +
  geom_col(
    aes(x = fct_reorder(name, num, .desc = TRUE), y = num),
    fill = "navy"
  ) +
  labs(x = "", y = "Number of Users (Million)") +
  # Derive the tick labels from the break values (raw counts -> millions)
  # instead of hard-coding five labels, which only works while ggplot2 happens
  # to choose exactly five breaks.
  scale_y_continuous(labels = function(x) x / 1e6) +
  theme_minimal()
excel_plot

Your choice in programming language {.build}

"It's not important which language you choose, but rather what you can do with it"

R
language

Any questions?

R is a language

url <- "https://source.unsplash.com/ZzWsHbu2y80/256x455"
knitr::include_graphics(url)

Photo by Hannah Wright on Unsplash

R has values

R has objects

Try assigning an object

  1. Assign an object; remember, no quotes on name
    name <- 4

  2. Return that object by typing its name
    name

Try this in the code chunk below, then hit "Run Code"


R has functions

Try this in the code chunk below, then hit "Run Code"

# Round pi to three decimal places.
round(pi, digits = 3)

Which of these are numbers?

# Quiz: distinguish a numeric value from quoted strings and a bare name.
# Only the unquoted 1 is marked correct; each wrong answer carries feedback.
question(
  "Which of these are numbers?",
  answer("1", correct = TRUE),
  answer('"1"', message = "Because it has quotes, it is a string"),
  answer('"one"', message = "Because it has quotes, it is a string"),
  answer("one", message = "R recognizes this as an object"),
  allow_retry = TRUE,
  random_answer_order = TRUE
)

Which of these will work?

Suppose one <- 1

# Quiz: which log() calls run, given that `one` holds the number 1.
# The numeric literal and the object are both correct; the quoted forms are not.
question(
  text = "Which of these will work?",
  answer(text = "log(1)", correct = TRUE),
  answer(
    text = 'log("1")',
    correct = FALSE,
    message = "You cannot log transform a word (string)"
  ),
  answer(
    text = 'log("one")',
    correct = FALSE,
    message = "You cannot log transform a word (string)"
  ),
  answer(text = "log(one)", correct = TRUE),
  allow_retry = TRUE,
  random_answer_order = TRUE
)

Try it for yourself!

# Bind the name `one` to the number 1 so it can be used as an object.
one <- 1

Data are stored in tables and dataframes

Data stored in a dataframe are conceptually equivalent to a spreadsheet with rows and columns

This is a sample from the `r dataframe_name` dataset

# Render the first five columns and first five rows of df_input as a table.
knitr::kable(
  slice(
    select(df_input, 1:5),
    1:5
  )
)

Data are stored in tables and dataframes

Data stored in a dataframe are conceptually equivalent to a spreadsheet with rows and columns

This is a sample from the `r dataframe_name` dataset

# Print the first five columns and first five rows of df_input as a tibble.
# The conversion must be its own pipe step: `print(as_tibble())` would call
# as_tibble() with no data and hand the result to print() as a second argument.
df_input %>%
  select(1:5) %>%
  slice(1:5) %>%
  as_tibble() %>%
  print()

Extract or create new objects

# Pick one column name of df_input at random; later text and chunks use `col`.
col <- sample(names(df_input), size = 1)

You can call a single part of the data frame

`r dataframe_name`$`r col`

# Show only the column whose name is stored in `col`.
select(df_input, !!col)

Extract or create new objects

Write the R code required to extract a variable from the r dataframe_name dataset:

Remember, the format is: `r dataframe_name`$`r col`


Extract or create new objects

You can also save a part of the dataframe as an object for later use

`r col` <- `r dataframe_name`$`r col`

In the code chunk below:
1. On the first line, write the R code to save a single column to a new object
2. On the second line, type the object name - this will print out the new object
3. Run the code




matthewhirschey/bespokelearnr documentation built on Oct. 11, 2020, 12:57 a.m.