# Global chunk option: hide the R source in the rendered slides so that only
# chunk output (plots, tables) is shown. Spelled FALSE rather than F because
# F is an ordinary variable that can be reassigned.
knitr::opts_chunk$set(echo = FALSE)

Team at CDC

Organizational Chart

NCHHSTP Organizational Chart{ width=100% }

::: notes
Photo obtained from <https://www.cdc.gov/maso/pdf/NCHHSTP.pdf>
:::

The Problem

Potential Solution

Example Normal Distribution

library(ggplot2, quietly = TRUE)
library(ggthemes, quietly = TRUE)

### Different plots to show what a distribution is

# Height data: comma-separated text with a header row. The plot below reads
# its `Height` column.
# NOTE(review): absolute, machine-specific path -- the deck only knits on a
# machine that has this file; consider shipping the data with the project.
dat <- read.table(
  "/Users/ishaandave/Desktop/gender-height.txt",
  sep = ",",
  header = TRUE
)

## Histogram of heights overall

# Bars are drawn on the density scale (not counts) so the kernel density
# curve layered on top shares the same y-axis.
# after_stat() replaces the deprecated `..density..` notation and requires
# ggplot2 >= 3.3.0.
overallHist <- ggplot(dat, aes(x = Height)) +
  geom_histogram(
    aes(y = after_stat(density)),
    fill = "lightpink",
    color = "darkred",
    alpha = 0.5,
    position = "identity",
    binwidth = 1
  ) +
  geom_density(lwd = 0.65) +
  ggtitle("Overall Histogram of Height")

# Auto-print the plot so it is rendered on the slide.
overallHist

Example Frequency Distribution

library(pander)

# Keep the table in one piece regardless of its width.
panderOptions("table.split.table", Inf)
set.caption("The Great M&M Data")

# Pipe-delimited table kept inline; the first line holds the column headers.
my.data <- "Color   | Frequency | Percentage
    Brown           | 17        | 30.9%
    Red             | 18        | 32.7% 
    Blue            | 7         | 12.7%  
    Yellow          | 6         | 10.9%   
    Green           | 4         | 7.3%
    Orange          | 2         | 3.6%    
    Colorless/White | 1         | 1.8%"

# Parse the literal straight from the string via `text =`: read.table() then
# opens and closes its own connection, so no textConnection is left open.
# header = TRUE takes the column names from the first line, and
# colClasses = "character" keeps every column as text (e.g. "17", "30.9%"),
# exactly as the cells are written above.
df <- read.delim(
  text = my.data,
  header = TRUE,
  sep = "|",
  strip.white = TRUE,
  colClasses = "character",
  stringsAsFactors = FALSE
)

# Emit the table as R Markdown pipe-table syntax for the slide.
pander(df, style = "rmarkdown")

But...

Height by Gender

# Count histogram of all heights pooled together (first panel).
onlyHist <- ggplot(dat, aes(x = Height)) +
  geom_histogram(
    fill = "lightpink",
    color = "darkred",
    alpha = 0.5,
    position = "identity",
    binwidth = 1
  ) +
  ggtitle("Overall Histogram of Height")

# Same histogram with one outline colour per gender. position = "identity"
# overlays the groups on a common baseline instead of stacking them.
histByGender <- ggplot(dat, aes(x = Height, color = Gender)) +
  geom_histogram(
    fill = "white",
    alpha = 0.5,
    position = "identity",
    binwidth = 1
  ) +
  theme(
    legend.position = c(0.9, 0.8),
    legend.text = element_text(size = 8)
  ) +
  ggtitle("Histograms of Heights by Gender")

## Density plots because why not
densByGender <- ggplot(data = dat, aes(x = Height, color = Gender)) +
  geom_density()

## Plotting overall and split + densities on 1 screen
# cols = 1 stacks the three plots vertically in a single column.
Rmisc::multiplot(onlyHist, histByGender, densByGender, cols = 1)

A pattern!

Now, what does this package do?

How it works

Missing Values

Decision Tree

![Decision tree to handle missing values with continuous data](Decision Tree.png){ width=110% height=75% }

Usage Example

![Example of MHS Pipeline](/Users/ishaandave/Desktop/MHS Pipeline.png)

Other functionalities

Potential issues

Package Website

Acknowledgements

Comments, Questions, Concerns?



CDCgov/DemographySpawnR documentation built on Aug. 5, 2020, 7:41 p.m.