# Load your libraries here

library(lehmansociology)
library(googlesheets)
library(dplyr)
library(ggplot2)
library(xtable)

Title

Write an introduction here. Describe the variables you are using.

# Recreate our basic education_and_poverty data

# Keep only the identifier columns and the 2013 poverty rate.
poverty13 <- select(poverty.states, FIPStxt, Area_Name, PCTPOVALL_2013)
# Build an integer FIPS.Code key for merging. Converting through
# as.character() first keeps the result correct even if FIPStxt is stored as
# a factor, where as.integer() alone would return the internal level codes
# instead of the FIPS values.
poverty13$FIPS.Code <- as.integer(as.character(poverty13$FIPStxt))

# Keep the identifier columns and the less-than-high-school percentage.
lessthanhighschool13 <- select(
  education.states, Area.name, FIPS.Code,
  Percent.of.adults.with.less.than.a.high.school.diploma..2009.2013
)

# Join the two tables on FIPS.Code; merge() keeps only rows present in both.
education_and_poverty <- merge(poverty13, lessthanhighschool13,
                               by = "FIPS.Code")
# Type your code here
# First let's create the region data set: register the published Google
# Sheet by its URL, then read its contents.
gs_region <- gs_url(
  "https://docs.google.com/spreadsheets/d/1h_jY4A44WoSLkrqhwZZ9oJh51N2GybwVvGgEaY3n2gc/pubhtml"
)
region_data <- gs_read(gs_region)
# Add the columns of region_data to education_and_poverty by matching on the
# FIPS code
education_and_poverty <- merge(education_and_poverty, region_data,
                               by = "FIPS.Code")

Now let's confirm our distributions of regions and divisions by making tables.

# Frequency of each region, followed by the same table as proportions
table(region_data$region)
prop.table(table(region_data$region))

# Add your table for division here
# More complicated, but nicer looking ... pick which version you want to keep.
# Turn off the comment line that xtable otherwise adds to its output.
options(xtable.comment = FALSE)
xtable(
  table(Number = region_data$region)
)
xtable(
  prop.table(table(Proportion = region_data$region))
)

# Add your table for division here

What is the mode for region?

What is the mode for division?

Analysis

Next we will compare poverty and education within the different regions using box plots. When you are happy with your poverty box plots, make box plots for `Percent.of.adults.with.less.than.a.high.school.diploma..2009.2013`.

# In R, the term factor is used for nominal variables.
# Draw one box plot of the 2013 poverty rate per region.
ggplot(education_and_poverty, aes(x = factor(region), y = PCTPOVALL_2013)) +
  # The fill is a fixed colour, so it is set outside aes(). Inside aes() the
  # string "white" would be treated as data and mapped to the default palette,
  # which also creates a legend that then has to be hidden.
  geom_boxplot(fill = "white") +
  # Uncomment the line below to add the mean to your plot
  # stat_summary(fun = "mean", geom = "point", shape = 23, size = 3, fill = "white") +
  # Fill in a title and axis labels between the quotation marks.
  ggtitle("") +
  labs(y = "", x = "")

A box plot shows:

Now we will do the same comparison (of means and medians) with a table of values.

# Summarize the poverty rate within each region: mean, median, and
# interquartile range.
# You can add any other statistics you want, such as max() or min().
# Add the new statistics to the list inside summarize().
comparison_poverty <- summarize(
  group_by(education_and_poverty, region),
  mean(PCTPOVALL_2013),
  median(PCTPOVALL_2013),
  IQR(PCTPOVALL_2013)
)

comparison_poverty
# Now do the same thing for the percent with less than high school degree.

The version below is more complicated but nicer looking. Choose one version and delete the one you don't want.

# Per-region mean, median, and interquartile range of the poverty rate,
# rendered as a captioned xtable.
# You can add any other statistics you want, such as max() or min()
comparison_poverty <-
  education_and_poverty %>%
  group_by(region) %>%
  summarise(
    mean(PCTPOVALL_2013),
    median(PCTPOVALL_2013),
    IQR(PCTPOVALL_2013)
  )

xtable(comparison_poverty, caption = "Percent in Poverty by Region")

# Now do the same thing for the percent with less than high school degree.

Conclusion

Write a paragraph summarizing what you learned about the differences (or lack of differences) in poverty and education between the regions.

Write a paragraph comparing the presentation of the results in the table and the presentation of the results in the graphs. Which do you think is more effective? Why?



lehmansociology/lehmansociology documentation built on May 21, 2022, 9:06 p.m.