# Load your libraries here



Write an introduction here. Describe the variables you are using.

# Recreate our basic education_and_poverty data

# Keep the state identifiers and the 2013 percent-in-poverty column.
poverty13 <- select(poverty.states, FIPStxt, Area_Name, PCTPOVALL_2013)
# Store FIPStxt as an integer in FIPS.Code; FIPS.Code is the merge key below.
poverty13$FIPS.Code <- as.integer(poverty13$FIPStxt)

# Keep the state identifiers and the percent of adults with less than a
# high school diploma (2009-2013).
# NOTE(review): this column is the one the box plot instructions refer to;
# confirm it is the intended education measure.
lessthanhighschool13 <- select(
  education.states, Area.name, FIPS.Code,
  Percent.of.adults.with.less.than.a.high.school.diploma..2009.2013
)

# Combine the two data sets, matching rows on FIPS.Code.
education_and_poverty <- merge(poverty13, lessthanhighschool13,
                               by.x = "FIPS.Code", by.y = "FIPS.Code")
# Type your code here
# First, create the region data set (region_data). It must contain a
# FIPS.Code column, because that column is the key used in the merge below.
# Then add the region variable to education_and_poverty by matching the
# FIPS code.
education_and_poverty <- merge(
  education_and_poverty, region_data,
  by = "FIPS.Code"
)

Now let's confirm the distributions of regions and divisions by making tables.


# Add your table for division here
# More complicated, but nicer looking ... pick which version you want to keep.
# Turn off the comment line that xtable otherwise prints with each table.
options(xtable.comment = FALSE)
# Count of rows in each region, labelled "Number".
xtable(table(region_data$region, dnn = "Number"))
# Share of rows in each region, labelled "Proportion".
xtable(prop.table(table(region_data$region, dnn = "Proportion")))

# Add your table for division here

What is the mode for region?

What is the mode for division?


Next we will compare poverty and education within the different regions using box plots. When you are happy with your poverty box plots, make box plots for `Percent.of.adults.with.less.than.a.high.school.diploma..2009.2013`.

# In R, the term factor is used for nominal variables, so factor(region)
# makes ggplot draw one box per region.
ggplot(education_and_poverty, aes(x = factor(region), y = PCTPOVALL_2013)) +
  # fill is a fixed colour, so it is set outside aes(). Inside aes() the
  # string "white" is treated as data: the boxes get a default colour and a
  # legend appears.
  geom_boxplot(fill = "white") +
  # Uncomment the line below to add the mean to your plot
  # (ggplot2 versions before 3.3.0 call this argument fun.y instead of fun).
  # stat_summary(fun = "mean", geom = "point", shape = 23, size = 3, fill = "white") +
  # Fill in the title and axis labels between the quotation marks.
  ggtitle("") +
  labs(y = "", x = "")

A box plot shows:

Now we will do the same comparison (of means and medians) with a table of values.

# Mean, median and interquartile range of the 2013 poverty rate, by region.
# You can add any other statistics you want, such as max() or min().
# Add the new statistics to the list inside the parentheses.
comparison_poverty <- education_and_poverty %>%
  group_by(region) %>%
  summarise(
    mean(PCTPOVALL_2013),
    median(PCTPOVALL_2013),
    IQR(PCTPOVALL_2013)
  )

# Now do the same thing for the percent with less than high school degree.

The version below is more complicated but nicer looking. Choose one version and delete the one you don't want.

# Mean, median and interquartile range of the 2013 poverty rate, by region.
# You can add any other statistics you want, such as max() or min().
comparison_poverty <- education_and_poverty %>%
  group_by(region) %>%
  summarise(
    mean(PCTPOVALL_2013),
    median(PCTPOVALL_2013),
    IQR(PCTPOVALL_2013)
  )

# Format the summary as a table with a caption.
xtable(comparison_poverty, caption = 'Percent in Poverty by Region')

# Now do the same thing for the percent with less than high school degree.


Write a paragraph summarizing what you learned about the differences (or lack of differences) in poverty and education across the regions.

Write a paragraph comparing the presentation of the results in the table and the presentation of the results in the graphs. Which do you think is more effective? Why?

SOC345/lehmansociology documentation built on May 9, 2019, 11:41 a.m.