library(ggplot2)
library(lehmansociology)
library(grid)
library(scales)
library(magrittr)
library("dplyr")
library(googlesheets)

# Rendering options for the knitted document: turn off the comment that
# xtable adds to its output, and keep package messages and warnings out of
# the chunk output.
options(xtable.comment = FALSE)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
# Convert number-like strings that contain commas (e.g. "1,234") to numeric.
#
# x: a character vector, or anything gsub() can coerce to character.
# Returns a numeric vector the same length as x. Entries that still cannot
# be parsed as numbers once the commas are removed become NA, with
# as.numeric()'s usual coercion warning.
replaceCommas <- function(x) {
  # Leave the conversion as the final expression instead of assigning it to a
  # local: the value of an assignment is invisible, so the result would not
  # print when the function is called directly.
  # fixed = TRUE matches the literal comma without going through the regex
  # engine.
  as.numeric(gsub(",", "", x, fixed = TRUE))
}
# Build the poverty table: keep the identifier, poverty-rate, income and
# rural/urban columns from poverty.states.
poverty13 <- poverty.states %>%
  select(
    FIPStxt, Area_Name, PCTPOVALL_2013, PCTPOV05_2013,
    MEDHHINC_2013, Rural_urban_Continuum_Code_2013
  )
# Integer copy of the FIPS code; it is the key for the merges further down.
# NOTE(review): assumes FIPStxt is character. If it were a factor,
# as.integer() would return level codes rather than the FIPS numbers --
# TODO confirm.
poverty13[["FIPS.Code"]] <- as.integer(poverty13[["FIPStxt"]])
# Strip the commas from median household income and make the column numeric.
poverty13[["MEDHHINC_2013"]] <- replaceCommas(poverty13[["MEDHHINC_2013"]])

# Pull the area name, FIPS code and the four education-attainment percentage
# columns (2009-2013 and 2000) out of education.states.
lessthanhighschool13 <- education.states %>%
  select(
    Area.name,
    FIPS.Code,
    Percent.of.adults.with.less.than.a.high.school.diploma..2009.2013,
    Percent.of.adults.with.a.bachelor.s.degree.or.higher..2009.2013,
    Percent.of.adults.with.less.than.a.high.school.diploma..2000,
    Percent.of.adults.with.a.bachelor.s.degree.or.higher..2000
  )

# Join the poverty and education tables on their shared FIPS.Code column.
# With merge()'s defaults, only codes present in both tables are kept.
education_and_poverty <- merge(
  poverty13,
  lessthanhighschool13,
  by = "FIPS.Code"
)
#type your code here
# Read the region lookup table from the published Google Sheet.
gs_region <- gs_url(
  "https://docs.google.com/spreadsheets/d/1h_jY4A44WoSLkrqhwZZ9oJh51N2GybwVvGgEaY3n2gc/pubhtml"
)
region_data <- gs_region %>% gs_read()
# NOTE(review): the original note here says a column name needs to change
# because the map data uses the term "region" differently, but no rename is
# performed in this block -- confirm the sheet already carries the intended
# column name.
# Attach the region variable to education_and_poverty by matching FIPS codes.
education_and_poverty <- merge(
  education_and_poverty,
  region_data,
  by = "FIPS.Code"
)

Add a code chunk here that uses the replaceCommas() function defined earlier on this variable: poverty13$MEDHHINC_2013

# Scatterplot of the 2013 poverty rate against the share of adults without a
# high school diploma, one point per row of education_and_poverty, with the
# points colored by region.
baseplot <- ggplot(
  data = education_and_poverty,
  mapping = aes(
    x = Percent.of.adults.with.less.than.a.high.school.diploma..2009.2013,
    y = PCTPOVALL_2013,
    fill = region
  )
) +
  geom_point(mapping = aes(color = region)) +
  labs(
    x = "Percent of adults with less than highschool diploma",
    y = "Percent of population in poverty"
  ) +
  ggtitle("Fig # : Poverty Rate and High School Completion (for States)")
# Display the figure.
baseplot
  # Simple linear regression: overall 2013 poverty rate (dependent variable)
  # on the percent of adults with less than a high school diploma
  # (independent variable).
  regression1 <- lm(
    formula = PCTPOVALL_2013 ~
      Percent.of.adults.with.less.than.a.high.school.diploma..2009.2013,
    data = education_and_poverty
  )
  # Show the fitted coefficients and fit statistics, including R-squared.
  summary(regression1)

Change to use PCTPOV05_2013 as the dependent variable


Change to use median household income 2013 as the dependent variable


Change each of the three above to use Percent.of.adults.with.a.bachelor.s.degree.or.higher..2009.2013 as the independent variable




Write out the equations for the regression lines.

Compare them.

What are the R-squared values?

Which variable has the strongest relationship with MEDHHINC?

Which variable has the strongest relationship with PCTPOVALL_2013?

Which variable has the strongest relationship with PCTPOV05_2013?

Can you use the slope to say how strong the relationship is? Why or why not?

What could explain the differences and similarities that you found in your result? Use your sociological imagination to answer.



elinw/lehmansociology documentation built on May 16, 2019, 3 a.m.