## Load the Afghanistan survey data shipped with the qss package and print
## basic summaries of the respondent-level variables used below.
## load data
data("afghan", package = "qss")
## summarize variables of interest
summary(afghan$age)
summary(afghan$educ.years)
summary(afghan$employed)
summary(afghan$income)
## joint proportions of reported harm by the ISAF and by the Taliban
prop.table(table(ISAF = afghan$violent.exp.ISAF,
                 Taliban = afghan$violent.exp.taliban))
## Inspect missing values in the survey: count them, see how NA propagates
## through mean(), and compare a table that keeps NA with listwise deletion.
## print income data for first 10 respondents
head(afghan$income, n = 10)
## indicate whether respondents' income is missing
head(is.na(afghan$income), n = 10)
sum(is.na(afghan$income))  # count of missing values
mean(is.na(afghan$income)) # proportion missing
## toy vector: mean() returns NA unless missing values are removed
x <- c(1, 2, 3, NA)
mean(x)
mean(x, na.rm = TRUE)
## exclude = NULL stops table() from dropping any value, so NA is tabulated too
prop.table(table(ISAF = afghan$violent.exp.ISAF,
                 Taliban = afghan$violent.exp.taliban, exclude = NULL))
afghan.sub <- na.omit(afghan) # listwise deletion
nrow(afghan.sub)
## for comparison: number of non-missing values in income alone
length(na.omit(afghan$income))
## Bar plots of the response distribution (including nonresponse) for the
## ISAF and Taliban victimization questions, drawn on a common y-axis range.
par(cex = 1.5)
## a vector of proportions to plot
ISAF.ptable <- prop.table(table(ISAF = afghan$violent.exp.ISAF,
                                exclude = NULL))
ISAF.ptable
## make barplots by specifying a certain range for y-axis
## (names.arg supplies three labels, so the table is expected to have
## three categories with NA last -- confirm against the printed table above)
barplot(ISAF.ptable,
        names.arg = c("No harm", "Harm", "Nonresponse"),
        main = "Civilian victimization by the ISAF",
        xlab = "Response category",
        ylab = "Proportion of the respondents", ylim = c(0, 0.7))
## repeat the same for the victimization by Taliban
Taliban.ptable <- prop.table(table(Taliban = afghan$violent.exp.taliban,
                                   exclude = NULL))
barplot(Taliban.ptable,
        names.arg = c("No harm", "Harm", "Nonresponse"),
        main = "Civilian victimization by the Taliban",
        xlab = "Response category",
        ylab = "Proportion of the respondents", ylim = c(0, 0.7))
## Density-scale histograms of respondent age and years of education; the
## education plot is annotated with its median.
par(cex = 1.5)
hist(afghan$age, freq = FALSE, ylim = c(0, 0.04), xlab = "Age",
     main = "Distribution of respondent's age")
par(cex = 1.5)
## histogram of education. use `breaks' to choose bins
## (breaks at -0.5, 0.5, ..., 18.5 give unit-width bins centred on integers)
hist(afghan$educ.years, freq = FALSE,
     breaks = seq(from = -0.5, to = 18.5, by = 1),
     xlab = "Years of education",
     main = "Distribution of respondent's education")
## add a text label at (x, y) = (3, 0.5)
text(x = 3, y = 0.5, "median")
## add a vertical line representing median
abline(v = median(afghan$educ.years))
## adding a vertical line representing median
## (alternative to abline(): a segment from y = 0 to y = 0.5 at the median)
lines(x = rep(median(afghan$educ.years), 2), y = c(0, 0.5))
## Box plots of education by province, followed by province-level
## proportions reporting harm by each party (missing responses dropped).
par(cex = 1.25)
boxplot(educ.years ~ province, data = afghan,
        main = "Education by province", ylab = "Years of education")
tapply(afghan$violent.exp.taliban, afghan$province, mean, na.rm = TRUE)
tapply(afghan$violent.exp.ISAF, afghan$province, mean, na.rm = TRUE)
## Saving or Printing a Graph
## (example left commented out so that running the script writes no files)
## pdf(file = "educ.pdf", height = 5, width = 5)
## boxplot(educ.years ~ province, data = afghan,
##         main = "Education by Province", ylab = "Years of education")
## dev.off()
## pdf(file = "hist.pdf", height = 4, width = 8)
## ## one row with 2 plots with font size 0.8
## par(mfrow = c(1, 2), cex = 0.8)
## ## for simplicity omit the texts and lines from the earlier example
## hist(afghan$age, freq = FALSE,
##      xlab = "Age", ylim = c(0, 0.04),
##      main = "Distribution of Respondent's Age")
## hist(afghan$educ.years, freq = FALSE,
##      breaks = seq(from = -0.5, to = 18.5, by = 1),
##      xlab = "Years of education", xlim = c(0, 20),
##      main = "Distribution of Respondent's Education")
## dev.off()
## Compare sampled and nonsampled villages on altitude and log population.
par(cex = 1.5)
## load village data
data("afghan.village", package = "qss")
## boxplots for altitude
boxplot(altitude ~ village.surveyed, data = afghan.village,
        ylab = "Altitude (meter)", names = c("Nonsampled", "Sampled"))
## boxplots for log population
boxplot(log(population) ~ village.surveyed, data = afghan.village,
        ylab = "log population", names = c("Nonsampled", "Sampled"))
## Item nonresponse rates by province for the two victimization questions,
## then the list-experiment comparison between the ISAF and control groups.
tapply(is.na(afghan$violent.exp.taliban), afghan$province, mean)
tapply(is.na(afghan$violent.exp.ISAF), afghan$province, mean)
## difference in mean list response: ISAF group minus control group
## (the trailing `-` keeps both lines a single expression)
mean(afghan$list.response[afghan$list.group == "ISAF"]) -
    mean(afghan$list.response[afghan$list.group == "control"])
table(response = afghan$list.response, group = afghan$list.group)
## DW-NOMINATE scores by party: scatter plots for the 80th and 112th
## Congresses and a time series of each party's median first-dimension score.
data("congress", package = "qss")
## subset the data by party
## NOTE: the data frame `rep` has the same name as base::rep(); calls such as
## rep(x, 2) still find the function, but the name is kept only because
## later code refers to objects derived from it.
rep <- subset(congress, subset = (party == "Republican"))
dem <- congress[congress$party == "Democrat", ] # another way to subset
## 80th and 112th congress
rep80 <- subset(rep, subset = (congress == 80))
dem80 <- subset(dem, subset = (congress == 80))
rep112 <- subset(rep, subset = (congress == 112))
dem112 <- subset(dem, subset = (congress == 112))
## preparing the labels and axis limits to avoid repetition
xlab <- "Economic liberalism/conservatism"
ylab <- "Racial liberalism/conservatism"
lim <- c(-1.5, 1.5)
par(cex = 1.5)
## scatterplot for the 80th Congress
plot(dem80$dwnom1, dem80$dwnom2, pch = 16, col = "blue",
     xlim = lim, ylim = lim, xlab = xlab, ylab = ylab,
     main = "80th Congress") # democrats
points(rep80$dwnom1, rep80$dwnom2, pch = 17, col = "red") # republicans
text(-0.75, 1, "Democrats")
text(1, -1, "Republicans")
## scatterplot for the 112th Congress
plot(dem112$dwnom1, dem112$dwnom2, pch = 16, col = "blue",
     xlim = lim, ylim = lim, xlab = xlab, ylab = ylab,
     main = "112th Congress")
points(rep112$dwnom1, rep112$dwnom2, pch = 17, col = "red")
## party median for each congress
dem.median <- tapply(dem$dwnom1, dem$congress, median)
rep.median <- tapply(rep$dwnom1, rep$congress, median)
par(cex = 1.5)
## Democrats
## tapply() stores the congress numbers as character names; convert them
## explicitly so the x-axis values are numeric rather than relying on
## implicit coercion inside plot()/lines()
plot(as.numeric(names(dem.median)), dem.median, col = "blue", type = "l",
     xlim = c(80, 115), ylim = c(-1, 1), xlab = "Congress",
     ylab = "DW-NOMINATE score (1st dimension)")
## add Republicans
lines(as.numeric(names(rep.median)), rep.median, col = "red")
text(110, -0.6, "Democratic\n Party")
text(110, 0.85, "Republican\n Party")
## Partisan gap in median DW-NOMINATE scores and the US Gini coefficient
## over time, followed by their correlation.
par(cex = 1.5)
## Gini coefficient data
data("USGini", package = "qss")
## time-series plot for partisan difference
## (x values run from 1947.5 to 2011.5 in steps of 2, one per congress median)
plot(seq(from = 1947.5, to = 2011.5, by = 2),
     rep.median - dem.median, xlab = "Year",
     ylab = "Republican median -\n Democratic median",
     main = "Political polarization")
## time-series plot for Gini coefficient
plot(USGini$year, USGini$gini, ylim = c(0.35, 0.45), xlab = "Year",
     ylab = "Gini coefficient", main = "Income inequality")
## correlate the partisan gap with every second Gini value (rows 2, 4, ...)
cor(USGini$gini[seq(from = 2, to = nrow(USGini), by = 2)],
    rep.median - dem.median)
## Compare the second-dimension DW-NOMINATE distributions of the two parties
## in the 112th Congress with histograms and a quantile-quantile plot.
par(cex = 1.5)
hist(dem112$dwnom2, freq = FALSE, main = "Democrats",
     xlim = c(-1.5, 1.5), ylim = c(0, 1.75),
     xlab = "Racial liberalism/conservatism dimension")
hist(rep112$dwnom2, freq = FALSE, main = "Republicans",
     xlim = c(-1.5, 1.5), ylim = c(0, 1.75),
     xlab = "Racial liberalism/conservatism dimension")
par(cex = 1.5)
qqplot(dem112$dwnom2, rep112$dwnom2, xlab = "Democrats",
       ylab = "Republicans", xlim = c(-1.5, 1.5), ylim = c(-1.5, 1.5),
       main = "Racial liberalism/conservatism dimension")
abline(0, 1) # 45 degree line
## Matrix basics: build a labelled 3x4 matrix, contrast it with a data frame
## of mixed column types, and compute row/column summaries.
## 3x4 matrix filled by row; first argument take actual entries
x <- matrix(1:12, nrow = 3, ncol = 4, byrow = TRUE)
rownames(x) <- c("a", "b", "c")
colnames(x) <- c("d", "e", "f", "g")
dim(x) # dimension
x
## data frame can take different data types
y <- data.frame(y1 = as.factor(c("a", "b", "c")), y2 = c(0.1, 0.2, 0.3))
class(y$y1)
class(y$y2)
## as.matrix() converts both variables to character
z <- as.matrix(y)
z
## column sums
colSums(x)
## row means
rowMeans(x)
## column sums
## (same result via apply(): margin 2 applies the function to each column)
apply(x, 2, sum)
## row means
## (margin 1 applies the function to each row)
apply(x, 1, mean)
## standard deviation for each row
apply(x, 1, sd)
## List basics: a list can hold elements of different types and lengths.
## create a list
## (this reassigns `x`, replacing any earlier object of that name)
x <- list(y1 = 1:10, y2 = c("hi", "hello", "hey"),
          y3 = data.frame(z1 = 1:3, z2 = c("good", "bad", "ugly")))
## 3 ways of extracting elements from a list
x$y1      # first element
x[[2]]    # second element
x[["y3"]] # third element
## Inspect the list `x`, then run k-means clustering on the two DW-NOMINATE
## dimensions for the 80th and 112th Congresses.
names(x)  # names of all elements
length(x) # number of elements
## two-column matrices of (dwnom1, dwnom2) for each congress
dwnom80 <- cbind(congress$dwnom1[congress$congress == 80],
                 congress$dwnom2[congress$congress == 80])
dwnom112 <- cbind(congress$dwnom1[congress$congress == 112],
                  congress$dwnom2[congress$congress == 112])
## kmeans with 2 clusters
## NOTE(review): kmeans() picks its starting centres at random and no seed is
## set here, so cluster numbering (and hence plot colours) may differ between
## runs; call set.seed() beforehand if exact reproducibility is needed.
k80two.out <- kmeans(dwnom80, centers = 2, nstart = 5)
k112two.out <- kmeans(dwnom112, centers = 2, nstart = 5)
## elements of a list
names(k80two.out)
## final centroids
k80two.out$centers
k112two.out$centers
## number of observations for each cluster by party
table(party = congress$party[congress$congress == 80],
      cluster = k80two.out$cluster)
table(party = congress$party[congress$congress == 112],
      cluster = k112two.out$cluster)
## kmeans with 4 clusters
k80four.out <- kmeans(dwnom80, centers = 4, nstart = 5)
k112four.out <- kmeans(dwnom112, centers = 4, nstart = 5)
par(cex = 1.5)
## plotting the results using the labels and limits defined earlier
## (cluster + 1 shifts the colour index so that cluster 1 uses palette
## entry 2, and so on; palette() below prints the palette in use)
plot(dwnom80, col = k80four.out$cluster + 1, xlab = xlab, ylab = ylab,
     xlim = lim, ylim = lim, main = "80th Congress")
## plotting the centroids
points(k80four.out$centers, pch = 8, cex = 2)
## 112th congress
plot(dwnom112, col = k112four.out$cluster + 1, xlab = xlab, ylab = ylab,
     xlim = lim, ylim = lim, main = "112th Congress")
points(k112four.out$centers, pch = 8, cex = 2)
palette()
## Add the following code to your website.
## For more information on customizing the embed code, read Embedding Snippets.