# Packages ----
# NOTE: attach order is kept exactly as in the original because it decides
# which package wins when two of them export the same function name.
library(sfi)
library(kmodR)
library(directlabels)
library(webshot)
library(ggplot2)
library(dplyr)
library(ggrepel)
library(plotly)
library(ggiraph)
library(scales)
library(tidyverse)
library(knitr)
library(Hmisc)
library(RColorBrewer)
library(extrafont)
library(kableExtra)
library(grid)

# One-off setup step, left disabled on purpose:
# webshot::install_phantomjs()

# Make the extrafont-managed font families available to the graphics devices.
# NOTE(review): reconstructed as a live call (the figures request the
# 'CMU Bright' family) -- confirm against the original .Rmd.
loadfonts()

## Global options ----
options(
  # max.print is an integer option; do not rely on coercion from a string.
  max.print = 75L,
  xtable.comment = FALSE,
  tinytex.verbose = TRUE
)

## knitr chunk / document options ----
opts_chunk$set(
  echo = FALSE,
  cache = FALSE,
  prompt = FALSE,
  tidy = TRUE,
  comment = NA,
  message = FALSE,
  warning = FALSE,
  dpi = 300,
  # dev = "cairo_pdf",
  dev = c("png", "cairo_pdf"),
  fig.pos = "!h",
  fig.path = "figures/"
)
opts_knit$set(width = 75)
\newpage
# Find outliers with kmodR::kmod and store the result in `dat_c`:
# one row per sampled observation, with the assigned cluster
# (`clust_dist.c`), the squared distance reported by kmod
# (`clust_dist.dist_sqr`) and an `outlier` flag.

# no scientific notation
# options(scipen = '999')

# get data
data <- all_data$chen$f1

# get label numbers: the court label is the last '-'-separated token of
# `circuit`
data$text_label <- vapply(
  strsplit(data$circuit, "-", fixed = TRUE),
  function(parts) parts[length(parts)],
  character(1)
)

# recode the numeric codes: 0 -> DC and 12 -> FC. Both patterns are anchored
# so that only an exact code is replaced, never a substring of another label.
data$text_label <- gsub("^0$", "DC", data$text_label)
data$text_label <- gsub("^12$", "FC", data$text_label)

# randomly sample half of the data set for clarity
# (no seed is set here, so the sample differs between runs)
keep_index <- sample(seq_len(nrow(data)), nrow(data) / 2)
data <- data[keep_index, ]

# k-means with outlier removal on the two plotted coordinates.
# NOTE(review): k = 14 clusters; l = 200 is presumably the number of points
# kmod sets aside as outliers -- confirm against the kmodR documentation.
temp <- kmod(
  data[, c("x", "y")],
  k = 14,
  l = 200,
  i_max = 300,
  conv_method = "delta_C",
  conv_error = 0,
  allow_empty_c = TRUE
)

# cbind() expands the kmod assignment table into the columns
# `clust_dist.dist_sqr` and `clust_dist.c`
dat_c <- as.data.frame(cbind(data, clust_dist = temp$XC_dist_sqr_assign))
outlier_index <- as.character(temp$L_index)

# create a variable that indicates outliers: squared distance above the
# threshold
outlier_dist_sqr <- 50
dat_c$outlier <- ifelse(
  dat_c$clust_dist.dist_sqr > outlier_dist_sqr,
  "Outlier",
  "In Cluster"
)
dat_c$clust_dist.c <- as.factor(as.character(dat_c$clust_dist.c))

# SC, FC and DC are always shown as in-cluster. %in% never returns NA, so a
# missing label keeps its distance-based flag instead of becoming NA.
special_courts <- c("SC", "FC", "DC")
dat_c$outlier <- as.factor(
  ifelse(dat_c$text_label %in% special_courts, "In Cluster", dat_c$outlier)
)
\newpage
# version 11
# Figure 1 (kmod version): scatter of `dat_c` with point shape = cluster and
# colour = outlier flag, plus hand-placed court labels.

# Court labels and their manually chosen positions on the plot.
court_labels <- data.frame(
  label = c("SC", "FC", "DC", "9", "5", "8", "6", "10", "4", "7", "2", "3"),
  x = c(-28, -32, -16, -23, -7, 16, 26, 23, 3, 13, 10, -6),
  y = c(42, 30, 30, 9, -35.5, -38, 8, -15, -13, 18, 40, 30),
  stringsAsFactors = FALSE
)

g1 <- ggplot(
  dat_c,
  aes(
    x = x, y = y,
    group = clust_dist.c, shape = clust_dist.c, color = outlier
  )
) +
  geom_point(size = 1, alpha = 0.8) +
  # values are named so the colours stay attached to the right level even
  # when one of the two levels is absent from the data
  scale_color_manual(
    name = "",
    values = c("In Cluster" = "black", "Outlier" = "grey")
  ) +
  scale_shape_manual(
    name = "Cluster group",
    # cluster ids are character, so they sort as "1", "10", "11", ..., "9"
    breaks = sort(as.character(1:14)),
    values = c(14, 20, 8, 19, 12, 1, 2, 3, 5, 6, 7, 10, 15, 17)
  ) +
  labs(
    x = "",
    y = "",
    title = paste0(
      "Figure 1: Centered by Topic-Year", "\n",
      "Averaged by Judge, Labeled by Court"
    ),
    caption = paste0(
      "Circuit, CC Judge Vector", "\n",
      "Demeaned by Year and Big Topic"
    )
  ) +
  theme_sfi(
    lp = "bottom",
    y_axis_title_style = "bold",
    x_axis_title_style = "bold",
    title_style = "bold"
  ) +
  theme(
    axis.text = element_text(size = 10, hjust = 1),
    plot.title = element_text(size = 12)
  ) +
  # one vectorised text layer: every label shares the same font family,
  # size and transparency
  annotate(
    geom = "text",
    label = court_labels$label,
    x = court_labels$x,
    y = court_labels$y,
    alpha = 0.8,
    family = "CMU Bright",
    size = 6
  )

g1
# Figure 1 (k-means version): cluster the sampled points with kmeans(), flag
# points far from their centroid as outliers, and label one random point per
# court with ggrepel.

# get data
data <- all_data$chen$f1

# get label numbers: the court label is the last '-'-separated token of
# `circuit`
data$text_label <- vapply(
  strsplit(data$circuit, "-", fixed = TRUE),
  function(parts) parts[length(parts)],
  character(1)
)

# recode the numeric codes: 0 -> DC and 12 -> FC (anchored so only exact
# codes are replaced)
data$text_label <- gsub("^0$", "DC", data$text_label)
data$text_label <- gsub("^12$", "FC", data$text_label)

# randomly sample half of the data set for clarity
keep_index <- sample(seq_len(nrow(data)), nrow(data) / 2)
data <- data[keep_index, ]

# run kmeans clustering with 14 centres on the plotted coordinates
cluster_data <- data[, c("x", "y")]
dat_c <- kmeans(cluster_data, 14)
clusters <- dat_c$cluster

# one centroid row per observation (the centroid of its assigned cluster)
centers <- as.data.frame(dat_c$centers[dat_c$cluster, ])
rownames(centers) <- NULL
names(centers) <- c("centroid_x", "centroid_y")

# Euclidean distance of every point to its own centroid
distances <- sqrt(rowSums((cluster_data - centers)^2))

# combine with old data
data$cluster <- clusters
data$distance <- as.numeric(distances)
data <- as.data.frame(cbind(data, centers))
data <- data[order(data$distance, decreasing = TRUE), ]

# if distance to the centroid is over the threshold, then outlier
outlier_distance <- 8
data$outlier <- ifelse(
  data$distance > outlier_distance,
  "Outlier",
  "In cluster"
)

# get subset of cluster, x_centroid, and y_centroid (first row per label)
centroids <- data[, c("text_label", "cluster", "centroid_x", "centroid_y")]
centroids <- centroids[!duplicated(centroids$text_label), ]

# For every court, put its label on exactly one randomly chosen row; all
# other rows keep an empty label so ggrepel draws nothing for them.
data$label <- ""
unique_text <- unique(data$text_label)
data_list <- lapply(unique_text, function(this_text) {
  sub_text <- data[data$text_label == this_text, ]
  sub_text <- sub_text[order(sub_text$distance, decreasing = FALSE), ]
  random_row <- sample(seq_len(nrow(sub_text)), 1)
  sub_text$label[random_row] <- this_text
  sub_text
})
data <- do.call("rbind", data_list)

# if label not blank, put in cluster (a labelled point is never drawn as an
# outlier)
has_label <- !is.na(data$label) & nzchar(data$label)
data$outlier <- ifelse(has_label, "In cluster", data$outlier)

# version 11
g1 <- ggplot(
  data,
  aes(x = x, y = y, group = text_label, color = outlier)
) +
  geom_point(size = 1, alpha = 0.8) +
  geom_label_repel(
    aes(label = label),
    size = 5,
    segment.size = 0,
    box.padding = 0.5
  ) +
  # named so the colours stay attached to the right category
  scale_color_manual(
    name = "",
    values = c("In cluster" = "black", "Outlier" = "grey")
  ) +
  labs(
    x = "",
    y = "",
    title = paste0(
      "Figure 1: Centered by Topic-Year", "\n",
      "Averaged by Judge, Labeled by Court"
    ),
    caption = paste0(
      "Circuit, CC Judge Vector", "\n",
      "Demeaned by Year and Big Topic"
    )
  ) +
  theme_sfi(
    lp = "bottom",
    y_axis_title_style = "bold",
    x_axis_title_style = "bold",
    title_style = "bold"
  ) +
  theme(
    axis.text = element_text(size = 10, hjust = 1),
    plot.title = element_text(size = 12)
  )

g1
# Figure 1 (text version): draw every sampled observation as its court label
# instead of a point.

# source data
data <- all_data$chen$f1

# court label = last '-'-separated token of `circuit`, then recode
# 0 -> DC and 12 -> FC
data$text_label <- unlist(
  lapply(
    strsplit(data$circuit, "-", fixed = TRUE),
    function(parts) parts[length(parts)]
  )
)
data$text_label <- gsub("12", "FC", gsub("^0$", "DC", data$text_label))

# keep a random half of the rows so the plot stays readable
keep_index <- sample(seq_len(nrow(data)), nrow(data) / 2)
data <- data[keep_index, ]

# fix the label order; labels outside this set become NA and are removed by
# the complete-case filter below
data$text_label <- factor(
  data$text_label,
  levels = c(
    "1", "2", "3", "4", "5", "6", "7", "8", "9", "10",
    "DC", "FC", "SC"
  )
)
data <- data[complete.cases(data), ]

# version 11
g1 <- ggplot(data, aes(x = x, y = y, group = text_label)) +
  geom_text(aes(label = text_label), alpha = 0.8) +
  labs(
    x = "",
    y = "",
    title = paste0(
      "Figure 1: Centered by Topic-Year", "\n",
      "Averaged by Judge, Labeled by Court"
    ),
    caption = paste0(
      "Circuit, CC Judge Vector", "\n",
      "Demeaned by Year and Big Topic"
    )
  ) +
  theme_sfi(
    lp = "bottom",
    y_axis_title_style = "bold",
    x_axis_title_style = "bold",
    title_style = "bold"
  ) +
  theme(
    axis.text = element_text(size = 10, hjust = 1),
    plot.title = element_text(size = 12)
  )

g1
# Figure 2: scatter coloured by decade, with a text label on one randomly
# chosen point for each of three selected decades.

# get data
data <- all_data$chen$f2

# rename (positional: assumes the source has exactly these five columns in
# this order)
names(data) <- c("x", "y", "circuit", "decade", "decades")

# remove s from decades so it can be numeric (e.g. "1890s" -> 1890)
data$decades <- as.numeric(gsub("s", "", data$decades, fixed = TRUE))

# create an empty label column
data$label <- ""

# Decades that receive a text label on the plot.
labelled_decades <- c(1890, 1950, 2010)

# Process the data decade by decade; for a labelled decade, write the decade
# onto one randomly chosen row. %in% is used for both the row filter and the
# test so that a missing decade neither errors nor produces all-NA rows.
unique_years <- unique(data$decades)
data_list <- lapply(unique_years, function(this_year) {
  sub_year <- data[data$decades %in% this_year, ]
  if (this_year %in% labelled_decades) {
    random_row <- sample(seq_len(nrow(sub_year)), 1)
    sub_year$label[random_row] <- as.character(this_year)
  }
  sub_year
})
data <- do.call("rbind", data_list)

# NOTE(review): `cols` is not used by the plot below (the colour scale is a
# gradient); kept because other code may reference it.
cols <- make_colors(length(unique(data$decade)), bw = TRUE)

# version 11
g1 <- ggplot(data, aes(x = x, y = y, color = decades)) +
  ylim(c(-7, 20)) +
  geom_point(size = 2) +
  # alternative labelling with directlabels, kept for reference:
  # geom_dl(data = subset(data, decades == 1890),
  #         aes(label = decades), method = 'last.qp', hjust = 1) +
  # geom_dl(data = subset(data, decades == 1950),
  #         aes(label = decades), method = 'smart.grid') +
  # geom_dl(data = subset(data, decades == 2010),
  #         aes(label = decades), method = 'last.qp') +
  geom_text_repel(aes(label = label), segment.size = 0) +
  scale_color_gradient(name = "Year", low = "#2C2C2C", high = "#ABABAB") +
  labs(
    x = "",
    y = "",
    title = "Figure 2.",
    subtitle = "Centered by Court Topic, Averaged by Court-Year, Labeled by Decade",
    caption = "Court Decade, SC & CC Court Decade Vector, Demeaned by Circuit and Big Topic"
  ) +
  theme_sfi(
    lp = "bottom",
    lkw = TRUE,
    lkt = "point",
    legend_width = 40,
    y_axis_title_style = "bold",
    x_axis_title_style = "bold",
    title_style = "bold"
  ) +
  theme(axis.text = element_text(size = 10))

g1
# Figure 3: scatter of topic-year points with hand-placed topic names.

# no scientific notation
# options(scipen = '999')

# get data
data <- all_data$chen$f3
names(data) <- gsub("-", ".", names(data), fixed = TRUE)

# remove 9 and Others. %in% never yields NA, so rows with a missing topic are
# kept as real rows instead of being turned into all-NA rows.
data <- data[!(data$big.issue %in% c("Others", "9")), ]

# factor() (unlike as.factor()) also drops levels that no longer occur, so
# the level count matches the label vectors even if the column was already a
# factor.
data$big.issue <- factor(data$big.issue)
JY_levels <- levels(data$big.issue)
JY_labels <- c(
  "1 - Criminal Appeal", "2 - Civil Rights", "3 - First Amendment",
  "4 - Due Process", "5 - Privacy", "6 - Labor", "7 - Regulation"
)
JY_labels_n <- c("1", "2", "3", "4", "5", "6", "7")

# Labels are assigned by position, so the remaining levels must line up
# one-to-one with JY_labels.
stopifnot(length(JY_levels) == length(JY_labels))

data$big.issue.n <- data$big.issue
data$big.issue <- factor(
  data$big.issue,
  levels = JY_levels, labels = JY_labels
)
data$big.issue.n <- factor(
  data$big.issue.n,
  levels = JY_levels, labels = JY_labels_n
)

# label set if topics 9 and Others were kept (for reference):
# '9 - Miscellaneous', 'Others' / '9', 'Others'

# Topic names and their manually chosen positions on the plot.
topic_labels <- data.frame(
  label = c(
    "Regulation", "Criminal Appeal", "1st Amendment", "Civil Rights",
    "Privacy", "Labor", "Due Process"
  ),
  x = c(0, 0, -38, -40, 17, 38, 30),
  y = c(-50, 50, -19, 28, 31, 13, -2),
  stringsAsFactors = FALSE
)

# version 11
# No colour aesthetic is mapped (points are a constant black), so no colour
# scale is needed.
g1 <- ggplot(data, aes(x = x, y = y)) +
  geom_point(size = 2, color = "black", alpha = 0.4) +
  labs(
    x = "",
    y = "",
    title = "Figure 3.",
    subtitle = "Centered by Judge-Year, Averaged by Topic-Year, Labeled by Topic",
    caption = "Big Topics and Year, SC and CC Topic Year Vector, Demeaned by Judge & Year"
  ) +
  annotate(
    geom = "text",
    family = "CMU Bright",
    label = topic_labels$label,
    x = topic_labels$x,
    y = topic_labels$y
  ) +
  theme_sfi(
    lp = "none",
    y_axis_title_style = "bold",
    x_axis_title_style = "bold",
    title_style = "bold"
  ) +
  theme(axis.text = element_text(size = 10, hjust = 1))

g1
# Figure 4: scatter with colour and shape by party affiliation.

# get data
data <- all_data$chen$f4

# recode party affiliation: keep only rows coded 0 or 1, then 0 -> 'R' and
# 1 -> 'D'
data <- data[data$party %in% c(1, 0), ]
data$party <- ifelse(data$party == 0, "R", "D")

# version 11
g1 <- ggplot(data, aes(x = x, y = y, color = party, shape = party)) +
  geom_point(size = 2, alpha = 0.7) +
  # values are named so shape and colour stay attached to the right party
  scale_shape_manual(
    name = "Party affiliation",
    values = c(D = 17, R = 15),
    breaks = c("D", "R"),
    labels = c("D = Democrat (black)", "R = Republican (grey)")
  ) +
  scale_color_manual(
    name = "",
    values = c(D = "black", R = "darkgrey")
  ) +
  labs(
    x = "",
    y = "",
    title = "Figure 4.",
    subtitle = "Centered by Court-Topic-Year, Averaged by Judge, Labeled by Political Party",
    caption = "Party affiliation, SC & CC Judge Vector, Demeaned by Circuit, Big Topics, and year"
  ) +
  theme_sfi(
    lp = "bottom",
    y_axis_title_style = "bold",
    x_axis_title_style = "bold",
    title_style = "bold"
  ) +
  theme(axis.text = element_text(size = 10, hjust = 1)) +
  # hide the colour legend (the shape legend already names the colours);
  # "none" replaces the deprecated `FALSE`
  guides(colour = "none")

g1
# Figure 4 (fill version): outlined circles filled by party affiliation.

# get data
data <- all_data$chen$f4

# recode party affiliation: keep only rows coded 0 or 1, then 0 -> 'R' and
# 1 -> 'D'
data <- data[data$party %in% c(1, 0), ]
data$party <- ifelse(data$party == 0, "R", "D")

# version 11
# Shape 21 is a circle with a separate outline (`color`) and interior
# (`fill`). The outline colour is a constant, so there is no colour scale or
# colour legend to configure.
g1 <- ggplot(data, aes(x = x, y = y, fill = party)) +
  geom_point(size = 2, alpha = 0.7, shape = 21, color = "black") +
  # named so the fills stay attached to the right party
  scale_fill_manual(
    name = "",
    values = c(D = "black", R = "white")
  ) +
  labs(
    x = "",
    y = "",
    title = "Figure 4.",
    subtitle = "Centered by Court-Topic-Year, Averaged by Judge, Labeled by Political Party",
    caption = "Party affiliation, SC & CC Judge Vector, Demeaned by Circuit, Big Topics, and year"
  ) +
  theme_sfi(
    lp = "bottom",
    y_axis_title_style = "bold",
    x_axis_title_style = "bold",
    title_style = "bold"
  ) +
  theme(axis.text = element_text(size = 10, hjust = 1))

g1
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.