knitr::opts_chunk$set(echo = TRUE) option <- params$selected_option
r I(option)
devtools::load_all() library("tidyverse") library("Rtsne") library(simdiversity.entropy) library(simdiversity.data.politics) library(simdiversity.weighted.mds) datasets <- params$datasets
Get swsiss and italian data:
data <- list() dissimilarites <- list() weights <- list() w_disputedness <- list() scores_matrix <- list() for (dataset in datasets) { dissimilarites[[dataset]] <- dataset_from_str( paste0(dataset, "__", option, "__D_final") ) weights[[dataset]] <- dataset_from_str( paste0(dataset, "__", option, "__weight") ) data[[dataset]] <- dataset_from_str(dataset) w_disputedness[[dataset]] <- dataset_from_str( paste0( dataset, "__", option, "__weighted_vote_disputedness" ) ) s_matrix <- dataset_from_str( paste0(dataset, "__", option, "__scores_matrix") ) null_votes <- dataset_from_str( paste0(dataset, "__", option, "__null_votes") ) null_councilors <- dataset_from_str( paste0(dataset, "__", option, "__null_councilors") ) if (!is_empty(null_votes)) { s_matrix <- s_matrix[, !colnames(s_matrix) %in% null_votes] data[[dataset]]$voting_items <- data[[dataset]]$voting_items %>% filter(v_id %in% colnames(s_matrix)) } if (!is_empty(null_councilors)) { s_matrix <- s_matrix[!rownames(s_matrix) %in% null_councilors, ] data[[dataset]]$members_of_parliment <- data[[dataset]]$members_of_parliment %>% filter(i_id %in% rownames(s_matrix)) } scores_matrix[[dataset]] <- s_matrix } results <- list()
for (dataset in datasets) { print(paste( "Dataset :", dataset, "\ncontains:\n -", nrow(scores_matrix[[dataset]]), "Councillors\n -", ncol(scores_matrix[[dataset]]), "Votes\n\n" )) }
for (dataset in datasets) { means_and_vars <- column_means_and_vars(scores_matrix[[dataset]]) table(means_and_vars$Mean_votes) hist( means_and_vars$Votes_variance, main = paste(dataset, "-", "option", option), cex.lab = 2, xlab = expression("vote variance " * v[l]), cex.main = 2, breaks = 20 ) }
for (dataset in datasets) { hist( w_disputedness[[dataset]], breaks = 20, xlab = expression("weighted disputedness " * pi[k]), ylab = "count", cex.axis = 2, cex.lab = 2, main = paste(dataset, "-", "option", option) ) }
for (dataset in datasets) { results[[dataset]]$mds <- weighted_mds( dissimilarites[[dataset]], weights[[dataset]] ) }
\newpage
for (dataset in datasets) { selection <- data[[dataset]]$members_of_parliment %>% select(i_id, first_name, last_name, gender) %>% distinct() gender <-selection$gender labels <- paste(selection$first_name, selection$last_name) plot( results[[dataset]]$mds, c(1, 2), group_by = gender, labels = labels, main = paste("Gender", gsub("_", " ", dataset), "Option", option) ) }
\newpage
for (dataset in datasets) { selection <- data[[dataset]]$members_of_parliment %>% select(i_id, first_name, last_name, gender) %>% distinct() gender <- selection$gender labels <- paste(selection$first_name, selection$last_name) plot( results[[dataset]]$mds, c(3, 4), group_by = gender, labels = labels, main = paste("Gender", gsub("_", " ", dataset), "Option", option) ) }
\newpage
for (dataset in datasets) { selection <- data[[dataset]]$members_of_parliment %>% select(i_id, first_name, last_name, gender) %>% distinct() labels <- paste(selection$first_name, selection$last_name) party <- data[[dataset]]$members_of_parliment$parties %>% map(~ .x$party_name[which(max(.x$end_date - .x$start_date) == .x$end_date - .x$start_date)]) %>% unlist plot( results[[dataset]]$mds, c(1, 2), group_by = party, labels = labels, main = paste("Party", gsub("_", " ", dataset), "Option", option) ) }
\newpage
for (dataset in datasets) { party <- data[[dataset]]$members_of_parliment$parties %>% map(~ .x$party_name[which(max(.x$end_date - .x$start_date) == .x$end_date - .x$start_date)]) %>% map(~ ifelse(is.null(.x), "Other", .x)) %>% unlist selection <- data[[dataset]]$members_of_parliment %>% select(i_id, first_name, last_name, gender) %>% distinct() labels <- paste(selection$first_name, selection$last_name) plot( results[[dataset]]$mds, c(3, 4), group_by = party, labels = labels, main = paste("Party", gsub("_", " ", dataset), "Option", option) ) }
\newpage
for (dataset in datasets) { if ("canton" %in% colnames(data[[dataset]]$members_of_parliment)) { region <- data[[dataset]]$members_of_parliment$canton } else { region <- data[[dataset]]$members_of_parliment$region } selection <- data[[dataset]]$members_of_parliment %>% select(i_id, first_name, last_name, gender) %>% distinct() labels <- paste(selection$first_name, selection$last_name) plot( results[[dataset]]$mds, c(1, 2), group_by = region, labels = labels, main = paste("Region", gsub("_", " ", dataset), "Option", option) ) }
\newpage
for (dataset in datasets) { if ("canton" %in% colnames(data[[dataset]]$members_of_parliment)) { region <- data[[dataset]]$members_of_parliment$canton } else { region <- data[[dataset]]$members_of_parliment$region } selection <- data[[dataset]]$members_of_parliment %>% select(i_id, first_name, last_name, gender) %>% distinct() labels <- paste(selection$first_name, selection$last_name) plot( results[[dataset]]$mds, c(3, 4), group_by = region, labels = labels, main = paste("Region", gsub("_", " ", dataset), "Option", option) ) }
\newpage
plot_tsne <- function(x, group_by, f,...) { title <- c() colors <- grDevices::rainbow(length(unique(group_by)), alpha = .9) names(colors) <- stringr::str_sort(unique(group_by), numeric = TRUE) # check for additional function arguments if ( length(list(...)) ) { Lst <- list(...) if ( !is.null(Lst$zlim) ) { min <- Lst$zlim[1] max <- Lst$zlim[2] } if ( !is.null(Lst$main) ) { title <- Lst$main } if ( !is.null(Lst$colors) ) { colors <- Lst$colors } if ( !is.null(Lst$labels) ) { point_labels <- Lst$labels } } par(mar = c(5, 4, 4, 2) + 0.1, oma = c(6,0,0,0), pty = "s") a <- 0.5 b <- 2 cexf <- ((sqrt(f) - min(sqrt(f)))/(max(sqrt(f)) - min(sqrt(f)))*(b - a) + a) cexl <- rep(0.5, length(cexf)) x_max <- max(-x[, 1]) y_max <- max(x[, 2]) x_min <- min(-x[, 1]) y_min <- min(x[, 2]) plot( x[, 1], x[, 2], cex = cexf, main = title, cex.axis = .9, cex.lab = .9, col = colors[group_by], xlim = c(-.1 * (x_max - x_min) + x_min, .1 * (x_max - x_min) + x_max), ylim = c(-.1 * (y_max - y_min) + y_min, .1 * (y_max - y_min) + y_max) ) wordcloud::textplot( x[, 1], x[, 2], point_labels, show.lines = TRUE, new = FALSE, cex = cexl, col = rgb(.11, .11, .11, .33), xlim = c(-.2 * (x_max - x_min) + x_min, .2 * (x_max - x_min) + x_max), ylim = c(-.2 * (y_max - y_min) + y_min, .2 * (y_max - y_min) + y_max) ) abline(h = 0) abline(v = 0) par(mar = c(0,0,0,0), oma = c(0,0,10,0), pty = "m") if (length(unique(group_by)) < 10) { legend( "bottom", legend = stringr::str_sort(unique(group_by), numeric = TRUE), col = colors[stringr::str_sort(unique(group_by), numeric = TRUE)], pch = 2, horiz = TRUE, cex = .8, bty = "o", xpd = TRUE ) } else { legend( "bottom", legend = str_sort(unique(group_by), numeric = TRUE), col = colors[str_sort(unique(group_by), numeric = TRUE)], pch = 2, ncol = 5, cex = .8, bty = "o", xpd = TRUE ) } layout(1) } for (dataset in datasets) { party <- data[[dataset]]$members_of_parliment$parties %>% map(~ .x$party_name[which(max(.x$end_date - .x$start_date) == .x$end_date - .x$start_date)]) %>% unlist selection <- data[[dataset]]$members_of_parliment %>% select(i_id, first_name, last_name, gender) %>% distinct() labels <- paste(selection$first_name, selection$last_name) results[[dataset]]$tsne_D <- Rtsne( dissimilarites[[dataset]], is_distance = TRUE ) plot_tsne( results[[dataset]]$tsne_D$Y, f = weights[[dataset]], group_by = party, labels = labels, main = paste( "Tsne Distance: Party", gsub("_", " ", dataset), "Option", params$selected_option ) ) }
\newpage
################ betwen parties ################ for (dataset in datasets) { party <- data[[dataset]]$members_of_parliment$parties %>% map_chr(~ .x$party_name[which(max(.x$end_date - .x$start_date) == .x$end_date - .x$start_date)]) %>% unlist() results[[dataset]]$party_similarity <- group_dissimilarity( dissimilarites[[dataset]], f = weights[[dataset]], groups = party ) }
for (dataset in datasets) { scores <- scores_matrix[[dataset]] propYES <- apply(scores, 2, function(vote_epressions) { sum(vote_epressions, na.rm = T) / sum(1 - is.na(vote_epressions)) }) propNO <- apply(scores, 2, function(vote_epressions) { sum(as.numeric(vote_epressions == 0), na.rm = T) / sum(1 - is.na(vote_epressions)) }) hist(propYES, breaks = 50) sum(as.numeric(propNO == 0)) }
############ Entropies on the final dissimilarities d ############ for (dataset in datasets) { results[[dataset]]$entropy <- entropy( dissimilarites[[dataset]], weights[[dataset]] ) print(paste(dataset, "entropy:", results[[dataset]]$entropy)) }
for (dataset in datasets) { results[[dataset]]$effective_entropy <- effective_entropy( dissimilarites[[dataset]], weights[[dataset]], Nloop = 200, Nfine = 1200, pa = -3, pb = 3 ) }
# ########### PLOTS INTERESSANTS ### # ## plot(beta_rel,rhoS[,i]) # ## ou bien, pour aller plus vite for (dataset in datasets) { ee <- results[[dataset]]$effective_entropy is_not_null = which(colSums(ee$rho) > 0) par(mar = c(4.1,5.1,0.5,0.5)) plot(ee$rho, main = paste(dataset, "rho")) par(mar = c(4.1,5.1,0.5,0.5)) plot(ee$beta_rel, main = paste(dataset, "beta_rel")) par(mar = c(4.1,5.1,0.5,0.5)) plot(ee$R, main = paste(dataset, "R")) par(mar = c(4.1,5.1,0.5,0.5)) plot(ee$E, main = paste(dataset, "E")) par(mar = c(4.1,5.1,0.5,0.5)) matplot(ee$Ty, main = paste(dataset, "Ty"), type = "s") par(mar = c(4.1,5.1,0.5,0.5)) matplot(ee$beta_rel,cbind(ee$E,ee$R,ee$H),type = c("l"), lwd = c(2,2,2), col = c(1,1,1), log = "x") }
for (dataset in datasets) { ee <- results[[dataset]]$effective_entropy for (i in nrow(ee$rho)){ matplot(ee$beta_rel, ee$rho[i,],log="x",type="l") } }
# matplot(ee$beta_rel,cbind(ee$rho[,7],ee$rho[,21]),type="l",log="x") # # # ## monotone (avec concavite pour rho(beta)>0)= pour i=1 ?? 13,15 ?? 22 ?? 39,42 ?? 47, 49,50,53 ?? 67, 69 ?? 72, 74 # # ## non-monotone pour i=14,21, # # ## limite (rebond, brisant la concavite) pour i=9,40,41,48,51, 52,68 , 73 # # # ## figure 1 # par(mar=c(4.1,5.1,0.5,0.5)) # plot(ee$beta_rel,ee$rho[,16],type="l", xlab=expression(paste(beta[rel]," temp??rature inverse")), ylab=expression(paste(r[j]," poids du percept")),cex.lab=2,cex.axis=1.5,log = "x",lwd=2) # legend(x=5e-06, y=0.0055, legend="Rural African American Vernacular English", lty=1:2, cex=1.6,box.lty=0) # # # ## figure 2 # par(mar=c(4.1,5.1,0.5,0.5)) # matplot(ee$beta_rel,cbind(ee$rho[,3],ee$rho[,2],ee$rho[,6]),log="x",type="l",xlab=expression(paste(beta[rel]," temp??rature inverse")), ylab=expression(paste(r[j]," poids du percept")),lty=c("dotted","dashed","solid"),col=c("black", "black","black"),cex.lab=2,cex.axis=1.5,lwd=2) # # legend(x=8e-05, y=0.0085, legend=c("Irish English","Scottish English","English dialects in the North of England"), lty=c(3,2,1), cex=1.6,box.lty=0) # # # ## figure 3 # par(mar=c(4.1,5.1,0.5,0.5)) # matplot(ee$beta_rel,cbind(ee$rho[,51],ee$rho[,41],ee$rho[,52]),log="x",type="l",xlab=expression(paste(beta[rel]," temp??rature inverse")), ylab=expression(paste(r[j]," poids du percept")),lty=c("dotted","dashed","solid"),col=c("black", "black","black"),cex.lab=2,cex.axis=1.4,lwd=2) # # legend(x=5e-05, y=0.13, legend=c("Indian English","Nigerian Pidgin","Pakistani English"), lty=c(3,2,1), cex=1.8,box.lty=0) # # # ## figure 4 # par(mar=c(4.1,5.1,0.5,0.5)) # plot(ee$beta_rel,ee$rho[,21],type="l", xlab=expression(paste(beta[rel]," temp??rature inverse")), ylab=expression(paste(r[j]," poids du percept")),cex.lab=2,cex.axis=1.5,log = "x",lwd=2) # # legend(x=3e-06, y=0.9, legend="Chicano English", lty=1:2, cex=2,box.lty=0) # # dim(ee$rho) # apply(ee$rho,1,min) ## donne min_a(rho_a) pour chaque beta, et permet de determiner beta_L: c'est l'iteration 220 pour Nfine= 301 # ee$beta_rel[220] ## = 12.8825 # which.min(ee$rho[220,]) ## = 16 # ee$data$Variety[16] ## Rural African American Vernacular English # # par(mar=c(4.1,5.1,0.5,0.5)) # plot(ee$beta_rel,ee$Ty,type="s", xlab=expression(beta[rel]), ylab=expression(paste(N[eff]," number of effective types")), cex.lab=1.5,log = "x") # # plot(ee$beta_rel,ee$E,type="l", xlab=expression(paste(beta[rel]," inverse temperature")), ylab=expression(paste(E," effective entropy")), cex.lab=1.5,log = "x") # # plot(ee$beta_rel,ee$E,type="l", xlab=expression(paste(beta[rel]," inverse temperature")), ylab=expression(paste(E," effective entropy")), cex.lab=1.5,log = "x") # # # Shannon=-sum(weights[[dataset]]*log(weights[[dataset]])) # H=rep(Shannon,length(power_selection)) # a constant equal to Shannon entropy # # # par(mar=c(4.1,5.1,0.5,0.5)) # matplot(ee$beta_rel,cbind(ee$E,ee$R,H),type = c("l"),lwd=c(2,2,2),col=c(1,1,1),log = "x",xlab=expression(paste(beta[rel]," inverse temperature")), ylab="diversity measures : E, R and H",cex.lab=1.5) # # # plot(ee$beta_rel,ee$Ty,type="s", xlab=expression(paste(beta[rel]," inverse temperature")), ylab=expression(paste(N[eff]," number of identified categories")), cex.lab=1.5,log = "x") # }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.