knitr::opts_chunk$set(echo = TRUE)
option <- params$selected_option

Option: r I(option)

devtools::load_all()
library("tidyverse")
library("Rtsne")
library(simdiversity.entropy)
library(simdiversity.data.politics)
library(simdiversity.weighted.mds)

datasets <- params$datasets

Get swsiss and italian data:

data <- list()
dissimilarites <- list()
weights <- list()
w_disputedness <- list()
scores_matrix <- list()

for (dataset in datasets) {
  dissimilarites[[dataset]] <- dataset_from_str(
    paste0(dataset, "__", option, "__D_final")
  )

  weights[[dataset]] <- dataset_from_str(
    paste0(dataset, "__", option, "__weight")
  )

  data[[dataset]] <- dataset_from_str(dataset)

  w_disputedness[[dataset]] <- dataset_from_str(
    paste0(
      dataset, "__", option,
      "__weighted_vote_disputedness"
    )
  )

  s_matrix <- dataset_from_str(
    paste0(dataset, "__", option, "__scores_matrix")
  )
  null_votes <- dataset_from_str(
    paste0(dataset, "__", option, "__null_votes")
  )
  null_councilors <- dataset_from_str(
    paste0(dataset, "__", option, "__null_councilors")
  )
  if (!is_empty(null_votes)) {
    s_matrix <-
      s_matrix[, !colnames(s_matrix) %in% null_votes]
     data[[dataset]]$voting_items <- 
       data[[dataset]]$voting_items %>% filter(v_id %in%  colnames(s_matrix))
  }
  if (!is_empty(null_councilors)) {
    s_matrix <-
      s_matrix[!rownames(s_matrix) %in% null_councilors, ]
      data[[dataset]]$members_of_parliment <- 
        data[[dataset]]$members_of_parliment %>% filter(i_id %in% rownames(s_matrix))
  }
  scores_matrix[[dataset]] <- s_matrix

}


results <- list()

Basic infos

for (dataset in datasets) {
  print(paste(
    "Dataset :", dataset, "\ncontains:\n -",
    nrow(scores_matrix[[dataset]]), "Councillors\n -",
    ncol(scores_matrix[[dataset]]), "Votes\n\n"
  ))
}

Vote means and variance

for (dataset in datasets) {
  means_and_vars <- column_means_and_vars(scores_matrix[[dataset]])
  table(means_and_vars$Mean_votes)
  hist(
    means_and_vars$Votes_variance,
    main = paste(dataset, "-", "option", option),
    cex.lab = 2,
    xlab = expression("vote variance " * v[l]),
    cex.main = 2,
    breaks = 20
  )
}

Vote disputedness

for (dataset in datasets) {
  hist(
    w_disputedness[[dataset]],
    breaks = 20,
    xlab = expression("weighted disputedness " * pi[k]),
    ylab = "count",
    cex.axis = 2,
    cex.lab = 2,
    main = paste(dataset, "-", "option", option)
  )
}

MDS

for (dataset in datasets) {
  results[[dataset]]$mds <- weighted_mds(
    dissimilarites[[dataset]], weights[[dataset]]
  )
}

\newpage

MDS by Gender Dimension 1 and 2

for (dataset in datasets) {
  selection <-
    data[[dataset]]$members_of_parliment %>% select(i_id, first_name, last_name, gender) %>% distinct()
  gender <-selection$gender
  labels <- paste(selection$first_name, selection$last_name)

  plot(
    results[[dataset]]$mds,
    c(1, 2),
    group_by = gender,
    labels = labels,
    main = paste("Gender", gsub("_", " ", dataset), "Option", option)
  )
}

\newpage

MDS by Gender Dimension 3 and 4

for (dataset in datasets) {
  selection <-
    data[[dataset]]$members_of_parliment %>% select(i_id, first_name, last_name, gender) %>% distinct()
  gender <- selection$gender
  labels <- paste(selection$first_name, selection$last_name)
  plot(
    results[[dataset]]$mds,
    c(3, 4),
    group_by = gender,
    labels = labels,
    main = paste("Gender", gsub("_", " ", dataset), "Option", option)
  )
}

\newpage

MDS by Party Dimension 1 and 2

for (dataset in datasets) {
  selection <-
    data[[dataset]]$members_of_parliment %>% select(i_id, first_name, last_name, gender) %>% distinct()
  labels <- paste(selection$first_name, selection$last_name)

  party <- 
    data[[dataset]]$members_of_parliment$parties %>% 
    map(~ .x$party_name[which(max(.x$end_date - .x$start_date) == .x$end_date - .x$start_date)]) %>% 
    unlist

  plot(
    results[[dataset]]$mds,
    c(1, 2),
    group_by = party,
    labels = labels,
    main = paste("Party", gsub("_", " ", dataset), "Option", option)
  )
}

\newpage

MDS by Party Dimension 3 and 4

for (dataset in datasets) {

  party <- 
    data[[dataset]]$members_of_parliment$parties %>% 
    map(~ .x$party_name[which(max(.x$end_date - .x$start_date) == .x$end_date - .x$start_date)]) %>% map(~ ifelse(is.null(.x), "Other", .x)) %>%
    unlist

  selection <-
    data[[dataset]]$members_of_parliment %>% select(i_id, first_name, last_name, gender) %>% distinct()
  labels <- paste(selection$first_name, selection$last_name)

  plot(
    results[[dataset]]$mds,
    c(3, 4),
    group_by = party,
    labels = labels,
    main = paste("Party", gsub("_", " ", dataset), "Option", option)
  )
}

\newpage

MDS by Region Dimension 1 and 2

for (dataset in datasets) {
  if ("canton" %in% colnames(data[[dataset]]$members_of_parliment)) {
    region <- data[[dataset]]$members_of_parliment$canton
  } else {
  region <- data[[dataset]]$members_of_parliment$region
  }
  selection <-
    data[[dataset]]$members_of_parliment %>% select(i_id, first_name, last_name, gender) %>% distinct()
  labels <- paste(selection$first_name, selection$last_name)

  plot(
    results[[dataset]]$mds,
    c(1, 2),
    group_by = region,
    labels = labels,
    main = paste("Region", gsub("_", " ", dataset), "Option", option)
  )
}

\newpage

MDS by Region Dimension 3 and 4

for (dataset in datasets) {
  if ("canton" %in% colnames(data[[dataset]]$members_of_parliment)) {
    region <- data[[dataset]]$members_of_parliment$canton
  } else {
  region <- data[[dataset]]$members_of_parliment$region
  }
  selection <-
    data[[dataset]]$members_of_parliment %>% select(i_id, first_name, last_name, gender) %>% distinct()
  labels <- paste(selection$first_name, selection$last_name)

  plot(
    results[[dataset]]$mds,
    c(3, 4),
    group_by = region,
    labels = labels,
    main = paste("Region", gsub("_", " ", dataset), "Option", option)
  )
}

\newpage

TSNE

plot_tsne <- function(x, group_by, f,...) {
  title <- c()
  colors <- grDevices::rainbow(length(unique(group_by)), alpha = .9)
  names(colors) <- stringr::str_sort(unique(group_by), numeric = TRUE)
  # check for additional function arguments
  if ( length(list(...)) ) {
    Lst <- list(...)
    if ( !is.null(Lst$zlim) ) {
      min <- Lst$zlim[1]
      max <- Lst$zlim[2]
    }
    if ( !is.null(Lst$main) ) {
      title <- Lst$main
    }
    if ( !is.null(Lst$colors) ) {
      colors <- Lst$colors
    }
    if ( !is.null(Lst$labels) ) {
      point_labels <- Lst$labels
    }
  }

  par(mar = c(5, 4, 4, 2) + 0.1, oma = c(6,0,0,0), pty = "s")
  a <- 0.5
  b <- 2
  cexf <- ((sqrt(f) - min(sqrt(f)))/(max(sqrt(f)) - min(sqrt(f)))*(b - a) + a)
  cexl <- rep(0.5, length(cexf))
  x_max <- max(-x[, 1])
  y_max <- max(x[, 2])
  x_min <- min(-x[, 1])
  y_min <- min(x[, 2])

  plot(
    x[, 1],
    x[, 2],
    cex = cexf,
    main = title,
    cex.axis = .9,
    cex.lab = .9,
    col = colors[group_by],
    xlim = c(-.1 * (x_max - x_min) + x_min, .1 * (x_max - x_min) + x_max),
    ylim = c(-.1 * (y_max - y_min) + y_min, .1 * (y_max - y_min) + y_max)
  )

  wordcloud::textplot(
    x[, 1],
    x[, 2],
    point_labels,
    show.lines = TRUE,
    new = FALSE,
    cex = cexl,
    col = rgb(.11, .11, .11, .33),
    xlim = c(-.2 * (x_max - x_min) + x_min, .2 * (x_max - x_min) + x_max),
    ylim = c(-.2 * (y_max - y_min) + y_min, .2 * (y_max - y_min) + y_max)

  )
  abline(h = 0)
  abline(v = 0)
  par(mar = c(0,0,0,0), oma = c(0,0,10,0), pty = "m")
  if (length(unique(group_by)) < 10) {
    legend(
      "bottom",
      legend = stringr::str_sort(unique(group_by), numeric = TRUE),
      col = colors[stringr::str_sort(unique(group_by), numeric = TRUE)],
      pch = 2,
      horiz = TRUE,
      cex = .8,
      bty = "o",
      xpd = TRUE
    )
  } else {
    legend(
      "bottom",
      legend = str_sort(unique(group_by), numeric = TRUE),
      col = colors[str_sort(unique(group_by), numeric = TRUE)],
      pch = 2,
      ncol = 5,
      cex = .8,
      bty = "o",
      xpd = TRUE
    )
  }
  layout(1)
}

for (dataset in datasets) {

  party <- 
    data[[dataset]]$members_of_parliment$parties %>% 
    map(~ .x$party_name[which(max(.x$end_date - .x$start_date) == .x$end_date - .x$start_date)]) %>% 
    unlist

  selection <-
    data[[dataset]]$members_of_parliment %>% select(i_id, first_name, last_name, gender) %>% distinct()
  labels <- paste(selection$first_name, selection$last_name)

  results[[dataset]]$tsne_D <- Rtsne(
    dissimilarites[[dataset]],
    is_distance = TRUE
  )
  plot_tsne(
    results[[dataset]]$tsne_D$Y,
    f = weights[[dataset]],
    group_by = party,
    labels = labels,
    main = paste(
      "Tsne Distance: Party", gsub("_", " ", dataset), "Option", params$selected_option
    )
  )
}

\newpage

Party similarity

################ betwen parties ################

for (dataset in datasets) {
  party <- 
    data[[dataset]]$members_of_parliment$parties %>%
    map_chr(~ .x$party_name[which(max(.x$end_date - .x$start_date) == .x$end_date - .x$start_date)]) %>% 
    unlist()

  results[[dataset]]$party_similarity <- group_dissimilarity(
    dissimilarites[[dataset]],
    f = weights[[dataset]],
    groups = party
  )
}

Consensual Votes

for (dataset in datasets) {
  scores <- scores_matrix[[dataset]]
  propYES <- apply(scores, 2, function(vote_epressions) {
    sum(vote_epressions, na.rm = T) / sum(1 - is.na(vote_epressions))
  })
  propNO <- apply(scores, 2, function(vote_epressions) {
    sum(as.numeric(vote_epressions == 0), na.rm = T) / sum(1 - is.na(vote_epressions))
  })
  hist(propYES, breaks = 50)
  sum(as.numeric(propNO == 0))
}

Entropy

############ Entropies on the final dissimilarities d ############
for (dataset in datasets) {
  results[[dataset]]$entropy <- entropy(
    dissimilarites[[dataset]], weights[[dataset]]
  )
  print(paste(dataset, "entropy:", results[[dataset]]$entropy))
}

Effective Entropy

for (dataset in datasets) {
results[[dataset]]$effective_entropy <- effective_entropy(
  dissimilarites[[dataset]], weights[[dataset]],
  Nloop = 200, Nfine = 1200,
  pa = -3, pb = 3
)
}
# ########### PLOTS INTERESSANTS ###
# ## plot(beta_rel,rhoS[,i])
# ## ou bien, pour aller plus vite
for (dataset in datasets) {

  ee <- results[[dataset]]$effective_entropy
  is_not_null = which(colSums(ee$rho) > 0)
   par(mar = c(4.1,5.1,0.5,0.5))
  plot(ee$rho, main = paste(dataset,  "rho"))
   par(mar = c(4.1,5.1,0.5,0.5))
  plot(ee$beta_rel, main = paste(dataset, "beta_rel"))
   par(mar = c(4.1,5.1,0.5,0.5))
  plot(ee$R, main = paste(dataset, "R"))
   par(mar = c(4.1,5.1,0.5,0.5))
  plot(ee$E, main = paste(dataset, "E"))
     par(mar = c(4.1,5.1,0.5,0.5))
  matplot(ee$Ty, main = paste(dataset, "Ty"), type = "s")
       par(mar = c(4.1,5.1,0.5,0.5))
  matplot(ee$beta_rel,cbind(ee$E,ee$R,ee$H),type =  c("l"), lwd = c(2,2,2),
          col = c(1,1,1), log = "x")
}
for (dataset in datasets) {
  ee <- results[[dataset]]$effective_entropy
  for (i in nrow(ee$rho)){
    matplot(ee$beta_rel, ee$rho[i,],log="x",type="l")
  }
}
# matplot(ee$beta_rel,cbind(ee$rho[,7],ee$rho[,21]),type="l",log="x")
# 
# # ## monotone (avec concavite pour rho(beta)>0)= pour i=1 ?? 13,15 ?? 22 ?? 39,42 ?? 47, 49,50,53 ?? 67, 69 ?? 72, 74
# # ## non-monotone pour i=14,21,
# # ## limite (rebond, brisant la concavite) pour i=9,40,41,48,51, 52,68 , 73
# 
# # ## figure 1
# par(mar=c(4.1,5.1,0.5,0.5))
# plot(ee$beta_rel,ee$rho[,16],type="l", xlab=expression(paste(beta[rel],"          temp??rature inverse")), ylab=expression(paste(r[j],"          poids du percept")),cex.lab=2,cex.axis=1.5,log = "x",lwd=2)
# legend(x=5e-06, y=0.0055, legend="Rural African American Vernacular English", lty=1:2, cex=1.6,box.lty=0)
# 
# # ## figure 2
# par(mar=c(4.1,5.1,0.5,0.5))
# matplot(ee$beta_rel,cbind(ee$rho[,3],ee$rho[,2],ee$rho[,6]),log="x",type="l",xlab=expression(paste(beta[rel],"          temp??rature inverse")), ylab=expression(paste(r[j],"          poids du percept")),lty=c("dotted","dashed","solid"),col=c("black", "black","black"),cex.lab=2,cex.axis=1.5,lwd=2)
# # legend(x=8e-05, y=0.0085, legend=c("Irish English","Scottish English","English dialects in the North of England"), lty=c(3,2,1), cex=1.6,box.lty=0)
# 
# # ## figure 3
# par(mar=c(4.1,5.1,0.5,0.5))
# matplot(ee$beta_rel,cbind(ee$rho[,51],ee$rho[,41],ee$rho[,52]),log="x",type="l",xlab=expression(paste(beta[rel],"          temp??rature inverse")), ylab=expression(paste(r[j],"          poids du percept")),lty=c("dotted","dashed","solid"),col=c("black", "black","black"),cex.lab=2,cex.axis=1.4,lwd=2)
# # legend(x=5e-05, y=0.13, legend=c("Indian English","Nigerian Pidgin","Pakistani English"), lty=c(3,2,1), cex=1.8,box.lty=0)
# 
# # ## figure 4
# par(mar=c(4.1,5.1,0.5,0.5))
# plot(ee$beta_rel,ee$rho[,21],type="l", xlab=expression(paste(beta[rel],"          temp??rature inverse")), ylab=expression(paste(r[j],"          poids du percept")),cex.lab=2,cex.axis=1.5,log = "x",lwd=2)
# # legend(x=3e-06, y=0.9, legend="Chicano English", lty=1:2, cex=2,box.lty=0)
# 
# dim(ee$rho)
# apply(ee$rho,1,min)  ## donne min_a(rho_a) pour chaque beta, et permet de determiner beta_L: c'est l'iteration 220 pour Nfine= 301
# ee$beta_rel[220]       ##  = 12.8825
# which.min(ee$rho[220,]) ##  = 16
# ee$data$Variety[16] ## Rural African American Vernacular English
# 
# par(mar=c(4.1,5.1,0.5,0.5))
# plot(ee$beta_rel,ee$Ty,type="s", xlab=expression(beta[rel]), ylab=expression(paste(N[eff],"          number of effective types")), cex.lab=1.5,log = "x")
# 
# plot(ee$beta_rel,ee$E,type="l", xlab=expression(paste(beta[rel],"          inverse temperature")), ylab=expression(paste(E,"          effective entropy")), cex.lab=1.5,log = "x")
# 
# plot(ee$beta_rel,ee$E,type="l", xlab=expression(paste(beta[rel],"          inverse temperature")), ylab=expression(paste(E,"          effective entropy")), cex.lab=1.5,log = "x")
# #
# Shannon=-sum(weights[[dataset]]*log(weights[[dataset]]))
# H=rep(Shannon,length(power_selection))  # a constant equal to Shannon entropy
# #
# par(mar=c(4.1,5.1,0.5,0.5))
# matplot(ee$beta_rel,cbind(ee$E,ee$R,H),type =  c("l"),lwd=c(2,2,2),col=c(1,1,1),log = "x",xlab=expression(paste(beta[rel],"          inverse temperature")), ylab="diversity measures : E, R and H",cex.lab=1.5)
# #
# plot(ee$beta_rel,ee$Ty,type="s", xlab=expression(paste(beta[rel],"          inverse temperature")), ylab=expression(paste(N[eff],"          number of identified categories")), cex.lab=1.5,log = "x")
# }


simdiversity/research-plotics documentation built on Jan. 26, 2021, 10:21 p.m.