R/codes.R
In manifestoR: Access and Process Data and Documents of the Manifesto Project

Documented in aggregate_cee_codes recode_cee_codes recode_v5_to_v4 v4_categories v5_categories

#' Process CMP codings
#' 
#' Several functions to process the CMP codings
#' 
#' @param x Vector of codes, ManifestoDocument or ManifestoCorpus
#' 
#' @details
#' \code{recode_cee_codes} recode the sub-categories used
#' in coding several manifestos in Central and Eastern Europe (4 digits) to
#' the main categories in the coding scheme (3 digits).
#' 
#' @rdname cmp_codes
#' @export
recode_cee_codes <- function(x) {
  UseMethod("recode_cee_codes", x)
}

#' @method recode_cee_codes default
#' @export
recode_cee_codes.default <- function(x) {
  for (i in names(cee_aggregation_relations())) { 
    x = gsub(paste0("^(", paste0(gsub("per", "", cee_aggregation_relations()[[i]]), collapse = "|"), ")$"), gsub("per", "", i), x)
  }
  x
}

#' @method recode_cee_codes ManifestoDocument
recode_cee_codes.ManifestoDocument <- function(x) {
  doc <- x
  codes(doc) <- recode_cee_codes(codes(doc))
  return(doc)
}

#' @method recode_cee_codes ManifestoCorpus
#' @export
recode_cee_codes.ManifestoCorpus <- function(x) {
  tm_map(x, recode_cee_codes)
}

#' `aggregate_cee_codes` is deprecated and will be removed in a future version of
#' manifestoR. Please use only `recode_cee_codes`, which provides the exact same 
#' functionality, but is more consistent in its name.
#' 
#' @rdname cmp_codes
#' @export
aggregate_cee_codes <- function(x) {
  .Deprecated("recode_cee_codes", package = "manifestoR")
  recode_cee_codes(x)
}


#' @rdname cmp_codes
#' 
#' @details
#' \code{recode_v5_to_v4} recode the CMP codings according to
#' the more specialized Coding Handbook Version 5 to the more general
#' categories of Handbook Version 4. Codes 202.2, 605.2 and 703.2 are
#' converted to a 000, while all other subcategory codes with an appended
#' dot and fourth digit are aggregated to the corresponding three-digit 
#' main category.
#' 
#' @export
recode_v5_to_v4 <- function(x) {
  UseMethod("recode_v5_to_v4", x)
}

#' @method recode_v5_to_v4 default
#' @export
recode_v5_to_v4.default <- function(x) {
  x <- as.character(x)
  x[x %in% c("202.2", "605.2", "703.2")] <- "0"
  return(gsub("^(\\d{3})\\.\\d$", "\\1", x))
}

#' @method recode_v5_to_v4 ManifestoDocument
#' @export
recode_v5_to_v4.ManifestoDocument <- function(x) {
  doc <- x
  codes(doc) <- recode_v5_to_v4(codes(doc))
  return(doc)
}

#' @method recode_v5_to_v4 ManifestoCorpus
#' @export
recode_v5_to_v4.ManifestoCorpus <- function(x) {
  tm_map(x, recode_v5_to_v4)
}

#' Lists of categories and category relations
#' 
#' Code numbers of the Manifesto Project's category scheme. For documentation
#' see \url{https://manifesto-project.wzb.eu/datasets}.
#' 
#' @rdname categories
#' @export 
v4_categories <- function() {
  c("uncod",  101, 102, 103, 104, 105,
    106, 107, 108, 109, 110, 201,
    202, 203, 204, 301, 302, 303,
    304, 305, 401, 402, 403, 404,
    405, 406, 407, 408, 409, 410,
    411, 412, 413, 414, 415, 416,
    501, 502, 503, 504, 505, 506,
    507, 601, 602, 603, 604, 605,
    606, 607, 608, 701, 702, 703,
    704, 705, 706)
}

#' @param include_parents include v5-categories that have subcategories
#' 
#' @rdname categories
#' @export 
v5_categories <- function(include_parents = TRUE) {
  v5_v4_aggregation_relations() %>%
    unlist() %>%
    { gsub("per", "", .) } %>%
    { gsub("_", ".", .) } %>%
    c(v4_categories()) %>%
    unique() %>%
    { if (!include_parents) purrr::discard(., ~ { .x %in% gsub("per", "", setdiff(names(v5_v4_aggregation_relations()), "peruncod")) }) else . } %>%
    sort()
}

#' @rdname categories
#' @export 
cee_categories <- function() {
  c("1011", "1012", "1013", "1014", "1015", "1016", "1021", "1022", 
    "1023", "1024", "1025", "1026", "1031", "1032", "1033", 
    "2021", "2022", "2023", "2031", "2032", "2033", "2041", 
    "3011", "3051", "3052", "3053", "3054", "3055", 
    "4011", "4012", "4013", "4014", "4121", "4122", "4123", "4124", 
    "4131", "4132", 
    "5021", "5031", "5041", "5061", 
    "6011", "6012", "6013", "6014", "6061", "6071", "6072", "6081", 
    "7051", "7052", "7061", "7062")
}

#' @rdname categories
#' @export 
v5_v4_aggregation_relations <- function() {
  list(peruncod = c("peruncod", "per202_2", "per605_2", "per703_2"),
       per103 = c("per103", "per103_1", "per103_2"),
       per201 = c("per201", "per201_1", "per201_2"),
       per202 = c("per202", "per202_1", "per202_3", "per202_4"),
       per305 = c("per305", "per305_1", "per305_2", "per305_3", "per305_4", "per305_5", "per305_6"),
       per416 = c("per416", "per416_1", "per416_2"),
       per601 = c("per601", "per601_1", "per601_2"),
       per602 = c("per602", "per602_1", "per602_2"),
       per605 = c("per605", "per605_1"),
       per606 = c("per606", "per606_1", "per606_2"),
       per607 = c("per607", "per607_1", "per607_2", "per607_3"),
       per608 = c("per608", "per608_1", "per608_2", "per608_3"),
       per703 = c("per703", "per703_1")
  )
}

#' @rdname categories
#' @export 
cee_aggregation_relations <- function() {
  list(
    per101 = c("per101","per1011","per1012","per1013","per1014","per1015","per1016"),
    per102 = c("per102","per1021","per1022","per1023","per1024", "per1025","per1026"),
    per103 = c("per103","per1031","per1032","per1033"),
    per202 = c("per202","per2021","per2022","per2023"),
    per203 = c("per203","per2031","per2032", "per2033"),
    per204 = c("per204","per2041"),
    per301 = c("per301","per3011"),
    per305 = c("per305","per3051","per3052","per3053","per3054","per3055"),
    per401 = c("per401","per4011","per4012","per4013","per4014"),
    per412 = c("per412","per4121","per4122","per4123","per4124"),
    per413 = c("per413","per4131","per4132"),
    per502 = c("per502","per5021"),
    per503 = c("per503","per5031"),
    per504 = c("per504","per5041"),
    per506 = c("per506","per5061"),
    per601 = c("per601","per6011", "per6012","per6013","per6014"),
    per606 = c("per606","per6061"),
    per607 = c("per607","per6071","per6072"),
    per608 = c("per608","per6081"),
    per705 = c("per705","per7051","per7052"),
    per706 = c("per706","per7061","per7062")
  )
}


baeck_policy_dimensions <- function() {
  list(foreign = c(101, 102, 103, 106, 107, 108, 109),
       defence = c(104, 105),
       interior = c(201, 202, 203, 204, 301, 302,
                    303, 304, 605, 607, 608),
       justice = c(201, 202, 203, 204, 303, 304, 605),
       finance = c(402, 414),
       economy = c(401, 403, 404, 405, 406, 407, 408, 409, 410, 412, 413, 415),
       labour = c(504, 505, 701, 702),
       education = c(506, 507),
       health = c(504, 505, 706),
       agriculture = c(703),
       industry = c(401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 412, 413, 414),
       environment = c(416, 501),
       social_affairs = c(503, 603, 604, 606, 705, 706)
       ) %>%
    lapply(prefix, "per")
}

bischof_issue_groups <- function() {
  list(
    ecology = c(416, 410, 501, 106),
    nationalist = c(601, 602, 605, 607, 608),
    agrarian = c(703),
    regional = c(301, 302, 706),
    europe = c(406, 407, 108, 110)
    ) %>%
    lapply(prefix, "per")
}

meyer_miller_2013_policy_dimensions <- function() {
  baeck_policy_dimensions() %>% { .$industry <- NULL; . }
}

#' Default programmatic clarity dimensions from 
#' Giebler/Lacewell/Regel/Werner 2015.
#'
#' @references Giebler/Lacewell/Regel/Werner (2015). Mass, Catch-all, or 
#' Programmatic? Toward an Empirical Classification of Party Types. Manuscript.
#'
#' @export
clarity_dimensions <- function() {
  list(
    "fsp" = list(pole_1 = c(101), pole_2 = c(102)),
    "mil" = list(pole_1 = c(104), pole_2 = c(105)),
    "int" = list(pole_1 = c(107), pole_2 = c(109)),
    "eui" = list(pole_1 = c(108), pole_2 = c(110)),
    "con" = list(pole_1 = c(203), pole_2 = c(204)),
    "cen" = list(pole_1 = c(301), pole_2 = c(302)),
    "mre" = list(pole_1 = c(401), pole_2 = c(403)),
    "pro" = list(pole_1 = c(406), pole_2 = c(407)),
    "fis" = list(pole_1 = c(409), pole_2 = c(414)),
    "wel" = list(pole_1 = c(504), pole_2 = c(505)),
    "edu" = list(pole_1 = c(506), pole_2 = c(507)),
    "nwl" = list(pole_1 = c(601), pole_2 = c(602)),
    "tmo" = list(pole_1 = c(603), pole_2 = c(604)),
    "mul" = list(pole_1 = c(607), pole_2 = c(608)),
    "lab" = list(pole_1 = c(701), pole_2 = c(702))
  ) %>%
    lapply(lapply, prefix, "per")
}


#' Aggregate category percentages in groups
#' 
#' \code{aggregate_pers} is a general function to aggregate percentage variables by creating a new
#' variable holding the sum. If a variable with the name for the aggregate
#' already exists, it is overwritten, giving a warning if it is changed, not NA,
#' not zero and not named "peruncod".
#' 
#' @param data dataset to use in aggregation
#' @param groups (named) list of variable name vectors to aggregate to a new one
#' (as given in the name); see default value for an example of the format
#' @param na.rm passed on to \code{\link{sum}}
#' @param keep keep variables that were aggregated in result?
#' @param overwrite Names of the variables that are allowed to be overwritten by
#' aggregate. Defaults to all aggregate variable names. If a variable is
#' overwritten, a message is issued in any case.
#' @seealso \code{\link{aggregate_pers_cee}}
#' 
#' @rdname aggregate_pers
#' @export
aggregate_pers <- function(data,
                           groups = v5_v4_aggregation_relations(),
                           na.rm = FALSE,
                           keep = FALSE,
                           overwrite = names(groups)) {
  
  data <- 
    Reduce(function(data, aggregate) {
        aggregated <- data %>%
          select(one_of(intersect(groups[[aggregate]], names(data))))
        if (ncol(aggregated) != 0L) {
          aggregated <- rowSums(aggregated, na.rm = na.rm)
            if (aggregate %in% names(data) &&
                any(!is.na(data[,aggregate]) & 
                    data[,aggregate] != 0.0 & 
                    na_replace(data[,aggregate] != aggregated), TRUE)) {
              if (aggregate %in% overwrite) {
                message(paste0("Changing non-zero supercategory per value ", aggregate, 
                               " when aggregating subcateogory percentages"))
                data[,aggregate] <- aggregated
              }
            } else {
              data[,aggregate] <- aggregated
            }
          }
        data
      },
      names(groups),
      init = data)
  
  if (!keep) {
    data %>%
      select(-one_of(setdiff(intersect(names(data), unlist(groups)), names(groups))))
  } else {
    data
  }
    
}

#' Aggregate cee-categories to main categories
#' 
#' Adds the code frequencies in a dataset of the 4 digit per-variables (per1011 to per7062 - mostly used in codings
#' of Central and Eastern European countries) to the main categories in the coding scheme (3 digits).
#' 
#' A wrapper of \code{\link{aggregate_pers}} using \code{cee_aggregation_relations}. 
#' 
#' @param data dataset to use in aggregation
#' @seealso \code{\link{aggregate_pers}}
#' @export
aggregate_pers_cee <- function(data) {
  aggregate_pers(data, 
                 groups = cee_aggregation_relations(),
                 keep = TRUE,
                 na.rm = TRUE)
}

#' Count the codings from a ManifestoDocument
#'
#' @param doc ManifestoDocument, ManifestoCorpus or vector of codes
#' @param code_layers vector of names of code layers to use, defaults to cmp_code; Caution:
#' The layer eu_code is handled separately in the parameter with_eu_codes due to its
#' different logic
#' @param with_eu_codes Whether to include special EU code layer; by default ("auto") taken
#' from the document's metadata
#' @param prefix prefix for naming the count/percentage columns in the resulting data.frame
#' @param relative If true, percentages are returned, absolute counts else
#' @param include_codes Vector of categories that should be included even if they are not
#' present in the data; the value of the created variables then defaults to 0.0 (or NA if
#' no codes are present at all);
#' @param aggregate_v5_subcategories if TRUE, for handbook version 5 subcategories, the aggregate
#' category's count/percentage is computed as well
#' @return A data.frame with onw row and the counts/percentages as columns
#' @export
count_codes <- function(doc,
                        code_layers = c("cmp_code"),
                        with_eu_codes = "auto",
                        prefix = "per",
                        relative = TRUE,
                        include_codes = if("cmp_code" %in% code_layers)
                                           { v4_categories() } else { c() },
                        aggregate_v5_subcategories = TRUE) {
  UseMethod("count_codes", doc)
}

fix_names_code_table <- function(df, prefix, include_codes) {
  
  if (length(include_codes) > 0) {
    
    ensure_names <- paste0(prefix, include_codes) %>%
    { gsub(".", "_", ., fixed = TRUE)} %>%
    { subset(., !grepl(paste0("$(", prefix, ").+^"), .)) } %>%
      as.character() %>%
      paste(collapse = ")|(") %>%
      { paste0("^(", . , ")$") }
    
    the_order <- order(names(df))
    df %>%
      select(the_order) %>%
      select(dplyr::matches("party"),
             dplyr::matches("date"),
             dplyr::starts_with(prefix),
             dplyr::matches(ensure_names),
             dplyr::matches("total"))
    
    
  } else {
    
    the_order <- order(names(df))
    df %>%
      select(the_order) %>%
      select(dplyr::matches("party"),
             dplyr::matches("date"),
             dplyr::starts_with(prefix),
             dplyr::matches("total"))
    
  }
  
}

fix_missing_counted_codes <- function(df, relative = TRUE) {
  
  m <- is.na(df)
  m[which(df$total <= 0),] <- FALSE
  df[m] <- if (relative) 0.0 else 0L
  
  return(df)
  
}

#' @export
count_codes.ManifestoCorpus <- function(doc,
                                        code_layers = c("cmp_code"),
                                        with_eu_codes = "auto",
                                        prefix = "per",
                                        relative = TRUE,
                                        include_codes = if("cmp_code" %in% code_layers)
                                                           { v4_categories() } else { c() },
                                        aggregate_v5_subcategories = TRUE) {
  
  lapply(content(doc),
         count_codes,
         code_layers, with_eu_codes, prefix, relative, include_codes, aggregate_v5_subcategories) %>%
    bind_rows() %>%
    fix_missing_counted_codes(relative = relative) %>%
    fix_names_code_table(prefix,
                         include_codes = include_codes)
  
}

#' @export
count_codes.ManifestoDocument <- function(doc,
                        code_layers = c("cmp_code"),
                        with_eu_codes = "auto",
                        prefix = "per",
                        relative = TRUE,
                        include_codes = if("cmp_code" %in% code_layers)
                                           { v4_categories() } else { c() },
                        aggregate_v5_subcategories = TRUE) {
  
  if (with_eu_codes == "auto") {
    with_eu_codes <- meta(doc, "has_eu_code")
    if (is.null(with_eu_codes)) {
      with_eu_codes <- FALSE
    }
  }
  the_codes <- c()
  if ("eu_code" %in% code_layers) {
    warning("eu_code is included in code_layers, but should be included via the with_eu_codes parameter to respect its logic!")
  }
  for (layer in code_layers) {
    the_codes <- c(the_codes, as.character(codes(doc, layer)))
  }
  if (length(with_eu_codes) > 0 && with_eu_codes) {
    eu_codes <- codes(doc, "eu_code")
    the_codes <- c(the_codes, eu_codes[!is.na(eu_codes) & eu_codes != 0L])
  }
  data.frame(party = null_to_na(meta(doc, "party")),
             date = null_to_na(meta(doc, "date"))) %>%
    bind_cols(count_codes(the_codes, code_layers, with_eu_codes, prefix, relative, include_codes, aggregate_v5_subcategories))
  
  
}

#' @export
count_codes.default <- function(doc,
                                code_layers = c("cmp_code"),
                                with_eu_codes = "auto",
                                prefix = "per",
                                relative = TRUE,
                                include_codes = if("cmp_code" %in% code_layers)
                                                   { v4_categories() } else { c() },
                                aggregate_v5_subcategories = TRUE) {
  
  tt <- table(doc)

  df <- as.data.frame(t(as.matrix(tt)))
  if (ncol(df) > 0) {
    names(df) <- paste0(prefix, names(df))
    if (relative) {
      n <- sum(df[1,])
      df[1,] <- df[1,]/n * 100
      df$total <- n
    }
  } else {
    df <- data.frame(total = 0L)
  }
  
  df <- aggregate_pers(df,
                       groups = list(peruncod = c("per0", "per000")),
                       keep = FALSE,
                       na.rm = TRUE)

  names(df) <- gsub(".", "_", names(df), fixed = TRUE)
    
  if (length(include_codes) > 0) {
    for (the_name in setdiff(paste0(prefix, gsub(".", "_", include_codes, fixed = TRUE)), names(df))) {
      df[,the_name] <- ifelse(df$total == 0L, NA_real_, 0.0)
    }
  }
  
  if (aggregate_v5_subcategories) {
    df <- aggregate_pers(df,
                         groups = v5_v4_aggregation_relations(),
                         keep = TRUE)
  }

  return(df)    
  
}

Any scripts or data that you put into this service are public.

manifestoR documentation built on Jan. 13, 2021, 9:53 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

manifestoR
Access and Process Data and Documents of the Manifesto Project

R/codes.R
In manifestoR: Access and Process Data and Documents of the Manifesto Project

Defines functions v5_categories v4_categories recode_v5_to_v4.ManifestoCorpus recode_v5_to_v4.ManifestoDocument recode_v5_to_v4.default recode_v5_to_v4 aggregate_cee_codes recode_cee_codes.ManifestoCorpus recode_cee_codes.ManifestoDocument recode_cee_codes.default recode_cee_codes

Documented in aggregate_cee_codes recode_cee_codes recode_v5_to_v4 v4_categories v5_categories

Try the manifestoR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

manifestoR Access and Process Data and Documents of the Manifesto Project

R/codes.R In manifestoR: Access and Process Data and Documents of the Manifesto Project

Defines functions v5_categories v4_categories recode_v5_to_v4.ManifestoCorpus recode_v5_to_v4.ManifestoDocument recode_v5_to_v4.default recode_v5_to_v4 aggregate_cee_codes recode_cee_codes.ManifestoCorpus recode_cee_codes.ManifestoDocument recode_cee_codes.default recode_cee_codes

Documented in aggregate_cee_codes recode_cee_codes recode_v5_to_v4 v4_categories v5_categories

Try the manifestoR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

manifestoR
Access and Process Data and Documents of the Manifesto Project

R/codes.R
In manifestoR: Access and Process Data and Documents of the Manifesto Project