R/plot.R
In surveysd: Survey Standard Error Estimation for Cumulated Estimates and their Differences in Complex Panel Designs

Documented in plot.surveysd

#' Plot surveysd-Objects
#'
#' Plot results of `calc.stError()`
#'
#' @param x object of class 'surveysd' output of function [calc.stError]
#' @param variable Name of the variable for which standard errors have been
#'   calcualated in `dat`
#' @param type can bei either `"summary"` or `"grouping"`, default value is
#'   `"summary"`. For `"summary"` a barplot is created giving an overview of the
#'   number of estimates having the flag `smallGroup`, `cvHigh`, both or none
#'   of them. For 'grouping' results for point estimate and standard error are
#'   plotted for pre defined groups.
#' @param groups If `type='grouping'` variables must be defined by which the
#'   data is grouped. Only 2 levels are supported as of right now. If only one
#'   group is defined the higher group will be the estimate over the whole
#'   period. Results are plotted for the first argument in `groups` as well as
#'   for the combination of `groups[1]` and `groups[2]`.
#' @param sd.type can bei either `'ribbon'` or `'dot'` and is only used if
#'   `type='grouping'`. Default is `"dot"`
#' For `sd.type='dot'` point estimates are plotted and flagged if the
#' corresponding standard error and/or the standard error using the mean over
#' k-periods exceeded the value `cv.limit` (see [calc.stError]).
#' For `sd.type='ribbon'` the point estimates including ribbons, defined by
#' point estimate +- estimated standard error are plotted.
#' The calculated standard errors using the mean over k periods are plotted
#' using less transparency. Results for the higher level (~`groups[1]`) are
#' coloured grey.
#' @param ... additional arguments supplied to plot.
#'
#' @examples
#'
#' library(surveysd)
#' 
#' set.seed(1234)
#' eusilc <- demo.eusilc(n = 3, prettyNames = TRUE)
#'
#' dat_boot <- draw.bootstrap(eusilc, REP = 3, hid = "hid", weights = "pWeight",
#'                            strata = "region", period = "year")
#'
#' # calibrate weight for bootstrap replicates
#' dat_boot_calib <- recalib(dat_boot, conP.var = "gender", conH.var = "region")
#'
#' # estimate weightedRatio for povmd60 per period
#' group <- list("gender", "region", c("gender", "region"))
#' err.est <- calc.stError(dat_boot_calib, var = "povertyRisk",
#'                         fun = weightedRatio,
#'                         group = group , period.mean = NULL)
#'
#'
#' plot(err.est)
#'
#' # plot results for gender
#' # dotted line is the result on the national level
#' plot(err.est, type = "grouping", groups = "gender")
#'
#'
#' # plot results for rb090 in each db040
#' # with standard errors as ribbons
#' plot(err.est, type = "grouping", groups = c("gender", "region"), sd.type = "ribbon")
#'
#'
#' @export

plot.surveysd <- function(
  x, variable = x$param$var[1], type = c("summary", "grouping"), groups =
    NULL, sd.type = c("dot", "ribbon"), ...) {

  res_type <- GROUPING <- shape_bool <- shape_bool2 <- NULL


  #################
  # Input checking
  type <- type[1]
  sd.type <- sd.type[1]
  if (class(x)[1] != "surveysd") {
    stop("The data needs to be an object of class 'surveysd'!")
  }
  if (!type %in% c("summary", "grouping")) {
    stop("Parameter type can only take values 'summary' or 'grouping'!")
  }
  if (!variable %in% x$param$var) {
    stop("No results for ", variable, " present in the data!")
  }
  if (type == "grouping") {

    if (is.null(groups)) {
      stop("Paramter 'groups' cannot be NULL if type='grouping'!")
    }

    other_var <- unique(unlist(x$param$group))
    if (any(!groups %in% other_var)) {
      stop("Variables in 'groups' must contain variables from x$params$group!")
    }

    match_group <- lapply(
      x$param$group,
      function(z) {
        if (!is.null(z)) {
          identical(sort(z), sort(groups))
        }
      }
    )
    match_group <- any(unlist(match_group))
    if (length(groups) > 2) {
      stop("More than a maximum of 2 groups is not supported as of right now.")
    }
    if (!match_group) {
      if (length(groups) == 1) {
        stop("No results for ", groups, " present in the data")
      } else {
        stop("No results for the combination of ", groups[1], " and ",
             groups[2], " present in the data")
      }
    }
  }
  if (!sd.type %in% c("ribbon", "dot")) {
    stop("Parameter 'sd.type' can only take values 'ribbon' or 'dot'!")
  }
  # convert all factors for data tables in x
  # otherwise ther will be issues with merges
  x$Estimates <- convert_factors(x$Estimates)
  x$smallGroups <- convert_factors(x$smallGroups)
  x$cvHigh <- convert_factors(x$cvHigh)
  x$stEDecrease <- convert_factors(x$stEDecrease)
  #################
  # get variables from x and merge tables together
  period <- x$param$period

  poss.values <- c("Missing", "cvHigh", "SmallGroup+cvHigh", "SmallGroup", "OK")
  poss.color <- c("grey", "orangered3", "yellow2", "dodgerblue1", "green4")

  plot.x <- x$Estimates
  plot.x <- define_type(plot.x, x, variable = variable)
  values.plot <- plot.x[, unique(res_type)]
  color.plot <- poss.color[poss.values %in% values.plot]

  # define groups
  dt.eval("plot.x[grepl('^[[:digit:]]+$',", period,
          "),GROUPING:='Single Periods']")
  dt.eval("plot.x[grepl('-',", period, "),GROUPING:='Other']")
  dt.eval("plot.x[grepl('_',", period, "),GROUPING:='", x$param$period.mean,
          "-Period-Mean']")
  plot.x[, GROUPING := factor(GROUPING, levels = c("Single Periods", paste0(
    x$param$period.mean, "-Period-Mean"), "Other"))]

  #################
  # plot type 'summary'
  if (type == "summary") {
    title <- paste("Results of Groups per period for variable", variable)
    p1 <- ggplot(plot.x, aes(get(period), fill = res_type)) +
      geom_bar() + xlab("") + ylab("Count") + coord_flip() +
      facet_grid(GROUPING ~ ., scales = "free_y", space = "free_y") +
      theme(legend.title = element_blank()) +
      scale_fill_manual(breaks = values.plot,
                        values = color.plot) +
      ggtitle(title)
    plot(p1)
  } else if (type == "grouping") {
    #################
    # plot type 'grouping'
    if (length(groups) == 1) {
      groups <- c(period, groups)
    }

    if (length(other_var) > 0) {
      na_var <- other_var[!other_var %in% groups]
      if (length(na_var) > 0) {
        na_var <- paste(paste0("is.na(", na_var, ")"), collapse = "&")
        na_var <- paste0("&", na_var)
      }

      exp1 <- paste0("c(!is.na(", groups[1], ")&!is.na(", groups[2], ")",
                     na_var, ")")

      na_var <- other_var[!other_var %in% groups[1]]
      exp2_2 <- paste(paste0("is.na(", na_var, ")"), collapse = "&")
      exp2 <- paste0("c(!is.na(", groups[1], ")&", exp2_2, ")")
    } else {
      exp1 <- paste0("c(!is.na(", groups[1], ")&!is.na(", groups[2], "))")
      exp2 <- paste0("c(!is.na(", groups[1], ")&is.na(", groups[2], "))")
    }


    plot.group1 <- dt.eval("plot.x[", exp1, "]")
    plot.group2 <- dt.eval("plot.x[", exp2, "]")

    val_var <- paste0("val_", variable)
    ste_var <- paste0("stE_", variable)
    ste_bool <- variable


    # prepare plot.group1 for plotting
    plot.group1.period <- plot.group1[GROUPING == "Single Periods"]
    plot.group1.periodmean <- plot.group1[GROUPING == paste0(
      x$param$period.mean, "-Period-Mean")]

    # prepare plot.group2 for plotting
    plot.group2.period <- plot.group2[GROUPING == "Single Periods"]
    plot.group2.periodmean <- plot.group2[GROUPING == paste0(
      x$param$period.mean, "-Period-Mean")]

    # rename data columns for plot.group2
    ste_var2 <- paste0(ste_var, "2")
    val_var2 <- paste0(val_var, "2")
    ste_bool2 <- paste0(ste_bool, "2")

    # prepare output differently if period.mean has been used
    if (!is.null(x$param$period.mean)) {
      period.mean <- TRUE

      ste_var_mean <- paste0(ste_var, "_mean")
      ste_bool_mean <- paste0(ste_bool, "_mean")
      ste_var_mean2 <- paste0(ste_var_mean, "2")
      ste_bool_mean2 <- paste0(ste_bool_mean, "2")

      dt.eval("plot.group1.periodmean[,", period, ":=tstrsplit(", period,
              ",split='_',keep=", round((x$param$period.mean + 1) / 2), ")]")

      setnames(plot.group1.periodmean, c(ste_var, ste_bool),
               c(ste_var_mean, ste_bool_mean))

      plot.group1.period <- merge(
        plot.group1.period, dt.eval(
          "plot.group1.periodmean[,.(",
          paste(c(period, other_var, ste_var_mean, ste_bool_mean),
                collapse = ","),
          ")]"),
        all.x = TRUE)

      dt.eval("plot.group2.periodmean[,", period, ":=tstrsplit(", period,
              ",split='_',keep=", round((x$param$period.mean + 1) / 2), ")]")

      setnames(plot.group2.periodmean, c(ste_var, ste_bool),
               c(ste_var_mean, ste_bool_mean))
      plot.group2.period <- merge(
        plot.group2.period,
        dt.eval("plot.group2.periodmean[,.(",
                paste(c(period, other_var, ste_var_mean, ste_bool_mean),
                      collapse = ","), ")]"),
        all.x = TRUE)

      change.names <- c(ste_var, val_var, ste_bool, ste_var_mean, ste_bool_mean)
    } else {
      period.mean <- FALSE
      change.names <- c(ste_var, val_var, ste_bool)
    }

    setnames(plot.group2.period, change.names, paste0(change.names, "2"))
    select.group2 <- paste0(change.names, "2")

    # merge with plot.group
    if (period == groups[1]) {
      plot.group1.period <- merge(
        plot.group1.period,
        plot.group2.period[, mget(c(groups[1], select.group2))],
        all.x = TRUE, by = groups[1])
    } else {
      plot.group1.period <- merge(
        plot.group1.period,
        plot.group2.period[, mget(c(groups[1], period, select.group2))],
        all.x = TRUE, by = c(groups[1], period))
    }

    dt.eval("plot.group1.period[,", groups[1], ":=factor(", groups[1], ")]")
    dt.eval("plot.group1.period[,", groups[2], ":=factor(", groups[2], ")]")

    # plot results of subgroups and add results for mean over k periods
    p1 <- dt.eval("ggplot(plot.group1.period,aes(", period, ",", val_var,
                  ",group=", groups[2], "))+
                    geom_line(aes(colour=get(groups[2])))")

    if (sd.type == "ribbon") {
      p1 <- p1 + geom_ribbon(
        aes(ymin = get(val_var) - get(ste_var),
            ymax = get(val_var) + get(ste_var),
            fill = get(groups[2]), colour = get(groups[2])),
        linetype = 2, alpha = 0.1)


      # add results for group[1] -> higher level grouping
      p1 <- dt.eval("p1 + geom_line(aes(", period, ",", val_var2,
                    ",linetype='dotted'),color='black')")
      p1 <- p1 + geom_ribbon(
        aes(ymin = get(val_var2) - get(ste_var2),
            ymax = get(val_var2) + get(ste_var2)),
        fill = "grey", linetype = 2,  alpha = 0.1)

      if (period.mean) {
        # add results for k-mean-periods
        p1 <- p1 + geom_ribbon(
          aes(ymin = get(val_var) - get(ste_var_mean),
              ymax = get(val_var) + get(ste_var_mean), fill = get(groups[2]),
              colour = get(groups[2])),
          linetype = 2, alpha = 0.5)

        # add results for group[1] and k-period-mean
        p1 <- p1 + geom_ribbon(
          aes(ymin = get(val_var2) - get(ste_var_mean2),
              ymax = get(val_var2) + get(ste_var_mean2)),
          fill = "grey", linetype = 2, alpha = 0.5)
      }

    } else {
      plot.group1.period[, shape_bool := "Low st.Error"]
      plot.group1.period[get(ste_bool) == TRUE, shape_bool := "high st.Error"]
      plot.group1.period[, shape_bool2 := "Low st.Error"]
      plot.group1.period[get(ste_bool2) == TRUE, shape_bool2 := "high st.Error"]

      if (period.mean) {
        plot.group1.period[get(ste_bool_mean) == TRUE,
                           shape_bool := "high st.Error for mean"]
        plot.group1.period[get(ste_bool_mean2) == TRUE,
                           shape_bool2 := "high st.Error for mean"]
      }

      p1 <- dt.eval(
        "p1 + geom_point(data=plot.group1.period[get(ste_bool)==TRUE],
                         aes(", period, ",", val_var, ",shape=shape_bool))")

      # add results for group[1] -> higher level grouping
      p1 <- dt.eval("p1 + geom_line(aes(", period, ",", val_var2,
                    ",linetype='solid'))")
      p1 <- dt.eval(
        "p1 + geom_point(data=plot.group1.period[get(ste_bool2)==TRUE],
                         aes(", period, ",", val_var, ",shape=shape_bool2))")

      if (period.mean) {
        # add results for k-mean-periods
        p1 <- dt.eval("p1 + geom_point(data=plot.group1.period[", ste_bool_mean,
                      "==TRUE],
                      aes(", period, ",", val_var, ",shape=shape_bool))")

        # add results for group[1] and k-period-mean
        p1 <- dt.eval("p1 + geom_point(data=plot.group1.period[",
                      ste_bool_mean2, "==TRUE],
                      aes(", period, ",", val_var, ",shape=shape_bool2))")

      }
    }


    # define paramters for plot
    ylabel <- paste0("Point Estimate on variable ", x$param$var)
    p1 <- p1 + xlab(x$param$period) + ylab(ylabel) +
      theme(axis.text.x = element_text(angle = 45, hjust = 1))


    # split the plots into facets
    if (groups[1] == period) {
      p1 <- p1 + facet_wrap(~get(groups[2]))
      if (sd.type == "ribbon") {
        p1 <- p1 + guides(fill = 'none',
                          colour = 'none',
                          linetype = guide_legend(title = ""))
        p1 <- p1 + scale_linetype_manual(
          values = c("solid"), labels = paste0("Result for ", groups[1]))
      } else {
        shapes <- c(1, 16)
        names(shapes) <- c("high st.Error", "high st.Error for mean")
        p1 <- p1 + scale_shape_manual(values = shapes)
        p1 <- p1 + scale_colour_discrete(guide = 'none') +
        # p1 <- p1 + scale_colour_discrete(value="grey")
           theme(legend.title = element_blank())
        p1 <- p1 + scale_linetype_manual(values = c("dotted"), labels = paste0(
          "Result for ", groups[1]))
      }
    } else {
      p1 <- p1 + facet_wrap(~get(groups[1]))
      if (sd.type == "ribbon") {
        p1 <- p1 + guides(fill = guide_legend(title = groups[2]),
                          colour = guide_legend(title = groups[2]),
                          linetype = guide_legend(title = ""))
        p1 <- p1 + scale_linetype_manual(values = c("solid"), labels = paste0(
          "Result for ", groups[1]))
      } else {
        shapes <- c(1, 16)
        names(shapes) <- c("high st.Error", "high st.Error for mean")
        p1 <- p1 + scale_shape_manual(values = shapes) +
          guides(colour = guide_legend(title = groups[2]),
                 linetype = guide_legend(title = ""),
                 shape = guide_legend(title = ""))

        p1 <- p1 + scale_linetype_manual(values = c("dotted"), labels = paste0(
          "Result for ", groups[1]))
      }
    }

    plot(p1)
  }
}


define_type <- function(plot.x, x, variable = "HX080") {

  SMALLGROUP <- res_type <- NULL

  val_variable <- paste("val", variable, sep = "_")
  poss.values <- c("Missing", "cvHigh", "SmallGroup+cvHigh", "SmallGroup", "OK")

  # merge data
  merge.on <- copy(colnames(x$smallGroups))
  x$smallGroups[, SMALLGROUP := TRUE]
  plot.x <- x$smallGroups[plot.x,, on = c(merge.on)] # nolint
  merge.on <- unique(c(x$param$period, unlist(x$param$group)))
  plot.x <- x$cvHigh[plot.x,, on = c(merge.on)] # nolint

  # define type
  plot.x[!is.na(SMALLGROUP) & get(variable) == FALSE, res_type := "SmallGroup"]
  plot.x[!is.na(SMALLGROUP) & get(variable) == TRUE,
         res_type := "SmallGroup+cvHigh"]
  plot.x[is.na(get(val_variable)), res_type := "Missing"]
  plot.x[get(variable) == TRUE & is.na(res_type), res_type := "cvHigh"]
  plot.x[is.na(res_type), res_type := "OK"]
  plot.x[, res_type := factor(res_type, levels = poss.values)]

  return(plot.x)
}

convert_factors <- function(x) {
  x <- x[, lapply(
    .SD,
    function(z) {
      if (is.factor(z)) {
        as.character(z)
      } else {
        z
      }
    }
  )]
  return(x)
}
Any scripts or data that you put into this service are public.
surveysd documentation built on Dec. 28, 2022, 2:15 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
surveysd
Survey Standard Error Estimation for Cumulated Estimates and their Differences in Complex Panel Designs

R/plot.R
In surveysd: Survey Standard Error Estimation for Cumulated Estimates and their Differences in Complex Panel Designs

Defines functions convert_factors define_type plot.surveysd

Documented in plot.surveysd

Try the surveysd package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

surveysd Survey Standard Error Estimation for Cumulated Estimates and their Differences in Complex Panel Designs

R/plot.R In surveysd: Survey Standard Error Estimation for Cumulated Estimates and their Differences in Complex Panel Designs

Defines functions convert_factors define_type plot.surveysd

Documented in plot.surveysd

Try the surveysd package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

surveysd
Survey Standard Error Estimation for Cumulated Estimates and their Differences in Complex Panel Designs

R/plot.R
In surveysd: Survey Standard Error Estimation for Cumulated Estimates and their Differences in Complex Panel Designs