R/raking.R

raking_svy <- function (design, sample.margins, population.margins, control = list(maxit = 10,epsilon = 1, verbose = FALSE), compress = NULL){
  #' Function for running the raking algorithm from the survey package, ignoring missing marginals
  #'
  #' I created this function to avoid errors when running the raking algorithm with small
  #' samples.

  if (!missing(control)) {
    control.defaults <- formals(rake)$control
    for (n in names(control.defaults)) if (!(n %in% names(control)))
      control[[n]] <- control.defaults[[n]]
  }
  is.rep <- inherits(design, "svyrep.design")
  if (is.rep && is.null(compress))
    compress <- inherits(design$repweights, "repweights_compressed")
  if (is.rep)
    design$degf <- NULL
  if (length(sample.margins) != length(population.margins))
    stop("sample.margins and population.margins do not match.")
  nmar <- length(sample.margins)
  if (control$epsilon < 1)
    epsilon <- control$epsilon * sum(weights(design, "sampling"))
  else epsilon <- control$epsilon
  strata <- lapply(sample.margins, function(margin) if (inherits(margin,
                                                                 "formula")) {
    mf <- model.frame(margin, data = design$variables)
  })
  allterms <- unlist(lapply(sample.margins, all.vars))
  ff <- formula(paste("~", paste(allterms, collapse = "+"),
                      sep = ""))
  oldtable <- svytable(ff, design)
  if (control$verbose)
    print(oldtable)
  oldpoststrata <- design$postStrata
  iter <- 0
  converged <- FALSE
  while (iter < control$maxit) {
    design$postStrata <- NULL
    for (i in 1:nmar) {
      design <- postStratify(design, strata[[i]], population.margins[[i]],
                             compress = FALSE,partial = TRUE)
    }
    newtable <- svytable(ff, design)
    if (control$verbose)
      print(newtable)
    delta <- max(abs(oldtable - newtable))
    if (delta < epsilon) {
      converged <- TRUE
      break
    }
    oldtable <- newtable
    iter <- iter + 1
  }
  rakestrata <- design$postStrata
  if (!is.null(rakestrata)) {
    class(rakestrata) <- "raking"
    design$postStrata <- c(oldpoststrata, list(rakestrata))
  }
  design$call <- sys.call()
  if (is.rep && compress)
    design$repweights <- compressWeights(design$repweights)
  if (is.rep)
    design$degf <- degf(design)
  if (!converged)
    warning("Raking did not converge after ", iter, " iterations.\n")
  return(design)
}




# ###############################################
# ###############################################
# ### use code and data below to write function
# ###############################################
# ###############################################
#
# require(neale)
# require(xlsx)
# require(survey)
# require(dplyr)
# require(stringr)
#
# #automating weights
#
#
#
# dir <- "E:\\DADOS\\CONSULTORIA\\SLEEK DATA\\Ipsos USA\\Chamber of Commerce - 3-3-2017"
# dir.data <- "E:\\DADOS\\CONSULTORIA\\SLEEK DATA\\Ipsos USA\\Chamber of Commerce - 3-3-2017\\Partial Data - 16-03-2017"
# file <- paste0(dir.data,"\\US_Chamber_of_Commerce_SPSS.sav")
# file.r=paste0(dir,"\\Chamber of Commerce all data.RData")
# file.targets <- paste0(dir,"\\Chamber of Commerce Weights.xlsx")
# file.regions <- paste0(dir,"\\census region.xlsx")
#
# ################# Loading data
#
# spss <- get_spss(file)
# data <- spss$sav
# df.vars <- spss$vars
#
#
# #############################
# #targets
#
# targets <- read.xlsx(file.targets,sheetIndex = 1)
# targets <- as.data.frame(targets %>% group_by(var) %>% mutate(pop=pop/sum(pop)))
# regions <- read.xlsx(file.regions,sheetIndex = 1)
#
# #############################
# #recodes
#
# data <- left_join(data,regions,by='SAMPLE_STATE')
# data$grp_wgts <- factor(as.numeric(data$INDUSTRYGROUP),labels=paste0('group.',1:5))
# #data$size_wgts <- rowSums(select(data,FTEMPLOYEE_S1,PTEMPLOYEE_S2),na.rm = TRUE)
# #data$size_wgts <- cut(data$size_wgts,breaks = c(0,4,19,99,99999),labels = c("0-4 employees","5-19 employees","20-99 employees","100+ employees"))
# data$size_wgts <- cut(data$SAMPLE_EMPLOYTOTAL,breaks = c(0,4,19,99,99999),labels = c("0-4 employees","5-19 employees","20-99 employees","100+ employees"))
#
# ##############################
# ############# pasrsing formula
#
# form <-  ~ grp_wgts*size_wgts + region_wgts
# #terms(form)
# df.form <- model.frame(form, data = data)
#
# vars <- attr(terms(form),which="term.labels")
# bi.var <- grep(':',vars,value = TRUE)
# uni.aux <- unlist(unique(str_split(bi.var,pattern = ":")))
# uni.var <- setdiff(all.vars(form),uni.aux)
#
#
# #############################
# #adding missing group in population targets
#
# aux.grp5 <- xtabs(~data$grp_wgts)[5]/sum(xtabs(~data$grp_wgts))
# aux.grp5 <- aux.grp5 * (1+aux.grp5)
# targets <- targets %>% add_row(var='grp_wgts',categ='group.5',pop=aux.grp5)
#
# #############################
# #weights
#
# #data <- droplevels(data)
#
# #uni-dimensional
# wgts <- sort(names(data %>% select(ends_with('wgts'))))
# sample <- lapply(paste0('~',wgts),as.formula)
# targets <- targets %>% arrange(var)
# population <- dlply(targets,'var',function(x){aux<-x$var[1];x$var <- NULL;names(x)[1]<-aux;return(x)})
#
# ##bi-demensional
# teste <- data %>% select(ends_with('wgts'))
# teste$peso <- 1
# teste <- teste %>% gather(var,categ,-2,-peso)
# teste <- teste %>% group_by(var,categ,grp_wgts) %>% summarise(pop=n())
# teste$pop <- teste$pop + runif(length(teste$pop),1,50)
#
#
# teste <- teste %>% arrange(var)
# population <- dlply(teste,'var',function(x){aux<-x$var[1];x$var <- NULL;names(x)[1]<-aux;return(x)})
# sample <- lapply(population,function(x){y<-names(select(x,-pop));as.formula(paste0('~',paste(y,collapse = "+")))})
#
# ###ponderação
# #data.svy <- test(data.svy, sample=list(~grp_wgts,~region_wgts,~size_wgts), population=list(pop.grp,pop.wgts,pop.size), control = list(maxit = 800))
#
# data.svy <- svydesign(id=~argid,data = data);
# data.svy <- raking_svy(data.svy, sample=sample, population=population, control = list(maxit = 800))
# data$weights <- weights(data.svy)
# data$weights <- data$weights * (dim(data)[1] / sum(data$weights))
#
#
# #############################
# #checks
#
# check <- data[,c('weights',wgts)]
# check <- check %>% gather(var,categ,-weights) %>% group_by(var,categ) %>% summarise(sample=n(),raw=n(),weights=sum(weights))
# check <- full_join(check,targets,by=c('var','categ'))
# check <- check %>% mutate(raw=round(100*raw/sum(raw,na.rm = TRUE),1),weights=round(100*weights/sum(weights,na.rm = TRUE),1),pop=round(100*pop/sum(pop,na.rm = TRUE),1))
# check$diff <- check$weights - check$pop
#
# # check bi
#
# check <- data %>% select(ends_with('wgts'),weights)
# check <- check %>% gather(var,categ,-2,-weights)
# check <- check %>% group_by(var,categ,grp_wgts) %>% summarise(sample=n(),raw=n(),weights=sum(weights))
# check <- check %>% group_by(var) %>% mutate(raw=round(100*raw/sum(raw),1),weights=round(100*weights/sum(weights),1))
# check <- left_join(check,teste,by=c('var','categ','grp_wgts'))
# check <- check %>% group_by(var) %>% mutate(pop=round(100*pop/sum(pop),1))
# check$diff <- check$weights - check$pop
neale-eldash/neale documentation built on May 23, 2019, 1:29 p.m.