R/msproj.R

msproj <- function(data = NULL, patt = NULL, country = NULL, SSP = "SSP2", fert = NULL, mort = NULL, dom.mig = NULL, recl = NULL, edu = NULL, 
                   detail.out = FALSE, nSx = "axmx", iPr = 18, reclass.model = NULL, reclass.period = 5, mig.adj = FALSE,
                   maxeapr5 = c(urban = 0.7, rural = 0.5, total = 0.6)) {
  "%nin%" <- function(x, y) !x %in% y
  is.not.null <- function(x) !is.null(x)
  
  #positions of data files in the list:
  pos.st.sp <- grep("state.space", names(data))
  pos.mig_dom <- grep("mig_dom", names(data))
  pos.mig_int <- grep("mig_int", names(data))
  pos.var.def <- grep("var.def", names(data))
  pos.axmx <- grep("axmx", names(data))
  spatial.vars <- intersect(c("region", "residence"), data[[pos.var.def]]$variables)
  
  if (length(country) == 0) {
    country <- as.character(data[[pos.var.def]][variables == "country", values])
    #if there is still no country, set "Country" as country:
    if (length(country) == 0) country <- "Country"
    }
  
  #-----sanity checks-----

  #check for duplicate column names:
  dups.idx <- which(sapply(data, function(x) any(duplicated(names(x)))))
  dups <- sapply(c("axmx", "mig", "state_space", "var_def"), function(x) grep(x, names(dups.idx)))
  names(dups) <- c("axmx", "migration", "state space", "variable definition")
  dups <- names(unlist(dups))
  
  if (length(dups) > 0) {
    stop(paste(rep("There are duplicate column names in your", length(dups)), dups, "file. "))
  }
  
  #check if there is any NA in the data sets:
  check.na <- function(x) { 
    if (any(names(x) == "mrate")) {
      data.cols <- which(names(x) == "mrate")
    } else {
      data.cols <- names(x)[-c(1:which(names(x) == "var"))]
    }
    which(!complete.cases(x[, data.cols, with = FALSE]))
  }
  x <- check.na(data[[pos.st.sp]])
  if (length(pos.mig_dom > 0)) {
    y <- check.na(data[[pos.mig_dom]])
  } else {
    y <- NULL
  }

  if (length(x) > 0 | length(y) > 0) {
    x.stop <- y.stop <- NULL
    if (length(x) > 0) {
      x.stop <- paste("Please check row(s)", paste(x, collapse = ", "), "in your state space file.")
    }
    if (length(y) > 0) {
      y.stop <- paste("Please check row(s)", paste(y, collapse = ", "), "in your migration file.")
    }    
    stop(paste("There are missing values in your data set(s).", x.stop, y.stop))
    }
  
  #check for columns - rows shouldn't be needed - that are empty (!= check for NAs):
  check.empty <- function(x) {
    which(sapply(x, function(x) all(x == "")))
    }
  
  x <- check.empty(data[[pos.st.sp]])
  if (length(pos.mig_dom > 0)) {
    y <- check.empty(data[[pos.mig_dom]])
  } else {
    y <- NULL
  }

  if (length(x) > 0 | length(y) > 0) {
    x.stop <- y.stop <- NULL
    if (length(x) > 0) {
      x.stop <- paste("Please check column(s)", paste(x, collapse = ", "), "in your state space file.")
    }
    if (length(y) > 0) {
      y.stop <- paste("Please check column(s)", paste(y, collapse = ", "), "in your migration file.")
    }    
    stop(paste("There are empty columns in your data set(s).", x.stop, y.stop))
  }
         
  #1. reclass:
  reclass.dt <- data[[pos.st.sp]][var %in% c("reclasstr", "gap", "perural")]
  if (nrow(reclass.dt) > 0) {
    m.vars <- names(reclass.dt)[-c(1:which(names(reclass.dt) == "var"))]
    reclass.dt <- melt(reclass.dt, measure.vars = m.vars)
    reclass.dt <- reclass.dt[grep("rural", reclass.dt$variable), ]
    reclass.dt <- dcast(reclass.dt, factor(variable) ~ var, value.var = "value")
    names(reclass.dt)[1] <- "area"
    reclass.dt$area <- sapply(reclass.dt$area, function(x) sub("_rural", "", x))
    areas.oldtr <- reclass.dt[reclasstr > 0.05 & perural > 0.4 | sapply(reclass.dt$reclasstr, function(x) identical(all.equal(x, 0), TRUE)), 
                              .(area = area, reclasstr)]
    if (any(reclass.dt$reclasstr > 0) & is.not.null(reclass.model)) {
      load(paste("input_models/", reclass.model, sep = ""))
      }
    } else {
    rm(reclass.dt)
  }
  
  #2. sexratio:
  sexratio.dt <- data[[pos.st.sp]][var == "sexr"]
  if (nrow(sexratio.dt) == 0) {
    sexratio.dt <- data[[pos.st.sp]][var %in% c("sexr_rur", "sexr_urb", "sexr_tot")]
  }
  sexratio.dt <- melt(sexratio.dt, id.vars = c("period", "var"), measure.vars = names(sexratio.dt)[-c(1:which(names(sexratio.dt) == "var"))], 
                      variable.name = "area", value.name = "sexr")
  sexratio.dt <- sexratio.dt[sexr != 0][, var := NULL]
  if (any(sexratio.dt$area == "value")) sexratio.dt$area <- country
  sexratio.dt <- sexratio.dt[order(period, area)]
  
  #column order for popedu.dt, asfredu.dt, ass.edu.dt, dom.dt and nsxedu.dt:
  col.order <- c("period", "sex", "age", "edu", "region", "residence", "origin", "destination", "pattern", "var", "value")
  
  #3. popedu:
  popedu.dt <- data[[pos.st.sp]][var == "pop"]
  if (any(c("region", "residence") %in% spatial.vars)) {
    id.vars <- intersect(c("period", "sex", "age", "edu", "var"), names(popedu.dt))
    m.vars <- names(popedu.dt)[-c(1:which(names(popedu.dt) == "var"))]
    popedu.dt <- melt(popedu.dt, id.vars = id.vars, measure.vars = m.vars)
    if (all(c("region", "residence") %in% spatial.vars)) { #region and residence
      popedu.dt <- popedu.dt[, `:=` (region = factor(gsub("_.*", "", variable)), residence = factor(gsub(".*_", "", variable)), variable = NULL)]
    } else if ("residence" %in% spatial.vars) { #only residence
      setnames(popedu.dt, "variable", "residence") 
    } else if ("region" %in% spatial.vars) { #only region
      setnames(popedu.dt, "variable", "region")
    } 
  }
  p.vars <- names(popedu.dt)[names(popedu.dt) %in% c("sex", "age", "edu", "region", "residence")]
  popedu.dt[, pattern := do.call(paste, c(.SD, sep = "_")), .SDcols = p.vars]
  setcolorder(popedu.dt, col.order[col.order %in% names(popedu.dt)])
 
  #If an area (=region/residence combination) has inhabitants of a certain age, 
  #there must be males and females - check for a positive number of inhabitants first, 
  #then check if this is true for both sexes:
  # mf.miss <-  sapply(seq(0, 30, 5), function(x) {
  #   mf.pos <- popedu.dt[age == x, .(Freq = sum(value)), by = c("region", "residence", "sex")]
  #   mf.pos.ftab <- ftable(xtabs(Freq ~ region + sex + residence, data = mf.pos), row.vars = c(1, 3))
  #   idx.F <- apply(matrix(mf.pos.ftab, ncol = 2), 1, function(x) xor(all(x == 0), all(x > 0)))
  #   areas.F <- expand.grid(attr(mf.pos.ftab, "row.vars")$residence, attr(mf.pos.ftab, "row.vars")$region)[2:1][idx.F == FALSE, ]
  #   }, simplify = FALSE)
  # names(mf.miss) <- seq(0, 30, 5)
  # if (any(sapply(mf.miss, nrow) > 0)) {
  #   stop(c("There are either no male or female inhabitants of age ", paste(names(mf.miss[sapply(mf.miss, nrow) > 0]), collapse = ", "), " in some of the areas."))
  # }
  #4. nsxedu:
  if (nSx == "axmx") {
    if (length(pos.axmx) > 0) {
      nsxedu.dt <- data[[pos.axmx]]
    } else {
      axmx.dt <- data[[pos.st.sp]][var %in% c("ax", "mx")]
      if (any(c("region", "residence") %in% spatial.vars)) {
        id.vars <- intersect(c("period", "sex", "age", "edu", "var"), names(axmx.dt))
        m.vars <- names(axmx.dt)[-c(1:which(names(axmx.dt) == "var"))]
        axmx.dt <- melt(axmx.dt, id.vars = id.vars, measure.vars = m.vars)
        if (all(c("region", "residence") %in% spatial.vars)) { #region and residence
          axmx.dt <- axmx.dt[, `:=` (region = factor(gsub("_.*", "", variable)), residence = factor(gsub(".*_", "", variable)), variable = NULL)]
        } else if ("residence" %in% spatial.vars) { #only residence
          setnames(axmx.dt, "variable", "residence")
        } else if ("region" %in% spatial.vars) { #only region
          setnames(axmx.dt, "variable", "region")
        }
      }
    axmx.dt[, axmxvars := do.call(paste, c(.SD, sep = "_")), .SDcols = c("period", setdiff(p.vars, "age"))]
    nsxedu.dt <- data.table(ddply(axmx.dt, .(axmxvars), lt.axmx))
    nsxedu.split <- tstrsplit(nsxedu.dt$axmxvars, split = "_")
    nsxedu.dt$period <- nsxedu.split[1]
    if (length(nsxedu.split) > 2) {
      patt.temp <- data.table(nsxedu.split[[2]], nsxedu.dt$age - 5, matrix(unlist(nsxedu.split[-(1:2)]), nrow = nrow(nsxedu.dt)))
    } else {
      patt.temp <- data.table(nsxedu.split[[2]], nsxedu.dt$age - 5)
    }
    nsxedu.dt$pattern <- do.call(paste, c(patt.temp, sep = "_"))
    nsxedu.dt[, `:=` (axmxvars = NULL, age = NULL)]
    setcolorder(nsxedu.dt, c("period", "pattern", "nSx"))
    #write.csv(nsxedu.dt, paste("input_data/", patt, "_axmx.csv", sep = ""), row.names = FALSE)
      
      #sanity check:
      if (any(nsxedu.dt$nSx < 0 | nsxedu.dt$nSx > 1)) {
        stop(paste("nSx-values smaller than 0 or larger than 1 occured.\n Please check the values for ax and mx in your input data."))#, e.g., in lines",  
        #                 paste(which(nsxedu.dt$nSx < 0 | nsxedu.dt$nSx > 1)[1:10], collapse = ", "), "etc."))
        # nsxedu.dt[nsxedu.dt$nSx < 0 | nsxedu.dt$nSx > 1]
        }
      }
    }
  #5. asfredu:
  asfredu.dt <- data[[pos.st.sp]][var == "asfr"]
  if (any(c("region", "residence") %in% spatial.vars)) {
    id.vars <- intersect(c("period", "sex", "age", "edu", "var"), names(asfredu.dt))
    m.vars <- names(asfredu.dt)[-c(1:which(names(asfredu.dt) == "var"))]
    asfredu.dt <- melt(asfredu.dt, id.vars = id.vars, measure.vars = m.vars)
    if (all(c("region", "residence") %in% spatial.vars)) { #region and residence
      asfredu.dt <- asfredu.dt[, `:=` (region = factor(gsub("_.*", "", variable)), residence = factor(gsub(".*_", "", variable)), variable = NULL)]
    } else if ("residence" %in% spatial.vars) { #only residence
      setnames(asfredu.dt, "variable", "residence") 
    } else if ("region" %in% spatial.vars) { #only region
      setnames(asfredu.dt, "variable", "region")
    } 
  }
  asfredu.dt[, pattern := do.call(paste, c(.SD, sep = "_")), .SDcols = p.vars]
  asfredu.dt <- asfredu.dt[, .(period, pattern, value)]
  setnames(asfredu.dt, "value", "asfr")
  #6. ass.edu:
  ass.edu.dt <- data[[pos.st.sp]][var == "eapr"]
  #sanity check:
#   tab.eaprs <- table(ass.edu.dt$edu, ass.edu.dt$age)
#   if (any(apply(tab.eaprs, 1, function(x) sum(x > 0)) > 1)) {
#     stop(paste("Each possible educational transition should only have one eapr assigned to it, namely the one for the maximum age it can happen.\n 
# In the data, some transitions are associated with more than one age level. Please check your input data."))
#   }
    if (nrow(ass.edu.dt) > 0) {
    m.vars <- names(ass.edu.dt)[-c(1:which(names(ass.edu.dt) == "var"))]
    areasex.vars <- c("region", "residence", "sex")
    idx <- sapply(rowSums(ass.edu.dt[, (m.vars), with = FALSE]), function(x) identical(all.equal(x, 0), TRUE))
    ass.edu.dt <- subset(ass.edu.dt, !idx)
    if (any(c("region", "residence") %in% spatial.vars)) {
      id.vars <- intersect(c("period", "sex", "age", "edu", "var"), names(ass.edu.dt))
      ass.edu.dt <- melt(ass.edu.dt, id.vars = id.vars, measure.vars = m.vars)
      if (all(c("region", "residence") %in% spatial.vars)) { #region and residence
        ass.edu.dt <- ass.edu.dt[, `:=` (region = factor(gsub("_.*", "", variable)), residence = factor(gsub(".*_", "", variable)), variable = NULL)]
      } else if ("residence" %in% spatial.vars) { #only residence
        setnames(ass.edu.dt, "variable", "residence") 
      } else if ("region" %in% spatial.vars) { #only region
        setnames(ass.edu.dt, "variable", "region")
      } 
    }
    ass.edu.dt[, pattern := do.call(paste, c(.SD, sep = "_")), .SDcols = p.vars]
    setcolorder(ass.edu.dt, col.order[col.order %in% names(ass.edu.dt)])
    ass.edu.dt[, areasex := do.call(paste, c(.SD, sep = "_")), .SDcols = intersect(names(ass.edu.dt), areasex.vars)]
  } else {
    ass.edu.dt <- NULL
    }
  #7. domestic migration:
  if (length(pos.mig_dom) > 0) {
    dom.dt <- data[[pos.mig_dom]]
    #MW 2017-03-28: delete period information, otherwise matching projedu and dom is not possible from 2015 on
    if ("period" %in% names(dom.dt)) dom.dt[, period := NULL]
    if (any(names(dom.dt) == "mrate")) {
      setnames(dom.dt, "mrate", "value")
    } else {
      data.cols <- names(dom.dt)[-c(1:which(names(dom.dt) == "var"))]
      typeof.tab <- table(sapply(dom.dt[, .SD, .SDcols = data.cols], typeof)) #to prevent warning message
      #MW 2018-05-11: delete the following line since "World" is no longer used for internal migration
      #mode(dom.dt$World) <- names(which(typeof.tab == max(typeof.tab)))
      dom.dt <- melt(dom.dt, measure.vars = data.cols, variable.name = "destination")
    }
    dom.dt[, age := age - 5]
    dom.dt <- dom.dt[age >= 0]
    setcolorder(dom.dt, col.order[col.order %in% names(dom.dt)])
    }
  
  migr.contrl <- 1
  if (exists("dom.dt")) dom.dt[, value := value * migr.contrl]
  
  #8. international migration:
  if (length(pos.mig_int) > 0) {
    int.dt <- data[[pos.mig_int]]
    #delete period information, otherwise matching projedu and int is not possible from 2015 on
    #if ("period" %in% names(int.dt)) int.dt[, period := NULL]
    int.dt[, age := age - 5]
    int.dt <- int.dt[age >= 0]
    setcolorder(int.dt, col.order[col.order %in% names(int.dt)])
  }

  # if mabs is used for emigration, estimate the corresponding mrate for the base period and keep it constant throughout the simulation
  if ("mrate" %nin% int.dt) { 
    int.dt <- int.dt # placeholder
  }
  
  #-----------data preparation:----------

  SSP.name <- SSP
  
  SSP1 <- list(fert = c(seq(0.95, 0.8, -0.05), seq(0.7875, 0.75, -0.0125), rep(0.75, 10)), 
               mort = 0.5, 
               dom.mig = c(seq(1.125, 1.5, 0.125), rep(1.5, 14)), 
               recl = rep(1, 18), 
               edu = 1)
  SSP2 <- list(fert = 1, 
               mort = 0, 
               dom.mig = rep(1, 18), 
               recl = rep(1, 18), 
               edu = 1)
  SSP3 <- list(fert = c(seq(1.05, 1.2, 0.05), seq(1.2125, 1.25, 0.0125), rep(1.25, 10)), 
               mort = -0.5, 
               dom.mig = c(seq(0.875, 0.5, -0.125), rep(0.5, 14)), 
               recl = c(seq(1.125, 1.5, 0.125), rep(1.5, 14)), 
               edu = 1)
  SSP4 <- list(fert = c(seq(0.95, 0.8, -0.05), seq(0.7875, 0.75, -0.0125), rep(0.75, 10)), #MW 2016-08-31: changed fert assumptions from H to L
               mort = -0.5, 
               dom.mig = c(seq(1.125, 1.5, 0.125), rep(1.5, 14)), 
               recl = c(seq(1.125, 1.5, 0.125), rep(1.5, 14)), 
               edu = 1)
  SSP5 <- list(fert = c(seq(0.95, 0.8, -0.05), seq(0.7875, 0.75, -0.0125), rep(0.75, 10)), 
               mort = 0.5, 
               dom.mig = c(seq(1.125, 1.5, 0.125), rep(1.5, 14)), 
               recl = c(seq(0.875, 0.5, -0.125), rep(0.5, 14)), 
               edu = 1)
  SSPs <- list(SSP1 = SSP1, SSP2 = SSP2, SSP3 = SSP3, SSP4 = SSP4, SSP5 = SSP5)
    
  SSP <- SSPs[[SSP.name]]
  
  if(any(sapply(list(fert, mort, dom.mig, recl, edu), is.not.null))) SSP <- "SSP6"
 
  path.1 <- "output_data"
  path.2 <- paste(patt, "_", SSP.name, "_", format(Sys.time(), "%Y-%m-%d_%H%M%S"), sep = "")
  if (!dir.exists(path.1)) dir.create(path.1, recursive = TRUE)
  dir.create(file.path(path.1, path.2))
  out.name <- file.path(path.1, path.2, path.2)
  
  if (is.not.null(fert)) {
    SSP$fert <- fert
  }
  if (SSP.name != "SSP2" | is.not.null(fert)) {
    if(length(SSP$fert) < 18) SSP$fert <- c(SSP$fert, rep(SSP$fert[length(SSP$fert)], 18 - length(SSP$fert)))
    asfredu.dt$value <- asfredu.dt$value * rep(SSP$fert, each = nrow(asfredu.dt) / iPr)
  }
  
  if (is.not.null(mort)) {
    SSP$mort <- mort
  }
  
  #unconditional computation of nsxedu:
  if (nSx == "le0") {
    if(length(SSP$mort) < 18) SSP$mort <- c(SSP$mort, rep(SSP$mort[length(SSP$mort)], 18 - length(SSP$mort)))
    SSP$mort <- cumsum(SSP$mort)
    nsxedu.dt <- le0.change(SSP$mort, out.name)
    sx.vars <- setdiff(intersect(names(popedu.dt), names(nsxedu.dt)), "value")
    nsxedu.dt <- nsxedu.dt[region != "india", .(var = "nSx", value = mean(value)), by = sx.vars]
    nsxedu.dt[, age := age - 5]
    if ("region" %in% p.vars) {
      nsxedu.dt$region <- factor(nsxedu.dt$region, labels = data[[pos.var.def]][variables == "region"]$values)
      }
    nsxedu.dt[, pattern := do.call(paste, c(.SD, sep = "_")), .SDcols = p.vars]
    nsxedu.dt <- nsxedu.dt[, .(period, pattern, value)]
    setnames(nsxedu.dt, "value", "nSx")
  }
      
  if (is.not.null(dom.mig)) { 
    SSP$dom.mig <- dom.mig
  }
  if (SSP.name != "SSP2" | is.not.null(dom.mig)) {  
    if(length(SSP$dom.mig) < 18) SSP$dom.mig <- c(SSP$dom.mig, rep(SSP$dom.mig[length(SSP$dom.mig)], 18 - length(SSP$dom.mig)))
    #dom doesn't contain a time variable and thus is multiplied with the dom.mig-values within the loop below
    }

  if (exists("reclass.dt")) {      
    if (is.not.null(recl)) { 
      SSP$recl <- recl
    }
    if (SSP.name != "SSP2" | is.not.null(recl)) {  
      if(length(SSP$recl) < 18) SSP$recl <- c(SSP$recl, rep(SSP$recl[length(SSP$recl)], 18 - length(SSP$recl)))
      #reclass.dt doesn't contain a time variable and thus is multiplied with the recl-values within the loop below
      }
    }  
  
  if (is.not.null(edu)) SSP$edu <- edu

  base.year <- min(popedu.dt$period)
  iPr.fin <- iPr
     
  for(iPr in 1:iPr.fin) {  
    print(paste("Period is ", iPr, sep = ""))
    
    #the following has to be done in every period because popedu.dt is derived from resProj within the simulation:
    popedu1 <- popedu.dt[period == base.year + 5 * (iPr - 1)][, var := NULL]
    setnames(popedu1, "value", "pop")
    nsxedu1 <- nsxedu.dt[period == base.year + 5 * (iPr - 1)][, period := NULL]
    projedu <- popedu1[nsxedu1, on = "pattern", nomatch = 0]
    projedu[, `:=` (pop1 = pop * nSx, deaths = pop * (1 - nSx))]

    if (is.not.null(ass.edu.dt)) {
      projedu[, areasex := do.call(paste, c(.SD, sep = "_")), .SDcols = intersect(names(projedu), areasex.vars)]
      #Check if all areas have a positive number of inhabitants. If not, eapr() is only executed for the ones with positive numbers:
      area.pos <- projedu[, .(popsum = sum(pop)), by = intersect(names(projedu), areasex.vars)]
      patt.pos <- unique(area.pos[, do.call(paste, c(.SD, sep = "_")), .SDcols = intersect(names(area.pos), c("region", "residence"))])
      if (length(patt.pos) > 0) {
        idx.pos <- sort((c(sapply(patt.pos, function(x) grep(x, projedu$areasex)))))
      } else {
        idx.pos <- 1:nrow(projedu)
      }
      projedu.pos <- data.table(ddply(projedu[idx.pos], .(areasex), eapr, ass.edu.dt = ass.edu.dt, maxeapr5 = maxeapr5))
      if (length(idx.pos) < nrow(projedu)) {
        projedu.0 <- projedu[setdiff(1:nrow(projedu), idx.pos)]
        projedu.0[, pop2 := pop1]
        projedu <- merge(projedu.pos, projedu.0, all = TRUE)
        #row reordering to match the intial order: 
        projedu <- projedu[match(popedu1$pattern, projedu$pattern), ]
        projedu <- projedu[, areasex := NULL]
      } else {
        projedu <- projedu.pos
      }
    } else {
      projedu[, pop2 := pop1]
    }
    
    #----- domestic migration -----
    if (exists("dom.dt")) {
      dom <- copy(dom.dt)
      dom <- dom[, value := value * SSP$dom.mig[iPr]]
      origin.vars <- intersect(names(projedu), c("region", "residence"))
      if (length(origin.vars) == 2) {
        projedu[, `:=` (origin = paste(region, residence, sep = "_"))]
        } else {
        projedu[, `:=` (origin = paste(projedu[[origin.vars]], sep = "_"))]
      }
      dom <- projedu[dom, on = intersect(names(dom), names(projedu)), nomatch = 0, allow.cartesian = TRUE]
      dom[, `:=` (mig = value * pop2 / 1000)]
      setkeyv(dom, intersect(c("sex", "age", "edu"), names(dom)))
      dom.out <- dom[, .(outmig = sum(mig)), by = c(key(dom), "origin")]
      dom.in <- dom[, .(inmig = sum(mig)), by = c(key(dom), "destination")] 
      resProj <- merge(projedu, dom.out, by = c(key(dom), "origin"), all.x = TRUE)
      resProj <- merge(resProj, dom.in, by.x = c(key(dom), "origin"), by.y = c(key(dom), "destination"), all.x = TRUE)
      for (j in c("outmig", "inmig")) set(resProj, which(is.na(resProj[[j]])), j, 0)
      resProj[, `:=` (pop3 = pop2 - outmig + inmig)]
    } else {
      resProj <- copy(projedu)
      resProj[, pop3 := pop2]
      }

    #----- international migration -----
    setkeyv(int.dt, intersect(c("sex", "age", "edu"), names(int.dt)))
    
    #emigration: only rates (possibly transformed from abs and fixed for all periods)
    #immigration: only abs
    int <- int.dt[period == base.year + 5 * (iPr - 1)]
    projedu <- projedu[resProj, on = intersect(names(projedu), names(resProj))]
    origin.vars <- intersect(names(projedu), c("region", "residence"))
    if (length(origin.vars) == 2) {
      projedu[, `:=` (origin = paste(region, residence, sep = "_"))]
    } else {
      projedu[, `:=` (origin = paste(projedu[[origin.vars]], sep = "_"))]
    }
    int.out <- projedu[int[var == "mrate"], on = intersect(names(int), names(projedu)), nomatch = 0, allow.cartesian = TRUE]
    int.out[, `:=` (emi = value * pop3)]
    int.out <- int.out[, .(emi = sum(emi)), by = c(key(int), "origin")]
    int.in <- int[var == "mabs", .(immi = sum(value)), by = c(key(int), "destination")]
    resProj <- merge(projedu, int.out, by = c(key(int.dt), "origin"), all.x = TRUE)
    resProj <- merge(resProj, int.in, by.x = c(key(dom), "origin"), by.y = c(key(dom), "destination"), all.x = TRUE)
    for (j in c("emi", "immi")) set(resProj, which(is.na(resProj[[j]])), j, 0)
    resProj[, `:=` (pop4 = pop3 - emi + immi, origin = NULL)]
    
    #----- fertility -----
    asfr1 <- asfredu.dt[period == base.year + 5 * (iPr - 1)][, period := NULL]
    resProj <- merge(resProj, asfr1, by = "pattern", all.x = TRUE)
    
    p2.vars <- c(names(resProj)[1:(which(names(resProj) == "pop") - 1)], "pop4")
    popedu2 <- resProj[, p2.vars, with = FALSE]
    popedu2[, age := age + 5]
    popedu2[age == 105, age := 100]
    popedu2[, pattern := do.call(paste, c(.SD, sep = "_")), .SDcols = p.vars]
    popedu2 <- rbind(popedu2[age < 100], popedu2[age >= 100, .(pop4 = sum(pop4)), by = setdiff(names(popedu2), c("pop4"))])
    setnames(popedu2, "pop4", "pop4.shift")
    popedu2[, pattern := do.call(paste, c(.SD, sep = "_")), .SDcols = p.vars]
    
    resProj <- merge(resProj, popedu2[, .(pattern, pop4.shift)], by = "pattern", all.x = TRUE)

    for (j in c("asfr", "pop4.shift")) set(resProj, which(is.na(resProj[[j]])), j, 0)    
    resProj[, births := asfr * (pop + pop4.shift) / 2 / 200]
    area.vars <- c("region", "residence")
    if (any(area.vars %in% names(resProj))) {
      resProj[, area := do.call(paste, c(.SD, sep = "_")), .SDcols = intersect(names(projedu), area.vars)]
    } else {
      resProj[, area := country]
    }

    pop.04 <- resProj[, .(births = sum(births)), by = area]
    sexRatio <- sexratio.dt[period == base.year + 5 * (iPr - 1)][, period := NULL]
    pop.04 <- pop.04[sexRatio, on = "area"]
      
    s0.male <- nsxedu1[grep("\\bmale_-5", nsxedu1$pattern)]
    s0.female <- nsxedu1[grep("female_-5", nsxedu1$pattern)]
      
    if (any(pop.04$area != country)) { #= if region and/or residence are given
      #only one nSx is used for all edu levels:
      s0.male <- s0.male[sapply(pop.04$area, function(x) grep(x, s0.male$pattern)[1]), ]
      s0.female <- s0.female[sapply(pop.04$area, function(x) grep(x, s0.female$pattern)[1]), ]
        
      pop.04[, c("pattern.male", "s0.male", "pattern.female", "s0.female") := list(s0.male[sapply(pop.04$area, function(x) grep(x, s0.male$pattern)), pattern],
                                                                                    s0.male[sapply(pop.04$area, function(x) grep(x, s0.male$pattern)), nSx],
                                                                                    s0.female[sapply(pop.04$area, function(x) grep(x, s0.female$pattern)), pattern],
                                                                                    s0.female[sapply(pop.04$area, function(x) grep(x, s0.female$pattern)), nSx])]
      pop.04[, c("surv.male", "dead.male", "surv.female", "dead.female") := list(births * 1000 / (1000 + sexr) * s0.male,
                                                                                  births * 1000 / (1000 + sexr) * (1 - s0.male),
                                                                                  births * sexr / (1000 + sexr) * s0.female,
                                                                                  births * sexr / (1000 + sexr) * (1 - s0.female))]
      } else {
        s0.male <- s0.male[1, ]
        s0.female <- s0.female[1, ]
        pop.04[, c("pattern.male", "s0.male", "pattern.female", "s0.female") := list(s0.male$pattern, s0.male$nSx, s0.female$pattern, s0.female$nSx)]
        pop.04[, c("surv.male", "dead.male", "surv.female", "dead.female") := list(births * 1000 / (1000 + sexr) * s0.male,
                                                                                   births * 1000 / (1000 + sexr) * (1 - s0.male),
                                                                                   births * sexr / (1000 + sexr) * s0.female,
                                                                                   births * sexr / (1000 + sexr) * (1 - s0.female))] 
        }      
             
    pop.04.d <- melt(pop.04, measure = patterns("^surv", "^dead", "^pattern"), value.name = c("pop4.shift", "deaths.nb", "pattern"))
    pop.04.d$pattern <- sapply(pop.04.d$pattern, function(x) sub("-5", "0", x))
    
    resProj <- pop.04.d[, .(deaths.nb, pattern)][resProj, on = "pattern", nomatch = NA]
    resProj[match(pop.04.d$pattern, resProj$pattern), pop4.shift := pop.04.d$pop4.shift]
    setcolorder(resProj, c(names(resProj)[-1], "deaths.nb"))
    resProj[is.na(deaths.nb) == TRUE, deaths.nb := 0L]
    
    #----- reclassification -----
       
    if (exists("reclass.dt")) {
      proprur <- resProj[, .("pop2" = sum(pop2)), area]
      if (nrow(proprur) > 2) { #regions and residence given
        proprur[, c(area.vars) := tstrsplit(proprur$area, "_")]
        proprur <- dcast(proprur, region ~ residence, value.var = "pop2")
        proprur[, newperural := rural / (rural + urban)]
        proprur$gap <- reclass.dt$gap[match(proprur$region, reclass.dt$area)]
      } else {
        #instead of area = "rural", region = "rural" is used to allow the correct matching:
        proprur <- data.table(region = "rural", rural = proprur[area == "rural", pop2], urban = proprur[area == "urban", pop2], 
                              newperural = proprur[area == "rural", pop2] / sum(proprur$pop2), gap = reclass.dt$gap)
      }
      
      #conditional computation of reclasstr:
      if (exists("glmMigrReclass")) {
        proprur[, `:=` (reclasstr = (predict(glmMigrReclass, data.frame(perural = proprur$newperural), type = "response") + gap) * SSP$recl[iPr])] #1.05ms
        proprur[areas.oldtr$area, reclasstr := areas.oldtr$reclasstr]
        proprur <- proprur[, .(region, reclasstr = reclasstr / (reclass.period / 5))]
      } else {
        proprur <- proprur[, .(region, reclasstr = reclass.dt$reclasstr)]
      }
 
      resProj[, pop4.shift.total := rep(resProj[, .(sum(pop4.shift)), by = setdiff(p.vars, "residence")]$V1, each = 2)]
      resProj[, `:=` (perural = pop4.shift / pop4.shift.total), ]
      if ("region" %in% names(resProj)) {
        resProj <- resProj[proprur, on = "region", nomatch = NA]
      } else {
        resProj[, reclasstr := proprur$reclasstr]
      }

      setkey(resProj, "pattern") 
      resProj[residence == "rural", pop5 := (pop4.shift.total * perural) - (pop4.shift.total * perural * reclasstr)]
      resProj[residence == "urban", pop5 := (pop4.shift.total * perural) + (pop4.shift.total * (1 - perural) * reclasstr)]
      for (j in c("perural", "pop5")) set(resProj, which(is.na(resProj[[j]])), j, 0)
    } else {
      resProj[, pop5 := pop4.shift] 
      }
    
    #----- results -----
    
    setkeyv(resProj, p.vars)
    if(iPr == 1) results <- vector("list", iPr.fin + 1)
    results[[iPr]] <- resProj

    popedu.dt <- resProj[, c(intersect(names(popedu.dt), names(resProj))), with = FALSE]
    popedu.dt[, `:=` (period = period + 5, var = "pop", value = resProj$pop5)]
    
    if(iPr == iPr.fin) {
      results[[iPr + 1]] <- popedu.dt[, `:=` (pop = value, var = NULL, value = NULL)] #final period
    } else {
      popedu.dt.100p <- popedu.dt[age >= 100, .(value = sum(value)), by = setdiff(p.vars, "age")]
      popedu.dt[age == 100, value := popedu.dt.100p[, value]]
      popedu.dt <- popedu.dt[age <= 100]
    }

    if (iPr < iPr.fin & mig.adj == TRUE & "residence" %in% names(popedu1)) {
      #adjustment factors adj.fac: population proportions, either for all region/residence combinations or only for the whole country divided 
      #into rural/urban, at the end of the period divided by the values at the beginning
      #dom.dt: multiply the values in dom.dt with the corresponding adjustment factors
      dom.mig.adj <- popedu1[, .(ini = sum(pop)), by = intersect(names(popedu1), area.vars)]
      dom.mig.adj <- dom.mig.adj[popedu.dt[, .(end = sum(value)), by = intersect(names(popedu1), area.vars)], on = intersect(names(popedu1), area.vars)]
      dom.mig.adj[, `:=` (adj.fac = end / ini, origin = do.call(paste, c(.SD, sep = "_"))), .SDcols = intersect(names(popedu1), area.vars)]
      dom.dt <- dom.dt[dom.mig.adj[, .(origin, adj.fac)], on = "origin"][, value := value * adj.fac][, adj.fac := NULL] 
    }
  }
  
  #----- output -----
      
  #round results:
  round.names <- intersect(c("pop", "pop1", "deaths", "pop2", "outmig", "inmig", "pop3", "emi", "immi", "pop4", 
                             "births", "deaths.nb", "pop4.shift", "pop4.shift.total", "pop5"), names(resProj))
  lapply(results[1:iPr], function(x) x[, (round.names) := round(.SD), .SDcols = round.names])
  results[[iPr + 1]][, pop := round(pop)]
  
  #write .csv of full results with 3 added columns:
  res <- rbindlist(results, fill = TRUE)
  write.csv(res, paste(file.path(path.1, path.2, paste("popprojFull", path.2, sep = "_")), ".csv", sep = ""))
  
  #standard output:
  res.var <- intersect(c("pop", "births", "deaths", "outmig", "inmig", "pop4.shift"), names(resProj))
  res.list1 <- vector("list", length(res.var))
  res.list1 <- lapply(seq_along(res.var), function(x) {
    res.list1[[x]] <- xtabs(as.formula(paste(res.var[x], "~", paste(c("period", "sex", intersect(names(resProj), area.vars)), collapse = "+"))), data = res)
    })
  names(res.list1) <- res.var 

  if (exists("dom.dt")) {
    net.dom <- with(res.list1, (inmig) - (outmig))
    res.list1$net.dom <- net.dom
    }
    
  res.var <- c("pop4.shift", "pop5")
  res.list2 <- vector("list", length(res.var))
  res.list2 <- lapply(seq_along(res.var), function(x) {
    res.list2[[x]] <- xtabs(as.formula(paste(res.var[x], "~", paste(c("period", intersect(names(resProj), area.vars)), collapse = "+"))), data = res)
  })
  names(res.list2) <- res.var
  if ("residence" %in% names(resProj)) {
    if ("region" %in% names(resProj)) {
      res.list2$prop.pop <- round(margin.table(res.list2$pop4.shift, 1:3)[, , 2] / margin.table(res.list2$pop4.shift, 1:2), 4) #proportion of urban population
      res.list2$prop.reclass <- round((margin.table(res.list2$pop4.shift, 1:3)[, , 1] - margin.table(res.list2$pop5, 1:3)[, , 1]) / 
        margin.table(res.list2$pop4.shift, 1:3)[, , 1], 4)  #proportion of reclassified (rural)
    } else {
      res.list2$prop.pop <- round(margin.table(res.list2$pop4.shift, 1:2)[, 2] / margin.table(res.list2$pop4.shift, 1), 4)
      res.list2$prop.reclass <- round((margin.table(res.list2$pop4.shift, 1:2)[, 1] - margin.table(res.list2$pop5, 1:2)[, 1]) / margin.table(res.list2$pop4.shift, 1:2)[, 1], 4)
    }
    res.list <- c(res.list1[c(1:3, 7)], res.list2[-(1:2)], SSP)
  } else {
    res.list <- res.list1[c(1:3, 7)]
  }
  
  saveRDS(res.list, paste(out.name, ".rds", sep = "")) 
  
  if(detail.out == TRUE & iPr.fin > 1) msproj.out(res, res.list, res.list1, out.name, p.vars, country, area.vars)
  
  return(res.list)
}

Try the MSDem package in your browser

Any scripts or data that you put into this service are public.

MSDem documentation built on May 2, 2019, 4:42 p.m.