R/GADMagreer.R

#' Processes GADM1 and GADM2 entries
#' Processes GADM1 and GADm2 entries and combines them ready to implement vaccination coverage.
#' Takes the initial information to generate population and vaccination arrays for the desired countries. Only inputs the EPI coverage for vaccination and sets it up for the addition of vaccination campaigns.
#' @param campaigncsv The csv file containing the vaccination campaigns produced by the function format_csv.
#' @param scenario This changes the underlying assumptions of the population size. Base does nothing, "hiCov" reduces the population by 25 percent, "loCov" increases the population by 25 percent.
#' @param GAVI.switch This implements different vaccination campaigns based on the column "scenario" in the campaigncsv parameter. "-all.epi" ignores the contribution of epi, "5year.mass" implements five year mass vaccination campaigns,"epi90" does...something?,"+fut.epi" includes future epi coverage.
#' @param skew This affects how vaccination campaigns are applied. If the skew is set to -1 then campaigns are applied to the whole population, regardless of prior vaccination status. If the skew is set to 0 then campaigns are applied only to the unvaccinated population.
#' @param genvals This is the output from popandvac
#' @param shp2 This is a second administrative division shapefule that contains all the countries you want to generate population vaccination coverage for.
#' @examples
#' GADMagreeer(campaigncsv)
#' @keywords internal



GADMagreeer<-function(campaigncsv,scenario="base",GAVI.switch="base",skew=0,genvals,countryiso){
  
  shp2_vs.8sub<-genvals$shp2_v2.8sub
  
  campaigncsv<-campaigncsv[campaigncsv$country.code %in% countryiso,]
  
  options(warn=-1)
  scheduleGADM1<-campaigncsv[campaigncsv$location.encoding=="GADM1",]
  scheduleGADM2<-campaigncsv[campaigncsv$location.encoding=="GADM2",]

  #Unpacking
  dndf<-genvals$dndf
  vacc.cov<-genvals$vacc.cov
  dn2<-genvals$dn2
  dn3<-genvals$dn3
  dn1.0.name<-dndf$dn1.0.name
  dn1.1.name<-dndf$dn1.1.name
  dn1.2.name<-dndf$dn1.2.name
  dn1.1_gadm1<-dndf$dn1.1_gadm1
  dn1.2_gadm1<-dndf$dn1.2_gadm1
  pop1.adm2<-genvals$pop1.adm2
  # print("Aggregating going through GADM1 entries")
  
  ## making sure there are no spaces or other annoying characters that
  ## will mess data entry up, and converting to the correct class:
  ## Omit the country and country.code, campaign.type,scenario
  for(x in (1:length(scheduleGADM1))[-which(names(scheduleGADM1) %in% c("country","country.code","campaign.type","scenario","location.encoding","adm1","adm2","adm3"))]) scheduleGADM1[,x]<-as.numeric(gsub(" ","",scheduleGADM1[,x]))
  for(x in (1:length(scheduleGADM2))[-which(names(scheduleGADM2) %in% c("country","country.code","campaign.type","scenario","location.encoding","adm1","adm2","adm3"))]) scheduleGADM2[,x]<-as.numeric(gsub(" ","",scheduleGADM2[,x]))
  
  #Adding vaccination from different scenarios
  if(scenario %in% c("M.base","M.hiCov","M.loCov")) {
    scheduleGADM1<-scheduleGADM1[scheduleGADM1$year>1980,]
  }
  if(GAVI.switch %in% c("base","+fut.epi","5year.mass","epi90","-all.epi")) {
    scheduleGADM1<-scheduleGADM1[scheduleGADM1$scenario %in% c("base","past.GAVI"),]
  } else if(GAVI.switch=="-past.mass") {
    scheduleGADM1<-scheduleGADM1[scheduleGADM1$scenario %in% c("base"),]
  } else if(GAVI.switch %in% c("+fut.mass","+fut.both")) {
    #keep everything in...
  } else if(GAVI.switch == "GAVINov13A") {
    scheduleGADM1<-scheduleGADM1[scheduleGADM1$scenario!="future.GAVI",]
    schedfut<-schedfut[schedfut$year>2012 & schedfut$scenario=="GAVI.A",]
    scheduleGADM1<-scheduleGADM1[, -which(names(scheduleGADM1) == "skew")]
    scheduleGADM1<-cbind(scheduleGADM1, skew = skew)
    schedfut<-schedfut[, 1:18]
    scheduleGADM1<-rbind(scheduleGADM1,schedfut)
    rm(schedfut)
  }
  
  #Adding vaccination from different scenarios
  if(scenario %in% c("M.base","M.hiCov","M.loCov")) {
    scheduleGADM2<-scheduleGADM2[scheduleGADM2$year>1980,]
  }
  if(GAVI.switch %in% c("base","+fut.epi","5year.mass","epi90","-all.epi")) {
    scheduleGADM2<-scheduleGADM2[scheduleGADM2$scenario %in% c("base","past.GAVI"),]
  } else if(GAVI.switch=="-past.mass") {
    scheduleGADM2<-scheduleGADM2[scheduleGADM2$scenario %in% c("base"),]
  } else if(GAVI.switch %in% c("+fut.mass","+fut.both")) {
    #keep everything in...
  } else if(GAVI.switch == "GAVINov13A") {
    scheduleGADM2<-scheduleGADM2[scheduleGADM2$scenario!="future.GAVI",]
    schedfut<-schedfut[schedfut$year>2012 & schedfut$scenario=="GAVI.A",]
    scheduleGADM2<-scheduleGADM2[, -which(names(scheduleGADM2) == "skew")]
    scheduleGADM2<-cbind(scheduleGADM2, skew = skew)
    schedfut<-schedfut[, 1:18]
    scheduleGADM2<-rbind(scheduleGADM2,schedfut)
    rm(schedfut)
  }
  
  #or each line, working out which adm level it belongs to:
  #table(schedule$vac.id,schedule$adm1)
  lvl<-rep(3,nrow(scheduleGADM1))
  lvl[is.na(scheduleGADM1$adm3)]<-2
  lvl[is.na(scheduleGADM1$adm2)]<-1
  
  if(3 %in% lvl){
    
    #Aggregating those given at adm3 to adm2, taking into account the proportion of the adm2 unit living in adm3:
    scheduleGADM1$coverage.survey[lvl==3]<-scheduleGADM1$coverage.survey[lvl==3]*scheduleGADM1$adm2.prop[lvl==3]
    scheduleGADM1$coverage.adm[lvl==3]<-scheduleGADM1$coverage.adm[lvl==3]*scheduleGADM1$adm2.prop[lvl==3]
    scheduleGADM1$coverage.planned[lvl==3]<-scheduleGADM1$coverage.planned[lvl==3]*scheduleGADM1$adm2.prop[lvl==3]
    
    #Doing stuff to bring adm3 to adm2
    schedule2<-scheduleGADM1[lvl<3,]
    # scheduleGADM1[lvl<3, c(1:6,9:15,17:18)]
    schedule2a<-aggregate(scheduleGADM1$adm1[lvl==3],by=list(vac.id=scheduleGADM1$vac.id[lvl==3],adm2=scheduleGADM1$adm2[lvl==3]),mean)
    
    schedule2a<-cbind(schedule2a,year = aggregate(scheduleGADM1$year[lvl==3],by=list(vac.id=scheduleGADM1$vac.id[lvl==3],adm2=scheduleGADM1$adm2[lvl==3]),mean)$x)
    schedule2a<-cbind(schedule2a,target.population = aggregate(scheduleGADM1$target.population[lvl==3],by=list(vac.id=scheduleGADM1$vac.id[lvl==3],adm2=scheduleGADM1$adm2[lvl==3]),mean)$x)
    schedule2a<-cbind(schedule2a,doses = aggregate(scheduleGADM1$doses[lvl==3],by=list(vac.id=scheduleGADM1$vac.id[lvl==3],adm2=scheduleGADM1$adm2[lvl==3]),mean)$x)
    schedule2a<-cbind(schedule2a,coverage.planned = aggregate(scheduleGADM1$coverage.planned[lvl==3],by=list(vac.id=scheduleGADM1$vac.id[lvl==3],adm2=scheduleGADM1$adm2[lvl==3]),mean)$x) 
    schedule2a<-cbind(schedule2a,coverage.adm = aggregate(scheduleGADM1$coverage.adm[lvl==3],by=list(vac.id=scheduleGADM1$vac.id[lvl==3],adm2=scheduleGADM1$adm2[lvl==3]),mean)$x)
    schedule2a<-cbind(schedule2a,coverage.survey = aggregate(scheduleGADM1$coverage.survey[lvl==3],by=list(vac.id=scheduleGADM1$vac.id[lvl==3],adm2=scheduleGADM1$adm2[lvl==3]),mean)$x) 
    schedule2a<-cbind(schedule2a,agemin = aggregate(scheduleGADM1$agemin[lvl==3],by=list(vac.id=scheduleGADM1$vac.id[lvl==3],adm2=scheduleGADM1$adm2[lvl==3]),mean)$x) 
    schedule2a<-cbind(schedule2a,agemax = aggregate(scheduleGADM1$agemax[lvl==3],by=list(vac.id=scheduleGADM1$vac.id[lvl==3],adm2=scheduleGADM1$adm2[lvl==3]),mean)$x)

    #Re-ordering
    head(schedule2a)
    head(schedule2)
    
    schedule2a<-schedule2a[,c(1,4,3,2,5:11)] 
    names(schedule2a)[3]<-"adm1"
    schedule2a<-cbind(schedule2a,scenario=scheduleGADM1$scenario[match(schedule2a$vac.id,scheduleGADM1$vac.id)])
    schedule2a<-cbind(schedule2a[,1:2],scheduleGADM1[match(schedule2a$vac.id,scheduleGADM1$vac.id),3:4],schedule2a[,3:12],skew=scheduleGADM1[match(schedule2a$vac.id,scheduleGADM1$vac.id),"skew"],"location.encoding"=scheduleGADM1[match(schedule2a$vac.id,scheduleGADM1$vac.id),"location.encoding"])
    
    schedule2<-schedule2[,names(schedule2)[which(names(schedule2) %in% names(schedule2a))]]
    schedule2_2adone<-rbind(schedule2,schedule2a)
    rm(schedule2a)
    
  } else schedule2_2adone<-scheduleGADM1
  
  lvl<-rep(2,nrow(schedule2_2adone))
  lvl[is.na(schedule2_2adone$adm2)]<-1
  lvl[is.na(schedule2_2adone$adm1) & is.na(schedule2_2adone$adm2)]<-0
  
  # print("Disaggregating adm1 to adm2 for GADM1")
  
  #For those just given at adm1: get a line for each adm2 within this adm1:
  if(dim(schedule2_2adone)[1]>0){
    schedule2b<-schedule2_2adone[lvl==1,]
  } else {
    schedule2b<-schedule2_2adone
  }
  
  
  for(i in 1:nrow(schedule2b)) {
    adm2<-which(dn1.1_gadm1==schedule2b$adm1[i])
    nd<-length(adm2)
    # schedule2b$adm2[i] = dn1.2_gadm1[adm2[1]]
    
    schedule2b$adm2[i]<-dn1.2_gadm1[adm2[1]]
    
    if(nd>1) for(j in 2:nd) {
      schedule2b<-rbind(schedule2b,schedule2b[i,])
      schedule2b$adm2[nrow(schedule2b)]<-dn1.2_gadm1[adm2[j]]
    }
  }
  
  schedule2.5<-rbind(schedule2_2adone[lvl==2,],schedule2b)
  rm(schedule2b, adm2)
  schedule2.5<-schedule2.5[order(schedule2.5$vac.id, schedule2.5$adm1,schedule2.5$adm2),]
  
  blankdf<-data.frame(matrix(ncol=ncol(schedule2.5)))
  names(blankdf)<-names(schedule2.5)
  
  #Calculate shapefile areas
  # shp2.8toshp_adm2<-areacalculator(shp2_v2.8,shp2_v1.0,"adm2")
  for(ISOcode in as.character(unique(shp2_v1.0$ISO))){
    
    shptoshp2.8_adm2<-areacalculator(shp2_v1.0[shp2_v1.0$ISO==ISOcode,],shp2_v2.8[shp2_v2.8$ISO==ISOcode,],"adm2")
    schedule2.5_subbed<-schedule2.5[schedule2.5$country.code==ISOcode,]

    for(x in 1:dim(schedule2.5_subbed)[1]){
      
      if(length(shp2_v1.0[shp2_v1.0$ISO==ISOcode,])!=length(shp2_v2.8[shp2_v2.8$ISO==ISOcode,])){
        
        #Subset to campaign
        campaignrow<-schedule2.5_subbed[x,]
        
        if(is.na(campaignrow$adm2)){
          numadm2<-shp2_v1.0$ID_2[which(shp2_v1.0$ID_1 %in% campaignrow$adm1)]
          campaignrowad<-campaignrow[rep(1,length(numadm2)),]
          campaignrowad$target.population<-campaignrowad$target.population/length(campaignrowad$target.population)
          campaignrowad$doses<-campaignrowad$doses/length(campaignrowad$target.population)
          campaignrowad$adm2<-numadm2
          campaignrow<-campaignrowad
        }
        
        #Subset to population
        population<-data.frame("id2"=1:dim(shp2_v2.8)[1],"pop"=rowSums(pop1.adm2[,campaignrow$year-1950,]))
        #Proofs
        propofinc<-sapply(1:dim(campaignrow)[1], function(j) shptoshp2.8_adm2[which(shptoshp2.8_adm2[,1] %in% campaignrow$adm2[j]),],simplify = FALSE)
        
        #What we do
        outputsboogaloo<-sapply(1:dim(campaignrow)[1], function(z){
          #Work out the proportions
          propo<-data.frame(matrix(unlist(propofinc[[z]]),ncol=4))
          names(propo)<-c("gadmv2.8_adm2","gadmv1.0_adm2","area","prop")
          
          #Work out populations and do stuff, remove 0's
          incpop<-population[match(propo[,2],population$id2),"pop"]
          
          subsetfirst<-campaignrow[z,]
          
          popdose<-sapply(1:2, function(y){
            subsetval<-subsetfirst[,c("target.population","doses")][y]
            popact<-round(propo$prop*as.numeric(subsetval),0)
            popact[popact==0]<-NA
            popact
          })
          
          coverage<-sapply(1:3, function(y){
            subsetval<-subsetfirst[,c("coverage.planned","coverage.adm","coverage.survey")][y]
            popact<-round(propo$prop*(incpop*as.numeric(subsetval))/incpop,3)
            popact[popact==0]<-NA
            popact
          })
          
          matrix(c(popdose,coverage),ncol=5)
          
        },simplify = FALSE)
        
        #Mats for formatting
        bigmat<-propofinc[[1]]
        if(length(propofinc)>1){
          for(hj in 2:length(propofinc)){
            bigmat<-rbind(bigmat,propofinc[[hj]])
          }
        }
        bigmat2<-outputsboogaloo[[1]]
        if(length(outputsboogaloo)>1){
          for(hj in 2:length(outputsboogaloo)){
            bigmat2<-rbind(bigmat2,outputsboogaloo[[hj]])
          }
        }
        
        outputdf<- data.frame(cbind(bigmat[,2],bigmat2))
        names(outputdf)<-c("id2","target.population","doses","coverage.planned","coverage.adm","coverage.survey")
        cleaneddf<-outputdf[rowSums(is.na(outputdf))!=5, ]
        
        #Rep to the number we need
        df.expanded<-campaignrow[rep(1,dim(cleaneddf)[1]),]
        #Add doses
        df.expanded[,c("target.population","doses","coverage.planned","coverage.adm","coverage.survey","adm2")]<-cleaneddf[,c("target.population","doses","coverage.planned","coverage.adm","coverage.survey","id2")]
        
        countshp<-shp2_v2.8[shp2_v2.8$ISO==df.expanded$country.code[1],]
        df.expanded$adm1<-paste(countshp$ISO[1:length(df.expanded$adm1)],countshp[match(df.expanded$adm2,countshp$ID_2),]$ID_1,sep="")
        df.expanded$adm2<-paste(countshp$ISO[1:length(df.expanded$adm2)],df.expanded$adm2,sep="")
        blankdf<-rbind(blankdf,df.expanded)
    } else {
      #Sub
      boobap<-shptoshp2.8_adm2[shptoshp2.8_adm2$prop>0.8,]
      df.expanded<-schedule2.5_subbed[x,]
      countshp<-shp2_v2.8[shp2_v2.8$ISO==df.expanded$country.code[1],]
      df.expanded$adm1<-paste(countshp$ISO[1:length(df.expanded$adm2)],countshp[match(df.expanded$adm2,boobap$ID_2.1),]$ID_1,sep="")
      df.expanded$adm2<-paste(countshp$ISO[1:length(df.expanded$adm1)],countshp[match(df.expanded$adm2,boobap$ID_2.1),]$ID_2,sep="")
      blankdf<-rbind(blankdf,df.expanded)
      
    }
    } 
  }
  
  #Get rid of blank first row
  blankdf<-blankdf[-1,]

  dfbyid<-sapply(as.numeric(na.omit(unique(blankdf$vac.id))),function(r){
    subdf<-blankdf[blankdf$vac.id==r,]
    
    coverage<-subdf[,c("adm2","coverage.planned","coverage.adm","coverage.survey")]
    peepdoses<-subdf[,c("target.population","doses")]
    
    covagg<-aggregate(coverage,by=list(coverage$adm2),FUN=mean,na.rm=TRUE)
    peepdosagg<-aggregate(peepdoses,by=list(coverage$adm2),FUN=sum,na.rm=TRUE)
    
    mergedcovdose<-merge(covagg,peepdosagg,by="Group.1")
    
    subdfcropped<-subdf[match(unique(subdf$adm2), subdf$adm2),]
    dfvals<-c(unlist(subdfcropped[,c("vac.id","year","country","country.code","agemin","agemax","scenario","skew","location.encoding")]))

    outputdf<-data.frame(matrix(dfvals,ncol=9),covagg[,c("Group.1","coverage.planned","coverage.adm","coverage.survey")],peepdosagg[,c("target.population","doses")])
    names(outputdf)<-c("vac.id","year","country","country.code","agemin","agemax","scenario","skew","location.encoding","adm2","coverage.planned","coverage.adm","coverage.survey","target.population","doses")
    shp2_v2.8done<-shp2_v2.8[shp2_v2.8$ISO %in% subdf$country.code,]
    matched<-shp2_v2.8done[match(outputdf$adm2,paste(shp2_v2.8done$ISO,shp2_v2.8done$ID_2,sep="")),]
    outputdf$adm1<-paste(matched$ISO,matched$ID_1,sep="")
    outputdf
    
  },simplify=FALSE)
  
  
  unzipdf<-dfbyid[[1]]
  if(length(dfbyid)>1){
    for(h in 2:length(dfbyid)){
      unzipdf<-rbind(unzipdf,dfbyid[[h]])
    }
  }
  
  #Re-arrange
  unzipdf<-unzipdf[,names(schedule2.5[which(names(schedule2.5) %in% names(unzipdf))])]
  
  #Assign the GADM2 values for identification
  # mm3<-match(schedule2.5$adm2, gadm1_adm2$adm2.old)
  # schedule2.5$adm2<-paste(gadm1_adm2$adm0.new[mm3],gadm1_adm2$adm2.new[mm3],sep="")
  # 
  # schedule2.5$adm1<-paste(shp2_vs.8sub$ISO[match(as.character(schedule2.5$adm2),as.character(shp2_vs.8sub$SP_ID))],
  #                         shp2_vs.8sub$ID_1[match(as.character(schedule2.5$adm2),as.character(shp2_vs.8sub$SP_ID))],sep="")
  # 
  '%!in%' <- function(x,y)!('%in%'(x,y))
  
  scheduleGADM2clipped<-scheduleGADM2[,which(names(scheduleGADM2) %!in% c("adm3","adm2.prop","campaign.type","location.encoding"))]
  schedulerbind<-rbind(unzipdf,scheduleGADM2clipped)
  schedulerbind

}
arranhamlet/popvac_package documentation built on May 10, 2019, 1:48 p.m.