# R/CensusSortR.R

# Read a CSV of PVI scores and derive FIPS codes plus numeric / categorical
# versions of the 2012 and 2016 scores.
#
# Arguments:
#   file   - path of the CSV file, passed to read.csv().
#   header - passed to read.csv().
#
# The code reads the columns `Official.Names`, `X2012` and `X2016` by name and
# assumes the file has exactly five columns, because the final selection is
# positional (the five derived columns are expected at positions 6 to 10).
#
# Returns a data frame with the columns State, County, FIPS, PVI.2012,
# PVI.2016, Raw2012, Raw2016, Ternary2012 and Ternary2016.
PVI.2012.2016 <- function(file = "PVI Scores.csv", header = TRUE){
  scores <- read.csv(file = file, header = header)

  # Text with every digit removed (e.g. "R+" or "D+").
  party_part <- function(score) gsub("[[:digit:]]", "", score)

  # Signed number: positive for "R+", negative for "D+", 0 for anything else.
  signed_margin <- function(score) {
    party <- party_part(score)
    magnitude <- as.numeric(as.character(gsub("[^[:digit:]]", "", score)))
    ifelse(party == "D+",
           -1 * magnitude,
           ifelse(party == "R+", magnitude, as.numeric(0)))
  }

  # Three-way label: "R", "D" or "EVEN".
  party_label <- function(score) {
    party <- party_part(score)
    ifelse(party == "D+", "D", ifelse(party == "R+", "R", "EVEN"))
  }

  # Keep only the digits of the official name; the first five are used as FIPS.
  scores$X1 <- substr(gsub("[^[:digit:]]", "", scores$Official.Names), 1, 5)

  # Derived columns are appended in this order so that they land at
  # positions 7 to 10 for the positional selection below.
  scores$Raw2012 <- signed_margin(scores$X2012)
  scores$Raw2016 <- signed_margin(scores$X2016)
  scores$Ternary2012 <- party_label(scores$X2012)
  scores$Ternary2016 <- party_label(scores$X2016)

  tidy <- data.frame(scores[, 1:2], scores[, 6], scores[3:4], scores[, 7:10],
                     check.names = FALSE)
  names(tidy) <- c("State", "County", "FIPS", "PVI.2012", "PVI.2016",
                   "Raw2012", "Raw2016", "Ternary2012", "Ternary2016")
  tidy
}

# Build a codebook and a list of usable files from a folder of CSV datasets.
#
# Every file under `DIR` whose name matches `pattern` is read with read.csv().
# Files with the largest column count are the "included" datasets. The first
# data row of the first included file is paired with the column positions to
# form the codebook.
#
# Arguments:
#   DIR     - folder passed to list.files(); searched recursively.
#   pattern - regular expression passed to list.files().
#
# Returns a list of two data frames:
#   [[1]] codebook with columns "Detail" (first-row value of each column) and
#         "R Index" (column position); row names are the column names of the
#         reference file. Because both columns are bound into one matrix, the
#         index is stored as text whenever the first row contains text.
#   [[2]] one-column data frame `valid_dataset_addresses` holding the paths of
#         the included files (excluded files no longer leave NA rows).
#
# Stops with an error when no file matches. The two tables are opened with
# View() in interactive sessions only, since View() needs a display.
Data.Set.Guide <- function(DIR = "", pattern="" ){
  fils <- list.files(DIR, pattern, full.names = TRUE, recursive = TRUE)
  if (length(fils) == 0) {
    stop("No files matching pattern '", pattern, "' were found under '", DIR,
         "'.", call. = FALSE)
  }

  # Column count of every candidate file.
  dim_store <- vapply(
    fils,
    function(path) ncol(read.csv(path, header = TRUE)),
    integer(1),
    USE.NAMES = FALSE
  )

  # Only the widest files are included.
  included_paths <- fils[dim_store == max(dim_store)]
  valid_dataset_addresses <- data.frame(valid_dataset_addresses = included_paths)

  # The codebook comes from the first INCLUDED file, not simply the first
  # file found, which may be one of the excluded ones.
  reference <- read.csv(included_paths[1], header = TRUE)

  # as.matrix() is called explicitly: as.vector() on a data frame yields a
  # plain list in recent R versions, which breaks the transpose below.
  first_row <- as.matrix(reference[1, , drop = FALSE])
  County_Variable_Detail <- as.data.frame(
    cbind(t(first_row), seq_len(ncol(first_row)))
  )
  colnames(County_Variable_Detail) <- c("Detail", "R Index")

  guide <- list(County_Variable_Detail, valid_dataset_addresses)
  print("OutPutObject[[1]] <- dataframe of codebook for all datasets included.")
  print("OutPutObject[[2]] <- addresses of included datasets.")
  if (interactive()) {
    View(County_Variable_Detail)
    View(valid_dataset_addresses)
  }
  guide
}

# Pull estimate / margin-of-error (MoE) column pairs out of a folder of CSV
# datasets and line them up with the rows of `targeted_fit`.
#
# Every file under `DIR` whose name matches `pattern` is read with read.csv().
# Rows are aligned by matching `targeted_fit$FIPS` against the SECOND column
# of each file. For each (estimate, MoE) index pair the two columns are
# extracted and named "<DataSet.Generic.Name>.<NN>.<name>", where "_NN" is the
# first underscore-plus-two-digits found in the file name (falling back to the
# full path when the file name has none).
#
# Arguments:
#   targeted_fit            - data frame containing a column named FIPS.
#   DIR                     - folder searched recursively. Must be supplied:
#                             the default refers to itself and cannot be
#                             evaluated.
#   pattern                 - regular expression passed to list.files().
#   indices_of_interest     - column positions in the order estimate, MoE,
#                             estimate, MoE, ...; must have even length.
#   DataSet.Generic.Name    - prefix used in the generated column names.
#   Covariate.and.MoE.Names - names in the same alternating order; must have
#                             even length. Must be supplied (self-referential
#                             default, as for DIR).
#
# Returns a data frame with one row per row of `targeted_fit` and two columns
# per (file, pair). A file with fewer columns than the largest requested index
# is reported with print() and skipped. If `targeted_fit` has no FIPS column,
# a message is printed and returned invisibly instead.
FolderSearch.By.Id <- function(targeted_fit = PVI_dataset_with_FIPS_COLUMN_NAMED_FIPS, 
                               DIR = DIR,
                               pattern="_with_ann.csv$",
                               indices_of_interest = data_interests_index_vector,
                               DataSet.Generic.Name = "ACS",
                               Covariate.and.MoE.Names = Covariate.and.MoE.Names){
  if (!("FIPS" %in% names(targeted_fit))) {
    # Kept as a printed message rather than an error so existing callers see
    # the same behaviour as before.
    return(print("Add the name FIPS to your column of FIPS. Or get FIPS to add to your dataset you want to attach census data based on  similar enumeration levels. "))
  }

  # Validate the alternating estimate/MoE inputs before touching any file.
  if (length(indices_of_interest) %% 2 != 0) {
    stop("You're vector isn't even. So either you forgot an estimate or MoE. You have to include both for this function.
           Add in order of Estimate then its MoE. ")
  }
  if (length(Covariate.and.MoE.Names) %% 2 != 0) {
    stop("You're vector isn't even. So either you forgot to name an estimate or MoE.")
  }

  # Odd positions hold estimates, even positions hold their MoE.
  is_estimate <- seq_along(indices_of_interest) %% 2 == 1
  Estimates_of_interest <- indices_of_interest[is_estimate]
  MoE_of_interest <- indices_of_interest[!is_estimate]

  is_estimate_name <- seq_along(Covariate.and.MoE.Names) %% 2 == 1
  Covariate.Names <- Covariate.and.MoE.Names[is_estimate_name]
  MoE.Names <- Covariate.and.MoE.Names[!is_estimate_name]

  fils <- list.files(DIR, pattern = pattern, full.names = TRUE, recursive = TRUE)
  if (length(fils) == 0) {
    stop("No files matching pattern '", pattern, "' were found under '", DIR,
         "'.", call. = FALSE)
  }

  # A usable file needs the ID column (position 2) and every requested index.
  required_cols <- max(2, indices_of_interest)
  year_tag_pattern <- "_\\d{2}"

  per_dataset <- vector("list", length(fils))
  for (i in seq_along(fils)) {
    address_of_dataset <- fils[i]
    dataset <- read.csv(address_of_dataset, header = TRUE)

    if (ncol(dataset) < required_cols) {
      print(sprintf("furthest location of covariate of interest and MoE not found, so dataset %d, is ignored", i))
      next
    }

    # Prefer the tag in the file name so a "_NN" in a parent directory name
    # cannot override it; fall back to the full path as before.
    file_name <- basename(address_of_dataset)
    DataSetType_Year <- regmatches(
      file_name, regexpr(year_tag_pattern, file_name, perl = TRUE)
    )
    if (length(DataSetType_Year) == 0) {
      DataSetType_Year <- regmatches(
        address_of_dataset,
        regexpr(year_tag_pattern, address_of_dataset, perl = TRUE)
      )
    }
    column_prefix <- paste0(
      gsub('_', paste0(DataSet.Generic.Name, '\\.'), DataSetType_Year), "."
    )

    # Row of this dataset for every FIPS in targeted_fit (NA when absent).
    matched_rows <- match(targeted_fit$FIPS, dataset[, 2])

    per_dataset[[i]] <- lapply(seq_along(Estimates_of_interest), function(j) {
      # Exactly the estimate and its MoE column, even when not adjacent.
      pair <- as.data.frame(list(
        dataset[, Estimates_of_interest[j]][matched_rows],
        dataset[, MoE_of_interest[j]][matched_rows]
      ))
      colnames(pair) <- c(paste0(column_prefix, Covariate.Names[j]),
                          paste0(column_prefix, MoE.Names[j]))
      pair
    })
  }

  # Flatten to one list of two-column data frames; skipped files drop out.
  pieces <- unlist(per_dataset, recursive = FALSE)
  if (is.null(pieces)) {
    pieces <- list()
  }
  as.data.frame(pieces)
}
# melmaniwan/CensusSortR documentation built on May 12, 2019, 4:36 a.m.