R/manipulate.R

Defines functions Parse_Array_Writer Rename_Variables Get_Variable_Names Add_Additional_Info Combine_Surveys_dt Fix_Date_And_Time_Fields

Documented in Add_Additional_Info Combine_Surveys_dt Fix_Date_And_Time_Fields Get_Variable_Names Parse_Array_Writer Rename_Variables

#' Fix data times from raw survey downloads
#'
#' The raw surveys from LS have messed up data times. This function fixes that.
#'
#' @param Survey_List this is a list containing only the downloaded data from the survey for a multiple-part survey.
#' @return The surveys, just fixed.
#' @keywords Dates
#' @export
#' @examples \dontrun{
#' Fix_Date_And_Time_Fields(Cint_Surveys_0)
#' }
Fix_Date_And_Time_Fields <- function(Survey_List){

  for(i in 1:length(Survey_List)){
    Survey_List[[i]] <- Survey_List[[i]][, ':='(startdate = lubridate::ymd_hms(startdate),
                                                submitdate = lubridate::ymd_hms(submitdate),
                                                datestamp = lubridate::ymd_hms(datestamp)
    ) ]
  }
}




#' Combine different survey lists in an rbindlist fashion.
#'
#' Combines the corresponding survey parts.
#'
#' @param List_of_Survey_Lists this is a list containing the downloaded (or filtered- but the Count_and_Filter function packages the survey list with other stuff, so beware!) data from the survey for a multiple-part survey.
#' @return The surveys, combined
#' @keywords Combine
#' @export
#' @examples \dontrun{
#' Merged_Cint_Surveys <- Combine_Surveys_dt(list(Cint_Surveys_SL, Cint_Surveys_Between_SL_and_20210308, Cint_Surveys_0))
#' }
Combine_Surveys_dt <- function(List_of_Survey_Lists){
  Survey_List <- list()

  # Have to get the ith element of all of the nested surveys (https://stackoverflow.com/questions/20428742/select-first-element-of-nested-list)

  for(i in c(1:length(List_of_Survey_Lists[[1]]))){
    Survey_List[[i]] <- rbindlist(lapply(List_of_Survey_Lists, function(l) l[[i]]), use.names = TRUE, fill = TRUE)
  }

  return(Survey_List)
}







#' Add Panel, and project code information to each of the survey parts
#'
#' @param Survey_List this is a list containing the downloaded (or filtered) data from the survey for a multiple-part survey.
#' @param Panel_Provider string specifying the panel provider for this survey list
#' @param Project_Code string specifying the project code for this survey list
#' @return The surveys, with additional columns for the panel_provider and project_code in each of the survey parts
#' @keywords Additional information
#' @export
#' @import data.table
#' @examples \dontrun{
#' Merged_Cint_Surveys <- Add_Additional_Info(Cint_Surveys_0, "Cint", "166052")
#' }
Add_Additional_Info <- function(Survey_List, Panel_Provider, Project_Code){
  # Function adds the columns Panel_Provider, Survey_Part, and Project_Code to all of the survey parts
  # Panel_Provider: String like "Cint" or "Dynata"
  # Project_Code: String like "166052"
  for(i in 1:length(Survey_List)){
    Survey_List[[i]][, ':='(Panel_Provider = Panel_Provider,
                            Survey_Part = i,
                            Project_Code = Project_Code
    )]
  }

}



#' Gets all of the variables in the survey
#'
#' @param Survey_List this is a list containing the downloaded (or filtered) data from the survey for a multiple-part survey.
#' @return All of the variable names as a data.table
#' @keywords Variable names
#' @export
#' @examples \dontrun{
#' Variable_Names <- Get_Variable_Names(Cint_Surveys_0)
#' }
Get_Variable_Names <- function(Survey_List){
  # Only pass in the Survey List, not the whole list object returned by Count and Filter!
  variable_names <- list()

  for(i in 1:length(Survey_List)){
    variable_names[[i]] <- data.table(colnames(Survey_List[[i]]))
  }
  variable_names_list <- unique(rbindlist(variable_names,use.names = TRUE))

  return(variable_names_list)
}



#' Renames all of the variables in the survey based on a data.table with two columns
#'
#' @param Survey_List this is a list containing the downloaded (or filtered) data from the survey for a multiple-part survey.
#' @param Names needs to be a data.table with two columns that have the old variable names (those pulled using Get_Variable_Names, for example)- Names$RAW and the new ones (Variable registry names)- Names$ANALYSIS
#' @return The surveys, with additional columns for the panel_provider and project_code in each of the survey parts
#' @keywords Additional information
#' @export
#' @examples \dontrun{
#' Rename_Variables(Cint_Surveys_0, Variable_Names)
#' }
Rename_Variables <- function(Survey_List, Names){
  # Names needs to be a csv with two columns that have the old variable names (those pulled using Get_Variable_Names) and the new ones (Variable registry names)
  # Here is an example document:
  # Names <- fread("C:/Users/bpres/Desktop/Book1.csv")

  for(i in 1:length(Survey_List)){
    setnames(Survey_List[[i]], old = Names$RAW, new = Names$ANALYSIS, skip_absent = TRUE)
  }

}





#' Reforms the Array_Writer responses into long data
#'
#' @param columns_dt these are the columns that need to be reformed (for example, all of the driver storage columns) AND a "key" column that uniquely identifies the respondent
#' @param names These are the names of the variables that you want to result. Defaults to c("evaluation", "code").
#' @param key_name This is a string that specifies the name of the unique identifier column for each response. Default is "key"
#' @return a three column data.table with the key, the evaluation, and the code corresponding to that evaluation- the code could be a company name, for example, or a driver question code
#' @keywords parse, parsing, array writer
#' @export
#' @examples \dontrun{
#' Parse_Array_Writer(driver_storage_columns)
#' }
Parse_Array_Writer <- function(columns_dt, key_name = "key", names = c("evaluation", "code")){

  # makes it DT
  columns_dt <- setDT(columns_dt)

  # Melts the data and gets rid of NAs (which are death to the strsplit)
  melted <- data.table::melt(columns_dt, na.rm = TRUE, id.vars = key_name)
  melted <- melted[value != ""]

  # Unlisting turns the javascript array (comma separated) into a vector, where all of the entries retain their key
  JS_unlist <- melted[, .(value = unlist(strsplit(value, ","))), by = c(key_name, "variable")]%>%
    # separate turns it into a dataframe with the value column being replaced with the number of columns as named in names
    separate(col = "value", into = names, sep = "-", fill = "right")


  result <- copy(JS_unlist)[!is.na(get(names[1]))]

  return(result)
} # end function
bpresentati/surveyR documentation built on March 19, 2022, 3:40 a.m.