# Vignette-wide knitr chunk defaults. With eval = FALSE the chunks below are
# displayed but not executed when the document is built.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE
)
# Packages used throughout this vignette: gooseR for the survey helpers,
# dplyr for the grouped summaries, and ggplot2 for the plots.
library(gooseR)
library(dplyr)
library(ggplot2)
Survey data often comes with unwieldy column names - long questions that make analysis difficult. gooseR's intelligent survey tools transform these into clean, meaningful variable names while preserving the original mapping for documentation.
Typical survey exports look like this:
# Example of raw survey data columns
raw_columns <- c(
  "ResponseId",
  "How satisfied are you with our customer service on a scale of 1-5?",
  "On a scale of 0-10, how likely are you to recommend our product to a friend or colleague?",
  "What is your annual household income before taxes?",
  "How often do you use our product? (Daily, Weekly, Monthly, Rarely, Never)",
  "Please rate your agreement: The product meets my needs",
  "In which age range do you fall? (18-24, 25-34, 35-44, 45-54, 55-64, 65+)",
  "What is your primary reason for using our product? (Please select all that apply)"
)

# Show the unwieldy names exactly as a survey tool would export them
print(raw_columns)
Working with these names is painful:

- Hard to type
- Difficult to read in code
- Problematic in formulas
- Messy in visualizations
# Load your survey data
survey_data <- read.csv("survey_export.csv")

# Automatically rename columns with intelligent abbreviations
clean_data <- goose_rename_columns(survey_data)

# View what happened (original question -> new variable name)
goose_view_column_map(clean_data)
gooseR recognizes common survey patterns and applies intelligent abbreviations:
# Pattern Recognition Examples:

# Satisfaction questions
# "How satisfied are you with our customer service?"
# → "sat_cust_serv"

# NPS (Net Promoter Score)
# "On a scale of 0-10, how likely are you to recommend..."
# → "nps"

# Demographics
# "What is your annual household income before taxes?"
# → "hh_income"
# "In which age range do you fall?"
# → "age_range"

# Frequency questions
# "How often do you use our product?"
# → "use_freq"

# Likert scales
# "Please rate your agreement: The product meets my needs"
# → "agree_meets_needs"

# Multiple choice
# "What is your primary reason for using our product?"
# → "primary_reason"
You can provide domain-specific abbreviations:
# Create custom dictionary for your domain:
# each name is a phrase to look for, each value is its abbreviation.
custom_dict <- list(
  "customer service" = "cs",
  "artificial intelligence" = "ai",
  "machine learning" = "ml",
  "return on investment" = "roi",
  "key performance indicator" = "kpi"
)

# Apply with custom dictionary
clean_data <- goose_rename_columns(
  survey_data,
  custom_abbrev = custom_dict,
  max_length = 20 # Maximum variable name length
)
# Load raw survey data
survey <- read.csv("qualtrics_export.csv", stringsAsFactors = FALSE)

# Check the messy column names
names(survey)[1:5]

# Clean the column names
survey_clean <- goose_rename_columns(survey)

# Check the clean names
names(survey_clean)[1:5]

# Save the mapping for documentation
mapping <- goose_view_column_map(survey_clean)
write.csv(mapping, "column_mapping.csv", row.names = FALSE)
# Share a sample with goose for context
goose_give_sample(survey_clean)

# Get an analysis plan and print it
plan <- goose_make_a_plan("exploratory")
cat(plan)

# Ask specific questions
goose_ask("What's the best way to analyze Likert scale data in this survey?")
goose_ask("How should I handle missing data in the income field?")
# Now you can use clean names in your analysis:
# mean satisfaction and NPS per age range, highest satisfaction first.
survey_clean %>%
  group_by(age_range) %>%
  summarise(
    avg_satisfaction = mean(sat_overall, na.rm = TRUE),
    avg_nps = mean(nps, na.rm = TRUE),
    n = n()
  ) %>%
  arrange(desc(avg_satisfaction))

# Create visualizations with clean labels
ggplot(survey_clean, aes(x = age_range, y = sat_overall)) +
  geom_boxplot() +
  theme_brand("block") +
  labs(
    title = "Satisfaction by Age Group",
    x = "Age Range",
    y = "Overall Satisfaction"
  )
# Get feedback on your analysis approach
goose_honk(severity = "moderate")

# Create documentation
goose_handoff()

# Save your work
goose_save(
  survey_clean,
  category = "survey_data",
  tags = c("cleaned", "q3_2024", "customer_satisfaction")
)

# Create a continuation prompt for next session
goose_continuation_prompt()
# Process multiple survey files consistently
files <- c("survey_q1.csv", "survey_q2.csv", "survey_q3.csv")

# Read and rename each file with the same renaming step
all_surveys <- lapply(files, function(file) {
  data <- read.csv(file)
  goose_rename_columns(data)
})

# Name each element after its quarter ("q1", "q2", "q3") so that the `.id`
# column below records the quarter rather than the list position.
names(all_surveys) <- sub(
  "^survey_",
  "",
  tools::file_path_sans_ext(basename(files))
)

# Combine with consistent naming
combined <- bind_rows(all_surveys, .id = "quarter")
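gooseR detects these patterns automatically: satisfaction questions, NPS (Net Promoter Score) items, demographics, frequency questions, Likert-scale agreement items, and multiple-choice questions.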
The mapping is always preserved:
# After renaming
clean_data <- goose_rename_columns(survey_data)

# Access the mapping stored as an attribute on the renamed data
attr(clean_data, "column_map")

# Or use the helper function
mapping <- goose_view_column_map(clean_data)

# Use in reports
library(knitr)
kable(
  mapping,
  caption = "Survey Variable Mapping",
  # assumes the mapping has exactly two columns — TODO confirm
  col.names = c("Variable Name", "Original Question")
)
1. Always save the mapping: Keep the column mapping for documentation and methodology sections.
2. Use custom dictionaries: Add industry-specific abbreviations for consistent naming.
3. Check the results: Review renamed columns to ensure they make sense.
4. Combine with memory: Save cleaned datasets with descriptive tags.
5. Document your work: Use `goose_handoff()` to create documentation.
# Set maximum length of the generated variable names
clean <- goose_rename_columns(survey, max_length = 15)
# gooseR automatically handles duplicates by adding numbers
# "satisfaction_1", "satisfaction_2", etc.
# gooseR automatically removes special characters
# "What's your opinion?" → "opinion"
# "Rate 1-5: Service" → "rate_service"
# Combine with AI analysis
goose_give_sample(clean_data)
advice <- goose_ask(
  "What statistical tests should I use for this Likert scale data?"
)

# Get code review
goose_honk(severity = "gentle")

# Save for later
goose_save(clean_data, category = "surveys", tags = c("2024", "cleaned"))

# Create formatted output: mean satisfaction per age range
results <- clean_data %>%
  group_by(age_range) %>%
  summarise(mean_sat = mean(sat_overall, na.rm = TRUE))

goose_format_table(results)
gooseR's survey tools eliminate the tedious work of cleaning survey data, letting you focus on analysis and insights. The intelligent pattern recognition ensures consistent, meaningful variable names while preserving the full context of your original questions.
Next: Check out the Code Review and Testing vignette to learn about gooseR's development tools.
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.