# survey-data-analysis.R
# In gooseR: R Integration for 'Goose' AI

## ----include = FALSE----------------------------------------------------------
# Vignette-wide knitr chunk options: `collapse`, the "#>" output comment
# prefix, and `eval = FALSE` (chunks are shown but not evaluated).
do.call(
  knitr::opts_chunk$set,
  list(eval = FALSE, comment = "#>", collapse = TRUE)
)

## ----setup--------------------------------------------------------------------
# library(gooseR)
# library(dplyr)
# library(ggplot2)

## -----------------------------------------------------------------------------
# # Example of raw survey data columns
# raw_columns <- c(
#   "ResponseId",
#   "How satisfied are you with our customer service on a scale of 1-5?",
#   "On a scale of 0-10, how likely are you to recommend our product to a friend or colleague?",
#   "What is your annual household income before taxes?",
#   "How often do you use our product? (Daily, Weekly, Monthly, Rarely, Never)",
#   "Please rate your agreement: The product meets my needs",
#   "In which age range do you fall? (18-24, 25-34, 35-44, 45-54, 55-64, 65+)",
#   "What is your primary reason for using our product? (Please select all that apply)"
# )
# 
# print(raw_columns)

## -----------------------------------------------------------------------------
# # Load your survey data
# survey_data <- read.csv("survey_export.csv")
# 
# # Automatically rename columns with intelligent abbreviations
# clean_data <- goose_rename_columns(survey_data)
# 
# # View what happened
# goose_view_column_map(clean_data)

## -----------------------------------------------------------------------------
# # Pattern Recognition Examples:
# 
# # Satisfaction questions
# "How satisfied are you with our customer service?"
# # → "sat_cust_serv"
# 
# # NPS (Net Promoter Score)
# "On a scale of 0-10, how likely are you to recommend..."
# # → "nps"
# 
# # Demographics
# "What is your annual household income before taxes?"
# # → "hh_income"
# 
# "In which age range do you fall?"
# # → "age_range"
# 
# # Frequency questions
# "How often do you use our product?"
# # → "use_freq"
# 
# # Likert scales
# "Please rate your agreement: The product meets my needs"
# # → "agree_meets_needs"
# 
# # Multiple choice
# "What is your primary reason for using our product?"
# # → "primary_reason"

## -----------------------------------------------------------------------------
# # Create custom dictionary for your domain
# custom_dict <- list(
#   "customer service" = "cs",
#   "artificial intelligence" = "ai",
#   "machine learning" = "ml",
#   "return on investment" = "roi",
#   "key performance indicator" = "kpi"
# )
# 
# # Apply with custom dictionary
# clean_data <- goose_rename_columns(
#   survey_data,
#   custom_abbrev = custom_dict,
#   max_length = 20  # Maximum variable name length
# )

## -----------------------------------------------------------------------------
# # Load raw survey data
# survey <- read.csv("qualtrics_export.csv",
#                    stringsAsFactors = FALSE)
# 
# # Check the messy column names
# names(survey)[1:5]
# 
# # Clean the column names
# survey_clean <- goose_rename_columns(survey)
# 
# # Check the clean names
# names(survey_clean)[1:5]
# 
# # Save the mapping for documentation
# mapping <- goose_view_column_map(survey_clean)
# write.csv(mapping, "column_mapping.csv", row.names = FALSE)

## -----------------------------------------------------------------------------
# # Share a sample with goose for context
# goose_give_sample(survey_clean)
# 
# # Get an analysis plan
# plan <- goose_make_a_plan("exploratory")
# cat(plan)
# 
# # Ask specific questions
# goose_ask("What's the best way to analyze Likert scale data in this survey?")
# 
# goose_ask("How should I handle missing data in the income field?")

## -----------------------------------------------------------------------------
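# # Now you can use clean names in your analysis
# # (`sat_overall` stands in for an overall-satisfaction column; substitute the
# # renamed column from your own survey, e.g. `sat_cust_serv`)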
# survey_clean %>%
#   group_by(age_range) %>%
#   summarise(
#     avg_satisfaction = mean(sat_overall, na.rm = TRUE),
#     avg_nps = mean(nps, na.rm = TRUE),
#     n = n()
#   ) %>%
#   arrange(desc(avg_satisfaction))
# 
# # Create visualizations with clean labels
# ggplot(survey_clean, aes(x = age_range, y = sat_overall)) +
#   geom_boxplot() +
#   theme_brand("block") +
#   labs(
#     title = "Satisfaction by Age Group",
#     x = "Age Range",
#     y = "Overall Satisfaction"
#   )

## -----------------------------------------------------------------------------
# # Get feedback on your analysis approach
# goose_honk(severity = "moderate")
# 
# # Create documentation
# goose_handoff()
# 
# # Save your work
# goose_save(
#   survey_clean,
#   category = "survey_data",
#   tags = c("cleaned", "q3_2024", "customer_satisfaction")
# )
# 
# # Create a continuation prompt for next session
# goose_continuation_prompt()

## -----------------------------------------------------------------------------
# # Process multiple survey files consistently
# files <- c("survey_q1.csv", "survey_q2.csv", "survey_q3.csv")
# 
# all_surveys <- lapply(files, function(file) {
#   data <- read.csv(file)
#   goose_rename_columns(data)
# })
# 
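# # Combine with consistent naming; `all_surveys` is an unnamed list, so the
# # "quarter" column holds each file's position in `files` (1, 2, 3)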
# combined <- bind_rows(all_surveys, .id = "quarter")

## -----------------------------------------------------------------------------
# # After renaming
# clean_data <- goose_rename_columns(survey_data)
# 
# # Access the mapping
# attr(clean_data, "column_map")
# 
# # Or use the helper function
# mapping <- goose_view_column_map(clean_data)
# 
# # Use in reports
# library(knitr)
# kable(
#   mapping,
#   caption = "Survey Variable Mapping",
#   col.names = c("Variable Name", "Original Question")
# )

## -----------------------------------------------------------------------------
# # Set the maximum variable name length (here 15)
# clean <- goose_rename_columns(survey, max_length = 15)

## -----------------------------------------------------------------------------
# # gooseR automatically handles duplicates by adding numbers
# # "satisfaction_1", "satisfaction_2", etc.

## -----------------------------------------------------------------------------
# # gooseR automatically removes special characters
# # "What's your opinion?" → "opinion"
# # "Rate 1-5: Service" → "rate_service"

## -----------------------------------------------------------------------------
# # Combine with AI analysis
# goose_give_sample(clean_data)
# advice <- goose_ask("What statistical tests should I use for this Likert scale data?")
# 
# # Get code review
# goose_honk(severity = "gentle")
# 
# # Save for later
# goose_save(clean_data, category = "surveys", tags = c("2024", "cleaned"))
# 
# # Create formatted output
# results <- clean_data %>%
#   group_by(age_range) %>%
#   summarise(mean_sat = mean(sat_overall, na.rm = TRUE))
# 
# goose_format_table(results)