# report_satd_alert/execution.R

# Run correlate_satd_pmd() on the local Hive and Spark source trees and store
# each result as an .rds file under temp/ (paths resolved with here::here()).
# Both files are read back further down in this script with read_rds().
# NOTE(review): correlate_satd_pmd() is defined outside this file; judging by
# its name it relates self-admitted technical debt (SATD) to PMD alerts, and
# `bags = TRUE` presumably selects the bag-of-words SATD detection -- confirm.
correlate_satd_pmd(
  code_location = "C:/doutorado/hive-rel-release-2.3.8/hive-rel-release-2.3.8",
  output_file = here::here("temp/hive_bags_dist_stem.rds"),
  bags = TRUE
)

correlate_satd_pmd(
  code_location = "C:/doutorado/spark-2.9.3/spark-2.9.3",
  output_file = here::here("temp/spark_bags_dist_stem.rds"),
  bags = TRUE
)
# 
# correlate_satd_pmd(
#   code_location = "C:/doutorado/ant/ant-rel-1.10.10",
#   output_file = here::here("temp/ant_bags_2.rds"),
#   bags = TRUE
# )
# 
# correlate_satd_pmd(
#   code_location = "C:/doutorado/joda-time/joda-time-2.10",
#   output_file = here::here("temp/jodatime_bags_dist_stem.rds"),
#   bags = TRUE
# )

# Run the analysis on the local Pulsar 2.7.2 checkout; the resulting
# temp/pulsar_bags_dist_stem.rds is read back later in this script.
correlate_satd_pmd(
  code_location = "C:/doutorado/pulsar-2.7.2/pulsar-2.7.2",
  output_file = here::here("temp/pulsar_bags_dist_stem.rds"),
  bags = TRUE
)

# correlate_satd_pmd(
#   code_location = "C:/doutorado/RxJava-3.0.12/RxJava-3.0.12",
#   output_file = here::here("temp/rxjava_bags_dist_stem.rds"),
#   bags = TRUE
# )

# Run the analysis on the local Glide 4.12.0 checkout; the resulting
# temp/glide_bags_dist_stem.rds is read back later in this script.
correlate_satd_pmd(
  code_location = "C:/doutorado/glide-4.12.0/glide-4.12.0",
  output_file = here::here("temp/glide_bags_dist_stem.rds"),
  bags = TRUE
)

# correlate_satd_pmd(
#   code_location = "C:/doutorado/kubernetes-10.0.1/java-client-parent-java-10.0.1",
#   output_file = here::here("temp/kubernetes_bags_dist_stem.rds"),
#   bags = TRUE
# )
# 
# correlate_satd_pmd(
#   code_location = "C:/doutorado/lottie-android-3.7.0/lottie-android-3.7.0",
#   output_file = here::here("temp/lottie_bags_dist_stem.rds"),
#   bags = TRUE
# )
# 
# correlate_satd_pmd(
#   code_location = "C:/doutorado/EventBus-3.2.0/EventBus-3.2.0",
#   output_file = here::here("temp/eventbus_bags_dist_stem.rds"),
#   bags = TRUE
# )
# 
# 
# correlate_satd_pmd(
#   code_location = "C:/doutorado/joda-money-1.0.1/joda-money-1.0.1",
#   output_file = here::here("temp/jodamoney_bags_dist_stem.rds"),
#   bags = TRUE
# )
# 
# correlate_satd_pmd(
#   code_location = "C:/doutorado/dbeaver-21.0.5/dbeaver-21.0.5",
#   output_file = here::here("temp/dbeaver_bags_dist_stem.rds"),
#   bags = TRUE
# )
# 
# correlate_satd_pmd(
#   code_location = "C:/doutorado/MPAndroidChart-3.1.0/MPAndroidChart-3.1.0",
#   output_file = here::here("temp/mpandroidchart_bags_dist_stem.rds"),
#   bags = TRUE
# )
# 
# correlate_satd_pmd(
#   code_location = "C:/doutorado/checkstyle-checkstyle-8.42/checkstyle-checkstyle-8.42",
#   output_file = here::here("temp/checkstyle_bags_dist_stem.rds"),
#   bags = TRUE
# )
# 
# correlate_satd_pmd(
#   code_location = "C:/doutorado/presto-0.253/presto-0.253",
#   output_file = here::here("temp/presto_bags_dist_stem.rds"),
#   bags = TRUE
# )
# 
# correlate_satd_pmd(
#   code_location = "C:/doutorado/storm-2.2.0/storm-2.2.0",
#   output_file = here::here("temp/storm_bags_dist_stem.rds"),
#   bags = TRUE
# )
# 
# 
# 
#   
# 
# correlate_satd_pmd(
#   code_location = "C:/doutorado/Activiti-7.1.338/Activiti-7.1.338",
#   output_file = here::here("temp/activiti_bags_dist_stem.rds"),
#   bags = TRUE
# )
# 
# 
# 
# correlate_satd_pmd(
#   code_location = "C:/doutorado/ExoPlayer-r2.14.0/ExoPlayer-r2.14.0",
#   output_file = here::here("temp/exoplayer_bags_dist_stem.rds"),
#   bags = TRUE
# )
# 
# 
# 
# correlate_satd_pmd(
#   code_location = "C:/doutorado/hbase-rel-2.2.7/hbase-rel-2.2.7",
#   output_file = here::here("temp/hbase_bags_dist_stem.rds"),
#   bags = TRUE
# )
# 
# correlate_satd_pmd(
#   code_location = "C:/doutorado/beam-2.29.0/beam-2.29.0",
#   output_file = here::here("temp/beam_bags_dist_stem.rds"),
#   bags = TRUE
# )
# 
# 
# correlate_satd_pmd(
#   code_location = "C:/doutorado/spring-security-5.5.0/spring-security-5.5.0",
#   output_file = here::here("temp/spring_bags_dist_stem.rds"),
#   bags = TRUE
# )
# 
# 
# correlate_satd_pmd(
#   code_location = "C:/doutorado/tomcat-10.0.6/tomcat-10.0.6",
#   output_file = here::here("temp/tomcat_bags_dist_stem.rds"),
#   bags = TRUE
# )
# 
# 
# correlate_satd_pmd(
#   code_location = "C:/doutorado/redisson-redisson-3.15.5/redisson-redisson-3.15.5",
#   output_file = here::here("temp/redisson_bags_dist_stem.rds"),
#   bags = TRUE
# )
# 
# 
# 
# Flink run. The output file carries a "_4" suffix, while the read_rds() call
# for Flink later in this script loads temp/flink_bags_dist_stem.rds.
# NOTE(review): the two file names do not match -- confirm which one is meant.
correlate_satd_pmd(
  code_location = "C:/doutorado/flink-release-1.13.0_4/flink-release-1.13.0",
  output_file = here::here("temp/flink_bags_dist_stem_4.rds"),
  bags = TRUE
)

# NetBeans is processed from separately numbered directories (netbeans-12.2.2-N),
# each writing its own numbered result file. Only parts 7 and 9 are run here.
correlate_satd_pmd(
  code_location = "C:/doutorado/netbeans-12.2.2-7",
  output_file = here::here("temp/netbeans_bags_dist_stem_7.rds"),
  bags = TRUE
)


correlate_satd_pmd(
  code_location = "C:/doutorado/netbeans-12.2.2-9",
  output_file = here::here("temp/netbeans_bags_dist_stem_9.rds"),
  bags = TRUE
)



# Combine the nine numbered NetBeans result files into a single result file.
# Parts 1-6 and 8 are not produced in this script, so they must already exist.
# NOTE(review): these paths are relative (no here::here()), unlike every other
# path in this file; bind_outputs_correlate_satd() is defined elsewhere --
# confirm how it resolves them.
bind_outputs_correlate_satd(
  files = c(
    "temp/netbeans_bags_dist_stem_1.rds",
    "temp/netbeans_bags_dist_stem_2.rds",
    "temp/netbeans_bags_dist_stem_3.rds",
    "temp/netbeans_bags_dist_stem_4.rds",
    "temp/netbeans_bags_dist_stem_5.rds",
    "temp/netbeans_bags_dist_stem_6.rds",
    "temp/netbeans_bags_dist_stem_7.rds",
    "temp/netbeans_bags_dist_stem_8.rds",
    "temp/netbeans_bags_dist_stem_9.rds"
  ),
  output_file = "temp/netbeans_bags_dist_stem.rds"
)



# 
# 
# 
# correlate_satd_pmd(
#   code_location = "C:/doutorado/jenkins-jenkins-2.293/jenkins-jenkins-2.293",
#   output_file = here::here("temp/jenkins_bags_dist_stem.rds"),
#   bags = TRUE
# )
# 
# correlate_satd_pmd(
#   code_location = "C:/doutorado/ArgoUML/0_35",
#   output_file = here::here("temp/argouml_bags_dist_stem.rds"),
#   bags = TRUE
# )
# 
# 
# 
# correlate_satd_pmd(
#   code_location = "C:/doutorado/kafka-2.7.1/kafka-2.7.1",
#   output_file = here::here("temp/kafka_bags_dist_stem.rds"),
#   bags = TRUE
# )
# 

# Run the analysis on the local JUnit 5 (r5.7.2) checkout.
correlate_satd_pmd(
  code_location = "C:/doutorado/junit5-r5.7.2/junit5-r5.7.2",
  output_file = here::here("temp/junit_bags_dist_stem.rds"),
  bags = TRUE
)


# Run the analysis on the complete NetBeans 12.2.2 tree.
# NOTE(review): temp/netbeans_bags_dist_stem.rds is also the output_file of the
# bind_outputs_correlate_satd() call earlier in this script, so whichever of the
# two runs last determines the file's content -- confirm this is intended.
correlate_satd_pmd(
  code_location = "C:/doutorado/netbeans-12.2.2",
  output_file = here::here("temp/netbeans_bags_dist_stem.rds"),
  bags = TRUE
)


# Load the stored per-project results into global objects named
# resulbags_dist_stem_<project>. The naming matters: the extract_*() functions
# below find these objects by matching the "resulbags_dist_stem" prefix against
# names(.GlobalEnv).
# Several of these files (e.g. camel) are not produced by any call in this
# script, so they must already exist under temp/ or read_rds() will fail.
resulbags_dist_stem_ant <- read_rds(here::here("temp/ant_bags_dist_stem.rds"))
resulbags_dist_stem_jodatime <- read_rds(here::here("temp/jodatime_bags_dist_stem.rds"))
resulbags_dist_stem_argouml <- read_rds(here::here("temp/argouml_bags_dist_stem.rds"))
resulbags_dist_stem_kafka <- read_rds(here::here("temp/kafka_bags_dist_stem.rds"))
resulbags_dist_stem_hive <- read_rds(here::here("temp/hive_bags_dist_stem.rds"))
resulbags_dist_stem_junit <- read_rds(here::here("temp/junit_bags_dist_stem.rds"))
resulbags_dist_stem_spark <- read_rds(here::here("temp/spark_bags_dist_stem.rds"))
resulbags_dist_stem_netbeans <- read_rds(here::here("temp/netbeans_bags_dist_stem.rds"))
resulbags_dist_stem_pulsar <- read_rds(here::here("temp/pulsar_bags_dist_stem.rds"))
resulbags_dist_stem_rxjava <- read_rds(here::here("temp/rxjava_bags_dist_stem.rds"))
resulbags_dist_stem_glide <- read_rds(here::here("temp/glide_bags_dist_stem.rds"))
resulbags_dist_stem_kubernetes <- read_rds(here::here("temp/kubernetes_bags_dist_stem.rds"))
resulbags_dist_stem_lottie <- read_rds(here::here("temp/lottie_bags_dist_stem.rds"))
resulbags_dist_stem_eventbus <- read_rds(here::here("temp/eventbus_bags_dist_stem.rds"))
resulbags_dist_stem_jodamoney <- read_rds(here::here("temp/jodamoney_bags_dist_stem.rds"))
resulbags_dist_stem_dbeaver <- read_rds(here::here("temp/dbeaver_bags_dist_stem.rds"))
resulbags_dist_stem_mpandroidchart <- read_rds(here::here("temp/mpandroidchart_bags_dist_stem.rds"))
resulbags_dist_stem_checkstyle <- read_rds(here::here("temp/checkstyle_bags_dist_stem.rds"))
resulbags_dist_stem_presto <- read_rds(here::here("temp/presto_bags_dist_stem.rds"))
resulbags_dist_stem_storm <- read_rds(here::here("temp/storm_bags_dist_stem.rds"))
resulbags_dist_stem_camel <- read_rds(here::here("temp/camel_bags_dist_stem.rds"))
resulbags_dist_stem_activiti <- read_rds(here::here("temp/activiti_bags_dist_stem.rds"))
resulbags_dist_stem_exoplayer <- read_rds(here::here("temp/exoplayer_bags_dist_stem.rds"))
resulbags_dist_stem_hbase <- read_rds(here::here("temp/hbase_bags_dist_stem.rds"))
resulbags_dist_stem_beam <- read_rds(here::here("temp/beam_bags_dist_stem.rds"))
resulbags_dist_stem_spring <- read_rds(here::here("temp/spring_bags_dist_stem.rds"))
resulbags_dist_stem_tomcat <- read_rds(here::here("temp/tomcat_bags_dist_stem.rds"))
resulbags_dist_stem_redisson  <- read_rds(here::here("temp/redisson_bags_dist_stem.rds"))
resulbags_dist_stem_flink  <- read_rds(here::here("temp/flink_bags_dist_stem.rds"))
resulbags_dist_stem_jenkins  <- read_rds(here::here("temp/jenkins_bags_dist_stem.rds"))





# resulbags_dist_stem_ant <- read_rds(here::here("temp/ant_bags_dist_stem.rds"))
# resulbags_dist_stem_jodatime <- read_rds(here::here("temp/jodatime_bags_dist_stem.rds"))
# resulbags_dist_stem_argouml <- read_rds(here::here("temp/argouml_bags_dist_stem.rds"))
# Reload only a subset of the projects (kafka, hive, junit, pulsar, glide),
# overwriting the objects of the same name loaded above; the remaining projects
# in this list are commented out.
resulbags_dist_stem_kafka <- read_rds(here::here("temp/kafka_bags_dist_stem.rds"))
resulbags_dist_stem_hive <- read_rds(here::here("temp/hive_bags_dist_stem.rds"))
resulbags_dist_stem_junit <- read_rds(here::here("temp/junit_bags_dist_stem.rds"))
# resulbags_dist_stem_spark <- read_rds(here::here("temp/spark_bags_dist_stem.rds"))
# resulbags_dist_stem_netbeans <- read_rds(here::here("temp/netbeans_bags_dist_stem.rds"))
resulbags_dist_stem_pulsar <- read_rds(here::here("temp/pulsar_bags_dist_stem.rds"))
# resulbags_dist_stem_rxjava <- read_rds(here::here("temp/rxjava_bags_dist_stem.rds"))
resulbags_dist_stem_glide <- read_rds(here::here("temp/glide_bags_dist_stem.rds"))
# resulbags_dist_stem_kubernetes <- read_rds(here::here("temp/kubernetes_bags_dist_stem.rds"))
# resulbags_dist_stem_lottie <- read_rds(here::here("temp/lottie_bags_dist_stem.rds"))
# resulbags_dist_stem_eventbus <- read_rds(here::here("temp/eventbus_bags_dist_stem.rds"))
# resulbags_dist_stem_jodamoney <- read_rds(here::here("temp/jodamoney_bags_dist_stem.rds"))
# resulbags_dist_stem_dbeaver <- read_rds(here::here("temp/dbeaver_bags_dist_stem.rds"))
# resulbags_dist_stem_mpandroidchart <- read_rds(here::here("temp/mpandroidchart_bags_dist_stem.rds"))
# resulbags_dist_stem_checkstyle <- read_rds(here::here("temp/checkstyle_bags_dist_stem.rds"))
# resulbags_dist_stem_presto <- read_rds(here::here("temp/presto_bags_dist_stem.rds"))
# resulbags_dist_stem_storm <- read_rds(here::here("temp/storm_bags_dist_stem.rds"))
# resulbags_dist_stem_camel <- read_rds(here::here("temp/camel_bags_dist_stem.rds"))
# resulbags_dist_stem_activiti <- read_rds(here::here("temp/activiti_bags_dist_stem.rds"))
# resulbags_dist_stem_exoplayer <- read_rds(here::here("temp/exoplayer_bags_dist_stem.rds"))
# resulbags_dist_stem_hbase <- read_rds(here::here("temp/hbase_bags_dist_stem.rds"))
# resulbags_dist_stem_beam <- read_rds(here::here("temp/beam_bags_dist_stem.rds"))
# resulbags_dist_stem_spring <- read_rds(here::here("temp/spring_bags_dist_stem.rds"))
# resulbags_dist_stem_tomcat <- read_rds(here::here("temp/tomcat_bags_dist_stem.rds"))
# resulbags_dist_stem_redisson  <- read_rds(here::here("temp/redisson_bags_dist_stem.rds"))
# resulbags_dist_stem_flink  <- read_rds(here::here("temp/flink_bags_dist_stem.rds"))
# resulbags_dist_stem_jenkins  <- read_rds(here::here("temp/jenkins_bags_dist_stem.rds"))
# 


# Replace every loaded result object with the value returned by
# calculate_summaries_satd_alerts() applied to that object's `data` element.
# Each object is overwritten in place, so anything the loaded result carried
# besides `data` survives only if the function returns it again.
# NOTE(review): calculate_summaries_satd_alerts() is defined elsewhere; the code
# below in this script reads `data`, `n_satd`, `prop_satd`, `prop_no_satd`,
# `p_value` and `data_comments` from these objects -- confirm it returns them all.
# All 30 objects must exist in the global environment for this block to run.
resulbags_dist_stem_ant <- calculate_summaries_satd_alerts(resulbags_dist_stem_ant$data)
resulbags_dist_stem_jodatime <- calculate_summaries_satd_alerts(resulbags_dist_stem_jodatime$data)
resulbags_dist_stem_argouml <- calculate_summaries_satd_alerts(resulbags_dist_stem_argouml$data)
resulbags_dist_stem_kafka <- calculate_summaries_satd_alerts(resulbags_dist_stem_kafka$data)
resulbags_dist_stem_hive <- calculate_summaries_satd_alerts(resulbags_dist_stem_hive$data)
resulbags_dist_stem_junit <- calculate_summaries_satd_alerts(resulbags_dist_stem_junit$data)
resulbags_dist_stem_spark <- calculate_summaries_satd_alerts(resulbags_dist_stem_spark$data)
resulbags_dist_stem_netbeans <- calculate_summaries_satd_alerts(resulbags_dist_stem_netbeans$data)
resulbags_dist_stem_pulsar <- calculate_summaries_satd_alerts(resulbags_dist_stem_pulsar$data)
resulbags_dist_stem_rxjava <- calculate_summaries_satd_alerts(resulbags_dist_stem_rxjava$data)
resulbags_dist_stem_glide <- calculate_summaries_satd_alerts(resulbags_dist_stem_glide$data)
resulbags_dist_stem_kubernetes <- calculate_summaries_satd_alerts(resulbags_dist_stem_kubernetes$data)
resulbags_dist_stem_lottie <- calculate_summaries_satd_alerts(resulbags_dist_stem_lottie$data)
resulbags_dist_stem_eventbus <- calculate_summaries_satd_alerts(resulbags_dist_stem_eventbus$data)
resulbags_dist_stem_jodamoney <- calculate_summaries_satd_alerts(resulbags_dist_stem_jodamoney$data)
resulbags_dist_stem_dbeaver <- calculate_summaries_satd_alerts(resulbags_dist_stem_dbeaver$data)
resulbags_dist_stem_mpandroidchart <- calculate_summaries_satd_alerts(resulbags_dist_stem_mpandroidchart$data)
resulbags_dist_stem_checkstyle <- calculate_summaries_satd_alerts(resulbags_dist_stem_checkstyle$data)
resulbags_dist_stem_presto <- calculate_summaries_satd_alerts(resulbags_dist_stem_presto$data)
resulbags_dist_stem_storm <- calculate_summaries_satd_alerts(resulbags_dist_stem_storm$data)
resulbags_dist_stem_camel <- calculate_summaries_satd_alerts(resulbags_dist_stem_camel$data)
resulbags_dist_stem_activiti <- calculate_summaries_satd_alerts(resulbags_dist_stem_activiti$data)
resulbags_dist_stem_exoplayer <- calculate_summaries_satd_alerts(resulbags_dist_stem_exoplayer$data)
resulbags_dist_stem_hbase <- calculate_summaries_satd_alerts(resulbags_dist_stem_hbase$data)
resulbags_dist_stem_beam <- calculate_summaries_satd_alerts(resulbags_dist_stem_beam$data)
resulbags_dist_stem_spring <- calculate_summaries_satd_alerts(resulbags_dist_stem_spring$data)
resulbags_dist_stem_tomcat <- calculate_summaries_satd_alerts(resulbags_dist_stem_tomcat$data)
resulbags_dist_stem_redisson <- calculate_summaries_satd_alerts(resulbags_dist_stem_redisson$data)
resulbags_dist_stem_flink <- calculate_summaries_satd_alerts(resulbags_dist_stem_flink$data)
resulbags_dist_stem_jenkins <- calculate_summaries_satd_alerts(resulbags_dist_stem_jenkins$data)



# 
# 
# resultados_ant <- calculate_summaries_satd_alerts(resultados_ant$data)
# resultados_jodatime <- calculate_summaries_satd_alerts(resultados_jodatime$data)
# resultados_argouml <- calculate_summaries_satd_alerts(resultados_argouml$data)
# resultados_kafka <- calculate_summaries_satd_alerts(resultados_kafka$data)
# resultados_hive <- calculate_summaries_satd_alerts(resultados_hive$data)
# resultados_junit <- calculate_summaries_satd_alerts(resultados_junit$data)
# resultados_spark <- calculate_summaries_satd_alerts(resultados_spark$data)
# resultados_netbeans <- calculate_summaries_satd_alerts(resultados_netbeans$data)
# resultados_pulsar <- calculate_summaries_satd_alerts(resultados_pulsar$data)
# resultados_rxjava <- calculate_summaries_satd_alerts(resultados_rxjava$data)
# resultados_glide <- calculate_summaries_satd_alerts(resultados_glide$data)
# resultados_kubernetes <- calculate_summaries_satd_alerts(resultados_kubernetes$data)
# resultados_lottie <- calculate_summaries_satd_alerts(resultados_lottie$data)
# resultados_eventbus <- calculate_summaries_satd_alerts(resultados_eventbus$data)
# resultados_jodamoney <- calculate_summaries_satd_alerts(resultados_jodamoney$data)
# resultados_dbeaver <- calculate_summaries_satd_alerts(resultados_dbeaver$data)
# resultados_mpandroidchart <- calculate_summaries_satd_alerts(resultados_mpandroidchart$data)
# resultados_checkstyle <- calculate_summaries_satd_alerts(resultados_checkstyle$data)
# resultados_presto <- calculate_summaries_satd_alerts(resultados_presto$data)
# resultados_storm <- calculate_summaries_satd_alerts(resultados_storm$data)
# resultados_camel <- calculate_summaries_satd_alerts(resultados_camel$data)
# resultados_activiti <- calculate_summaries_satd_alerts(resultados_activiti$data)
# resultados_exoplayer <- calculate_summaries_satd_alerts(resultados_exoplayer$data)
# resultados_hbase <- calculate_summaries_satd_alerts(resultados_hbase$data)
# resultados_beam <- calculate_summaries_satd_alerts(resultados_beam$data)
# resultados_spring <- calculate_summaries_satd_alerts(resultados_spring$data)
# resultados_tomcat <- calculate_summaries_satd_alerts(resultados_tomcat$data)
# resultados_redisson <- calculate_summaries_satd_alerts(resultados_redisson$data)
# resultados_flink <- calculate_summaries_satd_alerts(resultados_flink$data)
# resultados_jenkins <- calculate_summaries_satd_alerts(resultados_jenkins$data)
# 
# 
# 



# Interactive inspection: print the `summarised_data` element of each
# resulbags_<project> object.
# NOTE(review): none of these resulbags_<project> objects is created in this
# script (it only creates resulbags_dist_stem_<project>), so they must come from
# an earlier session or another script. `resulbags_2ant` on the first line does
# not follow the naming of the others (the p_value list below uses
# `resulbags_ant`) -- possibly a leftover name; confirm.
resulbags_2ant$summarised_data
resulbags_jodatime$summarised_data
resulbags_argouml$summarised_data
resulbags_kafka$summarised_data
resulbags_hive$summarised_data
resulbags_junit$summarised_data
resulbags_spark$summarised_data
resulbags_netbeans$summarised_data
resulbags_pulsar$summarised_data
resulbags_rxjava$summarised_data
resulbags_glide$summarised_data
resulbags_kubernetes$summarised_data
resulbags_lottie$summarised_data
resulbags_eventbus$summarised_data
resulbags_jodamoney$summarised_data
resulbags_dbeaver$summarised_data
resulbags_mpandroidchart$summarised_data
resulbags_checkstyle$summarised_data
resulbags_presto$summarised_data
resulbags_storm$summarised_data
resulbags_camel$summarised_data
resulbags_activiti$summarised_data
resulbags_exoplayer$summarised_data
resulbags_hbase$summarised_data
resulbags_beam$summarised_data
resulbags_flatbuffers$summarised_data
resulbags_spring$summarised_data
resulbags_tomcat$summarised_data
resulbags_redisson$summarised_data
resulbags_flink$summarised_data
resulbags_jenkins$summarised_data


# Interactive inspection: print the `p_value` element of each result object.
# This list stops at exoplayer; it is shorter than the summarised_data list.
resulbags_jodatime$p_value 
resulbags_ant$p_value
resulbags_argouml$p_value
resulbags_kafka$p_value
resulbags_hive$p_value
resulbags_junit$p_value
resulbags_spark$p_value
resulbags_netbeans$p_value
resulbags_pulsar$p_value
resulbags_rxjava$p_value
resulbags_glide$p_value
resulbags_kubernetes$p_value
resulbags_lottie$p_value
resulbags_eventbus$p_value
resulbags_jodamoney$p_value
resulbags_dbeaver$p_value
resulbags_mpandroidchart$p_value
resulbags_checkstyle$p_value
resulbags_presto$p_value
resulbags_storm$p_value
resulbags_camel$p_value
resulbags_activiti$p_value
resulbags_exoplayer$p_value



# Build the summary row(s) for one result object held in the global environment.
#
# Arguments:
#   name    Name of an object in .GlobalEnv.
#   prefix  Regular expression identifying result objects. It is matched
#           anywhere in `name` (not only at the start), so a short prefix such
#           as "resulbags" also selects "resulbags_dist_stem_*" objects.
#
# Returns a tibble with the columns project, n_classes, n_methods,
# n_methods_satd, prop_alerts_satd, prop_alerts_no_satd, greater_class and
# p_value when `name` matches `prefix`, and NULL otherwise. `name` is printed in
# both cases as a progress trace.
extract_summary <- function(name, prefix = "resultado") {

  # Objects that are not results contribute nothing.
  if (!str_detect(string = name, pattern = prefix)) {
    print(name)
    return(NULL)
  }

  project_results <- .GlobalEnv[[name]]
  blocks <- project_results$data

  # Number of distinct values in the class_blocks column.
  class_total <- length(unique(blocks$class_blocks))

  # Number of distinct package/class/method/id combinations.
  method_total <- nrow(
    count(blocks, package_blocks, class_blocks, method_blocks, id_blocks)
  )

  share_satd <- project_results$prop_satd
  share_no_satd <- project_results$prop_no_satd

  # Label of the group with the larger proportion; ties are labelled "SATD".
  dominant <- if_else(share_no_satd > share_satd, "NO SATD", "SATD")

  # Project label: the object name with the first "<prefix>_" removed.
  project_label <- str_remove(
    string = name,
    pattern = str_glue("{prefix}\\_")
  )

  summary_row <- tibble(
    project = project_label,
    n_classes = class_total,
    n_methods = method_total,
    n_methods_satd = project_results$n_satd,
    prop_alerts_satd = share_satd,
    prop_alerts_no_satd = share_no_satd,
    greater_class = dominant,
    p_value = project_results$p_value
  )

  print(name)

  summary_row
}



# Return the `data` element of one global result object, tagged with its project.
#
# Arguments:
#   name    Name of an object in .GlobalEnv.
#   prefix  Regular expression identifying result objects; matched anywhere in
#           `name`.
#
# Returns the object's `data` with an added `project` column (the object name
# with the first "<prefix>_" removed) and without the `code_blocks` column, or
# NULL when `name` does not match `prefix`. `name` is printed in both cases.
extract_data <- function(name, prefix = "resultado") {

  if (!str_detect(string = name, pattern = prefix)) {
    print(name)
    return(NULL)
  }

  project_results <- .GlobalEnv[[name]]

  tagged <- mutate(
    project_results$data,
    project = str_remove(string = name, pattern = str_glue("{prefix}\\_"))
  )

  # Drop the code_blocks column from the exported data.
  trimmed <- select(tagged, -code_blocks)

  print(name)

  trimmed
}


# Collect one summary per result object by scanning every name in the global
# environment; names that do not match the prefix yield NULL and are dropped by
# map_df(). Each call prints every global name as a side effect.
# NOTE(review): the prefix is a regular expression matched anywhere in the name,
# so "resulbags" also matches the resulbags_dist_stem_* objects; outputs_bags
# will therefore include them with project labels such as "dist_stem_hive".
outputs <- map_df(.x = names(.GlobalEnv), .f = ~extract_summary(name = .x, prefix = "resultados" ) )
outputs_bags <- map_df(.x = names(.GlobalEnv), .f = ~extract_summary(name = .x, prefix = "resulbags" ) )
outputs_bags_2 <- map_df(.x = names(.GlobalEnv), .f = ~extract_summary(name = .x, prefix = "resulbags_dist_stem" ) )


# Same scan, but stacking the per-block data of every matching object.
outputs_data <- map_df(.x = names(.GlobalEnv), .f = ~extract_data(name = .x, prefix = "resultados") )
outputs_data_bags <- map_df(.x = names(.GlobalEnv), .f = ~extract_data(name = .x, prefix = "resulbags") )
outputs_data_bags_2 <- map_df(.x = names(.GlobalEnv), .f = ~extract_data(name = .x, prefix = "resulbags_dist_stem") )



# Stack the `data_comments` element of every matching object.
# extract_comments_bags() is defined at the end of this file, so that definition
# must be evaluated before this line is run.
outputs_comments <- map_df(.x = names(.GlobalEnv), .f = ~extract_comments_bags(name = .x, prefix = "resulbags_dist_stem") )


# Empty, typed skeleton of the `evaluations` table: zero rows, four columns.
# It is written to the SQLite database below, where it replaces any existing
# table of the same name.
evaluations <- tibble(
  user = character(),
  id_comment_pk = integer(),
  satd = character(),
  justification = character()
)


# Show the structure of the collected comments before they are stored.
str(outputs_comments)






library(DBI)
library(dbplyr)
library(here)

# Lookup table with one row per project: the local root directory of the
# analysed checkout and a GitHub tree URL pinned to a specific commit hash.
# Both the directory and the URL end with a slash.
# NOTE(review): presumably used to turn local file paths into GitHub links by
# swapping root_dir for root_github -- confirm in the consumer of this table.
map_github <- tribble(
  ~project, ~root_dir, ~root_github,
  "junit",   "C:/doutorado/junit5-r5.7.2/junit5-r5.7.2/",        "https://github.com/junit-team/junit5/tree/0ba600e478c728cdb80c83dcbf554c08f7a0755c/",
  "glide",   "C:/doutorado/glide-4.12.0/glide-4.12.0/",          "https://github.com/bumptech/glide/tree/384617791a97f3d2f3052e0b63bd4d971da92f7d/",
  "kafka",   "C:/doutorado/kafka-2.7.1/kafka-2.7.1/",            "https://github.com/apache/kafka/tree/61dbce85d0d41457d81a4096ecaea049f3a4b3ae/",
  "pulsar",  "C:/doutorado/pulsar-2.7.2/pulsar-2.7.2/",          "https://github.com/apache/pulsar/tree/7bf14b5ac049d71c7ff74bbe758cb41aaffeb0af/",
  "hive",    "C:/doutorado/hive-rel-release-2.3.8/hive-rel-release-2.3.8/",   "https://github.com/apache/hive/tree/f1e87137034e4ecbe39a859d4ef44319800016d7/" 
)


# Store the collected tables in the SQLite database verify-satd/db/db.db.
con <- dbConnect(RSQLite::SQLite(), here("verify-satd/db/db.db"))

# Comments are appended to the existing table, with id_comment_pk numbered from
# 1 in row order.
# NOTE(review): because the numbering restarts at 1 on every run while rows are
# appended, running this more than once adds rows with repeated id_comment_pk
# values -- confirm whether the table should be overwritten instead.
dbWriteTable(con, "comments", outputs_comments %>% mutate(id_comment_pk = row_number()), append = TRUE, overwrite = FALSE )

# The remaining tables are replaced wholesale on every run.
dbWriteTable(con, "map_github", map_github, append = FALSE, overwrite = TRUE )

dbWriteTable(con, "evaluations", evaluations, append = FALSE, overwrite = TRUE )

# `bagsword` is read from bagsword/bagsword.xlsx further down in this file, so
# that statement must have been run first. Its numeric columns are converted to
# integer before being stored.
dbWriteTable(
  con, 
  "bags", 
  bagsword %>% 
    mutate(
      across(
        .cols = where(is.numeric),
        .fns = as.integer
      )
    ),  
  append = FALSE, 
  overwrite = TRUE  
)



# Not reached if any statement above stops with an error; the connection then
# stays open for the rest of the session.
dbDisconnect(con)



















# Persist the collected outputs under report_satd_alert/. Only the
# "resulbags_dist_stem" summary (outputs_bags_2) is saved; `outputs` and
# `outputs_bags` are not written anywhere in this script.
write_rds(outputs_bags_2 , here::here("report_satd_alert/output_bags_2.rds"))

# Per-block data of the three result families, stored as feather files.
feather::write_feather(outputs_data_bags, here::here("report_satd_alert/output_data_bags.feather"))
feather::write_feather(outputs_data, here::here("report_satd_alert/output_data.feather"))
feather::write_feather(outputs_data_bags_2, here::here("report_satd_alert/output_data_bags_2.feather"))


# Read the same files back, so the regression part below can also be run from
# the stored files without the objects above.
data_regression <-  feather::read_feather(here::here("report_satd_alert/output_data.feather"))
data_regression_bags <-  feather::read_feather(here::here("report_satd_alert/output_data_bags.feather"))
data_regression_bags_2 <-  feather::read_feather(here::here("report_satd_alert/output_data_bags_2.feather"))



# Prepare the three data sets for the logistic regressions below. Each pipeline
# derives `size` as endline_blocks - beginline_blocks, keeps only blocks with
# size > 3, and recodes has_satd to 0/1 so that it matches the numeric has_satd
# used in the synthetic prediction grid.

# This object was used by the `modelo` fit but was never created, so the fit
# failed with "object not found". It is now built like its two siblings.
# NOTE(review): assumes has_satd in data_regression is logical, as it is treated
# in the bags data sets -- confirm.
data_regression_prepared <- data_regression %>% 
  mutate(
    size = endline_blocks - beginline_blocks
  ) %>% 
  filter(
    size > 3
  ) %>% 
  mutate(
    has_satd = if_else(has_satd, 1, 0)
  )

data_regression_prepared_bags_2 <- data_regression_bags_2 %>% 
  mutate(
    size = endline_blocks - beginline_blocks
  ) %>% 
  filter(
    size > 3
  ) %>% 
  mutate(
    has_satd = if_else(has_satd, 1, 0)
  )

data_regression_prepared_bags <- data_regression_bags %>% 
  mutate(
    size = endline_blocks - beginline_blocks
  ) %>% 
  filter(
    size > 3
  ) %>% 
  mutate(
    has_satd = if_else(has_satd, 1, 0)
  )


# Logistic regression of has_alert on has_satd and log(size), fitted once per
# data set and saved next to the report.
modelo <- glm(data = data_regression_prepared, formula = has_alert ~ has_satd + log(size), family = "binomial" )

write_rds(modelo, here::here("report_satd_alert/modelo.rds"))

modelo_bags <- glm(data = data_regression_prepared_bags, formula = has_alert ~ has_satd + log(size), family = "binomial" )

# Previously written to modelo_bags_2.rds, where the next model immediately
# overwrote it; modelo_bags now gets its own file.
write_rds(modelo_bags, here::here("report_satd_alert/modelo_bags.rds"))



modelo_bags_2 <- glm(data = data_regression_prepared_bags_2, formula = has_alert ~ has_satd + log(size), family = "binomial" )

write_rds(modelo_bags_2, here::here("report_satd_alert/modelo_bags_2.rds"))



summary(modelo_bags)


# Prediction grid: both values of has_satd (0 and 1) crossed with 31 block sizes
# spaced evenly on a log10 scale from 1 to 1000 (62 rows in total).
# The original script defined this identical grid twice; it is defined once here.
synthetic_data <- tibble(
  has_satd = c(0, 1)
) %>% 
  crossing(
    tibble(
      size = 10 ^ seq(from = 0, to = 3, by = .1)
    )
  )


# Reload the stored data with `size` added and small blocks removed. The call is
# namespace-qualified like every other feather call in this file, because the
# feather package is never attached here. This object is not used by the
# predictions below.
data_regression <-  feather::read_feather(here::here("report_satd_alert/output_data.feather")) %>%
  mutate(
    size = endline_blocks - beginline_blocks
  ) %>% 
  filter(
    size > 3
  )


# Predicted values on the response scale for every row of the grid.
predictions <- predict.glm(modelo, newdata = synthetic_data, type = "response")

# bind_cols() appends the prediction vector as the third column, which is then
# renamed by position; has_satd is turned into a readable label.
synthetic_predictions <- bind_cols(synthetic_data, predictions) %>% 
  rename(
    prediction = 3
  ) %>% 
  mutate(
    has_satd = if_else(has_satd == 1, "SATD", "NO SATD")
  )



# Same reload for the bags data; also not used by the predictions below.
data_regression_bags <-  feather::read_feather(here::here("report_satd_alert/output_data_bags.feather")) %>%
  mutate(
    size = endline_blocks - beginline_blocks
  ) %>% 
  filter(
    size > 3
  )


predictions_bags <- predict.glm(modelo_bags, newdata = synthetic_data, type = "response")

synthetic_predictions_bags <- bind_cols(synthetic_data, predictions_bags) %>% 
  rename(
    prediction = 3
  ) %>% 
  mutate(
    has_satd = if_else(has_satd == 1, "SATD", "NO SATD")
  )




predictions_bags_2 <- predict.glm(modelo_bags_2, newdata = synthetic_data, type = "response")

synthetic_predictions_bags_2 <- bind_cols(synthetic_data, predictions_bags_2) %>% 
  rename(
    prediction = 3
  ) %>% 
  mutate(
    has_satd = if_else(has_satd == 1, "SATD", "NO SATD")
  )


# Bag-of-words spreadsheet. It is also used by the SQLite export earlier in this
# file, so this line has to be run before that export.
# NOTE(review): read_excel() is not namespace-qualified and readxl is not
# attached in this file -- confirm it is loaded elsewhere.
bagsword <- read_excel(here::here("bagsword/bagsword.xlsx"))



# Save the prediction tables. The files are written in feather format even
# though their names end in ".rds"; the names are kept so that existing readers
# continue to find them.
feather::write_feather(synthetic_predictions, here::here("report_satd_alert/predictions.rds"))

# Previously written to predictions_bags_2.rds, where the next statement
# immediately overwrote it; the bags predictions now get their own file.
feather::write_feather(synthetic_predictions_bags, here::here("report_satd_alert/predictions_bags.rds"))

feather::write_feather(synthetic_predictions_bags_2, here::here("report_satd_alert/predictions_bags_2.rds"))



# Return the `data_comments` element of one global result object, tagged with
# its project.
#
# Arguments:
#   name    Name of an object in .GlobalEnv.
#   prefix  Regular expression identifying result objects; matched anywhere in
#           `name`.
#
# Returns the object's `data_comments` with an added `project` column (the
# object name with the first "<prefix>_" removed), or NULL when `name` does not
# match `prefix`. `name` is printed in both cases as a progress trace.
extract_comments_bags <- function(name, prefix = "resulbags_dist_stem") {

  if (!str_detect(string = name, pattern = prefix)) {
    print(name)
    return(NULL)
  }

  project_results <- .GlobalEnv[[name]]

  tagged_comments <- mutate(
    project_results$data_comments,
    project = str_remove(string = name, pattern = str_glue("{prefix}\\_"))
  )

  print(name)

  tagged_comments
}
# crotman/kludgenudger documentation built on Oct. 19, 2021, 7:30 p.m.