R/Bivdiff.R

Defines functions weighted_correlation wgt_cor n_func varfunc2 varfunc r_cor_func na_func na_help_func difference_summary empty_finder2 empty_finder biv_comp_subfunction

##############################################################
###                                                        ###
### 		Subject:	Compare Samples on a bivariate level     ###
### 		Date: 		May 2023                                 ###
### 		Author: 	Bjoern Rohr                              ###
### 	Version:  	1.00                                     ###
###                                                        ###
### 		Bugfix:   	/                                      ###
###                                                        ###
##############################################################

############################################
### function for only 1 benchmark and df ###
############################################

biv_comp_subfunction<-function(df, benchmark, data = TRUE, corrtype="r",plot_title=NULL, 
                               variables=NULL,ID=NULL,ID_bench=NULL,
                               weight=NULL,weight_bench=NULL, strata=NULL,strata_bench=NULL,
                               p_value=NULL, varlabels=NULL, mar = c(0,0,0,0),
                               note=TRUE,p_adjust=NULL, full=FALSE, grid="white",diff_perc=FALSE,
                               diff_perc_size=4.5,perc_diff_transparance=0 ,gradient=FALSE, 
                               breaks=breaks, colors=NULL, remove_nas="pairwise", nboots=0,
                               parallel = FALSE,adjustment_weighting="raking", 
                               adjustment_vars=NULL,raking_targets=NULL,post_targets=NULL,
                               boot_all=FALSE,percentile_ci=TRUE,number=i) {


  ### Build title
  df1_label<-deparse(substitute(df))
  bench_label<-deparse(substitute(benchmark))
  plot_title<-ifelse(is.null(plot_title),paste("Compare ", df1_label," & ",bench_label, sep = "", collapse = NULL), plot_title)



  ############################
  ### equalize data frames ###
  ############################
  
  ###############################################
  ### When benchmark is a object of dataframe ###
  ###############################################
  if (inherits(benchmark,"data.frame")) {
  df2<-dataequalizer(benchmark,df,variables=variables,silence=TRUE)
  benchmark2<-dataequalizer(df2,benchmark,variables=variables,silence=TRUE)
  

  #### add weight variables again ifany ###
  if (is.null(weight)==FALSE) df2<-df[,c(colnames(df2),weight)]
  if (is.null(ID)==FALSE) df2<-df[,c(colnames(df2),ID)]
  if (is.null(strata)==FALSE) df2<-df[,c(colnames(df2),strata)]
  if (is.null(adjustment_vars)==FALSE)df2<-df[,c(colnames(df2),adjustment_vars)]

  if (is.null(weight_bench)==FALSE) benchmark2<-benchmark[,c(colnames(benchmark2),weight_bench)]
  if (is.null(ID_bench)==FALSE) benchmark2<-benchmark[,c(colnames(benchmark2),ID_bench)]
  if (is.null(strata_bench)==FALSE) benchmark2<-benchmark[,c(colnames(benchmark2), strata_bench)]

  
  #colnames(df)<-c(as.character(1:(ncol(df))))
  ### Build correlation matrices
  if(remove_nas=="all") df2<-stats::na.omit(df2)
  df<-df2
  
  if (is.null(weight)==FALSE & is.null(ID)==FALSE) {
    df[,weight]<-NULL
    df[,ID]<-NULL
    #ID<-df2[,ID]
    #weight<-df2[,weight]
    if (is.null(strata)==FALSE) {
      df[,strata]<-NULL
      #strata<-df2[,strata]
      }
  }

  if(remove_nas=="all") benchmark2<-stats::na.omit(benchmark2)
  benchmark<-benchmark2
  if (is.null(weight_bench)==FALSE & is.null(ID_bench)==FALSE) {
    benchmark[,weight_bench]<-NULL
    benchmark[,ID_bench]<-NULL
    #ID_bench<-benchmark2[,ID_bench]
    #weight_bench<-benchmark2[,weight_bench]
    if (is.null(strata_bench)==FALSE) {
      benchmark[,strata_bench]<-NULL
      #strata_bench<-benchmark2[,strata_bench]
      }
    }
  }
  
  ###########################################
  ### When benchmark is a object of rcorr ###
  ###########################################
  
  if(inherits(benchmark,"data.frame")==FALSE){
    
    fit_cor<-function(rcorr_object,df=NULL,variables=NULL){
      
      ### check for variables in df and variables ###
      if (is.null(variables)==FALSE) vars<-variables[variables %in% colnames(df)]
      if (is.null(variables)) vars<- colnames(df)
      
      
      
      rcorr_object[[1]]<-rcorr_object[[1]][rownames(rcorr_object[[1]]) %in% vars, colnames(rcorr_object[[1]]) %in% vars]
      rcorr_object[[2]]<-rcorr_object[[2]][rownames(rcorr_object[[2]]) %in% vars, colnames(rcorr_object[[2]]) %in% vars]
      rcorr_object[[3]]<-rcorr_object[[3]][rownames(rcorr_object[[3]]) %in% vars, colnames(rcorr_object[[3]]) %in% vars]
      
      rcorr_object
      
    }
    
    benchmark<-fit_cor(benchmark,df=df,variables = variables)
    df2<-df[,colnames(benchmark[[1]])]

    
    if (is.null(weight)==FALSE) df2<-df[,c(colnames(df2),weight)]
    if (is.null(ID)==FALSE) df2<-df[,c(colnames(df2),ID)]
    if (is.null(strata)==FALSE) df2<-df[,c(colnames(df2),strata)]
    if (is.null(adjustment_vars)==FALSE) df2<-df[,c(colnames(df2),adjustment_vars)]
    
    
    if(remove_nas=="all") df2<-stats::na.omit(df2)
    df<-df2
    if (is.null(weight)==FALSE & is.null(ID)==FALSE) {
      df[,weight]<-NULL
      df[,ID]<-NULL
      #ID<-df2[,ID]
      #weight<-df2[,weight]
      if (is.null(strata)==FALSE) {
        df[,strata]<-NULL
        #strata<-df2[,strata]
        }
    }
    
  }
  
  ###########################
  ### Type of correlation ###
  ###########################

  if(corrtype=="r") corrtype<-"pearson"
  
  if (is.null(ID)==FALSE ){
    if(is.na(ID)==TRUE) if(corrtype=="rho") corrtype<-"spearman"
    if(is.na(ID)==FALSE) if(corrtype=="rho") corrtype<-"pearson"
  }
  if (is.null(ID)==TRUE){
    if(corrtype=="rho") corrtype<-"spearman"
  }
  
  
  
  ###############################################
  ### Get correlation matrix of the Benchmark ###
  ###############################################
  
  if (inherits(benchmark,"data.frame")) {
    cor_matrix_bench <- Hmisc::rcorr(as.matrix(benchmark), type = corrtype)
    
    if (is.null(weight_bench)==FALSE & is.null(ID_bench)==FALSE) 
      cor_matrix_bench<- weighted_correlation(benchmark2,weight=weight_bench,
                                               ids=ID_bench,stratas=strata_bench,
                                               variables=colnames(benchmark),remove_nas = remove_nas ,
                                               nboots = 0, parallel = parallel,
                                               corrtype=corrtype,benchmark=TRUE)
    

    gc()  }
  
  if (inherits(benchmark,"data.frame")==FALSE) cor_matrix_bench<-benchmark
  
  
  
  ####################################
  ### Get correlation matrix of df ###
  ####################################
  
  cor_matrix_df <- Hmisc::rcorr(as.matrix(df), type = corrtype)
  if ((is.null(weight)==FALSE & is.null(ID)==FALSE) |is.null(adjustment_vars)==FALSE | nboots>0){
    
    cor_matrix_df<- weighted_correlation(df2,weight=weight,ids=ID,stratas=strata,
                                         bench=benchmark2,weight_bench=weight_bench,
                                         ids_bench=ID_bench,stratas_bench=strata_bench,
                                          variables=colnames(cor_matrix_bench[[1]]), remove_nas=remove_nas,
                                          nboots = nboots, adjustment_vars=adjustment_vars,
                                          raking_targets=raking_targets, 
                                          post_targets=post_targets,
                                          adjustment_weighting=adjustment_weighting,
                                          cor_matrix_bench=cor_matrix_bench,
                                          parallel=parallel,
                                          corrtype=corrtype,benchmark=FALSE,
                                         boot_all=boot_all,
                                         percentile_ci=percentile_ci)}
  
  
  
  #return(cor_matrix_df)###
  # if (is.null(weight)==FALSE & is.null(ID)==FALSE) cor_matrix_df$r<- weighted_cor(df2, weight , ID=ID, strata = strata, return="r")
  # if (is.null(weight)==FALSE & is.null(ID)==FALSE) cor_matrix_df[[3]]<- weighted_cor(df2, weight , ID=ID, strata = strata, return="p")
  cor_matrix_df$r[cor_matrix_df$r=="NaN"]<-NA
  cor_matrix_df$r[cor_matrix_bench$r=="NaN"]<-NA
  cor_matrix_df[[3]][cor_matrix_df[[3]]=="NaN"]<-NA


  
  fischer_cor_df<- suppressWarnings(psych::fisherz(cor_matrix_df$r))
  fischer_cor_bench<- suppressWarnings(psych::fisherz(cor_matrix_bench$r))
  
  fischer_z_test<-suppressWarnings(psych::paired.r(cor_matrix_df$r,cor_matrix_bench$r,n=cor_matrix_df$n, n2=cor_matrix_bench$n))
  fischer_z_test$p[round(cor_matrix_df$r,digits=3) %in%"-1" & round(cor_matrix_bench$r,digits=3) %in% "-1"]<-1
  fischer_z_test$p[fischer_z_test$p=="NaN"]<-NA
  if(is.null(cor_matrix_df$p_diff)==FALSE) fischer_z_test$p<-cor_matrix_df$p_diff
  p_value= ifelse(is.null(p_value),0.05,p_value)

  ### implement p_adjustments if needed ###
  
  for (i in 1:nrow(fischer_z_test$p)){
  if (is.null(p_adjust)==FALSE){ 
    if (p_adjust!=FALSE) fischer_z_test$p[i,]<- matrix(stats::p.adjust(p = fischer_z_test$p[i,], 
                                                                   method = p_adjust,
                                                                   n = ncol(fischer_z_test$p)),
                                                      ncol = ncol(fischer_z_test$p))}}

  ### Compute Comparison Matrix
  comp_matrix<-fischer_cor_df
  comp_matrix[comp_matrix=="Inf"]<-NA
  comp_matrix[fischer_z_test$p=="NaN"]<-NA
  comp_matrix[fischer_z_test$p>p_value]<-breaks[1]
  comp_matrix[fischer_z_test$p<p_value & (cor_matrix_df[[3]]>=p_value & cor_matrix_bench[[3]]>=p_value)]<-breaks[1]
  comp_matrix[fischer_z_test$p<p_value & (cor_matrix_df[[3]]<p_value | cor_matrix_bench[[3]]<p_value)]<-breaks[2]
  comp_matrix[fischer_z_test$p<p_value & (cor_matrix_df[[3]]<p_value | cor_matrix_bench[[3]]<p_value) &
                (abs(cor_matrix_df$r)>2*abs(cor_matrix_bench$r) | abs(cor_matrix_bench$r)>2*abs(cor_matrix_df$r)) &
                ((cor_matrix_df$r<0 & cor_matrix_bench$r<0) | (cor_matrix_df$r>0 & cor_matrix_bench$r>0))]<-breaks[3]
  comp_matrix[fischer_z_test$p<p_value & (cor_matrix_df[[3]]<p_value | cor_matrix_bench[[3]]<p_value) &
                ((cor_matrix_df$r>0 & cor_matrix_bench$r<0) | (cor_matrix_df$r<0 & cor_matrix_bench$r>0))]<-breaks[3] # 1 sig and one positive, while the other negative
  comp_matrix<-as.matrix(comp_matrix)
  if (isFALSE(full)) comp_matrix[upper.tri(comp_matrix)]<-NA

  if (is.null(colors)==TRUE) colors=c('green','yellow','red')

  note_text<- paste("Note: ",breaks[1]," ", colors[1],") means that the Pearson's rs are not significant different. \n" ,breaks[2]," (", colors[2], ") means, at least one is significant >0 or <0 and both are
  significant different from each other. \n",breaks[3]," (", colors[3], ") means all conditions for Small Diff are true and the
  coeficients differ in direction or one is double the value of the other. \nLevel of Significance is p < 0.05.")



  ### Calculate percentage of difference ###

  if(diff_perc==TRUE) {
    percental_difference_b1<-length(comp_matrix[comp_matrix == breaks[1] & is.na(comp_matrix)==FALSE ])/ length(comp_matrix[is.na(comp_matrix)==FALSE])
    percental_difference_b2<-length(comp_matrix[comp_matrix == breaks[2] & is.na(comp_matrix)==FALSE ])/ length(comp_matrix[is.na(comp_matrix)==FALSE])
    if (length(breaks)>2) percental_difference_b3<-length(comp_matrix[comp_matrix == breaks[3] & is.na(comp_matrix)==FALSE ])/ length(comp_matrix[is.na(comp_matrix)==FALSE])
    if (length(breaks)>3) percental_difference_b4<-length(comp_matrix[comp_matrix == breaks[4] & is.na(comp_matrix)==FALSE ])/ length(comp_matrix[is.na(comp_matrix)==FALSE])

    diff_summary<-paste(breaks[1]," :",(round((percental_difference_b1), digits = 3)*100),"% \n",
                        breaks[2]," :",(round(percental_difference_b2, digits = 3)*100),"%")
    if (length(breaks)>2) diff_summary<-paste (diff_summary, "\n",breaks[3], " :", (round(percental_difference_b3, digits = 3)*100),"%")
    if (length(breaks)>3) diff_summary<-paste (diff_summary, "\n",breaks[4], " :", (round(percental_difference_b4, digits = 3)*100),"%")
  }

  ###########################
  ### Get X Instead of NA ###
  ###########################
  
  comp_matrix[lower.tri(comp_matrix) & is.na(comp_matrix)]<-"X"
  #return(comp_matrix)###
  
  ###########################
  # prepare data for ggplot
  ###########################

  comp_matrix_df<-reshape2::melt(comp_matrix)
  colnames(comp_matrix_df) <- c("x", "y", "value")

  if (grid!="white"){ # create a matrix for NA, to exclude from grid
    na_matrix<-comp_matrix_df[is.na(comp_matrix_df$value),]

    comp_matrix_2<-comp_matrix
    comp_matrix_2[is.na(comp_matrix_2)]<- 99

    for (i in 1:(nrow(comp_matrix_2)-1)){
      comp_matrix_2[i+1,i]<-NA
    }

    comp_matrix_df2<-reshape2::melt(comp_matrix_2)
    colnames(comp_matrix_df2) <- c("x", "y", "value")
    edge_matrix<- comp_matrix_df2[is.na(comp_matrix_df2$value),]
  }

  ### add r and bench_r to df
  
  comp_matrix_df$corr<-reshape2::melt(cor_matrix_df$r)$value
  comp_matrix_df$corr_bench<-reshape2::melt(cor_matrix_bench$r)$value
  
  ### add p_values and bench_p_values to df
  
  comp_matrix_df$p<-reshape2::melt(cor_matrix_df$P)$value
  comp_matrix_df$bench_p<-reshape2::melt(cor_matrix_bench$P)$value
  comp_matrix_df$p_diff<-reshape2::melt(fischer_z_test$p)$value
  
  #### add difference to data frame

  difference_r<-(cor_matrix_df$r-cor_matrix_bench$r)
  difference_r<-reshape2::melt(difference_r)
  comp_matrix_df$difference_r<-difference_r$value

  ### add absolute relative difference to data frame
  comp_matrix_df$abs_rel_difference_r<-reshape2::melt(abs((cor_matrix_df$r-cor_matrix_bench$r)/cor_matrix_bench$r))$value
  
  ### change gradient ###

  alpha_matrix<-(abs(cor_matrix_df$r-cor_matrix_bench$r)/2/5)+0.8
  alpha_matrix<-reshape2::melt(alpha_matrix)
  colnames(alpha_matrix) <- c("x", "y", "gradient")
  comp_matrix_df$gradient<-alpha_matrix$gradient
  #}

  if (gradient==FALSE) alpha_matrix$value<-1

  #if (matrix==TRUE) return(comp_matrix_df)

  ##############################
  ###     Label variables    ###
  ##############################

  if (is.null(varlabels)) varlabels<- unique(comp_matrix_df$x)


  ### build bigger out matrix ###


  cor_matrix_df[[1]][upper.tri(cor_matrix_df[[1]], diag= TRUE)]<-NA
  cor_matrix_df[[2]][upper.tri(cor_matrix_df[[2]], diag= TRUE)]<-NA
  cor_matrix_df[[3]][upper.tri(cor_matrix_df[[3]], diag = TRUE)]<-NA
  cor_matrix_bench[[1]][upper.tri(cor_matrix_bench[[1]], diag= TRUE)]<-NA
  cor_matrix_bench[[2]][upper.tri(cor_matrix_bench[[2]], diag= TRUE)]<-NA
  cor_matrix_bench[[3]][upper.tri(cor_matrix_bench[[3]], diag = TRUE)]<-NA
  diff_table<-cor_matrix_df[[1]]-cor_matrix_bench[[1]]
  fischer_z_test$p[upper.tri(fischer_z_test$p,diag=TRUE)]<-NA

  comp_matrix_list<-list(comp_matrix_df,list(cor_matrix_df[[1]],cor_matrix_df[[2]],cor_matrix_df[[3]],
                                             cor_matrix_bench[[1]],cor_matrix_bench[[2]],cor_matrix_bench[[3]],
                                             diff_table,fischer_z_test$p))

  names(comp_matrix_list)<-c("comparison_dataframe", "correlation_data")
  names(comp_matrix_list[[2]])<-c("pearsons_matrix_df","n_matrix_df","p_matrix_df",
                                  "pearsons_r_bench","n_matrix_bench","p_matrix_bench",
                                  "r_diff_matrix","r_diff_p_matrix")

  #  comp_matrix_list<- cor_matrix_df[[1]]
  if (data == TRUE) return(comp_matrix_list)

  #############################
  # Plot Matrix with ggplot2
  #############################


  comparison_plot<-
    ggplot2::ggplot(comp_matrix_df, ggplot2::aes(x = comp_matrix_df$y, y = comp_matrix_df$x, fill = factor(comp_matrix_df$value, levels = breaks))) +
    {if (gradient==TRUE) ggplot2::aes(alpha= alpha_matrix$gradient)}+
    {if (grid != "none") ggplot2::geom_tile(colour= grid, lwd =1,linetype=1)}+
    {if (grid == "none") ggplot2::geom_tile()}+
    {if (grid != "white" & grid != "none") ggplot2::geom_tile(data = na_matrix, colour = "white", lwd=1,linetype=1)}+
    {if (grid != "white" & grid != "none") ggplot2::geom_tile(data = edge_matrix, colour = grid, lwd=1,linetype=1)}+
    ggplot2::coord_fixed()+
    ggplot2::scale_fill_manual(values= colors, name="", na.translate = FALSE)+
    ggplot2::scale_y_discrete(name="", limits = rev(levels(comp_matrix_df$x)), labels= varlabels, breaks=unique(comp_matrix_df$x))+
    ggplot2::scale_x_discrete(name="", limits = levels(comp_matrix_df$y), labels= varlabels, breaks=unique(comp_matrix_df$y))+
    #{if(gradient==TRUE) ggplot2::scale_alpha_continuous(values = alpha_matrix$values, na.translate=FALSE)}+
    ggplot2::theme_classic()+
    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, vjust = 1, hjust=0.9),
                   axis.text.y = ggplot2::element_text(vjust = 1, hjust=0.9),
                   axis.title.x= ggplot2::element_blank(),
                   axis.title.y= ggplot2::element_blank(),
                   plot.margin = grid::unit(mar, "cm"),
                   plot.caption=ggplot2::element_text(hjust = 0))+
    ggplot2::ggtitle(plot_title)+
    ggplot2::guides(alpha="none")

  if(note==TRUE) comparison_plot<-comparison_plot + ggplot2::labs(caption = note_text)


  if (diff_perc==TRUE) {
    label<-diff_summary$label
    comparison_plot <- comparison_plot + ggplot2::geom_label(ggplot2::aes(x = rep(Inf,length(label)), 
                                                                           y = rep(Inf,length(label)), 
                                                                           hjust = rep(1,length(label)), 
                                                                           vjust = rep(1,length(label))), data = diff_summary,
                                                             label=diff_summary$label,
                                                             fill = ggplot2::alpha("white", perc_diff_transparance), 
                                                             color = ggplot2::alpha("black", 1), size= diff_perc_size)}


  #if (gradient==TRUE) return(alpha_matrix)

  if (data == FALSE) return(comparison_plot)

}







#' Compare Multiple Data Frames on a Bivariate Level
#'
#' Compare multiple data frames on a bivariate level and plot them together.
#'
#' @param dfs A character vector containing the names of data frames to compare 
#' against the \code{benchmarks}.
#' @param benchmarks A character vector containing the names of benchmarks to 
#' compare the \code{dfs} against, or the names of a list. If it is a list, it 
#' has to be of the form, as the output of \link[Hmisc]{rcorr}, with a 
#' Pearson's r matrix in the first position, a n-matrix (matrix of n for every 
#' correlation) in the second position and a p-matrix in the third position. 
#' The vector must either be the same length as \code{dfs}, or length 1. If it 
#' has length one every survey will be compared against the same benchmark. 
#' @param variables A character vector that containes the names of the variables for 
#' the comparison. If it is \code{NULL}, all variables that are named similarly 
#' in both the \code{dfs} and the benchmarks will be compared. Variables missing 
#' in one of the \code{dfs} or the \code{benchmarks} will be neglected for this 
#' comparison.
#' @param plot_title A character string containing the title of the plot.
#' @param plots_label A character string or vector of character strings 
#' containing the new names of the data frames that are used in the plot.
#' @param data If \code{TRUE}, a biv_compare object is returned, containing the results 
#' of the comparison.
#' @param id_bench,id A character vector determining id variables used to weigh
#' the \code{dfs} or \code{benchmarks} with the help of the \code{survey} package. They have
#' to be part of the respective data frame. If less characters strings are provided,
#' than in \code{dfs}, the first input is used to weigh every \code{df} or 
#' \code{benchmark}, where no input is provided.
#' @param weight_bench,weight A character vector that determines variables to weigh
#' the \code{dfs} of \code{benchmarks}. They have to be part of the respective 
#' data frame. If fewer characters strings are provided, than in \code{dfs}, 
#' the first input is used to weigh every df or benchmark, where no input is 
#' provided. If a weight variable is provided also an id variable is needed. 
#' For weighting, the \code{survey} package is used.
#' @param strata,strata_bench A character vector that determines strata variables 
#' that are used to weigh the \code{dfs} or \code{benchmarks} with the help of 
#' the \code{survey} package. It has to be part of the respective data frame. 
#' If fewer characters strings are provided, than in \code{dfs}, the first input 
#' is used to weigh every df or benchmark, where no input is provided.
#' @param p_value A number between zero and one to determine the maximum significance niveau.
#' @param varlabels A character string or vector of character strings containing 
#' the new names of variables that is used in the plot.
#' @param mar A vector that determines the margins of the plot.
#' @param note If \code{note = TRUE}, a note will be displayed to describe the plot.
#' @param p_adjust Can be either \code{TRUE} or a character string indicating an 
#' adjustment method. If \code{p_adjust = TRUE} the p_values will be adjusted with the 
#' Bonferroni adjustment method, by default, to account for the problem of 
#' multiple comparisons. All adjustment methods available in 
#' \code{\link{p.adjust}} can be used here, with the same character strings.
#' @param grid A color string, that determines the color of the lines between 
#' the tiles of the heatmap.
#' @param diff_perc If \code{TRUE} a percental difference between surveys and benchmarks is
#' displayed in the plot.
#' @param diff_perc_size A number to determine the size of the displayed percental
#' difference between surveys in the plot.
#' @param perc_diff_transparance A number to determine the transparency of the displayed
#' percental difference between surveys in the plot.
#' @param gradient If \code{gradient = TRUE}, colors in the heatmap will be more 
#' or less transparent, depending on the difference in Pearson's r of the data 
#' frames of comparison.
#' @param sum_weights A vector containing information for every variable to 
#' weigh them in the displayed percental-difference calculation. It can be used 
#' if some variables are over- or underrepresented in the analysis.
#' @param order A character vector to determine in which order the variables should be
#' displayed in the plot.
#' @param breaks A vector to label the color scheme in the legend.
#' @param colors A vector to determine the colors in the plot.
#' @param corrtype A character string, indicating the type of the bivariate correlation. 
#' It can either be "r" for Pearson's r or "rho" for Spearman's "rho". At the moment,
#' rho is only applicable to unweighted data.
#' @param missings_x If \code{TRUE}, missing pairs in the plot will be marked with an X.
#' @param remove_nas A character string, that indicates how missing values should be 
#' removed, can either be \code{"all"}, to remove all cases that contain NA in any 
#' of the variables, or \code{"pairwise"}, to remove NAs separately for every variable 
#' pair when calculating Pearson's r.
#' @param ncol_facet The number of columns used in faced_wrap() for the plots.
#' @param nboots A numeric value indicating the number of bootstrap replications. 
#' If \code{nboots = 0} no bootstrapping will be performed. Else \code{nboots} 
#' must be >2. Note, that bootstrapping can be very computationally heavy and can 
#' therefore take a while.
#' @param parallel Can be either \code{FALSE} or a number of cores that should 
#' be used in the function. If it is \code{FALSE}, only one core will be used and 
#' otherwise the given number of cores will be used.
#' @param adjustment_vars Variables used to adjust the survey when using raking 
#' or post-stratification.
#' @param raking_targets A list of raking targets that can be given to the rake 
#' function of \code{\link[survey]{rake}}, to rake the \code{dfs}.
#' @param post_targets A list of post_stratification targets that can be given to 
#' the \code{\link[survey]{postStratify}} function, to post-stratify the \code{dfs}.
#' @param adjustment_weighting A character vector indicating if adjustment 
#' weighting should be used. It can either be \code{"raking"} or \code{"post_start"}.
#' @param boot_all If TURE, both, dfs and benchmarks will be bootstrapped. Otherwise 
#' the benchmark estimate is assumed to be constant.
#' @param percentile_ci If TURE, cofidence intervals will be calculated using the percentile method.
#' If False, they will be calculated using the normal method.
#'
#' @return A object generated with the help of [ggplot2::ggplot2()] visualizes
#' the differences between the data frames and benchmarks. If \code{data = TRUE} 
#' instead of the plot a list will be returned containing information of the 
#' analyses. This \code{biv_compare} object can be used in 
#' \code{plot_biv_compare} to build a plot, or in \code{biv_compare_table}, 
#' to get a table.
#'
#' @details
#' The plot shows a heatmap of a correlation matrix, where the colors are determined by
#' the similarity of the Pearson's r values in both sets of respondents. Leaving 
#' default breaks and colors,
#' * \code{Same} (green) indicates, that the Pearson's r correlation is not significant > 0 in
#' the related data frame or benchmark or the Pearson's r correlations are not significantly
#' different, between data frame and benchmark.
#' * \code{Small Diff} (yellow) indicates that the Pearson's r
#' correlation is significant > 0 in the related data frame or benchmark and the Pearson's r
#' correlations are significantly different, between data frame and benchmark.
#' * \code{Large Diff} (red) indicates, that the same conditions of yellow are fulfilled, and
#' the correlations are either in opposite directions,or one is double the size of the other.
#' 
#' @examples
#' 
#' ## Get Data for comparison
#' 
#' data("card")
#' 
#' north <- card[card$south==0,]
#' white <- card[card$black==0,]
#' 
#' ## use the function to plot the data 
#' bivar_comp<-sampcompR::biv_compare(dfs = c("north","white"),
#'                                    benchmarks = c("card","card"),
#'                                    variables= c("age","educ","fatheduc","motheduc","wage","IQ"),
#'                                    data=FALSE)
#' bivar_comp
#' 
#' 
#' @export

biv_compare<-function (dfs, benchmarks, variables=NULL, corrtype="r", data = TRUE,
                       id=NULL,weight=NULL, strata=NULL,  id_bench=NULL,
                       weight_bench=NULL, strata_bench=NULL, p_value=NULL,
                       p_adjust=NULL, varlabels=NULL, plot_title=NULL, plots_label=NULL,
                       diff_perc=TRUE, diff_perc_size=4.5, perc_diff_transparance=0,  
                       note=FALSE, order=NULL,  breaks=NULL, colors=NULL,  
                       mar = c(0,0,0,0), grid="white", gradient=FALSE,sum_weights= NULL ,missings_x=TRUE, 
                       remove_nas="pairwise", ncol_facet=3, nboots=0, boot_all=FALSE,
                       parallel = FALSE, adjustment_weighting="raking",
                       adjustment_vars=NULL,raking_targets=NULL,
                       post_targets=NULL,percentile_ci=TRUE){





  if(is.null(colors)==TRUE) colors=c('green','yellow','red')
  if (is.null(breaks)) breaks<-c("Same","Small Diff", "Large Diff")

  plot_list<-NULL
  summary_df<-data.frame("samp"=NA,"label"=NA)

  if (is.null(plots_label)) plots_label<-dfs[1:length(dfs)]
  if (is.null(plots_label)==FALSE) {
    if (length(plots_label)<length(dfs)) plots_label[(length(plots_label)+1):length(dfs)]<-dfs[(length(plots_label)+1):length(dfs)]
    if (length(plots_label)>length(dfs)) plots_label<-plots_label[[1:length(dfs)]]
  }
  
  ### prepare some inputs ###
  if (length(benchmarks)>=1 & length(benchmarks) < length(dfs)) benchmarks<-c(benchmarks,rep(benchmarks[1],length(dfs)-length(benchmarks)))
  
  if (length(id)>=1 & length(id) < length(dfs)) id<- c(id,rep(id[1],(length(dfs)-length(id))))
  if (length(weight)>=1 & length(weight) < length(dfs)) weight<- c(weight,rep(weight[1],(length(dfs)-length(weight))))
  if (length(strata)>=1 & length(strata) < length(dfs)) strata<- c(strata,rep(strata[1],(length(dfs)-length(strata))))
  if (length(id_bench)>=1 & length(id_bench) < length(dfs)) id_bench<- c(id_bench,rep(id_bench[1],(length(dfs)-length(id_bench))))
  if (length(weight_bench)>=1 & length(weight_bench) < length(dfs)) weight_bench<- c(weight_bench,rep(weight_bench[1],(length(dfs)-length(weight_bench))))
  if (length(strata_bench)>=1 & length(strata_bench) < length(dfs)) strata_bench<- c(strata_bench,rep(strata_bench[1],(length(dfs)-length(strata_bench))))
  
  ### if p_adjust==TRUE get bonferroni as default ###
  if(is.null(p_adjust)==FALSE) if(isTRUE(p_adjust)) if(is.character(p_adjust)==FALSE) p_adjust<-"bonferroni"
  

  ### Get Dataframes ###
  help<-list()
  
  for (i in 1:length(dfs)){

    curr_df<-get(dfs[i])
    if(is.function(curr_df)) stop(paste("dfs must not be named the same as a existing function"))
    
    curr_bench<-get(benchmarks[i])
    if(is.function(curr_bench)) stop(paste("benchmarks must not be named the same as a existing function"))
    

    if (is.null(weight)==FALSE) {
    if (is.na(weight[i])==FALSE) {
      curr_id <- id[i]
      curr_weight<- weight[i]}
      if (is.null(strata)==FALSE) {if (is.na(strata[i])==FALSE) curr_strata<- strata[i]}}
    

    if (is.null(weight_bench)==FALSE){
      if (is.na(weight_bench[i])==FALSE) {
        curr_id_bench <- id_bench[i]
        curr_weight_bench<- weight_bench[i]}
      if (is.null(strata_bench)==FALSE) {if (is.na(strata_bench[i])==FALSE) curr_strata_bench<- strata_bench[i]}}




    if (is.null(weight)==FALSE) {
      if (is.na(weight[i])) {
        curr_id <- NULL
        curr_weight<- NULL
        if (is.null(strata)==FALSE) {if(is.na(strata[i])) curr_strata<-NULL}}}

    if (is.null(weight)) {
      curr_id <- NULL
      curr_weight<- NULL}
    if (is.null(strata)) curr_strata<-NULL

    if (is.null(weight_bench)==FALSE){
      if (is.na(weight_bench[i])) {
        curr_id_bench <- NULL
        curr_weight_bench<- NULL
        if (is.null(strata_bench)==FALSE) {if(is.na(strata_bench[i])) curr_strata_bench<-NULL}}}

    if (is.null(weight_bench)){
        curr_id_bench <- NULL
        curr_weight_bench<- NULL}
    if (is.null(strata_bench)) curr_strata_bench<-NULL


    help[[i]]<-biv_comp_subfunction(df=curr_df,benchmark = curr_bench, corrtype=corrtype, variables = variables ,
                               plot_title=plot_title, ID_bench=curr_id_bench,
                               weight_bench= curr_weight_bench, ID=curr_id,weight=curr_weight,
                               strata = curr_strata, strata_bench = curr_strata_bench,
                               p_value=p_value, varlabels=varlabels, note=note,p_adjust=p_adjust, gradient=TRUE,
                               data = TRUE, breaks=breaks,remove_nas=remove_nas, nboots = nboots,
                               parallel = parallel, adjustment_weighting=adjustment_weighting,
                               adjustment_vars=adjustment_vars[[i]],
                               raking_targets=raking_targets[[i]],
                               post_targets=post_targets[[i]],boot_all=boot_all,
                               percentile_ci=percentile_ci,number=i)
    message(paste("survey",i,"of",length(dfs),"is compared"))
    
    #return(help[[i]])###
    help[[i]][[1]]$samp<-i
    if (is.null(plots_label)) help[[i]][[1]]$samp_name<-dfs[i]
    if (is.null(plots_label)==FALSE) help[[i]][[1]]$samp_name<-plots_label[i]

    
    if (is.null(plot_list)==FALSE) {
      plot_list[[1]]<-rbind(plot_list[[1]],help[[i]][[1]])
      plot_list[1+i]<-help[[i]][2]
    }

    
    if(is.null(plot_list)) plot_list <- help[[i]]
    


  }

  names(plot_list)<-c(names(plot_list[1]),paste("correlation_data_",plots_label[1:length(dfs)],sep=""))

   



  #######################################
  ### reorder plots to original order ###
  #######################################

 # if (is.null(plots_label)) plot_list[[1]]$samp_name <- factor(plot_list[[1]]$samp_name, levels = dfs)
 # if (is.null(plots_label)==FALSE) plot_list[[1]]$samp_name <- factor(plot_list[[1]]$samp_name, levels = plots_label)
   if (is.null(plots_label)) plots_label <- dfs
   if(length(plots_label)< length(dfs)) plots_label[(length(plots_label)+1):length(dfs)]<-
    dfs[(length(plots_label)+1):length(dfs)]


  
  labellist_biv<-function(lables,values){
    
    # output<-list()
    # for (i in 1:length(lables)){
    #   output[i]<-lables[i]
    # }
    output<-lables
    names(output)<-as.character(values)
    output
  }
  
  
  labellist<-labellist_biv(plots_label,c(1:length(plots_label)))
  
  
  ##########################
  ### add X for missings ###
  ##########################
  
  plot_list[[1]]<-empty_finder(plot_list[[1]],plots_label)
  
  ################################
  ### change color of the grid ###
  ################################
  
  if (grid!="white"){ # create a matrix for NA, to exclude from grid
    
    ### buid a df where no grid shall be set ###
    na_df<-plot_list[[1]][is.na(plot_list[[1]]$value),]
    
    ### build a df, where the diagonal is.
    plot_df2<-plot_list[[1]]
    names_var<-as.character(unique(plot_df2$x))
    names_var<-c(names_var,names_var[1])
    plot_df2$value[is.na(plot_df2$value)]<- "not_edge"
    
    
    
    for (i in 1:length(names_var)){
      #if (is.null(k)==FALSE) k=k+1
      #if (is.null(k)) k<-1
      plot_df2$value[plot_df2$x==names_var[i+1] & plot_df2$y==names_var[i]]<-NA
    }
    
    edge_df<- plot_df2[is.na(plot_df2$value),]
  }
  
  
  ##########################################
  ### Calculate percentage of difference ###
  ##########################################
  
  if(diff_perc==TRUE) {
    summary_df<-difference_summary(plot_list[[1]],breaks=breaks, sum_weights=sum_weights)
  }
  
  
  #####################################
  ### edit shape, breaks and colors ###
  #####################################
  
  breaks2<-c(breaks,"X")
  colors2<-c(colors, "white")
  
  plot_list[[1]]$shape<-NA
  plot_list[[1]]$shape[plot_list[[1]]$value=="X"]<-"X"
  

  ##############################
  ###    order variables     ###
  ##############################
  if (is.null(order) & is.null(variables)==FALSE) order<-variables[variables %in% unique(plot_list[[1]]$x)]
  if (is.null(order)==FALSE) plot_list[[1]]$x<-factor(plot_list[[1]]$x, levels =order)
  if (is.null(order)==FALSE) plot_list[[1]]$y<-factor(plot_list[[1]]$y, levels = order)

  ##############################
  ###     Label variables    ###
  ##############################
  
  variables_in<-unique(plot_list[[1]]$x)
  if (is.null(varlabels)) varlabels<- unique(plot_list[[1]]$x)
  if (length(varlabels)<length(variables_in)) varlabels<-c(varlabels,variables_in[(length(varlabels)+1):length(variables_in)])
  if(length(varlabels)==length(variables)) varlabels<-varlabels[variables %in% unique(plot_list[[1]]$x)]
  
  #return(list(variables,varlabels,unique(plot_title[[1]]$x)))
  plot_list[[1]]<-plot_list[[1]] %>%
    dplyr::mutate(x=forcats::fct_recode(plot_list[[1]]$x, !!! stats::setNames(order, varlabels)),
           y=forcats::fct_recode(plot_list[[1]]$y, !!! stats::setNames(order, varlabels)))
  
  
  
  ####################
  ### Build a Note ###
  ####################
  
  note_text<- paste("Note: ",breaks[1]," (", colors[1],") means that the Pearson's rs are not significant different. \n" ,breaks[2]," (", colors[2], ") means, at least one is significant >0 or <0 and both are
  significant different from each other. \n",breaks[3]," (", colors[3], ") means all conditions for Small Diff are true and the
  coeficients differ in direction or one is double the value of the other. \nLevel of Significance is p < 0.05.")
  
  ####################################
  ### Add information to plot_list ###
  ####################################
  
  plot_list$dfs<-dfs
  plot_list$benchmarks<-benchmarks
  plot_list$variables <-variables 
  plot_list$colors<-colors2
  plot_list$breaks <-breaks 
  plot_list$shape<-plot_list[[1]]$shape
  #plot_list$plots_label<-as.character(unique(plot_list[[1]]$samp_name))
  plot_list$plots_label<-plots_label
  
  if (is.null(plot_title)==FALSE) plot_list$plot_title<-plot_title
  if (is.null(plot_title)) plot_list$plot_title<-NA
  
  if (is.null(varlabels)==FALSE) plot_list$varlabels<-varlabels
  if (is.null(varlabels)) plot_list$varlabels<-NA
  
  if (is.null(p_value)==FALSE) plot_list$p_value <-p_value 
  if (is.null(p_value)) plot_list$p_value <-NA  
  
  if (is.null(id)==FALSE) plot_list$id  <-id  
  if (is.null(id)) plot_list$id  <-NA   
  
  if (is.null(weight)==FALSE) plot_list$weight  <-weight  
  if (is.null(weight)) plot_list$weight  <-NA   
  
  if (is.null(strata)==FALSE) plot_list$strata   <-strata    
  if (is.null(strata)) plot_list$strata   <-NA    
  
  if (is.null(id_bench)==FALSE) plot_list$id_bench  <-id_bench  
  if (is.null(id_bench)) plot_list$id_bench  <-NA   
  
  if (is.null(weight_bench)==FALSE) plot_list$weight_bench  <-weight_bench   
  if (is.null(weight_bench)) plot_list$weight_bench  <-NA   
  
  if (is.null(strata_bench)==FALSE) plot_list$strata_bench <-strata_bench  
  if (is.null(strata_bench)) plot_list$strata_bench <-NA  
  
  if (is.null(p_adjust )==FALSE) plot_list$p_adjust  <-p_adjust   
  if (is.null(p_adjust )) plot_list$p_adjust  <-NA
  
  plot_list$note_text  <-note_text
  
  
  for (i in 1:length(plot_list)){
    plot_list[i][is.na(plot_list[i])]<-list(NULL)}
  
  ###########################
  ### Return if data=TRUE ###
  ###########################
  
  if (data == TRUE) return(plot_list)
  
  ######################
  ###     Plots      ###
  ######################


  comparison_plot<-
    ggplot2::ggplot(plot_list[[1]], ggplot2::aes(x = plot_list[[1]]$y, y = plot_list[[1]]$x)) +
    {if (gradient==TRUE) ggplot2::aes(alpha= gradient)}+
    {if (grid != "none") ggplot2::geom_tile(colour= grid, lwd =1,linetype=1,
                                            ggplot2::aes(fill = factor(plot_list[[1]]$value, levels = breaks)))}+
    {if (grid == "none") ggplot2::geom_tile(ggplot2::aes( fill = factor(plot_list[[1]]$value, levels = breaks)))}+
    {if (grid != "white" & grid != "none") ggplot2::geom_tile(data = na_df, colour = "white", lwd=1,linetype=1,
                                                              ggplot2::aes(fill = factor(plot_list[[1]]$value, levels = breaks)))}+
    {if (grid != "white" & grid != "none") ggplot2::geom_tile(data = edge_df, colour = grid, lwd=1,linetype=1,
                                                              ggplot2::aes(fill = factor(plot_list[[1]]$value, levels = breaks)))}+
    #ggplot2::geom_point(data= subset(plot_list[[1]], value=="X"), ggplot2::aes(x = y, y = x), show.legend = TRUE)+
    {if (missings_x==TRUE) 
      ggplot2::geom_point(show.legend = FALSE, na.rm = TRUE, ggplot2::aes(shape= factor(plot_list[[1]]$shape, levels="X", labels=c("Missing"))))}+
    ggplot2::coord_fixed()+
    ggplot2::scale_fill_manual(values= colors, name="", na.translate = FALSE)+
    ggplot2::scale_y_discrete(name="", limits = rev(levels(plot_list[[1]]$x)), labels= varlabels, breaks=unique(plot_list[[1]]$x))+
    ggplot2::scale_x_discrete(name="", limits = levels(plot_list[[1]]$y), labels= varlabels, breaks=unique(plot_list[[1]]$y))+
    ggplot2::scale_shape_manual(name="", values = c("Missing"=4))+
    ggplot2::theme_classic()+
    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.33, hjust=0),
                   axis.text.y = ggplot2::element_text(vjust = 0.33, hjust=0),
                   axis.title.x= ggplot2::element_blank(),
                   axis.title.y= ggplot2::element_blank(),
                   plot.margin = grid::unit(mar, "cm"),
                   plot.caption=ggplot2::element_text(hjust = 0))+
    ggplot2::ggtitle(plot_title)+
    ggplot2::guides(alpha="none",
                    fill  = ggplot2::guide_legend(order = 1),
                    shape = ggplot2::guide_legend(order = 2))+
    ggplot2::facet_wrap(~ factor(samp,levels=unique(samp),labels = labellist), labeller = ggplot2::labeller(samp = labellist),ncol = ncol_facet)

  if(note==TRUE) comparison_plot<-comparison_plot + ggplot2::labs(caption = plot_list[[1]]$note_text)


  if (diff_perc==TRUE) {
    comparison_plot <- comparison_plot + ggplot2::geom_label(x=Inf, y=Inf,
                                                             ggplot2::aes( hjust = 1, vjust = 1), data=summary_df, 
                                                             label = summary_df$label,
                                                             fill = ggplot2::alpha("white", perc_diff_transparance),
                                                             color = ggplot2::alpha("black", 1), size= diff_perc_size, show.legend = FALSE)}




  if (data == FALSE) return (comparison_plot)

}





# #' Plot Comparison of Multiple Data Frames on a Bivariate Level
# #'
# #' Plot a object generated by \link{biv_compare} function.
# #' @param biv_data_object A object generated by \link{biv_compare} function.
# #' @param plot_title A character string containing the title of the plot.
# #' @param plots_label A character string or vector of character strings containing the
# #' new names of the data frames , also used in plot.
# #' @param p_value A number between 0 and one to determine the maximum significance niveau.
# #' @param varlabels A character string or vector of character strings containing the new
# #' names of variables, also used in plot.
# #' @param mar A vector that determines the margins of the plot.
# #' @param note If note = True, a note will be displayed to describe the Plot.
# #' @param p_adjust Can be either TRUE or a character string indicating a adjustment method.
# #' If p_adjust=TRUE the p_values will be adjusted with the Bonferroni adjustment method, by default,
# #' to account for the problem of multiple comparisons. All adjustment methods available
# #' in \code{\link{p.adjust}} can be used here, with the same character strings.
# #' @param grid Grid determines the color of the lines between the tiles of the heatmap.
# #' @param diff_perc If \code{TRUE} a percental measure of difference between dfs and benchmarks is
# #' displayed in the plot.
# #' @param diff_perc_size A number to determine the size of the displayed percental
# #' difference between surveys in the plot.
# #' @param perc_diff_transparance A number to determine the transparancy of the displayed
# #' percental difference between surveys in the plot.
# #' @param gradient If gradient = TRUE, colors in the heatmap will be more or less transparent,
# #' depending on the difference in Pearson's r of the data frames of comparison.
# #' @param sum_weights A vector containing information for every variable to weight them in
# #' the displayed percental difference calculation. It can be used if some variables are
# #' over- or underrepresented in the analysis.
# #' @param legend_show_x If true the X will be shown in the legend. At the moment, das
# #' does not yet work correctly.
# #' @param order A character vector to determine in which order the variables should be
# #' displayed in the plot.
# #' @param breaks A vector to label the color sheme in the in the legend.
# #' @param colors A vector to determine the colors in the plot.
# #' @param missings_x If TRUE, missing pairs in the plot will be marked with an X.
# #' @param ncol_facet Number of colomns used in faced_wrap() for the plots.
# #'
# #' @return A object generated with the help of [ggplot2::ggplot2()], used to visualize
# #' the differences between the data frames and benchmarks.
# #' @details The plot shows a heatmap of a correlation matrix, where the colors are determined by
# #' the similarity of the Pearson's r value in both sets of respondents. Leaving 
# #' default breaks and colors,
# #' * \code{Same} (green) indicates, that the Pearson's r correlation is not significant > 0 in
# #' the related data frame or benchmark or the Pearson's r correlations are not significant
# #' different, between data frame and benchmark.
# #' * \code{Small Diff} (yellow) indicates that the Pearson's r
# #' correlation is significant > 0 in the related data frame or benchmark and the Pearson's r
# #' correlations are significant different, between data frame and benchmark.
# #' * \code{Large Diff} (red) indicates, that the same coditions of yellow are fulfilled, and
# #' the correlations are either in opposite directions,or one is double the size of the other.
# #'
# #' @examples
# #' 
# #' ## Get Data for comparison
# #' 
# #' data("card")
# #' 
# #' south <- card[card$south==1,]
# #' north <- card[card$south==0,]
# #' black <- card[card$black==1,]
# #' white <- card[card$black==0,]
# #' 
# #' ## use the function to plot the data 
# #' bivar_data<-sampcompR::biv_compare(dfs = c("north","white"),
# #'                                    benchmarks = c("south","black"),
# #'                                    variables= c("age","educ","fatheduc","motheduc","wage","IQ"),
# #'                                    data=TRUE)
# #'                         
# #' sampcompR::plot_biv_compare(bivar_data)
# #'
# #' @export
# 
# 
# plot_biv_compare<-function (biv_data_object, plot_title=NULL, plots_label=NULL,
#                             p_value=NULL, varlabels=NULL,
#                             mar = c(0,0,0,0),note=FALSE, grid="white",diff_perc=TRUE,
#                             diff_perc_size=4.5,perc_diff_transparance=0,gradient=FALSE,sum_weights= NULL,
#                             missings_x = TRUE, legend_show_x=FALSE, order=NULL, breaks=NULL,colors=NULL,
#                             ncol_facet = 3){
# 
# 
# 
#   plot_list<- biv_data_object
#   if(is.null(colors)==TRUE) colors=plot_list$colors
#   if (is.null(breaks)) breaks<-plot_list$breaks
#
# 
#   ##########################################
#   ### Calculate percentage of difference ###
#   ##########################################
# 
#   if(diff_perc==TRUE) {
# 
#     summary_df<-difference_summary(plot_list[[1]],breaks=breaks, sum_weights=sum_weights)
#   }
# 
#   ################################
#   ### change color of the grid ###
#   ################################
# 
#   if (grid!="white"){ # create a matrix for NA, to exclude from grid
# 
#     ### buid a df where no grid shall be set ###
#     na_df<-plot_list[[1]][is.na(plot_list[[1]]$value),]
# 
#     ### build a df, where the diagonal is.
#     plot_df2<-plot_list[[1]]
#     names_var<-as.character(unique(plot_df2$x))
#     names_var<-c(names_var,names_var[1])
#     plot_df2$value[is.na(plot_df2$value)]<- "not_edge"
# 
# 
# 
#     for (i in 1:length(names_var)){
#       plot_df2$value[plot_df2$x==names_var[i+1] & plot_df2$y==names_var[i]]<-NA
#     }
# 
#     edge_df<- plot_df2[is.na(plot_df2$value),]
#   }
# 
# 
# 
#   #######################################
#   ### reorder plots to original order ###
#   #######################################
# 
#   #if (is.null(plots_label)) plot_list[[1]]$samp_name <- factor(plot_list[[1]]$samp_name, levels = unique(plot_list[[1]]$samp_name))
#   #if (is.null(plots_label)==FALSE) plot_list[[1]]$samp_name <- factor(plot_list[[1]]$samp_name, levels = plots_label)
#   if (is.null(plots_label)) plots_label <- plot_list$plots_label
#   if(length(plots_label)< length(plot_list$plots_label)) plots_label[(length(plots_label)+1):length(plot_list$plots_label)]<-
#       plot_list$plots_label[(length(plots_label)+1):length(plot_list$plots_label)]
#   
# 
#   breaks2<-c(breaks,"X")
#   colors2<-c(colors, "white")
# 
#   plot_list[[1]]$shape<-NA
#   plot_list[[1]]$shape[plot_list[[1]]$value=="X"]<-"X"
#   
#   
#   labellist_biv<-function(lables,values){
#     output<-lables
#     names(output)<-as.character(values)
#     output
#   }
#   
#   
#   labellist<-labellist_biv(plots_label,c(1:length(plots_label)))
#   
# 
#   ##############################
#   ###    order variables     ###
#   ##############################
#   if (is.null(order)==FALSE) plot_list[[1]]$x<-factor(plot_list[[1]]$x, levels =order)
#   if (is.null(order)==FALSE) plot_list[[1]]$y<-factor(plot_list[[1]]$y, levels = order)
# 
#   ##############################
#   ###     Label variables    ###
#   ##############################
# 
#   variables_in<-unique(plot_list[[1]]$x)
#   if (is.null(varlabels)) varlabels<- unique(plot_list[[1]]$x)
#   if (length(varlabels)<length(variables_in)) varlables<-c(varlabels,variables_in[(length(varlabels)+1):length(variables_in)])
#   
#   # ########################
#   # ### edit plots_label ###
#   # ########################
#   # 
#   # if (is.null(plots_label)) plots_label<-"dfs"
#   # if (length(plots_label)<length(unique(plot_list[[1]]$samp_name))) plots_label<-c(plots_label,unique(plot_list[[1]]$samp_name)[(length(plots_label)+1:length(unique(plot_list[[1]]$samp_name)))])
#   # 
# 
#   ######################
#   ###     Plots      ###
#   ######################
# 
#   comparison_plot<-
#     ggplot2::ggplot(plot_list[[1]], ggplot2::aes(x = plot_list[[1]]$y, y = plot_list[[1]]$x)) +
#     {if (gradient==TRUE) ggplot2::aes(alpha= gradient)}+
#     {if (grid != "none") ggplot2::geom_tile(colour= grid, lwd =1,linetype=1,
#                                             ggplot2::aes(fill = factor(plot_list[[1]]$value, levels = breaks)))}+
#     {if (grid == "none") ggplot2::geom_tile(ggplot2::aes( fill = factor(plot_list[[1]]$value, levels = breaks)))}+
#     {if (grid != "white" & grid != "none") ggplot2::geom_tile(data = na_df, colour = "white", lwd=1,linetype=1,
#                                                               ggplot2::aes(fill = factor(plot_list[[1]]$value, levels = breaks)))}+
#     {if (grid != "white" & grid != "none") ggplot2::geom_tile(data = edge_df, colour = grid, lwd=1,linetype=1,
#                                                               ggplot2::aes(fill = factor(plot_list[[1]]$value, levels = breaks)))}+
#     #ggplot2::geom_point(data= subset(plot_list[[1]], value=="X"), ggplot2::aes(x = y, y = x), show.legend = TRUE)+
#     {if(missings_x==TRUE) 
#       ggplot2::geom_point(show.legend = legend_show_x, na.rm = TRUE, ggplot2::aes(shape= factor(plot_list$shape, levels="X", labels=c("Missing"))))}+
#     ggplot2::coord_fixed()+
#     ggplot2::scale_fill_manual(values= colors, name="", na.translate = FALSE)+
#     ggplot2::scale_y_discrete(name="", limits = rev(levels(plot_list[[1]]$x)), labels= varlabels, breaks=unique(plot_list[[1]]$x))+
#     ggplot2::scale_x_discrete(name="", limits = levels(plot_list[[1]]$y), labels= varlabels, breaks=unique(plot_list[[1]]$y))+
#     ggplot2::scale_shape_manual(name="", values = c("Missing"=4))+
#     ggplot2::theme_classic()+
#     ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.33, hjust=0),
#                    axis.text.y = ggplot2::element_text(vjust = 0.33, hjust=0),
#                    axis.title.x= ggplot2::element_blank(),
#                    axis.title.y= ggplot2::element_blank(),
#                    plot.margin = grid::unit(mar, "cm"),
#                    plot.caption=ggplot2::element_text(hjust = 0))+
#     ggplot2::ggtitle(plot_title)+
#     ggplot2::guides(alpha="none",
#                     fill  = ggplot2::guide_legend(order = 1),
#                     shape = ggplot2::guide_legend(order = 2))+
#     ggplot2::facet_wrap(~ samp, labeller = ggplot2::labeller(samp = labellist),ncol = ncol_facet)
# 
#   if(note==TRUE) comparison_plot<-comparison_plot + ggplot2::labs(caption = plot_list$note_text)
# 
# 
#   if (diff_perc==TRUE) {
#     comparison_plot <- comparison_plot + ggplot2::geom_label(x=Inf, y=Inf,
#                                                              ggplot2::aes(  hjust = 1, vjust = 1), data=summary_df,
#                                                              label = summary_df$label,
#                                                              fill = ggplot2::alpha("white", perc_diff_transparance),
#                                                              color = ggplot2::alpha("black", 1), size= diff_perc_size, show.legend = FALSE)}
# 
# 
#
# 
#   
#     return (comparison_plot)
# 
# }



empty_finder <- function(df,samp_names){
  
  varnames<-as.character(unique(df$y))
  samps<-as.character(unique(df$samp))
  sampnames<-samp_names
  
  
  
  for (i in 1:length (varnames)) {
    for (j in 1:length (varnames)){
      for (k in 1:length (samps)) {
        
        if ((length(df$value[df[,1]==varnames[i] & df[,2]==varnames[j] & df$samp==samps[k]])==0) &
            (any((df[,1]==varnames[i] & df[,2]==varnames[j] & is.na(df[,3]) & df$samp!=samps[k])))==FALSE) df<-rbind(df, c(varnames[i],varnames[j],"X",NA,NA,NA,NA,NA,NA,NA,NA,samps[k],sampnames[k]))
        if ((length(df$value[df[,1]==varnames[i] & df[,2]==varnames[j] & df$samp==samps[k]])==0) &
            (any((df[,1]==varnames[i] & df[,2]==varnames[j] & df$samp!=samps[k])))==TRUE) df<-rbind(df, c(varnames[i],varnames[j],NA,NA,NA,NA,NA,NA,NA,NA,NA,samps[k],sampnames[k]))
      }
      
    }
    
  }
  return (df)
}


empty_finder2 <- function(df){
  
  varnames<-as.character(unique(df$y))
  sampnames<-as.character(unique(df$samp))
  length(varnames)
  
  for (i in 1:length (varnames)) {
    for (j in 1:length (varnames)){
      for (k in 1:length (sampnames)) {
        
        if ((length(df$value[df[,1]==varnames[i] & df[,2]==varnames[j] & df["samp"]==sampnames[k]])==0) &
            (any((df[,1]==varnames[i] & df[,2]==varnames[j] & is.na(df[,3]) & df["samp"]!=sampnames[k])))==FALSE) df<-rbind(df, c(varnames[i],varnames[j],"X",NA,NA,sampnames[k]))
        if ((length(df$value[df[,1]==varnames[i] & df[,2]==varnames[j] & df["samp"]==sampnames[k]])==0) &
            (any((df[,1]==varnames[i] & df[,2]==varnames[j] & df["samp"]!=sampnames[k])))==TRUE) df<-rbind(df, c(varnames[i],varnames[j],NA,NA,NA,sampnames[k]))
      }
      
    }
    
  }
  return (df)
}



difference_summary<-function(results_object,sum_weights=NULL,breaks){
  
  ### prepare needed variables ###
  varnames<-as.character(unique(results_object$x))
  samps<-unique(results_object$samp)
  results_object$sum_weight<-NA
  summary_df<-data.frame("samp"=NA,"label"=NA)
  
  ### check for sum_weights ###
  if (is.null(sum_weights)) {
    sum_weights<-matrix(data=1, nrow=length(samps), ncol=length(varnames))
  }
  
  for (i in 1:length(samps)){
    
    
    help_matrix<-matrix(NA, nrow=length(varnames), ncol=length(varnames))
    rownames(help_matrix)<-varnames
    colnames(help_matrix)<-varnames
    
    ### build a weight matrix ###
    for (f in 1:length(varnames)){
      for (g in 1:length(varnames)){
        help_matrix[f,g]<-sum_weights[i,][f]*sum_weights[i,][g]
      }
    }
    
    ### turn weight matrix to df ###
    help_matrix_df<-reshape2::melt(help_matrix)
    help_matrix_df$samp<-samps[i]
    
    
    
    ### add help_matrix to results_object ###
    results_object$sum_weight[results_object$samp==samps[i]]<-help_matrix_df$value
    
    
    results_object$sum_weight[is.na(results_object$value)]<-NA
    
    ### build a summary for every sample ###
    
    percental_difference_b1<-sum(results_object$sum_weight[results_object$value == breaks[1] & is.na(results_object$value)==FALSE
                                                           & results_object$samp==samps[i] & results_object$value != "X"])/
      sum(results_object$sum_weight[is.na(results_object$value)==FALSE & results_object$samp==samps[i] & results_object$value != "X"])
    
    percental_difference_b2<-sum(results_object$sum_weight[results_object$value == breaks[2] & is.na(results_object$value)==FALSE
                                                           & results_object$samp==samps[i] & results_object$value != "X"])/
      sum(results_object$sum_weight[is.na(results_object$value)==FALSE & results_object$samp==samps[i] & results_object$value != "X"])
    if (length(breaks)>2) {
      percental_difference_b3<-sum(results_object$sum_weight[results_object$value == breaks[3] & is.na(results_object$value)==FALSE
                                                             & results_object$samp==samps[i] & results_object$value != "X"])/
        sum(results_object$sum_weight[is.na(results_object$value)==FALSE & results_object$samp==samps[i] & results_object$value != "X"])}
    
    
    # Define a function to pad a string with leading spaces
    pad_with_spaces <- function(x, width=4) {
      if (nchar(x) < width) {
        paste0(rep("  ", width - nchar(x)),x)
      } else {
        x
      }
    }
    
    
    diff_summary<-paste0(breaks[1],"  ",pad_with_spaces(format((round((percental_difference_b1), digits = 3)*100),nsmall=1))," %\n",
                         breaks[2],"  ",pad_with_spaces(format((round(percental_difference_b2, digits = 3)*100),nsmall=1))," %")
    if (length(breaks)>2) diff_summary<-paste0 (diff_summary, "\n",breaks[3], "  ", pad_with_spaces(format((round(percental_difference_b3, digits = 3)*100),nsmall=1))," %")
    
    summary_df[i,]<- c(as.character(samps[i]), diff_summary)
  }
  
  return(summary_df)
}





### Some needed little functions ###

na_help_func<-function(x){
  ifelse(x==1,return(1),return(na_help_func(x-1)+x-1))
  #if(x==1) return(1)
  #if(x>=2) return(na_help_func(x-1)+x-1)
}

na_func<-function(values,vars,i){
  
  ifelse(i==0,
         return(rep(NA,(length(vars)-i))),
         return(c(values[na_help_func(i):(na_help_func(i)+(i-1))],rep(NA,length(vars)-i))))
  # if(i==0) return(rep(NA,(length(vars)-i)))
  # else(c(values[na_help_func(i):(na_help_func(i)+(i-1))],rep(NA,length(vars)-i)))
  
}

r_cor_func<-function(design,var1,var2,corrtype="pearson"){
  
  if(corrtype=="pearson"){
  variance <- suppressWarnings(survey::svyvar(stats::reformulate(var1,var2), design, na.rm = TRUE,
                             return.replicates=TRUE))}
  
  if(corrtype=="spearman"){
    
    svyrank<-function(formula, design){
      mf <- stats::model.frame(formula, stats::model.frame(design), na.action = stats::na.omit)
      y<-mf[,1]
      ii <- order(y)
      n <- length(y)
      rankhat <- numeric(n)
      w <- stats::weights(design, "sampling")
      rankhat[ii] <- stats::ave(cumsum(w[ii]) - w[ii]/2, factor(y[ii]))
      rankhat
    }
    
    #rank_desig<-svyrank(stats::reformulate(var1,var2),design)
    
    variance <- suppressWarnings(survey::svyvar(stats::reformulate(var1,var2), 
                                                design, na.rm = TRUE, 
                                                return.replicates=TRUE))
    
    
  }
  suppressWarnings(cormatrix<-stats::cov2cor(as.matrix(variance)))
  # boot_r<-map_dbl(1:nrow(variance[[2]]),~cov2cor(matrix(variance[.x,],ncol=2))[2])
  # boot_SE<-sqrt(stats::var(boot_r))
  list(cormatrix[2],variance)
}

### get a longer list of variables
varfunc<-function(vars,i){
  c(rep(vars[i],i-1))
}

varfunc2<-function(vars,i){
  c(vars[1:i])
}

### Function to calculate pairwise n
n_func<-function(design,var1,var2){
  rows<-"rows"
  
  formula<-paste(rows,  " ~ " ,var1, " + ", var2)
  
  n<-survey::svytotal(stats::as.formula(formula), design, na.rm = TRUE)[1]
  names(n)<-NULL
  n
}

### Function, to weight correlations ###

wgt_cor<-function(df, row, col, i = NULL, weight_var = NULL, stratas = NULL, ids = NULL,  
                  bench=NULL,weight_var_bench=NULL,
                  stratas_bench=NULL,ids_bench=NULL,
                  variables = NULL, remove_nas = "pairwise", nboots=0, 
                  adjustment_weighting="raking",
                  adjustment_vars=NULL, raking_targets=NULL,post_targets=NULL,
                  cor_matrix_bench=NULL,
                  parallel=FALSE,corrtype="pearson",benchmark=FALSE,
                  boot_all=FALSE,
                  percentile_ci=TRUE){
  
  

  
  ### pret dataframe for the boot function
  if(remove_nas=="all") {
    df<-stats::na.omit(df)
    if(isTRUE(boot_all)) bench<-stats::na.omit(bench)}
  df$rows<-1
  
  ### prepare weight variables ###
  ifelse(is.null(ids)==FALSE,
         ifelse(is.na(ids)==FALSE,ids<-df[,ids],c(1:nrow(df)))
         ,ids<-c(1:nrow(df)))
  
  if(isTRUE(boot_all)){
    ifelse(is.null(ids_bench)==FALSE,
           ifelse(is.na(ids_bench)==FALSE,ids_bench<-bench[,ids_bench],c(1:nrow(bench)))
           ,ids_bench<-c(1:nrow(bench)))
  }
  
  ifelse(is.null(weight_var)==FALSE,
         ifelse(is.na(weight_var)==FALSE,weight_var<-df[,weight_var],weight_var<-c(rep(1,nrow(df))))
         ,weight_var<-c(rep(1,nrow(df))))
  
  if(isTRUE(boot_all)){
    ifelse(is.null(weight_var_bench)==FALSE,
           ifelse(is.na(weight_var_bench)==FALSE,weight_var_bench<-bench[,weight_var_bench],weight_var_bench<-c(rep(1,nrow(bench))))
           ,weight_var_bench<-c(rep(1,nrow(bench))))
  }

  
  ### normalize the weight_var
  weight_var<- weight_var/(sum(weight_var)/nrow(df))
  if(isTRUE(boot_all)){weight_var_bench<- weight_var_bench/(sum(weight_var_bench)/nrow(bench))}
  
  if(is.null(stratas)==FALSE){
    ifelse(is.na(stratas)==FALSE,stratas<-df[,stratas],stratas<-NULL)
  }

  if(isTRUE(boot_all)){
    if(is.null(stratas_bench)==FALSE){
      ifelse(is.na(stratas_bench)==FALSE,stratas_bench<-bench[,stratas_bench],stratas_bench<-NULL)
    }
  }
  
  ### turn factor dummy variables into numeric variables ###
  df<-unfactor(df=df,func="d_mean",weight=NULL,strata=NULL,id=NULL)
  
  
  ### Get surveydesign ###
  df_design<- survey::svydesign(id      = ids,
                                strata  = stratas,
                                weights = weight_var,
                                nest    = FALSE,
                                data    = df)
  
  if(isTRUE(boot_all)){
    bench_design<- survey::svydesign(id      = ids_bench,
                                  strata  = stratas_bench,
                                  weights = weight_var_bench,
                                  nest    = FALSE,
                                  data    = bench)
  }
  
  #################################
  ### if no bootstrap is needed ###
  #################################
  
  if(nboots==0){
  
    if(is.null(adjustment_vars)==FALSE & adjustment_weighting=="raking"){
      
      adjustment_vars<-purrr::map(paste0("~",adjustment_vars),stats::as.formula)
      
      survey_design_raked <- survey::rake(design = df_design, 
                                          population.margins = raking_targets, 
                                          sample.margins = adjustment_vars, 
                                          control = list(maxit =1000, epsilon = 1, verbose=FALSE))
      
    }
    
    if(is.null(adjustment_vars)==FALSE & adjustment_weighting=="post_strat"){
      
      #adjustment_vars<-purrr::map(paste0("~",adjustment_vars),as.formula)
      
      survey_design_post <- survey::postStratify(design= df_design, 
                                         strata=stats::reformulate(adjustment_vars), 
                                         population=post_targets,
                                         partial = FALSE)
      
    }
    
    
    }
  
  ##############################
  ### if bootstrap is needed ###
  ##############################
  
  if(nboots>0){
    
    bootstrap_rep_design <- svrep::as_bootstrap_design(df_design,
                                                       type = "Rao-Wu-Yue-Beaumont",
                                                       replicates = nboots)
    
    if(is.null(adjustment_vars)==FALSE & adjustment_weighting=="raking"){
      
      adjustment_vars<-furrr::future_map(paste0("~",adjustment_vars),stats::as.formula)
      
      survey_design_raked <- survey::rake(design = bootstrap_rep_design, 
                                  population.margins = raking_targets, 
                                  sample.margins = adjustment_vars, 
                                  control = list(maxit =1000, epsilon = 1, verbose=FALSE))
    }
    
    if(is.null(adjustment_vars)==FALSE & adjustment_weighting=="post_strat"){
      
      #adjustment_vars<-purrr::map(paste0("~",adjustment_vars),as.formula)
      
      survey_design_post <- survey::postStratify(design= bootstrap_rep_design,
                                         strata=stats::reformulate(adjustment_vars),
                                         population=post_targets,
                                         partial = FALSE)
      

      
    }
    
    
    if(isTRUE(boot_all)){
      
      bench_design <- svrep::as_bootstrap_design(bench_design,
                                                         type = "Rao-Wu-Yue-Beaumont",
                                                         replicates = nboots)}
    
    }
 
  ###############################
  ### choose the right design ###
  ###############################
  
  if(nboots==0 & is.null(adjustment_vars)==TRUE) final_design<-df_design
  if(nboots>0 & is.null(adjustment_vars)==TRUE) final_design<-bootstrap_rep_design
  if(is.null(adjustment_vars)==FALSE & adjustment_weighting=="raking") final_design<-survey_design_raked
  if(is.null(adjustment_vars)==FALSE & adjustment_weighting=="post_strat") final_design<-survey_design_post
  
  
  ############################################
  ### calculate number of n for every pair ###
  ############################################
  
  n<-mapply(n_func,var1=row,var2=col,
            MoreArgs = list(design = df_design))
  
  #####################################
  ### calculate R and variance list ###
  #####################################
  

  if(parallel!=FALSE){future::plan(future::multisession,workers=parallel)}
  

  r_cor_list<-furrr::future_map2(row,col,~r_cor_func(final_design,.x,.y,corrtype=corrtype))
  r<-furrr::future_map_dbl(1:length(row),~r_cor_list[[.x]][[1]])
  var_list<-furrr::future_map(1:length(row),~r_cor_list[[.x]][[2]])
  
  if(isTRUE(boot_all)){
  r_cor_list_bench<-furrr::future_map2(row,col,~r_cor_func(bench_design,.x,.y,corrtype=corrtype))
  r_bench<-furrr::future_map_dbl(1:length(row),~r_cor_list_bench[[.x]][[1]])
  var_list_bench<-furrr::future_map(1:length(row),~r_cor_list_bench[[.x]][[2]])}
  
  
  if(nboots>0){
  boot_r_func<-function(variance){
    
    boot_r<-purrr::map_dbl(1:nrow(variance[[2]]),~suppressWarnings(cov2cor(matrix(variance[[2]][.x,],ncol=2)))[2])
    
  }
  boot_r<-purrr::map(var_list,~boot_r_func(.x))
  if(boot_all==TRUE) boot_r_bench<-purrr::map(var_list_bench,~boot_r_func(.x))
  
  }
  
  
  
  ##########################
  ### calculate p values ###
  ##########################
  
  if(nboots==0){
    
    t<-(abs(r)*sqrt(n-2))/suppressWarnings(sqrt(1-(r^2)))
    p<-2*stats::pt(t,(n-2),lower.tail = FALSE)}
  
  if(nboots>0){
    
    boot_pvalues<-function(boot_object,reference=0,r=NULL,row,col,iteration=NULL,
                           benchmark=FALSE,percentile_ci=TRUE,r1=NULL,r1_bench=NULL){
      
      
      if(is.null(r1_bench)==F){ 
        if(r1=="-1" & r1_bench=="-1") return(1)}
      
      boot_object[boot_object %in% "NaN"]<-NA
      if(!is.null(r)) r[r%in%"NaN"]<-NA
      in_interval <-TRUE
      alpha<-0
      
      ### check if boot_object contains na ###
      if(sum(is.na(boot_object)) > 0 & is.null(iteration)==FALSE){
        
        warning(paste(sum(is.na(boot_object)), "of the", length(boot_object),
                      "bootstraps contain not enough combined cases for the variable pair of:",
                      row[iteration],"and",col[iteration],".\n"))
      }
      if(benchmark==TRUE & sum(is.na(boot_object))==length(boot_object)) return("NaN")
      if(sum(is.na(boot_object))==length(boot_object)) return(NA)
      #if(!is.null(r)) if(is.na(r)) return(NA)
      if(is.null(r)==FALSE) {if(length(r)==1){if(is.na(r)) return(NA)}}
      if(is.null(r)==FALSE) {if(length(r)==1){if(all(is.na(r))) return(NA)}}
      
      
      
      
      ### get p_values up to 0.00001
      while(in_interval & alpha<1){
        alpha <- alpha + 0.001
        #if(iteration==69)browser()
        if(is.null(r) & percentile_ci==TRUE) cis<-c(stats::quantile(boot_object, probs=(1-(alpha/2)),na.rm=TRUE),stats::quantile(boot_object, probs=(alpha/2),na.rm=TRUE))
        if(is.null(r) & percentile_ci==FALSE){
          SE=stats::sd(boot_object,na.rm = T)
          #if(alpha=="2") #browser()#return(1)
          cis<-c(r1 + stats::qnorm(1-alpha/2) * SE,
                 r1 - stats::qnorm(1-alpha/2) * SE)}

        
        if(is.null(r)==FALSE){
          if(percentile_ci==TRUE){
          diff<- boot_object - r
          cis<-c(stats::quantile(diff, probs=(1-(alpha/2)),na.rm=TRUE),stats::quantile(diff, probs=(alpha/2),na.rm=TRUE))}
          
          if(percentile_ci==FALSE){
            SE=stats::sd(boot_object-r,na.rm = T)
            if(alpha=="1") return(1)
            cis<-c(r1-r1_bench + stats::qnorm(1-alpha/2) * SE,
                   r1-r1_bench - stats::qnorm(1-alpha/2) * SE)
            }
          # lower_ci<- (r) - (mean(boot_object,na.rm=TRUE) - (r))-stats::qnorm(1-alpha/2) * (suppressWarnings(sqrt(var(boot_object))))
          # upper_ci<- (r) - (mean(boot_object,na.rm=TRUE) - (r))+stats::qnorm(1-alpha/2) * (suppressWarnings(sqrt(var(boot_object))))
          # cis<-c(lower_ci,upper_ci)
        }
        
        in_interval<- (reference > cis[1] & reference < cis[2])|(reference < cis[1] & reference > cis[2])

      }
      
      alpha<-alpha-0.001
      in_interval<-TRUE
      
      while(in_interval & alpha<1){
        alpha <- alpha + 0.00001
        if(is.null(r)==TRUE & percentile_ci==TRUE) cis<-c(stats::quantile(boot_object, probs=(1-(alpha/2)),na.rm=TRUE),stats::quantile(boot_object, probs=(alpha/2),na.rm=TRUE))
        
        if(is.null(r)==TRUE & percentile_ci==FALSE){
          SE=stats::sd(boot_object,na.rm = T)
          if(alpha=="1") return(1)
          cis<-c(r1 + stats::qnorm(1-alpha/2) * SE,
                 r1 - stats::qnorm(1-alpha/2) * SE)}
        
        if(is.null(r)==FALSE){
          if(percentile_ci==TRUE){
          diff<- boot_object - r
          cis<-c(stats::quantile(diff, probs=(1-(alpha/2)),na.rm=TRUE),stats::quantile(diff, probs=(alpha/2),na.rm=TRUE))}
          
          if(percentile_ci==FALSE){
            SE=stats::sd(boot_object-r,na.rm = T)
            if(alpha=="1") return(1)
            cis<-c(r1-r1_bench + stats::qnorm(1-alpha/2) * SE,
                   r1-r1_bench - stats::qnorm(1-alpha/2) * SE)}
          # lower_ci<- (r) - (mean(boot_object) - (r))-stats::qnorm(1-alpha/2) * (suppressWarnings(sqrt(var(boot_object))))
          # upper_ci<- (r) - (mean(boot_object) - (r))+stats::qnorm(1-alpha/2) * (suppressWarnings(sqrt(var(boot_object))))
          # cis<-c(lower_ci,upper_ci)
        }
        in_interval<- (reference > cis[1] & reference < cis[2])|(reference < cis[1] & reference > cis[2])
      }
      alpha
    }
    
    
    p<-purrr::map_dbl(1:length(boot_r),~boot_pvalues(boot_r[[.x]],row=row,col=col,iteration=.x,benchmark=benchmark,percentile_ci=percentile_ci,r1=r[.x]))
    

    cor_matrix_bench[[1]][!lower.tri(cor_matrix_bench[[1]])]<-NA
    bench_cor<-c(t(cor_matrix_bench[[1]]))
    bench_cor<-bench_cor[is.na(bench_cor)==FALSE|is.nan(bench_cor)==TRUE]
    
    
    
    
    if(isFALSE(boot_all)) p_diff<-furrr::future_map(1:length(boot_r),~boot_pvalues(boot_r[[.x]],r=bench_cor[.x],benchmark=benchmark,percentile_ci=percentile_ci,r1=r[.x],r1_bench=bench_cor[.x]))
    if(isTRUE(boot_all)) p_diff<-furrr::future_map(1:length(boot_r),~boot_pvalues(boot_r[[.x]],r=boot_r_bench[[.x]],benchmark=benchmark,percentile_ci=percentile_ci, r1=as.numeric(r[.x]), r1_bench=as.numeric(r_bench[.x])))
    
    
  }
    
  
    
    r<- matrix(unlist(lapply(c(0:(length(variables)-1)),
                             na_func,values=r,vars=variables)),
               ncol=length(variables), byrow = TRUE,
               dimnames = (list(variables,variables)))
    
    n<- matrix(unlist(lapply(c(0:(length(variables)-1)),
                             na_func,values=n,vars=variables)),
               ncol=length(variables), byrow = TRUE,
               dimnames = (list(variables,variables)))
    
    p<- matrix(unlist(lapply(c(0:(length(variables)-1)),
                             na_func,values=p,vars=variables)),
               ncol=length(variables),
               nrow=length(variables), byrow = TRUE,
               dimnames = (list(variables,variables)))
    
    if(nboots==0) p_diff<-NULL
    if(nboots>0) p_diff<-matrix(unlist(lapply(c(0:(length(variables)-1)),
                                              na_func,values=p_diff,vars=variables)),
                                ncol=length(variables),
                                nrow=length(variables), byrow = TRUE,
                                dimnames = (list(variables,variables)))
    
    output<- list(r=r,n=n,P=p,p_diff=p_diff)
    
  rm(final_design)
  #if(nboots==0 ) return(output)
  #if(nboots>0 ) return(r)
  return(output)
}





# boot_pvalues2<-function(boot_object,variables){
#   
#   ps<-sapply(1:((length(variables)*(length(variables)-1))/2),subfunc_boot_pvalues2, boot_object=boot_object)
#   ps<-matrix(unlist(lapply(c(0:(length(variables)-1)),
#                            na_func,values=ps,vars=variables)),
#              ncol=length(variables),
#              nrow=length(variables), byrow = TRUE,
#              dimnames = (list(variables,variables)))
#   ps
# }
# 
# subfunc_boot_pvalues2<-function(boot_object,i){
#   
#   if(is.na(as.vector(boot_object$t0)[i])==FALSE){
#     alpha<-boot.pval::boot.pval(boot_object, type="perc",theta_null=0,index = i)}
#   
#   alpha
# }


weighted_correlation<-function(df, cor_matrix_bench,
                               weight = NULL, stratas = NULL, 
                               ids = NULL, 
                               bench=NULL,weight_bench=NULL,
                               stratas_bench=NULL,ids_bench=NULL,
                               variables = NULL, 
                               remove_nas = "pairwise", 
                               nboots = 0, parallel = FALSE, 
                               adjustment_weighting="raking",
                               adjustment_vars=NULL,
                               raking_targets=NULL,
                               post_targets=NULL,
                               corrtype="pearson",
                               benchmark=FALSE,boot_all=FALSE,
                               percentile_ci=TRUE){
  
  
  ### get only needed rows and collumns
  row<- varfunc(variables,2:(length(variables)))
  col<- unlist(sapply(c(1:(length(variables)-1)),varfunc2,vars=variables))
  
  
  if (nboots!=0 & nboots <=1) {
    stop("nboots must be 0 (for analytic p_values) or >1 for bootstrap p_values")}
  
  #if(nboots == 0){
    output<-wgt_cor(df = df, weight_var = weight, stratas = stratas, 
                    ids = ids, variables = variables, 
                    bench=bench,weight_var_bench=weight_bench,
                    stratas_bench=stratas_bench,ids_bench=ids_bench,
                    remove_nas = remove_nas, nboots=nboots,
                    row=row, col=col, 
                    adjustment_weighting=adjustment_weighting,
                    adjustment_vars=adjustment_vars,
                    raking_targets=raking_targets,
                    post_targets=post_targets,
                    cor_matrix_bench=cor_matrix_bench,
                    parallel=parallel,
                    corrtype=corrtype,
                    benchmark=benchmark,
                    boot_all=boot_all,
                    percentile_ci=percentile_ci)

    
  output
  
  
  
}

Try the sampcompR package in your browser

Any scripts or data that you put into this service are public.

sampcompR documentation built on Aug. 8, 2025, 7:35 p.m.