Correlation and Association"

echo_boot <- (count_pearsoncorr>0)
echo_pformat <- (count_pformat>0)
eval_seed <- (refs3 || refs10 || refs11)

```{whites, eval=FALSE, echo = refs3}

Function for the bootstrap confidence interval

pearsoncorr <- function(data,i){ d <- data[i, ] return(cor(d, use="complete.obs", method="pearson")[1,2]) }

```{whites, eval=FALSE, echo = echo_pformat}
# Function for the p-value format:
pformat<-function(p){
  if (p<0.001) return("<0.001") else return (round(p,3))
}
options(na.action=na.omit)

# Analysis for each pair 
for (i in 1:ncol(combs)){

  j = combs[1,i]
  k = combs[2,i]


  # Define relevant datasets for a pair 
  datapair <- data.frame(df_cont[,j],df_cont[,k])
  datapaircomplete <- datapair[complete.cases(datapair),]

  # Unique values 
  unique1 <- length(unique(datapaircomplete[,1])) 
  unique2 <- length(unique(datapaircomplete[,2])) 


  # Consider pair only sample size >= 7 and uniques >=5
  if (min(unique1,unique2) >= 5 && nrow(datapaircomplete)>=7){

          a <- which(colnames(df_code)==colnames(df_cont)[j])
          b <- which(colnames(df_code)==colnames(df_cont)[k])

          cat("`j = `",a)
          cat("\\newline ")                                                                                                    
          cat("`k =`",b)
          cat("\\newline ")
          cat("\\# `Columns:`")
          cat("\\newline ")    
          cat("`cat(colnames(df)[j],\"and\",colnames(df)[k],fill=TRUE)`")
          cat("\\newline ")    

          tryCatch({
            # Define check function for Pearson
            check <- linearity(df_cont[,j],df_cont[,k])
          }, error=function(e){cat("")})     


          tryCatch({
            # Define check function for Spearman & Kendall 
            checkm <- linearity(rank(df_cont[,j]),rank(df_cont[,k]))
          }, error=function(e){cat("")})    


          tryCatch({
            pcorrelation <- cor(df_cont[,j],df_cont[,k], use="complete.obs", method="pearson")
          }, error=function(e){cat("")})    



          tryCatch({
            spcorrelation <- cor(df_cont[,j],df_cont[,k], use="complete.obs", method="spearman")
          }, error=function(e){cat("")})   


          tryCatch({
            ken <- cor(df_cont[,j],df_cont[,k], use="complete.obs", method="kendall")
          }, error=function(e){cat("")}) 


          tryCatch({  
            if (check==TRUE && exists("pcorrelation")) {
              cat("`pcorrelation <- cor(df[,j],df[,k], use=\"complete.obs\", method=\"pearson\")`")
              cat("\\newline ") 
              cat("`round(pcorrelation,4)` # `Pearson correlation coefficient`")
              cat("\\newline ")
              cat("`round(pcorrelation**2*100,1)` # `Variance explained by one variable (percent)`")
              cat("\\newline ")
              cat("\\newline ")
            }
          }, error=function(e){cat("")}) 


          tryCatch({  
            if (check==TRUE && normality(df_cont[,j],df_cont[,k])==TRUE) {
              cat("`fisherci <- cor.test(df[,j],df[,k], method=\"pearson\")`")
              cat("\\newline ")
              cat("`c(round(fisherci$conf.int[1],4),round(fisherci$conf.int[2],4))` # `confidence interval`")
              cat("\\newline ")
              cat("`pformat(fisherci$p.value)` # `p-value`")
              cat("\\newline ")
              cat("\\newline ")

            }
          }, error=function(e){cat("")}) 


          tryCatch({  
            if (check==TRUE && normality(df_cont[,j],df_cont[,k])==FALSE && nrow(datapaircomplete) < 800) {

              set.seed(seed)
              bootdata <- boot(datapaircomplete, statistic=pearsoncorr, R=1000)

              if (length(unique(round(bootdata$t,2)))>1){

                cat("`temp <- data.frame(df[,j],df[,k])`")
                cat("\\newline ")
                cat("`temp2 <- temp[complete.cases(temp),]`")
                cat("\\newline ")
                cat("`set.seed(`",seed,"`)`") 
                cat("\\newline ")
                cat("`bootdata <- boot(temp2, statistic=pearsoncorr, R=1000)`")
                cat("\\newline ")
                cat("`bootresult <- boot.ci(bootdata, type=\"bca\")`")
                cat("\\newline ")
                cat("`c(round(bootresult$bca[4],4),round(bootresult$bca[5],4))` # `bootstrap conf. interval`")
                cat("\\newline ")
                cat("\\newline ")

                }}

          }, error=function(e){cat("")}) 


          tryCatch({  
            if ((check==TRUE && normality(df_cont[,j],df_cont[,k])==FALSE && exists("spearman")) || (check==FALSE && checkm==TRUE && exists("spearman")))   {

              cat("`spcorrelation <- cor(df[,j],df[,k], use=\"complete.obs\", method=\"spearman\")`")
              cat("\\newline ") 
              cat("`round(spcorrelation,4)` # `Spearman correlation coefficient`")
              cat("\\newline ")
              cat("\\newline ")
            }
          }, error=function(e){cat("")}) 


          tryCatch({ 
          if ((check==TRUE && normality(df_cont[,j],df_cont[,k])==FALSE && exists("spearman")) || (check==FALSE && checkm==TRUE && exists("spearman")))   {

              spearmanci <- SpearmanRho(df_cont[,j],df_cont[,k], use="complete.obs", conf.level=0.95)
              spearmanci2 <- cor.test(df_cont[,j],df_cont[,k], method="spearman")

              cat("`spearmanci <- SpearmanRho(df[,j],df[,k], use=\"complete.obs\", conf.level=0.95)`")
              cat("\\newline ") 

              if (exists("spearmanci") && !is.nan(spearmanci[2]) && !is.nan(spearmanci[3])) {
                cat("`c(round(spearmanci[2],4),round(spearmanci[3],4))` # `confidence interval`")
                cat("\\newline ") 
              }

              cat("`spearmanci2 <- suppressWarnings(cor.test(df[,j],df[,k], method=\"spearman\"))`")
              cat("\\newline ") 
              cat("`pformat(spearmanci2$p.value)` # `p-value`")
              cat("\\newline ") 
              cat("\\newline ") 

            }
          }, error=function(e){cat("")}) 


           tryCatch({  
           if ((check==TRUE && normality(df_cont[,j],df_cont[,k])==FALSE && exists("ken")) || (check==FALSE && checkm==TRUE && exists("ken"))) {

              cat("`ken <- cor(df[,j],df[,k], use=\"complete.obs\", method=\"kendall\")`")
              cat("\\newline ") 
              cat("`round(ken,4)` # `Kendall's tau correlation coefficient`")
              cat("\\newline ")
              cat("\\newline ")
            }
          }, error=function(e){cat("")}) 


           tryCatch({ 
            if ((check==TRUE && normality(df_cont[,j],df_cont[,k])==FALSE) || (check==FALSE && checkm==TRUE)) {
              kentest <- cor.test(df_cont[,j],df_cont[,k], method="kendall")

               cat("`kentest <-  suppressWarnings(cor.test(df[,j],df[,k], method=\"kendall\"))`")
              cat("\\newline ")
              cat("`pformat(kentest$p.value)` # `p-value`")
              cat("\\newline ")
              cat("\\newline ")
              }
            }, error=function(e){cat("")}) 


           tryCatch({ 

            if (check==FALSE && checkm==FALSE){

              tryCatch({ 

                if (nrow(datapaircomplete)  <= 100){

                  set.seed(seed)
                  mic <- testforDEP(df_cont[,j],df_cont[,k], test="MIC", rm.na=TRUE, p.opt="MC", num.MC = 5000, set.seed = TRUE)

                  cat("\\newline ")
                  cat("`set.seed(`",seed,"`)`") 
                  cat("\\newline ")
                  cat("`mic <- suppressWarnings(testforDEP(df[,j],df[,k], test=\"MIC\", set.seed = TRUE, rm.na=TRUE, num.MC = 5000, p.opt=\"MC\"))`")
                  cat("\\newline ")
                  cat("`micval <- minerva::mine(df[,j],df[,k], na.rm=TRUE)$MIC`")
                  cat("\\newline ")
                  cat("`round(micval,3)`     # `Maximal Information Coefficient (MIC)`")
                  cat("\\newline ")
                  cat("`round(micval*100,2)`     # `Variance explained by one variable (percent)`")
                  cat("\\newline ")
                  cat("`pformat(mic@p_value)` # `p-value MIC`")
                  cat("\\newline ")
                  cat("\\newline ")


                } else if (nrow(datapaircomplete) < 5000){

                mic <- testforDEP(df_cont[,j],df_cont[,k], test="MIC", rm.na=TRUE, p.opt="table")

                cat("\\newline ")
                cat("`mic <- testforDEP(df[,j],df[,k], test=\"MIC\", rm.na=TRUE, p.opt=\"table\")`")
                cat("\\newline ")
                cat("`micval <- minerva::mine(df[,j],df[,k], na.rm=TRUE)$MIC`")
                cat("\\newline ")
                cat("`round(micval,3)`     # `Maximal Information Coefficient (MIC)`")
                cat("\\newline ")
                cat("`round(micval*100,2)`     # `Variance explained by one variable (percent)`")
                cat("\\newline ")
                cat("`pformat(mic@p_value)` # `p-value MIC`")
                cat("\\newline ")
                cat("\\newline ")

                }
              }, error=function(e){cat("")}
              ) 

              tryCatch({ 

                set.seed(seed)
                dist <- dcor.test(datapaircomplete[,1],datapaircomplete[,2], R=100)

                 cat("`temp <- data.frame(df[,j],df[,k])`")
                 cat("\\newline ")
                 cat("`temp2 <- temp[complete.cases(temp),]`")
                 cat("\\newline ")
                 cat("`set.seed(`",seed,"`)`") 
                 cat("\\newline ")
                 cat("`dist <- dcor.test(temp2[,1],temp2[,2], R=100)`")
                 cat("\\newline ")
                 cat("`round(dist$statistic,3)`  # `Distance correlation coefficient`")
                 cat("\\newline ")
                 cat("`pformat(dist$p.value)` # `p-value`")
                 cat("\\newline ")
                 cat("\\newline ")

              }, error=function(e){cat("")}) 

            }

          }, error=function(e){cat("")}) 




          cat("`scatterplotMatrix(~df[,j]+df[,k], smooth=list(smoother=loessLine, spread=FALSE, lty.smooth=1, lwd.smooth=1.5, col.smooth=\"#396e9f\"), var.labels=colnames(df)[c(j,k)],`")
          cat("\\newline ")
          cat("`main=\"Enhanced Scatterplots\", col = \"`#`2fa42d\")`")
          cat("\\newline ")
          cat("\\newline ")
          rm(list=c("check", "checkm","pcorrelation", 
                    "spcorrelation", "ken", "dist", "mic", "fisherci", "bootresult", "spearmanci", "spearmanci2",
                    "kentest"))

}}
cat("\n# References", fill=TRUE)


Try the Statsomat package in your browser

Any scripts or data that you put into this service are public.

Statsomat documentation built on Nov. 17, 2021, 5:17 p.m.