In stp4/stp25stat: Functions for statistical computations

knitr::opts_chunk$set(echo = TRUE, warnings=FALSE)

require(stpvers)

set_my_options(prozent = list(
  digits = c(0, 0),
  style = 2,
  null_percent_sign = "."
))
scale2<- function(x, mn=1, mx=5){
x<-  x-min(x, na.rm=TRUE)
x<-  x/max(x, na.rm=TRUE)
x*(mx -1 ) + mn
}
set.seed(1234)

require(lavaan)

Functions for Statistical Computations

which_output()

#'  #  set_my_options(style_mean = list(line_break = '<br>'))
#'  #  set_my_options(prozent = list(null_percent_sign = '.'))
#'  #  set_my_options(mittelwert = list(mean.style = 2, median.style = "IQR"), 
#'  #              prozent=list(style=2))
#'  #  set_my_options(caption= "include.n")

Deskriptive Tabellen mit sig-Test

require(wakefield)


set.seed(0815)
DF <- r_data_frame(
  n = 30,
  id,
  race,
  age(x = 8:14),
  sex,
  employment = employment,
  hour,
  iq,
  height(mean = 50, sd = 10),
  died,
  Smoker = valid,
  likert,
  date_stamp(prob = probs(12))
) %>% clean_names()
DF$died <- factor(DF$died)

SDM <- function(x, by,...) {
  dat<-  na.omit(data.frame(x=x, by=by))
  n<- nrow(dat)
  or<- sdm<- NA 
  if(n>10){
    if( is.numeric(dat$x) | nlevels(dat$x)>2 ){

      xx <- split(scale(as.numeric(dat$x)), dat$by)
      x1 <- xx[[1]]
      x2 <- xx[[2]]  

      m1 <-  mean(x1)
      m2 <-  mean(x2)
      v1 <-  var(x1)
      v2 <-  var(x2)
      ss <- sqrt((v2 + v1) / 2)
      sdm <- (m1 - m2) / ss
      vr<- v1 / v2
      or<- NA
    }
    else{
      sdm <- vr <- NA 
      or<- try(fisher.test(dat$x, dat$by)$estimate )
      if( or <0.01 )  or<- "--"
      else if (or>100 )or<- "--"
      else or <- stp25rndr::Format2(or, 2)
    }
  }
  rslt<-
    cbind(SDM = stp25rndr::Format2(sdm, 2),
          VR = stp25rndr::Format2(vr, 2),
          OR = or
    )

  rslt
}

#' effect_size
#'
#' Cohen's d  d=(x1-x2)/s psych::cohen.d
#' Hedges' g  g= (x1-x2)/s1s2 ( pooled standard deviation)
#' g enspricht  x<-scale(x)  (mean(x1) - mean(x2)) 
#' 
#' Generalized Log Odds Ratios for Frequency Tables vcd::oddsratio
#' 
#' @param x vector
#' @param by factor
#' @param measure  intern c("mean", "median", "numeric", "factor", "logical")
#' @param measure.test, test  intern c("cattest", "contest", "notest", "ttest", ...)
#' @export
#'
#' @examples
#' 
#'  effect_size(c(2, 3, 4, 2, 3, 4, 3, 6,
#' 7, 6, 8, 9, 4, 5, 6, 7) ,
#' gl(2, 8, labels = c("Control", "Treat")))
#' 
#' effect_size(factor(c(2, 2, 2, 2, 1, 1, 1, 1,
#'                      2, 2, 2, 2, 1, 2, 1, 2)) ,
#'             gl(2, 8, labels = c("Control", "Treat")))
#'             
#'             
effect_size2 <- function(x,
                        by,
                        measure,
                        ...
                        ) {
  dat <-  na.omit(data.frame(x = x, by = by))
  n <- nrow(dat)
  es <- rslt <- "n.a."
  if (n > 10) {
    if (measure %in% c("mean", "median", "numeric")) {
      es <- psych::cohen.d(dat$x, dat$by)
      es <- stp25rndr::Format2(es$cohen.d)
      es <- paste0(es[2], " [", es[1], ", ", es[3], "] ES")
    }
    else  if (measure %in% c("factor", "logical")) {
      # Generalized Log Odds Ratios for Frequency Tables
      if (measure == "factor" & nlevels(dat$x) != 2) {
        es <- "n.a."
      }
      else{
        es <- vcd::oddsratio(table(dat$x, dat$by), log = FALSE)
        if (coef(es) < 0.01)
          es <- "n.a."
        else if (coef(es) > 100)
          es <- "n.a."
        else{
          es <- stp25rndr::rndr_ods(c(coef(es) ,  confint(es)))
          es <- paste0(es[1], " [", es[2], ", ", es[3], "] OR")
        }
      }
    }
  }
  cbind('Odds Ratio/Effect Size' = es)
}

# Zu Auto-Test existiiert eine eigene Funktion unter stp25stat::auto_test

auto_test2 <- function(x,
                      by,
                      measure,
                      measure.test,
                      type = 2) {
  dat <-  na.omit(data.frame(x = x, by = by))
  rslt <- NULL

  contest <- stp25stat:::contest
  cattest <- stp25stat:::cattest

  if (measure.test == "notest") {
    rslt <-  ""
  }
  else if (measure.test == "contest") {
    if (inherits(x, "factor")) {
      dat$x <- as.numeric(dat$x)
    }
    rslt <- stp25stat:::conTest(x ~ by, dat)
  }
  else if (measure.test == "cattest") {
    rslt <- stp25stat:::catTest( ~ x + by, dat)
  }
  else if (measure.test %in% contest) {
    if (inherits(x, "factor")) {
      dat$x <-
        as.numeric(dat$x)
    }
    rslt <- stp25stat:::conTest(x ~ by, dat, measure.test)
  }
  else if (measure.test %in% cattest) {
    rslt <- stp25stat:::catTest( ~ x + by, dat, measure.test)
  }



  if (type == 1) {
    cbind('Statistics' = rslt)
  }
  else if (type == 2) {
    rslt <-   strsplit(rslt, ', p')[[1]]
    cbind("Test Statistics" = rslt[1],
          "p-value" = gsub("=", "", rslt[2]))
  }   else if (type == 3) {
    cbind('Statistics' = gsub(", p", ",<BR>p", rslt))
  }


}

DF %>% Tbll_desc(
  age[median, 1],
  height[1, mean, ttest],
  sex,
  smoker[fisher],
  iq[0],
  likert[2, mean],
  by =  ~ died,
  include.custom = effect_size2,
  include.test=TRUE,
  include.n=TRUE,
  include.total=TRUE
) %>% Output()

    population.model <- '
    Sex =~ sex
    Age =~ age
    Bildungszertifikat =~ edu
    Beruf =~ beruf
    Education  ~ 1.04*Bildungszertifikat + 2.76* Beruf

    single~ 1*age + 2*edu

    Openness =~ o1+o2+o3+o4+o5
    Conscientiousness =~ c1+c2+c3+c4+c5
    Extraversion =~ e1+e2+e3+e4+e5
    Neuroticism =~  n1+n2+n3+n4+n5
    Agreeableness =~ a1+a2+a3+a4+a5

    Einkommen ~ 1.25*Sex +  (-0.31)*Age  + .12*Openness + .23*Conscientiousness + (-0.91)*Extraversion + 2.3*Neuroticism + (-0.541)*Agreeableness

    '

    DF <- lavaan::simulateData(population.model, sample.nobs = 100)

    DF<- transform(DF, 
                   sex=cut(sex, 2, c("m", "f")),
                   age=scale2(age, 18, 55),
                   edu=cut(edu,3, c("low", "med", "hig")),
                   beruf= cut(edu,2, c("blue", "withe")),
                   single =cut(single, 2, c("yes", "no")),
                   Einkommen = (4-log(Einkommen-min(Einkommen)+1))*1000
                   )

Mittelwerte und Prozent

#APA_Correlation(~o1+o2+o3+o4+o5+c1+c2+c3, DF)
Tbll_corr(~o1+o2+o3+o4+o5+c1+c2+c3, DF) %>% Output()

PCA

    vars <- c(
      "o1",  "o2",  "o3",  "o4",  "o5",  "c1",  "c2",  "c3",
      "c4",  "c5",  "e1",  "e2",  "e3",  "e4",  "e5",  "n1",
      "n2",  "n3",  "n4",  "n5",  "a1",  "a2",  "a3",  "a4",  "a5"
    )
     APA_PCA(DF[vars], 5, cut=.35, include.plot = FALSE)

Reliability

    Openness <- Reliability(~o1+o2+o3+o4+o5, DF, check.keys=TRUE)
    Conscientiousness <- Reliability(~ c1+c2+c3+c4+c5, DF, check.keys=TRUE)
    Extraversion <-Reliability(~ e1+e2+e3+e4+e5, DF, check.keys=TRUE)
    Neuroticism <- Reliability(~  n1+n2+n3+n4+n5, DF, check.keys=TRUE)
    Agreeableness <- Reliability(~ a1+a2+a3+a4+a5, DF, check.keys=TRUE)

    DF$O<- Openness$index
    DF$C<- Conscientiousness$index
    DF$E<- Extraversion$index
    DF$N<- Neuroticism$index
    DF$A<- Agreeableness$index

    Alpha2(Openness,Conscientiousness,Extraversion,Neuroticism,Agreeableness)

Test des Regressionsmodels

Mittelwerte und Effecte

fit<- lm(Einkommen ~ sex + age + O + C + E + N + A, DF)
#Tabelle2  gibt die Tabellen formatiert aus.
mean_sd<- Tabelle(fit )
mean_sd$sex %>% Output( "Mittelwerte")
#mean_sd$O %>% Output(output="md")

APA2(effects::Effect("sex", fit))

APA2(visreg::visreg(fit, "sex", plot=FALSE), digits=1)

Regressionsmodel

APA_Table(fit, include.se=FALSE, include.ci=TRUE)

GOF

 APA_Validation(fit)

library(units)
dat<- data.frame(
  spd1 = set_units(1:5, m/s),
  spd2 = set_units(1:5, km/h))

dat<- Label(dat, spd1="Ultra", spd2="Half")

dat

dat %>% Tabelle(spd1, spd2)

Test

Count Data

stats::chisq.test                   Pearson's Chi-squared Test for Count Data
stats::fisher.test                  Fisher's Exact Test for Count Data
stats::mcnemar.test                 McNemar's Chi-squared Test for Count Data
stats::pairwise.prop.test           Pairwise comparisons for proportions
stats::prop.test                    Test of Equal or Given Proportions
stats::prop.trend.test              Test for trend in proportions 
stats::binom.test                   Exact Binomial Test

stats::cor.test                     Test for Association/Correlation Between Paired Samples
stats::ansari.test                  Ansari-Bradley Test

T-Test

stats::t.test                       Student's t-Test    
stats::pairwise.t.test              Pairwise t tests
Hmisc::t.test.cluster               t-test for Clustered Data
stats::oneway.test                  Test for Equal Means in a One-Way Layout

Nicht-parametrische Tests

stats::kruskal.test                 Kruskal-Wallis Rank Sum Test
stats::wilcox.test                  Wilcoxon Rank Sum and Signed Rank Tests    
stats::pairwise.wilcox.test         Pairwise Wilcoxon rank sum tests
Hmisc::biVar                        Bivariate Summaries Computed Separately by a Series of Predictors
stats::mantelhaen.test              Cochran-Mantel-Haenszel Chi-Squared Test for Count Data
stats::friedman.test                Friedman Rank Sum Test
stats::ks.test                      Kolmogorov-Smirnov Tests
stats::shapiro.test                 Shapiro-Wilk Normality Test

survival

survival::cox.zph                   Test the Proportional Hazards Assumption of a Cox Regression
survival::plot.cox.zph              Graphical Test of Proportional Hazards
survival::survdiff                  Test Survival Curve Differences 
survival::survival-internal         Internal survival functions
survival::survobrien                O'Brien's Test for Association of a Single Variable with Survival
survival::survregDtest              Verify a survreg distribution

Power calculations

stats::power.anova.test             Power calculations for balanced one-way analysis of variance tests
stats::power.prop.test              Power calculations two sample test for proportions
stats::power.t.test                 Power calculations for one and two sample t tests    
Hmisc::bpower                       Power and Sample Size for Two-Sample Binomial Test
Hmisc::ciapower                     Power of Interaction Test for Exponential Survival
Hmisc::cpower                       Power of Cox/log-rank Two-Sample Test
Hmisc::spower                       Simulate Power of 2-Sample Test for Survival under Complex Conditions

Sobel

multilevel::sobel                   Estimate Sobel's (1982) Test for Mediation
multilevel::sobel.lme               Estimate Sobel's (1982) Test for Mediation in Two-Level lme Model

GOF

stats::bartlett.test                Bartlett Test of Homogeneity of Variances    
stats::mauchly.test                 Mauchly's Test of Sphericity    
stats::var.test                     F Test to Compare Two Variances    
vcd::coindep_test                   Test for (Conditional) Independence
vcd::goodfit                        Goodness-of-fit Tests for Discrete Data    
stats::poisson.test                 Exact Poisson tests

Andere

stats::Box.test                     Box-Pierce and Ljung-Box Tests
stats::fligner.test                 Fligner-Killeen Test of Homogeneity of Variances
stats::mood.test                    Mood Two-Sample Test of Scale
stats::PP.test                      Phillips-Perron Test for Unit Roots
stats::quade.test                   Quade Test

stp4/stp25stat documentation built on Sept. 17, 2021, 2:03 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

Tweet to @rdrrHQ

GitHub issue tracker

ian@mutexlabs.com