knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.path = "README-" ) require(stpvers)
Generate Random Data Sets
Generates random data sets including: data.frames, lists, and vectors. Readme(https://www.rdocumentation.org/packages/wakefield/versions/0.3.3)] wakefield
GetData(): Ladet verschiedene Dateiformate von csv bis sav. Tabellen im Text-Format koennen direkt gelesen werden. Zurueckgegeben wird ein data.frame.
xls Verwendet readxl::read_excel
Text direkt im R-File Es lassen sich direkt Daten als Text einlesen dabei kann mit Tabel_Expand = TRUE, id.vars = 1:2 gesteuert werden ob eine Kreuztabelle aufgedroeselt werden soll.
LimeSurvy Hier muss eine Liste uebergeben werden die die Filenamen beinhaltet also GetData(list("file.R","file.txt")). Das erste File ist das R-File mit den Labels das zweite die Daten. Weitere moegliche Parameter sind die Zeichencodierung
setwd("C:/Users/wpete/Dropbox/3_Forschung/R-Project/stp25aggregate") x <- "fa\xE7ile" Encoding(x) Encoding(x) <- "latin1" x xx <- iconv(x, "latin1", "UTF-8") Encoding(c(x, xx)) c(x, xx) xx<- "Abc ÖÄÜ ÖÄÜ?" Encoding(xx) <- "bytes" xx # will be encoded in hex #Encoding(xx)<- "latin1" xx #iconv(xx, Encoding(xx), "UTF-8") require(stpvers) require(readr) str(GetData("data/Data-default.csv", sep=",")) GetData("data/Data-ansi.csv", sep=",") GetData("data/Data-utf8.csv", sep=",") read.csv("data/Data-default.csv" ) read.csv("data/Data-ansi.csv") read.csv("data/Data-utf8.csv") read.csv("data/Data-default.csv" , check.names = FALSE, stringsAsFactors =FALSE) read.csv("data/Data-ansi.csv", check.names = FALSE) read.csv("data/Data-utf8.csv", check.names = FALSE) guess_encoding("data/Data-default.csv") read_csv("data/Data-default.csv") read_csv("data/Data-ansi.csv") read_csv("data/Data-utf8.csv")
dat<-GetData(" sex treatment control m 2 3 f 3 4 ", Tabel_Expand = TRUE, id.vars = 1) head(dat) #xtabs(~sex +value, dat)
# library("readxl") DF <- GetData( data_file = 'Raw data/Tabelle Beugesehnen.xlsx', raw_data = "Raw data/Beugesehnen-2.Rdata", sheet = 1, skip = 1 )
Andere Packete für SPSS und XLSX
library(excel.link) library(readxl) fil1 <- "Raw data/LTx-Datenbank aktuell_1-last_NMPvs non NMP.xlsx" dat1 <- readxl::read_excel(fil1, sheet = 1) fil2 <- "Raw data/Metra LTX Datenbank 03-04-20.xlsx" sheet1 <- "Minimal Data Set Metra Patients" sheet2 <- "non transplanted organs" dat2 <- excel.link::xl.read.file( fil2, password = "BC82", xl.sheet = "Minimal Data Set Metra Patients", top.left.cell = "A2" ) library("rio") Excel (.xls and .xlsx), using haven::read_excel. SPSS (.sav), using haven::read_sav
clean_names(tibble::tibble("Öli"=1:3, "p-k"=1:3, "95%-CI"=4:6) ) clean_names( c( " a", "a ", "a %", "a", "$a", "$$$a", "GDP ($)", "GDP (us$)", "a (#)", "a & b", "#", "$", "a_cnt", "Aa&Bb", "camelCasePhrases", "AlphaBetaGamma", "Alpha Beta", "Beta !!! Gamma", "a + b", "a - b", "a * b", "Ösel" ), abbreviate=TRUE )
df <- data.frame( Source = c("A", "B", "C", "F"), x = 1:4, y = 1:4, stringsAsFactors = FALSE ) stp25tools::add_row_df(df, c("Erste Zeile" = 1, "Dritte" = 3)) stp25tools::add_row_df(df, c("Erste Zeile" = 1, "letzte" = 5)) stp25tools::add_row_df(df, "Erste Zeile") stp25tools::add_row_df(df, c("Erste Zeile", "Zweite"))
Data and Variable Transformation Functions
Umformen von einem Breit-Format nach einem Lang-Format. Melt2 und melt2 sind Erweiterungen der reshape2::melt Funktion. Intern wird melt und dcast verwendet.
df <- tibble::tibble( month=c(1,2,3,1,2,3), student= gl(2,3, labels =c("Amy", "Bob")), A=c(9,7,6,8,6,9), B=c(6,7,8,5,6,7) ) df
tidyr::spread(df[-4], student, A) # new style with pivot_wider df[-4] %>% tidyr::pivot_wider(names_from = student, values_from = A, names_prefix = "Student_") (df_w1 <- Wide(df[-4], student, A)) df %>% Wide( month ~ student, A) df %>% Wide( student, c(A, B))
tidyr::gather(df_w1, key = "student", value = "A", Amy, Bob) Long(Amy + Bob ~ month, df_w1, key="student", value="A") Long(df_w1, id.vars=1, key = "student", value = "A")
(df_w2 <- Wide(df, student, c(A, B))) # # new style with pivot_longer Long(list(A=c("Amy_A", "Bob_A" ), B=c("Amy_B", "Bob_B")), df_w2, by = ~ month, key = "student", key.levels= c("Amy", "Bob"))
mean3<- function(x)round(mean(x, na.rm=TRUE), 1) Summarise(A + B ~ student, df, fun=mean3, key = "group", value = "cbc") Summarise(A + B ~ student, df, fun=mean3, margins = TRUE) Summarise(A + B ~ student, df, fun=mean3, formula = variable ~ student, margins = TRUE)
Alternative mosaic
require(mosaic) mean(HELPrct$age) mean( ~ age, data = HELPrct) mean3 <- function(x) round(mean(x, na.rm = TRUE), 1) # # Summarise(daysanysub + dayslink ~ homeless , HELPrct, fun=mean3, key = "group", value = "cbc") # Summarise(daysanysub + dayslink ~ homeless , HELPrct, fun=mean3, margins = TRUE) # Summarise(daysanysub + dayslink ~ homeless , # HELPrct, # fun=mean3, # formula = variable ~ homeless, # margins = TRUE) #namespace:mosaicCore Summarise(daysanysub ~ homeless , HELPrct, fun = mean3) df_stats(daysanysub ~ homeless , HELPrct, mean3, na.action = na.pass) df_stats(daysanysub ~ homeless , HELPrct, mean3) Summarise( daysanysub + dayslink ~ homeless + sex, HELPrct, fun = function(x) c(m = mean(x, na.rm = TRUE), sd = sd(x, na.rm = TRUE)) ) df_stats(daysanysub + dayslink ~ homeless , HELPrct, mean3, na.action = na.pass) df_stats(daysanysub + dayslink ~ homeless , HELPrct, mean3) df_stats(daysanysub + dayslink ~ homeless + sex , HELPrct, mean, sd)
Die Funktion separate_multiple_choice() transformiert einen String mit Trennzeichen zu einem Multi-Set mit 0 und 1. (Separate multiple choice) der param x ist entweder ein Character oder eine zahl
dat <- data.frame( Q1 = c(134, NA, 35, 134, 5, 24), Q2 = c( "Alm Dudler, Essig, Cola", NA, "Cola, Holer", "Alm Dudler, Cola, Essig","Holer", "Bier, Essig" ) ) dat
cbind(dat[-1], separate_multiple_choice(dat$Q2))
dat <- cbind(dat[-2], separate_multiple_choice(dat$Q1, label = c( "Alm Dudler", "Bier", "Cola", "Essig", "Holer" ))) dat
#' Mittelwerte Addieren #' #' #' @param n #' @param m #' @param sd #' calc.mean <- function(n, m, sd) { j <- length(n) #-- Anzahl an Elementen if (length(n) != length(m) | length(n) != length(sd)) stop("Ungleiche Anzahl an n,m oder sd!") #print(paste( m ,"-", trunc(m),"=", m - trunc(m))) #-- Interne Function berechnet zwei Werte calc.mean2 <- function(n, m, sd) { var <- sd ^ 2 n.total <- sum(n) n.minus1 <- n - 1 m.total <- sum(m * n) / n.total var.total <- 1 / (n.total - 1) * (sum(n.minus1 * var) + prod(n) / n.total * diff(m) ^ 2) #var3 = 1/(n1+n2-1) *( (n1-1)*var1 + (n2-1)*var2 + n1*n2/(n1+n2)*((m1-m2)^2) ) data.frame( n = c(n, n.total), m = c(m, m.total), sd = c(sd, sqrt(var.total)), var = c(var, var.total) ) } if (j == 2) { zw <- calc.mean2(n, m, sd) } else{ n1 <- n[1:2] m1 <- m[1:2] sd1 <- sd[1:2] zw <- calc.mean2(n = n1, m = m1, sd = sd1) zw.j <- zw[3, ] for (i in 3:j) { zw.i <- calc.mean2( n = c(zw.j$n, n[i]) , m = c(zw.j$m, m[i]) , sd = c(zw.j$sd , sd[i]) ) zw[i, ] <- zw.i[2, ] zw <- rbind(zw, zw.i[3, ]) } zw } cbind(value = c(1:j, "total") , zw) } x1 <- rnorm( 4, 3.0, 2.1) x2 <- rnorm(10, 4.0, 2.0) x3 <- rnorm(11, 3.5, 3.0) calc.mean( n = c(length(x1), length(x2)), m = c(mean(x1), mean(x2)), sd = c(sd(x1), sd(x2)) ) mean(c(x1, x2)) sd(c(x1, x2)) var(c(x1, x2)) calc.mean( n = c(length(x1), length(x2), length(x3)), m = c(mean(x1), mean(x2), mean(x3)), sd = c(sd(x1), sd(x2) , sd(x3)) ) mean(c(x1, x2, x3)) sd(c(x1, x2, x3)) var(c(x1, x2, x3)) # Total Asf calc.mean( n = c(22, 26, 22, 26), m = c(407, 613, 434, 565) / 100, sd = c(258, 236, 345, 253) / 100 )
library(dplyr) set.seed(0815) n<-15 df<- data.frame(x=rnorm(n), y=rnorm(n), z=rnorm(n)) df <- df %>% mutate(group = case_when(x<0 & y<0 & z<0 ~ "A", x<0 | y<0 | z<0 ~ "B", TRUE ~ "C" )) df
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.