This is a small collection of some R-stuff for every day use. It is recommended to install this package first time via devtools::install_github( "TPeschel/hlpr4life" )
devtools::install_github( "TPeschel/hlpr4life" )
Of course for that it is neccessary to have devtools installed. So I recommend the following lines for an easy use of hlpr4life
if( !"devtools" %in% rownames( installed.packages( ) ) )
install.packages( "devtools" ) )
devtools::install_github( "TPeschel/hlpr4life" )
From now on hlpr4life is kept in the latest version available on github. The dots are for some further packages that should be loaded and installed before if they are not.
hlpr4life::load.pkgs( c( "hlpr4life", ... ) )
hlpr4life::load.pkgs(
c(
"hlpr4life",
"dplyr",
"magrittr",
"stringr",
"lubridate",
"ggplot2" ) )
n <-
1000
( clndr <-
calendar(
"2017-01-01",
"2017-12-31" ) )
( sexes <-
c( "female", "male" ) )
( d <-
data.frame(
sic = s<-sample( some.sics( n.first = 100, n.last = 300 ), n, T ),
date = sample( clndr$date, n, T ) ) )
( d$sex <-
sample( sexes, n, T )[ match( d$sic, unique( d$sic ) ) ] )
( d$birth <-
sample( calendar( "2000-01-01", "2016-12-31" )$date, length( unique( d$sic ) ), T )[ match( d$sic, unique( d$sic ) ) ] )
( d$age <-
round( as.numeric( difftime( as.Date( d$date ), as.Date( d$birth ), units = "day" ) ) / 325.25, 2 ) )
( d$sci_group <-
paste0( "A8_", str_pad( floor( d$age ), 2, pad = "0" ) ) )
( d$height <-
round(
rnorm( n, c( 30, 35 )[ match( d$sex, sexes ) ], 2 + 1 * d$age ) +
( exp( 20 ) - exp( 20 - ( d$age * c( 1.01, 1.03 )[ match( d$sex, sexes ) ] - 1 ) / 15 ) ) * 200 / exp( 20 ), 2 ) )
( d$weight <-
round(
rnorm( n, c( 4, 4.5 )[ match( d$sex, sexes ) ], 1 + c( .6, .7 )[ match( d$sex, sexes ) ] * d$age ) +
( exp( 20 ) - exp( 20 - ( d$age * c( 1.01, 1.02 )[ match( d$sex, sexes ) ] - 1 ) / 18 ) ) * 100 / exp( 20 ), 2 ) )
( d$bmi <-
round( 10000 * d$weight / d$height / d$height, 2 ) )
( d <-
arrange(
d[ , c( "sic", "sci_group", "sex", "birth", "age", "date", "height", "weight", "bmi" ) ],
sic,
sci_group ) )
d <-
adjust.linearly.std( weight ~ sex * age, d )
d <-
adjust.linearly.std( height ~ sex * age, d )
d <-
adjust.linearly.std( bmi ~ sex * age, d )
thm <-
list(
theme_bw( ),
geom_point( alpha = .3 ),
geom_smooth( method = "lm" ),
scale_color_manual( values = c( "red", "blue" ) ) )
ggsubplot(
ggplot( d, aes( age, height, col = sex ) ) + thm,
ggplot( d, aes( age, weight, col = sex ) ) + thm,
ggplot( d, aes( age, bmi, col = sex ) ) + thm,
ggplot( d, aes( age, height.std.for.sex.age, col = sex ) ) + thm,
ggplot( d, aes( age, weight.std.for.sex.age, col = sex ) ) + thm,
ggplot( d, aes( age, bmi.std.for.sex.age, col = sex ) ) + thm,
cols = 2 )
Adjust a dependent variable y linearly to several independent variables x1, x2 ...
hlpr4life::load.pkgs( c( "hlpr4life", "ggplot2" ) )
set.seed( 1 )
num <- 100
sexes <- c( "female", "male" )
d <- data.frame( sex = sample( sexes, num, T, prob = c( .3, .7 ) ), age = round( runif( num, 0, 20 ), 1 ) )
d$y <- c( 30, 35 )[ match( d$sex, sexes ) ] + c( 2, 3 )[ match( d$sex, sexes ) ] * d$age + rnorm( num, 0, 10 )
d <- adjust.linearly( y ~ age, d )
d <- adjust.linearly( y ~ sex, d )
d <- adjust.linearly( y ~ age * sex, d )
thm <- list( theme_bw( ), geom_point( ), geom_smooth( method = "lm" ) )
thm.brown <- list( theme_bw( ), geom_point( col = "green3" ), geom_smooth( col = "green3", method = "lm" ) )
thm.bicol <- list.append( thm, scale_color_manual( values = c( "firebrick1", "steelblue3" ), guide = F ) )
thm.bicolfac <- list.append( thm.bicol, facet_grid( sex ~ . ) )
ggsubplot(
ggplot( d, aes( age, y ) ) + thm.brown,
ggplot( d, aes( age, y, col = sex ) ) + thm.bicol,
ggplot( d, aes( age, y.adj.for.age ) ) + thm.brown,
ggplot( d, aes( age, y.adj.for.age, col = sex ) ) + thm.bicol,
ggplot( d, aes( age, y.adj.for.sex ) ) + thm.brown,
ggplot( d, aes( age, y.adj.for.sex, col = sex ) ) + thm.bicol,
ggplot( d, aes( age, y.adj.for.age.sex, col = sex ) ) + thm.bicol,
ggplot( d, aes( age, y.adj.for.age.sex, col = sex ) ) + thm.bicolfac,
cols = 4 )
Adjust and standardize afterwards a dependent variable y linearly to several independent variables x1, x2 ...
hlpr4life::load.pkgs( c( "hlpr4life", "ggplot2" ) )
set.seed( 1 )
num <- 100
sexes <- c( "female", "male" )
d <- data.frame( sex = sample( sexes, num, T, prob = c( .3, .7 ) ), age = round( runif( num, 0, 20 ), 1 ) )
d$y <- c( 30, 35 )[ match( d$sex, sexes ) ] + c( 2, 3 )[ match( d$sex, sexes ) ] * d$age + rnorm( num, 0, 10 )
d <- adjust.linearly.std( y ~ age, d )
d <- adjust.linearly.std( y ~ sex, d )
d <- adjust.linearly.std( y ~ age * sex, d )
thm <- list( theme_bw( ), geom_point( ), geom_smooth( method = "lm" ) )
thm.brown <- list( theme_bw( ), geom_point( col = "green3" ), geom_smooth( col = "green3", method = "lm" ) )
thm.bicol <- list.append( thm, scale_color_manual( values = c( "firebrick1", "steelblue3" ), guide = F ) )
thm.bicolfac <- list.append( thm.bicol, facet_grid( sex ~ . ) )
ggsubplot(
ggplot( d, aes( age, y ) ) + thm.brown,
ggplot( d, aes( age, y, col = sex ) ) + thm.bicol,
ggplot( d, aes( age, y.std.for.age ) ) + thm.brown,
ggplot( d, aes( age, y.std.for.age, col = sex ) ) + thm.bicol,
ggplot( d, aes( age, y.std.for.sex ) ) + thm.brown,
ggplot( d, aes( age, y.std.for.sex, col = sex ) ) + thm.bicol,
ggplot( d, aes( age, y.std.for.age.sex, col = sex ) ) + thm.bicol,
ggplot( d, aes( age, y.std.for.age.sex, col = sex ) ) + thm.bicolfac,
cols = 4 )
Load packages as library() or require() do! If some are not installed then install them.
load.pkgs( c( "dplyr", "ggplot2", "ggthemes", "reshape2" ) )
Build a dataframe with calendar data!
clndr <-
calendar(
start = "2017-01-01",
end = "2017-12-31",
abbreviate = F,
tz = "Europe/Berlin" )
( weekends.in.2017 <-
clndr[ clndr$day_of_week %in% c( "Samstag", "Sonntag" ), ] )
( working.days.in.2017 <-
clndr[ clndr$day.of.week %in% c( 2 : 6 ), ] )
( my.birthday <-
clndr[ clndr$date == "2017-11-28", -2 ] )
( summer.2017 <-
calendar(
start = "2017-06-21",
end = "2017-09-21" ) )
Plot all in R defined colors!
plot.colors( )
Build a dataframe with a color gradient between two distinct colors partitioned by steps!
color.gradient( "red", "green", 16 )
Plot a color gradient between two distinct colors partitioned by steps!
plot.color.gradient( "red", "green", 256, "horiz", F )
plot.color.gradient( "blue", "yellow", 16, "vert", T )
What's the frequency heard by playing key 25 of a piano?
freq.of.key( 25 )
What's the frequency of a certain note?
freq.of.note( "A2" )
Which key has to be pressed for a certain frequency?
key.of.freq( 110 )
Which key plays a certain note?
key.of.note( c( "C0", "D0", "E0", "F0", "G0", "A1", "B1", "C1" ) )
Which frequency is represented by a certain note?
note.of.freq( 110 )
Which note has a certain piano key?
note.of.key(key = c(4, 6, 8, 9, 11, 13, 15, 16))
Description of a piano
piano(left.key = 4, right.key = 88)
load.pkgs( c( "ggplot2", "ggthemes" ) )
piano( key.of.note( "C1" ), key.of.note( "C3" ) )
ggplot( piano( ) ) +
geom_histogram( aes( note, -c( 1, .63 )[ match( color, c( "ivory", "ebony" ) ) ], fill = color ), stat = "identity" ) +
geom_histogram( aes( note, .001 * frequency, alpha = note ), fill = "orange", stat = "identity" ) +
scale_fill_manual( values = c( "#000000", "#f0f3f4" ), guide = F ) +
geom_text( aes( note, label = note, col = color ), y = -.2, angle = 90 ) +
annotate( geom = "text", x = 24.98, y = 3.02, xmin = 30, xmax = 60, ymin = 1, ymax = 5, label = "THE PIANO", col = "white", size = 20 ) +
annotate( geom = "text", x = 25, y = 3, xmin = 30, xmax = 60, ymin = 1, ymax = 5, label = "THE PIANO", col = "black", size = 20 ) +
scale_color_manual( values = c( "#f0f3f4", "#000000" ), guide = F ) +
scale_alpha_discrete( range = c(.7,.1),guide = F ) +
theme_solid( fill = "#405060" ) +
theme(
axis.text.x = element_blank( ),
axis.title.x = element_blank( ),
axis.text.y = element_blank( ),
axis.title.y = element_blank( ),
legend.key = element_blank( ) )
Rename some columns of a dataframe.
(d<-rename.columns( data.frame(x=c(1:10),y=rnorm(10),z=c(10:1)),c("y","x"),c("x","y")))
Rename some elements of a list.
(l<-rename.list.elements(list(x="x",y="y",z="z"),c("y","z","x"),c("Ypsilon","CED","U")))
Remove columns of a list.
remove.columns(data.frame(x="X",y="Y",z="Z",w="W"),c("x","z"))
Remove elements of a list.
remove.list.elements(list(x="X",y="Y",z="Z",w="W"),c("x","z"))
Append named or unnamed one element to a list.
( lst <- list( x = 9 ) )
( lst <- list.append( lst, x = now( ), name = "TIME" ) )
( lst <- list.append( lst, "y", "Ypsilon" ) )
( lst <- list.append( lst, "unnamed" ) )
What's the time now?
now( )
What's the date today?
today( )
ggsubplot gives the opportunity to create multiplot of ggplots.
hlpr4life::load.pkgs(c(
"hlpr4life",
"ggplot2",
"dplyr" ) )
(d<-data.frame(x=rnorm(1000,10),y=sample(letters[1:3],1000,T),s=sample(c("female","male"),1000,T)))
m<-summarise(group_by(d,y,s),m=mean(x))
ggsubplot(
ggplot( d ) +
theme_bw( ) + scale_color_discrete( guide = F ) +
geom_point( aes( c( 1 : 1000 ), x, col = paste0( y, s ) ) ),
ggplot( d ) +
theme_bw( ) + scale_fill_discrete( guide = F ) + scale_color_discrete( guide = F ) +
geom_histogram( aes( x, fill = paste0( y, s ) ), alpha = .5 ) +
geom_vline( aes( xintercept = m, col = paste0( y, s ) ), m ) +
facet_grid( s ~ y ),
ggplot( d ) +
theme_bw( ) + scale_fill_discrete( guide = F ) + scale_color_discrete( guide = F ) +
geom_histogram( aes( x, fill = paste0( y, s ) ), alpha = .5 ) +
geom_vline( aes( xintercept = m, col = paste0( y, s ) ), m ),
layout = t(
matrix(
c(
1, 2,
3, 3 ),
ncol = 2 ) ) )
Creates a list of sics.
some.sics( n.first = 0, n.last = 110 )
some.sics( n.size = 9 )
some.sics( 10, prefix = "LIFE" )
key.df creates a key out of several columns of a dataframe.
key.df(data.frame(x=letters[ runif(10,1,10)],y=LETTERS[runif(10,1,10)],z=rnorm(10)),c("x","y"),"~")
key.col creates a key out of 2 columns.
key.col(letters[3:1],LETTERS[1:3]," SEPARATOR ")
Could be usefull before a merging process.
(a<-data.frame(SGROUP="A2_02",DATE="200-10-05",Sic="LI12345678"))
(b<-data.frame(GRUPPE="A3_02",DATUM="200-10-05",PSEUDONYM="LI12345678"))
(c<-data.frame(GRP="A2_02",DATE="2001-10-05",EDAT="200-10-04",PSEUDO="LI12345679",edat.new="2017-10-03"))
(infos<-get.merging.infos(c("a","b","c")))
infos$SIC
infos$SCI_GROUP
infos$DATE
Could be usefull before a merging process.
(a<-data.frame(SGROUP="A2_02",DATE="2002-10-05",Sic="LI12345678"))
(b<-data.frame(GRUPPE="A3_02",DATUM="2002-10-05",PSEUDONYM="LI12345678"))
(c<-data.frame(GRP=c("A2_02","A2_03"),DATE=c("2002-10-05","2001-10-05"),EDAT=c("2001-10-04","2001-10-02"),PSEUDO=c("LI12345679","LI1234567X"),edat.new=c("2017-10-03","2017-10-11")))
print.merging.infos(c("a","b","c"))
Count availables in every column of a dataframe!
(d<-data.frame(x=c(NA,"Hello",NA,"World",NA),y=c(1:5),z=rep(NA,5)))
sum.av(d)
Count missings in every column of a dataframe!
(d<-data.frame(x=c(NA,"Hello",NA,"World",NA),y=c(1:5),z=rep(NA,5)))
sum.na(d)
table.df returns data about missings, availables of every column in a given dataframe. If summary is TRUE, type, min, max, median and mean are shown additionally.
table.df(data.frame(x=c(1:3),y=c(NA,1,NA),z=c(NA,NA,NA),n=c("blonde","brown","black")))
table.df(data.frame(x=c(1:3),y=c(NA,1,NA),z=c(NA,NA,NA),n=c("blonde","brown","black")),F,F,F,"NO")
table.df(data.frame(x=c(1:3),y=c(NA,1,NA),z=c(NA,NA,NA),n=c("blonde","brown","black")),F,T,F,"MISSING")
table.df(data.frame(x=c(1:3),y=c(NA,1,NA),z=c(NA,NA,NA),n=c("blonde","brown","black")),T,F,F,"AVAILABLE")
table.df(data.frame(x=c(1:3),y=c(NA,1,NA),z=c(NA,NA,NA),n=c("blonde","brown","black")),T,T,F,"NAMES")
table.df(data.frame(x=c(1:3),y=c(NA,1,NA),z=c(NA,NA,NA),n=c("blonde","brown","black")),F,F,T,"MIN")
table.df(data.frame(x=c(1:3),y=c(NA,1,NA),z=c(NA,NA,NA),n=c("blonde","brown","black")),T,T,T,"MAX")
table.df(data.frame(x=c(1:3),y=c(NA,1,NA),z=c(NA,NA,NA),n=c("blonde","brown","black")),T,T,T,"MEAN")
table.df(data.frame(x=c(1:3),y=c(NA,1,NA),z=c(NA,NA,NA),n=c("blonde","brown","black")),F,T,T,"MEDIAN")
get.columns searches for column names that matches a given the pattern.
(d<-data.frame(SIC="LI01234",Y=1000.,x=10,SCI_GROUP="A2_12",DATE="2017-10-05",EDAT="2017-10-04"))
get.columns(d)
get.columns(d,"S")
get.date.columns is actually a wrapper for get.columns with a given date pattern "edat|date|datum".
(d<-data.frame(DATE="2017-10-05",EDAT="2017-10-04",dat="2017-10-03",DATA="2017-10-02"))
get.date.columns(d)
(d<-data.frame(DATE="2017-10-05",EDAT="2017-10-04",MUTAD="2017-10-03"))
get.date.columns(d,"dat|muta")
get.sic.columns is actually a wrapper for get.columns with a given date pattern "sic|pseudo".
(d<-data.frame(SIC="LI12345678",sic="LI12345679",PSEUDO="LI1234567X",PSEUDONYM="LI12345670"))
get.sic.columns(d)
get.scigroup.columns is actually a wrapper for get.columns with a given date pattern "sci_group|sci-group|scigroup|sgroup|group|grp|gruppe".
(d<-data.frame(SGROUP="A2_02",SCI_GROUP="B1_10",Gruppe="A1-SK_10",GRP="A3_09"))
get.scigroup.columns(d)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.