In companieshouse/DARr: Prototype RAP package for Companies House

# Global knitr chunk options: echo the code, hide warnings and messages.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)

# Packages attached by this report. Any that are not installed yet are
# installed first, then every package is attached in the order listed.
packages <- c("funModeling", "haven", "corrplot", "tidyverse")
for (p in packages) {
  # requireNamespace() only tests availability and does not attach anything,
  # so the library() call below is the single place where attaching happens
  # and it stops with an error if the installation did not succeed.
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}

HAVEN Data Profiling

# Read the test file into a variable
my_test <- read.csv("example_data_set.csv", header = TRUE)

# Parse day/month/year strings into Date values.
#
# "%y" consumes only two year digits, so a value such as "31/12/2015" would be
# read as year 20 (i.e. 2020) and the trailing "15" silently ignored. Values
# whose year part has four digits are therefore parsed with "%Y"; everything
# else keeps the original two-digit "%y" interpretation. Blank or unparseable
# values become NA.
# NOTE(review): the date layout of example_data_set.csv is not visible here;
# confirm whether it stores two- or four-digit years.
parse_dmy_date <- function(x) {
  x <- as.character(x)
  parsed <- as.Date(x, format = "%d/%m/%y")
  has_century <- grepl("^[0-9]{1,2}/[0-9]{1,2}/[0-9]{4}$", x)
  parsed[has_century] <- as.Date(x[has_century], format = "%d/%m/%Y")
  parsed
}

# Transform columns to dates
date_columns <- c(
  "DissolutionDate",
  "IncorporationDate",
  "Accounts.NextDueDate",
  "Accounts.LastMadeUpDate",
  "Returns.NextDueDate",
  "Returns.LastMadeUpDate",
  "ConfStmtNextDueDate",
  "ConfStmtLastMadeUpDate"
)
my_test[date_columns] <- lapply(my_test[date_columns], parse_dmy_date)

# glimpse() comes from the dplyr package (attached as part of tidyverse)
glimpse(my_test)

# Drop the surplus previous-name fields: for each of PreviousName_2 up to
# PreviousName_10, both the .CONDATE and the .CompanyName column are removed.
# all_of() keeps the selection strict, so a missing column is still an error.
my_test <- select(
  my_test,
  -all_of(paste0(
    "PreviousName_",
    rep(2:10, each = 2),
    c(".CONDATE", ".CompanyName")
  ))
)


# Changes blank fields to NA
# Only character and factor columns can hold an empty string, so only those
# are touched. Comparing the whole data frame with "" would also compare the
# Date columns, which coerces "" through as.Date() and can stop with
# "character string is not in a standard unambiguous format".
my_test[] <- lapply(my_test, function(column) {
  if (is.character(column) || is.factor(column)) {
    # %in% never yields NA, so existing NA entries are left untouched
    column[column %in% ""] <- NA
  }
  column
})

# Data integrity report (funModeling) for the whole data frame
data_integrity(my_test)

# Base R descriptive statistics, one summary per column
summary(my_test)

# profiling_num (funModeling)
# Produces a metric table with many indicators for every numerical variable;
# non-numerical variables are skipped automatically.
# Current metrics are:
#   mean
#   std_dev: standard deviation
#   p_XX: percentile at XX
#   skewness, kurtosis
#   iqr: inter quartile range
#   variation_coef: the ratio of sd/mean
#   range_98: the range within which 98% of the values fall
profiling_num(my_test)

# Histogram of Accounts.AccountRefDay drawn with ggplot2 (default binning)
ggplot(data = my_test, mapping = aes(x = Accounts.AccountRefDay)) +
  geom_histogram()

# plot_num (funModeling) profiles the continuous variables, here with 15 bins
plot_num(my_test, bins = 15)

Change the options here to NOT include the code

Frequency Distributions

# Frequency tables/plots (funModeling) for the selected categorical columns
freq(
  my_test,
  input = c(
    "RegAddress.Country",
    "CompanyCategory",
    "CompanyStatus",
    "CountryOfOrigin",
    "Accounts.AccountCategory"
  )
)

# Probability density: funModeling plotar() with the "histdens" plot type,
# using CompanyStatus as the target variable
plotar(
  my_test,
  target = "CompanyStatus",
  plot_type = "histdens"
)

companieshouse/DARr documentation built on Oct. 22, 2022, 8:26 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

companieshouse/DARr
Prototype RAP package for Companies House

In companieshouse/DARr: Prototype RAP package for Companies House

Frequency Distributions

R Package Documentation

Browse R Packages

We want your feedback!

companieshouse/DARr Prototype RAP package for Companies House

In companieshouse/DARr: Prototype RAP package for Companies House

Frequency Distributions

R Package Documentation

Browse R Packages

We want your feedback!

companieshouse/DARr
Prototype RAP package for Companies House