R/IPDFileCheck.R
In IPDFileCheck: Basic Functions to Check Readability, Consistency, and Content of an Individual Participant Data File

Documented in calculate_age_from_dob calculate_age_from_year check_col_pattern_colname check_column_exists check_load_packages cohensd convert_date_numeric_stdform convert_date_string_stdform convert_to_number descriptive_stats_col_excl_nrcode get_colno_pattern_colname get_columnno_fornames get_contents_cols get_effect_size get_mode_from_vector get_sem get_summary_gtsummary get_value_from_codes keep_required_columns present_mean_sd_rmna_text represent_categorical_data_exclude_missing represent_categorical_data_forsubgroups represent_categorical_data_include_missing represent_categorical_textdata represent_numerical_data_forsubgroups return_longitudinal_summary return_subgroup_omitna return_subgroup_withNA test_age test_column_contents test_columnnames test_data_numeric test_data_numeric_norange test_data_string test_data_string_restriction test_file_exist_read test_gender

###############################################################################
#' Function to check the package is installed, if not install
#' @param pkg name of package(s)
#' @return 0, if packages cant be installed and loaded, else error
#' @examples
#' check_load_packages("dplyr")
#' @export
#' @importFrom methods is
check_load_packages <- function(pkg) {
  for (i in seq_len(length(pkg))) {
    x <- pkg[i]
    if (!suppressWarnings(require(x, character.only = TRUE))) {
      trythis <- tryCatch(install.packages(x, dependencies = TRUE,
                              repos = "http://cran.us.r-project.org"),
        error = function(e) {
          cat("Error ")
        },
        warning = function(e) {
          cat("Warning ")
        }
      )
      if (is(trythis, "warning")) {
        warning("Warning installing")
      }
      if (is(trythis, "error")) {
        warning("Error installing")
      }
      trythis <- tryCatch(require(x, character.only = TRUE),
        error = function(e) {
          warning("Error in adding to library ")
        },
        warning = function(e) {
          warning("Invalid package ")
        }
      )
    }
  }
  return(0)
}
###############################################################################
#' Function to test column names of a data being different from what specified
#' @param column_names column names of the data frame
#' @param data a data frame
#' @return 0, if success error, if failure
#' @examples
#' test_columnnames(c("name", "age"), data.frame(
#'   "Age" =  c(21, 15),
#'   "Name" =  c("John", "Dora")
#' ))
#' @export
test_columnnames <- function(column_names, data) {
  upper_given_colnames <- sort(toupper(column_names))
  upper_data_colnames <- sort(toupper(colnames(data)))
  if (sum(upper_given_colnames == upper_data_colnames) ==
      length(column_names)) {
    return(0)
  } else {
    return(-1)
  }
}
###############################################################################
#' Function to throw error on invalid directory or file and if not readable
#' @param filename name of a file or dir
#' @return 0, if success error, if failure
#' @examples
#' test_file_exist_read(system.file("extdata", "blank.txt",
#'   package = "IPDFileCheck"
#' ))
#' @export
test_file_exist_read <- function(filename) {
  if (file.exists(filename)) {
    if (file.access(filename, 0) != 0) {
      return(-1)
    }
    return(0)
  } else {
    return(-2)
  }
}
###############################################################################
#' Function to return the column number for column name
#' @param data a data frame
#' @param column_name column names of the data frame
#' @return column number, if success error, if failure
#' @examples
#' get_columnno_fornames(data.frame("Age" = c(21, 15),
#' "Name" = c("John", "Dora")), "Name")
#' @export
get_columnno_fornames <- function(data, column_name) {
  data_column_names <- toupper(colnames(data))
  if (any(data_column_names == toupper(column_name))) {
    column_no <- which(data_column_names == toupper(column_name))
    return(column_no)
  } else {
    stop("Column name does not exist")
  }
}
###############################################################################
#' Function to check the format of 'age' in data
#' @param data a data frame
#' @param agecolumn column name that corresponds to age or date pf birth
#' @param nrcode non response code corresponding to age column
#' @return 0, if success error if failure
#' @examples
#' df <- data.frame("Age" = c(21, 15), "Name" = c("John", "Dora"))
#' test_age(df, "age", 999)
#' @export
test_age <- function(data, agecolumn = "age", nrcode = NA) {
  column_no <- get_columnno_fornames(data, agecolumn)
    entry <- data[[column_no]]
    blanks <- c(which(entry == ""), which(is.na(entry)))
    if (length(blanks) != 0) {
      entry[blanks] <- nrcode
    }
    if (is.na(nrcode)) {
      this_entry <- entry[!is.na(entry)]
      this_entry_num <- suppressWarnings(as.numeric(this_entry))
      if (sum(is.na(this_entry_num)) == 0) {
        newentry <- as.numeric(this_entry)
        if (any(newentry > 150) || any(newentry < 0)) {
          return(-1)
        } else {
          return(0)
        }
      } else {
        return(-2)
      }
    } else {
      this_entry <- entry[entry != nrcode]
      this_entry_num <- suppressWarnings(as.numeric(this_entry))
      if (sum(is.na(this_entry_num)) == 0) {
        newentry <- as.numeric(this_entry)
        if (any(newentry > 150) || any(newentry < 0)) {
          return(-3)
        } else {
          return(0)
        }
      } else {
        return(-4)
      }
    }
}
###############################################################################
#' Function to check the format of 'gender' column in data
#' @param data a data frame
#' @param gendercolumn column name for gender
#' @param gendercode how gender is coded
#' @param nrcode non response code corresponding to gender column
#' @return 0, if success error if failure
#' @examples
#' test_gender(data.frame("sex" = c("m", "f"), "Name" = c("John", "Dora")),
#' c("f", "m"), "sex", 999)
#' @export
test_gender <- function(data, gendercode, gendercolumn = "gender",
                        nrcode = NA) {
  gendercode <- toupper(gendercode)
  if (get_columnno_fornames(data, gendercolumn) > 0) {
    column_no <- get_columnno_fornames(data, gendercolumn)
    entry <- data[column_no]
    if (is.na(nrcode)) {
      newentry <- entry[!is.na(entry)]
      gendercode <- gendercode[!is.na(gendercode)]
    } else {
      newentry <- entry[entry != nrcode, ]
      gendercode <- gendercode[which(gendercode != nrcode)]
    }
    facs <- levels(factor(toupper(newentry)))
    if (all(facs %in% gendercode)) {
      return(0)
    } else {
      return(-1)
    }
  }
}
###############################################################################
#' Function to get the actual value of column content if its coded
#' @param data a data frame
#' @param column column name for value
#' @param list_codes_values list of codes to understand the codes and value
#' @param nrcode non response code corresponding to gender column
#' @return 0, if success error if failure
#' @examples
#' data = data.frame("sex" = c(1, 2, 2, 1, 1),
#'  "Name" = c("John", "Dora","Dora", "John","John"))
#' list_codes_values = list(c("F", "M"),c(1,2))
#' ans <- get_value_from_codes(data, column = "sex", nrcode = NA,
#' list_codes_values)
#' @export
#' @importFrom dplyr %>%
get_value_from_codes <- function(data, column,
                              nrcode = NA, list_codes_values) {
     if (is.null(column)) {
       stop("Column name cant be null")
     } else {
       if (is.na(column))
         stop("Column name cant be NA")
     }
     if (is.null(data))
       stop("data cant be null")
     if (is.null(list_codes_values))
       stop("list_codes_values cant be null")
     if (get_columnno_fornames(data, column) > 0) {
        h <- hash::hash(key = unlist(list_codes_values[1]),
                 values = unlist(list_codes_values[2]))
        leys <- h$key
        vals <- h$values
        ipd_codes <- unlist(data %>% dplyr::select(dplyr::all_of(column)))
        this_values <- c()
        for (i in seq_len(length(ipd_codes))) {
          if (is.na(ipd_codes[i])) {
           this_val <- NA
          } else {
            this_val <- vals[leys == ipd_codes[i]]
          }
          this_values <- append(this_values, this_val)
        }
        return((this_values))
     }
}
###############################################################################
#' Function to check the format of column contents
#' @param data a data frame
#' @param column column name for gender
#' @param code how column values are  coded
#' @param nrcode non response code corresponding to gender column
#' @return 0, if success error if failure
#' @examples
#' test_column_contents(data.frame(
#'   "sex" = c("m", "f"),
#'   "Name" = c("John", "Dora")
#' ), "sex", c("m", "f"), 999)
#' @export
test_column_contents <- function(data, column, code, nrcode = NA) {
  column_no <- get_columnno_fornames(data, column)
    entry <- data[column_no]
    if (is.na(nrcode)) {
      newentry <- entry[!is.na(entry)]
      code <- code[!is.na(code)]
    } else {
      newentry <- entry[entry != nrcode, ]
      code <- code[which(code != nrcode)]
    }
    facs <- levels(factor(newentry))
    if (all(facs %in% code)) {
      return(0)
    } else {
      return(-2)
    }
}
###############################################################################
#' Function to check the format of a numeric column
#' @param column_name the column name
#' @param data data frame
#' @param nrcode non response code corresponding to the column
#' @param minval minimum value allowed
#' @param maxval maximum value allowed
#' @return 0, if success error, if failure
#' @examples
#' test_data_numeric("age", data.frame(
#'   "Age" =  c(21, 15),
#'   "Name"  = c("John", "Dora")
#' ), -99, 0, 100)
#' @export
test_data_numeric <- function(column_name, data, nrcode = NA, minval, maxval) {
  column_no <- get_columnno_fornames(data, column_name)
    entry <- (data[[column_no]])
    if (is.na(nrcode)) {
      new_entry <- (entry[which(!is.na(entry))])
    } else {
      new_entry <- (entry[which(entry != nrcode)])
    }
    if (is.numeric(new_entry)) {
      if (any(new_entry < minval) || any(new_entry > maxval)) {
        return(-2)
      } else {
        return(0)
      }
    } else {
      return(-3)
    }
}
###############################################################################
#' Function to check the format of a numeric column when the values
#' are not bounded
#' @param column_name the column name
#' @param data data frame
#' @param nrcode non response code corresponding to the column
#' @return 0, if success error, if failure
#' @examples
#' test_data_numeric_norange("marks", data.frame(
#'   "marks"  =  c(210, 99),
#'   "Name" =  c("John", "Dora")
#' ), -99)
#' @export
test_data_numeric_norange <- function(column_name, data, nrcode = NA) {
  column_no <- get_columnno_fornames(data, column_name)
    entry <- unlist(data.frame(data[[column_no]], stringsAsFactors = FALSE))
    if (is.na(nrcode)) {
      no_nrcode_entries <- entry[which(!is.na(entry))]
    }
    else {
      no_nrcode_entries <- entry[which(entry != nrcode)]
    }
    if (is.numeric(no_nrcode_entries)) {
      return(0)
    } else {
      return(-2)
    }
}
###############################################################################
#' Function to check the format of a string column
#' @param data data frame
#' @param column_name the column name
#' @param nrcode non response code corresponding to the column
#' @return 0, if success error, if failure
#' @examples
#' test_data_string(data.frame("Age" = c(21, 15), "Name" = c("John", "Dora")),
#' "name", -999)
#' @export
test_data_string <- function(data, column_name, nrcode = NA) {
  column_no <- get_columnno_fornames(data, column_name)
    temp <- data[column_no]
    temp <- unlist(temp[!is.na(temp)])
    if (!is.na(nrcode)) {
      new_entry <- temp[temp != nrcode]
    } else {
      new_entry <- temp[!is.na(temp)]
    }
    new_entry <- suppressWarnings(as.numeric(as.character(new_entry)))
    if (any(!is.na(new_entry))) {
      return(-2)
    } else {
      return(0)
    }
}
###############################################################################
#' Function to check the format of a string column when the string values
#' are given
#' @param data data frame
#' @param column_name the column name
#' @param nrcode non response code corresponding to the column
#' @param allowed_strings allowed strings or characters to represent
#' meaningful entry
#' @return 0, if success error, if failure
#' @examples
#' test_data_string_restriction(
#'   data.frame("Age" = c(21, 15), "sex" = c("m", "f")),
#'   "sex", -999, c("f", "m")
#' )
#' @export
test_data_string_restriction <- function(data, column_name, nrcode = NA,
                                         allowed_strings) {
  res <- test_data_string(data, column_name, nrcode)
  if (res == 0) {
    column_no <- get_columnno_fornames(data, column_name)
      if (length(allowed_strings) >= 1) {
        entry <- toupper(data[[column_no]])
        if (!is.na(nrcode)) {
          new_entry <- entry[entry != nrcode]
        } else {
          new_entry <- entry[!is.na(entry)]
        }
        if (any(is.na(new_entry) == TRUE) ||
          sum(toupper(allowed_strings) %in% unique(new_entry))
          < length(unique(new_entry))) {
          return(-2)
        } else {
          return(0)
        }
      } else {
        return(-3)
      }
  } else {
    if (res == -2) {
      return(-5)
    }
  }
}
#' ############################################################################
#' Function to check if a given pattern is contained in
#' the column names of a data
#' @param pattern a string that needs to be checked
#' @param column_names column names actually have
#' @return TRUE, if success FALSE, if failure
#' @examples
#' check_col_pattern_colname("age", "female_age")
#' @export
check_col_pattern_colname <- function(pattern, column_names) {
  if (is.na(pattern) || pattern == "") {
    return(-1)
  } else {
    if (is.numeric(pattern)) {
      test <- grep(toString(pattern), toupper(column_names))
    } else {
      test <- grep(toupper(pattern), toupper(column_names))
    }
    if (length(test) == 0) {
      return(FALSE)
    } else {
      return(TRUE)
    }
  }
}
###############################################################################
#' Function to return the column number if a given pattern is contained
#' in the column names of a data
#' @param pattern a string that needs to be checked
#' @param column_names column names actually have
#' @return column number, if success error, if failure
#' @examples
#' get_colno_pattern_colname("age", "female_age")
#' @export
get_colno_pattern_colname <- function(pattern, column_names) {
  if (check_col_pattern_colname(pattern, column_names) == TRUE) {
    test <- grep(toupper(pattern), toupper(column_names))
    return(test)
  } else {
    return(-1)
  }
}
#' ############################################################################
#'  Function to keep only certain variables
#' @param variables list of variables
#' @param the_data data to be sub setting
#' @return subset
#' @examples
#' the_data <- data.frame("Age" = c(21, 15), "sex" = c("m", "f"))
#' variable <- "Age"
#' keep_required_columns(variable, the_data)
#' @export
keep_required_columns <- function(variables, the_data) {
  for (i in seq_len(length(variables))) {
    if (!is.null(variables[i])) {
      if (is.na(variables[i])) stop("Some variables are NA")
    } else {
      stop("Some variables are NULL, please check")
    }
  }
  exists <- unlist(lapply(variables, IPDFileCheck::check_column_exists,
                         the_data))
  if (sum(exists) != 0) {
    stop("Some variables do not exists in data, please check")
  }
  subset <- the_data[(names(the_data) %in% variables)]
  if (ncol(subset) == 0 | nrow(subset) == 0)
    stop(" null value provided")
  return(subset)
}
#' ############################################################################
#' Function to return mode
#' @param v a vector
#' @return mode
#' @examples
#' get_mode_from_vector(c(1, 1, 2, 3))
#' @export
get_mode_from_vector <- function(v) {
  if (is.numeric(v)) {
    uniqv <- unique(v)
    uniqv[which.max(tabulate(match(v, uniqv)))]
  } else {
    return(-1)
  }
}
###############################################################################
#' Function to return descriptive statistics, sum, no of observations, mean,
#' mode. median, range, standard deviation and standard error
#' @param data data frame
#' @param column_name the column name
#' @param nrcode non response code corresponding to the column
#' @return the descriptive statistics for success , error for failure
#' @examples
#' descriptive_stats_col_excl_nrcode(data.frame("age" = c(21, 15),
#' "Name" = c("John", "Dora")), "age", NA)
#' @import stats
#' @export
descriptive_stats_col_excl_nrcode <- function(data, column_name, nrcode = NA) {
  col.names <- colnames(data)
  if (column_name %in% col.names) {
    if (test_data_numeric_norange(column_name, data, nrcode) == 0) {
      this_column <- data[column_name]
      if (is.na(nrcode)) {
        missing_count <- sum(is.na(this_column))
        this_column <- this_column[!is.na(data[column_name])]
      } else {
        missing_count <- sum(this_column == nrcode)
        this_column <- this_column[data[column_name] != nrcode
        & !is.na(data[column_name])]
      }
      if (length(this_column) == 0) {
        return(0)
      } else {
        this_sum <- round(sum(this_column), 3)
        this_av <- round(mean(this_column), 3)
        this_med <- round(median(this_column), 3)
        this_mode <- round(get_mode_from_vector(this_column), 3)
        this_range_low <- round(min(this_column), 3)
        this_range_high <- round(max(this_column), 3)
        this_sd <- round(sd(this_column), 3)
        this_se <- round(this_sd / sqrt(length(this_column)), 3)
        this_lq <- round(quantile(this_column, c(0.25)), 3)
        this_uq <- round(quantile(this_column, c(0.75)), 3)
        this_ci_low <- round(quantile(this_column, c(0.025)), 3)
        this_ci_high <- round(quantile(this_column, c(0.975)), 3)
        this_range <- paste(this_range_low, "-", this_range_high)
        results <- matrix(c(
          this_sum, this_av, this_sd, this_med, this_mode,
          this_se, this_range_low, this_range_high, this_range,
          length(this_column), this_lq, this_uq, this_ci_low, this_ci_high,
          missing_count
        ), byrow = TRUE, nrow = 1)
        colnames(results) <- c(
          "Sum", "Mean", "SD", "Median", "Mode", "SE", "Minimum", "Maximum",
          "Range", "Count", "LQ", "UQ", "CIlow", "CIhigh", "MissingCount"
        )
        rownames(results) <- column_name
        return(results)
      }
    } else {
      stop("Error - column contents not numeric")
    }
  } else {
    stop("Error - no column or column name different")
  }
}
###############################################################################
#' Function to check the given column exists
#' @param column_name a column name
#' @param data data frame
#' @return 0 if success error if failure
#' @examples
#' check_column_exists("age", data.frame("Age" = c(21, 15),
#' "Name" = c("John", "Dora")))
#' @export
check_column_exists <- function(column_name, data) {
  if (any(toupper(colnames(data)) == toupper(column_name))) {
    return(0)
  } else {
    return(-1)
  }
}
#' ############################################################################
#' Function to present the mean and sd of a data set in the form Mean (SD)
#' @param data data frame
#' @param column_name the column name
#' @param nrcode non response code corresponding to the column
#' @return the mean(sd), error for failure
#' @examples
#' present_mean_sd_rmna_text(data.frame(
#'   "age" = c(21, 15),
#'   "Name" = c("John", "Dora")
#' ), "age", NA)
#' @export
present_mean_sd_rmna_text <- function(data, column_name, nrcode = NA) {
  desc <- descriptive_stats_col_excl_nrcode(data, column_name, nrcode)
    desc <- data.frame(desc)
    this_mean <- as.numeric(desc$Mean)
    this_sd <- as.numeric(desc$SD)
    ans <- paste(round(this_mean, 2), " (", round(this_sd, 2), ")", sep = "")
    return(ans)

}
###############################################################################
#' Function to return a subgroup when certain variable equals the given value
#' while omitting those with NA
#' @param data data frame
#' @param variable that corresponds to a column
#' @param value a value that can be taken by the variable
#' @examples
#' return_subgroup_omitna(data.frame(
#'   "age" = c(21, 15),
#'   "Name" = c("John", "Dora")
#' ), "age", 10)
#' @return subgroup a data frame if success error if failure
#' @export
return_subgroup_omitna <- function(data, variable, value) {
  if (check_column_exists(variable, data) == 0) {
    column_no <- get_columnno_fornames(data, variable)
    subgroup <- data[which(data[column_no] == value &
                             !is.na(data[column_no])), ]
    return(subgroup)
  } else {
    stop("Data does not contain the column with the specfied column name")
  }
}
###############################################################################
#' Function to return a subgroup when certain variable equals the given value
#' while omitting those with NA
#' @param data data frame
#' @param variable that corresponds to a column
#' @param value a value that can be taken by the variable
#' @examples
#' return_subgroup_omitna(data.frame(
#'   "age" = c(21, 15),
#'   "Name" = c("John", "Dora")
#' ), "age", 10)
#' @return subgroup a data frame if success error if failure
#' @export
return_subgroup_omitna <- function(data, variable, value) {
  if (check_column_exists(variable, data) == 0) {
    column_no <- get_columnno_fornames(data, variable)
    subgroup <- data[which(data[column_no] == value &
                             !is.na(data[column_no])), ]
    return(subgroup)
  } else {
    stop("Data does not contain the column with the specfied column name")
  }
}
###############################################################################
#' Function to return a subgroup when certain variable equals the given value
#' while omitting those with NA
#' @param data data frame
#' @param variable that corresponds to a column
#' @param value a value that can be taken by the variable
#' @examples
#' return_subgroup_withNA(data.frame(
#'   "age" = c(21, 15),
#'   "Name" = c("John", "Dora")
#' ), "age", 10)
#' @return subgroup a data frame if success error if failure
#' @export
return_subgroup_withNA <- function(data, variable, value) {
  if (check_column_exists(variable, data) == 0) {
    column_no <- get_columnno_fornames(data, variable)
    if (is.na(value)) {
      subgroup <- data[is.na(data[column_no]), ]
    } else {
      subgroup <- data[which(data[column_no] == value), ]
    }
    return(subgroup)
  } else {
    stop("Data does not contain the column with the specfied column name")
  }
}
###############################################################################
#' Function to find the effect size Cohen's d
#' @param x, a vector
#' @param y, another vector
#' @return cohens d estimated with 95% CI or error if failure
#' @examples
#' cohensd(c(1, 2, 3, 4), c(3, 4, 5, 6))
#' @export
cohensd <- function(x, y) {
  xx <- suppressWarnings(as.numeric(x))
  yy <- suppressWarnings(as.numeric(y))
  xnotna <- sum(!is.na(xx))
  ynotna <- sum(!is.na(yy))
  if (xnotna == length(x) && ynotna == length(y)) {
    lx <- length(x) - 1
    ly <- length(y) - 1
    md <- abs(mean(x) - mean(y)) ## mean difference (numerator)
    csd <- lx * var(x) + ly * var(y)
    csd <- csd / (lx + ly)
    csd <- sqrt(csd) ## common sd computation
    cd <- md / csd ## cohen's d
    var_d <- 1 / (lx + 1) + 1 / (ly + 1) + (cd^2) / (2 * (lx + ly + 2))
    ans <- c(cd, cd - 1.96 * sqrt(var_d), cd + 1.96 * sqrt(var_d))
    return(ans)
  } else {
    stop("Vector contains atleast one NA or string")
  }
}
#' ############################################################################
#' Function to estimate standard error of the mean
#' @param x, a vector
#' @return SE the standard error of the mean
#' @examples
#' get_sem(c(1, 2, 3, 4))
#' @export
get_sem <- function(x) {
  xx <- suppressWarnings(as.numeric(x))
  if (sum(is.na(xx)) > 0) {
    stop("Vector contains non numeric data")
  } else {
    ans <- sd(x) / sqrt(length(x))
    return(ans)
  }
}
#############################################################################
#' Function to find the number and percentages of categories
#' @param data, a data frame
#' @param variable the column name
#' @param nrcode non response code
#' @return number and percentages or error if failure
#' @examples
#' this.df <- data.frame(c(11, 78), c("m", "f"), stringsAsFactors = FALSE)
#' colnames(this.df) <- c("mark", "gender")
#' represent_categorical_data_exclude_missing(this.df, "gender", NA)
#' @export
represent_categorical_data_exclude_missing <- function(data, variable,
                                                       nrcode = NA) {
  coding <- unique(toupper(factor(data[[variable]])))
  if (is.na(nrcode)) {
    coding <- coding[!is.na(coding)]
  } else {
    coding <- coding[coding != nrcode]
  }
  coding <- sort(coding)
  num_categories <- length(coding)
  if (check_column_exists(variable, data) == 0) {
    ans <- rep(0, 2 * num_categories)
    all_names <- list()
    for (i in 1:num_categories) {
      if (coding[i] == "NA") {
        uppervals <- toupper(factor(data[[variable]]))
        num <- nrow(data[which(uppervals == coding[i]), ])
      } else {
        uppervals <- toupper(factor(data[[variable]]))
        if (is.na(nrcode)) {
          num <- nrow(data[which(uppervals == coding[i] &
                                   !is.na(uppervals)), ])
        } else {
          num <- nrow(data[which(uppervals == coding[i] &
                                   uppervals != nrcode), ])
        }
      }
      perc <- 100 * num / nrow(data)
      ans[2 * i] <- round(perc, 2)
      ans[2 * i - 1] <- round(num, 2)
      names_here <- c(paste(coding[i]))
      all_names <- c(all_names, names_here)
    }
    mat_ans <- matrix(ans, ncol = length(coding))
    colnames(mat_ans) <- all_names
    rownames(mat_ans) <- c("Number", "Percentage")
    return(mat_ans)
  } else {
    stop("Data does not contain the column with the specfied column name")
  }
}
#############################################################################
#' Function to find the number and percentages of categories
#' @param data, a data frame
#' @param variable the column name
#' @param nrcode non response code
#' @return number and percentages or error if failure
#' @examples
#' this.df <- data.frame(c(11, 78), c("m", "f"), stringsAsFactors = FALSE)
#' colnames(this.df) <- c("mark", "gender")
#' represent_categorical_data_include_missing(this.df, "gender", NA)
#' @export
represent_categorical_data_include_missing <- function(data, variable,
                                                       nrcode = NA) {
  coding <- unique(toupper(factor(data[[variable]])))
  num_categories <- length(coding)
  if (check_column_exists(variable, data) == 0) {
    ans <- rep(0, 2 * num_categories)
    all_names <- list()
    uppervals <- toupper(factor(data[[variable]]))
    for (i in 1:num_categories) {
      if (is.na(coding[i])) {
        num <- nrow(data[which(is.na(uppervals)), ])
      } else {
        num <- nrow(data[which(uppervals == coding[i]), ])
      }
      perc <- 100 * num / nrow(data)
      ans[2 * i] <- round(perc, 2)
      ans[2 * i - 1] <- round(num, 2)
      names_here <- c(paste(coding[i]))
      all_names <- c(all_names, names_here)
    }
    mat_ans <- matrix(ans, ncol = length(coding))
    colnames(mat_ans) <- all_names
    rownames(mat_ans) <- c("Number", "Percentage")
    return(mat_ans)
  } else {
    stop("Data does not contain the column with the specfied column name")
  }
}
#############################################################################
#' Function to represent categorical data in the form - numbers (percentage)
#' @param data data frame
#' @param variable column name
#' @param nrcode non response code
#' @return the numbers (percentage) , error for failure
#' @examples
#' df <- data.frame(c(11, 78), c("m", "f"), stringsAsFactors = FALSE)
#' colnames(df) <- c("mark", "gender")
#' represent_categorical_textdata(df, "gender", NA)
#' @export
represent_categorical_textdata <- function(data, variable, nrcode) {
  intresult <- represent_categorical_data_include_missing(data, variable,
                                                          nrcode)
  ans <- rep(0, ncol(intresult))
  i <- 1
  while (i <= ncol(intresult)) {
    num <- intresult[1, i]
    perc <- intresult[2, i]
    temp <- c(paste(round(num, 2), " (", round(perc, 2), ")", sep = ""))
    ans[i] <- temp
    i <- i + 1
  }
  names(ans) <- colnames(intresult)
  return(ans)
}

#############################################################################
#' Function to find the number and percentages of categories
#' @param data, a data frame
#' @param variable1 the column name of the variable to be grouped based on
#' @param variable2 the column name of the variable to represented
#' @param nrcode non response code for the variable2
#' @return the subgroup
#' @examples
#' this.df <- data.frame(c(11, 78,22), c("m", "f", "f"), c(1,2,2),
#' stringsAsFactors = FALSE)
#' colnames(this.df) <- c("mark", "gender", "group")
#' represent_categorical_data_forsubgroups(this.df, "group", "gender", NA)
#' @export
represent_categorical_data_forsubgroups <- function(data, variable1, variable2,
                                                    nrcode = NA) {
  if (is.null(variable1) | is.null(variable2) | is.null(data)) {
    stop("Some of the arguments are NULL")
  }
  cols_shouldhave <- c(variable1, variable2)
  resuts <- sum(unlist(lapply(cols_shouldhave, check_column_exists, data)))
  if (resuts != 0) {
    stop("Some variables are not in the data")
  } else {
    coding <- unique(toupper(factor(data[[variable1]])))
    variables <- unique(toupper(factor(data[[variable2]])))
    coding_len <- length(coding)
    var_len <- length(variables)
    all_list <-  c()
    for (i in seq_len(length(coding))) {
      this_subgroup1 <- return_subgroup_withNA(data, variable1, coding[i])
      this_rep <- (represent_categorical_data_include_missing(
        this_subgroup1, variable2, nrcode))
      if (ncol(this_rep) < var_len) {
        not_repr <- c()
        for (j in seq_len(var_len)) {
          if (is.na(variables[j])) {
            check <- sum("NA" %in% colnames(this_rep))
          } else {
            check <- variables[j] %in% colnames(this_rep)
          }
          if (!check)
            not_repr <- append(not_repr, variables[j])
        }
        num_not_repr <- length(not_repr)
        new_col <- rep(0, nrow(this_rep))
        new_colnames <- append(colnames(this_rep), not_repr)
        for (i in seq_len(num_not_repr)) {
          this_rep <- cbind(this_rep, new_col)
        }
        colnames(this_rep) <- new_colnames
        this_rep <- this_rep[, order(colnames(this_rep))]
      }
      this_rep <- this_rep[, order(colnames(this_rep))]
      names_list <- (colnames(this_rep))
      all_list <- cbind(all_list, this_rep)
    }
    all_list <- data.frame(all_list)
    row.names(all_list) <- row.names(this_rep)

    out <- kableExtra::kbl(all_list, "html", booktabs = T, align = c("r"),
               col.names = rep(names_list, coding_len))
    out2 <- kableExtra::kable_styling(out, "striped", full_width = F,
                                      position = "left", font_size = 12)
    header <- rep(var_len, coding_len)
    names(header) <- coding
    header <- c("", header)
    out3 <- kableExtra::add_header_above(out2, header = header)
    results <- structure(list(
      list_results = all_list,
      table_results = out3
    ))
    return(results)
  }
}
#############################################################################
#' Function to find the number and percentages of categories
#' @param data, a data frame
#' @param variable1 the column name of the variable to be grouped based on
#' (categorical column)
#' @param variable2 the column name of the variable to represented
#' (numerical data)
#' @param nrcode non response code for the variable2
#' @return the subgroup
#' @examples
#' this.df <- data.frame(c(11, 78,22), c("m", "f", "f"), c(1,2,2),
#' stringsAsFactors = FALSE)
#' colnames(this.df) <- c("mark", "gender", "group")
#' represent_numerical_data_forsubgroups(this.df, "group", "mark", NA)
#' @export
represent_numerical_data_forsubgroups <- function(data, variable1, variable2,
                                                    nrcode = NA) {
  if (is.null(variable1) | is.null(variable2) | is.null(data)) {
    stop("Some of the arguments are NULL")
  }
  cols_shouldhave <- c(variable1, variable2)
  resuts <- sum(unlist(lapply(cols_shouldhave, check_column_exists, data)))
  if (resuts != 0) {
    stop("Some variables are not in the data")
  } else {
    coding <- unique(toupper(factor(data[[variable1]])))
    all_list <- new_list <- c()
    for (i in seq_len(length(coding))) {
      this_subgroup1 <- return_subgroup_withNA(data, variable1, coding[i])
      this_rep <- data.frame(descriptive_stats_col_excl_nrcode(this_subgroup1,
                                                    variable2, nrcode))
      if (nrow(this_rep) < 1) {
        this_rep <- rep(0, 15)
      }
      all_list <- rbind(all_list, this_rep)
    }
    all_list <- data.frame(all_list)
    new_list <- cbind(coding, all_list)
    new_list <- data.frame(new_list)
    colnames(new_list) <- append("Group",colnames(all_list))
    rownames(new_list) <- NULL
    out <- kableExtra::kbl(new_list, "html", booktabs = T, align = c("l"))
    out2 <- kableExtra::kable_styling(out, "striped", full_width = F,
                        position = "left", font_size = 12)
    results <- structure(list(
      list_results = new_list,
      table_results = out2
    ))
    return(results)
  }
}


################################################################################
#' Function that convert a number represented as character array
#' @param character_array a character array of numbers
#' @return converted_number in numeric form
#' @examples
#' convert_to_number(c("1", "9", "8"))
#' @export
convert_to_number <- function(character_array) {
  converted <- 0
  ending <- length(character_array)
  for (i in 1:ending) {
    converted <- converted +
      suppressWarnings(as.numeric(character_array[i]) * 10 ^ (ending - i))
  }
  if (!is.na(converted)) {
    return(converted)
  } else {
    return(-1)
  }
}
###############################################################################
#' Helper function to keep date formats in year-month-date
#' @param column a data frame or a  vector
#' @param index those correspond to valid date in numeic form (omitting non
#' response code or no entry)
#' @param orderby give the order such as mdy, dmy etc where d refers to day,
#' m to month and y to year
#' @return entry corrected entries as in standard date format
#' @examples
#' convert_date_numeric_stdform(c("01/01/2000", "02/02/2002"), c(1, 2), "dmy")
#' @export
convert_date_numeric_stdform <- function(column, index, orderby = "dmy") {
  required <- column[index]
  leng <- length(required)
  converted <- list()
  for (i in 1:leng) {
    converted_date <- convert_date_string_stdform(required[i], orderby)
    converted <- append(converted, converted_date)
  }
  converted <- unlist(converted)
  column[index] <- converted
  return(column)
}
###############################################################################
#' Helper function to keep date formats in year-month-date
#' @param entry a date e.g 1 Jan 2020 with no commas
#' @param orderby give the order such as mdy, dmy etc where d refers to day,
#' m to month and y to year
#' @return entry corrected entries as in standard date format
#' @examples
#' convert_date_string_stdform("Jan-1-2020", "mdy")
#' @export
convert_date_string_stdform <- function(entry, orderby) {
  contents <- unlist(strsplit(entry, ""))
  months <- toupper(c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug",
                      "Sep", "Oct", "Nov", "Dec"))
  months_full <- toupper(c(
    "January", "February", "March", "April", "May", "June", "July", "August",
    "September", "October", "November", "December"
  ))

  spec_chars <- c(" ", "/", "-")
  spec_char_exist <- NULL
  stop <- length(spec_chars)
  i <- 1
  while (i <= stop) {
    spec_char_exist <- grep(spec_chars[i], contents)
    if (length(spec_char_exist) != 0) {
      i <- length(spec_chars) + 1
    } else {
      i <- i + 1
    }
  }
  len_spchar <- length(spec_char_exist)
  if (len_spchar != 2) {
    stop("Many special characters, check your date format -
         only mid separators needed including the white space")
  }

  first <- list()
  for (i in 1:spec_char_exist[1] - 1) {
    this <- (contents[i])
    first <- append(first, this)
  }
  first <- unlist(first)
  if ("," %in% first) {
    comma_ind <- grep(",", first)
    first <- first[-comma_ind]
  }
  last <- list()
  start <- spec_char_exist[2] + 1
  ending <- length(contents)
  for (i in start:ending) {
    this <- (contents[i])
    last <- append(last, this)
  }
  last <- unlist(last)
  if ("," %in% last) {
    comma_ind <- grep(",", last)
    last <- last[-comma_ind]
  }

  mid <- list()
  start <- spec_char_exist[1] + 1
  ending <- spec_char_exist[2] - 1
  for (i in start:ending) {
    this <- (contents[i])
    mid <- append(mid, this)
  }
  mid <- unlist(mid)
  if ("," %in% mid) {
    comma_ind <- grep(",", mid)
    mid <- mid[-comma_ind]
  }

  if (orderby == "dmy") {
    if (convert_to_number(first) == -1) {
      stop("The format or the date given wrong -error in converting to number")
    } else {
      day <- convert_to_number(first)
    }
    month <- toupper(paste(mid, collapse = ""))
    if (convert_to_number(last) == -1) {
      stop("The format or the date given wrong -error in converting to number")
    } else {
      year <- convert_to_number(last)
    }
  }
  if (orderby == "dym") {
    if (convert_to_number(first) == -1) {
      stop("The format or the date given wrong -error in converting to number")
    } else {
      day <- convert_to_number(first)
    }
    month <- toupper(paste(last, collapse = ""))
    if (convert_to_number(mid) == -1) {
      stop("The format or the date given wrong -error in converting to number")
    } else {
      year <- convert_to_number(mid)
    }
  }
  if (orderby == "mdy") {
    if (convert_to_number(mid) == -1) {
      stop("The format or the date given wrong -error in converting to number")
    } else {
      day <- convert_to_number(mid)
    }
    month <- toupper(paste(first, collapse = ""))
    if (convert_to_number(last) == -1) {
      stop("The format or the date given wrong -error in converting to number")
    } else {
      year <- convert_to_number(last)
    }
  }
  if (orderby == "myd") {
    if (convert_to_number(last) == -1) {
      stop("The format or the date given wrong -error in converting to number")
    } else {
      day <- convert_to_number(last)
    }
    month <- toupper(paste(first, collapse = ""))
    if (convert_to_number(mid) == -1) {
      stop("The format or the year given wrong -error in converting to number")
    } else {
      year <- convert_to_number(mid)
    }
  }
  if (orderby == "ydm") {
    if (convert_to_number(mid) == -1) {
      stop("The format or the year given wrong -error in converting to number")
    } else {
      day <- convert_to_number(mid)
    }
    month <- toupper(paste(last, collapse = ""))
    if (convert_to_number(first) == -1) {
      stop("The format or the date given wrong -error in converting to number")
    } else {
      year <- convert_to_number(first)
    }
  }
  if (orderby == "ymd") {
    if (convert_to_number(last) == -1) {
      stop("The format or the date given wrong -error in converting to number")
    } else {
      day <- convert_to_number(last)
    }
    month <- toupper(paste(mid, collapse = ""))

    if (convert_to_number(first) == -1) {
      stop("The format or the year given wrong -error in converting to number")
    } else {
      year <- convert_to_number(first)
    }
  }
  if (suppressWarnings(is.na(as.numeric(month)))) {
    if (month %in% months) {
      month_index <- which(month == months)
    } else {
      if (month %in% months_full) {
        month_index <- which(month == months_full)
      } else {
        stop("Check the spelling for the month")
      }
    }
  } else {
    month <- as.numeric(month)
    month_index <- month
  }
  if (month_index == 2) {
    if (year %% 4 != 0) {
      if (day > 28) {
        stop("Date is not valid")
      }
    } else {
      if (day > 29) {
        stop("Date is not valid")
      }
    }
  }
  if (day > 30 & (month_index == 4 | month_index == 6 |
                  month_index == 9 | month_index == 11)) {
    stop("Date can not larger than 31")
  }
  if (day > 31 | day < 1 | month_index > 12 | month_index < 1) {
    stop("Month or date not valid")
  }
  date_return <- paste(year, "-", month_index, "-", day, sep = "")
  return(date_return)
}

###############################################################################
#' Function to calculate age from date of birth
#' @param data a data frame
#' @param columnname name of column corresponding to date of birth
#' @param enddatecol column contaiining when to calculate the age to,
#' default value is null, this means the age is calculated to the current date
#' @param dateformat format of date e.g. dmy default is dmy
#' @param nrcode non response code corresponding to date of birth
#' @return data if success error if failure
#' @examples
#' library(IPDFileCheck)
#' this.df <- data.frame(c("1987-05-28", "1987-06-18"), c(1, 2),
#' stringsAsFactors = FALSE)
#' colnames(this.df) <- c("dob", "num")
#' calculate_age_from_dob(this.df, "dob", NULL, "ymd")
#' @importFrom eeptools age_calc
#' @export
calculate_age_from_dob <- function(data, columnname, enddatecol = NULL,
                                   dateformat = "dmy",
                                   nrcode = NA) {
    column_no <- get_columnno_fornames(data, columnname)
    data <- as.data.frame(data, string.as.factors = FALSE)
    entry <- data[[column_no]]
    entry <- as.character(entry)
    blanks <- c(which(entry == ""), which(is.na(entry)))
    if (length(blanks) != 0) {
      entry[blanks] <- nrcode
    }
    calculated_ages <- rep(0, length(entry))
    if (is.na(nrcode)) {
      index <- which(!is.na(entry))
    } else {
      index <- which(entry != nrcode)
    }
    mod_entry <- convert_date_numeric_stdform(entry, index,
                                              orderby = dateformat)
    if (is.null(enddatecol)) {
      enddate <- Sys.Date()
    } else {
      if (is.na(enddatecol)) {
        enddate <- Sys.Date()
      } else {
        enddate <- as.character(data[[enddatecol]])
        mod_end_entry <- convert_date_numeric_stdform(enddate, index,
                                                      orderby = dateformat)
        enddate <- as.Date(mod_end_entry[index])
      }
    }
    result <- eeptools::age_calc(as.Date(mod_entry[index]), enddate, units = "years")
    calculated_ages[index] <- result
    calculated_ages[blanks] <- NA
    non_na_ages <- calculated_ages[!is.na(calculated_ages)]
    if (any(non_na_ages > 150) || any(non_na_ages < 0)) {
      stop("Age can not be negative OR greater than 150")
    } else {
      data["age"] <- calculated_ages
      return(data)
    }
}

###############################################################################
#' Function to calculate age from year of birth
#' @param data a data frame
#' @param columnname name of column corresponding to year of birth
#' @param endyearcol name of column where the year is entered to calculate
#' the age upto, by default its the current year
#' @param nrcode non response code corresponding to date of birth
#' @return data, if success error if failure
#' @examples
#' this.data.frame <- data.frame(c(1951, 1980), c("John", "Dora"))
#' colnames(this.data.frame) <- c("yob", "name")
#' calculate_age_from_year(this.data.frame, "yob", NULL, NA)
#' @export
calculate_age_from_year <- function(data, columnname, endyearcol = NULL, nrcode = NA) {
  column_no <- get_columnno_fornames(data, columnname)
    entry <- data[[column_no]]
    blanks <- c(which(entry == ""), which(is.na(entry)))
    if (length(blanks) != 0) {
      entry[blanks] <- nrcode
    }
    calculated_ages <- rep(0, length(entry))
    if (is.null(endyearcol)) {
      this_year <- lubridate::year(as.Date(Sys.Date(), format = "%d/%m/%y"))
    } else {
      if (is.na(endyearcol)) {
        this_year <- lubridate::year(as.Date(Sys.Date(), format = "%d/%m/%y"))
      } else {
        res <- sum(is.na(suppressWarnings(as.numeric(data[[endyearcol]]))))
        if (res == 0) {
          this_year <- as.numeric(data[[endyearcol]])
        } else {
          stop("The year is not numeric")
        }
      }
    }
    if (is.na(nrcode)) {
      index <- which(!is.na(entry))
      if (length(this_year) == 1) the_year <- this_year
      else the_year <- this_year[index]
      calculated_ages[index] <- the_year -
        as.numeric(as.character(entry[index]))
      calculated_ages[blanks] <- NA
    } else {
      index <- which(entry != nrcode)
      if (length(this_year) == 1) the_year <- this_year
      else the_year <- this_year[index]
      calculated_ages[index] <- the_year -
        as.numeric(as.character(entry[index]))
      calculated_ages[blanks] <- NA
    }
    non_na_ages <- calculated_ages[!is.na(calculated_ages)]
    if (any(non_na_ages > 150) || any(non_na_ages < 0)) {
      stop("Age can not be negative OR greater than 150")
    } else {
      data["calc.age.yob"] <- calculated_ages
      return(data)
    }
}
#############################################################################
#' Function to return the unique contents of the column given the column name
#' @param data a data frame
#' @param colname name of column corresponding to year of birth
#' @return the contents of the column, if success error if failure
#' @examples
#' get_contents_cols(data.frame(
#'   "yob" =  c(1951, 1980),
#'   "Name" =  c("John", "Dora")
#' ), "yob")
#' @export
get_contents_cols <- function(data, colname) {
  # check to see if the columnname exists
  if (check_column_exists(colname, data) == 0) {
    data <- as.data.frame(data, stringAsFactors = FALSE)
    codes <- unique(data[[colname]])
    if (sum(is.na(suppressWarnings(as.numeric(codes)))) < length(codes)) {
      return(as.numeric(codes))
    } else {
      return(codes)
    }
  } else {
    stop("Data does not contain the column with the specfied column name")
  }
}
########################################################################
#' Function to get the effect size
#' @param data a data frame
#' @param variable  variables to be selected for summary
#' @param by   A column name (quoted or unquoted) in data.
#' @param ... extra parameters required
#' @return returns the effect sizes
#' @export
get_effect_size <- function(data, variable, by, ...) {
  # Cohen's D
  d <- effsize::cohen.d(data[[variable]] ~ as.factor(data[[by]]),
                        conf.level = .95, pooled = TRUE, paired = FALSE,
                        hedges.correction = TRUE)
  # Formatting statistic with CI
  est <- gtsummary::style_sigfig(d$estimate)
  ci <- gtsummary::style_sigfig(d$conf.int) %>% paste(collapse = ", ")

  # returning estimate with CI together
  stringr::str_glue("{est} ({ci})")
}
############################################################################
#' Function to return the summary table using gtsummary package
#' @param the_data a data frame
#' @param selectvar variables to be selected for summary
#' @param byvar A column name (quoted or unquoted) in data.
#' Summary statistics will be calculated separately for each level
#' of the by variable. If NULL, summary statistics are calculated
#' using all observations.
#' @param label List of formulas specifying variables labels,
#' @return the summary using gtsummarys  tbl_summary option
#' @examples
#' trial <- gtsummary::trial
#' table1 <- get_summary_gtsummary(trial, c("trt", "age", "grade"),
#' byvar = "trt")
#' @export
get_summary_gtsummary <- function(the_data, selectvar, byvar = NULL,
                                  label = NULL){
  wilcoxtest <- function(data, variable, by, ...) {
    wilcox.test(data[[variable]] ~ as.factor(data[[by]]), data = data)$p.value
  }
  if (is.null(the_data)) {
    stop("data cant be null")
  }
  if (is.null(tidyselect::all_of(selectvar))) {
    stop("selectvar cant be null")
  } else {
    if (sum(is.na(tidyselect::all_of(selectvar))) == length(tidyselect::all_of(selectvar))) {
      stop("selectvar cant be NA")
    }
  }
  subset_data <- dplyr::select(the_data, tidyselect::all_of(selectvar))
  if (is.null(tidyselect::all_of(byvar))) {
    summary_table <-
      gtsummary::tbl_summary(
        subset_data,
        by = tidyselect::all_of(byvar), # split table by group
        digits = everything() ~ 2,
        type = where(is.numeric) ~ "continuous2",
        statistic = where(is.numeric) ~ c("{N_nonmiss}",
                                          "{mean} ({sd})",
                                          "{median} ({p25}, {p75})",
                                          "{min}, {max}",
                                          "{N_miss} ({p_miss})"),
        missing =  "always",
        missing_text = "(Missing)"
      ) %>%
      gtsummary::modify_header(label = "**Variable**") %>% # update the column header
      gtsummary::bold_labels()
  } else{
    summary_table <-
      gtsummary::tbl_summary(
        subset_data,
        by = tidyselect::all_of(byvar), # split table by group
        digits = everything() ~ 2,
        type = where(is.numeric) ~ "continuous2",
        statistic = where(is.numeric) ~ c("{N_nonmiss}",
                                          "{mean} ({sd})",
                                          "{median} ({p25}, {p75})",
                                          "{min}, {max}",
                                          "{N_miss} ({p_miss})"),
        missing =  "always",
        missing_text = "(Missing)"
      ) %>%
      gtsummary::add_overall() %>%
      gtsummary::add_difference() %>%
      gtsummary::add_stat(fns = where(is.numeric) ~ get_effect_size) %>%
      gtsummary::modify_header(add_stat_1 ~ "**Difference in mean**") %>%
      gtsummary::add_stat(where(is.numeric) ~ wilcoxtest) %>%
      gtsummary::modify_header(add_stat_2 ~ "**p value (Wilcox rank sum test for equal median)**") %>%
      gtsummary::modify_header(label = "**Variable**") %>% # update the column header
      gtsummary::bold_labels()
  }
  return(summary_table)
}
#####################################################
#' Function to get the longitudinal summary mean and sd
#' @param thedata a data frame
#' @param columnnames  column names of the data that are some observations
#' at some time points
#' @param nrcode the non response code in the data
#' @return returns the effect sizes
#' @examples
#' test_data <- as.data.frame(cbind(c(1,2,3,4,5), c(20,40,60,80,100),
#' c("F", "F", "M", "M", "F")))
#' colnames(test_data) <- c("no", "marks", "gender")
#' test_data$marks <- as.numeric(test_data$marks)
#' results <- return_longitudinal_summary(test_data, "marks", NA)
#' @export
return_longitudinal_summary <- function(thedata, columnnames, nrcode = NA){
 result <- unlist(lapply(columnnames, check_column_exists, thedata))
 if (sum(result) != 0)
   stop("Error - some columns do not exists in the data")
 means <- c()
 se <- c()
 for (i in 1:length(columnnames)) {
   this_col <- columnnames[i]
   check <- IPDFileCheck::test_data_numeric_norange(this_col, thedata)
   if (check != 0)
     stop("Need numerical data to estimate mean and sd")
   res <- as.data.frame(descriptive_stats_col_excl_nrcode(thedata, this_col, nrcode))
   this_mean = as.numeric(res$Mean)
   this_se =  as.numeric(res$SE)
   means <- append(means, this_mean)
   se <- append(se, this_se)
 }
 results <- structure(list(means  = means, se = se ))
 return(results)
}
Any scripts or data that you put into this service are public.
IPDFileCheck documentation built on Feb. 1, 2022, 9:07 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
IPDFileCheck
Basic Functions to Check Readability, Consistency, and Content of an Individual Participant Data File

R/IPDFileCheck.R
In IPDFileCheck: Basic Functions to Check Readability, Consistency, and Content of an Individual Participant Data File

Try the IPDFileCheck package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

IPDFileCheck Basic Functions to Check Readability, Consistency, and Content of an Individual Participant Data File

R/IPDFileCheck.R In IPDFileCheck: Basic Functions to Check Readability, Consistency, and Content of an Individual Participant Data File

Try the IPDFileCheck package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

IPDFileCheck
Basic Functions to Check Readability, Consistency, and Content of an Individual Participant Data File

R/IPDFileCheck.R
In IPDFileCheck: Basic Functions to Check Readability, Consistency, and Content of an Individual Participant Data File