WIAT-4

TODO: - still need to fix more

Load libraries

Sys.setenv(JAVA_HOME = "/Library/Java/JavaVirtualMachines/temurin-11.jdk/Contents/Home")
options(java.parameters = "-Xmx16000m")
knitr::opts_chunk$set(
  root.dir = normalizePath("./"),
  echo = FALSE,
  message = FALSE,
  warning = FALSE,
  error = TRUE
)
library(bwu)
library(dplyr)
library(hablar)
library(here)
library(knitr)
library(magrittr)
library(miniUI)
library(readr)
library(rJava)
library(rmarkdown)
library(shiny)
library(tabulapdf)
library(tabulapdfjars)
library(tibble)
library(tidyr)
library(vroom)

Patient

patient <- params$patient

Test

test <- params$test

Upload/attach PDF

file <- params$file

Pages

pages <- params$pages
writeLines(file, here::here(patient, "pre_csv", "wiat4_pdf"))
file <- readLines(here::here(patient, "pre_csv", "wiat4_pdf"))

Locate areas

# top left bottom right
area <- list(
  c(142, 32, 383, 580),
  c(142, 32, 415, 580)
)
# area <- list(
#   c(143, 32, 335, 580)
# )
# area <- list(
#   c(141, 32, 383, 580)
# )
# area <- area
# if unknown
# patient <- "Aria"
# file <- file.choose()
# area <- bwu::gpluck_locate_areas(
#   file = file,
#   pages = c(9, 10)
# )
saveRDS(area, file = here::here(patient, "pre_csv", "area_wiat4.rds"), compress = FALSE)
area <- readRDS(here::here(patient, "pre_csv", "area_wiat4.rds"))

Extract tables

# file <- file.choose()

plucked_table <- tabulapdf::extract_tables(
  file = file,
  pages = c(12, 13),
  method = "stream",
  area = area
)

######
######## text works
plucked_tables <- tabulapdf::extract_text(
  file = file,
  pages = c(12, 13)
)

plucked_table <- bwu::gpluck_extract_table(
  file = file,
  pages = c(12, 13),
  method = "lattice" # decide
  # area = area,
  # guess = NULL,
  # output = "matrix" # data.frame
)
# Load necessary libraries
library(dplyr)
library(purrr)

patient <- "Biggie"
file <- fs::fs_path(file.choose())

df1 <- plucked_tables[[1]]
df2 <- plucked_tables[[2]]

wr <- "Word Reading"
rc <- "Reading Comprehension"

# Convert the string into a vector of lines
lines <- strsplit(df1, "\n")[[1]]
lines2 <- strsplit(df2, "\n")[[1]]

# Filter lines that start with "Word Reading"
wr_lines <- lines[grepl("^Word Reading", lines)]
r_lines <- lines[grepl("^Reading", lines)]
rc_lines <- lines[grepl("^Comprehension", lines)]

# table 2
pd_lines <- lines2[grepl("^Pseudoword Decoding", lines2)]
d_lines <- lines2[grepl("^Decoding", lines2)]
of_lines <- lines2[grepl("^Orthographic Fluency", lines2)]
df_lines <- lines2[grepl("^Decoding Fluency", lines2)]
di_lines <- lines2[grepl("^Dyslexia Index", lines2)]

# word_reading_data is now a list of numeric vectors, where each vector is the scores/values from one "Word Reading" line

# Define a function to convert a line to a tibble
# one word scales
line_to_tibble1 <- function(line) {
  elements <- strsplit(line, " ")[[1]]

  tibble(
    Scale = paste0(elements[1]),
    RawScore = as.integer(elements[2]),
    Score = as.integer(elements[3]),
    CI_95 = paste(elements[4:6], collapse = " "),
    Percentile = as.integer(elements[7]),
    DescriptiveCategory = elements[8],
    AgeEquivalent = elements[9],
    GradeEquivalent = as.numeric(elements[10]),
    GSV = as.integer(elements[11])
  )
}

line_to_tibble2 <- function(line) {
  elements <- strsplit(line, " ")[[1]]

  tibble(
    Scale = paste0(elements[1:2], collapse = " "),
    RawScore = as.integer(elements[3]),
    Score = as.integer(elements[4]),
    CI_95 = paste(elements[5:7], collapse = " "),
    Percentile = as.integer(elements[8]),
    DescriptiveCategory = elements[9],
    AgeEquivalent = elements[10],
    GradeEquivalent = as.numeric(elements[11]),
    GSV = as.integer(elements[12])
  )
}

# Convert all "Word Reading" lines to tibbles
wr <- map_dfr(wr_lines, line_to_tibble2)[1, ]
r <- map_dfr(r_lines, line_to_tibble1)[2, ]
rc <- map_dfr(rc_lines, line_to_tibble1)[1, ]

pd <- map_dfr(pd_lines, line_to_tibble2)[1, ]
d <- map_dfr(d_lines, line_to_tibble1)[1, ]
of <- map_dfr(of_lines, line_to_tibble2)[1, ]
df <- map_dfr(df_lines, line_to_tibble2)[1, ]
di <- map_dfr(di_lines, line_to_tibble2)[1, ]

# word_reading_data is now a tibble where each row corresponds to a "Word Reading" line

wiat4 <- rbind(wr, r, rc, pd, d, of, df, di)[, 1:5]
wiat4[3, 1] <- c("Reading Comprehension")

# Replace "NA" strings with actual NA values
wiat4 <-
  wiat4 %>%
  tidytable::mutate(across(everything(), function(x) ifelse(x == "NA NA NA", NA, x))) |>
  janitor::clean_names()

readr::write_csv(wiat4, here::here(patient, "csv", "wiat4.csv"))

Table 1

Pluck and tidy tables

table1 <- tibble::as_tibble(plucked_table[[1]])
table1 <- tidytable::mutate(table1, raw_score = "", .before = V2)
colnames(table1) <- params$colnames
# table1$scale <- ""
# table1 <- table1 |> slice(-3, -11) # redundant, change each time to remove rows with extra line
to_double <- c("raw_score", "score", "percentile")
table1 <-
  table1 %>%
  hablar::convert(dbl(all_of(to_double))) %>%
  dplyr::relocate(scale, .before = raw_score)

Scale names

table1$scale <- (params$table1)
# table1[1, 1] <- c("Total Achievement")
# table1[2, 1] <- c("Word Reading")
# table1[3, 1] <- c("Reading Comprehension")
# table1[4, 1] <- c("Spelling")
# table1[5, 1] <- c("Essay Composition")
# table1[6, 1] <- c("Math Problem Solving")
# table1[7, 1] <- c("Numerical Operations")
# table1[8, 1] <- c("Reading")
# table1[9, 1] <- c("Word Reading")
# table1[10, 1] <- c("Reading Comprehension")
# table1[11, 1] <- c("Written Expression")
# table1[12, 1] <- c("Spelling")
# table1[13, 1] <- c("Sentence Composition")
# table1[14, 1] <- c("Essay Composition")
# table1[15, 1] <- c("Mathematics")
# table1[16, 1] <- c("Math Problem Solving")
# table1[17, 1] <- c("Numerical Operations")

Table 2

table2 <- as_tibble(plucked_table[[2]]) # usually 2
table2 <- tidytable::mutate(table2, raw_score = NULL, .before = V2)
colnames(table2) <- params$colnames
# table2 <- dplyr::na_if(table2, "")
# table2 <- dplyr::na_if(table2, "NA")
# table2 <- dplyr::na_if(table2, "-")
# table2 <- dplyr::na_if(table2, "--")
# table2 <- dplyr::na_if(table2, "---")
# table2 <- dplyr::slice(table2, 2) # check each time
# table2 <- dplyr::distinct(table2, .keep_all = FALSE) # check each time
to_double <- c("raw_score", "score", "percentile")
table2 <- table2 |> hablar::convert(dbl(all_of(to_double)))

Scale names

table2$scale <- params$table2
# table2[1, 1] <- c("Basic Reading")
# table2[2, 1] <- c("Pseudoword Decoding")
# table2[3, 1] <- c("Phonemic Proficiency")
# table2[4, 1] <- c("Word Reading")
# table2[5, 1] <- c("Decoding")
# table2[6, 1] <- c("Pseudoword Decoding")
# table2[7, 1] <- c("Word Reading")
# table2[8, 1] <- c("Phonological Processing")
# table2[9, 1] <- c("Pseudoword Decoding")
# table2[10, 1] <- c("Phonemic Proficiency")
# table2[11, 1] <- c("Orthographic Processing")
# table2[12, 1] <- c("Orthographic Fluency")
# table2[13, 1] <- c("Spelling")
# table2[14, 1] <- c("Dyslexia Index")
# table2[15, 1] <- c("Word Reading")
# table2[16, 1] <- c("Pseudoword Decoding")
# table2[17, 1] <- c("Orthographic Fluency")

# table2[6, 1] <- c("Reading Fluency")
# table2[7, 1] <- c("Oral Reading Fluency")
# table2[9, 1] <- c("Decoding Fluency")
# table2[13, 1] <- c("Orthographic Processing Extended")
# table2[14, 1] <- c("Orthographic Choice")
# table2[14, 1] <- c("Math Fluency")
# table2[5, 1] <- c("Math Fluency-Addition")
# table2[6, 1] <- c("Math Fluency-Subtraction")
# table2[7, 1] <- c("Math Fluency-Multiplication")
# table2[5, 1] <- c("Oral Language")
# table2[6, 1] <- c("Listening Comprehension")
# table2[7, 1] <- c("Oral Expression")

Select variables to keep

table1 <- table1 |> tidytable::select(all_of(params$keep))
table2 <- table2 |> tidytable::select(all_of(params$keep))
table <- rbind(table1, table2)
table <- dplyr::distinct(table, .keep_all = FALSE)
# View(table)

Mutate columns

table <- wiat4

table <- bwu::gpluck_make_columns(
  table,
  range = "",
  test = params$test,
  test_name = params$test_name,
  domain = "Academic Skills",
  subdomain = "",
  narrow = "",
  pass = "",
  verbal = "",
  timed = "",
  test_type = "npsych_test",
  score_type = "standard_score",
  description = "",
  result = ""
)

Test score ranges

table <- bwu::gpluck_make_score_ranges(table = table, test_type = "npsych_test")

Subdomain

table <-
  table |>
  mutate(
    subdomain = case_when(
      scale == "Total Achievement" ~ "Academic Skills",
      scale == "Reading" ~ "Reading",
      scale == "Word Reading" ~ "Reading",
      scale == "Reading Comprehension" ~ "Reading",
      scale == "Spelling" ~ "Writing",
      scale == "Sentence Composition" ~ "Writing",
      scale == "Mathematics" ~ "Math",
      scale == "Math Problem Solving" ~ "Math",
      scale == "Numerical Operations" ~ "Math",
      scale == "Written Expression" ~ "Writing",
      scale == "Essay Composition" ~ "Writing",
      scale == "Basic Reading" ~ "Reading",
      scale == "Decoding" ~ "Reading",
      scale == "Decoding Fluency" ~ "Reading",
      scale == "Pseudoword Decoding" ~ "Reading",
      scale == "Phonemic Proficiency" ~ "Reading",
      scale == "Phonological Processing" ~ "Reading",
      scale == "Orthographic Processing Extended" ~ "Reading",
      scale == "Orthographic Processing" ~ "Language",
      scale == "Orthographic Fluency" ~ "Reading",
      scale == "Orthographic Choice" ~ "Reading",
      scale == "Dyslexia Index" ~ "Reading",
      scale == "Math Fluency" ~ "Math",
      scale == "Math Fluency-Addition" ~ "Math",
      scale == "Math Fluency-Subtraction" ~ "Math",
      scale == "Math Fluency-Multiplication" ~ "Math",
      TRUE ~ as.character(subdomain)
    )
  )

Narrow subdomain

table <-
  table |>
  mutate(
    narrow = case_when(
      scale == "Total Achievement" ~ "Academic Achievement",
      scale == "Reading" ~ "Reading",
      scale == "Word Reading" ~ "Reading Decoding",
      scale == "Reading Comprehension" ~ "Reading Comprehension",
      scale == "Spelling" ~ "Spelling Ability",
      scale == "Sentence Composition" ~ "Writing Ability",
      scale == "Mathematics" ~ "Quantitative Reasoning",
      scale == "Math Problem Solving" ~ "Mathematical Knowledge",
      scale == "Numerical Operations" ~ "Mathematical Achievement",
      scale == "Written Expression" ~ "Writing Ability",
      scale == "Essay Composition" ~ "Writing Ability",
      scale == "Basic Reading" ~ "Basic Reading",
      scale == "Decoding" ~ "Reading Decoding",
      scale == "Decoding Fluency" ~ "Reading Fluency",
      scale == "Pseudoword Decoding" ~ "Reading Decoding",
      scale == "Phonemic Proficiency" ~ "Reading Decoding",
      scale == "Phonological Processing" ~ "Reading Decoding",
      scale == "Orthographic Processing Extended" ~ "Reading Orthography",
      scale == "Orthographic Processing" ~ "Reading Orthography",
      scale == "Orthographic Fluency" ~ "Reading Fluency",
      scale == "Orthographic Choice" ~ "Reading Orthography",
      scale == "Dyslexia Index" ~ "Reading",
      scale == "Math Fluency" ~ "Number Facility",
      scale == "Math Fluency-Addition" ~ "Number Facility",
      scale == "Math Fluency-Subtraction" ~ "Number Facility",
      scale == "Math Fluency-Multiplication" ~ "Number Facility",
      TRUE ~ as.character(narrow)
    )
  )

PASS model

As necessary.

table <-
  table |>
  mutate(
    pass = case_when(
      scale == "Total Achievement" ~ "Sequential",
      scale == "Reading" ~ "Sequential",
      scale == "Word Reading" ~ "Sequential",
      scale == "Reading Comprehension" ~ "Sequential",
      scale == "Spelling" ~ "Sequential",
      scale == "Sentence Composition" ~ "Sequential",
      scale == "Mathematics" ~ "Simultaneous",
      scale == "Math Problem Solving" ~ "Simultaneous",
      scale == "Numerical Operations" ~ "Simultaneous",
      scale == "Written Expression" ~ "Sequential",
      scale == "Essay Composition" ~ "Sequential",
      scale == "Basic Reading" ~ "Sequential",
      scale == "Decoding" ~ "Sequential",
      scale == "Decoding Fluency" ~ "Sequential",
      scale == "Pseudoword Decoding" ~ "Sequential",
      scale == "Phonemic Proficiency" ~ "Sequential",
      scale == "Phonological Processing" ~ "Sequential",
      scale == "Orthographic Processing Extended" ~ "Sequential",
      scale == "Orthographic Processing" ~ "Sequential",
      scale == "Orthographic Fluency" ~ "Sequential",
      scale == "Orthographic Choice" ~ "Sequential",
      scale == "Dyslexia Index" ~ "Sequential",
      scale == "Math Fluency" ~ "Simultaneous",
      scale == "Math Fluency-Addition" ~ "Sequential",
      scale == "Math Fluency-Subtraction" ~ "Sequential",
      scale == "Math Fluency-Multiplication" ~ "Sequential",
      TRUE ~ as.character(pass)
    )
  )

Verbal vs Nonverbal

As necessary.

table <-
  table |>
  mutate(
    verbal = case_when(
      scale == "Total Achievement" ~ "",
      scale == "Reading" ~ "Verbal",
      scale == "Word Reading" ~ "Verbal",
      scale == "Reading Comprehension" ~ "Verbal",
      scale == "Spelling" ~ "Verbal",
      scale == "Sentence Composition" ~ "Verbal",
      scale == "Mathematics" ~ "Nonverbal",
      scale == "Math Problem Solving" ~ "Nonverbal",
      scale == "Numerical Operations" ~ "Nonverbal",
      scale == "Written Expression" ~ "Verbal",
      scale == "Essay Composition" ~ "Verbal",
      scale == "Basic Reading" ~ "Verbal",
      scale == "Decoding" ~ "Verbal",
      scale == "Decoding Fluency" ~ "Verbal",
      scale == "Pseudoword Decoding" ~ "Verbal",
      scale == "Phonemic Proficiency" ~ "Verbal",
      scale == "Phonological Processing" ~ "Verbal",
      scale == "Orthographic Processing Extended" ~ "Verbal",
      scale == "Orthographic Processing" ~ "Verbal",
      scale == "Orthographic Fluency" ~ "Verbal",
      scale == "Orthographic Choice" ~ "Verbal",
      scale == "Dyslexia Index" ~ "Verbal",
      scale == "Math Fluency" ~ "Nonverbal",
      scale == "Math Fluency-Addition" ~ "Nonverbal",
      scale == "Math Fluency-Subtraction" ~ "Nonverbal",
      scale == "Math Fluency-Multiplication" ~ "Nonverbal",
      TRUE ~ as.character(verbal)
    )
  )

Timed vs Untimed

As necessary.

table <-
  table |>
  mutate(
    timed = case_when(
      scale == "Total Achievement" ~ "Untimed",
      scale == "Reading" ~ "Untimed",
      scale == "Word Reading" ~ "Untimed",
      scale == "Reading Comprehension" ~ "Untimed",
      scale == "Spelling" ~ "Untimed",
      scale == "Sentence Composition" ~ "Untimed",
      scale == "Mathematics" ~ "Untimed",
      scale == "Math Problem Solving" ~ "Untimed",
      scale == "Numerical Operations" ~ "Untimed",
      scale == "Written Expression" ~ "Untimed",
      scale == "Essay Composition" ~ "Untimed",
      scale == "Basic Reading" ~ "Untimed",
      scale == "Decoding" ~ "Untimed",
      scale == "Decoding Fluency" ~ "Timed",
      scale == "Pseudoword Decoding" ~ "Untimed",
      scale == "Phonemic Proficiency" ~ "Timed",
      scale == "Phonological Processing" ~ "Untimed",
      scale == "Orthographic Processing Extended" ~ "Timed",
      scale == "Orthographic Processing" ~ "Timed",
      scale == "Orthographic Fluency" ~ "Timed",
      scale == "Orthographic Choice" ~ "Timed",
      scale == "Dyslexia Index" ~ "",
      scale == "Math Fluency" ~ "Timed",
      scale == "Math Fluency-Addition" ~ "Timed",
      scale == "Math Fluency-Subtraction" ~ "Timed",
      scale == "Math Fluency-Multiplication" ~ "Timed",
      TRUE ~ as.character(timed)
    )
  )

Description

table <-
  table |>
  mutate(
    description = case_when(
      scale == "Total Achievement" ~ "Overall academic achievement in the areas of reading, math, and writing",
      scale == "Reading" ~ "A composite score of decoding and comprehension that balances word-level and text-level reading skills",
      scale == "Word Reading" ~ "Single word reading/decoding of a list of regular and irregular words",
      scale == "Reading Comprehension" ~ "Reading comprehension skills at the level of word, sentence, and passage",
      scale == "Reading Fluency" ~ "A composite measure of overall oral reading fluency skills, including  reading in context and in isolation",
      scale == "Spelling" ~ "Written spelling from dictations",
      scale == "Sentence Composition" ~ "Sentence formulation skills",
      scale == "Sentence Writing Fluency" ~ "Sentence composition fluency",
      scale == "Mathematics" ~ "An estimate of overall mathematics skills in the domains of math comprehension and problem solving and math computation",
      scale == "Math Problem Solving" ~ "Applying mathematical principles to real-life situations, ranging from basic concepts, everyday applications, geometry, and algebra",
      scale == "Numerical Operations" ~ "Paper-and-pencil math calculation skills, ranging from basic operations with integers to geometry, algebra, and calculus problems",
      scale == "Written Expression" ~ "Overall written expression skills including spelling and expository/contextual writing",
      scale == "Essay Composition" ~ "spontaneous writing fluency at the discourse level",
      scale == "Basic Reading" ~ "A composite measure of phonological skills, phonic decoding, and single-word reading skills",
      scale == "Decoding" ~ "A composite estimate of decontextualized phonic decoding and word reading skills",
      scale == "Pseudoword Decoding" ~ "Phonic decoding skills as assessed by reading aloud a list of pseudowords",
      scale == "Decoding Fluency" ~ "Phonic decoding fluency",
      scale == "Phonemic Proficiency" ~ "Speed and accuracy of phonological/phonemic skills including elision, substitution, and reversal",
      scale == "Phonological Processing" ~ "A phonological processing composite measure of phonemic proficiency and phonic decoding skills",
      scale == "Orthographic Processing" ~ "An overall measure of orthographic processing, including the size of the examinees orthographic lexicon and the quality of orthographic representations",
      scale == "Orthographic Processing Extended" ~ "An overall measure of orthographic processing, including the size of the examinees orthographic lexicon and the quality of orthographic representations (extended)",
      scale == "Orthographic Fluency" ~ "Orthographic lexicon, or sight vocabulary fluency",
      scale == "Orthographic Choice" ~ "Orthographic knowledge, or the quality of stored orthographic representations (i.e., correct word spellings)",
      scale == "Dyslexia Index" ~ "A clinically sensitive composite score for identifying risk for dyslexia among students",
      scale == "Math Fluency" ~ "An overall speed/fluency in solving fact-based addition, subtraction, and multiplication problems",
      scale == "Math Fluency-Addition" ~ "Speed of addition fact fluency",
      scale == "Math Fluency-Subtraction" ~ "Speed of subtraction fact fluency",
      scale == "Math Fluency-Multiplication" ~ "Speed of multiplication fact fluency",
      scale == "Listening Comprehension" ~ "Receptive language comprehension at the level of the word, sentence, and passage",
      scale == "Receptive Vocabulary" ~ "Selecting pictures that best illustrate the mean of individual target words",
      scale == "Oral Discourse Comprehension" ~ "Listening to passages and then responding aloud to comprehension questions",
      scale == "Alphabet Writing Fluency" ~ "Letter writing fluency",
      scale == "Oral Expression" ~ "Oral expression at the level of the word and sentence",
      scale == "Expressive Vocabulary" ~ "Seeing a picture and hearing a definition and then saying the word that best corresponds to the picture and definition",
      scale == "Oral Word Fluency" ~ "Naming as many things as possible belonging to a given category within 60 seconds",
      scale == "Sentence Repetition" ~ "Listening to a sentence and then repeating it verbatim",
      scale == "Oral Reading Fluency" ~ "Oral reading fluency",
      TRUE ~ as.character(description)
    )
  )

Glue results

table <-
  table %>%
  tidytable::mutate(
    result = glue::glue(
      "{description} was {range}."
    )
  )

Relocate variables

wiat4 <- table |> relocate(c(raw_score, score, percentile, range, ci_95), .before = test)

Write out final csv

readr::write_csv(wiat4, here::here(patient, "csv", "wiat4.csv"), col_names = TRUE, na = "")


jtrampush/npsych.data documentation built on Feb. 25, 2025, 12:30 a.m.