R/datasets_reshaping.R

Defines functions wide_to_long_muts wide_to_long_cov long_to_wide_muts long_to_wide_cov

# Reshape a coverage table from long to wide format.
#
# cov.df: long-format data frame. It must contain the columns "IS",
#   "timepoints" and "lineage" plus at least one column whose name starts
#   with "cov" (e.g. "coverage"); columns whose name contains "labels" are
#   carried along when present.
#
# Returns one row per combination of the remaining id columns ("IS" and any
# "labels" columns) and one value column per (timepoints, lineage) pair,
# named "cov.<timepoints>.<lineage>".
long_to_wide_cov <- function(cov.df) {
  # Drop everything that is neither an id column nor a coverage value.
  kept <- dplyr::select(
    cov.df,
    dplyr::starts_with("cov"),
    "IS",
    "timepoints",
    "lineage",
    dplyr::contains("labels")
  )

  # A (timepoints, lineage) pair that was never observed for a row is
  # filled with 0, i.e. zero coverage.
  tidyr::pivot_wider(
    kept,
    names_from = c("timepoints", "lineage"),
    names_prefix = "cov.",
    names_sep = ".",
    values_from = dplyr::starts_with("cov"),
    values_fill = 0
  )
}


# Reshape a mutation (VAF) table from long to wide format.
#
# vaf.df: long-format data frame with the columns "timepoints", "lineage",
#   "IS" and "mutation". Columns whose names start with "alt", "ref", "dp",
#   "vaf", "theta", "labels" or "pi" are kept when present.
#
# Returns a wide table in which every "alt"/"ref"/"dp"/"vaf"/"theta" column
# is spread into one column per (timepoints, lineage) pair, named
# "<column>.<timepoints>.<lineage>". All other kept columns ("IS",
# "mutation" and any "labels"/"pi" columns) are not pivoted and therefore
# identify the output rows.
long_to_wide_muts <- function(vaf.df) {
  kept <- dplyr::select(
    vaf.df,
    timepoints, lineage, IS, mutation,
    dplyr::starts_with("alt"),
    dplyr::starts_with("ref"),
    dplyr::starts_with("dp"),
    dplyr::starts_with("vaf"),
    dplyr::starts_with("theta"),
    dplyr::starts_with("labels"),
    dplyr::starts_with("pi")
  )

  # Missing (timepoints, lineage) combinations are filled with 0, meaning
  # zero ref/alt counts and zero VAF.
  tidyr::pivot_wider(
    kept,
    names_from = c("timepoints", "lineage"),
    names_sep = ".",
    values_from = c(
      dplyr::starts_with("alt"),
      dplyr::starts_with("ref"),
      dplyr::starts_with("dp"),
      dplyr::starts_with("vaf"),
      dplyr::starts_with("theta")
    ),
    values_fill = 0
  )
}


# Reshape a coverage table from wide to long format.
#
# cov.df: wide-format data frame whose coverage columns are named
#   "cov.<timepoints>.<lineage>"; every column whose name starts with "cov"
#   is pivoted, all other columns are kept as they are.
#
# Returns a long table with one row per original row and coverage column,
# holding the columns "timepoints", "lineage" and "coverage".
wide_to_long_cov <- function(cov.df) {
  cov.df %>%
    tidyr::pivot_longer(cols = dplyr::starts_with("cov"),
                        names_to = "else.time.lineage",
                        values_to = "coverage") %>%
    # Split "cov.<timepoints>.<lineage>" on dots. extra = "merge" stops
    # splitting after the third piece, so a lineage name that itself
    # contains "." is kept whole instead of being truncated with a warning.
    tidyr::separate("else.time.lineage",
                    into = c("else", "timepoints", "lineage"),
                    sep = "[.]",
                    extra = "merge") %>%
    # The leading "cov" piece carries no information; drop it.
    dplyr::mutate("else" = NULL) %>%
    # mutate_tp is defined elsewhere in the package; it is called here with
    # as.integer on the "timepoints" column (presumably to convert the
    # column parsed from the header text to integers; confirm in the helper).
    mutate_tp(fn = as.integer, colnm = "timepoints")
}


# Reshape a mutation (VAF) table from wide to long format.
#
# vaf.df: wide-format data frame whose value columns are named
#   "<type>.<timepoints>.<lineage>", where <type> starts with one of "alt",
#   "ref", "dp", "theta" or "vaf"; all other columns are kept as they are.
#
# Returns a long table with the columns "timepoints" and "lineage" and one
# column per <type>, i.e. one row per id / timepoints / lineage combination.
wide_to_long_muts <- function(vaf.df) {
  vaf.df %>%
    tidyr::pivot_longer(cols = c(dplyr::starts_with("alt"),
                                 dplyr::starts_with("ref"),
                                 dplyr::starts_with("dp"),
                                 dplyr::starts_with("theta"),
                                 dplyr::starts_with("vaf")),
                        names_to = "type.timepoints.lineage") %>%
    # Split "<type>.<timepoints>.<lineage>" on dots. extra = "merge" stops
    # splitting after the third piece, so a lineage name that itself
    # contains "." is kept whole instead of being truncated with a warning.
    tidyr::separate("type.timepoints.lineage",
                    into = c("type", "timepoints", "lineage"),
                    sep = "[.]",
                    extra = "merge") %>%
    # Spread the value types back into their own columns ("value" is the
    # default values column created by pivot_longer above).
    tidyr::pivot_wider(names_from = "type",
                       values_from = "value") %>%
    # mutate_tp is defined elsewhere in the package; it is called here with
    # as.integer on the "timepoints" column (presumably to convert the
    # column parsed from the header text to integers; confirm in the helper).
    mutate_tp(fn = as.integer, colnm = "timepoints")
}
caravagnalab/LineaGT documentation built on June 13, 2025, 1:58 p.m.