# R/readCombineFiles.R

# Define PMIDs already added or excluded

alreadyAdded_pancreas    <-
    readr::read_csv("~/AutoJournalWatch/data/removed/alreadyAdded_pancreas.csv",
                    col_names = FALSE,
                    readr::cols(
                        X1 = col_character()
                    )) %>%
    dplyr::pull("X1")

excluded_pancreas        <-
    readr::read_csv("~/AutoJournalWatch/data/removed/excluded_pancreas.csv",
                    col_names = FALSE,
                    readr::cols(
                        X1 = col_character()
                    )) %>%
    dplyr::pull("X1")


previous_pancreas <-
    readr::read_csv("~/AutoJournalWatch/data/PMIDList_pancreas.csv",
                    col_names = FALSE,
                    readr::cols(
                        X1 = col_character()
                    )) %>%
    dplyr::pull("X1")


alreadyAdded_gallbladder <-
    readr::read_csv("~/AutoJournalWatch/data/removed/alreadyAdded_gallbladder.csv",
                    col_names = FALSE,
                    readr::cols(
                        X1 = col_character()
                    )) %>%
    dplyr::pull("X1")

excluded_gallbladder     <-
    readr::read_csv("~/AutoJournalWatch/data/removed/excluded_gallbladder.csv",
                    col_names = FALSE,
                    readr::cols(
                        X1 = col_character()
                    )) %>%
    dplyr::pull("X1")


previous_gallbladder <-
    readr::read_csv("~/AutoJournalWatch/data/PMIDList_gallbladder.csv",
                    col_names = FALSE,
                    readr::cols(
                        X1 = col_character()
                    )) %>%
    dplyr::pull("X1")



alreadyAdded_bileducts   <-
    readr::read_csv("~/AutoJournalWatch/data/removed/alreadyAdded_bileducts.csv",
                    col_names = FALSE,
                    readr::cols(
                        X1 = col_character()
                    )) %>%
    dplyr::pull("X1")

excluded_bileducts       <-
    readr::read_csv("~/AutoJournalWatch/data/removed/excluded_bileducts.csv",
                    col_names = FALSE,
                    readr::cols(
                        X1 = col_character()
                    )) %>%
    dplyr::pull("X1")

previous_bileducts <-
    readr::read_csv("~/AutoJournalWatch/data/PMIDList_bileducts.csv",
                    col_names = FALSE,
                    readr::cols(
                        X1 = col_character()
                    )) %>%
    dplyr::pull("X1")

alreadyAdded_ampulla     <-
    readr::read_csv("~/AutoJournalWatch/data/removed/alreadyAdded_ampulla.csv",
                    col_names = FALSE,
                    readr::cols(
                        X1 = col_character()
                    )) %>%
    dplyr::pull("X1")

excluded_ampulla         <-
    readr::read_csv("~/AutoJournalWatch/data/removed/excluded_ampulla.csv",
                    col_names = FALSE,
                    readr::cols(
                        X1 = col_character()
                    )) %>%
    dplyr::pull("X1")


previous_ampulla <-
    readr::read_csv("~/AutoJournalWatch/data/PMIDList_ampulla.csv",
                    col_names = FALSE,
                    readr::cols(
                        X1 = col_character()
                    )) %>%
    dplyr::pull("X1")



# Read PMIDList files and new PMIDs, combine them, remove duplicates, already added, or excluded

# Directory that holds the per-topic PMID CSV files.
pathFiles                <- "~/AutoJournalWatch/data"

# List the CSV files directly inside `path` whose names contain `topic`.
#
# topic: topic keyword expected in the file name, e.g. "pancreas".
# path:  directory to search (not recursive); defaults to `pathFiles`.
#
# Returns a character vector of full file paths (empty if nothing matches).
#
# list.files() interprets `pattern` as a regular expression, not as a shell
# glob. The glob "*<topic>*.csv" is therefore translated with
# utils::glob2rx(), which yields the anchored regex "^.*<topic>.*\\.csv$".
# Passing the glob directly would miss names such as
# "pancreas_2020-08-13.csv" and would accept "pancreas.csv.bak".
list_topic_files <- function(topic, path = pathFiles) {
    list.files(path = path,
               pattern = utils::glob2rx(paste0("*", topic, "*.csv")),
               full.names = TRUE)
}

files_pancreas           <- list_topic_files("pancreas")

files_gallbladder        <- list_topic_files("gallbladder")

files_bileducts          <- list_topic_files("bileducts")

files_ampulla            <- list_topic_files("ampulla")


# Read every file in `files`, stack the rows, drop duplicated rows and remove
# the PMIDs listed in `removed`.
#
# files:   character vector of paths to header-less CSV files with the PMID in
#          the first column (named X1 by readr).
# removed: character vector of PMIDs that must not appear in the result.
#
# Returns a tibble with the remaining rows; X1 is a character column.
#
# X1 is read as character on purpose: the lookup vectors are character, and
# `%in%` compares a numeric column through as.character(), which turns round
# values such as 30000000 into "3e+07" so they would never match. A fixed
# column type also keeps bind_rows() from failing when the files would
# otherwise be guessed as different types.
combine_pmid_files <- function(files, removed) {
    tables <- purrr::map(
        .x = files,
        .f = readr::read_csv,
        col_names = FALSE,
        col_types = readr::cols(X1 = readr::col_character())
    )
    combined <- unique(dplyr::bind_rows(tables))

    # No input files (or only empty ones): there is no X1 column to filter on,
    # so return an empty list instead of failing.
    if (!"X1" %in% names(combined)) {
        return(dplyr::tibble(X1 = character()))
    }

    dplyr::filter(combined, !X1 %in% removed)
}

# For each topic:
#   PMIDList_<topic>    = all PMIDs found, minus already added and excluded
#   newPMIDList_<topic> = PMIDList_<topic>, minus the previously listed PMIDs
# The new list is derived from the full list so each file is read only once.

PMIDList_pancreas        <-
    combine_pmid_files(files_pancreas,
                       c(alreadyAdded_pancreas, excluded_pancreas))

newPMIDList_pancreas     <-
    dplyr::filter(PMIDList_pancreas, !X1 %in% previous_pancreas)


PMIDList_gallbladder     <-
    combine_pmid_files(files_gallbladder,
                       c(alreadyAdded_gallbladder, excluded_gallbladder))

newPMIDList_gallbladder  <-
    dplyr::filter(PMIDList_gallbladder, !X1 %in% previous_gallbladder)


PMIDList_bileducts       <-
    combine_pmid_files(files_bileducts,
                       c(alreadyAdded_bileducts, excluded_bileducts))

newPMIDList_bileducts    <-
    dplyr::filter(PMIDList_bileducts, !X1 %in% previous_bileducts)


PMIDList_ampulla         <-
    combine_pmid_files(files_ampulla,
                       c(alreadyAdded_ampulla, excluded_ampulla))

newPMIDList_ampulla      <-
    dplyr::filter(PMIDList_ampulla, !X1 %in% previous_ampulla)
# sbalci/AutoJournalWatch documentation built on Aug. 13, 2020, 4:18 p.m.