R/MHQuant_CRISPResso.R

Defines functions MHQuant_CRISPResso

MHQuant_CRISPResso <- function(directory){
  # this is a wrapper function for MH quantification of
  # deletions analysed by CRISPREsso (Pinello, 2016)
  # it takes only one argument, directory
  # e.g. MHQuant_main(directory="/path/to/crispresso/output/")
  # directory should contain only Allele_frequency_table.txt files
  # that were generated by crispresso/1.0.x


  files <- list.files(directory, pattern="*.txt")

  Results <- list()
  cat("Looking in the specified directory for CRISPResso output files...", "\n")

  for (f in 1:length(files)){                   # read each file one at a time
    file <- paste0(directory, "/", files[f])    # read the file name and paste it with the directory
    data <- gather(file)                        # use gather function to clean up data

    cat("Now analysing", files[f], "\n")             # tell us which file is now being analysed

          results_int <- NULL                         # initiate the output

          for (i in 1:nrow(data)){                    # read each row one at a time
            df <- data[i,]


            # return the cleaned up deletion sequence + index, and reference sequence for each allele
            index_out <- index(df$Aligned_Sequence,
                               df$Reference_Sequence)

            # determine whether the deletion is a simple one
            # (i.e. one contiguous region)
            simple_out <- is.simple(index_out)

            if (!simple_out){                   # if the deletion is NOT simple skip the row
              next
            } else {

              # determine if a mutation is present in the sequence
              mut_prep_out <- is.mut.prep(index_out)
              is_mutated <- is.mutated(mut_prep_out$Del_test,
                                       mut_prep_out$Ref_test,
                                       exclude="-")

              if (is_mutated){                  # if the deletion is mutated skip the row
                next
              } else {

                # get the sequences to search for MHs in
                MHSeqSize <- MHQuant.prep1(index_out)
                MHQuant_seqs <- MHQuant.prep2(index_out, MHSeqSize)

                MHQuant_out <- MHQuant_sub(MHQuant_seqs)      # look for MH in the corresponding left and hand sequences
                MHSeq_out <- MHSeq.return(MHQuant_out,    # return MH the sequence, if any
                                          index_out)
              }
              if(MHSeq_out=="No_MH"){
                altMH_out <- "No_MH"
              } else {
                altMH_out <- altMH(MHSeq_out,
                                   index_out)
              }
            }

            # add the calculations to a new row of the output
            results_int <- rbind(results_int,
                                 data.frame(MutantSequence=index_out$delSeq,
                                            ReferenceSequence=index_out$refSeq,
                                            SizeOfDeletion=df$n_deleted,
                                            NumberOfReads=df$X.Reads,
                                            MH_amount=MHQuant_out$max_MH,
                                            MH_sequence=MHSeq_out,
                                            altMH_count=altMH_out))



          } # end of the for each row loop

          dir.create(paste0(directory, "/MHQuant_out/"), showWarnings = F)
          write.table(results_int, file=paste0(directory, "/MHQuant_out/", "MHQuant_", files[f]), quote=F, row.names =F)
          cat("Microhomology analysis complete for", files[f], "moving on to next file...", "\n")          # tell us which file is now being analysed
          Results[[f]] <- results_int # return a list of data frames for each file

  } # end of the for each file loop

  names(Results) <- files # rename the list entries to all the files
  cat("Done!", "\n")
  cat("Results are in", paste0(directory, "/MHQuant_out/"))
  return(Results)
}
d0minicO/mhscanR documentation built on May 4, 2021, 5:22 p.m.