# Attach every package this report relies on, one at a time and in the order
# listed, then drop the loop variable so nothing extra is left in the
# workspace.
for (pkg in c(
  "tidyverse",
  "printr",
  "ggthemes",
  "readr",
  "miR34AasRNAproject",
  "grid",
  "gtable"
)) {
  library(pkg, character.only = TRUE)
}
rm(pkg)

# Base URLs into the project's GitHub repository (raw file access):
# `projectUrl` points at the `inst` directory, `dataUrl` at the repository
# root (note the trailing slash on `dataUrl` only).
projectUrl <- "https://github.com/GranderLab/miR34a_asRNA_project/raw/master/inst"
dataUrl <- "https://github.com/GranderLab/miR34a_asRNA_project/raw/master/"

Introduction

We wanted to complement the bioinformatics-based coding potential analyses by scanning a publicly available proteomics dataset for peptides matching miR34a asRNA.



Methods

Orbitrap raw MS/MS files for 11 human cell lines (A549, GAMG, HEK293, HeLa, HepG2, K562, MCF7, RKO, U2OS, LNCaP and Jurkat) were downloaded from the PRIDE repository (PXD002395; Geiger et al., 2012) and converted to mzML format using msConvert from the ProteoWizard tool suite (Holman et al., 2014). Spectra were then searched using MSGF+ (v10072) (Kim and Pevzner, 2014) and Percolator (v2.08) (Granholm et al., 2014). All searches were done in the Galaxy platform (Boekel et al., 2015) against the human protein subset of Ensembl 75, supplemented with the six-frame translations of uc057cap.1 and of the PCR-cloned sequence of miR34a asRNA (see below). MSGF+ settings included a precursor mass tolerance of 10 ppm, fully tryptic peptides, a maximum peptide length of 50 amino acids and a maximum charge of 6. Carbamidomethylation of cysteine residues was set as a fixed modification and oxidation of methionine residues as a variable modification. PSMs found at 1% FDR (false discovery rate) were used to infer peptide identities. The output from all searches is available in coding_potential_geiger.rds.



RNA sequences used for translation

# Build a two-row table holding the name and nucleotide sequence of each
# transcript (`miR34AasRNA_cloned` and `uc057cap.1`). The result is not
# assigned to a variable, so it is displayed at this point in the report.
tibble(
  name = c("miR34AasRNA_cloned", "uc057cap.1"),
  sequence = c(
    "AAACACAAGCGTTTACCTGGGTGCATGCTGGGACGGGGCGAGCAGGTAGTGCAGGCTTCCAGGCCTCTCCTGCCCCGGAGCTTGGCTGCAGGACTCCCGCAAAATCTCCAAATGCCCCCGATCTGCGTGGTCACCGAGAAGCAGCCCGGCTCGGAACAAGCCCAGGCAAGCCCAGGCAGAGCCCGCCGCCGGGTCCTCCTTCCTGCTCGTGCCACCAGGCCCCGGGGCCGCGACGCGTCTCTCCAGCCCGGGATCCGGGGAGCTGGGCTGTCCCCAGACCGACGGGACAGCGGCATCTCCTCCACCTGAAAAGGAAAGAGGACCAGTTTGCAGGACTCCGAACTGGGCCCGCGAGATCTCCACCTGCGCAAAACGAAAGGGCGGATTCTCCTTGGACTCACGAGGCAACCGCTCCCCGGGGTGAGAACGGGGGACTCATTCCTCCGGCACTGGGAGAAGACGATTCTTTAGGAGGAGGACAGGGAAGCGAATGCTACCCAGATGTCTCAGTATACTGGCTCGCGGCACATCGGGCAAATGAACCTATCAGATAACAACGGCAGATCAGATGCCTGAGCATTCAGAAGCAACAGCTGTGGAGCCCCCGTGGGTTCAGAAGGCCTGGTTCCCGTCTCCAGAAGCCTGGCTCTCCTCCCTCCTGGGCCCACTACTTTGGCTTCTTGTTCCTACGTACAAGGAGTTGCGAAGAAGGCAACTCTTCCCCTCCCTGAAGCCAAAGGAATGAAACAGACTAGGGCGGGAGAGGTGGCCATCCGTCATTAGTTGCGGCCATCA",
    "ACCAGGCCCCGGGGCCGCGACGCGTCTCTCCAGCCCGGGATCCGGGGAGCTGGGCTGTCCCCAGACCGACGGGACAGCGGCATCTCCTCCACCTGAAAAGGAAAGAGGACCAGTTTGCAGGACTCCGAACTGGGCCCGCGAGATCTCCACCTGCGCAAAACGAAAGGGCGGATTCTCCTTGGACTCACGAGGCAACCGCTCCCCGGGGTGAGAACGGGGGACTCATTCCTCCGGCACTGGGAGAAGACGATTCTTTAGGAGGAGGACAGGGAAGCGAATGCTACCCAGGATAACAACGGCAGATCAGATGCCTGAGCATTCAGAAGCAACAGCTGTGGAGCCCCCGTGGGTTCAGAAGGCCTGGTTCCCGTCTCCAGAAGCCTGGCTCTCCTCCCTCCTGGGCCCACTACTTTGGCTTCTTGTTCCTACGTACAAGGAGTTGCGAAGAAGGCAACTCTTCCCCTCCCTGAAGCCAAAGGAATGAAACAGACTAGGGCGGGAGAGGTGGCCATCCGTCATTAGTTGCGGCCATCAGTAACAGCAACAGGACACGGAACCTAAGGCTGTATCCATCCTGGGCCCCCAGGGAAACATCAGCGGGAGCGGTACTAAGGAAGTGCTCATCTCTTAGAGACAAAGGCCCATGGAGGGGAACAGTAACCATCCCCTCCCAATTCAGAAAATGTTAACATAAGCACTTCATTTCTCATGCAGATAACCACATAAGTCTATTAATTCCATGCCAGCAGGGTATAGGAATTGGTCTATAGGAGAAAGGGTCACCCACTGAAAGGTGGGCTGAATAGAATTCCTTGCCTGGGCTTTGAGGTCCTGGCATGGAGAAGGCTGTAGAAATGCTGGCATCAGTGGAACCCTCAATAAACAGAATTCTTGTTAAAGAA"
  )
) %>% 
  # Insert a space after every consecutive run of three characters, i.e. split
  # each sequence into triplets starting at its first base. A sequence whose
  # length is a multiple of three ends up with a trailing space.
  mutate(sequence = str_replace_all(sequence, "(.{3})", "\\1 "))

6 frame translations

# Build a 12-row table of translated sequences: six entries for
# `miR34AasRNA_cloned` followed by six for `uc057cap.1`. The result is not
# assigned to a variable, so it is displayed at this point in the report.
# NOTE(review): the `_5_3_` / `_3_5_` name parts presumably denote the forward
# (5'->3') and reverse (3'->5') strands, with `Frame_1..3` the reading-frame
# offset, and `*` presumably marks a stop codon -- confirm against the tool
# that produced these translations.
tibble(
  name = c(
    "miR34AasRNA_cloned_5_3_Frame_1", "miR34AasRNA_cloned_5_3_Frame_2", "miR34AasRNA_cloned_5_3_Frame_3",
    "miR34AasRNA_cloned_3_5_Frame_1", "miR34AasRNA_cloned_3_5_Frame_2", "miR34AasRNA_cloned_3_5_Frame_3",
    "uc057cap.1_5_3_Frame_1", "uc057cap.1_5_3_Frame_2", "uc057cap.1_5_3_Frame_3",
    "uc057cap.1_3_5_Frame_1", "uc057cap.1_3_5_Frame_2", "uc057cap.1_3_5_Frame_3"
  ),
  # One translated sequence per entry in `name`, in the same order.
  sequence = c(
    "KHKRLPGCMLGRGEQVVQASRPLLPRSLAAGLPQNLQMPPICVVTEKQPGSEQAQASPGRARRRVLLPARATRPRGRDASLQPGIRGAGLSPDRRDSGISST*KGKRTSLQDSELGPRDLHLRKTKGRILLGLTRQPLPGVRTGDSFLRHWEKTIL*EEDREANATQMSQYTGSRHIGQMNLSDNNGRSDA*AFRSNSCGAPVGSEGLVPVSRSLALLPPGPTTLASCSYVQGVAKKATLPLPEAKGMKQTRAGEVAIRH*LRPS",
    "NTSVYLGACWDGASR*CRLPGLSCPGAWLQDSRKISKCPRSAWSPRSSPARNKPRQAQAEPAAGSSFLLVPPGPGAATRLSSPGSGELGCPQTDGTAASPPPEKERGPVCRTPNWAREISTCAKRKGGFSLDSRGNRSPG*ERGTHSSGTGRRRFFRRRTGKRMLPRCLSILARGTSGK*TYQITTADQMPEHSEATAVEPPWVQKAWFPSPEAWLSSLLGPLLWLLVPTYKELRRRQLFPSLKPKE*NRLGRERWPSVISCGH",
    "TQAFTWVHAGTGRAGSAGFQASPAPELGCRTPAKSPNAPDLRGHREAARLGTSPGKPRQSPPPGPPSCSCHQAPGPRRVSPARDPGSWAVPRPTGQRHLLHLKRKEDQFAGLRTGPARSPPAQNERADSPWTHEATAPRGENGGLIPPALGEDDSLGGGQGSECYPDVSVYWLAAHRANEPIR*QRQIRCLSIQKQQLWSPRGFRRPGSRLQKPGSPPSWAHYFGFLFLRTRSCEEGNSSPP*SQRNETD*GGRGGHPSLVAAI",
    "*WPQLMTDGHLSRPSLFHSFGFREGKSCLLRNSLYVGTRSQSSGPRREESQASGDGNQAF*THGGSTAVASECSGI*SAVVI**VHLPDVPRASILRHLGSIRFPVLLLKNRLLPVPEE*VPRSHPGERLPRESKENPPFRFAQVEISRAQFGVLQTGPLSFSGGGDAAVPSVWGQPSSPDPGLERRVAAPGPGGTSRKEDPAAGSAWACLGLFRAGLLLGDHADRGHLEILRESCSQAPGQERPGSLHYLLAPSQHAPR*TLVF", 
    "DGRN**RMATSPALVCFIPLASGRGRVAFFATPCT*EQEAKVVGPGGRRARLLETGTRPSEPTGAPQLLLLNAQASDLPLLSDRFICPMCREPVY*DIWVAFASLSSS*RIVFSQCRRNESPVLTPGSGCLVSPRRIRPFVLRRWRSRGPSSESCKLVLFPFQVEEMPLSRRSGDSPAPRIPGWRDASRPRGLVARAGRRTRRRALPGLAWACSEPGCFSVTTQIGGIWRFCGSPAAKLRGRRGLEACTTCSPRPSMHPGKRLC",
    "MAATNDGWPPLPP*SVSFLWLQGGEELPSSQLLVRRNKKPK*WAQEGGEPGFWRREPGLLNPRGLHSCCF*MLRHLICRCYLIGSFARCAASQYTETSG*HSLPCPPPKESSSPSAGGMSPPFSPRGAVAS*VQGESALSFCAGGDLAGPVRSPANWSSFLFRWRRCRCPVGLGTAQLPGSRAGETRRGPGAWWHEQEGGPGGGLCLGLPGLVPSRAASR*PRRSGAFGDFAGVLQPSSGAGEAWKPALPARPVPACTQVNACV",
    "PGPGAATRLSSPGSGELGCPQTDGTAASPPPEKERGPVCRTPNWAREISTCAKRKGGFSLDSRGNRSPG*ERGTHSSGTGRRRFFRRRTGKRMLPRITTADQMPEHSEATAVEPPWVQKAWFPSPEAWLSSLLGPLLWLLVPTYKELRRRQLFPSLKPKE*NRLGRERWPSVISCGHQ*QQQDTEPKAVSILGPQGNISGSGTKEVLIS*RQRPMEGNSNHPLPIQKMLT*ALHFSCR*PHKSINSMPAGYRNWSIGERVTH*KVG*IEFLAWALRSWHGEGCRNAGISGTLNKQNSC*R",
    "QAPGPRRVSPARDPGSWAVPRPTGQRHLLHLKRKEDQFAGLRTGPARSPPAQNERADSPWTHEATAPRGENGGLIPPALGEDDSLGGGQGSECYPG*QRQIRCLSIQKQQLWSPRGFRRPGSRLQKPGSPPSWAHYFGFLFLRTRSCEEGNSSPP*SQRNETD*GGRGGHPSLVAAISNSNRTRNLRLYPSWAPRETSAGAVLRKCSSLRDKGPWRGTVTIPSQFRKC*HKHFISHADNHISLLIPCQQGIGIGL*EKGSPTERWAE*NSLPGL*GPGMEKAVEMLASVEPSINRILVKE",
    "TRPRGRDASLQPGIRGAGLSPDRRDSGISST*KGKRTSLQDSELGPRDLHLRKTKGRILLGLTRQPLPGVRTGDSFLRHWEKTIL*EEDREANATQDNNGRSDA*AFRSNSCGAPVGSEGLVPVSRSLALLPPGPTTLASCSYVQGVAKKATLPLPEAKGMKQTRAGEVAIRH*LRPSVTATGHGT*GCIHPGPPGKHQRERY*GSAHLLETKAHGGEQ*PSPPNSENVNISTSFLMQITT*VY*FHASRV*ELVYRRKGHPLKGGLNRIPCLGFEVLAWRRL*KCWHQWNPQ*TEFLLK",
    "FFNKNSVY*GFH*CQHFYSLLHARTSKPRQGILFSPPFSG*PFLL*TNSYTLLAWN**TYVVICMRNEVLMLTFSELGGDGYCSPPWAFVSKR*ALP*YRSR*CFPGGPGWIQP*VPCPVAVTDGRN**RMATSPALVCFIPLASGRGRVAFFATPCT*EQEAKVVGPGGRRARLLETGTRPSEPTGAPQLLLLNAQASDLPLLSWVAFASLSSS*RIVFSQCRRNESPVLTPGSGCLVSPRRIRPFVLRRWRSRGPSSESCKLVLFPFQVEEMPLSRRSGDSPAPRIPGWRDASRPRGL",
    "SLTRILFIEGSTDASISTAFSMPGPQSPGKEFYSAHLSVGDPFSYRPIPIPCWHGINRLMWLSA*EMKCLC*HFLNWEGMVTVPLHGPLSLRDEHFLSTAPADVSLGAQDGYSLRFRVLLLLLMAATNDGWPPLPP*SVSFLWLQGGEELPSSQLLVRRNKKPK*WAQEGGEPGFWRREPGLLNPRGLHSCCF*MLRHLICRCYPG*HSLPCPPPKESSSPSAGGMSPPFSPRGAVAS*VQGESALSFCAGGDLAGPVRSPANWSSFLFRWRRCRCPVGLGTAQLPGSRAGETRRGPGAW",
    "L*QEFCLLRVPLMPAFLQPSPCQDLKAQARNSIQPTFQWVTLSPIDQFLYPAGMELIDLCGYLHEK*SAYVNIF*IGRGWLLFPSMGLCL*EMSTSLVPLPLMFPWGPRMDTALGSVSCCCY*WPQLMTDGHLSRPSLFHSFGFREGKSCLLRNSLYVGTRSQSSGPRREESQASGDGNQAF*THGGSTAVASECSGI*SAVVILGSIRFPVLLLKNRLLPVPEE*VPRSHPGERLPRESKENPPFRFAQVEISRAQFGVLQTGPLSFSGGGDAAVPSVWGQPSSPDPGLERRVAAPGPG"
  )
) %>% 
  # Insert a space after every consecutive run of three characters in each
  # sequence. NOTE(review): for amino-acid strings the groups of three carry no
  # biological meaning; presumably this only lets the long strings wrap when
  # the table is displayed -- confirm.
  mutate(sequence = str_replace_all(sequence, "(.{3})", "\\1 "))

Conclusions

No peptides were identified that reliably mapped to either sequence. Therefore, at the very least, miR34a asRNA does not appear to encode a highly abundant protein or peptide that is ubiquitously present in cancer. The results should not be interpreted as showing that no matching peptides exist, only that their existence is less likely. It is nigh on impossible to prove that the sequence is never translated, but given the search results it seems likelier that miR34a asRNA is non-coding.



GranderLab/miR34a_asRNA_project documentation built on May 26, 2019, 7:26 a.m.