<!-- CINXE.COM
MIM: A Species Independent Approach for Classifying Coding and Non-Coding DNA Sequences in Bacterial and Archaeal Genomes -->
<!DOCTYPE html>
<html lang="en" dir="ltr">
<head>
  <!-- Encoding declaration comes first so it is seen before any other head content. -->
  <meta charset="utf-8">
  <!-- Zoom is deliberately left unrestricted (no maximum-scale / user-scalable=no). -->
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>MIM: A Species Independent Approach for Classifying Coding and Non-Coding DNA Sequences in Bacterial and Archaeal Genomes</title>
  <meta name="description" content="MIM: A Species Independent Approach for Classifying Coding and Non-Coding DNA Sequences in Bacterial and Archaeal Genomes">
  <meta name="keywords" content="Coding Non-coding Classification, Entropy, GeneRecognition, Mutual Information.">

  <!-- Google tag (gtag.js) -->
  <script async src="https://www.googletagmanager.com/gtag/js?id=G-P63WKM1TM1"></script>
  <script>
    window.dataLayer = window.dataLayer || [];
    function gtag(){dataLayer.push(arguments);}
    gtag('js', new Date());
    gtag('config', 'G-P63WKM1TM1');
  </script>

  <!-- Yandex.Metrika counter -->
  <script>
    (function(m,e,t,r,i,k,a){m[i]=m[i]||function(){(m[i].a=m[i].a||[]).push(arguments)};
    m[i].l=1*new Date();
    for (var j = 0; j < document.scripts.length; j++) {if (document.scripts[j].src === r) { return; }}
    k=e.createElement(t),a=e.getElementsByTagName(t)[0],k.async=1,k.src=r,a.parentNode.insertBefore(k,a)})
    (window, document, "script", "https://mc.yandex.ru/metrika/tag.js", "ym");

    ym(55165297, "init", { clickmap:false, trackLinks:true, accurateTrackBounce:true, webvisor:false });
  </script>
  <!-- NOTE(review): this noscript holds a div/img inside <head>; it belongs in <body>, which opens further down the file. -->
  <noscript><div><img src="https://mc.yandex.ru/watch/55165297" style="position:absolute; left:-9999px;" alt=""></div></noscript>
  <!-- /Yandex.Metrika counter -->

  <!-- Citation metadata (citation_* meta tags) describing the article. -->
  <meta name="citation_title" content="MIM: A Species Independent Approach for Classifying Coding and Non-Coding DNA Sequences in Bacterial and Archaeal Genomes">
  <meta name="citation_author" content="Achraf El Allali">
  <meta name="citation_author" content="John R. 
Rose">
  <meta name="citation_publication_date" content="2010/10/24">
  <meta name="citation_journal_title" content="International Journal of Bioengineering and Life Sciences">
  <meta name="citation_volume" content="4">
  <meta name="citation_issue" content="10">
  <meta name="citation_firstpage" content="742">
  <meta name="citation_lastpage" content="749">
  <meta name="citation_pdf_url" content="https://publications.waset.org/9008/pdf">
  <link href="https://cdn.waset.org/favicon.ico" type="image/x-icon" rel="shortcut icon">
  <link href="https://cdn.waset.org/static/plugins/bootstrap-4.2.1/css/bootstrap.min.css" rel="stylesheet">
  <link href="https://cdn.waset.org/static/plugins/fontawesome/css/all.min.css" rel="stylesheet">
  <link href="https://cdn.waset.org/static/css/site.css?v=150220211555" rel="stylesheet">
</head>
<body>
  <!-- Site header: brand link, collapse toggler, conference search and the main navigation list. -->
  <header>
    <div class="container">
      <nav class="navbar navbar-expand-lg navbar-light">
        <a class="navbar-brand" href="https://waset.org">
          <img src="https://cdn.waset.org/static/images/wasetc.png" alt="Open Science Research Excellence" title="Open Science Research Excellence">
        </a>
        <!-- Toggler controls #navbarMenu; it is hidden from the lg breakpoint up (d-lg-none). -->
        <button class="d-block d-lg-none navbar-toggler ml-auto" type="button" data-toggle="collapse" data-target="#navbarMenu" aria-controls="navbarMenu" aria-expanded="false" aria-label="Toggle navigation">
          <span class="navbar-toggler-icon"></span>
        </button>
        <div class="w-100">
          <!-- Conference search form; only displayed from the lg breakpoint up (d-none d-lg-flex). -->
          <div class="d-none d-lg-flex flex-row-reverse">
            <form method="get" action="https://waset.org/search" class="form-inline my-2 my-lg-0">
              <input class="form-control mr-sm-2" type="search" placeholder="Search Conferences" value="" name="q" aria-label="Search">
              <!-- Icon-only button: aria-label gives it an accessible name, the glyph itself is hidden from assistive technology. -->
              <button class="btn btn-light my-2 my-sm-0" type="submit" aria-label="Search"><i class="fas fa-search" aria-hidden="true"></i></button>
            </form>
          </div>
          <div class="collapse navbar-collapse mt-1" id="navbarMenu">
            <ul class="navbar-nav ml-auto align-items-center" id="mainNavMenu">
              <li class="nav-item">
                <a class="nav-link" href="https://waset.org/conferences" title="Conferences in 
2024/2025/2026">Conferences</a>
              </li>
              <li class="nav-item">
                <a class="nav-link" href="https://waset.org/disciplines" title="Disciplines">Disciplines</a>
              </li>
              <li class="nav-item">
                <a class="nav-link" href="https://waset.org/committees" rel="nofollow">Committees</a>
              </li>
              <!-- Publications dropdown; the toggle is wired through data-toggle="dropdown". -->
              <li class="nav-item dropdown">
                <a class="nav-link dropdown-toggle" href="#" id="navbarDropdownPublications" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false"> Publications </a>
                <div class="dropdown-menu" aria-labelledby="navbarDropdownPublications">
                  <a class="dropdown-item" href="https://publications.waset.org/abstracts">Abstracts</a>
                  <a class="dropdown-item" href="https://publications.waset.org">Periodicals</a>
                  <a class="dropdown-item" href="https://publications.waset.org/archive">Archive</a>
                </div>
              </li>
              <li class="nav-item">
                <a class="nav-link" href="https://waset.org/page/support" title="Support">Support</a>
              </li>
            </ul>
          </div>
        </div>
      </nav>
    </div>
  </header>
  <main>
    <div class="container mt-4">
      <!-- Publication search form (GET, query parameter "q"). -->
      <div class="row">
        <div class="col-md-9 mx-auto">
          <form method="get" action="https://publications.waset.org/search">
            <div id="custom-search-input">
              <div class="input-group">
                <i class="fas fa-search" aria-hidden="true"></i>
                <!-- The placeholder is not a label, so aria-label provides the accessible name. -->
                <input type="text" class="search-query" name="q" placeholder="Author, Title, Abstract, Keywords" value="" aria-label="Search publications by author, title, abstract or keywords">
                <input type="submit" class="btn_search" value="Search">
              </div>
            </div>
          </form>
        </div>
      </div>
      <!-- Four summary cards about the periodical. -->
      <div class="row mt-3">
        <div class="col-sm-3">
          <div class="card">
            <div class="card-body"><strong>Commenced</strong> in January 2007</div>
          </div>
        </div>
        <div class="col-sm-3">
          <div class="card">
            <div class="card-body"><strong>Frequency:</strong> Monthly</div>
          </div>
        </div>
        <div class="col-sm-3">
          <div class="card">
            <div class="card-body"><strong>Edition:</strong> International</div>
          </div>
        </div>
        <div class="col-sm-3">
          <div class="card">
            <div class="card-body"><strong>Paper Count:</strong> 33100</div>
          </div>
        </div>
      </div>
      <!-- Article record card; its header and body follow. -->
      <div class="card publication-listing mt-3 mb-3">
        <h5 
class="card-header" style="font-size:.9rem">MIM: A Species Independent Approach for Classifying Coding and Non-Coding DNA Sequences in Bacterial and Archaeal Genomes</h5>
        <div class="card-body">
          <!-- Each author name links to a publication search for that name. -->
          <p class="card-text"><strong>Authors:</strong>
            <a href="https://publications.waset.org/search?q=Achraf%20El%20Allali">Achraf El Allali</a>,
            <a href="https://publications.waset.org/search?q=John%20R.%20Rose">John R. Rose</a>
          </p>
          <p class="card-text"><strong>Abstract:</strong></p>
          <!-- Abstract text is wrapped in its own paragraph instead of sitting as a bare text node in the card body. -->
          <p class="card-text">A number of competing methodologies have been developed to identify genes and classify DNA sequences into coding and non-coding sequences. This classification process is fundamental in gene finding and gene annotation tools and is one of the most challenging tasks in bioinformatics and computational biology. An information theory measure based on mutual information has shown good accuracy in classifying DNA sequences into coding and noncoding. In this paper we describe a species independent iterative approach that distinguishes coding from non-coding sequences using the mutual information measure (MIM). A set of sixty prokaryotes is used to extract universal training data. To facilitate comparisons with the published results of other researchers, a test set of 51 bacterial and archaeal genomes was used to evaluate MIM. These results demonstrate that MIM produces superior results while remaining species independent.</p>
<iframe src="https://publications.waset.org/9008.pdf" title="Full-text PDF of the article" style="width:100%; height:400px; border:0;"></iframe>
          <!-- Each keyword links to a publication search for that keyword. -->
          <p class="card-text"><strong>Keywords:</strong>
            <a href="https://publications.waset.org/search?q=Coding%20Non-coding%20Classification" title="Coding Non-coding Classification">Coding Non-coding Classification</a>,
            <a href="https://publications.waset.org/search?q=Entropy" title="Entropy">Entropy</a>,
            <a href="https://publications.waset.org/search?q=GeneRecognition" title="GeneRecognition">GeneRecognition</a>,
            <a href="https://publications.waset.org/search?q=Mutual%20Information." title="Mutual Information.">Mutual Information.</a>
          </p>
          <p class="card-text"><strong>Digital Object Identifier (DOI):</strong>
            <a href="https://doi.org/10.5281/zenodo.1071956" target="_blank" rel="noopener">doi.org/10.5281/zenodo.1071956</a>
          </p>
          <!-- Citation export links; those opening a new tab carry rel="noopener". -->
          <a href="https://publications.waset.org/9008/mim-a-species-independent-approach-for-classifying-coding-and-non-coding-dna-sequences-in-bacterial-and-archaeal-genomes" class="btn btn-primary btn-sm">Procedia</a>
          <a href="https://publications.waset.org/9008/apa" target="_blank" rel="nofollow noopener" class="btn btn-primary btn-sm">APA</a>
          <a href="https://publications.waset.org/9008/bibtex" target="_blank" rel="nofollow noopener" class="btn btn-primary btn-sm">BibTeX</a>
          <a href="https://publications.waset.org/9008/chicago" target="_blank" rel="nofollow noopener" class="btn btn-primary btn-sm">Chicago</a>
          <a href="https://publications.waset.org/9008/endnote" target="_blank" rel="nofollow noopener" class="btn btn-primary btn-sm">EndNote</a>
          <a href="https://publications.waset.org/9008/harvard" target="_blank" rel="nofollow noopener" class="btn btn-primary btn-sm">Harvard</a>
          <a href="https://publications.waset.org/9008/json" target="_blank" rel="nofollow noopener" class="btn btn-primary btn-sm">JSON</a>
          <a href="https://publications.waset.org/9008/mla" target="_blank" rel="nofollow noopener" class="btn btn-primary btn-sm">MLA</a>
          <a href="https://publications.waset.org/9008/ris" target="_blank" 
rel="nofollow noopener" class="btn btn-primary btn-sm">RIS</a>
          <a href="https://publications.waset.org/9008/xml" target="_blank" rel="nofollow noopener" class="btn btn-primary btn-sm">XML</a>
          <a href="https://publications.waset.org/9008/iso690" target="_blank" rel="nofollow noopener" class="btn btn-primary btn-sm">ISO 690</a>
          <a href="https://publications.waset.org/9008.pdf" target="_blank" rel="noopener" class="btn btn-primary btn-sm">PDF</a>
          <span class="bg-info text-light px-1 py-1 float-right rounded"> Downloads <span class="badge badge-light">1728</span> </span>
          <p class="card-text"><strong>References:</strong></p>
          <!-- Reference list as real list markup; the bracketed numbers stay in the text, so the list itself is unstyled. -->
          <ul class="list-unstyled mb-0">
            <li>[1] A. Lukashin and M. Borodovsky, "Genemark.hmm: new solutions for gene finding." Nucleic Acids Res., vol. 26, pp. 1107-1115, 1998.</li>
            <li>[2] D. Hyatt, G.-L. Chen, P. F. LoCascio, M. L. Land, F. W. Larimer, and L. J. Hauser, "Prodigal: prokaryotic gene recognition and translation initiation site identification," BMC Bioinformatics, vol. 11, 2010.</li>
            <li>[3] A. Delcher, K. Bratke, E. Powers, and S. Salzberg, "Identifying bacterial genes and endosymbiont dna with glimmer," Bioinformatics, vol. 23, pp. 673-679, 2007.</li>
            <li>[4] G.-Q. Hu, X. Zheng, H.-Q. Zhu, and Z.-S. She, "Prediction of translation initiation site with tritisa," Bioinformatics, vol. 25, pp. 123-125, 2009.</li>
            <li>[5] H. Ou, F. Guo, and C. Zhang, "Gs-finder: a program to find bacterial gene start sites with a self-training method," Int. J. Biochem. Cell Biol., vol. 36, pp. 535-544, 2004.</li>
            <li>[6] I. Rogozin and L. Milanesi, "Analysis of donor splice signals in different organisms," J. Mol. Evol., vol. 45, pp. 50-59, 1997.</li>
            <li>[7] J. Kleffe, K. Hermann, W. Vahrson, B. Wittig, and V. Brendel, "Logitlinear models for the prediction of splice sites in plant pre-mrna sequences," Nucleic Acids Res., vol. 24, pp. 4709-4718, 1996.</li>
            <li>[8] S. Brunak, J. Engelbrecht, and S. Knudsen, "Prediction of human mrna donor and acceptor sites from the dna sequence," J. Mol. Biol., vol. 220, pp. 49-65, 1991.</li>
            <li>[9] S. M. Hebsgaard, P. G. Korning, N. Tolstrup, J. Engelbrecht, P. Rouzé, and S. Brunak, "Splice site prediction in arabidopsis thaliana pre mrna by combining local and global sequence information," Nucleic Acids Res., vol. 24, pp. 3439-3452, 1996.</li>
            <li>[10] M. Q. Zhang and T. G. Marr, "A weight array method for splicing signal analysis," Comput. Appl. Biosci., vol. 9, pp. 499-509, 1993.</li>
            <li>[11] S. F. Altschul, W. Gish, W. Miller, E. W. Myers, and D. J. Lipman, "Basic local alignment search tool," J. Mol. Biol., vol. 215, pp. 403-410, 1990.</li>
            <li>[12] P. McCaldon and P. Argos, "Oligopeptide biases in protein sequences and their use in predicting protein coding regions in nucleotide sequences," Proteins: Structure, Function and Genetics, vol. 4, pp. 99-122, 1988.</li>
            <li>[13] R. Staden and A. D. McLachlan, "Codon preferences and its uses in identifying protein coding regions in long dna sequences," Nucleic Acids Res., vol. 10, pp. 141-156, 1982.</li>
            <li>[14] A. S. Kolaskar and B. V. B. Reddy, "A method to locate protein sequences in dna and prokaryotic systems," Nucleic Acids Res., vol. 13, pp. 185-194, 1985.</li>
            <li>[15] R. D. Blake and S. Early, "Distribution and evolution of sequence characteristics in e. coli genome," J. Biomol. Struct. Dynam., vol. 4, pp. 291-307, 1996.</li>
            <li>[16] J. R. Rose and A. El Allali, "Mutual information measure for distinguishing coding and non-coding dna sequences," Biocomp, vol. 1, pp. 214-219, 2008.</li>
            <li>[17] Z. Ouyang and Z. S. She, "Multivariate entropy distance method for distinguishing coding and non-coding dna sequences," J. Bioinform. Comput. Biol., vol. 2, pp. 353-373, 2004.</li>
            <li>[18] L. Q. Zhou, Z. G. Yu, J. Q. Deng, V. Anh, and S. C. Long, "A fractal method to distinguish coding and non-coding sequences in a complete genome based on a number sequence representation," J. Theor. Biol., vol. 232, pp. 559-567, 2004.</li>
            <li>[19] Y. Zhou, L. Q. Zhou, Z. G. Yu, and V. V. Anh, "Distinguish coding and noncoding sequences in a complete genome using fourier transform," International Conference on Natural Computation, pp. 295-299, 2007.</li>
            <li>[20] V. A. Guo-Sheng and Y. Zu-Guo, "Distinguishing coding from noncoding sequences in prokaryote complete genome based on the global descriptor," IEEE Computer Society: Sixth International Conference on Fuzzy Systems and Knowledge Discovery, pp. 42-46, 2009.</li>
            <li>[21] D. A. Benson, I. Karsch-Mizrachi, D. Lipman, J. Ostell, and E. Sayers, "Genbank," Nucleic Acids Res., vol. 37(Database issue), pp. D26-31, 2009.</li>
            <li>[22] M. W. Bern and D. Goldberg, "Automatic selection of representative proteins for bacterial phylogeny," BMC Evolutionary Biology, vol. 5, 2005.</li>
            <li>[23] M. Burset and R. Guigo, "Evaluation of gene structure prediction programs," Genomics, vol. 34, pp. 353-367, 1996.</li>
            <li>[24] K. E. Rudd, "Ecogene: a genome sequence database for escherichia coli k-12," Nucleic Acids Res., vol. 28, pp. 60-64, 2000.</li>
          </ul>
        </div>
      </div>
    </div>
  </main>
  <footer>
    <!-- Footer link columns. Column captions sit in a paragraph before each list because a ul may only contain li children. -->
    <!-- NOTE(review): assumes site.css does not style the captions through "#infolinks ul" — confirm. -->
    <div id="infolinks" class="pt-3 pb-2">
      <div class="container">
        <div style="background-color:#f5f5f5;" class="p-3">
          <div class="row">
            <div class="col-md-2">
              <p class="mb-0">About</p>
              <ul class="list-unstyled">
                <li><a href="https://waset.org/page/support">About Us</a></li>
                <li><a href="https://waset.org/page/support#legal-information">Legal</a></li>
                <li><a target="_blank" rel="nofollow noopener" href="https://publications.waset.org/static/files/WASET-16th-foundational-anniversary.pdf">WASET celebrates its 16th foundational anniversary</a></li>
              </ul>
            </div>
            <div class="col-md-2">
              <p class="mb-0">Account</p>
              <ul class="list-unstyled">
                <li><a href="https://waset.org/profile">My Account</a></li>
              </ul>
            </div>
            <div class="col-md-2">
              <p class="mb-0">Explore</p>
              <ul class="list-unstyled">
                <li><a href="https://waset.org/disciplines">Disciplines</a></li>
                <li><a href="https://waset.org/conferences">Conferences</a></li>
                <li><a href="https://waset.org/conference-programs">Conference Program</a></li>
                <li><a 
href="https://waset.org/committees">Committees</a></li>
                <li><a href="https://publications.waset.org">Publications</a></li>
              </ul>
            </div>
            <!-- Column captions sit in a paragraph before each list because a ul may only contain li children. -->
            <!-- NOTE(review): assumes site.css does not style the captions through "#infolinks ul" — confirm. -->
            <div class="col-md-2">
              <p class="mb-0">Research</p>
              <ul class="list-unstyled">
                <li><a href="https://publications.waset.org/abstracts">Abstracts</a></li>
                <li><a href="https://publications.waset.org">Periodicals</a></li>
                <li><a href="https://publications.waset.org/archive">Archive</a></li>
              </ul>
            </div>
            <div class="col-md-2">
              <p class="mb-0">Open Science</p>
              <ul class="list-unstyled">
                <li><a target="_blank" rel="nofollow noopener" href="https://publications.waset.org/static/files/Open-Science-Philosophy.pdf">Open Science Philosophy</a></li>
                <li><a target="_blank" rel="nofollow noopener" href="https://publications.waset.org/static/files/Open-Science-Award.pdf">Open Science Award</a></li>
                <li><a target="_blank" rel="nofollow noopener" href="https://publications.waset.org/static/files/Open-Society-Open-Science-and-Open-Innovation.pdf">Open Innovation</a></li>
                <li><a target="_blank" rel="nofollow noopener" href="https://publications.waset.org/static/files/Postdoctoral-Fellowship-Award.pdf">Postdoctoral Fellowship Award</a></li>
                <li><a target="_blank" rel="nofollow noopener" href="https://publications.waset.org/static/files/Scholarly-Research-Review.pdf">Scholarly Research Review</a></li>
              </ul>
            </div>
            <div class="col-md-2">
              <p class="mb-0">Support</p>
              <ul class="list-unstyled">
                <li><a href="https://waset.org/page/support">Support</a></li>
                <li><a href="https://waset.org/profile/messages/create">Contact Us</a></li>
                <li><a href="https://waset.org/profile/messages/create">Report Abuse</a></li>
              </ul>
            </div>
          </div>
        </div>
      </div>
    </div>
    <!-- License link and copyright line. -->
    <div class="container text-center">
      <hr style="margin-top:0;margin-bottom:.3rem;">
      <a href="https://creativecommons.org/licenses/by/4.0/" target="_blank" rel="noopener" class="text-muted small">Creative Commons Attribution 4.0 International License</a>
      <div id="copy" class="mt-2">© 2024 World Academy of Science, Engineering and Technology</div>
    </div>
  </footer>
  <!-- Icon-only return-to-top control: aria-label supplies its accessible name. -->
  <!-- NOTE(review): href="javascript:" is kept unchanged; this control would be better expressed as a button. -->
  <a href="javascript:" id="return-to-top" aria-label="Return to top"><i aria-hidden="true" class="fas 
fa-arrow-up"></i></a>
  <!-- Empty modal shell: only a close button and an empty .modal-body are present in the static markup. -->
  <!-- tabindex="-1" and role="dialog" follow the Bootstrap 4 modal markup so the dialog can take focus and is announced as a dialog. -->
  <div class="modal" id="modal-template" tabindex="-1" role="dialog">
    <div class="modal-dialog" role="document">
      <div class="modal-content">
        <div class="row m-0 mt-1">
          <div class="col-md-12">
            <button type="button" class="close" data-dismiss="modal" aria-label="Close"><span aria-hidden="true">×</span></button>
          </div>
        </div>
        <div class="modal-body"></div>
      </div>
    </div>
  </div>
  <script src="https://cdn.waset.org/static/plugins/jquery-3.3.1.min.js"></script>
  <script src="https://cdn.waset.org/static/plugins/bootstrap-4.2.1/js/bootstrap.bundle.min.js"></script>
  <script src="https://cdn.waset.org/static/js/site.js?v=150220211556"></script>
  <script>
    // Once the DOM is ready, request the user-menu fragment with caching disabled
    // and append the response to the main navigation list (#mainNavMenu).
    jQuery(document).ready(function() {
      jQuery.get({ url: "https://publications.waset.org/xhr/user-menu", cache: false }).then(function(response){
        jQuery('#mainNavMenu').append(response);
      });
    });
  </script>
</body>
</html>