CINXE.COM

{"markup":"\u003C?xml version=\u00221.0\u0022 encoding=\u0022UTF-8\u0022 ?\u003E\n \u003Chtml version=\u0022HTML+RDFa+MathML 1.1\u0022\n xmlns:content=\u0022http:\/\/purl.org\/rss\/1.0\/modules\/content\/\u0022\n xmlns:dc=\u0022http:\/\/purl.org\/dc\/terms\/\u0022\n xmlns:foaf=\u0022http:\/\/xmlns.com\/foaf\/0.1\/\u0022\n xmlns:og=\u0022http:\/\/ogp.me\/ns#\u0022\n xmlns:rdfs=\u0022http:\/\/www.w3.org\/2000\/01\/rdf-schema#\u0022\n xmlns:sioc=\u0022http:\/\/rdfs.org\/sioc\/ns#\u0022\n xmlns:sioct=\u0022http:\/\/rdfs.org\/sioc\/types#\u0022\n xmlns:skos=\u0022http:\/\/www.w3.org\/2004\/02\/skos\/core#\u0022\n xmlns:xsd=\u0022http:\/\/www.w3.org\/2001\/XMLSchema#\u0022\n xmlns:mml=\u0022http:\/\/www.w3.org\/1998\/Math\/MathML\u0022\u003E\n \u003Chead\u003E\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022\/\/cdn.jsdelivr.net\/qtip2\/2.2.1\/jquery.qtip.min.js\u0022\u003E\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022https:\/\/www.biorxiv.org\/sites\/default\/files\/js\/js_YjAJQgxDlFX6S-O02jj9jCrVbrwlY3CGgCg1FzPlvBs.js\u0022\u003E\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022\u003E\n\u003C!--\/\/--\u003E\u003C![CDATA[\/\/\u003E\u003C!--\nif(typeof window.MathJax === \u0022undefined\u0022) window.MathJax = { menuSettings: { zoom: \u0022Click\u0022 } };\n\/\/--\u003E\u003C!]]\u003E\n\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022https:\/\/www.biorxiv.org\/sites\/default\/files\/js\/js_waP91NpgGpectm_6Y2XDEauLJ8WCSCBKmmA87unpp2E.js\u0022\u003E\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022https:\/\/www.googletagmanager.com\/gtag\/js?id=G-RZD586MC3Q\u0022\u003E\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022\u003E\n\u003C!--\/\/--\u003E\u003C![CDATA[\/\/\u003E\u003C!--\nwindow.dataLayer = window.dataLayer || [];function gtag(){dataLayer.push(arguments)};gtag(\u0022js\u0022, new Date());gtag(\u0022set\u0022, 
\u0022developer_id.dMDhkMT\u0022, true);gtag(\u0022config\u0022, \u0022G-RZD586MC3Q\u0022, {\u0022groups\u0022:\u0022default\u0022,\u0022anonymize_ip\u0022:true});\n\/\/--\u003E\u003C!]]\u003E\n\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022\u003E\n\u003C!--\/\/--\u003E\u003C![CDATA[\/\/\u003E\u003C!--\njQuery.extend(Drupal.settings, {\u0022basePath\u0022:\u0022\\\/\u0022,\u0022pathPrefix\u0022:\u0022\u0022,\u0022highwire\u0022:{\u0022ac\u0022:{\u0022biorxiv;714402v2\u0022:{\u0022access\u0022:{\u0022full\u0022:true},\u0022pisa_id\u0022:\u0022biorxiv;714402v2\u0022,\u0022apath\u0022:\u0022\u0022,\u0022jcode\u0022:\u0022biorxiv\u0022}},\u0022processed\u0022:[\u0022highwire_math\u0022],\u0022markup\u0022:[{\u0022requested\u0022:\u0022full-text\u0022,\u0022variant\u0022:\u0022full-text\u0022,\u0022view\u0022:\u0022full\u0022,\u0022pisa\u0022:\u0022biorxiv;714402v2\u0022}]},\u0022instances\u0022:\u0022{\\u0022highwire_abstract_tooltip\\u0022:{\\u0022content\\u0022:{\\u0022text\\u0022:\\u0022\\u0022},\\u0022style\\u0022:{\\u0022tip\\u0022:{\\u0022width\\u0022:20,\\u0022height\\u0022:20,\\u0022border\\u0022:1,\\u0022offset\\u0022:0,\\u0022corner\\u0022:true},\\u0022classes\\u0022:\\u0022qtip-custom hw-tooltip hw-abstract-tooltip qtip-shadow qtip-rounded\\u0022,\\u0022classes_custom\\u0022:\\u0022hw-tooltip hw-abstract-tooltip\\u0022},\\u0022position\\u0022:{\\u0022at\\u0022:\\u0022right center\\u0022,\\u0022my\\u0022:\\u0022left center\\u0022,\\u0022viewport\\u0022:true,\\u0022adjust\\u0022:{\\u0022method\\u0022:\\u0022shift\\u0022}},\\u0022show\\u0022:{\\u0022event\\u0022:\\u0022mouseenter click \\u0022,\\u0022solo\\u0022:true},\\u0022hide\\u0022:{\\u0022event\\u0022:\\u0022mouseleave 
\\u0022,\\u0022fixed\\u0022:1,\\u0022delay\\u0022:\\u0022100\\u0022}},\\u0022highwire_author_tooltip\\u0022:{\\u0022content\\u0022:{\\u0022text\\u0022:\\u0022\\u0022},\\u0022style\\u0022:{\\u0022tip\\u0022:{\\u0022width\\u0022:15,\\u0022height\\u0022:15,\\u0022border\\u0022:1,\\u0022offset\\u0022:0,\\u0022corner\\u0022:true},\\u0022classes\\u0022:\\u0022qtip-custom hw-tooltip hw-author-tooltip qtip-shadow qtip-rounded\\u0022,\\u0022classes_custom\\u0022:\\u0022hw-tooltip hw-author-tooltip\\u0022},\\u0022position\\u0022:{\\u0022at\\u0022:\\u0022top center\\u0022,\\u0022my\\u0022:\\u0022bottom center\\u0022,\\u0022viewport\\u0022:true,\\u0022adjust\\u0022:{\\u0022method\\u0022:\\u0022\\u0022}},\\u0022show\\u0022:{\\u0022event\\u0022:\\u0022mouseenter \\u0022,\\u0022solo\\u0022:true},\\u0022hide\\u0022:{\\u0022event\\u0022:\\u0022mouseleave \\u0022,\\u0022fixed\\u0022:1,\\u0022delay\\u0022:\\u0022100\\u0022}},\\u0022highwire_reflinks_tooltip\\u0022:{\\u0022content\\u0022:{\\u0022text\\u0022:\\u0022\\u0022},\\u0022style\\u0022:{\\u0022tip\\u0022:{\\u0022width\\u0022:15,\\u0022height\\u0022:15,\\u0022border\\u0022:1,\\u0022mimic\\u0022:\\u0022top center\\u0022,\\u0022offset\\u0022:0,\\u0022corner\\u0022:true},\\u0022classes\\u0022:\\u0022qtip-custom hw-tooltip hw-ref-link-tooltip qtip-shadow qtip-rounded\\u0022,\\u0022classes_custom\\u0022:\\u0022hw-tooltip hw-ref-link-tooltip\\u0022},\\u0022position\\u0022:{\\u0022at\\u0022:\\u0022bottom left\\u0022,\\u0022my\\u0022:\\u0022top left\\u0022,\\u0022viewport\\u0022:true,\\u0022adjust\\u0022:{\\u0022method\\u0022:\\u0022flip\\u0022}},\\u0022show\\u0022:{\\u0022event\\u0022:\\u0022mouseenter \\u0022,\\u0022solo\\u0022:true},\\u0022hide\\u0022:{\\u0022event\\u0022:\\u0022mouseleave 
\\u0022,\\u0022fixed\\u0022:1,\\u0022delay\\u0022:\\u0022100\\u0022}}}\u0022,\u0022qtipDebug\u0022:\u0022{\\u0022leaveElement\\u0022:0}\u0022,\u0022googleanalytics\u0022:{\u0022account\u0022:[\u0022G-RZD586MC3Q\u0022],\u0022trackOutbound\u0022:1,\u0022trackMailto\u0022:1,\u0022trackDownload\u0022:1,\u0022trackDownloadExtensions\u0022:\u00227z|aac|arc|arj|asf|asx|avi|bin|csv|doc(x|m)?|dot(x|m)?|exe|flv|gif|gz|gzip|hqx|jar|jpe?g|js|mp(2|3|4|e?g)|mov(ie)?|msi|msp|pdf|phps|png|ppt(x|m)?|pot(x|m)?|pps(x|m)?|ppam|sld(x|m)?|thmx|qtm?|ra(m|r)?|sea|sit|tar|tgz|torrent|txt|wav|wma|wmv|wpd|xls(x|m|b)?|xlt(x|m)|xlam|xml|z|zip\u0022,\u0022trackColorbox\u0022:1},\u0022ajaxPageState\u0022:{\u0022js\u0022:{\u0022\\\/\\\/cdn.jsdelivr.net\\\/qtip2\\\/2.2.1\\\/jquery.qtip.min.js\u0022:1,\u0022sites\\\/all\\\/modules\\\/highwire\\\/highwire\\\/plugins\\\/highwire_markup_process\\\/js\\\/highwire_article_reference_popup.js\u0022:1,\u0022sites\\\/all\\\/modules\\\/highwire\\\/highwire\\\/plugins\\\/highwire_markup_process\\\/js\\\/highwire_at_symbol.js\u0022:1,\u00220\u0022:1,\u0022sites\\\/all\\\/modules\\\/contrib\\\/google_analytics\\\/googleanalytics.js\u0022:1,\u0022https:\\\/\\\/www.googletagmanager.com\\\/gtag\\\/js?id=G-RZD586MC3Q\u0022:1,\u00221\u0022:1}}});\n\/\/--\u003E\u003C!]]\u003E\n\u003C\/script\u003E\n\u003Clink type=\u0022text\/css\u0022 rel=\u0022stylesheet\u0022 href=\u0022https:\/\/www.biorxiv.org\/sites\/default\/files\/advagg_css\/css__dn-cpI1YtkU_iLHgA5WhlkxgYWyat_IxjF_B-WSYrpE__a9hIbt0eaZ7d5nhwnm2weG8R_2eXK4EvoOx9dOxouHE__V7p9f6xKfJWB4tz1ZOzGhbp_vlwczIKATHxjqvc4v4c.css\u0022 media=\u0022all\u0022 \/\u003E\n\u003Clink type=\u0022text\/css\u0022 rel=\u0022stylesheet\u0022 href=\u0022\/\/cdn.jsdelivr.net\/qtip2\/2.2.1\/jquery.qtip.min.css\u0022 media=\u0022all\u0022 \/\u003E\n\u003Clink type=\u0022text\/css\u0022 rel=\u0022stylesheet\u0022 
href=\u0022https:\/\/www.biorxiv.org\/sites\/default\/files\/advagg_css\/css__HGACIFBlu2o05y3afvqlt5wrE_5Dn6MXsexfuEpeIwg__t4SOPxucAPoV3Os7g8dXqyMB1HRXQridRJ82X7nE33E__V7p9f6xKfJWB4tz1ZOzGhbp_vlwczIKATHxjqvc4v4c.css\u0022 media=\u0022all\u0022 \/\u003E\n\u003Clink rel=\u0027stylesheet\u0027 type=\u0027text\/css\u0027 href=\u0027\/sites\/all\/modules\/contrib\/panels\/plugins\/layouts\/onecol\/onecol.css\u0027 \/\u003E\u003C\/head\u003E\u003Cbody\u003E\u003Cdiv class=\u0022panels-ajax-tab-panel panels-ajax-tab-panel-article-tab-full-text\u0022\u003E\u003Cdiv class=\u0022panel-display panel-1col clearfix\u0022 \u003E\n \u003Cdiv class=\u0022panel-panel panel-col\u0022\u003E\n \u003Cdiv\u003E\u003Cdiv class=\u0022panel-pane pane-highwire-markup\u0022 \u003E\n \n \n \n \u003Cdiv class=\u0022pane-content\u0022\u003E\n \u003Cdiv class=\u0022highwire-markup\u0022\u003E\u003Cdiv xmlns=\u0022http:\/\/www.w3.org\/1999\/xhtml\u0022 data-highwire-cite-ref-tooltip-instance=\u0022highwire_reflinks_tooltip\u0022 class=\u0022content-block-markup\u0022 xmlns:xhtml=\u0022http:\/\/www.w3.org\/1999\/xhtml\u0022\u003E\u003Cdiv class=\u0022article fulltext-view \u0022\u003E\u003Cspan class=\u0022highwire-journal-article-marker-start\u0022\u003E\u003C\/span\u003E\u003Cdiv class=\u0022section abstract\u0022 id=\u0022abstract-1\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EAbstract\u003C\/h2\u003E\u003Cp id=\u0022p-3\u0022\u003ETargeted optimizing of existing DNA sequences for useful properties, has the potential to enable several synthetic biology applications from modifying DNA to treat genetic disorders to designing regulatory elements to fine tune context-specific gene expression. Current approaches for targeted genome editing are largely based on prior biological knowledge or ad-hoc rules. 
Few if any machine learning approaches exist for targeted optimization of regulatory DNA sequences.\u003C\/p\u003E\u003Cp id=\u0022p-4\u0022\u003EHere, we propose a novel generative neural network architecture for targeted DNA sequence editing \u2013 the EDA architecture \u2013 consisting of an encoder, decoder, and analyzer. We showcase the use of EDA to optimize regulatory DNA sequences to bind to the transcription factor SPI1. Compared to other state-of-the-art approaches such as a textual variational autoencoder and rule-based editing, EDA significantly improves predicted binding of SPI1 of genomic sequences with the minimal set of edits. We also use EDA to design regulatory elements with optimized grammars of CREB1 binding sites that can tune reporter expression levels as measured by massively parallel reporter assays (MPRA). We analyze the properties of the binding sites in the edited sequences and find patterns that are consistent with previously reported grammatical rules which tie gene expression to CRE binding site density, spacing and affinity.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-1\u0022\u003E\u003Ch2 class=\u0022\u0022\u003E1 Introduction\u003C\/h2\u003E\u003Cp id=\u0022p-5\u0022\u003ERecent generative models for genomic DNA sequences, such as generative adversarial networks, variational autoencoders, and recurrent neural networks, have largely focused on ab initio generation of biological sequences from distributions learned over a large collection of exemplar sequences[\u003Ca id=\u0022xref-ref-10-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-10\u0022\u003E10\u003C\/a\u003E, \u003Ca id=\u0022xref-ref-6-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-6\u0022\u003E6\u003C\/a\u003E, \u003Ca id=\u0022xref-ref-7-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-7\u0022\u003E7\u003C\/a\u003E]. 
However, generative models have been shown to suffer from low diversity \u2013 falling into the failure mode of producing generic sequences with high likelihood [\u003Ca id=\u0022xref-ref-8-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-8\u0022\u003E8\u003C\/a\u003E]. Generative models that are capable of editing an existing sequence, rather than generating an entirely new sequence from scratch, may be able to draw from the natural diversity present in biological sequences, while still allowing useful changes to the data. Also, many genome engineering applications typically require editing an existing DNA sequence in order to knock out or repair disease genes or modify regulatory DNA to modulate gene expression in specific cell types and states.\u003C\/p\u003E\u003Cp id=\u0022p-6\u0022\u003EMachine learning approaches for editing existing sequences for desired properties have been significantly less well studied than ab-initio generative models. Guu \u003Cem\u003Eet al\u003C\/em\u003E proposed a neural editor for natural language to transform an input sentence into an output based on a sampled edit vector; however, the edit vectors are latent and must be interpreted after training [\u003Ca id=\u0022xref-ref-8-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-8\u0022\u003E8\u003C\/a\u003E].\u003C\/p\u003E\u003Cp id=\u0022p-7\u0022\u003EHere, we propose a novel Encoder-Decoder-Analyzer (EDA) neural network architecture that radically departs from status quo methods [\u003Ca id=\u0022xref-ref-8-3\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-8\u0022\u003E8\u003C\/a\u003E]. EDA combines recurrent sequence-to-sequence models, latent vectors based on an explicit predictor of desired sequence properties and adversarial example generation techniques. EDA automatically generates candidate modifications to prespecified regulatory DNA sequences to optimize specific properties such as binding of transcription factors or reporter gene expression. 
This model represents a unique approach to edit sequences for desired properties by leveraging existing supervised learning models that can accurately map regulatory sequences to specific properties. We showcase the EDA model on two pilot case studies. In the first case study, we use EDA to edit regulatory DNA sequences to increase the binding probability of a transcription factor SPI1 by leveraging \u003Cem\u003Ein vivo\u003C\/em\u003E genome-wide binding profiles (ChIP-seq) for SPI1. In the second case study, we use EDA to generate candidate regulatory sequences containing configurations of binding sites of the CREB1 transcription factor that can produce a desired gene expression readout as measured by a massively parallel reporter assay (MPRA) from Davis \u003Cem\u003Eet al,\u003C\/em\u003E 2019 [\u003Ca id=\u0022xref-ref-3-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-3\u0022\u003E3\u003C\/a\u003E]. The EDA approach significantly outperforms existing state-of-the-art approaches.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-2\u0022\u003E\u003Ch2 class=\u0022\u0022\u003E2 Methods\u003C\/h2\u003E\u003Cdiv id=\u0022sec-3\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003ESequence Variational Autoencoder (SVAE) as a baseline method\u003C\/h3\u003E\u003Cp id=\u0022p-8\u0022\u003EThe sequence variational autoencoder (SVAE) for editing is based off the recurrent architecture described in [\u003Ca id=\u0022xref-ref-1-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-1\u0022\u003E1\u003C\/a\u003E]; the encoder produces the parameters (\u003Cem\u003E\u03bc\u003C\/em\u003E, \u03a3) of a Gaussian distribution in latent space, from which \u003Cem\u003Ez\u003C\/em\u003E, a latent vector encoding the sequence \u003Cem\u003Ex\u003C\/em\u003E, is sampled. The decoder attempts to reconstruct the input sequence from \u003Cem\u003Ez\u003C\/em\u003E. 
The loss function of the VAE is given in \u003Ca id=\u0022xref-disp-formula-1-1\u0022 class=\u0022xref-disp-formula\u0022 href=\u0022#disp-formula-1\u0022\u003EEquation 1\u003C\/a\u003E. For editing, the latent space of the SVAE was perturbed through the addition of Gaussian Noise \u003Cspan class=\u0022inline-formula\u0022 id=\u0022inline-formula-1\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.biorxiv.org\/sites\/default\/files\/highwire\/biorxiv\/early\/2019\/07\/27\/714402\/embed\/inline-graphic-1.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.biorxiv.org\/sites\/default\/files\/highwire\/biorxiv\/early\/2019\/07\/27\/714402\/embed\/inline-graphic-1.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/span\u003E where \u003Cspan class=\u0022inline-formula\u0022 id=\u0022inline-formula-2\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.biorxiv.org\/sites\/default\/files\/highwire\/biorxiv\/early\/2019\/07\/27\/714402\/embed\/inline-graphic-2.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.biorxiv.org\/sites\/default\/files\/highwire\/biorxiv\/early\/2019\/07\/27\/714402\/embed\/inline-graphic-2.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/span\u003E as proposed in Guu \u003Cem\u003Eet al\u003C\/em\u003E [\u003Ca id=\u0022xref-ref-8-4\u0022 class=\u0022xref-bibr\u0022 
href=\u0022#ref-8\u0022\u003E8\u003C\/a\u003E].\n\u003Cspan class=\u0022disp-formula\u0022 id=\u0022disp-formula-1\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.biorxiv.org\/sites\/default\/files\/highwire\/biorxiv\/early\/2019\/07\/27\/714402\/embed\/graphic-1.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.biorxiv.org\/sites\/default\/files\/highwire\/biorxiv\/early\/2019\/07\/27\/714402\/embed\/graphic-1.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\n\n\u003C\/span\u003E\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-4\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003EEncoder Decoder Analyzer (EDA) Custom Architecture\u003C\/h3\u003E\u003Cp id=\u0022p-9\u0022\u003EOur novel architecture called EDA consists of three deep neural network components: an Encoder, Decoder, and Analyzer. The Encoder and Decoder are recurrent neural networks (RNNs) with attention. The encoder architecture used here consisted of an embedding layer followed by a recurrent layer. The embedding layer contained learnable weights and output size \u003Cem\u003Eh\u003C\/em\u003E = 256, and the embedded outputs are then fed into a one layer bidirectional GRU, with dropout of 0.1. Similarly, the decoder consists of an embedding layer (output size \u003Cem\u003Eh\u003C\/em\u003E = 256 and dropout \u003Cem\u003Ep\u003C\/em\u003E = 0.1) which creates an embedding for the input base pair at each time step, followed by attention over the encoder outputs. The outputs from the attention layer are concatenated with the input at each time step, and fed into a dense connected layer with a ReLU activation function. 
The outputs from this dense layer are fed into a one layer GRU with dropout \u003Cem\u003Ep\u003C\/em\u003E = 0.1, followed by a fully connected layer with a softmax activation function. The output from the decoder at each time step is the predicted next token in the sequence. Conceptually, the Encoder learns to transform any one-hot encoded input DNA sequence to a compact latent representation. The Decoder learns to generate an output DNA sequence given a specific instantiation of the latent representation learned by the encoder. The Analyzer is a neural network that can map the latent representation of a DNA sequence to a specific property that we are typically interested in optimizing. Here, we use a convolutional neural network (CNN) as the Analyzer architecture, although it can be any differentiable architecture. Details of the Analyzer architecture are as follows: the model consists of three convolutional layers (15 filters, kernel size of 3, padding of 1), an average pooling layer, and two densely connected layers. The activation function following each layer was a ReLU activation, save for the last layer, which had a sigmoid activation function in the classification setting, and no nonlinearity in the regression setting.\u003C\/p\u003E\u003Cp\u003EThe procedure for editing in the EDA architecture consists of three phases.\n\u003C\/p\u003E\u003Cul class=\u0022list-simple \u0022 id=\u0022list-1\u0022\u003E\u003Cli id=\u0022list-item-1\u0022\u003E\u003Cp id=\u0022p-11\u0022\u003E\u003Cstrong\u003EStage 1: Training Encoder-Decoder\u003C\/strong\u003E. The Encoder-Decoder seq2seq architecture is trained as an autoencoder, with loss equal to the categorical cross entropy between the softmax outputs and one-hot-encoded next base pair, summed over every position in the sequence. The loss was minimized with the Adam optimizer with learning rate 0.001. 
The Encoder takes in sequence \u003Cem\u003Ex\u003C\/em\u003E and produces a latent space embedding \u003Cem\u003Ez\u003C\/em\u003E, while the Decoder takes \u003Cem\u003Ez\u003C\/em\u003E and attempts to reproduce the original sequence \u003Cem\u003Ex\u003C\/em\u003E.\u003C\/p\u003E\u003C\/li\u003E\u003Cli id=\u0022list-item-2\u0022\u003E\u003Cp id=\u0022p-12\u0022\u003E\u003Cstrong\u003EStage 2: Training Analyzer\u003C\/strong\u003E The same Encoder from stage 1 is also followed by an Analyzer module, which takes in the latent state \u003Cem\u003Ez\u003C\/em\u003E of a sequence from the Encoder, and produces an output prediction \u003Cem\u003Ey\u003C\/em\u003E of a property of the sequence.\u003C\/p\u003E\u003C\/li\u003E\u003Cli id=\u0022list-item-3\u0022\u003E\u003Cp id=\u0022p-13\u0022\u003E\u003Cstrong\u003EStage 3: Editing\u003C\/strong\u003E. Given input sentence \u003Cem\u003Ex\u003C\/em\u003E, the encoder produces the latent state embedding \u003Cem\u003Ez\u003C\/em\u003E for the sentence. 
We update the latent state to minimize the loss function \u003Cspan class=\u0022inline-formula\u0022 id=\u0022inline-formula-3\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.biorxiv.org\/sites\/default\/files\/highwire\/biorxiv\/early\/2019\/07\/27\/714402\/embed\/inline-graphic-3.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.biorxiv.org\/sites\/default\/files\/highwire\/biorxiv\/early\/2019\/07\/27\/714402\/embed\/inline-graphic-3.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/span\u003E(the binary cross entropy loss) between the analyzer\u2019s prediction \u003Cspan class=\u0022inline-formula\u0022 id=\u0022inline-formula-4\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.biorxiv.org\/sites\/default\/files\/highwire\/biorxiv\/early\/2019\/07\/27\/714402\/embed\/inline-graphic-4.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.biorxiv.org\/sites\/default\/files\/highwire\/biorxiv\/early\/2019\/07\/27\/714402\/embed\/inline-graphic-4.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/span\u003E and the desired score \u003Cem\u003Ey\u003C\/em\u003E via the Fast Gradient Sign Method (FGSM) [\u003Ca id=\u0022xref-ref-5-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-5\u0022\u003E5\u003C\/a\u003E] as in \u003Ca id=\u0022xref-disp-formula-2-1\u0022 class=\u0022xref-disp-formula\u0022 
href=\u0022#disp-formula-2\u0022\u003EEquation 2\u003C\/a\u003E.\u003C\/p\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003Cp\u003E\n\u003Cspan class=\u0022disp-formula\u0022 id=\u0022disp-formula-2\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.biorxiv.org\/sites\/default\/files\/highwire\/biorxiv\/early\/2019\/07\/27\/714402\/embed\/graphic-2.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.biorxiv.org\/sites\/default\/files\/highwire\/biorxiv\/early\/2019\/07\/27\/714402\/embed\/graphic-2.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\n\n\u003C\/span\u003E\u003C\/p\u003E\u003Cdiv id=\u0022statement-1\u0022 class=\u0022statement\u0022\u003E\u003Cspan class=\u0022statement-label\u0022\u003EAlgorithm 1\u003C\/span\u003E\u003Ch3\u003EEDA Architecture Editing.\u003C\/h3\u003E\u003Cdiv id=\u0022F1\u0022 class=\u0022fig pos-float type-figure odd\u0022\u003E\u003Cdiv class=\u0022highwire-figure\u0022\u003E\u003Cdiv class=\u0022fig-inline-img-wrapper\u0022\u003E\u003Cdiv class=\u0022fig-inline-img\u0022\u003E\u003Ca href=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F1.large.jpg?width=800\u0026amp;height=600\u0026amp;carousel=1\u0022 title=\u0022\u0022 class=\u0022highwire-fragment fragment-images colorbox-load\u0022 rel=\u0022gallery-fragment-images-1304969065\u0022 data-figure-caption=\u0022\u0026lt;div class=\u0026quot;highwire-markup\u0026quot;\u0026gt;\u0026lt;\/div\u0026gt;\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003E\u003Cspan class=\u0022hw-responsive-img\u0022\u003E\u003Cimg class=\u0022highwire-fragment fragment-image lazyload\u0022 alt=\u0022Figure\u0022 
src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 data-src=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F1.medium.gif\u0022 width=\u0022440\u0022 height=\u0022219\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-fragment fragment-image\u0022 alt=\u0022Figure\u0022 src=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F1.medium.gif\u0022 width=\u0022440\u0022 height=\u0022219\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cul class=\u0022highwire-figure-links inline\u0022\u003E\u003Cli class=\u0022download-fig first\u0022\u003E\u003Ca href=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F1.large.jpg?download=true\u0022 class=\u0022highwire-figure-link highwire-figure-link-download\u0022 title=\u0022Download Figure1\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload figure\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022new-tab last\u0022\u003E\u003Ca href=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F1.large.jpg\u0022 class=\u0022highwire-figure-link highwire-figure-link-newtab\u0022 target=\u0022_blank\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EOpen in new tab\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cp id=\u0022p-14\u0022\u003E\u003C\/p\u003E\u003C\/div\u003E\u003Cp id=\u0022p-15\u0022\u003EThe latent state is updated until the loss is approximately \u003Cspan class=\u0022inline-formula\u0022 id=\u0022inline-formula-5\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 
data-src=\u0022https:\/\/www.biorxiv.org\/sites\/default\/files\/highwire\/biorxiv\/early\/2019\/07\/27\/714402\/embed\/inline-graphic-5.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.biorxiv.org\/sites\/default\/files\/highwire\/biorxiv\/early\/2019\/07\/27\/714402\/embed\/inline-graphic-5.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/span\u003E. The decoder produces the edited sequence \u003Cspan class=\u0022inline-formula\u0022 id=\u0022inline-formula-6\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.biorxiv.org\/sites\/default\/files\/highwire\/biorxiv\/early\/2019\/07\/27\/714402\/embed\/inline-graphic-6.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.biorxiv.org\/sites\/default\/files\/highwire\/biorxiv\/early\/2019\/07\/27\/714402\/embed\/inline-graphic-6.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/span\u003E from the modified latent representation \u003Cem\u003Ez\u003C\/em\u003E\u2032. Epsilon (\u003Cem\u003E\u03f5\u003C\/em\u003E) is a hyperparameter varying between zero and one corresponding to the size of steps taken in the latent space.\u003C\/p\u003E\u003Cp id=\u0022p-16\u0022\u003EThe pseudocode for the EDA training is shown in Algorithm 1. The training algorithm includes an additional step where the desired property (e.g. 
SPI1 binding or reporter expression) is predicted from the edited sentence \u003Cspan class=\u0022inline-formula\u0022 id=\u0022inline-formula-7\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.biorxiv.org\/sites\/default\/files\/highwire\/biorxiv\/early\/2019\/07\/27\/714402\/embed\/inline-graphic-7.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.biorxiv.org\/sites\/default\/files\/highwire\/biorxiv\/early\/2019\/07\/27\/714402\/embed\/inline-graphic-7.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/span\u003E. This step is necessary as the latent representation \u003Cspan class=\u0022inline-formula\u0022 id=\u0022inline-formula-8\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.biorxiv.org\/sites\/default\/files\/highwire\/biorxiv\/early\/2019\/07\/27\/714402\/embed\/inline-graphic-8.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.biorxiv.org\/sites\/default\/files\/highwire\/biorxiv\/early\/2019\/07\/27\/714402\/embed\/inline-graphic-8.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/span\u003E of the decoded sequence \u003Cspan class=\u0022inline-formula\u0022 id=\u0022inline-formula-9\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed 
lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.biorxiv.org\/sites\/default\/files\/highwire\/biorxiv\/early\/2019\/07\/27\/714402\/embed\/inline-graphic-9.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.biorxiv.org\/sites\/default\/files\/highwire\/biorxiv\/early\/2019\/07\/27\/714402\/embed\/inline-graphic-9.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/span\u003E may not be the same as the perturbed latent representation \u003Cem\u003Ez\u003C\/em\u003E\u2032 due to noise in the decoding process.\u003C\/p\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-5\u0022\u003E\u003Ch2 class=\u0022\u0022\u003E3 Optimizing regulatory DNA sequences for binding of the SPI1 transcription factor\u003C\/h2\u003E\u003Cdiv id=\u0022sec-6\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003EDataset\u003C\/h3\u003E\u003Cp id=\u0022p-17\u0022\u003E43,787 reproducibly-identified peaks from an ENCODE ChIP-seq experiment targeting the SPI1 transcription factor in lymphoblastoid cell line GM12878 (GEO GSM803531) were used as the positive labeled set of putative SPI1 bound sequences. The negative labeled set was constructed from an equal number of non-overlapping unbound 200 bp sequences from the human genome. Datapoints from chromosomes 1 and 2 were used as the test and validation set, respectively.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-7\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003EBaselines\u003C\/h3\u003E\u003Cp id=\u0022p-18\u0022\u003EAn SVAE model was trained as a neural baseline, as described above. 
A simple rule-based editing model was also constructed that randomly adds the SPI1 consensus binding site (\u201cAGGAA\u201d) if not already present in the sequence.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-8\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003EEvaluation Method\u003C\/h3\u003E\u003Cp id=\u0022p-19\u0022\u003EEdited sequences are evaluated on three quantitative metrics: similarity to the original DNA sequence, predicted binding score (probability) of SPI1, and percent of sequences with matches to known SPI1 binding motifs [\u003Ca id=\u0022xref-ref-9-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-9\u0022\u003E9\u003C\/a\u003E]. Binding score is measured by an independent CNN model trained to discriminate 200 bp sequences labeled as bound by SPI1 ChIP-seq data from a balanced number of background sequences from the genome. This independent CNN model achieves AUROC of 0.979 and AUPRC of 0.978 on a held-out test set, where, for training the independent model, datapoints from chromosomes 1 and 2 were used as the test and validation sets, respectively, while all other sequences were used in the training set. Similarity of edited sequences to the original sequences was calculated by the gapped kmer-mismatch (GKM) kernel, which evaluates DNA sequence similarity based on gapped kernel overlap [\u003Ca id=\u0022xref-ref-4-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-4\u0022\u003E4\u003C\/a\u003E]. 
We also used the BLEU-4 score, a metric more commonly used in NLP translation as another measure of sequence similarity.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-9\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003E3.1 SPI1 Editing Results\u003C\/h3\u003E\u003Cdiv id=\u0022sec-10\u0022 class=\u0022subsection\u0022\u003E\u003Ch4\u003ETraining Results\u003C\/h4\u003E\u003Cp id=\u0022p-20\u0022\u003EThe loss curve for the SVAE, as well as the Encoder-Decoder portion of the EDA Architecture is shown in \u003Ca id=\u0022xref-fig-2-1\u0022 class=\u0022xref-fig\u0022 href=\u0022#F2\u0022\u003EFigure 1\u003C\/a\u003E. Whereas the SVAE loss is extremely noisy and difficult to optimize during training, The Encoder-Decoder of EDA achieves average edit distance of 8.8 out of a maximum possible edit distance of 150 between the input and output sequences after 20,000 iterations of training, which shows that the autoencoder is accurately learning to replicate the sequence. The accuracy of the analyzer in the EDA architecture is 92.674%, with AUROC of 0.979 and AUPRC of 0.978.\u003C\/p\u003E\u003Cdiv id=\u0022F2\u0022 class=\u0022fig pos-float type-figure odd\u0022\u003E\u003Cdiv class=\u0022highwire-figure\u0022\u003E\u003Cdiv class=\u0022fig-inline-img-wrapper\u0022\u003E\u003Cdiv class=\u0022fig-inline-img\u0022\u003E\u003Ca href=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F2.large.jpg?width=800\u0026amp;height=600\u0026amp;carousel=1\u0022 title=\u0022Training and validation loss curves of EDA Architecture (left) and VAE architecture (right).\u0022 class=\u0022highwire-fragment fragment-images colorbox-load\u0022 rel=\u0022gallery-fragment-images-1304969065\u0022 data-figure-caption=\u0022\u0026lt;div class=\u0026quot;highwire-markup\u0026quot;\u0026gt;Training and validation loss curves of EDA Architecture (left) and VAE architecture (right).\u0026lt;\/div\u0026gt;\u0022 data-icon-position=\u0022\u0022 
data-hide-link-title=\u00220\u0022\u003E\u003Cspan class=\u0022hw-responsive-img\u0022\u003E\u003Cimg class=\u0022highwire-fragment fragment-image lazyload\u0022 alt=\u0022Figure 1:\u0022 src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 data-src=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F2.medium.gif\u0022 width=\u0022440\u0022 height=\u0022158\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-fragment fragment-image\u0022 alt=\u0022Figure 1:\u0022 src=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F2.medium.gif\u0022 width=\u0022440\u0022 height=\u0022158\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cul class=\u0022highwire-figure-links inline\u0022\u003E\u003Cli class=\u0022download-fig first\u0022\u003E\u003Ca href=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F2.large.jpg?download=true\u0022 class=\u0022highwire-figure-link highwire-figure-link-download\u0022 title=\u0022Download Figure 1:\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload figure\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022new-tab last\u0022\u003E\u003Ca href=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F2.large.jpg\u0022 class=\u0022highwire-figure-link highwire-figure-link-newtab\u0022 target=\u0022_blank\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EOpen in new tab\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022fig-caption\u0022 xmlns:xhtml=\u0022http:\/\/www.w3.org\/1999\/xhtml\u0022\u003E\u003Cspan class=\u0022fig-label\u0022\u003EFigure 1:\u003C\/span\u003E \u003Cp id=\u0022p-21\u0022 class=\u0022first-child\u0022\u003ETraining and validation loss curves of EDA Architecture (left) and VAE architecture 
(right).\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-11\u0022 class=\u0022subsection\u0022\u003E\u003Ch4\u003EEDA Edited Sequences\u003C\/h4\u003E\u003Cp id=\u0022p-22\u0022\u003E500 randomly selected sequences from the balanced test set were edited using EDA, where previously bound sequences in the test set were classified based on overlap with SPI1 ChIP-seq peaks). Results are shown in \u003Ca id=\u0022xref-table-wrap-1-1\u0022 class=\u0022xref-table\u0022 href=\u0022#T1\u0022\u003ETable 1\u003C\/a\u003E. Edits from the EDA architecture demonstrate high similarity (52.39% on average) to the original sequences, whereas the SVAE edits achieve similarity of only 5.2%. Any two random sequences from the set have GKM similarity of 2.048%. Thus, rather than editing, the SVAE appears to be sampling separate sequences.\u003C\/p\u003E\u003Cdiv id=\u0022T1\u0022 class=\u0022table pos-float\u0022\u003E\u003Cdiv class=\u0022table-inline table-callout-links\u0022\u003E\u003Cdiv class=\u0022callout\u0022\u003E\u003Cspan\u003EView this table:\u003C\/span\u003E\u003Cul class=\u0022callout-links\u0022\u003E\u003Cli class=\u0022view-inline first\u0022\u003E\u003Ca href=\u0022\u0022 class=\u0022table-expand-inline\u0022 data-table-url=\u0022\/highwire\/markup\/847962\/expansion?postprocessors=highwire_tables%2Chighwire_reclass%2Chighwire_figures%2Chighwire_math%2Chighwire_inline_linked_media%2Chighwire_embed\u0026amp;table-expand-inline=1\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EView inline\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022view-popup\u0022\u003E\u003Ca href=\u0022\/highwire\/markup\/847962\/expansion?width=1000\u0026amp;height=500\u0026amp;iframe=true\u0026amp;postprocessors=highwire_tables%2Chighwire_reclass%2Chighwire_figures%2Chighwire_math%2Chighwire_inline_linked_media%2Chighwire_embed\u0022 
class=\u0022colorbox colorbox-load table-expand-popup\u0022 rel=\u0022gallery-fragment-tables\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EView popup\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022download-ppt last\u0022\u003E\u003Ca href=\u0022\/highwire\/powerpoint\/847962\u0022 class=\u0022highwire-figure-link highwire-figure-link-ppt\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload powerpoint\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv class=\u0022table-caption\u0022\u003E\u003Cspan class=\u0022table-label\u0022\u003ETable 1:\u003C\/span\u003E \u003Cp id=\u0022p-23\u0022 class=\u0022first-child\u0022\u003EComparison of Editing Methods in terms of sequence similarity (out of 1), BLEU-4 score (out of 100), and percentage of sentences predicted positive.\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cp id=\u0022p-24\u0022\u003EOverall, 84.4% of EDA-edited sequences are predicted to bind to SPI1 by the independent CNN model trained on SPI1 ChIP-seq data. 34.2% of these sequences contain a deterministic match to the SPI1 motif \u201cAGGAA\u201d, and 63.4% of sequences contain the \u201cGGAA\u201d portion, which has the highest information content in the SPI1 PWM [\u003Ca id=\u0022xref-ref-9-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-9\u0022\u003E9\u003C\/a\u003E]. The rule-based baseline achieves only 45% of sequences predicted positive, similar to the probability that any randomly chosen test set sequence would be predicted positive. 
Thus, editing these sequences to optimize for binding score is more complex than simply inserting high affinity SPI1 binding sites.\u003C\/p\u003E\u003Cp id=\u0022p-25\u0022\u003E\u003Cstrong\u003E\u003Ca id=\u0022xref-table-wrap-2-1\u0022 class=\u0022xref-table\u0022 href=\u0022#T2\u0022\u003ETable 2\u003C\/a\u003E\u003C\/strong\u003E shows a DNA sequence which initially had a low binding score on the independent model, whose edit received a high score. The EDA model modifies the area in the initial sequence in gray into the full SPI1 binding motif shown in orange.\u003C\/p\u003E\u003Cdiv id=\u0022T2\u0022 class=\u0022table pos-float\u0022\u003E\u003Cdiv class=\u0022table-inline table-callout-links\u0022\u003E\u003Cdiv class=\u0022callout\u0022\u003E\u003Cspan\u003EView this table:\u003C\/span\u003E\u003Cul class=\u0022callout-links\u0022\u003E\u003Cli class=\u0022view-inline first\u0022\u003E\u003Ca href=\u0022\u0022 class=\u0022table-expand-inline\u0022 data-table-url=\u0022\/highwire\/markup\/847960\/expansion?postprocessors=highwire_tables%2Chighwire_reclass%2Chighwire_figures%2Chighwire_math%2Chighwire_inline_linked_media%2Chighwire_embed\u0026amp;table-expand-inline=1\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EView inline\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022view-popup\u0022\u003E\u003Ca href=\u0022\/highwire\/markup\/847960\/expansion?width=1000\u0026amp;height=500\u0026amp;iframe=true\u0026amp;postprocessors=highwire_tables%2Chighwire_reclass%2Chighwire_figures%2Chighwire_math%2Chighwire_inline_linked_media%2Chighwire_embed\u0022 class=\u0022colorbox colorbox-load table-expand-popup\u0022 rel=\u0022gallery-fragment-tables\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EView popup\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022download-ppt last\u0022\u003E\u003Ca href=\u0022\/highwire\/powerpoint\/847960\u0022 class=\u0022highwire-figure-link highwire-figure-link-ppt\u0022 
data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload powerpoint\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv class=\u0022table-caption\u0022\u003E\u003Cspan class=\u0022table-label\u0022\u003ETable 2:\u003C\/span\u003E \u003Cp id=\u0022p-26\u0022 class=\u0022first-child\u0022\u003EOriginal sequence and edited sequence from the EDA architecture. The SPI1 motif is highlighted in orange.\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-12\u0022\u003E\u003Ch2 class=\u0022\u0022\u003E4 Optimizing Reporter Expression of regulatory DNA sequences containing CREB1 binding site grammars\u003C\/h2\u003E\u003Cdiv id=\u0022sec-13\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003EDataset\u003C\/h3\u003E\u003Cp id=\u0022p-27\u0022\u003EThe CRE MPRA dataset from Davis et al. measures reporter gene expression of a library of DNA sequences with various configurations of CREB1 binding sites by varying motif strength, density, spacing, and distance from the core promoter[\u003Ca id=\u0022xref-ref-3-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-3\u0022\u003E3\u003C\/a\u003E]. The genomic MPRA dataset consists of 3480 sequences of 150bp within 3 backgrounds with different combinations and locations of CREB1 binding sites. Davis \u003Cem\u003Eet al.\u003C\/em\u003E define a strong CREB1 consensus binding site as \u201cTGACGTCA\u201d, and a weak binding site is \u201cTGAAGTCA\u201d. Reporter expression of the library is measured by the log ratio of counts of RNA barcode reads of a sequence to the count of DNA reads. A histogram of log fold change expression levels is shown in \u003Ca id=\u0022xref-fig-3-1\u0022 class=\u0022xref-fig\u0022 href=\u0022#F3\u0022\u003EFigure 2\u003C\/a\u003E. 
Seventy percent of this dataset was randomly selected for training, with twenty percent for validation and ten percent for testing.\u003C\/p\u003E\u003Cdiv id=\u0022F3\u0022 class=\u0022fig pos-float type-figure odd\u0022\u003E\u003Cdiv class=\u0022highwire-figure\u0022\u003E\u003Cdiv class=\u0022fig-inline-img-wrapper\u0022\u003E\u003Cdiv class=\u0022fig-inline-img\u0022\u003E\u003Ca href=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F3.large.jpg?width=800\u0026amp;height=600\u0026amp;carousel=1\u0022 title=\u0022Histogram of log(expression) levels for sequences in CREB1 MPRA dataset; expression ranges widely, from less than zero, meaning that number of RNA barcoded reads are less than the original DNA reads, to six.\u0022 class=\u0022highwire-fragment fragment-images colorbox-load\u0022 rel=\u0022gallery-fragment-images-1304969065\u0022 data-figure-caption=\u0022\u0026lt;div class=\u0026quot;highwire-markup\u0026quot;\u0026gt;Histogram of log(expression) levels for sequences in CREB1 MPRA dataset; expression ranges widely, from less than zero, meaning that number of RNA barcoded reads are less than the original DNA reads, to six.\u0026lt;\/div\u0026gt;\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003E\u003Cspan class=\u0022hw-responsive-img\u0022\u003E\u003Cimg class=\u0022highwire-fragment fragment-image lazyload\u0022 alt=\u0022Figure 2:\u0022 src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 data-src=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F3.medium.gif\u0022 width=\u0022440\u0022 height=\u0022311\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-fragment fragment-image\u0022 alt=\u0022Figure 2:\u0022 src=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F3.medium.gif\u0022 width=\u0022440\u0022 
height=\u0022311\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cul class=\u0022highwire-figure-links inline\u0022\u003E\u003Cli class=\u0022download-fig first\u0022\u003E\u003Ca href=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F3.large.jpg?download=true\u0022 class=\u0022highwire-figure-link highwire-figure-link-download\u0022 title=\u0022Download Figure 2:\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload figure\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022new-tab last\u0022\u003E\u003Ca href=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F3.large.jpg\u0022 class=\u0022highwire-figure-link highwire-figure-link-newtab\u0022 target=\u0022_blank\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EOpen in new tab\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022fig-caption\u0022\u003E\u003Cspan class=\u0022fig-label\u0022\u003EFigure 2:\u003C\/span\u003E \u003Cp id=\u0022p-28\u0022 class=\u0022first-child\u0022\u003EHistogram of log(expression) levels for sequences in CREB1 MPRA dataset; expression ranges widely, from less than zero, meaning that number of RNA barcoded reads are less than the original DNA reads, to six.\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cp id=\u0022p-29\u0022\u003EFrom analysis of the MPRA dataset, Davis \u003Cem\u003Eet al\u003C\/em\u003E find four main correlations between CREB1 binding site configurations in the library and corresponding reporter expression levels: 1) number of strong CRE binding sites positively correlates with expression, 2) weak binding sites increase expression given the presence of at least one strong binding site, 3) higher expression occurs with shorter distance of CRE binding sites to the core promoter, 
and 4) spacing between CRE binding sites modulates periodicity of expression, as two strong binding sites are moved along the sequence.\u003C\/p\u003E\u003Cp id=\u0022p-30\u0022\u003EHere, the editing task is to optimize sequences for particular expression profiles; in particular, to edit MPRA sequences that have high measured reporter expression (log(expression) \u2265 5) to new sequences that have low predicted expression, and vice versa. As several correlations between sequence properties and expression are already discussed by Davis \u003Cem\u003Eet al.,\u003C\/em\u003E we investigate whether the edited sequences show evidence of these previously discovered rules.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-14\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003EIndependent Analyzer\u003C\/h3\u003E\u003Cp id=\u0022p-31\u0022\u003EAs an independent predictor from the analyzer in the EDA architecture, we train a log-linear model of expression levels similar to Davis \u003Cem\u003Eet al.,\u003C\/em\u003E with expression predicted from the number of strong and weak binding sites, sequence background, average spacing between CREB1 sites, and distance from the minimal promoter element; in addition, polynomial features of degree 2 were used to model interaction terms. This simple model achieves \u003Cem\u003ER\u003C\/em\u003E\u003Csup\u003E2\u003C\/sup\u003E = 0.801 on a held out test set consisting of 10 percent of the training data, which was the same test set as used for the training of the EDA architecture (\u003Cstrong\u003E\u003Ca id=\u0022xref-fig-4-1\u0022 class=\u0022xref-fig\u0022 href=\u0022#F4\u0022\u003EFigure 3a\u003C\/a\u003E\u003C\/strong\u003E). 
This independent log linear model was used to evaluate the edited sequences from the EDA model.\u003C\/p\u003E\u003Cdiv id=\u0022F4\u0022 class=\u0022fig pos-float type-figure odd\u0022\u003E\u003Cdiv class=\u0022highwire-figure\u0022\u003E\u003Cdiv class=\u0022fig-inline-img-wrapper\u0022\u003E\u003Cdiv class=\u0022fig-inline-img\u0022\u003E\u003Ca href=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F4.large.jpg?width=800\u0026amp;height=600\u0026amp;carousel=1\u0022 title=\u0022Predicted expression versus measured expression for a) independent log-linear model, and b) analyzer from EDA architecture\u0022 class=\u0022highwire-fragment fragment-images colorbox-load\u0022 rel=\u0022gallery-fragment-images-1304969065\u0022 data-figure-caption=\u0022\u0026lt;div class=\u0026quot;highwire-markup\u0026quot;\u0026gt;Predicted expression versus measured expression for a) independent log-linear model, and b) analyzer from EDA architecture\u0026lt;\/div\u0026gt;\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003E\u003Cspan class=\u0022hw-responsive-img\u0022\u003E\u003Cimg class=\u0022highwire-fragment fragment-image lazyload\u0022 alt=\u0022Figure 3:\u0022 src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 data-src=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F4.medium.gif\u0022 width=\u0022440\u0022 height=\u0022179\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-fragment fragment-image\u0022 alt=\u0022Figure 3:\u0022 src=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F4.medium.gif\u0022 width=\u0022440\u0022 height=\u0022179\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cul class=\u0022highwire-figure-links inline\u0022\u003E\u003Cli class=\u0022download-fig first\u0022\u003E\u003Ca 
href=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F4.large.jpg?download=true\u0022 class=\u0022highwire-figure-link highwire-figure-link-download\u0022 title=\u0022Download Figure 3:\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload figure\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022new-tab last\u0022\u003E\u003Ca href=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F4.large.jpg\u0022 class=\u0022highwire-figure-link highwire-figure-link-newtab\u0022 target=\u0022_blank\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EOpen in new tab\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022fig-caption\u0022\u003E\u003Cspan class=\u0022fig-label\u0022\u003EFigure 3:\u003C\/span\u003E \u003Cp id=\u0022p-32\u0022 class=\u0022first-child\u0022\u003EPredicted expression versus measured expression for a) independent log-linear model, and b) analyzer from EDA architecture\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-15\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003ETraining Results\u003C\/h3\u003E\u003Cp id=\u0022p-33\u0022\u003EThe components of the EDA model were trained on the CRE MPRA dataset to learn a latent representation of the sequences through the encoder-decoder portion, and to predict reporter expression levels from this latent representation through the analyzer. As described in \u003Ca id=\u0022xref-sec-2-1\u0022 class=\u0022xref-sec\u0022 href=\u0022#sec-2\u0022\u003ESection 2\u003C\/a\u003E, the encoder and decoder were both recurrent neural networks, where the decoder has softmax attention over the encoder outputs. The seq2seq autoencoder achieved an average edit distance of 6.06 between input and output after training for 10,000 iterations. 
The analyzer, as above, was a CNN architecture with three convolutional layers, each followed by a ReLU activation, average pooling, and two dense layers; this architecture achieved \u003Cem\u003ER\u003C\/em\u003E\u003Csup\u003E2\u003C\/sup\u003E = 0.93 on a held out test set; the analyzer\u2019s predicted expressions correlate well with measured expressions, as shown in \u003Cstrong\u003E\u003Ca id=\u0022xref-fig-4-2\u0022 class=\u0022xref-fig\u0022 href=\u0022#F4\u0022\u003EFigure 3b\u003C\/a\u003E\u003C\/strong\u003E.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-16\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003EEDA Editing Results\u003C\/h3\u003E\u003Cp id=\u0022p-34\u0022\u003EHere, our editing task is to edit CREB1 MPRA sequences from a held out test set that exhibit high measured expression to new sequences with low expression and vice versa. We further evaluate whether the resulting edited sequences displayed known patterns of CRE binding site placement elucidated by Davis \u003Cem\u003Eet al\u003C\/em\u003E. 204 sequences with measured MPRA log(expression) \u0026lt;= 0 were edited using EDA to a higher desired target level of log(expression) = 5.0. 96 sequences with measured MPRA log(expression) \u0026gt;= 5 were edited using EDA to obtain a lower target level of log(expression) = 0.\u003C\/p\u003E\u003Cp id=\u0022p-35\u0022\u003EAs evaluated by the independent analyzer, 79.4% of sequences to be edited from low expression to high expression were predicted to have higher expression post editing. 100% of sequences edited from high to low expression were predicted to have lower expression post editing. 
The histograms of log expression levels both before and after editing are shown in \u003Cstrong\u003E\u003Ca id=\u0022xref-fig-5-1\u0022 class=\u0022xref-fig\u0022 href=\u0022#F5\u0022\u003EFigure 4\u003C\/a\u003E\u003C\/strong\u003E.\u003C\/p\u003E\u003Cdiv id=\u0022F5\u0022 class=\u0022fig pos-float type-figure odd\u0022\u003E\u003Cdiv class=\u0022highwire-figure\u0022\u003E\u003Cdiv class=\u0022fig-inline-img-wrapper\u0022\u003E\u003Cdiv class=\u0022fig-inline-img\u0022\u003E\u003Ca href=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F5.large.jpg?width=800\u0026amp;height=600\u0026amp;carousel=1\u0022 title=\u0022Histograms of log expression levels before and after editing by the EDA architecture and evaluation on an independent linear model. Log expression of original sequences is shown in orange, while log expression of edited sequences is shown in blue, where expression of edited sequences is predicted by the independent model. Edits targeted from low to high expression are shown in a, and edits from high to low expression are shown in b. Here, low expression was defined as log(expression) \u0026#x2264; 0.0, and high expression was defined as log(expression) \u0026#x2265; 5.0.\u0022 class=\u0022highwire-fragment fragment-images colorbox-load\u0022 rel=\u0022gallery-fragment-images-1304969065\u0022 data-figure-caption=\u0022\u0026lt;div class=\u0026quot;highwire-markup\u0026quot;\u0026gt;Histograms of log expression levels before and after editing by the EDA architecture and evaluation on an independent linear model. Log expression of original sequences is shown in orange, while log expression of edited sequences is shown in blue, where expression of edited sequences is predicted by the independent model. Edits targeted from low to high expression are shown in a, and edits from high to low expression are shown in b. 
Here, low expression was defined as log(expression) \u0026#x2264; 0.0, and high expression was defined as log(expression) \u0026#x2265; 5.0.\u0026lt;\/div\u0026gt;\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003E\u003Cspan class=\u0022hw-responsive-img\u0022\u003E\u003Cimg class=\u0022highwire-fragment fragment-image lazyload\u0022 alt=\u0022Figure 4:\u0022 src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 data-src=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F5.medium.gif\u0022 width=\u0022440\u0022 height=\u0022161\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-fragment fragment-image\u0022 alt=\u0022Figure 4:\u0022 src=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F5.medium.gif\u0022 width=\u0022440\u0022 height=\u0022161\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cul class=\u0022highwire-figure-links inline\u0022\u003E\u003Cli class=\u0022download-fig first\u0022\u003E\u003Ca href=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F5.large.jpg?download=true\u0022 class=\u0022highwire-figure-link highwire-figure-link-download\u0022 title=\u0022Download Figure 4:\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload figure\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022new-tab last\u0022\u003E\u003Ca href=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F5.large.jpg\u0022 class=\u0022highwire-figure-link highwire-figure-link-newtab\u0022 target=\u0022_blank\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EOpen in new tab\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022fig-caption\u0022\u003E\u003Cspan class=\u0022fig-label\u0022\u003EFigure 4:\u003C\/span\u003E \u003Cp 
id=\u0022p-36\u0022 class=\u0022first-child\u0022\u003EHistograms of log expression levels before and after editing by the EDA architecture and evaluation on an independent linear model. Log expression of original sequences is shown in orange, while log expression of edited sequences is shown in blue, where expression of edited sequences is predicted by the independent model. Edits targeted from low to high expression are shown in a, and edits from high to low expression are shown in b. Here, low expression was defined as log(expression) \u2264 0.0, and high expression was defined as log(expression) \u2265 5.0.\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cp id=\u0022p-37\u0022\u003ENext, we inspected several representative edited sequences in order to evaluate whether they contained CREB1 binding site configurations that were previously associated with high and low expression read outs, where examples are shown in \u003Cstrong\u003E\u003Ca id=\u0022xref-fig-6-1\u0022 class=\u0022xref-fig\u0022 href=\u0022#F6\u0022\u003EFigure 5\u003C\/a\u003E\u003C\/strong\u003E.\u003C\/p\u003E\u003Cdiv id=\u0022F6\u0022 class=\u0022fig pos-float type-figure odd\u0022\u003E\u003Cdiv class=\u0022highwire-figure\u0022\u003E\u003Cdiv class=\u0022fig-inline-img-wrapper\u0022\u003E\u003Cdiv class=\u0022fig-inline-img\u0022\u003E\u003Ca href=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F6.large.jpg?width=800\u0026amp;height=600\u0026amp;carousel=1\u0022 title=\u0022Strong CREB1 binding motif is highlighted in yellow, while weak motif is highlighted in orange. 
Predicted expression, measured by the log of the ratio of RNA to DNA counts, is shown above each sequence.\u0022 class=\u0022highwire-fragment fragment-images colorbox-load\u0022 rel=\u0022gallery-fragment-images-1304969065\u0022 data-figure-caption=\u0022\u0026lt;div class=\u0026quot;highwire-markup\u0026quot;\u0026gt;Strong CREB1 binding motif is highlighted in yellow, while weak motif is highlighted in orange. Predicted expression, measured by the log of the ratio of RNA to DNA counts, is shown above each sequence.\u0026lt;\/div\u0026gt;\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003E\u003Cspan class=\u0022hw-responsive-img\u0022\u003E\u003Cimg class=\u0022highwire-fragment fragment-image lazyload\u0022 alt=\u0022Figure 5:\u0022 src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 data-src=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F6.medium.gif\u0022 width=\u0022440\u0022 height=\u0022424\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-fragment fragment-image\u0022 alt=\u0022Figure 5:\u0022 src=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F6.medium.gif\u0022 width=\u0022440\u0022 height=\u0022424\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cul class=\u0022highwire-figure-links inline\u0022\u003E\u003Cli class=\u0022download-fig first\u0022\u003E\u003Ca href=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F6.large.jpg?download=true\u0022 class=\u0022highwire-figure-link highwire-figure-link-download\u0022 title=\u0022Download Figure 5:\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload figure\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022new-tab last\u0022\u003E\u003Ca 
href=\u0022https:\/\/www.biorxiv.org\/content\/biorxiv\/early\/2019\/07\/27\/714402\/F6.large.jpg\u0022 class=\u0022highwire-figure-link highwire-figure-link-newtab\u0022 target=\u0022_blank\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EOpen in new tab\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022fig-caption\u0022\u003E\u003Cspan class=\u0022fig-label\u0022\u003EFigure 5:\u003C\/span\u003E \u003Cp id=\u0022p-38\u0022 class=\u0022first-child\u0022\u003EStrong CREB1 binding motif is highlighted in yellow, while weak motif is highlighted in orange. Predicted expression, measured by the log of the ratio of RNA to DNA counts, is shown above each sequence.\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cp id=\u0022p-39\u0022\u003EExample 1 illustrates the result that number of strong CREB1 binding sites is positively correlated with expression levels, as the edited sequence has the third strong CREB1 binding site changed to a weak site, resulting in expression being predicted to be more than 2x lower after editing.\u003C\/p\u003E\u003Cp id=\u0022p-40\u0022\u003EIn example 2, the original sequence has four weak binding sites and exhibits low measured expression (\u22120.101). After editing, the third weak binding site is changed to a strong binding site, the fourth weak binding site is deleted, and four additional strong binding sites are added to the sequence. This edit, with predicted expression 4.58, also displays the result found in Davis \u003Cem\u003Eet al\u003C\/em\u003E, that number of weak binding sites increases reporter gene expression given the presence of at least one strong binding site.\u003C\/p\u003E\u003Cp id=\u0022p-41\u0022\u003EIn the third example, EDA moves a CREB1 motif further away from the minimal promoter element in order to reduce expression. 
This transformation of the sequence aligns with the previously reported observation from the MPRA study that the distance of motifs from the core promoter is negatively correlated with expression. These examples are presented with the caveat that they cannot be used to show that the model has \u201clearned\u201d particular rules \u2013 only that the results from the EDA architecture align with known experimental correlations between CREB1 binding sites and reporter expression from the MPRA study.\u003C\/p\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-17\u0022\u003E\u003Ch2 class=\u0022\u0022\u003E5 Conclusion\u003C\/h2\u003E\u003Cp id=\u0022p-42\u0022\u003EThe EDA architecture proposed for targeted genomic editing brings together a broad array of techniques from attention-based seq2seq models, adversarial example generation, and computer vision. The architecture leverages existing genomic predictors to generate candidate modifications of sequences with diverse properties such as binding probability of a transcription factor or reporter gene expression levels. In the first case study of optimizing binding of the SPI1 transcription factor, we compared the EDA model to existing neural baselines \u2013 such as the Sequence VAE model and a rule-based baseline \u2013 and showed that EDA vastly improves upon existing models in both predicted binding affinity and similarity of original to edited sequences.\u003C\/p\u003E\u003Cp id=\u0022p-43\u0022\u003EIn the second case study, where we optimized binding site configurations of the CREB1 transcription factor in regulatory DNA sequences to tune reporter expression levels, we showed that a high proportion of the edited sequences show the desired shift in expression as predicted by an independent model. 
Furthermore, several edited sequences displayed CREB1 motif configurations in terms of binding site strength, density and position that agreed with previously derived rules.\u003C\/p\u003E\u003Cp id=\u0022p-44\u0022\u003EThis study primarily serves as a proof of concept and introduction of a novel neural architecture for targeted DNA sequence editing. In this work, we used independent predictors of the desired properties of DNA sequences to computationally validate the edited sequences. In the near future, we plan to provide experimental validation of the properties of edited sequences as more definitive support for our approach. EDA is very flexible and can also be easily adapted to other applications involving targeted DNA and RNA editing. We expect further advances in generative models that can perform targeted editing of biological sequences such as DNA, RNA and proteins have the potential to complement and improve the precision of experimental approaches for genome engineering and synthetic biology.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section ack\u0022 id=\u0022ack-1\u0022\u003E\u003Ch2 class=\u0022\u0022\u003E6 Acknowledgements\u003C\/h2\u003E\u003Cp id=\u0022p-45\u0022\u003EWe would like to thank Georgi Marinov for his help with processing and understanding the MPRA dataset. We would like to thank the authors of Davis \u003Cem\u003Eet al.\u003C\/em\u003E [\u003Ca id=\u0022xref-ref-3-3\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-3\u0022\u003E3\u003C\/a\u003E] for sharing their MPRA data pre-publication. 
We would like to thank Avanti Shrikumar and other members of the Kundaje lab for helpful discussions.\u003C\/p\u003E\u003Cp id=\u0022p-46\u0022\u003EThis work was supported by NIH grants 1DP2GM123485, 1U01HG009431 and 1R01HG00967401 awarded to AK.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section fn-group\u0022 id=\u0022fn-group-1\u0022\u003E\u003Ch2\u003EFootnotes\u003C\/h2\u003E\u003Cul\u003E\u003Cli class=\u0022fn\u0022 id=\u0022fn-1\u0022\u003E\u003Cp id=\u0022p-1\u0022\u003E\u003Cspan class=\u0022em-link\u0022\u003E\u003Cspan class=\u0022em-addr\u0022\u003Eakundaje{at}stanford.edu\u003C\/span\u003E\u003C\/span\u003E\u003C\/p\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section ref-list\u0022 id=\u0022ref-list-1\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EReferences\u003C\/h2\u003E\u003Col class=\u0022cit-list ref-use-labels\u0022\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[1].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-1-1\u0022 title=\u0022View reference [1] in text\u0022 id=\u0022ref-1\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-714402v2.1\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES. 
R.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EBowman\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EL.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EVilnis\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EO.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EVinyals\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA. M.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EDai\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ER.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EJozefowicz\u003C\/span\u003E\u003C\/span\u003E, and \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EBengio\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EGenerating sentences from a continuous space\u003C\/span\u003E. 
\u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EarXiv\u003C\/abbr\u003E preprint\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-arxiv-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id-scheme\u0022\u003EarXiv:\u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-arxiv\u0022\u003E1511.06349\u003C\/span\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2015\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[2].\u003C\/span\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal no-rev-xref\u0022 id=\u0022cit-714402v2.2\u0022 data-doi=\u002210.1038\/nature11247\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EE. P.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EConsortium\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EAn integrated encyclopedia of dna elements in the human genome\u003C\/span\u003E. 
\u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003ENature\u003C\/abbr\u003E, \u003Cspan class=\u0022cit-vol\u0022\u003E489\u003C\/span\u003E(\u003Cspan class=\u0022cit-issue\u0022\u003E7414\u003C\/span\u003E):\u003Cspan class=\u0022cit-fpage\u0022\u003E57\u003C\/span\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2012\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DNature%26rft.stitle%253DNature%26rft.aulast%253DBernstein%26rft.auinit1%253DB.%2BE.%26rft.volume%253D489%26rft.issue%253D7414%26rft.spage%253D57%26rft.epage%253D74%26rft.atitle%253DAn%2Bintegrated%2Bencyclopedia%2Bof%2BDNA%2Belements%2Bin%2Bthe%2Bhuman%2Bgenome.%26rft_id%253Dinfo%253Adoi%252F10.1038%252Fnature11247%26rft_id%253Dinfo%253Apmid%252F22955616%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1038\/nature11247\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=22955616\u0026amp;link_type=MED\u0026amp;atom=%2Fbiorxiv%2Fearly%2F2019%2F07%2F27%2F714402.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=000308347000039\u0026amp;link_type=ISI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-newisilink cit-ref-sprinkles-webofscience\u0022\u003E\u003Cspan\u003EWeb of 
Science\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[3].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-3-1\u0022 title=\u0022View reference [3] in text\u0022 id=\u0022ref-3\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-web\u0022 id=\u0022cit-714402v2.3\u0022 data-doi=\u002210.1101\/625434\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ. E.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EDavis\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EK. D.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EInsigne\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EE. M.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EJones\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EQ. B.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EHastings\u003C\/span\u003E\u003C\/span\u003E, and \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EKosuri\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EMultiplexed dissection of a model human transcription factor binding site architecture\u003C\/span\u003E. 
\u003Cspan class=\u0022cit-source\u0022\u003EbioRxiv\u003C\/span\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2019\u003C\/span\u003E.\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id-scheme\u0022\u003Edoi: \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E10.1101\/625434\u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E.\u003C\/span\u003E URL \u003Ca href=\u0022https:\/\/www.biorxiv.org\/content\/early\/2019\/05\/02\/625434\u0022\u003Ehttps:\/\/www.biorxiv.org\/content\/early\/2019\/05\/02\/625434\u003C\/a\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DbioRxiv%26rft_id%253Dinfo%253Adoi%252F10.1101%252F625434%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/ijlink\/YTozOntzOjQ6InBhdGgiO3M6MTQ6Ii9sb29rdXAvaWpsaW5rIjtzOjU6InF1ZXJ5IjthOjQ6e3M6ODoibGlua1R5cGUiO3M6NDoiQUJTVCI7czoxMToiam91cm5hbENvZGUiO3M6NzoiYmlvcnhpdiI7czo1OiJyZXNpZCI7czo4OiI2MjU0MzR2MiI7czo0OiJhdG9tIjtzOjM3OiIvYmlvcnhpdi9lYXJseS8yMDE5LzA3LzI3LzcxNDQwMi5hdG9tIjt9czo4OiJmcmFnbWVudCI7czowOiIiO30=\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-ijlink\u0022\u003E\u003Cspan\u003E\u003Cspan class=\u0022cit-reflinks-abstract\u0022\u003EAbstract\u003C\/span\u003E\u003Cspan class=\u0022cit-sep cit-reflinks-variant-name-sep\u0022\u003E\/\u003C\/span\u003E\u003Cspan class=\u0022cit-reflinks-full-text\u0022\u003E\u003Cspan class=\u0022free-full-text\u0022\u003EFREE \u003C\/span\u003EFull 
Text\u003C\/span\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[4].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-4-1\u0022 title=\u0022View reference [4] in text\u0022 id=\u0022ref-4\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-714402v2.4\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EGhandi\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ED.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ELee\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EMohammad-Noori\u003C\/span\u003E\u003C\/span\u003E, and \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM. A.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EBeer\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EEnhanced regulatory sequence prediction using gapped k-mer features\u003C\/span\u003E. 
\u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EPLoS computational biology\u003C\/abbr\u003E, \u003Cspan class=\u0022cit-vol\u0022\u003E10\u003C\/span\u003E(\u003Cspan class=\u0022cit-issue\u0022\u003E7\u003C\/span\u003E):\u003Cspan class=\u0022cit-fpage\u0022\u003Ee1003711\u003C\/span\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2014\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DPLoS%2Bcomputational%2Bbiology%26rft.volume%253D10%26rft.spage%253D1003711e%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[5].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-5-1\u0022 title=\u0022View reference [5] in text\u0022 id=\u0022ref-5\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-714402v2.5\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EI. 
J.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EGoodfellow\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EShlens\u003C\/span\u003E\u003C\/span\u003E, and \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EC.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ESzegedy\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EExplaining and harnessing adversarial examples\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003Estat\u003C\/abbr\u003E, \u003Cspan class=\u0022cit-vol\u0022\u003E1050\u003C\/span\u003E:\u003Cspan class=\u0022cit-fpage\u0022\u003E20\u003C\/span\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2015\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253Dstat%26rft.volume%253D1050%26rft.spage%253D20%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[6].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-6-1\u0022 title=\u0022View reference [6] in text\u0022 id=\u0022ref-6\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-714402v2.6\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan 
class=\u0022cit-name-given-names\u0022\u003EA.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EGupta\u003C\/span\u003E\u003C\/span\u003E and \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EZou\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EFeedback gan for dna optimizes protein functions\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003ENature Machine Intelligence\u003C\/abbr\u003E, \u003Cspan class=\u0022cit-vol\u0022\u003E1\u003C\/span\u003E(\u003Cspan class=\u0022cit-issue\u0022\u003E2\u003C\/span\u003E):\u003Cspan class=\u0022cit-fpage\u0022\u003E105\u003C\/span\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2019\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DNature%2BMachine%2BIntelligence%26rft.volume%253D1%26rft.spage%253D105%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[7].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-7-1\u0022 title=\u0022View reference [7] in text\u0022 id=\u0022ref-7\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-714402v2.7\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA.\u003C\/span\u003E \u003Cspan 
class=\u0022cit-name-surname\u0022\u003EGupta\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA. T.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EM\u00fcller\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EB. J.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EHuisman\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ. A.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EFuchs\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EP.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ESchneider\u003C\/span\u003E\u003C\/span\u003E, and \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EG.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ESchneider\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EGenerative recurrent networks for de novo drug design\u003C\/span\u003E. 
\u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EMolecular informatics\u003C\/abbr\u003E, \u003Cspan class=\u0022cit-vol\u0022\u003E37\u003C\/span\u003E(\u003Cspan class=\u0022cit-issue\u0022\u003E1-2\u003C\/span\u003E):\u003Cspan class=\u0022cit-fpage\u0022\u003E1700111\u003C\/span\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2018\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DMolecular%2Binformatics%26rft.volume%253D37%26rft.spage%253D1700111%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[8].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-8-1\u0022 title=\u0022View reference [8] in text\u0022 id=\u0022ref-8\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-714402v2.8\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EK.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EGuu\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ET. 
B.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EHashimoto\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EY.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EOren\u003C\/span\u003E\u003C\/span\u003E, and \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EP.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ELiang\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EGenerating sentences by editing prototypes\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003ETransactions of the Association of Computational Linguistics\u003C\/abbr\u003E, \u003Cspan class=\u0022cit-vol\u0022\u003E6\u003C\/span\u003E:\u003Cspan class=\u0022cit-fpage\u0022\u003E437\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E450\u003C\/span\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2018\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DTransactions%2Bof%2Bthe%2BAssociation%2Bof%2BComputational%2BLinguistics%26rft.volume%253D6%26rft.spage%253D437%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[9].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-9-1\u0022 title=\u0022View reference [9] in text\u0022 id=\u0022ref-9\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit 
ref-journal\u0022 id=\u0022cit-714402v2.9\u0022 data-doi=\u002210.1016\/j.molcel.2010.05.004\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EHeinz\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EC.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EBenner\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EN.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ESpann\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EE.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EBertolino\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EY. C.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ELin\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EP.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ELaslo\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ. 
X.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ECheng\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EC.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EMurre\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EH.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ESingh\u003C\/span\u003E\u003C\/span\u003E, and \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EC. K.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EGlass\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003ESimple combinations of lineage-determining transcription factors prime cis-regulatory elements required for macrophage and b cell identities\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EMolecular cell\u003C\/abbr\u003E, \u003Cspan class=\u0022cit-vol\u0022\u003E38\u003C\/span\u003E(\u003Cspan class=\u0022cit-issue\u0022\u003E4\u003C\/span\u003E):\u003Cspan class=\u0022cit-fpage\u0022\u003E576\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E589\u003C\/span\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2010\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca 
href=\u0022{openurl}?query=rft.jtitle%253DMolecular%2Bcell%26rft.stitle%253DMol%2BCell%26rft.aulast%253DHeinz%26rft.auinit1%253DS.%26rft.volume%253D38%26rft.issue%253D4%26rft.spage%253D576%26rft.epage%253D589%26rft.atitle%253DSimple%2Bcombinations%2Bof%2Blineage-determining%2Btranscription%2Bfactors%2Bprime%2Bcis-regulatory%2Belements%2Brequired%2Bfor%2Bmacrophage%2Band%2BB%2Bcell%2Bidentities.%26rft_id%253Dinfo%253Adoi%252F10.1016%252Fj.molcel.2010.05.004%26rft_id%253Dinfo%253Apmid%252F20513432%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1016\/j.molcel.2010.05.004\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=20513432\u0026amp;link_type=MED\u0026amp;atom=%2Fbiorxiv%2Fearly%2F2019%2F07%2F27%2F714402.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=000278448100012\u0026amp;link_type=ISI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-newisilink cit-ref-sprinkles-webofscience\u0022\u003E\u003Cspan\u003EWeb of Science\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[10].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-10-1\u0022 title=\u0022View reference [10] in text\u0022 id=\u0022ref-10\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit 
ref-journal\u0022 id=\u0022cit-714402v2.10\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EN.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EKilloran\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EL. J.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ELee\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EDelong\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ED.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EDuvenaud\u003C\/span\u003E\u003C\/span\u003E, and \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EB. J.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EFrey\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EGenerating and designing dna with deep generative models\u003C\/span\u003E. 
\u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EarXiv\u003C\/abbr\u003E preprint\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-arxiv-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id-scheme\u0022\u003EarXiv:\u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-arxiv\u0022\u003E1712.06148\u003C\/span\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2017\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003C\/ol\u003E\u003C\/div\u003E\u003Cspan class=\u0022highwire-journal-article-marker-end\u0022\u003E\u003C\/span\u003E\u003C\/div\u003E\u003Cspan class=\u0022related-urls\u0022\u003E\u003C\/span\u003E\u003C\/div\u003E\u003C\/div\u003E \u003C\/div\u003E\n\n \n \u003C\/div\u003E\n\u003C\/div\u003E\n \u003C\/div\u003E\n\u003C\/div\u003E\n\u003C\/div\u003E\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022https:\/\/www.biorxiv.org\/sites\/default\/files\/js\/js_zP7WWIfzbyzvaM63L39cNV2juU_1XVH7wduFK9gcMNI.js\u0022\u003E\u003C\/script\u003E\n\u003C\/body\u003E\u003C\/html\u003E"}