CINXE.COM

Google Cloud Storage Access

<!DOCTYPE html> <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en" dir="ltr"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/> <meta http-equiv="x-ua-compatible" content="IE=edge"></meta><meta name="format-detection" content="telephone=no"></meta><meta name="viewport" content="width=device-width, initial-scale=1"></meta><meta name="search" content="default"></meta><meta name="use.ic" content="no"></meta><meta name="tocstandalone" content="yes"></meta><link rel="canonical" href="https://help.alteryx.com/dataprep/en/platform/connections/connection-types/google-cloud-storage-access.html"></link><meta name="theme" content="1"></meta><meta name="search.placeholder" content="Search"></meta><meta name="search.results" content="Search results"></meta><meta name="no.search.results" content="No results found"></meta><script type="text/javascript"> var theme = '1'; window.versionsfile = ''; window.indexDict = new Array(); window.store = {}; window.portalLanguage = 'en'; window.enterKey = 'none'; var fuse_threshold = 0.3; var local_csh = false; var anchoroption = false; var instantsearch_minlength = 1; var useanchorlinks = false; useanchorlinks = true; var clicktocopy = 'Click to copy link'; var linkcopied = 'Copied!'; var collapseTocSectionOnLinkTitleClick = false; </script><title>Google Cloud Storage Access</title><link rel="stylesheet" type="text/css" href="../../../../css/docbook.css"></link><link rel="stylesheet" type="text/css" href="../../../../css/font-awesome.css"></link><link rel="stylesheet" type="text/css" href="../../../../css/roboto.font.css"></link><link rel="stylesheet" type="text/css" href="../../../../css/theme1.css"></link><link rel="stylesheet" type="text/css" href="../../../../css/theme1-colors.css"></link><link rel="stylesheet" type="text/css" href="../../../../css/content-theme2.css"></link><link rel="stylesheet" type="text/css" href="../../../../css/sm-core-css.css"></link><link rel="stylesheet" type="text/css" 
href="../../../../css/sm-simple.css"></link><link rel="stylesheet" type="text/css" href="../../../../css/style-print.css"></link><link rel="stylesheet" type="text/css" href="../../../../css/style-common.css"></link><link rel="stylesheet" type="text/css" href="../../../../css/style-modern-tables.css"></link><link rel="stylesheet" type="text/css" href="../../../../css/layout-custom-style.css"></link><script src="../../../../js/jquery-3/jquery-3.4.1.min.js" type="text/javascript"></script><script src="../../../../js/jquery-migrate-3.4.1.min.js" type="text/javascript"></script><script src="../../../../js/materialize.min.js" type="text/javascript"></script><script src="../../../../js/bootstrap.min.js" type="text/javascript"></script><script src="../../../../js/purl.js" type="text/javascript"></script><script src="../../../../js/jquery.smartmenus.js" type="text/javascript"></script><script src="../../../js/toc.js" type="text/javascript"></script><script src="../../../../js/create-toc.js" type="text/javascript"></script><script src="../../../../js/html5-2-mp-common.js" type="text/javascript"></script><script src="../../../../js/html5-2.js" type="text/javascript"></script><script src="../../../../js/checklist.js" type="text/javascript"></script><script src="../../../../js/clipboard.min.js" type="text/javascript"></script><script src="../../../../js/anchorlinks.js" type="text/javascript"></script><script src="../../../../js/updateurl.js" type="text/javascript"></script><script src="../../../../js/lunr.js" type="text/javascript"></script><script src="../../../../js/jquery.mark.min.js" type="text/javascript"></script><script src="../../../js/data.js" type="text/javascript"></script><script src="../../../../js/search.js" type="text/javascript"></script><script src="../../../../js/csh.js" type="text/javascript"></script><script src="../../../../js/jquery.paligocode.js" type="text/javascript"></script><script src="../../../../js/highlight-mergehtml-plugin.js" 
type="text/javascript"></script><script src="../../../../js/layout-custom-script.js" type="text/javascript"></script><meta name="generator" content="Paligo"></meta><link rel="prev" href="google-analytics-4-connections.html#create-connection-6707300" title="Create Connection"></link><link rel="next" href="google-cloud-storage-access.html#enable-6707301" title="Enable"></link><script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js?config=TeX-MML-AM_CHTML" async="async"></script><link rel="icon" href="/favicon.ico" type="image/x-icon"></link><script type="text/javascript">const containerId = 'GTM-KLR42PWW'; $(document).on('cookies.consented', () => { (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start': new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0], j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src= 'https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f); })(window,document,'script','dataLayer',containerId); }); </script><script type="text/javascript" src="../../../../js/cookie-consent.js"></script><script type="text/javascript"> /* Flag content images for the Materialize lightbox once the DOM is ready. Block comments are used so the script stays valid even when line breaks are collapsed. */ $(document).ready(function () { $(".mediaobject img").addClass('materialboxed'); /* Exclude images with links */ $(".mediaobject a img").removeClass('materialboxed'); /* Only initialize the lightbox when document.documentMode is falsy (presumably to skip Internet Explorer; confirm). */ if (!document.documentMode) { $('.materialboxed').materialbox(); }}); </script><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/googlecode.min.css"></link><script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script><script type="text/javascript"> $(document).on('ready ajaxComplete', function() { hljs.addPlugin(mergeHTMLPlugin); hljs.configure({'ignoreUnescapedHTML': true}); $('pre:not(.embedcode)').each(function() { /** * @type {string} */ var language = $(this).data('language'); if (language === 'plaintext' || language === 'text') { $(this).addClass(language).addClass('hljs'); 
return true; } else { var clone = this.cloneNode(true); hljs.highlightElement(clone); $(this).empty().append($(clone).contents()); $(this).attr("class", $(clone).attr("class")); } }); }); </script></head><body class="theme1 page-toc collapsible-sidebar-nav" data-spy="scroll" data-target=".section-nav-container" data-offset="100" data-link-prefix="../../../"><style type="text/css"> div.skipnav { } div.skipnav a { position: fixed; left: -10000px; top: 1.5em; width: 1px; height: 1px; overflow: hidden; } div.skipnav a:focus, div.skipnav a:active, div.skipnav a:hover { background: white; color: black; box-shadow: 5px 5px 5px 0px rgba(0,0,0,0.5); position: fixed; left: 2em; top: 1.5em; width: auto; height: auto; overflow: visible; text-decoration: underline; z-index: 99999; /* has to be higher than the side panel */ } </style><div class="skipnav"><a href="#content-wrapper">Skip to main content</a></div><header class="site-header"><nav class="site-header-navbar navbar navbar-fixed-top"><div class="navbar-container"><div class="navbar-header"><button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target=".site-sidebar" aria-expanded="false" aria-controls="navbar"><span class="sr-only">Toggle navigation</span><span class="icon-bar"></span><span class="icon-bar"></span><span class="icon-bar"></span></button><div id="logotype-container" class="pull-left"><a class="navbar-brand" href="/index.html" target="_blank" rel="noopener"><img id="logotype-pageheader" src="../../../../css/image/corporate-logo.png" alt="Corporate logotype" data-role="logotype" class="logo"></img></a></div></div><div class="navbar-collapse collapse" id="navbar"><ul class="top-nav-menu sm sm-simple"><li data-lang="en"><a href="https://help.alteryx.com/release-notes.html" class="external-top-nav-link" target="_blank">Release Notes</a></li><li data-lang="en"><a href="https://community.alteryx.com/" class="external-top-nav-link" target="_blank">Community</a></li><li data-lang="en"><a 
href="https://my.alteryx.com/" class="external-top-nav-link" target="_blank">My Alteryx</a></li></ul></div></div></nav></header><div class="site-body"><div class="site-body-container"><div class="site-body-row"><aside class="site-sidebar"><div class="site-sidebar-header"><button type="button" class="navbar-toggle" aria-controls="nav-site-sidebar"><span class="sr-only">Toggle navigation</span><span class="icon-bar"></span><span class="icon-bar"></span><span class="icon-bar"></span></button><a href="/index.html" target="_blank" rel="noopener"><img id="logotype-pageheader" src="../../../../css/image/corporate-logo.png" alt="Corporate logotype" data-role="logotype" class="logo"></img></a></div><form class="site-sidebar-search" autocomplete="off"><input type="text" placeholder="Search" class="form-control search-field" id="aa-search-input"></input></form><div id="toc-standalone-placeholder"></div></aside><div class="site-content"><div class="toolbar top-nav-on"><div class="toolbar-tools"><div id="navbar" class="navbar-collapse collapse"><ul class="top-nav-menu sm sm-simple"><li data-lang="en"><a href="https://help.alteryx.com/release-notes.html" class="external-top-nav-link" target="_blank">Release Notes</a></li><li data-lang="en"><a href="https://community.alteryx.com/" class="external-top-nav-link" target="_blank">Community</a></li><li data-lang="en"><a href="https://my.alteryx.com/" class="external-top-nav-link" target="_blank">My Alteryx</a></li></ul></div><div class="tool-print"><i class="fa fa-print" aria-hidden="true">print</i></div><div class="tool-search"><i class="fa fa-search" aria-hidden="true"></i></div><button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target=".site-sidebar" aria-expanded="false" aria-controls="navbar"><span class="sr-only">Toggle navigation</span><span class="icon-bar"></span><span class="icon-bar"></span><span class="icon-bar"></span></button></div></div><main><div id="top-pager"><ul class="pager"><li 
class="previous"><a accesskey="p" class="prev pull-left prev visible-lg visible-md" id="header-navigation-prev" href="google-analytics-4-connections.html#create-connection-6707300">Prev</a></li><li class="next"><a accesskey="n" class="pull-right next visible-lg visible-md" id="header-navigation-next" href="google-cloud-storage-access.html#enable-6707301">Next</a></li></ul></div><article class="topic content-container" id="content-wrapper"><div id="topic-content" class="topic-content"><div class="breadcrumb-container"><ul class="breadcrumb"><li class="breadcrumb-link"><span class="current-category">Dataprep by Trifacta</span></li><li class="breadcrumb-link"><a href="../../../platform.html">Platform</a></li><li class="breadcrumb-link"><a href="../../connections.html">Connections</a></li><li class="breadcrumb-link"><a href="../connection-types.html">Connection Types</a></li><li class="breadcrumb-node">Google Cloud Storage Access</li></ul></div><section xml:lang="en" lang="en" dir="ltr" class="section original-topic" data-origin-id="UUID-711360f5-fbc0-ca4e-1824-1da5030d0ffa" data-legacy-id="UUID-ab84f122-fcd0-f0c5-3f3a-f78b30782b49" data-permalink="platform/connections/connection-types/google-cloud-storage-access.html" data-topic-level="4" data-relative-prefix="../../../" data-publication-date="" id="google-cloud-storage-access"><div class="titlepage"><div><div class="title"><h1 class="title" style="clear: both">Google Cloud Storage Access</h1></div></div></div><p><span class="phrase">Cloud Storage</span> is a web service for storing and accessing files on <span class="phrase">Google Cloud Platform</span>. 
The service combines the performance and scalability of the Google cloud with advanced security.</p><div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; "><li class="listitem"><p>For more information on <span class="phrase">Cloud Storage</span>, see <a class="link" href="https://cloud.google.com/storage" target="_blank" rel="noopener">https://cloud.google.com/storage</a>.</p></li></ul></div><section dir="ltr" class="section internal" data-origin-id="" data-legacy-id="UUID-ab84f122-fcd0-f0c5-3f3a-f78b30782b49_id_GoogleCloudStorageAccess-Enable" data-publication-date="" id="enable-6707301"><div class="titlepage"><div><div class="title"><h2 class="title" style="clear: both">Enable</h2></div></div></div><p>A project owner does not need to enable access to <span class="phrase">Cloud Storage</span>. Access to <span class="phrase">Cloud Storage</span> is governed by permissions.</p><section dir="ltr" class="section internal" data-origin-id="" data-legacy-id="UUID-ab84f122-fcd0-f0c5-3f3a-f78b30782b49_id_GoogleCloudStorageAccess-IAMrole" data-publication-date="" id="iam-role-6707301"><div class="titlepage"><div><div class="title"><h3 class="title">IAM role</h3></div></div></div><p>An IAM role is used by the product to enable access for it to Google Cloud resources. 
The default IAM role that is assigned to each user in the project is granted access to read and write data in <span class="phrase">Cloud Storage</span>.</p><p>If you are using a custom IAM role to access Google Cloud resources, you must ensure that the role contains the appropriate permissions to read and write data in <span class="phrase">Cloud Storage</span>.</p><p>For more information, see <a class="link linktype-component" href="../../admin/admin-reference/required-dataprep-user-permissions.html" title="Required Dataprep User Permissions">Required Dataprep User Permissions</a>.</p></section><section dir="ltr" class="section internal" data-origin-id="" data-legacy-id="UUID-ab84f122-fcd0-f0c5-3f3a-f78b30782b49_id_GoogleCloudStorageAccess-Serviceaccount" data-publication-date="" id="service-account-6707301"><div class="titlepage"><div><div class="title"><h3 class="title">Service account</h3></div></div></div><p>A service account is used by the product to run jobs in <span class="phrase">Dataflow</span>. 
The default service account that is assigned to each user in the project is granted access to <span class="phrase">Cloud Storage</span>.</p><p>If you are using a custom service account, you must ensure that the account contains the appropriate permissions to read and write data in <span class="phrase">Cloud Storage</span>.</p><p>For more information, see <a class="link linktype-component" href="../../admin/admin-reference/required-dataprep-user-permissions/google-service-account-management.html" title="Google Service Account Management">Google Service Account Management</a>.</p></section></section><section dir="ltr" class="section internal" data-origin-id="" data-legacy-id="UUID-ab84f122-fcd0-f0c5-3f3a-f78b30782b49_id_GoogleCloudStorageAccess-Limitations" data-publication-date="" id="limitations-6707301"><div class="titlepage"><div><div class="title"><h2 class="title" style="clear: both">Limitations</h2></div></div></div><div dir="ltr" class="note note"><p class="admonition-label"><strong>Note</strong></p><p>The platform supports a single, global connection to <span class="phrase">Cloud Storage</span>. All users must use this connection.</p></div></section><section dir="ltr" class="section internal" data-origin-id="" data-legacy-id="UUID-ab84f122-fcd0-f0c5-3f3a-f78b30782b49_id_GoogleCloudStorageAccess-CreateConnection" data-publication-date="" id="create-cloud-storage-connection"><div class="titlepage"><div><div class="title"><h2 class="title" style="clear: both">Create Cloud Storage Connection</h2></div></div></div><p>You do not need to create a connection to <span class="phrase">Cloud Storage</span>. It is accessible based on permissions. 
See above.</p><section dir="ltr" class="section internal" data-origin-id="" data-legacy-id="UUID-ab84f122-fcd0-f0c5-3f3a-f78b30782b49_id_GoogleCloudStorageAccess-CreateviaAPI" data-publication-date="" id="create-via-api-6707301"><div class="titlepage"><div><div class="title"><h3 class="title">Create via API</h3></div></div></div><p>You cannot create <span class="phrase">Cloud Storage</span> connections through the APIs.</p></section></section><section dir="ltr" class="section internal" data-origin-id="" data-legacy-id="UUID-ab84f122-fcd0-f0c5-3f3a-f78b30782b49_id_GoogleCloudStorageAccess-UsingConnections" data-publication-date="" id="using-cloud-storage-connections"><div class="titlepage"><div><div class="title"><h2 class="title" style="clear: both">Using Cloud Storage Connections</h2></div></div></div><section dir="ltr" class="section internal" data-origin-id="" data-legacy-id="UUID-ab84f122-fcd0-f0c5-3f3a-f78b30782b49_id_GoogleCloudStorageAccess-Usesof" data-publication-date="" id="uses-of-cloud-storage"><div class="titlepage"><div><div class="title"><h3 class="title">Uses of Cloud Storage</h3></div></div></div><p><span class="phrase">Dataprep by Trifacta</span> can use <span class="phrase">Cloud Storage</span> for the following reading and writing tasks:</p><div class="procedure"><ol class="procedure" type="1"><li class="step"><p><span class="bold"><strong>Upload through application: </strong></span>When files are imported into <span class="phrase">Dataprep by Trifacta</span> as datasets, they are uploaded and stored in a location in <span class="phrase">Cloud Storage</span>. 
For more information, see <a class="link linktype-component" href="../../profile/preferences-page/user-profile-page.html" title="User Profile Page">User Profile Page</a>.</p></li><li class="step"><p><span class="bold"><strong>Creating Datasets from <span class="phrase">Cloud Storage</span> Files: </strong></span>You can read in from source data stored in <span class="phrase">Cloud Storage</span>. A source may be a single <span class="phrase">Cloud Storage</span> file or a folder of identically structured files. See Reading from Sources below.</p></li><li class="step"><p><span class="bold"><strong>Reading Datasets: </strong></span>When creating a dataset, you can pull your data from another dataset defined in <span class="phrase">Cloud Storage</span>.</p></li><li class="step"><p><span class="bold"><strong>Writing Results: </strong></span>After a job has been executed, you can write the results back to <span class="phrase">Cloud Storage</span>.</p></li></ol></div><div dir="ltr" class="note note"><p class="admonition-label"><strong>Note</strong></p><p>When <span class="phrase">Dataprep by Trifacta</span> executes a job on a dataset, the source data is untouched. Results are written to a new location, so that no data is disturbed by the process.</p></div><p><span class="bold"><strong>&gt; </strong></span><a class="link" href="https://console.cloud.google.com/storage/browser" target="_blank" rel="noopener">Open GCS</a></p></section><section dir="ltr" class="section internal" data-origin-id="" data-legacy-id="UUID-ab84f122-fcd0-f0c5-3f3a-f78b30782b49_id_GoogleCloudStorageAccess-Beforeyoubeginusing" data-publication-date="" id="before-you-begin-using-cloud-storage"><div class="titlepage"><div><div class="title"><h3 class="title">Before you begin using Cloud Storage</h3></div></div></div><p>Your administrator must configure read/write permissions to locations in <span class="phrase">Cloud Storage</span>. 
Please see the <span class="phrase">Cloud Storage</span> documentation.</p><div dir="ltr" class="warning warning"><p class="admonition-label"><strong>Warning</strong></p><p><span class="bold"><strong>Avoid reading and writing in the following locations:</strong></span></p><p><span class="bold"><strong>The Scratch Area location is used by <span class="phrase">Dataprep by Trifacta</span> for temporary storage.</strong></span></p><p><span class="bold"><strong>The Upload location is used for storing data that has been uploaded from local file.</strong></span></p><p><span class="bold"><strong>For more information on these locations, see <a class="link linktype-component" href="../../profile/preferences-page/user-profile-page.html" title="User Profile Page">User Profile Page</a>.</strong></span></p></div><p><span class="bold"><strong>Limitations:</strong></span></p><div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; "><li class="listitem"><p>The Requestor Pays feature of <span class="phrase">Cloud Storage</span> is not supported in <span class="phrase">Dataprep by Trifacta</span>.</p></li></ul></div></section></section><section dir="ltr" class="section internal" data-origin-id="" data-legacy-id="UUID-ab84f122-fcd0-f0c5-3f3a-f78b30782b49_id_GoogleCloudStorageAccess-StoringDatain" data-publication-date="" id="storing-data-in-cloud-storage"><div class="titlepage"><div><div class="title"><h2 class="title" style="clear: both">Storing Data in Cloud Storage</h2></div></div></div><p>Your administrator should provide raw data or locations and access for storing raw data within <span class="phrase">Cloud Storage</span>.</p><div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; "><li class="listitem"><p>All <span class="phrase">Alteryx</span> users should have a clear understanding of the folder structure within <span class="phrase">Cloud Storage</span> where each individual can read from and write results.</p></li><li 
class="listitem"><p>Users should know where shared data is located and where personal data can be saved without interfering with or confusing other users.</p></li></ul></div><div dir="ltr" class="note note"><p class="admonition-label"><strong>Note</strong></p><p><span class="phrase">Dataprep by Trifacta</span> does not modify source data in <span class="phrase">Cloud Storage</span>. Sources stored in <span class="phrase">Cloud Storage</span> are read without modification from their source locations, and sources that are uploaded to the platform are stored in the designated Upload location for each user. See <a class="link linktype-component" href="../../profile/preferences-page/user-profile-page.html" title="User Profile Page">User Profile Page</a>.</p></div><p><span class="bold"><strong>Show hidden:</strong></span></p><p>When reading from or writing to buckets on <span class="phrase">Cloud Storage</span>, you can optionally choose to show hidden files and folders. The names of hidden objects begin with a dot (<code class="code">.</code>) or an underscore (<code class="code">_</code>).</p><p>You can choose to use these files as:</p><div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; "><li class="listitem"><p>Sources for imported datasets</p></li><li class="listitem"><p>Output targets for writing job results</p></li></ul></div><div dir="ltr" class="note note"><p class="admonition-label"><strong>Note</strong></p><p>Hidden files and folders are typically hidden for a reason. For example, <span class="phrase">Dataprep by Trifacta</span> may write temporary files to buckets and then delete them. File structures may change at any time and without notice.</p></div><div dir="ltr" class="tip tip"><p class="admonition-label"><strong>Tip</strong></p><p>When importing a file from <span class="phrase">Cloud Storage</span>, you can optionally choose to show hidden files and folders. 
Hidden files may contain useful information, such as JSON representations of your visual profiles. File structures in hidden folders may change without notice at any time.</p></div><div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; "><li class="listitem"><p>For more information on importing hidden files, see <a class="link linktype-component" href="../../library-for-data/library-for-data-page/import-data-page.html" title="Import Data Page">Import Data Page</a>.</p></li><li class="listitem"><p>For more information when writing to storage, see <a class="link linktype-component" href="../../../trifacta-application/reference/ui-reference/transformer-page/run-job-page.html" title="Run Job Page">Run Job Page</a>.</p></li></ul></div><section dir="ltr" class="section internal" data-origin-id="" data-legacy-id="UUID-ab84f122-fcd0-f0c5-3f3a-f78b30782b49_id_GoogleCloudStorageAccess-SupportforCMEK" data-publication-date="" id="support-for-cmek-6707301"><div class="titlepage"><div><div class="title"><h3 class="title">Support for CMEK</h3></div></div></div><p>Use of Customer Managed Encryption Keys (CMEK) is supported and is transparent to the user. For more information, see <a class="link" href="https://cloud.google.com/kms/docs/cmek" target="_blank" rel="noopener">https://cloud.google.com/kms/docs/cmek</a>.</p></section><section dir="ltr" class="section internal" data-origin-id="" data-legacy-id="UUID-ab84f122-fcd0-f0c5-3f3a-f78b30782b49_id_GoogleCloudStorageAccess-Readingfromsources" data-publication-date="" id="reading-from-sources"><div class="titlepage"><div><div class="title"><h3 class="title">Reading from sources</h3></div></div></div><p>You can create a dataset from one or more files stored in <span class="phrase">Cloud Storage</span>.</p><p><span class="bold"><strong>Wildcards:</strong></span></p><p>You can parameterize your input paths to import source files as part of the same imported dataset. 
For more information, see <a class="link linktype-component" href="../../../trifacta-application/concepts/feature-overviews/overview-of-parameterization.html" title="Overview of Parameterization">Overview of Parameterization</a>.</p><p><span class="bold"><strong>Folder selection:</strong></span></p><p>When you select a folder in <span class="phrase">Cloud Storage</span> to create your dataset, you select all files in the folder to be included.</p><div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; "><li class="listitem"><p>This option selects all files in all sub-folders and bundles them into a single dataset. If your sub-folders contain separate datasets, you should be more specific in your folder selection.</p></li><li class="listitem"><p>All files used in a single imported dataset must be of the same format and have the same structure. For example, you cannot mix and match CSV and JSON files if you are reading from a single directory.</p></li></ul></div><p><span class="bold"><strong>Read file formats:</strong></span></p><p>From <span class="phrase">Cloud Storage</span>, <span class="phrase">Dataprep by Trifacta</span> can read the following file formats:</p><div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; "><li class="listitem"><p>CSV</p></li><li class="listitem"><p>JSON</p></li><li class="listitem"><p>AVRO</p></li><li class="listitem"><p>GZIP</p></li><li class="listitem"><p>BZIP2</p></li><li class="listitem"><p>TXT</p></li><li class="listitem"><p>XLS/XLSX</p></li><li class="listitem"><p>LOG</p></li><li class="listitem"><p>TSV</p></li></ul></div></section><section dir="ltr" class="section internal" data-origin-id="" data-legacy-id="UUID-ab84f122-fcd0-f0c5-3f3a-f78b30782b49_id_GoogleCloudStorageAccess-Creatingdatasets" data-publication-date="" id="creating-datasets"><div class="titlepage"><div><div class="title"><h3 class="title">Creating datasets</h3></div></div></div><p>When creating a dataset, you 
can choose to read data from a source stored in <span class="phrase">Cloud Storage</span> or from a local file.</p><div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; "><li class="listitem"><p><span class="phrase">Cloud Storage</span> sources are not moved or changed.</p></li><li class="listitem"><p>Local file sources are uploaded to the designated Upload location in <span class="phrase">Cloud Storage</span> where they remain and are not changed. This location is specified in your user profile. See <a class="link linktype-component" href="../../profile/preferences-page/user-profile-page.html" title="User Profile Page">User Profile Page</a>.</p></li></ul></div><p>Data may be individual files or all of the files in a folder. For more information, see Reading from Sources above.</p><section dir="ltr" class="section internal" data-origin-id="" data-legacy-id="UUID-ab84f122-fcd0-f0c5-3f3a-f78b30782b49_id_GoogleCloudStorageAccess-FullexecutiononBigQuery" data-publication-date="" id="full-execution-on-bigquery"><div class="titlepage"><div><div class="title"><h4 class="title">Full execution on BigQuery</h4></div></div></div><div dir="ltr" class="note note"><p class="admonition-label"><strong>Note</strong></p><p>This feature may not be available in all product editions. For more information on available features, see <a class="link" href="https://www.trifacta.com/pricing/" target="_blank" rel="noopener">Compare Editions</a>.</p></div><p>For <span class="phrase">Cloud Storage</span> data sources that are written to BigQuery, you may be able to execute the job in BigQuery.</p><div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; "><li class="listitem"><p>You must enable the Full execution for the GCS file option for existing flows that use files from <span class="phrase">Cloud Storage</span>. 
For more information, see <a class="link linktype-component" href="../../../trifacta-application/reference/ui-reference/flows-page/flow-view-page/flow-optimization-settings-dialog.html" title="Flow Optimization Settings Dialog">Flow Optimization Settings Dialog</a>.</p></li><li class="listitem"><p>Additional configuration and limitations may apply. For more information, see <a class="link linktype-component" href="../../../trifacta-application/concepts/feature-overviews/overview-of-job-execution/bigquery-running-environment.html" title="BigQuery Running Environment">BigQuery Running Environment</a>.</p></li></ul></div></section></section><section dir="ltr" class="section internal" data-origin-id="" data-legacy-id="UUID-ab84f122-fcd0-f0c5-3f3a-f78b30782b49_id_GoogleCloudStorageAccess-Writingresults" data-publication-date="" id="writing-results"><div class="titlepage"><div><div class="title"><h3 class="title">Writing results</h3></div></div></div><p>When your results from a job are generated, they can be stored back in <span class="phrase">Cloud Storage</span>. The <span class="phrase">Cloud Storage</span> location is available through the Output Destinations tab in the Job Details page. See <a class="link linktype-component" href="../../job-history/job-history-page/job-details-page.html" title="Job Details Page">Job Details Page</a>.</p><div dir="ltr" class="warning warning"><p class="admonition-label"><strong>Warning</strong></p><p><span class="bold"><strong>If your environment is using <span class="phrase">Cloud Storage</span>, do not use the Upload location for storage. This directory is used for storing uploads, which may be used by multiple users. Manipulating files outside of the product can destroy other users' data. 
Please use the tools provided through the interface for managing uploads from <span class="phrase">Cloud Storage</span>.</strong></span></p></div><div dir="ltr" class="note note"><p class="admonition-label"><strong>Note</strong></p><p>During the publishing process, <span class="phrase">Dataprep by Trifacta</span> may write temporary files to your storage bucket and then delete them. If you have enabled a storage retention policy on your bucket, that time period may interfere with the publishing process. For more information on storage retention policy, see <a class="link" href="https://cloud.google.com/storage/docs/bucket-lock#retention-policy" target="_blank" rel="noopener">https://cloud.google.com/storage/docs/bucket-lock#retention-policy</a>.</p></div></section><section dir="ltr" class="section internal" data-origin-id="" data-legacy-id="UUID-ab84f122-fcd0-f0c5-3f3a-f78b30782b49_id_GoogleCloudStorageAccess-Creatinganewdatasetfromresults" data-publication-date="" id="creating-a-new-dataset-from-results"><div class="titlepage"><div><div class="title"><h3 class="title">Creating a new dataset from results</h3></div></div></div><p>As part of writing results, you can choose to create a new dataset, so that you can chain together data wrangling tasks.</p><div dir="ltr" class="note note"><p class="admonition-label"><strong>Note</strong></p><p>When you create a new dataset as part of your results, the file or files are written to the designated output location for your user account. 
Depending on how your permissions are configured, this location may not be accessible to other users.</p></div></section></section><section dir="ltr" class="section internal" data-origin-id="" data-legacy-id="UUID-ab84f122-fcd0-f0c5-3f3a-f78b30782b49_id_GoogleCloudStorageAccess-Maintenance" data-publication-date="" id="maintenance"><div class="titlepage"><div><div class="title"><h2 class="title" style="clear: both">Maintenance</h2></div></div></div><div dir="ltr" class="note note"><p class="admonition-label"><strong>Note</strong></p><p>Files stored in <code class="code">/uploads</code> should be deleted with caution. These files are the sources for imported datasets created by uploading files from the local desktop. Files in <code class="code">/uploads</code> should only be removed if you are confident that they are no longer used as source data for any imported datasets. Otherwise, those datasets are broken.</p></div><p>Files stored in <code class="code">/tmp</code> are suitable for removal. Some details are below.</p><section dir="ltr" class="section internal" data-origin-id="" data-legacy-id="UUID-ab84f122-fcd0-f0c5-3f3a-f78b30782b49_id_GoogleCloudStorageAccess-jobtempfiles" data-publication-date="" id="dataflow-job-temp-files"><div class="titlepage"><div><div class="title"><h3 class="title">Dataflow job temp files</h3></div></div></div><p>When your jobs are executed on <span class="phrase">Dataflow</span>, the following temp files may be generated:</p><div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; "><li class="listitem"><p><code class="code">.pb</code> and <code class="code">.json</code> files are generated with each job run. After a job run has been completed, these files can be safely removed.</p></li><li class="listitem"><p><code class="code">dataflow-bundle.jar</code> contains code dependencies and may be reused in future job submissions. 
This JAR file can be more than 150 MB.</p><div dir="ltr" class="tip tip"><p class="admonition-label"><strong>Tip</strong></p><p>You can safely remove all but the latest version of <code class="code">dataflow-bundle.jar</code>. Older versions are no longer used. You can also delete the latest one, which is automatically replaced in the next job execution. However, it does require a re-transfer of the file.</p></div></li></ul></div></section></section><section dir="ltr" class="section internal" data-origin-id="" data-legacy-id="UUID-ab84f122-fcd0-f0c5-3f3a-f78b30782b49_id_GoogleCloudStorageAccess-Reference" data-publication-date="" id="reference-6707301"><div class="titlepage"><div><div class="title"><h2 class="title" style="clear: both">Reference</h2></div></div></div><p><span class="bold"><strong>Enable:</strong></span> Automatically enabled.</p><p><span class="bold"><strong>Create New Connection:</strong></span> n/a</p></section></section><div class="footer-content"><div class="section-toc section-toc-after"><div class="section-toc-title">In this section<span class="section-toc-title-delimiter">: </span></div></div><div class="glossary-definitions"></div></div><footer></footer></div></article><aside class="section-nav-container"><ul class="section-nav nav"><li><a href="#google-cloud-storage-access">Google Cloud Storage Access</a></li><li><a href="#enable-6707301">Enable</a><ul class="nav"><li><a href="#iam-role-6707301">IAM role</a></li><li><a href="#service-account-6707301">Service account</a></li></ul></li><li><a href="#limitations-6707301">Limitations</a></li><li><a href="#create-cloud-storage-connection">Create Cloud Storage Connection</a><ul class="nav"><li><a href="#create-via-api-6707301">Create via API</a></li></ul></li><li><a href="#using-cloud-storage-connections">Using Cloud Storage Connections</a><ul class="nav"><li><a href="#uses-of-cloud-storage">Uses of Cloud Storage</a></li><li><a href="#before-you-begin-using-cloud-storage">Before you begin 
using Cloud Storage</a></li></ul></li><li><a href="#storing-data-in-cloud-storage">Storing Data in Cloud Storage</a><ul class="nav"><li><a href="#support-for-cmek-6707301">Support for CMEK</a></li><li><a href="#reading-from-sources">Reading from sources</a></li><li><a href="#creating-datasets">Creating datasets</a><ul class="nav"></ul></li><li><a href="#writing-results">Writing results</a></li><li><a href="#creating-a-new-dataset-from-results">Creating a new dataset from results</a></li></ul></li><li><a href="#maintenance">Maintenance</a><ul class="nav"><li><a href="#dataflow-job-temp-files">Dataflow job temp files</a></li></ul></li><li><a href="#reference-6707301">Reference</a></li></ul></aside><article id="search-result-wrapper"><div class="search-container" style="display: none;"><h2>Search results</h2><ul class="searchresults"></ul><p class="noresults">No results found</p></div></article></main><div id="bottom-pager"><ul class="pager"><li class="previous"><a accesskey="p" class="prev pull-left prev visible-lg visible-md" id="header-navigation-prev" href="google-analytics-4-connections.html#create-connection-6707300">Prev</a></li><li class="next"><a accesskey="n" class="pull-right next visible-lg visible-md" id="header-navigation-next" href="google-cloud-storage-access.html#enable-6707301">Next</a></li></ul></div><footer class="site-footer"><div class="inner"><div class="copyright"> © 2025 Alteryx, Inc. </div><div class="publication-date"><span class="publication-date-text">Publication date</span><span class="pubdate-delimiter">: </span><span class="formatted-date"></span></div></div></footer></div></div></div></div><!--Google Tag Manager (noscript)--><noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-KLR42PWW" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript><!--End Google Tag Manager (noscript)--></body></html>

Pages: 1 2 3 4 5 6 7 8 9 10