CINXE.COM
[SAMZA-348] Configure Samza jobs through a stream - ASF JIRA
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"> <meta http-equiv="X-UA-Compatible" content="IE=Edge"/> <meta name="application-name" content="JIRA" data-name="jira" data-version="8.20.10"><meta name="ajs-server-scheme" content="https"> <meta name="ajs-server-port" content="443"> <meta name="ajs-server-name" content="issues.apache.org"> <meta name="ajs-behind-proxy" content="null"> <meta name="ajs-base-url" content="https://issues.apache.org/jira"> <meta name="ajs-viewissue-use-history-api" content="false"> <meta name="ajs-jira-base-url" content="https://issues.apache.org/jira"> <meta name="ajs-serverRenderedViewIssue" content="true"> <meta name="ajs-dev-mode" content="false"> <meta name="ajs-context-path" content="/jira"> <meta name="ajs-version-number" content="8.20.10"> <meta name="ajs-build-number" content="820010"> <meta name="ajs-is-beta" content="false"> <meta name="ajs-is-rc" content="false"> <meta name="ajs-is-snapshot" content="false"> <meta name="ajs-is-milestone" content="false"> <meta name="ajs-remote-user" content=""> <meta name="ajs-remote-user-fullname" content=""> <meta name="ajs-user-locale" content="en_UK"> <meta name="ajs-user-locale-group-separator" content=","> <meta name="ajs-app-title" content="ASF JIRA"> <meta name="ajs-keyboard-shortcuts-enabled" content="true"> <meta name="ajs-keyboard-accesskey-modifier" content="Alt"> <meta name="ajs-enabled-dark-features" 
content="[&quot;com.atlassian.jira.agile.darkfeature.editable.detailsview&quot;,&quot;nps.survey.inline.dialog&quot;,&quot;com.atlassian.jira.agile.darkfeature.edit.closed.sprint.enabled&quot;,&quot;jira.plugin.devstatus.phasetwo&quot;,&quot;jira.frother.reporter.field&quot;,&quot;atlassian.rest.xsrf.legacy.enabled&quot;,&quot;jira.issue.status.lozenge&quot;,&quot;com.atlassian.jira.config.BIG_PIPE&quot;,&quot;com.atlassian.jira.projects.issuenavigator&quot;,&quot;com.atlassian.jira.config.PDL&quot;,&quot;jira.plugin.devstatus.phasetwo.enabled&quot;,&quot;atlassian.aui.raphael.disabled&quot;,&quot;app-switcher.new&quot;,&quot;frother.assignee.field&quot;,&quot;com.atlassian.jira.projects.ProjectCentricNavigation.Switch&quot;,&quot;sd.internal.base.off.thread.on.completion.events.enabled&quot;,&quot;jira.onboarding.cyoa&quot;,&quot;com.atlassian.jira.agile.darkfeature.kanplan.enabled&quot;,&quot;sd.slavalue.record.updated.date.enabled&quot;,&quot;com.atlassian.jira.config.ProjectConfig.MENU&quot;,&quot;com.atlassian.jira.projects.sidebar.DEFER_RESOURCES&quot;,&quot;com.atlassian.jira.agile.darkfeature.kanplan.epics.and.versions.enabled&quot;,&quot;com.atlassian.jira.agile.darkfeature.sprint.goal.enabled&quot;,&quot;jira.zdu.admin-updates-ui&quot;,&quot;jira.zdu.jmx-monitoring&quot;,&quot;sd.sla.improved.rendering.enabled&quot;,&quot;com.atlassian.jira.migration.features.assess-l1-cloud-tooling&quot;,&quot;sd.canned.responses.enabled&quot;,&quot;sd.new.settings.sidebar.location.disabled&quot;,&quot;jira.zdu.cluster-upgrade-state&quot;,&quot;com.atlassian.jira.email.templates.readFromJiraHome&quot;,&quot;com.atlassian.jira.agile.darkfeature.splitissue&quot;,&quot;com.atlassian.jira.config.CoreFeatures.LICENSE_ROLES_ENABLED&quot;,&quot;jira.export.csv.enabled&quot;]"> <!-- The dark-features value above is a JSON array of feature keys; its quotes are entity-escaped so the attribute is not cut short at the first inner quote. --> <meta name="ajs-in-admin-mode" content="false"> <meta name="ajs-is-sysadmin" content="false"> <meta name="ajs-is-admin" content="false"> <meta name="ajs-outgoing-mail-enabled" content="true"> <meta name="ajs-archiving-enabled" content="true"> <meta name="ajs-date-relativize" content="true"> <meta name="ajs-date-time" content="HH:mm"> <meta name="ajs-date-day" content="EEEE HH:mm"> <meta name="ajs-date-dmy" content="dd/MMM/yy"> <meta name="ajs-date-complete" content="dd/MMM/yy HH:mm"> <script type="text/javascript">var 
AJS=AJS||{};AJS.debug=true;</script> <meta id="atlassian-token" name="atlassian-token" content="A5KQ-2QAV-T4JA-FDED_d249f54d2e4d0f0d835e0ee348cc9ac51803806a_lout"> <link rel="shortcut icon" href="/jira/s/-lmkfjk/820010/13pdxe5/_/images/fav-jsw.png"> <!--[if IE]><![endif]--> <script type="text/javascript"> (function() { var contextPath = '/jira'; function printDeprecatedMsg() { if (console && console.warn) { console.warn('DEPRECATED JS - contextPath global variable has been deprecated since 7.4.0. Use `wrm/context-path` module instead.'); } } Object.defineProperty(window, 'contextPath', { get: function() { printDeprecatedMsg(); return contextPath; }, set: function(value) { printDeprecatedMsg(); contextPath = value; } }); })(); </script> <script> window.WRM=window.WRM||{};window.WRM._unparsedData=window.WRM._unparsedData||{};window.WRM._unparsedErrors=window.WRM._unparsedErrors||{}; WRM._unparsedData["com.atlassian.plugins.atlassian-plugins-webresource-plugin:context-path.context-path"]="\"/jira\""; 
WRM._unparsedData["jira.core:feature-flags-data.feature-flag-data"]="{\"enabled-feature-keys\":[\"com.atlassian.jira.agile.darkfeature.editable.detailsview\",\"nps.survey.inline.dialog\",\"com.atlassian.jira.agile.darkfeature.edit.closed.sprint.enabled\",\"jira.plugin.devstatus.phasetwo\",\"jira.frother.reporter.field\",\"atlassian.rest.xsrf.legacy.enabled\",\"jira.issue.status.lozenge\",\"com.atlassian.jira.config.BIG_PIPE\",\"com.atlassian.jira.projects.issuenavigator\",\"com.atlassian.jira.config.PDL\",\"jira.plugin.devstatus.phasetwo.enabled\",\"atlassian.aui.raphael.disabled\",\"app-switcher.new\",\"frother.assignee.field\",\"com.atlassian.jira.projects.ProjectCentricNavigation.Switch\",\"sd.internal.base.off.thread.on.completion.events.enabled\",\"jira.onboarding.cyoa\",\"com.atlassian.jira.agile.darkfeature.kanplan.enabled\",\"sd.slavalue.record.updated.date.enabled\",\"com.atlassian.jira.config.ProjectConfig.MENU\",\"com.atlassian.jira.projects.sidebar.DEFER_RESOURCES\",\"com.atlassian.jira.agile.darkfeature.kanplan.epics.and.versions.enabled\",\"com.atlassian.jira.agile.darkfeature.sprint.goal.enabled\",\"jira.zdu.admin-updates-ui\",\"jira.zdu.jmx-monitoring\",\"sd.sla.improved.rendering.enabled\",\"com.atlassian.jira.migration.features.assess-l1-cloud-tooling\",\"sd.canned.responses.enabled\",\"sd.new.settings.sidebar.location.disabled\",\"jira.zdu.cluster-upgrade-state\",\"com.atlassian.jira.email.templates.readFromJiraHome\",\"com.atlassian.jira.agile.darkfeature.splitissue\",\"com.atlassian.jira.config.CoreFeatures.LICENSE_ROLES_ENABLED\",\"jira.export.csv.enabled\"],\"feature-flag-states\":{\"com.atlassian.jira.use.same.site.none.for.xsrf.token.cookie\":true,\"com.atlassian.jira.agile.darkfeature.handle.ug.usernames\":true,\"com.atlassian.jira.security.endpoint.non.admin.access.screens\":false,\"com.atlassian.jira.mailHandlerImapMessageQueryLegacy\":false,\"com.atlassian.jira.webhookEventsAsyncProcessing\":false,\"com.atlassian.jira.serAllowShareWithNo
nMember\":true,\"com.atlassian.troubleshooting.healthcheck.jira.license.limit\":false,\"com.atlassian.jira.diagnostics.perflog\":true,\"com.atlassian.jira.dbr\":true,\"com.atlassian.jira.agile.darkfeature.legacy.epic.picker\":false,\"com.atlassian.jira.issuetable.move.links.hidden\":true,\"com.atlassian.jira.agile.darkfeature.unlink.sprints.on.issue.move\":true,\"jira.renderer.consider.variable.format\":true,\"com.atlassian.jira.user.dbIdBasedKeyGenerationStrategy\":true,\"com.atlassian.jira.plugin.issuenavigator.jql.autocomplete.eagerlyLoaded\":false,\"com.atlassian.portfolio.dcBundling\":true,\"com.atlassian.jira.sharedEntityEditRights\":true,\"com.atlassian.jira.security.endpoint.anonymous.access.resolution\":false,\"com.atlassian.jira.agile.darkfeature.sprint.goal\":false,\"com.atlassian.jira.thumbnailsDeferredGeneration\":true,\"jira.cluster.monitoring.show.offline.nodes\":true,\"com.atlassian.jira.commentReactions\":true,\"data.pipeline.feature.jira.issue.links.export\":true,\"com.atlassian.jira.custom.csv.escaper\":true,\"com.atlassian.jira.fixedCommentDeletionNotifications\":true,\"com.atlassian.jira.allThumbnailsDeferred\":false,\"com.atlassian.jira.plugin.issuenavigator.filtersUxImprovment\":true,\"com.atlassian.jira.agile.darkfeature.kanplan.epics.and.versions\":false,\"com.atlassian.jira.jsd.mobile\":true,\"jira.customfields.cleanup.identification\":true,\"data.pipeline.feature.jira.all.exportable.custom.fields\":true,\"com.atlassian.jira.defaultValuesForSystemFields\":true,\"jira.dc.cleanup.cluser.tasks\":true,\"jira.customfields.bulk.delete\":true,\"com.atlassian.jira.issues.archiving.filters\":false,\"mail.batching.override.core\":true,\"jira.users.and.roles.page.in.react\":true,\"jira.security.csp.sandbox\":true,\"com.atlassian.jira.agile.darkfeature.velocity.chart.ui\":true,\"com.atlassian.jira.returnDefaultAvatarsForBrokenAvatars\":true,\"com.atlassian.jira.agile.darkfeature.sprint.auto.management\":false,\"jira.jql.suggestrecentfields\":false,\"co
m.atlassian.jira.gdpr.rtbf\":true,\"com.atlassian.jira.security.xsrf.session.token\":true,\"com.atlassian.jira.agile.darkfeature.optimistic.transitions\":true,\"com.atlassian.jira.security.project.admin.revoke.with.application.access\":true,\"com.atlassian.jiranomenclature\":true,\"com.atlassian.jira.security.endpoint.non.admin.access.avatar.system\":false,\"com.atlassian.jira.agile.darkfeature.kanplan\":false,\"com.atlassian.jira.agile.darkfeature.future.sprint.dates\":true,\"com.atlassian.jira.filtersAndDashboardsShareableWithAllGroupsAndRoles\":true,\"jira.customfields.paginated.ui\":true,\"com.atlassian.jira.agile.darkfeature.edit.closed.sprint\":false,\"jira.create.linked.issue\":true,\"com.atlassian.jira.advanced.audit.log\":true,\"jira.sal.host.connect.accessor.existing.transaction.will.create.transactions\":true,\"external.links.new.window\":true,\"jira.quick.search\":true,\"jira.jql.smartautoselectfirst\":false,\"jira.jql.membersof.ignoreGlobalPermissionsForAnonymous\":false,\"data.pipeline.feature.jira.issue.history.export\":true,\"com.atlassian.jira.security.endpoint.non.browse.projects.access.fields\":false,\"atlassian.cdn.static.assets\":true,\"jira.richeditor.bidi.warning\":true,\"mail.batching\":false,\"com.atlassian.jira.privateEntitiesEditable\":true,\"com.atlassian.jira.security.endpoint.anonymous.access.priority\":false,\"jira.priorities.per.project.edit.default\":false,\"com.atlassian.jira.agile.darkfeature.issues.in.epic.details.view\":true,\"jira.priorities.per.project.jsd\":true,\"com.atlassian.jira.plugin.issuenavigator.anonymousPreventCfData\":false,\"com.atlassian.jira.agile.darkfeature.rapid.boards.bands\":true,\"com.atlassian.jira.agile.darkfeature.flexible.boards\":true,\"com.atlassian.jira.agile.darkfeature.sprint.picker.allsprints.suggestion\":true,\"com.atlassian.jira.agile.darkfeature.epic.validate.visibility\":true,\"jira.dc.lock.leasing\":true,\"com.atlassian.jira.accessibility.personal.settings\":true,\"mail.batching.create.sectio
n.cf\":true,\"com.atlassian.jira.send.email.notifications.to.user.without.application.access\":false,\"com.atlassian.jira.security.endpoint.non.browse.projects.access.autocompletedata\":false,\"com.atlassian.mail.server.managers.hostname.verification\":true,\"com.atlassian.advancedroadmaps.rebrand\":true,\"com.atlassian.jira.upgrade.startup.fix.index\":true,\"com.atlassian.jira.security.endpoint.anonymous.access.projectCategory\":false,\"jira.redirect.anonymous.404.errors\":true,\"com.atlassian.jira.issuetable.draggable\":true,\"com.atlassian.jira.attachments.generate.unique.suffix\":true,\"com.atlassian.jira.agile.darkfeature.kanban.hide.old.done.issues\":true,\"jira.version.based.node.reindex.service\":true,\"com.atlassian.jira.agile.darkfeature.backlog.showmore\":true,\"com.atlassian.jira.agile.darkfeature.sprint.plan\":false,\"com.atlassian.jira.security.endpoint.anonymous.access.issueLinkType\":false,\"com.atlassian.jira.security.LegacyJiraTypeResolver.WARN_ONLY\":false,\"data.pipeline.feature.jira.schema.version.2\":true,\"com.atlassian.jira.agile.darkfeature.burnupchart\":true,\"com.atlassian.jira.agile.darkfeature.velocity.sprint.picker\":false,\"com.atlassian.jira.issues.archiving.browse\":true,\"jira.instrumentation.laas\":false,\"com.atlassian.jira.security.ChartUtils.browse.projects.permission.check\":false,\"mail.batching.user.notification\":true,\"com.atlassian.portfolio.permission.check.for.permissions\":true,\"com.atlassian.jira.agile.darkfeature.dataonpageload\":true,\"data.pipeline.feature.jira.archived.issue.export\":false,\"com.atlassian.jira.projects.per.project.permission.query\":true,\"com.atlassian.jira.issues.archiving\":true,\"index.use.snappy\":true,\"jira.priorities.per.project\":true}}"; WRM._unparsedData["jira.core:default-comment-security-level-data.DefaultCommentSecurityLevelHelpLink"]="{\"extraClasses\":\"default-comment-level-help\",\"title\":\"Commenting on an 
Issue\",\"url\":\"https://docs.atlassian.com/jira/jcore-docs-0820/Editing+and+collaborating+on+issues#Editingandcollaboratingonissues-restrictacomment\",\"isLocal\":false}"; WRM._unparsedData["jira.core:dateFormatProvider.allFormats"]="{\"dateFormats\":{\"meridiem\":[\"AM\",\"PM\"],\"eras\":[\"BC\",\"AD\"],\"months\":[\"January\",\"February\",\"March\",\"April\",\"May\",\"June\",\"July\",\"August\",\"September\",\"October\",\"November\",\"December\"],\"monthsShort\":[\"Jan\",\"Feb\",\"Mar\",\"Apr\",\"May\",\"Jun\",\"Jul\",\"Aug\",\"Sep\",\"Oct\",\"Nov\",\"Dec\"],\"weekdaysShort\":[\"Sun\",\"Mon\",\"Tue\",\"Wed\",\"Thu\",\"Fri\",\"Sat\"],\"weekdays\":[\"Sunday\",\"Monday\",\"Tuesday\",\"Wednesday\",\"Thursday\",\"Friday\",\"Saturday\"]},\"lookAndFeelFormats\":{\"relativize\":\"true\",\"time\":\"HH:mm\",\"day\":\"EEEE HH:mm\",\"dmy\":\"dd/MMM/yy\",\"complete\":\"dd/MMM/yy HH:mm\"}}"; WRM._unparsedData["com.atlassian.jira.jira-issue-nav-components:issueviewer.features"]="{\"rteEnabled\":true}"; WRM._unparsedData["com.atlassian.jira.jira-quick-edit-plugin:create-issue-data.data"]="{\"configurableSystemFieldIds\":[\"description\"]}"; WRM._unparsedData["com.onresolve.jira.groovy.groovyrunner:behaviours-translations.behaviours-translations-data-provider"]="{\"field.title.missing.value\":\"You must enter a value for this field\"}"; WRM._unparsedData["com.atlassian.jira.plugins.jira-dnd-attachment-plugin:dnd-issue-drop-zone.thumbnail-mime-types"]="\"image/png,image/vnd.wap.wbmp,image/x-png,image/jpeg,image/bmp,image/gif\""; WRM._unparsedData["com.atlassian.jira.plugins.jira-dnd-attachment-plugin:dnd-issue-drop-zone.upload-limit"]="\"62914560\""; WRM._unparsedData["com.atlassian.plugins.helptips.jira-help-tips:help-tip-manager.JiraHelpTipData"]="{\"anonymous\":true}"; WRM._unparsedData["com.atlassian.jira.jira-view-issue-plugin:controller-subtasks.controller.subtasks.parameters"]="{\"url\":\"/rest/api/2/issue/{issueId}/subtask/move\"}"; 
WRM._unparsedData["com.atlassian.jira.plugins.jira-wiki-editor:wiki-editor-thumbnails.thumbnails-allowed"]="false"; WRM._unparsedData["com.atlassian.jira.plugins.jira-wiki-editor:wiki-editor-resources.help-data"]="{\"showHelp\":true,\"editorDocumentationUrl\":[\"https://docs.atlassian.com/jira/jcore-docs-0820/Visual+editing\"],\"editorDocumentationTitle\":[\"Show me documentation for the visual editor\"]}"; WRM._unparsedData["jira.core:terminology-data.terminology"]="{\"terminologyEntries\":[{\"originalName\":\"sprint\",\"originalNamePlural\":\"sprints\",\"newName\":\"sprint\",\"newNamePlural\":\"sprints\",\"isDefault\":true},{\"originalName\":\"epic\",\"originalNamePlural\":\"epics\",\"newName\":\"epic\",\"newNamePlural\":\"epics\",\"isDefault\":true}],\"isTerminologyActive\":false}"; WRM._unparsedData["com.atlassian.analytics.analytics-client:policy-update-init.policy-update-data-provider"]="false"; WRM._unparsedData["com.atlassian.analytics.analytics-client:programmatic-analytics-init.programmatic-analytics-data-provider"]="false"; WRM._unparsedData["com.onresolve.jira.groovy.groovyrunner:web-item-response-renderer.web-item-actions-data-provider"]="[]"; WRM._unparsedData["com.atlassian.jira.plugins.jira-slack-server-integration-plugin:slack-link-error-resources.slack-link-error"]="{}"; WRM._unparsedData["jira.core:avatar-picker-data.data"]="{}"; WRM._unparsedData["com.atlassian.jira.jira-header-plugin:dismissedFlags.flags"]="{\"dismissed\":[]}"; WRM._unparsedData["com.atlassian.jira.jira-header-plugin:newsletter-signup-tip-init.newsletterSignup"]="{\"signupDescription\":\"Get updates, inspiration and best practices from the team behind Jira.\",\"formUrl\":\"https://www.atlassian.com/apis/exact-target/{0}/subscribe?mailingListId=1401671\",\"signupTitle\":\"Sign up!\",\"signupId\":\"newsletter-signup-tip\",\"showNewsletterTip\":false}"; 
WRM._unparsedData["com.atlassian.jira.project-templates-plugin:project-templates-plugin-resources.ptAnalyticsData"]="{\"instanceCreatedDate\":\"2011-01-31\"}"; WRM._unparsedData["jira.core:user-message-flags-data.adminLockout"]="{}"; WRM._unparsedData["jira.request.correlation-id"]="\"43987c039ea539\""; WRM._unparsedData["project-id"]="12314526"; WRM._unparsedData["project-key"]="\"SAMZA\""; WRM._unparsedData["project-name"]="\"Samza\""; WRM._unparsedData["project-type"]="\"software\""; WRM._unparsedData["com.atlassian.jira.jira-projects-issue-navigator:generic-filters"]="[{\"id\":\"allissues\",\"jql\":\"project = \\\"{0}\\\" ORDER BY {1}\",\"defaultOrderby\":\"created DESC\",\"label\":\"All issues\",\"requiresUser\":false,\"supportsInlineIssueCreate\":true,\"fields\":[]},{\"id\":\"allopenissues\",\"jql\":\"project = \\\"{0}\\\" AND resolution = Unresolved ORDER BY {1}\",\"defaultOrderby\":\"priority DESC, updated DESC\",\"label\":\"Open issues\",\"requiresUser\":false,\"supportsInlineIssueCreate\":true,\"fields\":[\"resolution\"]},{\"id\":\"doneissues\",\"jql\":\"project = \\\"{0}\\\" AND statusCategory = Done ORDER BY {1}\",\"defaultOrderby\":\"updated DESC\",\"label\":\"Done issues\",\"requiresUser\":false,\"supportsInlineIssueCreate\":false,\"fields\":[\"status\"]},{\"id\":\"recentlyviewed\",\"jql\":\"project = \\\"{0}\\\" AND issuekey in issueHistory() ORDER BY {1}\",\"defaultOrderby\":\"lastViewed DESC\",\"label\":\"Viewed recently\",\"requiresUser\":false,\"supportsInlineIssueCreate\":true,\"fields\":[\"issuekey\"]},{\"id\":\"addedrecently\",\"jql\":\"project = \\\"{0}\\\" AND created \u003e= -1w ORDER BY {1}\",\"defaultOrderby\":\"created DESC\",\"label\":\"Created recently\",\"requiresUser\":false,\"supportsInlineIssueCreate\":true,\"fields\":[\"created\"]},{\"id\":\"resolvedrecently\",\"jql\":\"project = \\\"{0}\\\" AND resolutiondate \u003e= -1w ORDER BY {1}\",\"defaultOrderby\":\"updated DESC\",\"label\":\"Resolved 
recently\",\"requiresUser\":false,\"supportsInlineIssueCreate\":false,\"fields\":[\"resolutiondate\"]},{\"id\":\"updatedrecently\",\"jql\":\"project = \\\"{0}\\\" AND updated \u003e= -1w ORDER BY {1}\",\"defaultOrderby\":\"updated DESC\",\"label\":\"Updated recently\",\"requiresUser\":false,\"supportsInlineIssueCreate\":true,\"fields\":[\"updated\"]}]"; WRM._unparsedData["com.atlassian.jira.jira-projects-issue-navigator:default-filter-priority"]="[\"allopenissues\",\"allissues\"]"; WRM._unparsedData["com.atlassian.jira.jira-projects-issue-navigator:can-manage-filters"]="false"; WRM._unparsedData["com.atlassian.jira.jira-projects-issue-navigator:project-filters"]="[]"; WRM._unparsedData["com.atlassian.jira.jira-projects-issue-navigator:can-create-issues"]="false"; WRM._unparsedData["projectId"]="12314526"; WRM._unparsedData["projectKey"]="\"SAMZA\""; WRM._unparsedData["projectType"]="\"software\""; WRM._unparsedData["com.atlassian.jira.jira-projects-issue-navigator:server-rendered"]="true"; WRM._unparsedData["archivingProjectHelpUrl"]="\"https://docs.atlassian.com/jira/jadm-docs-0820/Archiving+a+project\""; WRM._unparsedData["archivingIssueHelpUrl"]="\"https://docs.atlassian.com/jira/jadm-docs-0820/Archiving+an+issue\""; if(window.WRM._dataArrived)window.WRM._dataArrived();</script> <link type="text/css" rel="stylesheet" href="/jira/s/b62489a2eaac59d9b8a093c1a51d034f-CDN/-lmkfjk/820010/13pdxe5/49fa3aa3d35a2cc689cbf274e66cc41a/_/download/contextbatch/css/_super/batch.css" data-wrm-key="_super" data-wrm-batch-type="context" media="all"> <link type="text/css" rel="stylesheet" href="/jira/s/56490edcf9d54e35149505f78cca6a47-CDN/-lmkfjk/820010/13pdxe5/6cbddbc35e560711d24cc3abdeb0b2dd/_/download/contextbatch/css/jira.browse.project,jira.view.issue,project.issue.navigator,atl.general,atl.global,jira.global,jira.general,-_super/batch.css?agile_global_admin_condition=true&jag=true&jira.create.linked.issue=true&richediton=true&slack-enabled=true" 
data-wrm-key="jira.browse.project,jira.view.issue,project.issue.navigator,atl.general,atl.global,jira.global,jira.general,-_super" data-wrm-batch-type="context" media="all"> <script type="text/javascript" src="/jira/s/376e46e0779ab163ee1e4a22ec634b03-CDN/-lmkfjk/820010/13pdxe5/49fa3aa3d35a2cc689cbf274e66cc41a/_/download/contextbatch/js/_super/batch.js?locale=en-UK" data-wrm-key="_super" data-wrm-batch-type="context" data-initially-rendered></script> <script type="text/javascript" src="/jira/s/6b40dcb2aa50c5e4045ef6e58ab3f0ef-CDN/-lmkfjk/820010/13pdxe5/6cbddbc35e560711d24cc3abdeb0b2dd/_/download/contextbatch/js/jira.browse.project,jira.view.issue,project.issue.navigator,atl.general,atl.global,jira.global,jira.general,-_super/batch.js?agile_global_admin_condition=true&jag=true&jira.create.linked.issue=true&locale=en-UK&richediton=true&slack-enabled=true" data-wrm-key="jira.browse.project,jira.view.issue,project.issue.navigator,atl.general,atl.global,jira.global,jira.general,-_super" data-wrm-batch-type="context" data-initially-rendered></script> <script type="text/javascript" src="/jira/s/d41d8cd98f00b204e9800998ecf8427e-CDN/-lmkfjk/820010/13pdxe5/1.0/_/download/batch/jira.webresources:calendar-en/jira.webresources:calendar-en.js" data-wrm-key="jira.webresources:calendar-en" data-wrm-batch-type="resource" data-initially-rendered></script> <script type="text/javascript" src="/jira/s/d41d8cd98f00b204e9800998ecf8427e-CDN/-lmkfjk/820010/13pdxe5/1.0/_/download/batch/jira.webresources:calendar-localisation-moment/jira.webresources:calendar-localisation-moment.js" data-wrm-key="jira.webresources:calendar-localisation-moment" data-wrm-batch-type="resource" data-initially-rendered></script> <link type="text/css" rel="stylesheet" href="/jira/s/981f587853769311cda7c3b845131a06-CDN/-lmkfjk/820010/13pdxe5/cb5a5495a038c0744457f25821ba9ee8/_/download/contextbatch/css/jira.global.look-and-feel,-_super/batch.css" data-wrm-key="jira.global.look-and-feel,-_super" 
data-wrm-batch-type="context" media="all"> <script type="text/javascript" src="/jira/rest/api/1.0/shortcuts/820010/13d0e3ba62194e16a034b080ac028ecf/shortcuts.js?context=issuenavigation&context=issueaction"></script> <script> window.WRM=window.WRM||{};window.WRM._unparsedData=window.WRM._unparsedData||{};window.WRM._unparsedErrors=window.WRM._unparsedErrors||{}; WRM._unparsedData["com.atlassian.jira.jira-issue-nav-components:inline-edit-enabled"]="true"; WRM._unparsedData["should-display-chaperone"]="false"; if(window.WRM._dataArrived)window.WRM._dataArrived();</script> <link type="text/css" rel="stylesheet" href="/jira/s/3ac36323ba5e4eb0af2aa7ac7211b4bb-CDN/-lmkfjk/820010/13pdxe5/efa42a25652b26dfd802540c024826b3/_/download/contextbatch/css/com.atlassian.jira.projects.sidebar.init,-_super,-jira.view.issue,-project.issue.navigator/batch.css?jira.create.linked.issue=true&richediton=true" data-wrm-key="com.atlassian.jira.projects.sidebar.init,-_super,-jira.view.issue,-project.issue.navigator" data-wrm-batch-type="context" media="all"> <script type="text/javascript" src="/jira/s/7d73e82dc911998b80de6f4ba150e07e-CDN/-lmkfjk/820010/13pdxe5/efa42a25652b26dfd802540c024826b3/_/download/contextbatch/js/com.atlassian.jira.projects.sidebar.init,-_super,-jira.view.issue,-project.issue.navigator/batch.js?jira.create.linked.issue=true&locale=en-UK&richediton=true" data-wrm-key="com.atlassian.jira.projects.sidebar.init,-_super,-jira.view.issue,-project.issue.navigator" data-wrm-batch-type="context" data-initially-rendered></script> <meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"> <meta name="ajs-can-search-users" content="false"> <meta name="ajs-can-edit-watchers" content="false"> <meta name="ajs-default-avatar-url" content="https://issues.apache.org/jira/secure/useravatar?size=xsmall&avatarId=10453"> <meta name="ajs-issue-project-type" content="software"> <meta name="ajs-issue-key" content="SAMZA-348"> <meta name="ajs-server-view-issue-is-editable" 
content="false"> <title>[SAMZA-348] Configure Samza jobs through a stream - ASF JIRA</title> <link rel="search" type="application/opensearchdescription+xml" href="/jira/osd.jsp" title="[SAMZA-348] Configure Samza jobs through a stream - ASF JIRA"/> </head> <body id="jira" class="aui-layout aui-theme-default " data-version="8.20.10"> <div id="page"> <header id="header" role="banner"> <!-- On DOM ready, asks the license-banner module to show its banner and its flag; both calls receive an empty string on this page. The lint directive is a block comment so it cannot swallow the rest of the script when the markup is collapsed onto one line. --> <script> require(["jquery", "jira/license-banner"], function ($, licenseBanner) { $(function () { /* eslint-disable-line @atlassian/onready-checks/no-jquery-onready */ licenseBanner.showLicenseBanner(""); licenseBanner.showLicenseFlag(""); }); }); </script> <div id="browser-warning" class="aui-message aui-message-error closeable"> <p> You are using an unsupported browser or your browser might be in the Compatibility View mode. See <a href="https://docs.atlassian.com/jira/jadm-docs-0820/Supported+Platforms"> Supported Platforms </a> for more information. </p> </div> <!-- Skip links: login, main content, sidebar. --> <a class="aui-skip-link" href="/jira/login.jsp?os_destination=%2Fbrowse%2FSAMZA-348">Log in</a><a class="aui-skip-link" href="#main">Skip to main content</a><a class="aui-skip-link" href="#sidebar">Skip to sidebar</a><nav class="aui-header aui-dropdown2-trigger-group" aria-label="Site"><div class="aui-header-inner"><div class="aui-header-before"><button class=" aui-dropdown2-trigger app-switcher-trigger aui-dropdown2-trigger-arrowless" aria-controls="app-switcher" aria-haspopup="true" role="button" data-aui-trigger href="#app-switcher"><span class="aui-icon aui-icon-small aui-iconfont-appswitcher">Linked Applications</span></button><!-- data-environment holds a JSON object; its quotes are entity-escaped so the whole object stays inside the attribute value. --><div id="app-switcher" class="aui-dropdown2 aui-style-default" role="menu" hidden data-is-switcher="true" data-environment="{&quot;isUserAdmin&quot;:false,&quot;isAppSuggestionAvailable&quot;:false,&quot;isSiteAdminUser&quot;:false}"><div class="app-switcher-loading">Loading…</div></div></div><div class="aui-header-primary"><span id="logo" class="aui-header-logo aui-header-logo-custom"><a 
href="https://issues.apache.org/jira/secure/MyJiraHome.jspa" aria-label="Go to home page"><img src="/jira/s/-lmkfjk/820010/13pdxe5/_/jira-logo-scaled.png" alt="ASF JIRA" /></a></span><!-- Primary navigation: Dashboards, Projects and Issues. Each link is followed by an empty dropdown container (presumably populated on demand; confirm against the AUI dropdown2 scripts). --><ul class='aui-nav'><li><a href="/jira/secure/Dashboard.jspa" class=" aui-nav-link aui-dropdown2-trigger aui-dropdown2-ajax" id="home_link" aria-haspopup="true" aria-controls="home_link-content" title="View and manage your dashboards" accesskey="d" elementtiming="app-header">Dashboards</a><div class="aui-dropdown2 aui-style-default" id="home_link-content" data-aui-dropdown2-ajax-key="home_link"></div></li><li><a href="/jira/browse/SAMZA" class=" aui-nav-link aui-dropdown2-trigger aui-dropdown2-ajax" id="browse_link" aria-haspopup="true" aria-controls="browse_link-content" title="View recent projects and browse a list of projects" accesskey="p">Projects</a><div class="aui-dropdown2 aui-style-default" id="browse_link-content" data-aui-dropdown2-ajax-key="browse_link"></div></li><li><a href="/jira/issues/" class=" aui-nav-link aui-dropdown2-trigger aui-dropdown2-ajax" id="find_link" aria-haspopup="true" aria-controls="find_link-content" title="Search for issues and view recent issues" accesskey="i">Issues</a><div class="aui-dropdown2 aui-style-default" id="find_link-content" data-aui-dropdown2-ajax-key="find_link"></div></li> </ul></div><div class="aui-header-secondary"><!-- Secondary navigation: the quick search form (GET to QuickSearch.jspa) followed by the help menu trigger. --><ul class='aui-nav'> <li id="quicksearch-menu"> <form action="/jira/secure/QuickSearch.jspa" method="get" id="quicksearch" class="aui-quicksearch dont-default-focus ajs-dirty-warning-exempt"> <input id="quickSearchInput" autocomplete="off" class="search" type="text" title="Search" placeholder="Search" name="searchString" accessKey="q" /> <input type="submit" class="hidden" value="Search"> </form> </li> <li id="system-help-menu"> <a class="aui-nav-link aui-dropdown2-trigger aui-dropdown2-trigger-arrowless" id="help_menu" aria-haspopup="true" aria-owns="system-help-menu-content" 
href="https://docs.atlassian.com/jira/jcore-docs-0820/" target="_blank" rel="noopener noreferrer" title="Help"><span class="aui-icon aui-icon-small aui-iconfont-question-filled">Help</span></a> <!-- Help dropdown: documentation, keyboard shortcuts, about and credits links. --> <div id="system-help-menu-content" class="aui-dropdown2 aui-style-default"> <div class="aui-dropdown2-section"> <ul id="jira-help" class="aui-list-truncate"> <li> <a id="view_core_help" class="aui-nav-link " title="Go to the online documentation for Jira Core" href="https://docs.atlassian.com/jira/jcore-docs-0820/" target="_blank" rel="noopener noreferrer">Jira Core help</a> </li> <li> <a id="keyshortscuthelp" class="aui-nav-link " title="Get more information about Jira's Keyboard Shortcuts" href="/jira/secure/ViewKeyboardShortcuts!default.jspa" target="_blank" >Keyboard Shortcuts</a> </li> <li> <a id="view_about" class="aui-nav-link " title="Get more information about Jira" href="/jira/secure/AboutPage.jspa" >About Jira</a> </li> <li> <a id="view_credits" class="aui-nav-link " title="See who did what" href="/jira/secure/credits/AroundTheWorld!default.jspa" target="_blank" >Jira Credits</a> </li> </ul> </div> </div> </li> <li id="user-options"> <a class="aui-nav-link login-link" href="/jira/login.jsp?os_destination=%2Fbrowse%2FSAMZA-348">Log In</a> <div id="user-options-content" class="aui-dropdown2 aui-style-default"> <div class="aui-dropdown2-section"> </div> </div> </li> </ul></div></div><!-- .aui-header-inner--><aui-header-end></aui-header-end></nav><!-- .aui-header --> </header> <!-- Site announcement banner. Only the outer wrapper carries the id so that it stays unique in the document; the nested wrapper keeps its class. --> <div id="announcement-banner" class="alertHeader"> <div class="alertHeader"> <div style="border: solid red; border-width: 0 1em 0; font-size:1.2em; text-align:center;background-color:#EEE"><p>Public signup for this instance is <strong>disabled</strong>. Go to our <a href="https://selfserve.apache.org/jira-account.html">Self serve sign up page</a> to request an account. 
Report potential security issues <a href="https://apache.org/security/#reporting-a-vulnerability">privately</a></p></div> </div> </div> <!-- Page content: a big-pipe placeholder for the sidebar, a collapsed sidebar shell, then the issue view. --> <div id="content"> <big-pipe data-id="sidebar-id" unresolved></big-pipe><section class="aui-sidebar sidebar-placeholder" ><div class="aui-sidebar-wrapper"><div class="aui-sidebar-body"></div><div class="aui-sidebar-footer"><button class="aui-button aui-button-subtle aui-sidebar-toggle aui-sidebar-footer-tipsy" title="Expand sidebar ( [ )" data-tooltip="Expand sidebar ( [ )"><span class="aui-icon aui-icon-small aui-iconfont-chevron-double-left"></span></button></div></div></section><!-- Forwards the big-pipe element's "error" and "success" events to the sidebar placeholder initializer's onError/onSuccess, then removes this script tag from the DOM. --><script id="projects-sidebar-events-attach"> (function () { var scriptTag = document.getElementById('projects-sidebar-events-attach'); var sidebarElement = document.querySelector('big-pipe[data-id=sidebar-id]'); sidebarElement.addEventListener('error', function (e) { require(['jira/projects/sidebar/sidebar-placeholder-initializer'], function (sidebarPlaceholderInitializer) { sidebarPlaceholderInitializer.onError(e); }); }); sidebarElement.addEventListener('success', function (e) { require(['jira/projects/sidebar/sidebar-placeholder-initializer'], function (sidebarPlaceholderInitializer) { sidebarPlaceholderInitializer.onSuccess(e); }); }); scriptTag.parentElement.removeChild(scriptTag); }()) </script><!-- Builds the AUI sidebar for the .aui-sidebar element, passes it to the expansion manager, then removes this script tag from the DOM. --><script id="projects-sidebar-init"> require(['jira/projects/sidebar/expansion-manager'], function(expansionManager) { var scriptTag = document.getElementById('projects-sidebar-init'); var sidebar = AJS.sidebar('.aui-sidebar'); expansionManager(sidebar); scriptTag.parentElement.removeChild(scriptTag); }); </script><div class="aui-page-panel" ><div class="aui-page-panel-inner"><div class="issue-navigator"><main id="main" class="content" role="main"><div class="issue-view"><div class="navigation-tools"><div class="pager-container"></div><div class="collapse-container"></div></div><div class="issue-container"><div id="issue-content" 
class="issue-edit-form"><header id="stalker" class="issue-header js-stalker"><!-- Issue header: project avatar, breadcrumbs (project and issue key) and the issue summary heading. --><div class="issue-header-content"><div class="aui-page-header" ><div class="aui-page-header-inner"><div class="aui-page-header-image" ><span id="12314526" class="aui-avatar aui-avatar-large aui-avatar-project"><span class="aui-avatar-inner"><img id="project-avatar" alt="Uploaded image for project: 'Samza'" src="https://issues.apache.org/jira/secure/projectavatar?pid=12314526&amp;avatarId=36734" /></span></span></div><div class="aui-page-header-main" ><ol class="aui-nav aui-nav-breadcrumbs"><li><a id="project-name-val" href="/jira/browse/SAMZA">Samza</a></li><li><a class="issue-link" data-issue-key="SAMZA-348" href="/jira/browse/SAMZA-348" id="key-val" rel="12728226">SAMZA-348</a></li></ol><h1 id="summary-val">Configure Samza jobs through a stream</h1></div><div class="aui-page-header-actions" ><div id="issue-header-pager"></div></div></div></div><!-- Command bar: a login button, four empty button groups (operations, transitions, admin, restore) and the Export dropdown trigger. Attributes are separated by whitespace as the HTML syntax requires. --><div class="command-bar"><div class="ops-cont"><div class="ops-menus aui-toolbar2"><div class="aui-toolbar2-inner"><div class="aui-toolbar2-primary"><div id="opsbar-ops-login-lnk_container" class="aui-buttons pluggable-ops"><a id="ops-login-lnk" title="Log In" class="aui-button toolbar-trigger" href="/jira/login.jsp?os_destination=%2Fbrowse%2FSAMZA-348"><span class="trigger-label">Log In</span></a></div><div id="opsbar-opsbar-operations" class="aui-buttons pluggable-ops"></div><div id="opsbar-opsbar-transitions" class="aui-buttons pluggable-ops"></div><div id="opsbar-opsbar-admin" class="aui-buttons pluggable-ops"></div><div id="opsbar-opsbar-restore" class="aui-buttons pluggable-ops"></div></div><div class="aui-toolbar2-secondary"><div id="opsbar-jira.issue.tools" class="aui-buttons pluggable-ops"><a href="#" id="viewissue-export" aria-owns="viewissue-export_drop" aria-haspopup="true" title="Export this issue in another format" class="aui-button aui-dropdown2-trigger" data-aui-alignment-container=".command-bar"><span class="icon icon-default aui-icon 
aui-icon-small aui-iconfont-export"></span> <span class="dropdown-text">Export</span></a></div></div></div></div><aui-dropdown-menu id="viewissue-export_drop"><ul><aui-item-link href="/jira/si/jira.issueviews:issue-xml/SAMZA-348/SAMZA-348.xml"id="jira.issueviews:issue-xml"><span class="trigger-label">XML</span></aui-item-link><aui-item-link href="/jira/si/jira.issueviews:issue-word/SAMZA-348/SAMZA-348.doc"id="jira.issueviews:issue-word"><span class="trigger-label">Word</span></aui-item-link><aui-item-link href="/jira/si/jira.issueviews:issue-html/SAMZA-348/SAMZA-348.html"id="jira.issueviews:issue-html"><span class="trigger-label">Printable</span></aui-item-link><aui-item-link href="/jira/si/com.atlassian.jira.plugins.jira-importers-plugin:issue-json/SAMZA-348/SAMZA-348.json"id="com.atlassian.jira.plugins.jira-importers-plugin:issue-json"><span class="trigger-label">JSON</span></aui-item-link></ul></aui-dropdown-menu></div></div></div></header><div class="issue-body-content"><div class="aui-group issue-body"><div class="aui-item issue-main-column"><div id=details-module class="module toggle-wrap"><div id="details-module_heading" class="mod-header"><button class="aui-button toggle-title" aria-label="Details" aria-controls="details-module" aria-expanded="true"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="14"><g fill="none" fill-rule="evenodd"><path d="M3.29175 4.793c-.389.392-.389 1.027 0 1.419l2.939 2.965c.218.215.5.322.779.322s.556-.107.769-.322l2.93-2.955c.388-.392.388-1.027 0-1.419-.389-.392-1.018-.392-1.406 0l-2.298 2.317-2.307-2.327c-.194-.195-.449-.293-.703-.293-.255 0-.51.098-.703.293z" fill="#344563"/></g></svg></button><h4 class="toggle-title" id="details-module-label">Details</h4><ul class="ops"></ul></div><div class="mod-content"> <ul id="issuedetails" class="property-list two-cols"> <li class="item"> <div class="wrap"> <strong class="name" title="Type"> <label for="issuetype">Type:</label> </strong> <span id="type-val" class="value"> <img 
alt="" height="16" src="/jira/secure/viewavatar?size=xsmall&avatarId=21133&avatarType=issuetype" title="Bug - A problem which impairs or prevents the functions of the product." width="16" /> Bug </span> </div> </li> <li class="item item-right"> <div class="wrap"> <strong class="name" title="Status">Status:</strong> <span id="status-val" class="value"> <span class=" jira-issue-status-lozenge aui-lozenge jira-issue-status-lozenge-blue-gray jira-issue-status-lozenge-new jira-issue-status-lozenge-max-width-medium" data-tooltip="&lt;span class=&quot;jira-issue-status-tooltip-title&quot;&gt;Open&lt;/span&gt;&lt;br&gt;&lt;span class=&quot;jira-issue-status-tooltip-desc&quot;&gt;The issue is open and ready for the assignee to start work on it.&lt;/span&gt;">Open</span> </span> </div> </li> <li class="item new"> <div class="wrap"> <strong class="name" title="Priority"> <label for="priority-field">Priority:</label> </strong> <span id="priority-val" class="value"> <img alt="" height="16" src="/jira/images/icons/priorities/major.svg" title="Major - Major loss of function." 
width="16" /> Major </span> </div> </li> <li class="item item-right"> <div class="wrap"> <strong class="name" title="Resolution">Resolution:</strong> <span id="resolution-val" class="value unresolved" > Unresolved </span> </div> </li> <li class="item"> <div class="wrap"> <strong class="name" title="Affects Version/s"> <label for="versions-textarea">Affects Version/s:</label> </strong> <span id="versions-val" class="value"> <span class="shorten" id="versions-field"> <span title="0.7.0 ">0.7.0</span> </span> </span> </div> </li> <li class="item item-right"> <div class="wrap"> <strong class="name" title="Fix Version/s"> <label for="fixVersions"> Fix Version/s: </label> </strong> <span id="fixfor-val" class="value"> None </span> </div> </li> <li class="item"> <div class="wrap"> <strong class="name" title="Component/s"> <label for="components">Component/s:</label> </strong> <span id="components-val" class="value"> None </span> </div> </li> <li class="item full-width"> <div class="wrap" id="wrap-labels"> <strong class="name" title="Labels"> <label for="labels-textarea">Labels:</label> </strong> <div class="labels-wrap value"> <ul class="labels" id="labels-12728226-value"> <li><a class="lozenge" href="/jira/issues/?jql=labels+%3D+design" title="design"><span>design</span></a></li> <li><a class="lozenge" href="/jira/issues/?jql=labels+%3D+project" title="project"><span>project</span></a></li> </ul> </div> </div> </li> </ul> </div></div><div id=descriptionmodule class="module toggle-wrap"><div id="descriptionmodule_heading" class="mod-header"><button class="aui-button toggle-title" aria-label="Description" aria-controls="descriptionmodule" aria-expanded="true"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="14"><g fill="none" fill-rule="evenodd"><path d="M3.29175 4.793c-.389.392-.389 1.027 0 1.419l2.939 2.965c.218.215.5.322.779.322s.556-.107.769-.322l2.93-2.955c.388-.392.388-1.027 0-1.419-.389-.392-1.018-.392-1.406 0l-2.298 
2.317-2.307-2.327c-.194-.195-.449-.293-.703-.293-.255 0-.51.098-.703.293z" fill="#344563"/></g></svg></button><h4 class="toggle-title" id="descriptionmodule-label">Description</h4><ul class="ops"></ul></div><div class="mod-content"><div id="description-val" class="field-ignore-highlight"> <div class="user-content-block"> <p>Samza's existing config setup is problematic for a number of reasons:</p> <ol> <li>It's completely immutable once a job starts. This prevents any dynamic reconfiguration and auto-scaling. It is debatable whether we want these features or not, but our existing implementation actively prevents it. See <a href="https://issues.apache.org/jira/browse/SAMZA-334" title="Need for asymmetric container config" class="issue-link" data-issue-key="SAMZA-334">SAMZA-334</a> for discussion.</li> <li>We pass existing configuration through environment variables. YARN exports environment variables in a shell script, which limits the size to the varargs length on the machine. This is usually ~128KB. See <a href="https://issues.apache.org/jira/browse/SAMZA-333" title="Large samza configurations results in yarn job failure" class="issue-link" data-issue-key="SAMZA-333">SAMZA-333</a> and <a href="https://issues.apache.org/jira/browse/SAMZA-337" title="Compress Samza configuration passed to Yarn" class="issue-link" data-issue-key="SAMZA-337"><del>SAMZA-337</del></a> for details.</li> <li>User-defined configuration (the Config object) and programmatic configuration (checkpoints and TaskName:State mappings (see <a href="https://issues.apache.org/jira/browse/SAMZA-123" title="Move topic partition grouping to the AM and generalize" class="issue-link" data-issue-key="SAMZA-123"><del>SAMZA-123</del></a>)) are handled differently. 
It's debatable whether this makes sense.</li> </ol> <p>In <a href="https://issues.apache.org/jira/browse/SAMZA-123" title="Move topic partition grouping to the AM and generalize" class="issue-link" data-issue-key="SAMZA-123"><del>SAMZA-123</del></a>, <a href="https://issues.apache.org/jira/secure/ViewProfile.jspa?name=jghoman" class="user-hover" rel="jghoman">jghoman</a> and I propose implementing a ConfigLog. This log would replace both the checkpoint topic and the existing config environment variables in SamzaContainer and Samza's YARN AM.</p> <p>I'd like to keep this ticket's scope limited to just the implementation of the ConfigLog, and not re-designing how Samza's config is used in the code (<a href="https://issues.apache.org/jira/browse/SAMZA-40" title="Refactor Samza configuration" class="issue-link" data-issue-key="SAMZA-40">SAMZA-40</a>). We should, however, discuss how this feature would affect dynamic reconfiguration/auto-scaling.</p> </div> </div> </div></div><div id=dnd-metadata class="module toggle-wrap"><div id="dnd-metadata_heading" class="mod-header"><button class="aui-button toggle-title" aria-label="Attachments" aria-controls="dnd-metadata" aria-expanded="true"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="14"><g fill="none" fill-rule="evenodd"><path d="M3.29175 4.793c-.389.392-.389 1.027 0 1.419l2.939 2.965c.218.215.5.322.779.322s.556-.107.769-.322l2.93-2.955c.388-.392.388-1.027 0-1.419-.389-.392-1.018-.392-1.406 0l-2.298 2.317-2.307-2.327c-.194-.195-.449-.293-.703-.293-.255 0-.51.098-.703.293z" fill="#344563"/></g></svg></button><h4 class="toggle-title" id="dnd-metadata-label">Attachments</h4><ul class="ops"></ul></div><div class="mod-content"><div id="dnd-metadata-webpanel" data-can-attach="false" data-project-type="software" data-upload-limit="62914560" data-thumbnails-allowed="false"></div></div></div><div id=attachmentmodule class="module toggle-wrap"><div id="attachmentmodule_heading" class="mod-header"><button 
class="aui-button toggle-title" aria-label="Attachments" aria-controls="attachmentmodule" aria-expanded="true"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="14"><g fill="none" fill-rule="evenodd"><path d="M3.29175 4.793c-.389.392-.389 1.027 0 1.419l2.939 2.965c.218.215.5.322.779.322s.556-.107.769-.322l2.93-2.955c.388-.392.388-1.027 0-1.419-.389-.392-1.018-.392-1.406 0l-2.298 2.317-2.307-2.327c-.194-.195-.449-.293-.703-.293-.255 0-.51.098-.703.293z" fill="#344563"/></g></svg></button><h4 class="toggle-title" id="attachmentmodule-label">Attachments</h4><ul class="ops"><li class="drop"><div class="aui-dd-parent"><button class="aui-button aui-button-compact aui-button-subtle js-default-dropdown" title="Options" aria-label="Attachments panel options"><span class="aui-icon aui-icon-small aui-iconfont-more">Options</span></button><div class="aui-dropdown-content aui-list"><ul id="attachment-sorting-options"class="aui-list-section aui-first"><li class="aui-list-item"><a id="attachment-sort-key-name"href="/jira/browse/SAMZA-348?attachmentSortBy=fileName#attachmentmodule"class="aui-list-checked aui-checked aui-list-item-link"title="Sort By Name"><span>Sort By Name</span></a></li><li class="aui-list-item"><a id="attachment-sort-key-date"href="/jira/browse/SAMZA-348?attachmentSortBy=dateTime#attachmentmodule"class="aui-list-checked aui-list-item-link"title="Sort By Date"><span>Sort By Date</span></a></li></ul><ul id="attachment-sorting-order-options"class="aui-list-section aui-last"><li class="aui-list-item"><a id="attachment-sort-direction-asc"href="/jira/browse/SAMZA-348?attachmentOrder=asc#attachmentmodule"class="aui-list-checked aui-checked aui-list-item-link"title="Ascending"><span>Ascending</span></a></li><li class="aui-list-item"><a id="attachment-sort-direction-desc"href="/jira/browse/SAMZA-348?attachmentOrder=desc#attachmentmodule"class="aui-list-checked 
aui-list-item-link"title="Descending"><span>Descending</span></a></li></ul></div></div></li></ul></div><div class="mod-content"><ol id="file_attachments" class="item-attachments" data-sort-key="fileName" data-sort-order="asc"><li class="attachment-content js-file-attachment" data-attachment-id="12668454"data-issue-id="12728226"data-attachment-type="file"><div class="attachment-thumb"><a href="/jira/secure/attachment/12668454/DESIGN-SAMZA-348-0.md" draggable="true" data-downloadurl="application/octet-stream:DESIGN-SAMZA-348-0.md:https://issues.apache.org/jira/secure/attachment/12668454/DESIGN-SAMZA-348-0.md"><span class="aui-icon aui-icon-small attachment-icon aui-iconfont-devtools-file" title="File"></span></a></div><dl><dt class="attachment-title"><a href="/jira/secure/attachment/12668454/DESIGN-SAMZA-348-0.md" title="Latest 12/Sep/14 20:59 - Chris Riccomini" draggable="true" data-downloadurl="application/octet-stream:DESIGN-SAMZA-348-0.md:https://issues.apache.org/jira/secure/attachment/12668454/DESIGN-SAMZA-348-0.md">DESIGN-SAMZA-348-0.md</a></dt><dd class="attachment-delete"><span class="icon"></span></dd><dd class="attachment-date"><time class="livestamp" datetime="2014-09-12T20:59:40.177Z">12/Sep/14 20:59</time></dd><dd class="attachment-size">30 kB</dd><dd class="attachment-author">Chris Riccomini</dd></dl></li><li class="attachment-content js-file-attachment" data-attachment-id="12668455"data-issue-id="12728226"data-attachment-type="file"><div class="attachment-thumb"><a href="/jira/secure/attachment/12668455/DESIGN-SAMZA-348-0.pdf" draggable="true" data-downloadurl="application/pdf:DESIGN-SAMZA-348-0.pdf:https://issues.apache.org/jira/secure/attachment/12668455/DESIGN-SAMZA-348-0.pdf" file-preview-id="12668455" file-preview-title="DESIGN-SAMZA-348-0.pdf" file-preview-type="document"><span class="aui-icon aui-icon-small attachment-icon aui-iconfont-file-pdf" title="PDF File"></span></a></div><dl><dt class="attachment-title"><a 
href="/jira/secure/attachment/12668455/DESIGN-SAMZA-348-0.pdf" title="Latest 12/Sep/14 20:59 - Chris Riccomini" draggable="true" data-downloadurl="application/pdf:DESIGN-SAMZA-348-0.pdf:https://issues.apache.org/jira/secure/attachment/12668455/DESIGN-SAMZA-348-0.pdf" file-preview-id="12668455" file-preview-title="DESIGN-SAMZA-348-0.pdf" file-preview-type="document">DESIGN-SAMZA-348-0.pdf</a></dt><dd class="attachment-delete"><span class="icon"></span></dd><dd class="attachment-date"><time class="livestamp" datetime="2014-09-12T20:59:40.182Z">12/Sep/14 20:59</time></dd><dd class="attachment-size">220 kB</dd><dd class="attachment-author">Chris Riccomini</dd></dl></li><li class="attachment-content js-file-attachment" data-attachment-id="12670649"data-issue-id="12728226"data-attachment-type="file"><div class="attachment-thumb"><a href="/jira/secure/attachment/12670649/DESIGN-SAMZA-348-1.md" draggable="true" data-downloadurl="application/octet-stream:DESIGN-SAMZA-348-1.md:https://issues.apache.org/jira/secure/attachment/12670649/DESIGN-SAMZA-348-1.md"><span class="aui-icon aui-icon-small attachment-icon aui-iconfont-devtools-file" title="File"></span></a></div><dl><dt class="attachment-title"><a href="/jira/secure/attachment/12670649/DESIGN-SAMZA-348-1.md" title="Latest 23/Sep/14 07:29 - Chris Riccomini" draggable="true" data-downloadurl="application/octet-stream:DESIGN-SAMZA-348-1.md:https://issues.apache.org/jira/secure/attachment/12670649/DESIGN-SAMZA-348-1.md">DESIGN-SAMZA-348-1.md</a></dt><dd class="attachment-delete"><span class="icon"></span></dd><dd class="attachment-date"><time class="livestamp" datetime="2014-09-23T07:29:53.780Z">23/Sep/14 07:29</time></dd><dd class="attachment-size">45 kB</dd><dd class="attachment-author">Chris Riccomini</dd></dl></li><li class="attachment-content js-file-attachment" data-attachment-id="12670650"data-issue-id="12728226"data-attachment-type="file"><div class="attachment-thumb"><a 
href="/jira/secure/attachment/12670650/DESIGN-SAMZA-348-1.pdf" draggable="true" data-downloadurl="application/pdf:DESIGN-SAMZA-348-1.pdf:https://issues.apache.org/jira/secure/attachment/12670650/DESIGN-SAMZA-348-1.pdf" file-preview-id="12670650" file-preview-title="DESIGN-SAMZA-348-1.pdf" file-preview-type="document"><span class="aui-icon aui-icon-small attachment-icon aui-iconfont-file-pdf" title="PDF File"></span></a></div><dl><dt class="attachment-title"><a href="/jira/secure/attachment/12670650/DESIGN-SAMZA-348-1.pdf" title="Latest 23/Sep/14 07:29 - Chris Riccomini" draggable="true" data-downloadurl="application/pdf:DESIGN-SAMZA-348-1.pdf:https://issues.apache.org/jira/secure/attachment/12670650/DESIGN-SAMZA-348-1.pdf" file-preview-id="12670650" file-preview-title="DESIGN-SAMZA-348-1.pdf" file-preview-type="document">DESIGN-SAMZA-348-1.pdf</a></dt><dd class="attachment-delete"><span class="icon"></span></dd><dd class="attachment-date"><time class="livestamp" datetime="2014-09-23T07:29:53.790Z">23/Sep/14 07:29</time></dd><dd class="attachment-size">304 kB</dd><dd class="attachment-author">Chris Riccomini</dd></dl></li></ol></div></div><div id=linkingmodule class="module toggle-wrap"><div id="linkingmodule_heading" class="mod-header"><button class="aui-button toggle-title" aria-label="Issue Links" aria-controls="linkingmodule" aria-expanded="true"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="14"><g fill="none" fill-rule="evenodd"><path d="M3.29175 4.793c-.389.392-.389 1.027 0 1.419l2.939 2.965c.218.215.5.322.779.322s.556-.107.769-.322l2.93-2.955c.388-.392.388-1.027 0-1.419-.389-.392-1.018-.392-1.406 0l-2.298 2.317-2.307-2.327c-.194-.195-.449-.293-.703-.293-.255 0-.51.098-.703.293z" fill="#344563"/></g></svg></button><h4 class="toggle-title" id="linkingmodule-label">Issue Links</h4><ul class="ops"></ul></div><div class="mod-content"> <div class="links-container" data-default-link-icon="/jira/images/icons/generic_link_16.png"> <dl class="links-list "> 
<dt title="incorporates">incorporates</dt> <dd id="internal-12907268_12310010" > <div class="link-content"> <p> <img src="/jira/secure/viewavatar?size=xsmall&avatarId=21133&avatarType=issuetype" width="16" height="16" title="Bug - A problem which impairs or prevents the functions of the product." alt="Bug - A problem which impairs or prevents the functions of the product." /> <span title="SAMZA-798: Performance and stability issue after combining checkpoint and coordinator stream"> <a href="/jira/browse/SAMZA-798" data-issue-key="SAMZA-798" class="issue-link link-title resolution">SAMZA-798</a> <span class="link-summary">Performance and stability issue after combining checkpoint and coordinator stream</span> </span> </p> <ul class="link-snapshot"> <li class="priority"> <img src="/jira/images/icons/priorities/major.svg" width="16" height="16" title="Major - Major loss of function." alt="Major - Major loss of function." /> </li> <li class="status"> <span class=" jira-issue-status-lozenge aui-lozenge jira-issue-status-lozenge-green jira-issue-status-lozenge-done aui-lozenge-subtle jira-issue-status-lozenge-max-width-short" data-tooltip="&lt;span class=&quot;jira-issue-status-tooltip-title&quot;&gt;Resolved&lt;/span&gt;&lt;br&gt;&lt;span class=&quot;jira-issue-status-tooltip-desc&quot;&gt;A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.&lt;/span&gt;">Resolved</span> </li> </ul> </div> </dd> </dl> <dl class="links-list "> <dt title="is related to">is related to</dt> <dd id="internal-12664883_10030" > <div class="link-content"> <p> <img src="/jira/secure/viewavatar?size=xsmall&avatarId=21133&avatarType=issuetype" width="16" height="16" title="Bug - A problem which impairs or prevents the functions of the product." alt="Bug - A problem which impairs or prevents the functions of the product." 
/> <span title="SAMZA-40: Refactor Samza configuration"> <a href="/jira/browse/SAMZA-40" data-issue-key="SAMZA-40" class="issue-link link-title">SAMZA-40</a> <span class="link-summary">Refactor Samza configuration</span> </span> </p> <ul class="link-snapshot"> <li class="priority"> <img src="/jira/images/icons/priorities/major.svg" width="16" height="16" title="Major - Major loss of function." alt="Major - Major loss of function." /> </li> <li class="status"> <span class=" jira-issue-status-lozenge aui-lozenge jira-issue-status-lozenge-blue-gray jira-issue-status-lozenge-new aui-lozenge-subtle jira-issue-status-lozenge-max-width-short" data-tooltip="&lt;span class=&quot;jira-issue-status-tooltip-title&quot;&gt;Open&lt;/span&gt;&lt;br&gt;&lt;span class=&quot;jira-issue-status-tooltip-desc&quot;&gt;The issue is open and ready for the assignee to start work on it.&lt;/span&gt;">Open</span> </li> </ul> </div> </dd> <dd id="internal-12664894_10030" > <div class="link-content"> <p> <img src="/jira/secure/viewavatar?size=xsmall&avatarId=21133&avatarType=issuetype" width="16" height="16" title="Bug - A problem which impairs or prevents the functions of the product." alt="Bug - A problem which impairs or prevents the functions of the product." /> <span title="SAMZA-42: Add a job setup phase to Samza"> <a href="/jira/browse/SAMZA-42" data-issue-key="SAMZA-42" class="issue-link link-title">SAMZA-42</a> <span class="link-summary">Add a job setup phase to Samza</span> </span> </p> <ul class="link-snapshot"> <li class="priority"> <img src="/jira/images/icons/priorities/major.svg" width="16" height="16" title="Major - Major loss of function." alt="Major - Major loss of function." 
/> </li> <li class="status"> <span class=" jira-issue-status-lozenge aui-lozenge jira-issue-status-lozenge-blue-gray jira-issue-status-lozenge-new aui-lozenge-subtle jira-issue-status-lozenge-max-width-short" data-tooltip="&lt;span class=&quot;jira-issue-status-tooltip-title&quot;&gt;Open&lt;/span&gt;&lt;br&gt;&lt;span class=&quot;jira-issue-status-tooltip-desc&quot;&gt;The issue is open and ready for the assignee to start work on it.&lt;/span&gt;">Open</span> </li> </ul> </div> </dd> <dd id="internal-12727210_10030" > <div class="link-content"> <p> <img src="/jira/secure/viewavatar?size=xsmall&avatarId=21133&avatarType=issuetype" width="16" height="16" title="Bug - A problem which impairs or prevents the functions of the product." alt="Bug - A problem which impairs or prevents the functions of the product." /> <span title="SAMZA-333: Large samza configurations results in yarn job failure"> <a href="/jira/browse/SAMZA-333" data-issue-key="SAMZA-333" class="issue-link link-title">SAMZA-333</a> <span class="link-summary">Large samza configurations results in yarn job failure</span> </span> </p> <ul class="link-snapshot"> <li class="priority"> <img src="/jira/images/icons/priorities/major.svg" width="16" height="16" title="Major - Major loss of function." alt="Major - Major loss of function." /> </li> <li class="status"> <span class=" jira-issue-status-lozenge aui-lozenge jira-issue-status-lozenge-blue-gray jira-issue-status-lozenge-new aui-lozenge-subtle jira-issue-status-lozenge-max-width-short" data-tooltip="&lt;span class=&quot;jira-issue-status-tooltip-title&quot;&gt;Open&lt;/span&gt;&lt;br&gt;&lt;span class=&quot;jira-issue-status-tooltip-desc&quot;&gt;The issue is open and ready for the assignee to start work on it.&lt;/span&gt;">Open</span> </li> </ul> </div> </dd> <dd id="internal-12732904_10030" > <div class="link-content"> <p> <img src="/jira/secure/viewavatar?size=xsmall&avatarId=21133&avatarType=issuetype" width="16" height="16" title="Bug - A problem which impairs or prevents the functions of the product." 
alt="Bug - A problem which impairs or prevents the functions of the product." /> <span title="SAMZA-374: Need to be able to change SSP Grouper"> <a href="/jira/browse/SAMZA-374" data-issue-key="SAMZA-374" class="issue-link link-title resolution">SAMZA-374</a> <span class="link-summary">Need to be able to change SSP Grouper</span> </span> </p> <ul class="link-snapshot"> <li class="priority"> <img src="/jira/images/icons/priorities/major.svg" width="16" height="16" title="Major - Major loss of function." alt="Major - Major loss of function." /> </li> <li class="status"> <span class=" jira-issue-status-lozenge aui-lozenge jira-issue-status-lozenge-green jira-issue-status-lozenge-done aui-lozenge-subtle jira-issue-status-lozenge-max-width-short" data-tooltip="&lt;span class=&quot;jira-issue-status-tooltip-title&quot;&gt;Resolved&lt;/span&gt;&lt;br&gt;&lt;span class=&quot;jira-issue-status-tooltip-desc&quot;&gt;A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.&lt;/span&gt;">Resolved</span> </li> </ul> </div> </dd> <dd id="internal-12739249_10030" class=" collapsed-link " > <div class="link-content"> <p> <img src="/jira/secure/viewavatar?size=xsmall&avatarId=21141&avatarType=issuetype" width="16" height="16" title="New Feature - A new feature of the product, which has yet to be developed." alt="New Feature - A new feature of the product, which has yet to be developed." /> <span title="SAMZA-406: Hot standby containers"> <a href="/jira/browse/SAMZA-406" data-issue-key="SAMZA-406" class="issue-link link-title">SAMZA-406</a> <span class="link-summary">Hot standby containers</span> </span> </p> <ul class="link-snapshot"> <li class="priority"> <img src="/jira/images/icons/priorities/major.svg" width="16" height="16" title="Major - Major loss of function." alt="Major - Major loss of function." 
/> </li> <li class="status"> <span class=" jira-issue-status-lozenge aui-lozenge jira-issue-status-lozenge-blue-gray jira-issue-status-lozenge-new aui-lozenge-subtle jira-issue-status-lozenge-max-width-short" data-tooltip="&lt;span class=&quot;jira-issue-status-tooltip-title&quot;&gt;Open&lt;/span&gt;&lt;br&gt;&lt;span class=&quot;jira-issue-status-tooltip-desc&quot;&gt;The issue is open and ready for the assignee to start work on it.&lt;/span&gt;">Open</span> </li> </ul> </div> </dd> </dl> <dl class="links-list collapsed-links-list"> <dt title="relates to">relates to</dt> <dd id="internal-12732920_10030" class=" collapsed-link " > <div class="link-content"> <p> <img src="/jira/secure/viewavatar?size=xsmall&avatarId=21133&avatarType=issuetype" width="16" height="16" title="Bug - A problem which impairs or prevents the functions of the product." alt="Bug - A problem which impairs or prevents the functions of the product." /> <span title="SAMZA-375: Investigate Mesos Job Support"> <a href="/jira/browse/SAMZA-375" data-issue-key="SAMZA-375" class="issue-link link-title">SAMZA-375</a> <span class="link-summary">Investigate Mesos Job Support</span> </span> </p> <ul class="link-snapshot"> <li class="priority"> <img src="/jira/images/icons/priorities/major.svg" width="16" height="16" title="Major - Major loss of function." alt="Major - Major loss of function." 
/> </li> <li class="status"> <span class=" jira-issue-status-lozenge aui-lozenge jira-issue-status-lozenge-blue-gray jira-issue-status-lozenge-new aui-lozenge-subtle jira-issue-status-lozenge-max-width-short" data-tooltip="&lt;span class=&quot;jira-issue-status-tooltip-title&quot;&gt;Open&lt;/span&gt;&lt;br&gt;&lt;span class=&quot;jira-issue-status-tooltip-desc&quot;&gt;The issue is open and ready for the assignee to start work on it.&lt;/span&gt;">Open</span> </li> </ul> </div> </dd> <dd id="internal-12742303_10030" class=" collapsed-link " > <div class="link-content"> <p> <img src="/jira/secure/viewavatar?size=xsmall&avatarId=21141&avatarType=issuetype" width="16" height="16" title="New Feature - A new feature of the product, which has yet to be developed." alt="New Feature - A new feature of the product, which has yet to be developed." /> <span title="SAMZA-416: Samza Configuration DSL"> <a href="/jira/browse/SAMZA-416" data-issue-key="SAMZA-416" class="issue-link link-title">SAMZA-416</a> <span class="link-summary">Samza Configuration DSL</span> </span> </p> <ul class="link-snapshot"> <li class="priority"> <img src="/jira/images/icons/priorities/major.svg" width="16" height="16" title="Major - Major loss of function." alt="Major - Major loss of function." /> </li> <li class="status"> <span class=" jira-issue-status-lozenge aui-lozenge jira-issue-status-lozenge-blue-gray jira-issue-status-lozenge-new aui-lozenge-subtle jira-issue-status-lozenge-max-width-short" data-tooltip="&lt;span class=&quot;jira-issue-status-tooltip-title&quot;&gt;Open&lt;/span&gt;&lt;br&gt;&lt;span class=&quot;jira-issue-status-tooltip-desc&quot;&gt;The issue is open and ready for the assignee to start work on it.&lt;/span&gt;">Open</span> </li> </ul> </div> </dd> </dl> <dl class="links-list collapsed-links-list"> <dt title="supercedes">supercedes</dt> <dd id="internal-12708431_12310051" class=" collapsed-link " > <div class="link-content"> <p> <img src="/jira/secure/viewavatar?size=xsmall&avatarId=21140&avatarType=issuetype" width="16" height="16" title="Wish - General wishlist item." 
alt="Wish - General wishlist item." /> <span title="SAMZA-237: Consider implementing job control topic to support dynamic inputs, capacity changes, etc."> <a href="/jira/browse/SAMZA-237" data-issue-key="SAMZA-237" class="issue-link link-title resolution">SAMZA-237</a> <span class="link-summary">Consider implementing job control topic to support dynamic inputs, capacity changes, etc.</span> </span> </p> <ul class="link-snapshot"> <li class="priority"> <img src="/jira/images/icons/priorities/major.svg" width="16" height="16" title="Major - Major loss of function." alt="Major - Major loss of function." /> </li> <li class="status"> <span class=" jira-issue-status-lozenge aui-lozenge jira-issue-status-lozenge-green jira-issue-status-lozenge-done aui-lozenge-subtle jira-issue-status-lozenge-max-width-short" data-tooltip="<span class="jira-issue-status-tooltip-title">Resolved</span><br><span class="jira-issue-status-tooltip-desc">A resolution has been taken, and it is awaiting verification by reporter. 
From here issues are either reopened, or are closed.</span>">Resolved</span> </li> </ul> </div> </dd> </dl> <div id="show-more-links"> <button class="aui-button aui-button-link" id="show-more-links-link">Show 4 more links</button> <span>(1 is related to, 2 relates to, 1 supercedes)</span> </div> </div> </div></div><div id=view-subtasks class="module toggle-wrap"><div id="view-subtasks_heading" class="mod-header"><button class="aui-button toggle-title" aria-label="Sub-Tasks" aria-controls="view-subtasks" aria-expanded="true"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="14"><g fill="none" fill-rule="evenodd"><path d="M3.29175 4.793c-.389.392-.389 1.027 0 1.419l2.939 2.965c.218.215.5.322.779.322s.556-.107.769-.322l2.93-2.955c.388-.392.388-1.027 0-1.419-.389-.392-1.018-.392-1.406 0l-2.298 2.317-2.307-2.327c-.194-.195-.449-.293-.703-.293-.255 0-.51.098-.703.293z" fill="#344563"/></g></svg></button><h4 class="toggle-title" id="view-subtasks-label">Sub-Tasks</h4><ul class="ops"><li class="drop"><div class="aui-dd-parent"><button class="aui-button aui-button-compact aui-button-subtle js-default-dropdown" title="Options" aria-label="Sub-Tasks panel options"><span class="aui-icon aui-icon-small aui-iconfont-more">Options</span></button><div class="aui-dropdown-content aui-list"><ul id="subtask-view-options"class="aui-list-section aui-first aui-last"><li class="aui-list-item"><a id="subtasks-show-all"href="/jira/browse/SAMZA-348?subTaskView=all#issuetable"class="aui-list-checked aui-checked aui-list-item-link"title="Show All"><span>Show All</span></a></li><li class="aui-list-item"><a id="subtasks-show-open"href="/jira/browse/SAMZA-348?subTaskView=unresolved#issuetable"class="aui-list-checked aui-list-item-link"title="Show Open"><span>Show Open</span></a></li><li class="aui-list-item"><a id="subtasks-bulk-operation"href="/jira/issue/bulkedit/BulkEdit1!default.jspa?reset=true&searchParent=SAMZA-348"class="aui-list-checked aui-list-item-link"title="Bulk 
operation"><span>Bulk operation</span></a></li><li class="aui-list-item"><a id="subtasks-open-issue-navigator"href="/jira/issues/?jql=parent%3DSAMZA-348"class="aui-list-checked aui-list-item-link"title="Open issue navigator"><span>Open issue navigator</span></a></li></ul></div></div></li></ul></div><div class="mod-content"><div class="mod-content intform subtask-table-container"> <issuetable-web-component data-content="subtasks"> <table id="issuetable" > <tr id="issuerow12749618" rel="12749618" data-issuekey="SAMZA-438" class="issuerow"> <td class="stsequence"><div rel="0" class="subtask-done">1.</div> </td> <td class="stsummary"><a class='issue-link' data-issue-key='SAMZA-438' href='/jira/browse/SAMZA-438'>Pass config via HTTP</a></td> <td class="issuetype"> <a class="issue-link" data-issue-key="SAMZA-438" href="/jira/browse/SAMZA-438"> <img src="https://issues.apache.org/jira/secure/viewavatar?size=xsmall&avatarId=21146&avatarType=issuetype" height="16" width="16" border="0" align="absmiddle" alt="Sub-task" title="Sub-task - The sub-task of the issue"> </a> </td> <td class="status"> <span class=" jira-issue-status-lozenge aui-lozenge jira-issue-status-lozenge-green jira-issue-status-lozenge-done aui-lozenge-subtle jira-issue-status-lozenge-max-width-short" data-tooltip="<span class="jira-issue-status-tooltip-title">Resolved</span><br><span class="jira-issue-status-tooltip-desc">A resolution has been taken, and it is awaiting verification by reporter. 
From here issues are either reopened, or are closed.</span>">Resolved</span> </td> <td class="assignee"> <span class="tinylink"><a class="user-hover" rel="criccomini" id="assignee_criccomini" href="/jira/secure/ViewProfile.jspa?name=criccomini">Chris Riccomini</a></span> </td> </tr> <tr id="issuerow12750138" rel="12750138" data-issuekey="SAMZA-444" class="issuerow"> <td class="stsequence"><div rel="1" class="subtask-done">2.</div> </td> <td class="stsummary"><a class='issue-link' data-issue-key='SAMZA-444' href='/jira/browse/SAMZA-444'>Provide a Samza job data model for job coordinator</a></td> <td class="issuetype"> <a class="issue-link" data-issue-key="SAMZA-444" href="/jira/browse/SAMZA-444"> <img src="https://issues.apache.org/jira/secure/viewavatar?size=xsmall&avatarId=21146&avatarType=issuetype" height="16" width="16" border="0" align="absmiddle" alt="Sub-task" title="Sub-task - The sub-task of the issue"> </a> </td> <td class="status"> <span class=" jira-issue-status-lozenge aui-lozenge jira-issue-status-lozenge-green jira-issue-status-lozenge-done aui-lozenge-subtle jira-issue-status-lozenge-max-width-short" data-tooltip="<span class="jira-issue-status-tooltip-title">Resolved</span><br><span class="jira-issue-status-tooltip-desc">A resolution has been taken, and it is awaiting verification by reporter. 
From here issues are either reopened, or are closed.</span>">Resolved</span> </td> <td class="assignee"> <span class="tinylink"><a class="user-hover" rel="criccomini" id="assignee_criccomini" href="/jira/secure/ViewProfile.jspa?name=criccomini">Chris Riccomini</a></span> </td> </tr> <tr id="issuerow12751120" rel="12751120" data-issuekey="SAMZA-448" class="issuerow"> <td class="stsequence"><div rel="2" class="subtask-done">3.</div> </td> <td class="stsummary"><a class='issue-link' data-issue-key='SAMZA-448' href='/jira/browse/SAMZA-448'>Pass config from JobRunner to JobCoordinator via ConfigStream</a></td> <td class="issuetype"> <a class="issue-link" data-issue-key="SAMZA-448" href="/jira/browse/SAMZA-448"> <img src="https://issues.apache.org/jira/secure/viewavatar?size=xsmall&avatarId=21146&avatarType=issuetype" height="16" width="16" border="0" align="absmiddle" alt="Sub-task" title="Sub-task - The sub-task of the issue"> </a> </td> <td class="status"> <span class=" jira-issue-status-lozenge aui-lozenge jira-issue-status-lozenge-green jira-issue-status-lozenge-done aui-lozenge-subtle jira-issue-status-lozenge-max-width-short" data-tooltip="<span class="jira-issue-status-tooltip-title">Resolved</span><br><span class="jira-issue-status-tooltip-desc">A resolution has been taken, and it is awaiting verification by reporter. 
From here issues are either reopened, or are closed.</span>">Resolved</span> </td> <td class="assignee"> <span class="tinylink"><a class="user-hover" rel="criccomini" id="assignee_criccomini" href="/jira/secure/ViewProfile.jspa?name=criccomini">Chris Riccomini</a></span> </td> </tr> <tr id="issuerow12754875" rel="12754875" data-issuekey="SAMZA-465" class="issuerow"> <td class="stsequence"><div rel="3" class="subtask-done">4.</div> </td> <td class="stsummary"><a class='issue-link' data-issue-key='SAMZA-465' href='/jira/browse/SAMZA-465'>Use coordinator stream and eliminate CheckpointManager</a></td> <td class="issuetype"> <a class="issue-link" data-issue-key="SAMZA-465" href="/jira/browse/SAMZA-465"> <img src="https://issues.apache.org/jira/secure/viewavatar?size=xsmall&avatarId=21146&avatarType=issuetype" height="16" width="16" border="0" align="absmiddle" alt="Sub-task" title="Sub-task - The sub-task of the issue"> </a> </td> <td class="status"> <span class=" jira-issue-status-lozenge aui-lozenge jira-issue-status-lozenge-green jira-issue-status-lozenge-done aui-lozenge-subtle jira-issue-status-lozenge-max-width-short" data-tooltip="<span class="jira-issue-status-tooltip-title">Resolved</span><br><span class="jira-issue-status-tooltip-desc">A resolution has been taken, and it is awaiting verification by reporter. 
From here issues are either reopened, or are closed.</span>">Resolved</span> </td> <td class="assignee"> <span class="tinylink"><a class="user-hover" rel="naveenatceg" id="assignee_naveenatceg" href="/jira/secure/ViewProfile.jspa?name=naveenatceg">Naveen Somasundaram</a></span> </td> </tr> <tr id="issuerow12785339" rel="12785339" data-issuekey="SAMZA-614" class="issuerow"> <td class="stsequence"><div rel="4" class="subtask-done">5.</div> </td> <td class="stsummary"><a class='issue-link' data-issue-key='SAMZA-614' href='/jira/browse/SAMZA-614'>Document coordinator stream</a></td> <td class="issuetype"> <a class="issue-link" data-issue-key="SAMZA-614" href="/jira/browse/SAMZA-614"> <img src="https://issues.apache.org/jira/secure/viewavatar?size=xsmall&avatarId=21146&avatarType=issuetype" height="16" width="16" border="0" align="absmiddle" alt="Sub-task" title="Sub-task - The sub-task of the issue"> </a> </td> <td class="status"> <span class=" jira-issue-status-lozenge aui-lozenge jira-issue-status-lozenge-green jira-issue-status-lozenge-done aui-lozenge-subtle jira-issue-status-lozenge-max-width-short" data-tooltip="<span class="jira-issue-status-tooltip-title">Resolved</span><br><span class="jira-issue-status-tooltip-desc">A resolution has been taken, and it is awaiting verification by reporter. 
From here issues are either reopened, or are closed.</span>">Resolved</span> </td> <td class="assignee"> <span class="tinylink"><a class="user-hover" rel="navina" id="assignee_navina" href="/jira/secure/ViewProfile.jspa?name=navina">Navina Ramesh</a></span> </td> </tr> <tr id="issuerow12785347" rel="12785347" data-issuekey="SAMZA-615" class="issuerow"> <td class="stsequence"><div rel="5" class="subtask-done">6.</div> </td> <td class="stsummary"><a class='issue-link' data-issue-key='SAMZA-615' href='/jira/browse/SAMZA-615'>Migrate checkpoint from checkpoint topic to Coordinator stream</a></td> <td class="issuetype"> <a class="issue-link" data-issue-key="SAMZA-615" href="/jira/browse/SAMZA-615"> <img src="https://issues.apache.org/jira/secure/viewavatar?size=xsmall&avatarId=21146&avatarType=issuetype" height="16" width="16" border="0" align="absmiddle" alt="Sub-task" title="Sub-task - The sub-task of the issue"> </a> </td> <td class="status"> <span class=" jira-issue-status-lozenge aui-lozenge jira-issue-status-lozenge-green jira-issue-status-lozenge-done aui-lozenge-subtle jira-issue-status-lozenge-max-width-short" data-tooltip="<span class="jira-issue-status-tooltip-title">Resolved</span><br><span class="jira-issue-status-tooltip-desc">A resolution has been taken, and it is awaiting verification by reporter. 
From here issues are either reopened, or are closed.</span>">Resolved</span> </td> <td class="assignee"> <span class="tinylink"><a class="user-hover" rel="nickpan47" id="assignee_nickpan47" href="/jira/secure/ViewProfile.jspa?name=nickpan47">Yi Pan</a></span> </td> </tr> <tr id="issuerow12828246" rel="12828246" data-issuekey="SAMZA-671" class="issuerow"> <td class="stsequence"><div rel="6" class="subtask-done">7.</div> </td> <td class="stsummary"><a class='issue-link' data-issue-key='SAMZA-671' href='/jira/browse/SAMZA-671'>Disable checkpoint in coordinator stream</a></td> <td class="issuetype"> <a class="issue-link" data-issue-key="SAMZA-671" href="/jira/browse/SAMZA-671"> <img src="https://issues.apache.org/jira/secure/viewavatar?size=xsmall&avatarId=21146&avatarType=issuetype" height="16" width="16" border="0" align="absmiddle" alt="Sub-task" title="Sub-task - The sub-task of the issue"> </a> </td> <td class="status"> <span class=" jira-issue-status-lozenge aui-lozenge jira-issue-status-lozenge-green jira-issue-status-lozenge-done aui-lozenge-subtle jira-issue-status-lozenge-max-width-short" data-tooltip="<span class="jira-issue-status-tooltip-title">Resolved</span><br><span class="jira-issue-status-tooltip-desc">A resolution has been taken, and it is awaiting verification by reporter. 
From here issues are either reopened, or are closed.</span>">Resolved</span> </td> <td class="assignee"> <span class="tinylink"><a class="user-hover" rel="nickpan47" id="assignee_nickpan47" href="/jira/secure/ViewProfile.jspa?name=nickpan47">Yi Pan</a></span> </td> </tr> <tr id="issuerow12830332" rel="12830332" data-issuekey="SAMZA-678" class="issuerow"> <td class="stsequence"><div rel="7">8.</div> </td> <td class="stsummary"><a class='issue-link' data-issue-key='SAMZA-678' href='/jira/browse/SAMZA-678'>Integrate CoordinatorStream to use SystemConsumers and SystemProducers</a></td> <td class="issuetype"> <a class="issue-link" data-issue-key="SAMZA-678" href="/jira/browse/SAMZA-678"> <img src="https://issues.apache.org/jira/secure/viewavatar?size=xsmall&avatarId=21146&avatarType=issuetype" height="16" width="16" border="0" align="absmiddle" alt="Sub-task" title="Sub-task - The sub-task of the issue"> </a> </td> <td class="status"> <span class=" jira-issue-status-lozenge aui-lozenge jira-issue-status-lozenge-blue-gray jira-issue-status-lozenge-new aui-lozenge-subtle jira-issue-status-lozenge-max-width-short" data-tooltip="<span class="jira-issue-status-tooltip-title">Open</span><br><span class="jira-issue-status-tooltip-desc">The issue is open and ready for the assignee to start work on it.</span>">Open</span> </td> <td class="assignee"> <em>Unassigned</em> </td> </tr> <tr id="issuerow12830334" rel="12830334" data-issuekey="SAMZA-679" class="issuerow"> <td class="stsequence"><div rel="8">9.</div> </td> <td class="stsummary"><a class='issue-link' data-issue-key='SAMZA-679' href='/jira/browse/SAMZA-679'>Optimize CoordinatorStream's bootstrap mechanism</a></td> <td class="issuetype"> <a class="issue-link" data-issue-key="SAMZA-679" href="/jira/browse/SAMZA-679"> <img src="https://issues.apache.org/jira/secure/viewavatar?size=xsmall&avatarId=21146&avatarType=issuetype" height="16" width="16" border="0" align="absmiddle" alt="Sub-task" title="Sub-task - The sub-task of 
the issue"> </a> </td> <td class="status"> <span class=" jira-issue-status-lozenge aui-lozenge jira-issue-status-lozenge-blue-gray jira-issue-status-lozenge-new aui-lozenge-subtle jira-issue-status-lozenge-max-width-short" data-tooltip="<span class="jira-issue-status-tooltip-title">Open</span><br><span class="jira-issue-status-tooltip-desc">The issue is open and ready for the assignee to start work on it.</span>">Open</span> </td> <td class="assignee"> <em>Unassigned</em> </td> </tr> <tr id="issuerow12830855" rel="12830855" data-issuekey="SAMZA-682" class="issuerow"> <td class="stsequence"><div rel="9" class="subtask-done">10.</div> </td> <td class="stsummary"><a class='issue-link' data-issue-key='SAMZA-682' href='/jira/browse/SAMZA-682'>Refactor Coordinator stream messages</a></td> <td class="issuetype"> <a class="issue-link" data-issue-key="SAMZA-682" href="/jira/browse/SAMZA-682"> <img src="https://issues.apache.org/jira/secure/viewavatar?size=xsmall&avatarId=21146&avatarType=issuetype" height="16" width="16" border="0" align="absmiddle" alt="Sub-task" title="Sub-task - The sub-task of the issue"> </a> </td> <td class="status"> <span class=" jira-issue-status-lozenge aui-lozenge jira-issue-status-lozenge-green jira-issue-status-lozenge-done aui-lozenge-subtle jira-issue-status-lozenge-max-width-short" data-tooltip="<span class="jira-issue-status-tooltip-title">Resolved</span><br><span class="jira-issue-status-tooltip-desc">A resolution has been taken, and it is awaiting verification by reporter. 
From here issues are either reopened, or are closed.</span>">Resolved</span> </td> <td class="assignee"> <span class="tinylink"><a class="user-hover" rel="jjung" id="assignee_jjung" href="/jira/secure/ViewProfile.jspa?name=jjung">József Márton Jung</a></span> </td> </tr> <tr id="issuerow12955034" rel="12955034" data-issuekey="SAMZA-925" class="issuerow"> <td class="stsequence"><div rel="10">11.</div> </td> <td class="stsummary"><a class='issue-link' data-issue-key='SAMZA-925' href='/jira/browse/SAMZA-925'>Explicit restart containers to pick up dynamic JobModel changes</a></td> <td class="issuetype"> <a class="issue-link" data-issue-key="SAMZA-925" href="/jira/browse/SAMZA-925"> <img src="https://issues.apache.org/jira/secure/viewavatar?size=xsmall&avatarId=21146&avatarType=issuetype" height="16" width="16" border="0" align="absmiddle" alt="Sub-task" title="Sub-task - The sub-task of the issue"> </a> </td> <td class="status"> <span class=" jira-issue-status-lozenge aui-lozenge jira-issue-status-lozenge-blue-gray jira-issue-status-lozenge-new aui-lozenge-subtle jira-issue-status-lozenge-max-width-short" data-tooltip="<span class="jira-issue-status-tooltip-title">Open</span><br><span class="jira-issue-status-tooltip-desc">The issue is open and ready for the assignee to start work on it.</span>">Open</span> </td> <td class="assignee"> <span class="tinylink"><a class="user-hover" rel="alex.buck10" id="assignee_alex.buck10" href="/jira/secure/ViewProfile.jspa?name=alex.buck10">Alex Buck</a></span> </td> </tr> </tbody> </table> </issuetable-web-component> <div class="end-of-stable-message"></div> </div> </div></div><div id=activitymodule class="module toggle-wrap"><div id="activitymodule_heading" class="mod-header"><button class="aui-button toggle-title" aria-label="Activity" aria-controls="activitymodule" aria-expanded="true"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="14"><g fill="none" fill-rule="evenodd"><path d="M3.29175 4.793c-.389.392-.389 1.027 0 
1.419l2.939 2.965c.218.215.5.322.779.322s.556-.107.769-.322l2.93-2.955c.388-.392.388-1.027 0-1.419-.389-.392-1.018-.392-1.406 0l-2.298 2.317-2.307-2.327c-.194-.195-.449-.293-.703-.293-.255 0-.51.098-.703.293z" fill="#344563"/></g></svg></button><h4 class="toggle-title" id="activitymodule-label">Activity</h4><ul class="ops"></ul></div><div class="mod-content"> <big-pipe data-id="activity-panel-pipe-id" style="height: 70px"> <div></div> </big-pipe> </div></div></div><div id="viewissuesidebar" class="aui-item issue-side-column"><div id=peoplemodule class="module toggle-wrap"><div id="peoplemodule_heading" class="mod-header"><button class="aui-button toggle-title" aria-label="People" aria-controls="peoplemodule" aria-expanded="true"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="14"><g fill="none" fill-rule="evenodd"><path d="M3.29175 4.793c-.389.392-.389 1.027 0 1.419l2.939 2.965c.218.215.5.322.779.322s.556-.107.769-.322l2.93-2.955c.388-.392.388-1.027 0-1.419-.389-.392-1.018-.392-1.406 0l-2.298 2.317-2.307-2.327c-.194-.195-.449-.293-.703-.293-.255 0-.51.098-.703.293z" fill="#344563"/></g></svg></button><h4 class="toggle-title" id="peoplemodule-label">People</h4><ul class="ops"></ul></div><div class="mod-content"> <div class="item-details people-details" id="peopledetails"> <dl> <dt title="Assignee"> <label for="assignee-field">Assignee:</label> </dt> <dd> <span id="assignee-val" class="view-issue-field"> <span class="user-hover" id="issue_summary_assignee_criccomini" rel="criccomini"> <span class="aui-avatar aui-avatar-small"><span class="aui-avatar-inner"><img src="https://issues.apache.org/jira/secure/useravatar?size=small&avatarId=10452" alt="criccomini" /></span></span> Chris Riccomini </span> </span> </dd> </dl> <dl> <dt title="Reporter"> <label for="reporter-field">Reporter:</label> </dt> <dd> <span id="reporter-val" class="view-issue-field"> <span class="user-hover" id="issue_summary_reporter_criccomini" rel="criccomini"> <span class="aui-avatar 
aui-avatar-small"><span class="aui-avatar-inner"><img src="https://issues.apache.org/jira/secure/useravatar?size=small&avatarId=10452" alt="criccomini" /></span></span> Chris Riccomini </span> </span> </dd> </dl> </div> <div class="item-details"> <dl> <dt title="Votes">Votes:</dt> <dd> <aui-badge id="vote-data" class="">0</aui-badge> <span id="vote-label" title="You have to be logged in to vote for an issue.">Vote for this issue</span> </dd> </dl> <dl> <dt title="Watchers">Watchers:</dt> <dd> <aui-badge id="watcher-data" class="">15</aui-badge> <span id="watch-label" title="You have to be logged in to watch an issue.">Start watching this issue</span> </dd> </dl> </div> </div></div><div id=datesmodule class="module toggle-wrap"><div id="datesmodule_heading" class="mod-header"><button class="aui-button toggle-title" aria-label="Dates" aria-controls="datesmodule" aria-expanded="true"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="14"><g fill="none" fill-rule="evenodd"><path d="M3.29175 4.793c-.389.392-.389 1.027 0 1.419l2.939 2.965c.218.215.5.322.779.322s.556-.107.769-.322l2.93-2.955c.388-.392.388-1.027 0-1.419-.389-.392-1.018-.392-1.406 0l-2.298 2.317-2.307-2.327c-.194-.195-.449-.293-.703-.293-.255 0-.51.098-.703.293z" fill="#344563"/></g></svg></button><h4 class="toggle-title" id="datesmodule-label">Dates</h4><ul class="ops"></ul></div><div class="mod-content"> <div class="item-details"> <dl class="dates"> <dt> Created: </dt> <dd class="date user-tz" title="18/Jul/14 16:10" > <span data-name="Created" id="created-val" data-fieldtype="datetime" > <time class="livestamp" datetime="2014-07-18T16:10:59+0000">18/Jul/14 16:10</time> </span> </dd> </dl> <dl class="dates"> <dt> Updated: </dt> <dd class="date user-tz" title="31/Mar/16 17:44" > <span data-name="Updated" id="updated-val" data-fieldtype="datetime" > <time class="livestamp" datetime="2016-03-31T17:44:12+0000">31/Mar/16 17:44</time> </span> </dd> </dl> </div> 
</div></div></div></div></div></div></div></div></main></div></div></div><div class="issue-navigator-init"></div> </div> <footer id="footer" role="contentinfo"> <section class="footer-body"> <ul class="atlassian-footer"> <li> Atlassian Jira <a class="seo-link" rel="nofollow" href="https://www.atlassian.com/software/jira">Project Management Software</a> </li> <li> <a id="about-link" rel="nofollow" href="/jira/secure/AboutPage.jspa">About Jira</a> </li> <li> <a id="footer-report-problem-link" rel="nofollow" href="/jira/secure/CreateIssue!default.jspa">Report a problem</a> </li> </ul> <p class="atlassian-footer"> <span class="licensemessage"> Powered by a free Atlassian <a rel='nofollow' href='http://www.atlassian.com/software/jira'>Jira</a> open source license for Apache Software Foundation. Try Jira - <a rel='nofollow' href='http://www.atlassian.com/software/jira'>bug tracking software</a> for <i>your</i> team. </span> </p> <div id="footer-logo"><a rel="nofollow" href="http://www.atlassian.com/">Atlassian</a></div> </section> <fieldset class="hidden parameters"> <input type="hidden" title="loggedInUser" value=""> <input type="hidden" title="ajaxTimeout" value="The call to the Jira server did not complete within the timeout period. We are unsure of the result of this operation."> <input type="hidden" title="JiraVersion" value="8.20.10" /> <input type="hidden" title="ajaxUnauthorised" value="You are not authorised to perform this operation. Please log in."> <input type="hidden" title="baseURL" value="https://issues.apache.org/jira" /> <input type="hidden" title="ajaxCommsError" value="The Jira server could not be contacted. This may be a temporary glitch or the server may be down. "> <input type="hidden" title="ajaxServerError" value="The Jira server was contacted but has returned an error response. 
We are unsure of the result of this operation."> <input type="hidden" title="ajaxErrorCloseDialog" value="Close this dialog and press refresh in your browser"> <input type="hidden" title="ajaxErrorDialogHeading" value="Communications Breakdown"> <input type="hidden" title="dirtyMessage" value="You have entered new data on this page. If you navigate away from this page without first saving your data, the changes will be lost."> <input type="hidden" title="dirtyDialogMessage" value="You have entered new data in this dialog. If you navigate away from this dialog without first saving your data, the changes will be lost. Click cancel to return to the dialog."> <input type="hidden" title="keyType" value="Type"> <input type="hidden" title="keyThen" value="then"> <input type="hidden" title="dblClickToExpand" value="Double click to expand"> <input type="hidden" title="actions" value="Actions"> <input type="hidden" title="removeItem" value="Remove"> <input type="hidden" title="workflow" value="Workflow"> <input type="hidden" title="labelNew" value="New Label"> <input type="hidden" title="issueActionsHint" value="Begin typing for available operations or press down to see all"> <input type="hidden" title="closelink" value="Close"> <input type="hidden" title="dotOperations" value="Operations"> <input type="hidden" title="dotLoading" value="Loading..."> <input type="hidden" title="frotherSuggestions" value="Suggestions"> <input type="hidden" title="frotherNomatches" value="No Matches"> <input type="hidden" title="multiselectVersionsError" value="{0} is not a valid version."> <input type="hidden" title="multiselectComponentsError" value="{0} is not a valid component."> <input type="hidden" title="multiselectGenericError" value="The value {0} is invalid."> </fieldset> </footer> </div> <script type="text/javascript" src="/jira/s/d41d8cd98f00b204e9800998ecf8427e-CDN/-lmkfjk/820010/13pdxe5/1.0/_/download/batch/jira.webresources:bigpipe-js/jira.webresources:bigpipe-js.js" 
data-wrm-key="jira.webresources:bigpipe-js" data-wrm-batch-type="resource" data-initially-rendered></script> <script> window.WRM=window.WRM||{};window.WRM._unparsedData=window.WRM._unparsedData||{};window.WRM._unparsedErrors=window.WRM._unparsedErrors||{}; WRM._unparsedData["activity-panel-pipe-id"]="\"\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \u003cdiv class=\\\"tabwrap aui-tabs horizontal-tabs aui-tabs-disabled\\\"\u003e\\n\\n \u003cul id=\\\"issue-tabs\\\" class=\\\"tabs-menu\\\"\u003e\\n \\n \u003cli class=\\\"menu-item \\\"\\n id=\\\"all-tabpanel\\\"\\n data-id=\\\"all-tabpanel\\\"\\n data-key=\\\"com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel\\\"\\n data-label=\\\"All\\\"\\n data-href=\\\"\\/jira\\/browse\\/SAMZA-348?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel\\\"\\n \u003e\\n \u003ca id=\\\"all-tabpanel\\\" href=\\\"\\/jira\\/browse\\/SAMZA-348?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel\\\" class=\\\"ajax-activity-content\\\"\u003eAll\u003c\\/a\u003e\\n \u003c\\/li\u003e\\n \\n \u003cli class=\\\"menu-item active-tab active \\\"\\n id=\\\"comment-tabpanel\\\"\\n data-id=\\\"comment-tabpanel\\\"\\n data-key=\\\"com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel\\\"\\n data-label=\\\"Comments\\\"\\n data-href=\\\"\\/jira\\/browse\\/SAMZA-348?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel\\\"\\n \u003e\\n \u003ca tabindex=\\\"0\\\"\u003eComments\u003c\\/a\u003e\\n \u003c\\/li\u003e\\n \\n \u003cli class=\\\"menu-item \\\"\\n id=\\\"worklog-tabpanel\\\"\\n data-id=\\\"worklog-tabpanel\\\"\\n data-key=\\\"com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel\\\"\\n data-label=\\\"Work Log\\\"\\n data-href=\\\"\\/jira\\/browse\\/SAMZA-348?page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel\\\"\\n \u003e\\n \u003ca id=\\\"worklog-tabpanel\\\" 
href=\\\"\\/jira\\/browse\\/SAMZA-348?page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel\\\" class=\\\"ajax-activity-content\\\"\u003eWork Log\u003c\\/a\u003e\\n \u003c\\/li\u003e\\n \\n \u003cli class=\\\"menu-item \\\"\\n id=\\\"changehistory-tabpanel\\\"\\n data-id=\\\"changehistory-tabpanel\\\"\\n data-key=\\\"com.atlassian.jira.plugin.system.issuetabpanels:changehistory-tabpanel\\\"\\n data-label=\\\"History\\\"\\n data-href=\\\"\\/jira\\/browse\\/SAMZA-348?page=com.atlassian.jira.plugin.system.issuetabpanels:changehistory-tabpanel\\\"\\n \u003e\\n \u003ca id=\\\"changehistory-tabpanel\\\" href=\\\"\\/jira\\/browse\\/SAMZA-348?page=com.atlassian.jira.plugin.system.issuetabpanels:changehistory-tabpanel\\\" class=\\\"ajax-activity-content\\\"\u003eHistory\u003c\\/a\u003e\\n \u003c\\/li\u003e\\n \\n \u003cli class=\\\"menu-item \\\"\\n id=\\\"activity-stream-issue-tab\\\"\\n data-id=\\\"activity-stream-issue-tab\\\"\\n data-key=\\\"com.atlassian.streams.streams-jira-plugin:activity-stream-issue-tab\\\"\\n data-label=\\\"Activity\\\"\\n data-href=\\\"\\/jira\\/browse\\/SAMZA-348?page=com.atlassian.streams.streams-jira-plugin:activity-stream-issue-tab\\\"\\n \u003e\\n \u003ca id=\\\"activity-stream-issue-tab\\\" href=\\\"\\/jira\\/browse\\/SAMZA-348?page=com.atlassian.streams.streams-jira-plugin:activity-stream-issue-tab\\\" class=\\\"ajax-activity-content\\\"\u003eActivity\u003c\\/a\u003e\\n \u003c\\/li\u003e\\n \\n \u003cli class=\\\"menu-item \\\"\\n id=\\\"transitions-summary-tabpanel\\\"\\n data-id=\\\"transitions-summary-tabpanel\\\"\\n data-key=\\\"com.googlecode.jira-suite-utilities:transitions-summary-tabpanel\\\"\\n data-label=\\\"Transitions\\\"\\n data-href=\\\"\\/jira\\/browse\\/SAMZA-348?page=com.googlecode.jira-suite-utilities:transitions-summary-tabpanel\\\"\\n \u003e\\n \u003ca id=\\\"transitions-summary-tabpanel\\\" href=\\\"\\/jira\\/browse\\/SAMZA-348?page=com.googlecode.jira-suite-utilities:transitions-summary-tabpanel\\\" 
class=\\\"ajax-activity-content\\\"\u003eTransitions\u003c\\/a\u003e\\n \u003c\\/li\u003e\\n \u003c\\/ul\u003e\\n\\n \u003cdiv class=\\\"sortwrap\\\"\u003e\\n \u003ca class=\\\"issue-activity-sort-link ajax-activity-content\\\" rel=\\\"nofollow\\\" data-tab-sort data-order=\\\"desc\\\" href=\\\"\\/jira\\/browse\\/SAMZA-348?actionOrder=desc\\\" title=\\\"Ascending order - Click to sort in descending order\\\"\u003e\\n \u003cspan class=\\\"aui-icon aui-icon-small aui-iconfont-up\\\"\u003eAscending order - Click to sort in descending order\u003c\\/span\u003e\\n \u003c\\/a\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"tabs-pane active-pane\\\"\u003e\u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"issuePanelWrapper\\\"\u003e\\n \u003cdiv class=\\\"issuePanelProgress\\\"\u003e\u003c\\/div\u003e\\n \u003cdiv class=\\\"issuePanelContainer\\\" id=\\\"issue_actions_container\\\"\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14066578\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14066578_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca 
href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14066578&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14066578\' class=\'commentdate_14066578_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'18\\/Jul\\/14 17:35\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-07-18T17:35:13+0000\'\u003e18\\/Jul\\/14 17:35\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cp\u003eWe are discussing storing checkpoints in the ConfigLog. The way we handle checkpoints might change when we want to leverage Kafka\'s proposed transactionality feature:\u003c\\/p\u003e\\n\\n\u003cp\u003e\u003ca href=\\\"https:\\/\\/cwiki.apache.org\\/confluence\\/display\\/KAFKA\\/Transactional+Messaging+in+Kafka\\\" class=\\\"external-link\\\" target=\\\"_blank\\\" rel=\\\"nofollow noopener\\\"\u003ehttps:\\/\\/cwiki.apache.org\\/confluence\\/display\\/KAFKA\\/Transactional+Messaging+in+Kafka\u003c\\/a\u003e\u003c\\/p\u003e\\n\\n\u003cp\u003eI believe that this feature requires us to store checkpoints in Kafka, since the offset commit and transaction commit must happen atomically. I need to re-read the design doc to refresh my memory, but if this holds true, then the ConfigLog wouldn\'t be useful for storing Kafka offsets. It might still be useful for other systems (e.g. 
file system), though.\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14066578_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14066578&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14066578\' class=\'commentdate_14066578_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'18\\/Jul\\/14 17:35\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-07-18T17:35:13+0000\'\u003e18\\/Jul\\/14 17:35\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e We are discussing storing checkpoints in the ConfigLog. The way we handle checkpoints might change when we want to leverage Kafka\'s proposed transactionality feature: \\n\\n https:\\/\\/cwiki.apache.org\\/confluence\\/display\\/KAFKA\\/Transactional+Messaging+in+Kafka \\n\\n I believe that this feature requires us to store checkpoints in Kafka, since the offset commit and transaction commit must happen atomically. 
I need to re-read the design doc to refresh my memory, but if this holds true, then the ConfigLog wouldn\'t be useful for storing Kafka offsets. It might still be useful for other systems (e.g. file system), though. \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14102616\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"cpsoman\\\" id=\\\"commentauthor_14102616_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=cpsoman\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"cpsoman\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chinmay Soman\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14102616&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14102616\' class=\'commentdate_14102616_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'19\\/Aug\\/14 18:38\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-08-19T18:38:57+0000\'\u003e19\\/Aug\\/14 18:38\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cp\u003eJust for my education : are we talking about embedding a Kafka consumer inside the YARN AM ?\u003c\\/p\u003e\\n\\n\u003cp\u003eThe way 
I imagined this is that the AM will read the config from some Kafka topic (per Samza topology) which will then initialize (or modify) containers based on this ?\u003c\\/p\u003e\\n\\n\u003cp\u003eDo we also need the AM -> container communication mechanism, discussed in a previous ticket as part of this ? OR - in case of a config change, the AM simply destroys and re-creates the required containers ?\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"cpsoman\\\" id=\\\"commentauthor_14102616_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=cpsoman\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"cpsoman\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chinmay Soman\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14102616&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14102616\' class=\'commentdate_14102616_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'19\\/Aug\\/14 18:38\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-08-19T18:38:57+0000\'\u003e19\\/Aug\\/14 18:38\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e Just for my education : are we talking about embedding a Kafka consumer inside the YARN AM ? 
\\n\\n The way I imagined this is that the AM will read the config from some Kafka topic (per Samza topology) which will then initialize (or modify) containers based on this ? \\n\\n Do we also need the AM -> container communication mechanism, discussed in a previous ticket as part of this ? OR - in case of a config change, the AM simply destroys and re-creates the required containers ? \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14102733\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14102733_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14102733&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14102733\' class=\'commentdate_14102733_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'19\\/Aug\\/14 19:56\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-08-19T19:56:54+0000\'\u003e19\\/Aug\\/14 19:56\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv 
class=\\\"action-body flooded\\\"\u003e\u003cblockquote\u003e\u003cp\u003eare we talking about embedding a Kafka consumer inside the YARN AM\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eYes, though in a roundabout way. We\'d probably make the interface pluggable, and then provide a YARN-based implementation. I don\'t think we\'ve thought about this in great detail yet, but the short answer is that there could end up being a Kafka consumer inside the YARN AM process.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eThe way I imagined this is that the AM will read the config from some Kafka topic (per Samza topology) which will then initialize (or modify) containers based on this ?\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eIf we follow this strategy, then we need to provide some RPC mechanism for each SamzaContainer to call back and get the config for itself. This could be done via HTTP, and I believe we already expose configs over HTTP in the YARN AM, so it should be pretty easy to add as a JSON blob.\u003c\\/p\u003e\\n\\n\u003cp\u003eAn alternative strategy would be to have each SamzaContainer fully read the ConfigLog topic, and not coordinate directly with the AM.\u003c\\/p\u003e\\n\\n\u003cp\u003eOne decision that needs to be made is whether the centralized coordinator should be the AM, or whether the SamzaContainers themselves should have some influence on restart decisions. It seems to me, at first glance, that having the containers call back to the AM to get their config, and having the AM be in complete control over when containers are restarted is the most desirable way to do things since it means the containers can be completely dumb, and we have a single place to make all container-related decisions (the AM).\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eDo we also need the AM -> container communication mechanism, discussed in a previous ticket as part of this ? 
OR - in case of a config change, the AM simply destroys and re-creates the required containers ?\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eThe simplest approach is to not have AM-to-container communication, and simply have the AM restart the containers when it needs to change them. The trade-off is that if your container has state, and you change config\\/restart the container, then you have to wait for the container to restore its state.\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14102733_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14102733&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14102733\' class=\'commentdate_14102733_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'19\\/Aug\\/14 19:56\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-08-19T19:56:54+0000\'\u003e19\\/Aug\\/14 19:56\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e are we talking about embedding a Kafka 
consumer inside the YARN AM \\n\\n Yes, though in a roundabout way. We\'d probably make the interface pluggable, and then provide a YARN-based implementation. I don\'t think we\'ve thought about this in great detail yet, but the short answer is that there could end up being a Kafka consumer inside the YARN AM process. \\n\\n The way I imagined this is that the AM will read the config from some Kafka topic (per Samza topology) which will then initialize (or modify) containers based on this ? \\n\\n If we follow this strategy, then we need to provide some RPC mechanism for each SamzaContainer to call back and get the config for itself. This could be done via HTTP, and I believe we already expose configs over HTTP in the YARN AM, so it should be pretty easy to add as a JSON blob. \\n\\n An alternative strategy would be to have each SamzaContainer fully read the ConfigLog topic, and not coordinate directly with the AM. \\n\\n One decision that needs to be made is whether the centralized coordinator should be the AM, or whether the SamzaContainers themselves should have some influence on restart decisions. It seems to me, at first glance, that having the containers call back to the AM to get their config, and having the AM be in complete control over when containers are restarted is the most desirable way to do things since it means the containers can be completely dumb, and we have a single place to make all container-related decisions (the AM). \\n\\n Do we also need the AM -> container communication mechanism, discussed in a previous ticket as part of this ? OR - in case of a config change, the AM simply destroys and re-creates the required containers ? \\n\\n The simplest approach is to not have AM-to-container communication, and simply have the AM restart the containers when it needs to change them. The trade-off is that if your container has state, and you change config\\/restart the container, then you have to wait for the container to restore its state. 
\u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14102751\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"cpsoman\\\" id=\\\"commentauthor_14102751_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=cpsoman\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"cpsoman\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chinmay Soman\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14102751&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14102751\' class=\'commentdate_14102751_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'19\\/Aug\\/14 20:09\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-08-19T20:09:18+0000\'\u003e19\\/Aug\\/14 20:09\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cblockquote\u003e\u003cp\u003ehave each SamzaContainer fully read the ConfigLog topic, and not coordinate directly with the AM ... 
One decision that needs to be made is whether the centralized coordinator should be the AM\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eI think doing the config change through AM (and not bypassing it) seems better. Given that AM has more visibility (as you pointed out) - we can then leverage this to make more informed \\/ intelligent decisions regarding container restarts. \u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eThe simplest approach is to not have AM-to-container communication\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eAgreed - I think simple restarts would just work.\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"cpsoman\\\" id=\\\"commentauthor_14102751_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=cpsoman\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"cpsoman\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chinmay Soman\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14102751&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14102751\' class=\'commentdate_14102751_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'19\\/Aug\\/14 20:09\'\u003e\u003ctime class=\'livestamp\' 
datetime=\'2014-08-19T20:09:18+0000\'\u003e19\\/Aug\\/14 20:09\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e have each SamzaContainer fully read the ConfigLog topic, and not coordinate directly with the AM ... One decision that needs to be made is whether the centralized coordinator should be the AM \\n\\n I think doing the config change through AM (and not bypassing it) seems better. Given that AM has more visibility (as you pointed out) - we can then leverage this to make more informed \\/ intelligent decisions regarding container restarts. \\n\\n The simplest approach is to not have AM-to-container communication \\n\\n Agreed - I think simple restarts would just work. \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14132099\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14132099_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14132099&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14132099\' class=\'commentdate_14132099_verbose subText 
comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'12\\/Sep\\/14 20:59\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-12T20:59:40+0000\'\u003e12\\/Sep\\/14 20:59\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cp\u003eI\'m attaching a preliminary design doc. There are still some TODOs in the implementation section, but the rest of the document is pretty in-depth.\u003c\\/p\u003e\\n\\n\u003cp\u003eThe goal is to get feedback.\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14132099_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14132099&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14132099\' class=\'commentdate_14132099_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'12\\/Sep\\/14 20:59\'\u003e\u003ctime class=\'livestamp\' 
datetime=\'2014-09-12T20:59:40+0000\'\u003e12\\/Sep\\/14 20:59\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e I\'m attaching a preliminary design doc. There are still some TODOs in the implementation section, but the rest of the document is pretty in-depth. \\n\\n The goal is to get feedback. \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14132439\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"jonbringhurst\\\" id=\\\"commentauthor_14132439_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=jonbringhurst\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"jonbringhurst\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Jon Bringhurst\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14132439&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14132439\' class=\'commentdate_14132439_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'13\\/Sep\\/14 01:22\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-13T01:22:07+0000\'\u003e13\\/Sep\\/14 01:22\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cp\u003eIt might be a good idea to 
have this as an interface named \\\"ConfigState\\\" (or something like that) instead of \\\"ConfigStream\\\", unless there\'s a specific feature of this that can only exist when it\'s backed by something similar to a Kafka stream (instead of Zookeeper for example).\u003c\\/p\u003e\\n\\n\u003cp\u003eAlong that line of thought, does this avoid using Zookeeper to avoid a new dependency? Or for some other reason?\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\\n\u003cp\u003eThe YARN AM exposes the Config object\'s data via an HTTP JSON webapp.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eIs the intent for this API to allow changing the config without restarting the AM? If so, this might be a nice way to manage several Samza jobs with an external system (perhaps one with a nice web UI, like a Mesos meta-scheduler).\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\\n\u003cp\u003eThis refactoring would also help make Mesos support easier.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eI definitely agree.\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"jonbringhurst\\\" id=\\\"commentauthor_14132439_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=jonbringhurst\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"jonbringhurst\\\" 
\\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Jon Bringhurst\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14132439&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14132439\' class=\'commentdate_14132439_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'13\\/Sep\\/14 01:22\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-13T01:22:07+0000\'\u003e13\\/Sep\\/14 01:22\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e It might be a good idea to have this as an interface named \\\"ConfigState\\\" (or something like that) instead of \\\"ConfigStream\\\", unless there\'s a specific feature of this that can only exist when it\'s backed by something similar to a Kafka stream (instead of Zookeeper for example). \\n\\n Along that line of thought, does this avoid using Zookeeper to avoid a new dependency? Or for some other reason? \\n\\n \\n The YARN AM exposes the Config object\'s data via an HTTP JSON webapp. \\n\\n Is the intent for this API to allow changing the config without restarting the AM? If so, this might be a nice way to manage several Samza jobs with an external system (perhaps one with a nice web UI, like a Mesos meta-scheduler). \\n\\n \\n This refactoring would also help make Mesos support easier. \\n\\n I definitely agree. 
\u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14133423\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14133423_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14133423&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14133423\' class=\'commentdate_14133423_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'14\\/Sep\\/14 22:28\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-14T22:28:05+0000\'\u003e14\\/Sep\\/14 22:28\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cblockquote\u003e\u003cp\u003eIt might be a good idea to have this as an interface named \\\"ConfigState\\\" (or something like that) instead of \\\"ConfigStream\\\", unless there\'s a specific feature of this that can only exist when it\'s backed by something similiar to a Kafka stream (instead of Zookeeper for 
example).\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eWe started by referring to this as state, but there was strong feedback from \u003ca href=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/ViewProfile.jspa?name=martinkl\\\" class=\\\"user-hover\\\" rel=\\\"martinkl\\\"\u003emartinkl\u003c\\/a\u003e not to use the word \\\"state\\\" again, since it\'s already overloaded, and used for Samza\'s state management feature. The other idea was \\\"ConfigLog\\\".\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eAlong that line of thought, does this avoid using Zookeeper to avoid a new dependency? Or for some other reason?\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eThe two reasons for not using ZK are:\u003c\\/p\u003e\\n\\n\u003col\u003e\\n\\t\u003cli\u003eAvoid adding a new dependency. Thus far, we\'ve gotten by without it.\u003c\\/li\u003e\\n\\t\u003cli\u003eIf we use Kafka as the system, it lets us transactionally store offset checkpoints in the ConfigStream. If we have two systems (ZK, and Kafka), then we can no longer transactionally commit everything at once (offset checkpoint, output, state changelog, etc).\u003c\\/li\u003e\\n\u003c\\/ol\u003e\\n\\n\\n\u003cp\u003eRegarding (2), it looks more and more like it will be a requirement for transactionality to have the ConfigStream and a job\'s output streams go to the same underlying system. It\'s not necessarily the case that only Kafka can support this. I believe HBase would work as well.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eIs the intent for this API to allow changing the config without restarting the AM?\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\u003cp\u003eYes, this is the intent. 
In its most naive form, the flow looks something like:\u003c\\/p\u003e\\n\\n\u003col\u003e\\n\\t\u003cli\u003eAM updates config that\'s exposed via HTTP JSON API.\u003c\\/li\u003e\\n\\t\u003cli\u003eAM kills all existing containers.\u003c\\/li\u003e\\n\\t\u003cli\u003eAM brings up new containers.\u003c\\/li\u003e\\n\\t\u003cli\u003eNew containers query AM\'s HTTP JSON API, which has new configs.\u003c\\/li\u003e\\n\u003c\\/ol\u003e\\n\\n\\n\u003cblockquote\u003e\u003cp\u003eIf so, this might be a nice way to manage several Samza jobs with an external system (perhaps one with a nice web UI, like a Mesos meta-scheduler).\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eI hadn\'t considered this in detail, but it seems like it might work. I think as long as the HTTP JSON API is well defined, I don\'t think an individual SamzaContainer should care about whether the API it\'s querying is just managing one job, or many jobs.\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14133423_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca 
href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14133423&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14133423\' class=\'commentdate_14133423_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'14\\/Sep\\/14 22:28\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-14T22:28:05+0000\'\u003e14\\/Sep\\/14 22:28\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e It might be a good idea to have this as an interface named \\\"ConfigState\\\" (or something like that) instead of \\\"ConfigStream\\\", unless there\'s a specific feature of this that can only exist when it\'s backed by something similiar to a Kafka stream (instead of Zookeeper for example). \\n\\n We started by referring to this as state, but there was strong feedback from martinkl not to use the word \\\"state\\\" again, since it\'s already overloaded, and used for Samza\'s state management feature. The other idea was \\\"ConfigLog\\\". \\n\\n Along that line of thought, does this avoid using Zookeeper to avoid a new dependency? Or for some other reason? \\n\\n The two reasons for not using ZK are: \\n\\n \\n\\t Avoid adding a new dependency. Thus far, we\'ve gotten by without it. \\n\\t If we use Kafka as the system, it lets us transactionally store offset checkpoints in the ConfigStream. If we have two systems (ZK, and Kafka), then we can no longer transactionally commit everything at once (offset checkpoint, output, state changelog, etc). \\n \\n\\n\\n Regarding (2), it looks more and more like it will be a requirement for transactionality to have the ConfigStream and a job\'s output streams go to the same underlying system. It\'s not necessarily the case that only Kafka can support this. I believe HBase would work as well. \\n\\n Is the intent for this API to allow changing the config without restarting the AM? \\n Yes, this is the intent. 
In its most naive form, the flow looks something like: \\n\\n \\n\\t AM updates config that\'s exposed via HTTP JSON API. \\n\\t AM kills all existing containers. \\n\\t AM brings up new containers. \\n\\t New containers query AM\'s HTTP JSON API, which has new configs. \\n \\n\\n\\n If so, this might be a nice way to manage several Samza jobs with an external system (perhaps one with a nice web UI, like a Mesos meta-scheduler). \\n\\n I hadn\'t considered this in detail, but it seems like it might work. I think as long as the HTTP JSON API is well defined, I don\'t think an individual SamzaContainer should care about whether the API it\'s querying is just managing one job, or many jobs. \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14134128\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"cpsoman\\\" id=\\\"commentauthor_14134128_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=cpsoman\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"cpsoman\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chinmay Soman\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14134128&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14134128\' class=\'commentdate_14134128_verbose subText 
comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'15\\/Sep\\/14 17:04\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-15T17:04:35+0000\'\u003e15\\/Sep\\/14 17:04\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cp\u003eThis looks pretty good . +1\u003c\\/p\u003e\\n\\n\u003cp\u003eThe HTTP JSON interface with a pull model sounds good to me ! This also makes it easy for the user to see what config is actually being used (a common problem in distributed systems).\u003c\\/p\u003e\\n\\n\u003cp\u003eMy comments on the open questions:\u003c\\/p\u003e\\n\u003cul\u003e\\n\\t\u003cli\u003eMulti writer problem: I think we can make the auto-config by the Samza AM - as a tunable property. This should be used when the user does not want to keep tuning the config. In addition, maybe it is better for the user to make any config related changes from a web based endpoint (maybe hosted in the AM). This way, the config hosted by the AM becomes the source of truth and not cfg2 (something similar to what Azkaban also does).\u003c\\/li\u003e\\n\u003c\\/ul\u003e\\n\\n\\n\u003cul\u003e\\n\\t\u003cli\u003eConfig stream naming:\u003cbr\\/\u003e\\nMaybe we can still standardize this. The configure-job.sh script can take a job name for which a config stream is to be written. 
We can simply wait for the Kafka topic deletion to be available - to solve the problem of resetting the config.\u003c\\/li\u003e\\n\u003c\\/ul\u003e\\n\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"cpsoman\\\" id=\\\"commentauthor_14134128_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=cpsoman\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"cpsoman\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chinmay Soman\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14134128&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14134128\' class=\'commentdate_14134128_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'15\\/Sep\\/14 17:04\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-15T17:04:35+0000\'\u003e15\\/Sep\\/14 17:04\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e This looks pretty good . +1 \\n\\n The HTTP JSON interface with a pull model sounds good to me ! This also makes it easy for the user to see what config is actually being used (a common problem in distributed systems). \\n\\n My comments on the open questions: \\n \\n\\t Multi writer problem: I think we can make the auto-config by the Samza AM - as a tunable property. 
This should be used when the user does not want to keep tuning the config. In addition, maybe it is better for the user to make any config related changes from a web based endpoint (maybe hosted in the AM). This way, the config hosted by the AM becomes the source of truth and not cfg2 (something similar to what Azkaban also does). \\n \\n\\n\\n \\n\\t Config stream naming: \\nMaybe we can still standardize this. The configure-job.sh script can take a job name for which a config stream is to be written. We can simply wait for the Kafka topic deletion to be available - to solve the problem of resetting the config. \\n \\n\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14134172\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14134172_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14134172&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14134172\' class=\'commentdate_14134172_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date 
user-tz\' title=\'15\\/Sep\\/14 17:37\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-15T17:37:19+0000\'\u003e15\\/Sep\\/14 17:37\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cblockquote\u003e\u003cp\u003eMaybe we can still standardize this. The configure-job.sh script can take a job name for which a config stream is to be written. We can simply wait for the Kafka topic deletion to be available - to solve the problem of resetting the config.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eYea, I think I\'m leaning this way now as well. Something like:\u003c\\/p\u003e\\n\\n\u003cdiv class=\\\"preformatted panel\\\" style=\\\"border-width: 1px;\\\"\u003e\u003cdiv class=\\\"preformattedContent panelContent\\\"\u003e\\n\u003cpre\u003e$ configure-job.sh --location kafka:\\/\\/localhost:10251 --job.name &lt;job name&gt; --job.id &lt;job id&gt; --property task.inputs=kafka.foo\\n$ run-job.sh --location kafka:\\/\\/localhost:10251 --job.name &lt;job name&gt; --job.id &lt;job id&gt;\\n\u003c\\/pre\u003e\\n\u003c\\/div\u003e\u003c\\/div\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eI think we can make the auto-config by the Samza AM - as a tunable property. This should be used when the user does not want to keep tuning the config. \u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eI was just thinking that we could ignore this problem. As long as we keep the job coordinator as the \u003cb\u003eonly\u003c\\/b\u003e thing that programmatically mutates configuration, then the race condition is between the coordinator and the human dev. I think this should be fine.\u003c\\/p\u003e\\n\\n\u003cp\u003eThe offset checkpoint messages pose a bigger problem, though. Since they\'re checkpointed once per minute, the chance of an over-write is relatively high in the case where a developer is trying to set all offsets back to 0, for example. 
If we suppose there are 64 StreamTasks, and each one is checkpointing once per minute, then you\'re getting 1 checkpoint per second, on average. If the developer writes all 64 offset messages back to offset 0, it seems likely that a SamzaContainer might over-write the developer\'s offset message with its own before the container is killed off and restarted. The order of events would be:\u003c\\/p\u003e\\n\\n\u003cdiv class=\\\"preformatted panel\\\" style=\\\"border-width: 1px;\\\"\u003e\u003cdiv class=\\\"preformattedContent panelContent\\\"\u003e\\n\u003cpre\u003eSamzaContainer: offset 3976\\nSamzaContainer: offset 4320\\nDeveloper: offset 0\\nSamzaContainer: offset 5320\\nJob coordinator: restarts SamzaContainer\\nSamzaContainer: starts with offset 5320\\n\u003c\\/pre\u003e\\n\u003c\\/div\u003e\u003c\\/div\u003e\\n\\n\u003cp\u003eI don\'t have a good idea on how to solve this. One way would be to add some generation number that invalidates all future offsets from SamzaContainer after the developer\'s message is written. I haven\'t thought about this in great detail.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eIn addition, maybe it is better for the user to make any config related changes from a web based endpoint (maybe hosted in the AM). This way, the config hosted by the AM becomes the source of truth and not cfg2 (something similar to what Azkaban also does).\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eI was thinking the source of truth would be the underlying stream, since this is what the job coordinator will use to run the job. 
Whether the config is mutated from the AM web UI, or from a CLI, I haven\'t considered very much.\u003c\\/p\u003e\\n\\n\u003cp\u003eMaybe you\'re trying to get at the idea that we could try and funnel all mutations to the ConfigStream through a single writer?\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14134172_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14134172&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14134172\' class=\'commentdate_14134172_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'15\\/Sep\\/14 17:37\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-15T17:37:19+0000\'\u003e15\\/Sep\\/14 17:37\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e Maybe we can still standardize this. The configure-job.sh script can take a job name for which a config stream is to be written. 
We can simply wait for the Kafka topic deletion to be available - to solve the problem of resetting the config. \\n\\n Yea, I think I\'m leaning this way now as well. Something like: \\n\\n \\n $ configure-job.sh --location kafka:\\/\\/localhost:10251 --job.name <job name> --job.id <job id> --property task.inputs=kafka.foo\\n$ run-job.sh --location kafka:\\/\\/localhost:10251 --job.name <job name> --job.id <job id>\\n \\n \\n\\n I think we can make the auto-config by the Samza AM - as a tunable property. This should be used when the user does not want to keep tuning the config. \\n\\n I was just thinking that we could ignore this problem. As long as we keep the job coordinator as the only thing that programmatically mutates configuration, then the race condition is between the coordinator and the human dev. I think this should be fine. \\n\\n The offset checkpoint messages pose a bigger problem, though. Since they\'re checkpointed once per minute, the chance of an over-write is relatively high in the case where a developer is trying to set all offsets back to 0, for example. If we suppose there are 64 StreamTasks, and each one is checkpointing once per minute, then you\'re getting 1 checkpoint per second, on average. If the developer writes all 64 offset messages back to offset 0, it seems likely that a SamzaContainer might over-write the developer\'s offset message with its own before the container is killed off and restarted. The order of events would be: \\n\\n \\n SamzaContainer: offset 3976\\nSamzaContainer: offset 4320\\nDeveloper: offset 0\\nSamzaContainer: offset 5320\\nJob coordinator: restarts SamzaContainer\\nSamzaContainer: starts with offset 5320\\n \\n \\n\\n I don\'t have a good idea on how to solve this. One way would be to add some generation number that invalidates all future offsets from SamzaContainer after the developer\'s message is written. I haven\'t thought about this in great detail. 
\\n\\n In addition, maybe it is better for the user to make any config related changes from a web based endpoint (maybe hosted in the AM). This way, the config hosted by the AM becomes the source of truth and not cfg2 (something similar to what Azkaban also does). \\n\\n I was thinking the source of truth would be the underlying stream, since this is what the job coordinator will use to run the job. Whether the config is mutated from the AM web UI, or from a CLI, I haven\'t considered very much. \\n\\n Maybe you\'re trying to get at the idea that we could try and funnel all mutations to the ConfigStream through a single writer? \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14134337\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"cpsoman\\\" id=\\\"commentauthor_14134337_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=cpsoman\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"cpsoman\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chinmay Soman\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14134337&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14134337\' class=\'commentdate_14134337_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' 
title=\'15\\/Sep\\/14 19:12\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-15T19:12:55+0000\'\u003e15\\/Sep\\/14 19:12\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cblockquote\u003e\u003cp\u003eI was thinking the source of truth would be the underlying stream, since this is what the job coordinator will use to run the job. Whether the config is mutated from the AM web UI, or from a CLI, I haven\'t considered very much.Maybe you\'re trying to get at the idea that we could try and funnel all mutations to the ConfigStrem through a single writer?\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eThe source of truth is the stream. However, in case a lot of modifications are done (either manually or automatically), the user might lose track of what the exact config is. So yeah, funneling all mutations through a single writer (like the AM) might add value - so that :\u003c\\/p\u003e\\n\u003cul\u003e\\n\\t\u003cli\u003eWe can reflect the current config accurately (for example - if within LinkedIn, the user only modifies the config via cfg2, then there\'s an extra overhead of keeping that in sync with the actual config - since config mutations might be done via the AM).\u003c\\/li\u003e\\n\\t\u003cli\u003eAvoid all concurrency issues.\u003c\\/li\u003e\\n\u003c\\/ul\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"cpsoman\\\" 
id=\\\"commentauthor_14134337_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=cpsoman\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"cpsoman\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chinmay Soman\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14134337&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14134337\' class=\'commentdate_14134337_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'15\\/Sep\\/14 19:12\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-15T19:12:55+0000\'\u003e15\\/Sep\\/14 19:12\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e I was thinking the source of truth would be the underlying stream, since this is what the job coordinator will use to run the job. Whether the config is mutated from the AM web UI, or from a CLI, I haven\'t considered very much.Maybe you\'re trying to get at the idea that we could try and funnel all mutations to the ConfigStrem through a single writer? \\n\\n The source of truth is the stream. However, in case a lot of modifications are done (either manually or automatically), the user might lose track of what the exact config is. So yeah, funneling all mutations through a single writer (like the AM) might add value - so that : \\n \\n\\t We can reflect the current config accurately (for example - if within LinkedIn, the user only modifies the config via cfg2, then there\'s an extra overhead of keeping that in sync with the actual config - since config mutations might be done via the AM). \\n\\t Avoid all concurrency issues. 
\\n \\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14134414\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14134414_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14134414&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14134414\' class=\'commentdate_14134414_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'15\\/Sep\\/14 20:17\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-15T20:17:12+0000\'\u003e15\\/Sep\\/14 20:17\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cblockquote\u003e\u003cp\u003ethe user might lose track of what the exact config is\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eFor this, I was thinking that configure-job.sh could have a --read switch, to get all existing configs for a job. 
I agree it\'s super useful to have the AM expose them as well, which we can continue to do.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eAvoid all concurrency issues.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eIsn\'t there still a concurrency issue if two writers update the AM UI at the same time?\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eWe can reflect the current config accurately (for example - if within LinkedIn, the user only modifies the config via cfg2, then there\'s an extra overhead of keeping that in sync with the actual config - since config mutations might be done via the AM).\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eFor this kind of use case, I was figuring we\'d have configure-job.sh behave a lot like run-job.sh does today: take a URI and a factory, and resolve configs. For example, something like:\u003c\\/p\u003e\\n\\n\u003cdiv class=\\\"preformatted panel\\\" style=\\\"border-width: 1px;\\\"\u003e\u003cdiv class=\\\"preformattedContent panelContent\\\"\u003e\\n\u003cpre\u003e$ configure-job.sh --uri kafka:\\/\\/localhost:1025 --job.name foo --job.id bar --config-file=file:\\/\\/... --config-factory=PropertiesConfigFactory\\n\u003c\\/pre\u003e\\n\u003c\\/div\u003e\u003c\\/div\u003e\\n\\n\u003cp\u003eYou could have configure-job.sh run against a static config file every time run-job.sh is run. This would essentially mirror how Samza currently works.\u003c\\/p\u003e\\n\\n\u003cp\u003eOne other thought: if we depend on a UI (in YARN or otherwise), we get into a problem where we might need to edit config while the job is down (the UI is unavailable).\u003c\\/p\u003e\\n\\n\u003cp\u003eI haven\'t really fully baked any of this, but this is just along the lines of what I\'m thinking right now. I think it\'s OK to live with concurrency issues for config, but for offsets, it could be problematic. 
I haven\'t spent much time thinking about how to fix that yet.\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14134414_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14134414&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14134414\' class=\'commentdate_14134414_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'15\\/Sep\\/14 20:17\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-15T20:17:12+0000\'\u003e15\\/Sep\\/14 20:17\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e the user might lose track of what the exact config is \\n\\n For this, I was thinking that configure-job.sh could have a --read switch, to get all existing configs for a job. I agree it\'s super useful to have the AM expose them as well, which we can continue to do. \\n\\n Avoid all concurrency issues. \\n\\n Isn\'t there still a concurrency issue if two writers update the AM UI at the same time? 
\\n\\n We can reflect the current config accurately (for example - if within LinkedIn, the user only modifies the config via cfg2, then there\'s an extra overhead of keeping that in sync with the actual config - since config mutations might be done via the AM). \\n\\n For this kind of use case, I was figuring we\'d have configure-job.sh behave a lot like run-job.sh does today: take a URI and a factory, and resolve configs. For example, something like: \\n\\n \\n $ configure-job.sh --uri kafka:\\/\\/localhost:1025 --job.name foo --job.id bar --config-file=file:\\/\\/... --config-factory=PropertiesConfigFactory\\n \\n \\n\\n You could have configure-job.sh run against a static config file every time run-job.sh is run. This would essentially mirror how Samza currently works. \\n\\n One other thought: if we depend on a UI (in YARN or otherwise), we get into a problem where we might need to edit config while the job is down (the UI is unavailable). \\n\\n I haven\'t really fully baked any of this, but this is just along the lines of what I\'m thinking right now. I think it\'s OK to live with concurrency issues for config, but for offsets, it could be problematic. I haven\'t spent much time thinking about how to fix that yet. 
\u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14134685\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"sriramsub\\\" id=\\\"commentauthor_14134685_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=sriramsub\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"sriramsub\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Sriram\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14134685&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14134685\' class=\'commentdate_14134685_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'15\\/Sep\\/14 23:22\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-15T23:22:55+0000\'\u003e15\\/Sep\\/14 23:22\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cp\u003eHere are my thoughts so far - \u003c\\/p\u003e\\n\\n\u003cp\u003eAt the high level, I agree that all communication from the AM to the containers should happen through the AM API. 
All the communication from containers to the AM should happen through the stream (at least for now till we can think of certain soft states that the containers would like to communicate with the AM that need not be persisted). Multiple containers\\/tasks can write to this stream and any race conditions that can occur due to multiple writers is present irrespective of the underlying storage medium. \u003c\\/p\u003e\\n\\n\u003cp\u003e1. AM catch up speed of config log\u003cbr\\/\u003e\\nWe plan to use the config log as the source of truth and use it in the future to make dynamic updates to configs. If we want the AM to take actions on the cluster immediately after the config updates, it is important the AM is always caught up with the config log. It would be useful to do a rough math on the potential size that this log can grow assuming a worst case scenario for cluster setup. We would turn on key deduplication in Kafka which would ensure this log size is bounded but having some estimate would be useful.\u003c\\/p\u003e\\n\\n\u003cp\u003e2. AM startup time\u003cbr\\/\u003e\\nThis is kind of related to the previous point. On startup, the AM needs to read the entire config log from head and restore the job configuration and offset state. This could potentially add more time to the startup. Today, the offsets are individually restored by the tasks and if they have multiple partitions, the restoration can potentially happen faster. This largely again depends on how big the stream can grow.\u003c\\/p\u003e\\n\\n\u003cp\u003e3. AM failure and containers running\u003cbr\\/\u003e\\nI am assuming that in the future we would like to have the containers running even when the AM fails. The containers would continue to write their offsets into the config log. However, if the container fails when the AM is down, they would not be able to start since they cannot get the offsets from the AM. 
If the AM is highly available, we can safely assume that the new AM will be chosen within few seconds (AM start + config log restore) and the containers can proceed. \u003c\\/p\u003e\\n\\n\u003cp\u003e4. Mixing transactional and non transactional updates\u003cbr\\/\u003e\\nIt would be worth mentioning that by writing the offsets and the job configurations into the same task, we would potentially mix transactions and non transactional messages into the same topic. The transactional feature is required to ensure exactly once semantics in Samza. The AM would need to use a transactionally aware consumer to ensure it reads the data in a consistent state.\u003c\\/p\u003e\\n\\n\u003cp\u003e5. Dynamic config updates\u003cbr\\/\u003e\\nIf we let dynamic config updates to happen, we would need some kind of boundaries to declare when the AM can take actions on the config changes. For example, you may want to change the max memory size and the total number of containers and we would like the AM to react to these changes once. One option would be to batch changes in the AM for some interval before acting on it. Another option would be to introduce the notion of batch config change messages. We would simply add a batch config message start header followed by all the config changes and then push a batch config message end header when the AM knows that it needs to act on the change. 
\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"sriramsub\\\" id=\\\"commentauthor_14134685_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=sriramsub\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"sriramsub\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Sriram\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14134685&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14134685\' class=\'commentdate_14134685_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'15\\/Sep\\/14 23:22\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-15T23:22:55+0000\'\u003e15\\/Sep\\/14 23:22\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e Here are my thoughts so far - \\n\\n At the high level, I agree that all communication from the AM to the containers should happen through the AM API. All the communication from containers to the AM should happen through the stream (at least for now till we can think of certain soft states that the containers would like to communicate with the AM that need not be persisted). 
Multiple containers\\/tasks can write to this stream and any race conditions that can occur due to multiple writers is present irrespective of the underlying storage medium. \\n\\n 1. AM catch up speed of config log \\nWe plan to use the config log as the source of truth and use it in the future to make dynamic updates to configs. If we want the AM to take actions on the cluster immediately after the config updates, it is important the AM is always caught up with the config log. It would be useful to do a rough math on the potential size that this log can grow assuming a worst case scenario for cluster setup. We would turn on key deduplication in Kafka which would ensure this log size is bounded but having some estimate would be useful. \\n\\n 2. AM startup time \\nThis is kind of related to the previous point. On startup, the AM needs to read the entire config log from head and restore the job configuration and offset state. This could potentially add more time to the startup. Today, the offsets are individually restored by the tasks and if they have multiple partitions, the restoration can potentially happen faster. This largely again depends on how big the stream can grow. \\n\\n 3. AM failure and containers running \\nI am assuming that in the future we would like to have the containers running even when the AM fails. The containers would continue to write their offsets into the config log. However, if the container fails when the AM is down, they would not be able to start since they cannot get the offsets from the AM. If the AM is highly available, we can safely assume that the new AM will be chosen within few seconds (AM start + config log restore) and the containers can proceed. \\n\\n 4. Mixing transactional and non transactional updates \\nIt would be worth mentioning that by writing the offsets and the job configurations into the same task, we would potentially mix transactions and non transactional messages into the same topic. 
The transactional feature is required to ensure exactly once semantics in Samza. The AM would need to use a transactionally aware consumer to ensure it reads the data in a consistent state. \\n\\n 5. Dynamic config updates \\nIf we let dynamic config updates to happen, we would need some kind of boundaries to declare when the AM can take actions on the config changes. For example, you may want to change the max memory size and the total number of containers and we would like the AM to react to these changes once. One option would be to batch changes in the AM for some interval before acting on it. Another option would be to introduce the notion of batch config change messages. We would simply add a batch config message start header followed by all the config changes and then push a batch config message end header when the AM knows that it needs to act on the change. \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14135809\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14135809_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca 
href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14135809&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14135809\' class=\'commentdate_14135809_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'16\\/Sep\\/14 17:49\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-16T17:49:39+0000\'\u003e16\\/Sep\\/14 17:49\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cblockquote\u003e\u003cp\u003eIt would be useful to do a rough math on the potential size that this log can grow assuming a worst case scenario for cluster setup.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eAgreed. Other math that we should do is how many containers we can have running if they\'re polling the job coordinator\'s HTTP server with a frequency of N seconds.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eHowever, if the container fails when the AM is down, they would not be able to start since they cannot get the offsets from the AM.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eTrue. I\'ve been thinking about this a bit more. I think this should be fine. In both Mesos and YARN, if the container fails, it actually won\'t be restarted anyway (since restarting the container requires a job coordinator to decide which partitions are assigned, which box the container should be on, etc). Even if the distributed execution framework were to restart the container, I think the desired behavior is to just block until the AM comes back, or to kill itself permanently, and wait for the AM to come back and restart it properly.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eThe AM would need to use a transactionally aware consumer to ensure it reads the data in a consistent state.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eGood point. 
Related to this, if we implement the protocol in the ConfigStream as key-value (vs. an entire config blob as one value), then you might wish to use transactions to atomically write multiple key-value pairs together (all-or-nothing) into the ConfigStream. Again, this would require a transactional consumer.\u003c\\/p\u003e\\n\\n\u003cp\u003eI\'ll update the design docs with this feedback.\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14135809_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14135809&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14135809\' class=\'commentdate_14135809_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'16\\/Sep\\/14 17:49\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-16T17:49:39+0000\'\u003e16\\/Sep\\/14 17:49\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e It would be useful to do a rough math on the potential size that this log can grow assuming a worst case 
scenario for cluster setup. \\n\\n Agreed. Other math that we should do is how many containers we can have running if they\'re polling the job coordinator\'s HTTP server with a frequency of N seconds. \\n\\n However, if the container fails when the AM is down, they would not be able to start since they cannot get the offsets from the AM. \\n\\n True. I\'ve been thinking about this a bit more. I think this should be fine. In both Mesos and YARN, if the container fails, it actually won\'t be restarted anyway (since restarting the container requires a job coordinator to decide which partitions are assigned, which box the container should be on, etc). Even if the distributed execution framework were to restart the container, I think the desired behavior is to just block until the AM comes back, or to kill itself permanently, and wait for the AM to come back and restart it properly. \\n\\n The AM would need to use a transactionally aware consumer to ensure it reads the data in a consistent state. \\n\\n Good point. Related to this, if we implement the protocol in the ConfigStream as key-value (vs. an entire config blob as one value), then you might wish to use transactions to atomically write multiple key-value pairs together (all-or-nothing) into the ConfigStream. Again, this would require a transactional consumer. \\n\\n I\'ll update the design docs with this feedback. 
\u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14137909\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"jonbringhurst\\\" id=\\\"commentauthor_14137909_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=jonbringhurst\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"jonbringhurst\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Jon Bringhurst\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14137909&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14137909\' class=\'commentdate_14137909_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'17\\/Sep\\/14 20:24\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-17T20:24:15+0000\'\u003e17\\/Sep\\/14 20:24\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e - \u003cspan class=\\\"subText update-info\\\" title=\\\"Jon Bringhurst - 17\\/Sep\\/14 20:25\\\"\u003eedited\u003c\\/span\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cp\u003eRegarding the client commands to create and modify Samza jobs (such as configure-job and run-job), it may be useful to review existing commands that perform a similar 
role:\u003c\\/p\u003e\\n\u003col\u003e\\n\\t\u003cli\u003eMy personal favorite is Slurm\'s set of client commands, of which sbatch is probably the most relevant (\u003ca href=\\\"http:\\/\\/www.schedmd.com\\/slurmdocs\\/sbatch.html\\\" class=\\\"external-link\\\" target=\\\"_blank\\\" rel=\\\"nofollow noopener\\\"\u003ehttp:\\/\\/www.schedmd.com\\/slurmdocs\\/sbatch.html\u003c\\/a\u003e).\u003c\\/li\u003e\\n\\t\u003cli\u003eTo go back a bit further in history, it might be a good idea to take a look at the POSIX qsub style command from PBS\\/Torque (\u003ca href=\\\"http:\\/\\/docs.adaptivecomputing.com\\/torque\\/4-1-4\\/Content\\/topics\\/commands\\/qsub.htm\\\" class=\\\"external-link\\\" target=\\\"_blank\\\" rel=\\\"nofollow noopener\\\"\u003ehttp:\\/\\/docs.adaptivecomputing.com\\/torque\\/4-1-4\\/Content\\/topics\\/commands\\/qsub.htm\u003c\\/a\u003e). Moab\'s msub also follows this design.\u003c\\/li\u003e\\n\u003c\\/ol\u003e\\n\\n\\n\u003cp\u003eRegarding a possible DSL for building configuration, it may be useful to look at Slurm\'s lua callback for job configuration (\u003cb\u003ewarning, GPLv2 code\u003c\\/b\u003e) \u003ca href=\\\"https:\\/\\/github.com\\/SchedMD\\/slurm\\/blob\\/master\\/contribs\\/lua\\/job_submit.lua\\\" class=\\\"external-link\\\" target=\\\"_blank\\\" rel=\\\"nofollow noopener\\\"\u003ehttps:\\/\\/github.com\\/SchedMD\\/slurm\\/blob\\/master\\/contribs\\/lua\\/job_submit.lua\u003c\\/a\u003e\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n \\n\\n \u003ca 
class=\\\"user-hover user-avatar\\\" rel=\\\"jonbringhurst\\\" id=\\\"commentauthor_14137909_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=jonbringhurst\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"jonbringhurst\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Jon Bringhurst\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14137909&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14137909\' class=\'commentdate_14137909_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'17\\/Sep\\/14 20:24\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-17T20:24:15+0000\'\u003e17\\/Sep\\/14 20:24\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e - \u003cspan class=\\\"subText update-info\\\" title=\\\"Jon Bringhurst - 17\\/Sep\\/14 20:25\\\"\u003eedited\u003c\\/span\u003e Regarding the client commands to create and modify Samza jobs (such as configure-job and run-job), it may be useful to review existing commands that perform a similar role: \\n \\n\\t My personal favorite is Slurm\'s set of client commands, of which sbatch is probably the most relevant ( http:\\/\\/www.schedmd.com\\/slurmdocs\\/sbatch.html ). \\n\\t To go back a bit further in history, it might be a good idea to take a look at the POSIX qsub style command from PBS\\/Torque ( http:\\/\\/docs.adaptivecomputing.com\\/torque\\/4-1-4\\/Content\\/topics\\/commands\\/qsub.htm ). Moab\'s msub also follows this design. 
\\n \\n\\n\\n Regarding a possible DSL for building configuration, it may be useful to look at Slurm\'s lua callback for job configuration ( warning, GPLv2 code ) https:\\/\\/github.com\\/SchedMD\\/slurm\\/blob\\/master\\/contribs\\/lua\\/job_submit.lua \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14138156\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"davidzchen\\\" id=\\\"commentauthor_14138156_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=davidzchen\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"davidzchen\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e David Chen\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14138156&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14138156\' class=\'commentdate_14138156_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'17\\/Sep\\/14 22:53\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-17T22:53:34+0000\'\u003e17\\/Sep\\/14 22:53\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cp\u003eAnother idea would be to implement a configuration DSL using a scripting language like 
Python, which is both easy to implement and also allows you to embed Python code in your configuration script.\u003c\\/p\u003e\\n\\n\u003cp\u003eI prefer to have a declarative DSL such as the one used by \u003ca href=\\\"http:\\/\\/google-engtools.blogspot.com\\/2011\\/08\\/build-in-cloud-how-build-system-works.html\\\" class=\\\"external-link\\\" target=\\\"_blank\\\" rel=\\\"nofollow noopener\\\"\u003eGoogle\'s build system, Blaze\u003c\\/a\u003e. I found a more detailed example can be found in \u003ca href=\\\"https:\\/\\/gist.github.com\\/wiseman\\/3834928\\\" class=\\\"external-link\\\" target=\\\"_blank\\\" rel=\\\"nofollow noopener\\\"\u003ethis GitHub Gist\u003c\\/a\u003e.\u003c\\/p\u003e\\n\\n\u003cp\u003eThis would not be difficult to implement since these statements are simply Python function calls and since the DSL is valid Python code, it is also possible to have regular Python code in your configuration script. This way, after each statement is evaluated, the Samza client program can either compile it into JProperties (as a stop-gap solution) or turn it into a Kafka message and publish it to the configuration stream.\u003c\\/p\u003e\\n\\n\u003cp\u003eOf course, we can have both a command line program and a DSL, and I am pretty sure that as Samza takes off, people would want to start writing DSLs and clients for other languages as well. 
The key would be to make sure that the common interface the DSLs and tools talk to is solid.\u003c\\/p\u003e\\n\\n\u003cp\u003eI have opened \u003ca href=\\\"https:\\/\\/issues.apache.org\\/jira\\/browse\\/SAMZA-416\\\" title=\\\"Samza Configuration DSL\\\" class=\\\"issue-link\\\" data-issue-key=\\\"SAMZA-416\\\"\u003eSAMZA-416\u003c\\/a\u003e to discuss the DSL further.\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"davidzchen\\\" id=\\\"commentauthor_14138156_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=davidzchen\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"davidzchen\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e David Chen\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14138156&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14138156\' class=\'commentdate_14138156_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'17\\/Sep\\/14 22:53\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-17T22:53:34+0000\'\u003e17\\/Sep\\/14 22:53\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e Another idea would be to implement a configuration DSL using a scripting language like Python, which is both easy to implement 
and also allows you to embed Python code in your configuration script. \\n\\n I prefer to have a declarative DSL such as the one used by Google\'s build system, Blaze . I found a more detailed example can be found in this GitHub Gist . \\n\\n This would not be difficult to implement since these statements are simply Python function calls and since the DSL is valid Python code, it is also possible to have regular Python code in your configuration script. This way, after each statement is evaluated, the Samza client program can either compile it into JProperties (as a stop-gap solution) or turn it into a Kafka message and publish it to the configuration stream. \\n\\n Of course, we can have both a command line program and a DSL, and I am pretty sure that as Samza takes off, people would want to start writing DSLs and clients for other languages as well. The key would be to make sure that the common interface the DSLs and tools talk to is solid. \\n\\n I have opened SAMZA-416 to discuss the DSL further. 
\u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14144503\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14144503_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14144503&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14144503\' class=\'commentdate_14144503_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'23\\/Sep\\/14 07:29\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-23T07:29:53+0000\'\u003e23\\/Sep\\/14 07:29\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cp\u003eI\'ve updated the design document.\u003c\\/p\u003e\\n\\n\u003cul\u003e\\n\\t\u003cli\u003eUpdated job coordinator section.\u003c\\/li\u003e\\n\\t\u003cli\u003eAdded `control-job.sh` CLI examples.\u003c\\/li\u003e\\n\\t\u003cli\u003eAdded ConfigStream implementation and protocol sections.\u003c\\/li\u003e\\n\\t\u003cli\u003eAdded 
scalability estimations for configuration size, write throughput, and container heartbeats.\u003c\\/li\u003e\\n\\t\u003cli\u003eAdded multi-writer race condition and transactionality design section.\u003c\\/li\u003e\\n\\t\u003cli\u003eAdded Mesos impact section.\u003c\\/li\u003e\\n\\t\u003cli\u003eAdded YARN AM work-preserving restart impact section.\u003c\\/li\u003e\\n\\t\u003cli\u003eEliminated \'Open Questions\' section.\u003c\\/li\u003e\\n\u003c\\/ul\u003e\\n\\n\\n\u003cp\u003eThe largest change was in the \'Design Proposal\' section, which became much more detailed and complete. If you only have time to read one section, make it the \'Design Proposal\' pages.\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14144503_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14144503&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14144503\' class=\'commentdate_14144503_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' 
title=\'23\\/Sep\\/14 07:29\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-23T07:29:53+0000\'\u003e23\\/Sep\\/14 07:29\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e I\'ve updated the design document. \\n\\n \\n\\t Updated job coordinator section. \\n\\t Added `control-job.sh` CLI examples. \\n\\t Added ConfigStream implementation and protocol sections. \\n\\t Added scalability estimations for configuration size, write throughput, and container heartbeats. \\n\\t Added multi-writer race condition and transactionality design section. \\n\\t Added Mesos impact section. \\n\\t Added YARN AM work-preserving restart impact section. \\n\\t Eliminated \'Open Questions\' section. \\n \\n\\n\\n The largest change was in the \'Design Proposal\' section, which became much more detailed and complete. If you only have time to read one section, make it the \'Design Proposal\' pages. \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14144513\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14144513_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a 
comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14144513&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14144513\' class=\'commentdate_14144513_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'23\\/Sep\\/14 07:42\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-23T07:42:14+0000\'\u003e23\\/Sep\\/14 07:42\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e - \u003cspan class=\\\"subText update-info\\\" title=\\\"Chris Riccomini - 23\\/Sep\\/14 16:21\\\"\u003eedited\u003c\\/span\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cp\u003eSome open questions with this proposal:\u003c\\/p\u003e\\n\\n\u003col\u003e\\n\\t\u003cli\u003eConfigStream seems like a bad name now. We\'re over loading the stream to do a lot more than just config.\u003c\\/li\u003e\\n\\t\u003cli\u003eThe existing proposal depends on the transactionality of the underlying SystemConsumer\\/SystemProducer. Without transactionality, offset checkpoints are no longer atomic. In Samza\'s existing implementation, even without transactionality, checkpoints are done atomically, per-task.\u003c\\/li\u003e\\n\\t\u003cli\u003eHow to handle defaults. One way would be to have the job coordinator dump all of its defaults into the ConfigStream the first time it runs. 
The other way would be to have the control-job.sh just show defaults as part of --help.\u003c\\/li\u003e\\n\u003c\\/ol\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14144513_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14144513&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14144513\' class=\'commentdate_14144513_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'23\\/Sep\\/14 07:42\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-23T07:42:14+0000\'\u003e23\\/Sep\\/14 07:42\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e - \u003cspan class=\\\"subText update-info\\\" title=\\\"Chris Riccomini - 23\\/Sep\\/14 16:21\\\"\u003eedited\u003c\\/span\u003e Some open questions with this proposal: \\n\\n \\n\\t ConfigStream seems like a bad name now. We\'re over loading the stream to do a lot more than just config. 
\\n\\t The existing proposal depends on the transactionality of the underlying SystemConsumer\\/SystemProducer. Without transactionality, offset checkpoints are no longer atomic. In Samza\'s existing implementation, even without transactionality, checkpoints are done atomically, per-task. \\n\\t How to handle defaults. One way would be to have the job coordinator dump all of its defaults into the ConfigStream the first time it runs. The other way would be to have the control-job.sh just show defaults as part of --help. \\n \\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14146990\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"theduderog\\\" id=\\\"commentauthor_14146990_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=theduderog\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"theduderog\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Roger Hoover\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14146990&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14146990\' class=\'commentdate_14146990_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'24\\/Sep\\/14 22:34\'\u003e\u003ctime class=\'livestamp\' 
datetime=\'2014-09-24T22:34:49+0000\'\u003e24\\/Sep\\/14 22:34\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cp\u003e1. For the name, does JobState make sense? The work of this component would be to track job-level state (job config, offsets for all tasks in the job, partition mappings for all tasks in the job).\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"theduderog\\\" id=\\\"commentauthor_14146990_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=theduderog\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"theduderog\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Roger Hoover\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14146990&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14146990\' class=\'commentdate_14146990_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'24\\/Sep\\/14 22:34\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-24T22:34:49+0000\'\u003e24\\/Sep\\/14 22:34\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e 1. For the name, does JobState make sense? 
The work of this component would be to track job-level state (job config, offsets for all tasks in the job, partition mappings for all tasks in the job). \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14147839\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14147839_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14147839&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14147839\' class=\'commentdate_14147839_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'25\\/Sep\\/14 15:11\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-25T15:11:32+0000\'\u003e25\\/Sep\\/14 15:11\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cblockquote\u003e\u003cp\u003edoes JobState make sense\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eIt does, but the word \\\"state\\\" is already a way-overloaded term with 
Samza. We\'re trying to avoid using it in more places.\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14147839_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14147839&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14147839\' class=\'commentdate_14147839_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'25\\/Sep\\/14 15:11\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-25T15:11:32+0000\'\u003e25\\/Sep\\/14 15:11\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e does JobState make sense \\n\\n It does, but the word \\\"state\\\" is already a way-overloaded term with Samza. We\'re trying to avoid using it in more places. 
\u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14151534\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"martinkl\\\" id=\\\"commentauthor_14151534_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=martinkl\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"martinkl\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Martin Kleppmann\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14151534&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14151534\' class=\'commentdate_14151534_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'29\\/Sep\\/14 09:34\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-29T09:34:54+0000\'\u003e29\\/Sep\\/14 09:34\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cp\u003eThis is a nice proposal, I like it a lot. Various thoughts on the design doc (first three are responses to your questions above):\u003c\\/p\u003e\\n\\n\u003col\u003e\\n\\t\u003cli\u003eI think the name \\\"ConfigStream\\\" is actually ok. 
A checkpoint or task-to-changelog-partition assignment can quite reasonably be regarded as configuration (even though it arises as a side-effect of execution rather than from a config file). Maybe \\\"metadata\\\" rather than \\\"config\\\", but \\\"config\\\" is shorter.\u003c\\/li\u003e\\n\\t\u003cli\u003eYou say the atomicity of per-task checkpointing is broken, but I don\'t follow why. Isn\'t a checkpoint update still one message written to the ConfigStream?\u003c\\/li\u003e\\n\\t\u003cli\u003eI think defaults should not be written to the stream, but rather filled in at runtime at the last possible moment when the configuration is read. If they were written to the stream, it would become impossible for the framework to change defaults in future (as the job cannot distinguish between a default written to the stream, which should be replaced by a new default, and an explicitly configured parameter, which should be honoured).\u003c\\/li\u003e\\n\\t\u003cli\u003eDo I understand correctly that from a container\'s point of view, the config remains immutable once it has been fetched from the AM\'s HTTP endpoint? The container does not consume the ConfigStream, and there is no polling of the HTTP endpoint (except for hot standby as discussed in the doc)? I think that\'s good, because a mutable config within a container would require big changes to SystemProducers\\/Consumers etc.\u003c\\/li\u003e\\n\\t\u003cli\u003eWere you imagining that every job would have their own ConfigStream, or could a single ConfigStream be shared by multiple jobs? (I think each having their own would be simpler and nicer)\u003c\\/li\u003e\\n\\t\u003cli\u003eShould the config stream location URL include the job name? 
An early example in the design doc (kafka:\\/\\/localhost:10251\\/my-job) includes the job name, later ones do not.\u003c\\/li\u003e\\n\\t\u003cli\u003eWhat\'s the difference between control-job.sh and configure-job.sh?\u003c\\/li\u003e\\n\\t\u003cli\u003e+1 on explicit restart.\u003c\\/li\u003e\\n\\t\u003cli\u003eNot wild on the proposed serialisation format for config messages (t=... k=... v=...). What about escaping spaces and equals signs within values? Better to just use JSON, IMHO. (If using JSON in the key of a message sent to Kafka, need to ensure there is a deterministic order of keys, so that compaction works.)\u003c\\/li\u003e\\n\\t\u003cli\u003eMoving ConfigRewriter into the coordinator: interesting idea, not sure about the implications. What are the use cases for ConfigRewriter, besides expanding a regex for specifying input streams? e.g. it might be used for fetching config from an external configuration management system \\u2013 in that case, moving ConfigRewriter may be intrusive, as the coordinator may not be able to access that external system.\u003c\\/li\u003e\\n\\t\u003cli\u003eRegarding estimate of time to consume the ConfigStream, I should point out that \\\"control-job.sh --list\\\" will take 100 seconds too, which is not so great since it\'s an interactive command. However, most jobs will have vastly smaller config, so perhaps that\'s an edge case we can live with.\u003c\\/li\u003e\\n\\t\u003cli\u003e\\\"If 1000 Samza jobs were run in one Samza grid, 50 Kafka brokers would be required just to sustain the ConfigStream write rate\\\" \\u2014 you\'re talking 1000 jobs with 1000 containers each here, i.e. 1 million CPU cores. 
I hope we get to see Samza running at that scale one day \u003cimg class=\\\"emoticon\\\" src=\\\"\\/jira\\/images\\/icons\\/emoticons\\/smile.png\\\" height=\\\"16\\\" width=\\\"16\\\" align=\\\"absmiddle\\\" alt=\\\"\\\" border=\\\"0\\\"\\/\u003e\u003c\\/li\u003e\\n\\t\u003cli\u003eThe prospect of a work-preserving AM restart is nice.\u003c\\/li\u003e\\n\\t\u003cli\u003eWill ConfigStream be a general-purpose mechanism for parts of the framework which need to remember some information across job restarts? The assignment of tasks to changelog partitions is one example of such a thing that needs to be durable, and I could imagine there might be more. For example, a MySQL binlog SystemConsumer may want to remember metadata about leader failover events in some durable location. This is not a concrete need yet, just an idea to keep in mind.\u003c\\/li\u003e\\n\\t\u003cli\u003eHow does the ConfigStream checkpointing interact with Kafka\'s own consumer offset management? Is the intention that Samza will eventually switch over to Kafka\'s offset management, or will Samza keep doing its own checkpointing indefinitely?\u003c\\/li\u003e\\n\u003c\\/ol\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"martinkl\\\" id=\\\"commentauthor_14151534_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=martinkl\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg 
src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"martinkl\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Martin Kleppmann\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14151534&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14151534\' class=\'commentdate_14151534_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'29\\/Sep\\/14 09:34\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-29T09:34:54+0000\'\u003e29\\/Sep\\/14 09:34\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e This is a nice proposal, I like it a lot. Various thoughts on the design doc (first three are responses to your questions above): \\n\\n \\n\\t I think the name \\\"ConfigStream\\\" is actually ok. A checkpoint or task-to-changelog-partition assignment can quite reasonably be regarded as configuration (even though it arises as a side-effect of execution rather than from a config file). Maybe \\\"metadata\\\" rather than \\\"config\\\", but \\\"config\\\" is shorter. \\n\\t You say the atomicity of per-task checkpointing is broken, but I don\'t follow why. Isn\'t a checkpoint update still one message written to the ConfigStream? \\n\\t I think defaults should not be written to the stream, but rather filled in at runtime at the last possible moment when the configuration is read. If they were written to the stream, it would become impossible for the framework to change defaults in future (as the job cannot distinguish between a default written to the stream, which should be replaced by a new default, and an explicitly configured parameter, which should be honoured). \\n\\t Do I understand correctly that from a container\'s point of view, the config remains immutable once it has been fetched from the AM\'s HTTP endpoint? 
The container does not consume the ConfigStream, and there is no polling of the HTTP endpoint (except for hot standby as discussed in the doc)? I think that\'s good, because a mutable config within a container would require big changes to SystemProducers\\/Consumers etc. \\n\\t Were you imagining that every job would have their own ConfigStream, or could a single ConfigStream be shared by multiple jobs? (I think each having their own would be simpler and nicer) \\n\\t Should the config stream location URL include the job name? An early example in the design doc (kafka:\\/\\/localhost:10251\\/my-job) includes the job name, later ones do not. \\n\\t What\'s the difference between control-job.sh and configure-job.sh? \\n\\t +1 on explicit restart. \\n\\t Not wild on the proposed serialisation format for config messages (t=... k=... v=...). What about escaping spaces and equals signs within values? Better to just use JSON, IMHO. (If using JSON in the key of a message sent to Kafka, need to ensure there is a deterministic order of keys, so that compaction works.) \\n\\t Moving ConfigRewriter into the coordinator: interesting idea, not sure about the implications. What are the use cases for ConfigRewriter, besides expanding a regex for specifying input streams? e.g. it might be used for fetching config from an external configuration management system \\u2013 in that case, moving ConfigRewriter may be intrusive, as the coordinator may not be able to access that external system. \\n\\t Regarding estimate of time to consume the ConfigStream, I should point out that \\\"control-job.sh --list\\\" will take 100 seconds too, which is not so great since it\'s an interactive command. However, most jobs will have vastly smaller config, so perhaps that\'s an edge case we can live with. 
\\n\\t \\\"If 1000 Samza jobs were run in one Samza grid, 50 Kafka brokers would be required just to sustain the ConfigStream write rate\\\" \\u2014 you\'re talking 1000 jobs with 1000 containers each here, i.e. 1 million CPU cores. I hope we get to see Samza running at that scale one day \\n\\t The prospect of a work-preserving AM restart is nice. \\n\\t Will ConfigStream be a general-purpose mechanism for parts of the framework which need to remember some information across job restarts? The assignment of tasks to changelog partitions is one example of such a thing that needs to be durable, and I could imagine there might be more. For example, a MySQL binlog SystemConsumer may want to remember metadata about leader failover events in some durable location. This is not a concrete need yet, just an idea to keep in mind. \\n\\t How does the ConfigStream checkpointing interact with Kafka\'s own consumer offset management? Is the intention that Samza will eventually switch over to Kafka\'s offset management, or will Samza keep doing its own checkpointing indefinitely? 
\\n \\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14151929\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14151929_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14151929&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14151929\' class=\'commentdate_14151929_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'29\\/Sep\\/14 17:30\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-29T17:30:05+0000\'\u003e29\\/Sep\\/14 17:30\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cblockquote\u003e\u003cp\u003eMaybe \\\"metadata\\\" rather than \\\"config\\\", but \\\"config\\\" is shorter.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eHmm. MetadataStream. That might be technically more accurate and descriptive. 
I agree \\\"config\\\" might be construed as technically correct, but I\'m a little worried that it\'s not really what people think of when they hear the word \\\"config\\\".\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eYou say the atomicity of per-task checkpointing is broken, but I don\'t follow why. Isn\'t a checkpoint update still one message written to the ConfigStream?\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eCurrently, when a StreamTask\'s offsets are committed, they\'re sent as a single message with Kafka. In the new proposal, they would be sent as a series of messages: one for each input SSP. I suppose \\\"non-transactional\\\" might be a bet more correct. The difference is that a container can fail half-way through a single StreamTask\'s offset commit in the new proposal, but not in our current implementation.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eI think defaults should not be written to the stream, but rather filled in at runtime at the last possible moment when the configuration is read.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eThis needs to be thought through a bit, I think. The main concern was that the control-job.sh script might be a different version than what the job is running. In such a case, you (the developer) might wish to know what value is being used for config \\\"X\\\", but the default of the Samza version that control-job.sh is running might be different from the version that the job is running on.\u003c\\/p\u003e\\n\\n\u003cp\u003ePerhaps a simple query of the AM\'s HTTP JSON web service would be a better solution, though this would require the job being up and running to fetch config defaults.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eDo I understand correctly that from a container\'s point of view, the config remains immutable once it has been fetched from the AM\'s HTTP endpoint? 
The container does not consume the ConfigStream, and there is no polling of the HTTP endpoint (except for hot standby as discussed in the doc)?\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eCorrect.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eWere you imagining that every job would have their own ConfigStream, or could a single ConfigStream be shared by multiple jobs? (I think each having their own would be simpler and nicer)\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eI had a note about this in one of my drafts, but I deleted it. The current proposal is one ConfigStream per-job. I couldn\'t come up with a good reason to have multiple jobs share a ConfigStream.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eShould the config stream location URL include the job name? An early example in the design doc (kafka:\\/\\/localhost:10251\\/my-job) includes the job name, later ones do not.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eI went back and forth on this. The trade-offs between the two approaches (at least, as I see it) are listed in the proposal. I opted for the simpler (from a dev\'s perspective) approach in the latest proposal.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eWhat\'s the difference between control-job.sh and configure-job.sh?\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eThey are the same thing. I started calling it configure-job.sh, but I\'ve switched to calling it control-job.sh, since it might do things like restart the job, etc. Looks like I missed a few renames in the latest design doc. Everything should read control-job.sh.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eNot wild on the proposed serialisation format for config messages (t=... k=... v=...). What about escaping spaces and equals signs within values? Better to just use JSON, IMHO. 
(If using JSON in the key of a message sent to Kafka, need to ensure there is a deterministic order of keys, so that compaction works.)\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eOops, maybe I wasn\'t explicit enough. The current proposal is to use JSON for both keys and values. I think that I just used shorthand notation at various points in the docs. Agree we\'ll need to define an ordering for the keys, which is a bit odd\\/error prone. I\'m not sure of a good way around this.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eRegarding estimate of time to consume the ConfigStream, I should point out that \\\"control-job.sh --list\\\" will take 100 seconds too, which is not so great since it\'s an interactive command. However, most jobs will have vastly smaller config, so perhaps that\'s an edge case we can live with.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eYea, it\'s kind of annoying, but I can live with it, I think. This estimate is pretty pessimistic, though, in terms of job size. One other short-circuit that we could employ would be to have the control-job.sh script start by trying to query the AM\'s HTTP JSON server (though this would require the control-job.sh script to somehow know where the HTTP:PORT is).\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eWill ConfigStream be a general-purpose mechanism for parts of the framework which need to remember some information across job restarts?\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eHmm. I hadn\'t thought about this in great detail. It seems like it\'d be useful to provide a \\\"write config\\\" facility to pluggable parts of the framework, like SystemConsumers, as you\'ve said. Given a good implementation of the ConfigStream, it seems possible to expose it.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eHow does the ConfigStream checkpointing interact with Kafka\'s own consumer offset management? 
Is the intention that Samza will eventually switch over to Kafka\'s offset management, or will Samza keep doing its own checkpointing indefinitely?\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eThe current porposal is to stay off of Kafka\'s offset management, and just use our own. Technically, we\'ll still have to interact with Kafka\'s, since that\'s where transactions are managed, but we won\'t store any offsets in it--we\'ll just tell it to commit transactions. I haven\'t thought about this in great depth, but my gut reasoning is that it\'s best to stay away from dependency directly on Kafka for things like offset checkpoints, especially if there\'s the possible for non-Kafka offsets needing to be checkpointed. It could also cause another bifurcation between the way offsets are stored, and the way other config is stored.\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14151929_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca 
href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14151929&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14151929\' class=\'commentdate_14151929_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'29\\/Sep\\/14 17:30\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-29T17:30:05+0000\'\u003e29\\/Sep\\/14 17:30\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e Maybe \\\"metadata\\\" rather than \\\"config\\\", but \\\"config\\\" is shorter. \\n\\n Hmm. MetadataStream. That might be technically more accurate and descriptive. I agree \\\"config\\\" might be construed as technically correct, but I\'m a little worried that it\'s not really what people think of when they hear the word \\\"config\\\". \\n\\n You say the atomicity of per-task checkpointing is broken, but I don\'t follow why. Isn\'t a checkpoint update still one message written to the ConfigStream? \\n\\n Currently, when a StreamTask\'s offsets are committed, they\'re sent as a single message with Kafka. In the new proposal, they would be sent as a series of messages: one for each input SSP. I suppose \\\"non-transactional\\\" might be a bet more correct. The difference is that a container can fail half-way through a single StreamTask\'s offset commit in the new proposal, but not in our current implementation. \\n\\n I think defaults should not be written to the stream, but rather filled in at runtime at the last possible moment when the configuration is read. \\n\\n This needs to be thought through a bit, I think. The main concern was that the control-job.sh script might be a different version than what the job is running. In such a case, you (the developer) might wish to know what value is being used for config \\\"X\\\", but the default of the Samza version that control-job.sh is running might be different from the version that the job is running on. 
\\n\\n Perhaps a simple query of the AM\'s HTTP JSON web service would be a better solution, though this would require the job being up and running to fetch config defaults. \\n\\n Do I understand correctly that from a container\'s point of view, the config remains immutable once it has been fetched from the AM\'s HTTP endpoint? The container does not consume the ConfigStream, and there is no polling of the HTTP endpoint (except for hot standby as discussed in the doc)? \\n\\n Correct. \\n\\n Were you imagining that every job would have their own ConfigStream, or could a single ConfigStream be shared by multiple jobs? (I think each having their own would be simpler and nicer) \\n\\n I had a note about this in one of my drafts, but I deleted it. The current proposal is one ConfigStream per-job. I couldn\'t come up with a good reason to have multiple jobs share a ConfigStream. \\n\\n Should the config stream location URL include the job name? An early example in the design doc (kafka:\\/\\/localhost:10251\\/my-job) includes the job name, later ones do not. \\n\\n I went back and forth on this. The trade-offs between the two approaches (at least, as I see it) are listed in the proposal. I opted for the simpler (from a dev\'s perspective) approach in the latest proposal. \\n\\n What\'s the difference between control-job.sh and configure-job.sh? \\n\\n They are the same thing. I started calling it configure-job.sh, but I\'ve switched to calling it control-job.sh, since it might do things like restart the job, etc. Looks like I missed a few renames in the latest design doc. Everything should read control-job.sh. \\n\\n Not wild on the proposed serialisation format for config messages (t=... k=... v=...). What about escaping spaces and equals signs within values? Better to just use JSON, IMHO. (If using JSON in the key of a message sent to Kafka, need to ensure there is a deterministic order of keys, so that compaction works.) \\n\\n Oops, maybe I wasn\'t explicit enough. 
The current proposal is to use JSON for both keys and values. I think that I just used shorthand notation at various points in the docs. Agree we\'ll need to define an ordering for the keys, which is a bit odd\\/error prone. I\'m not sure of a good way around this. \\n\\n Regarding estimate of time to consume the ConfigStream, I should point out that \\\"control-job.sh --list\\\" will take 100 seconds too, which is not so great since it\'s an interactive command. However, most jobs will have vastly smaller config, so perhaps that\'s an edge case we can live with. \\n\\n Yea, it\'s kind of annoying, but I can live with it, I think. This estimate is pretty pessimistic, though, in terms of job size. One other short-circuit that we could employ would be to have the control-job.sh script start by trying to query the AM\'s HTTP JSON server (though this would require the control-job.sh script to somehow know where the HTTP:PORT is). \\n\\n Will ConfigStream be a general-purpose mechanism for parts of the framework which need to remember some information across job restarts? \\n\\n Hmm. I hadn\'t thought about this in great detail. It seems like it\'d be useful to provide a \\\"write config\\\" facility to pluggable parts of the framework, like SystemConsumers, as you\'ve said. Given a good implementation of the ConfigStream, it seems possible to expose it. \\n\\n How does the ConfigStream checkpointing interact with Kafka\'s own consumer offset management? Is the intention that Samza will eventually switch over to Kafka\'s offset management, or will Samza keep doing its own checkpointing indefinitely? \\n\\n The current porposal is to stay off of Kafka\'s offset management, and just use our own. Technically, we\'ll still have to interact with Kafka\'s, since that\'s where transactions are managed, but we won\'t store any offsets in it--we\'ll just tell it to commit transactions. 
I haven\'t thought about this in great depth, but my gut reasoning is that it\'s best to stay away from dependency directly on Kafka for things like offset checkpoints, especially if there\'s the possible for non-Kafka offsets needing to be checkpointed. It could also cause another bifurcation between the way offsets are stored, and the way other config is stored. \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14153424\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"martinkl\\\" id=\\\"commentauthor_14153424_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=martinkl\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"martinkl\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Martin Kleppmann\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14153424&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14153424\' class=\'commentdate_14153424_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'30\\/Sep\\/14 17:20\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-30T17:20:28+0000\'\u003e30\\/Sep\\/14 17:20\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body 
flooded\\\"\u003e\u003cblockquote\u003e\u003cp\u003eIn the new proposal, they would be sent as a series of messages: one for each input SSP.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eAh yes, I missed that. My hunch is that it\'s not a big problem, since state changelogs are currently also not atomically tied to the checkpoints either, so the semantics of container restart are somewhat vague already. And we generally don\'t give any guarantee regarding cross-SSP coordination. Once we get Kafka transactions, these things can all be tied together atomically.\u003c\\/p\u003e\\n\\n\u003cp\u003eAlternatively, would it be an option to continue writing checkpoints in the current form (per-StreamTask rather than per-SSP)? I don\'t see why the change to a MetadataStream forces us to use per-SSP checkpoint messages.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eThe main concern was that the control-job.sh script might be a different version than what the job is running. In such a case, you (the developer) might wish to know what value is being used for config \\\"X\\\", but the default of the Samza version that control-job.sh is running might be different from the version that the job is running on.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eI think it\'s quite reasonable to expect that if you want to know the \'interpreted\' value of a config property (after default is filled in, and perhaps also after it is parsed or otherwise processed) then you need to ask the AM, either via the web interface or via a command-line tool that talks to an API. 
Then control-job.sh is concerned only with the config that is explicitly declared, and not what is inferred.\u003c\\/p\u003e\\n\\n\u003cp\u003eIf you want control-job.sh to also include docs (\u003ctt\u003e--help\u003c\\/tt\u003e), you have the same version mismatch problem: if the job and the command-line tool are running different versions of Samza, the meaning of some of the properties may have changed.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eThe current proposal is to use JSON for both keys and values. I think that I just used shorthand notation at various points in the docs. Agree we\'ll need to define an ordering for the keys, which is a bit odd\\/error prone. I\'m not sure of a good way around this.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eOk. An alternative would be to use a serialization format with a canonical representation (most binary serialization formats would qualify: Avro, Protocol Buffers, ASN.1 DER, etc), but that makes it less convenient if you want to inspect the stream using kafka-console-consumer. 
I don\'t have a strong opinion either way.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eI haven\'t thought about this in great depth, but my gut reasoning is that it\'s best to stay away from dependency directly on Kafka for things like offset checkpoints, especially if there\'s the possible for non-Kafka offsets needing to be checkpointed.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eAgree on the gut reasoning, though I haven\'t thought about it in great depth.\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"martinkl\\\" id=\\\"commentauthor_14153424_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=martinkl\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"martinkl\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Martin Kleppmann\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14153424&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14153424\' class=\'commentdate_14153424_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'30\\/Sep\\/14 17:20\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-09-30T17:20:28+0000\'\u003e30\\/Sep\\/14 17:20\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e In the new 
proposal, they would be sent as a series of messages: one for each input SSP. \\n\\n Ah yes, I missed that. My hunch is that it\'s not a big problem, since state changelogs are currently also not atomically tied to the checkpoints either, so the semantics of container restart are somewhat vague already. And we generally don\'t give any guarantee regarding cross-SSP coordination. Once we get Kafka transactions, these things can all be tied together atomically. \\n\\n Alternatively, would it be an option to continue writing checkpoints in the current form (per-StreamTask rather than per-SSP)? I don\'t see why the change to a MetadataStream forces us to use per-SSP checkpoint messages. \\n\\n The main concern was that the control-job.sh script might be a different version than what the job is running. In such a case, you (the developer) might wish to know what value is being used for config \\\"X\\\", but the default of the Samza version that control-job.sh is running might be different from the version that the job is running on. \\n\\n I think it\'s quite reasonable to expect that if you want to know the \'interpreted\' value of a config property (after default is filled in, and perhaps also after it is parsed or otherwise processed) then you need to ask the AM, either via the web interface or via a command-line tool that talks to an API. Then control-job.sh is concerned only with the config that is explicitly declared, and not what is inferred. \\n\\n If you want control-job.sh to also include docs ( --help ), you have the same version mismatch problem: if the job and the command-line tool are running different versions of Samza, the meaning of some of the properties may have changed. \\n\\n The current proposal is to use JSON for both keys and values. I think that I just used shorthand notation at various points in the docs. Agree we\'ll need to define an ordering for the keys, which is a bit odd\\/error prone. I\'m not sure of a good way around this. \\n\\n Ok. 
An alternative would be to use a serialization format with a canonical representation (most binary serialization formats would qualify: Avro, Protocol Buffers, ASN.1 DER, etc), but that makes it less convenient if you want to inspect the stream using kafka-console-consumer. I don\'t have a strong opinion either way. \\n\\n I haven\'t thought about this in great depth, but my gut reasoning is that it\'s best to stay away from dependency directly on Kafka for things like offset checkpoints, especially if there\'s the possible for non-Kafka offsets needing to be checkpointed. \\n\\n Agree on the gut reasoning, though I haven\'t thought about it in great depth. \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14160457\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"theduderog\\\" id=\\\"commentauthor_14160457_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=theduderog\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"theduderog\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Roger Hoover\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14160457&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14160457\' class=\'commentdate_14160457_verbose subText 
comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'06\\/Oct\\/14 16:28\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-06T16:28:20+0000\'\u003e06\\/Oct\\/14 16:28\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cblockquote\u003e\u003cp\u003eOk. An alternative would be to use a serialization format with a canonical representation (most binary serialization formats would qualify: Avro, Protocol Buffers, ASN.1 DER, etc), but that makes it less convenient if you want to inspect the stream using kafka-console-consumer. I don\'t have a strong opinion either way.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eIt\'s REALLY handy for developers to be able to inspect\\/debug using the kafka-console-consumer.sh. Maybe you can use this annotation to force ordering?\u003c\\/p\u003e\\n\\n\u003cp\u003e\u003ca href=\\\"http:\\/\\/jackson.codehaus.org\\/1.6.5\\/javadoc\\/org\\/codehaus\\/jackson\\/annotate\\/JsonPropertyOrder.html\\\" class=\\\"external-link\\\" target=\\\"_blank\\\" rel=\\\"nofollow noopener\\\"\u003ehttp:\\/\\/jackson.codehaus.org\\/1.6.5\\/javadoc\\/org\\/codehaus\\/jackson\\/annotate\\/JsonPropertyOrder.html\u003c\\/a\u003e \u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"theduderog\\\" id=\\\"commentauthor_14160457_concise\\\" 
href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=theduderog\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"theduderog\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Roger Hoover\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14160457&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14160457\' class=\'commentdate_14160457_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'06\\/Oct\\/14 16:28\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-06T16:28:20+0000\'\u003e06\\/Oct\\/14 16:28\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e Ok. An alternative would be to use a serialization format with a canonical representation (most binary serialization formats would qualify: Avro, Protocol Buffers, ASN.1 DER, etc), but that makes it less convenient if you want to inspect the stream using kafka-console-consumer. I don\'t have a strong opinion either way. \\n\\n It\'s REALLY handy for developers to be able to inspect\\/debug using the kafka-console-consumer.sh. Maybe you can use this annotation to force ordering? 
\\n\\n http:\\/\\/jackson.codehaus.org\\/1.6.5\\/javadoc\\/org\\/codehaus\\/jackson\\/annotate\\/JsonPropertyOrder.html \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14162063\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"jonbringhurst\\\" id=\\\"commentauthor_14162063_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=jonbringhurst\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"jonbringhurst\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Jon Bringhurst\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14162063&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14162063\' class=\'commentdate_14162063_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'07\\/Oct\\/14 16:23\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-07T16:23:03+0000\'\u003e07\\/Oct\\/14 16:23\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cp\u003eI haven\'t been following along this discussion in detail, so this may be a non-issue. However, it might be a good idea to consider Amazon AWS Kinesis (very similar to Kafka) in this discussion. 
Of note is that Kinesis has a 50KB message size limit.\u003c\\/p\u003e\\n\\n\u003cp\u003e\u003ca href=\\\"https:\\/\\/aws.amazon.com\\/kinesis\\/\\\" class=\\\"external-link\\\" target=\\\"_blank\\\" rel=\\\"nofollow noopener\\\"\u003ehttps:\\/\\/aws.amazon.com\\/kinesis\\/\u003c\\/a\u003e\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"jonbringhurst\\\" id=\\\"commentauthor_14162063_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=jonbringhurst\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"jonbringhurst\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Jon Bringhurst\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14162063&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14162063\' class=\'commentdate_14162063_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'07\\/Oct\\/14 16:23\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-07T16:23:03+0000\'\u003e07\\/Oct\\/14 16:23\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e I haven\'t been following along this discussion in detail, so this may be a non-issue. However, it might be a good idea to consider Amazon AWS Kinesis (very similar to Kafka) in this discussion. 
Of note is that Kinesis has a 50KB message size limit. \\n\\n https:\\/\\/aws.amazon.com\\/kinesis\\/ \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14167573\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14167573_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14167573&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14167573\' class=\'commentdate_14167573_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'10\\/Oct\\/14 21:43\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-10T21:43:21+0000\'\u003e10\\/Oct\\/14 21:43\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e - \u003cspan class=\\\"subText update-info\\\" title=\\\"Chris Riccomini - 10\\/Oct\\/14 21:44\\\"\u003eedited\u003c\\/span\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cp\u003eKnown remaining issues with the proposed 
design:\u003c\\/p\u003e\\n\\n\u003col\u003e\\n\\t\u003cli\u003eMessage payload format K:V, vs. K:\\n{K:V, ...}\u003c\\/li\u003e\\n\\t\u003cli\u003eHow does the control-job.sh script use the SystemConsumer\\/SystemProducer?\u003c\\/li\u003e\\n\\t\u003cli\u003eHow will this work in a dev environment?\u003c\\/li\u003e\\n\u003c\\/ol\u003e\\n\\n\\n\u003cp\u003eI will address these in order.\u003c\\/p\u003e\\n\\n\u003cp\u003e\u003cb\u003eMessage payload format\u003c\\/b\u003e\u003c\\/p\u003e\\n\\n\u003cp\u003eThe current design models ConfigStream messages as a simple key-value pair. The downside to this approach is that it breaks atomicity for a StreamTask\'s checkpoint (multiple messages are required for a single checkpoint--one per SSP:offset pair).\u003c\\/p\u003e\\n\\n\u003cp\u003eThe two solutions to this are to (1) depend on transactionality, or (2) support a message payload format that is nested K: \u003c\\/p\u003e\\n{K:V, ...}\\n\u003cp\u003e. All offset checkpoints for a single task could therefore be written in a single message, thus maintaining atomic commits for all checkpoints within a single task. The latter approach (nested payloads) is how we currently checkpoint. The downsides to this approach are:\u003c\\/p\u003e\\n\\n\u003col\u003e\\n\\t\u003cli\u003eThe single offset checkpoint message will be much larger than any individual offset checkpoint message in approach (1).\u003c\\/li\u003e\\n\\t\u003cli\u003eModifying an offset checkpoint requires the job coordinator to do a read-modify-write, which is more complicated than the simple put that would be required for approach (1).\u003c\\/li\u003e\\n\\t\u003cli\u003eIt muddles the data model a little bit.\u003c\\/li\u003e\\n\u003c\\/ol\u003e\\n\\n\\n\u003cp\u003eThe problem with (1) is mainly that it depends on transactionality. Without this, there\'s the potential for a failure to occur halfway through a task checkpoint. In such a case, some input streams would fall back, and others would not. 
I tend to agree with Martin\'s assessment of the problem:\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eMy hunch is that it\'s not a big problem, since state changelogs are currently also not atomically tied to the checkpoints either, so the semantics of container restart are somewhat vague already.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eBut given that it should be fairly trivial to solve this using nested payloads, we might as well do so. We can always clean it up later, if transactionality becomes commonplace.\u003c\\/p\u003e\\n\\n\u003cp\u003e\u003cb\u003eHow does the control-job.sh script use the SystemConsumer\\/SystemProducer?\u003c\\/b\u003e\u003c\\/p\u003e\\n\\n\u003cp\u003eThis is a tricky one. Given that Samza has a SystemConsumer\\/SystemProducer API, it seems ideal to have the ConfigStream implementation use these interfaces for reading\\/writing config. In the design document, I glossed over how the job coordinator and control-job.sh script know how to translate a URI to a Config for SystemConsumer\\/SystemProducer. This is a bit of a chicken and egg problem. The control-job.sh script needs to know how to write to the ConfigStream, but in order to do that, it needs config for the SystemFactory.getConsumer() call.\u003c\\/p\u003e\\n\\n\u003cp\u003eTwo potential solutions that I can think of are:\u003c\\/p\u003e\\n\\n\u003col\u003e\\n\\t\u003cli\u003eIntroduce a SAMZA_HOME environment variable, which expects a conf\\/samza-site.properties configuration.\u003c\\/li\u003e\\n\\t\u003cli\u003eAdd a SystemFactory.getConfig(URI uri) interface.\u003c\\/li\u003e\\n\u003c\\/ol\u003e\\n\\n\\n\u003cp\u003eIntroducing a SAMZA_HOME environment variable seems very heavy handed. It\'s going to have to be set on every node in the YARN cluster (since the job coordinator could run on any node), as well as the machine that control-job.sh is going to run on. 
This will be hard to operate, may be (Samza) version dependent, and seems kind of clunky.\u003c\\/p\u003e\\n\\n\u003cp\u003eAdding a getConfig() API seems mildly hacky. The main problem with this approach is how to determine which SystemFactory to use based on the URI. We could do something as simple as Class.forName(uri.getScheme() + \\\"SystemFactory\\\").newInstance(). This seems a bit hacky and dangerous, but should work, and maintains pluggability.\u003c\\/p\u003e\\n\\n\u003cp\u003eDoes anyone else have any other ideas for this?\u003c\\/p\u003e\\n\\n\u003cp\u003e\u003cb\u003eHow will this work in a dev environment?\u003c\\/b\u003e\u003c\\/p\u003e\\n\\n\u003cp\u003eIt\'s relatively easy to start a Samza job locally using the ThreadJobFactory or ProcessJobFactory right now. Config can be inserted via the constructor, and no Kafka grid is required to do this. In the new design proposal, it seems that developers will be required to have a Kafka grid (or some equivalent system implementation--hbase, or whatever) to store their configuration. 
There doesn\'t seem to be much of a way around this, unless the FileSystemConsumer\\/FileSystemProducer could be made to work as the backing system for the ConfigStream (which seems possible at first glance).\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14167573_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14167573&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14167573\' class=\'commentdate_14167573_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'10\\/Oct\\/14 21:43\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-10T21:43:21+0000\'\u003e10\\/Oct\\/14 21:43\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e - \u003cspan class=\\\"subText update-info\\\" title=\\\"Chris Riccomini - 10\\/Oct\\/14 21:44\\\"\u003eedited\u003c\\/span\u003e Known remaining issues with the proposed design: \\n\\n \\n\\t Message payload format K:V, vs. 
K:\\n{K:V, ...} \\n\\t How does the control-job.sh script use the SystemConsumer\\/SystemProducer? \\n\\t How will this work in a dev environment? \\n \\n\\n\\n I will address these in order. \\n\\n Message payload format \\n\\n The current design models ConfigStream messages as a simple key-value pair. The downside to this approach is that it breaks atomicity for a StreamTask\'s checkpoint (multiple messages are required for a single checkpoint--one per SSP:offset pair). \\n\\n The two solutions to this are to (1) depend on transactionality, or (2) support a message payload format that is nested K: \\n{K:V, ...}\\n . All offset checkpoints for a single task could therefore be written in a single message, thus maintaining atomic commits for all checkpoints within a single task. The latter approach (nested payloads) is how we currently checkpoint. The downsides to this approach are: \\n\\n \\n\\t The single offset checkpoint message will be much larger than any individual offset checkpoint message in approach (1). \\n\\t Modifying an offset checkpoint requires the job coordinator to do a read-modify-write, which is more complicated than the simple put that would be required for approach (1). \\n\\t It muddles the data model a little bit. \\n \\n\\n\\n The problem with (1) is mainly that it depends on transactionality. Without this, there\'s the potential for a failure to occur halfway through a task checkpoint. In such a case, some input streams would fall back, and others would not. I tend to agree with Martin\'s assessment of the problem: \\n\\n My hunch is that it\'s not a big problem, since state changelogs are currently also not atomically tied to the checkpoints either, so the semantics of container restart are somewhat vague already. \\n\\n But given that it should be fairly trivial to solve this using nested payloads, we might as well do so. We can always clean it up later, if transactionality becomes commonplace. 
\\n\\n How does the control-job.sh script use the SystemConsumer\\/SystemProducer? \\n\\n This is a tricky one. Given that Samza has a SystemConsumer\\/SystemProducer API, it seems ideal to have the ConfigStream implementation use these interfaces for reading\\/writing config. In the design document, I glossed over how the job coordinator and control-job.sh script know how to translate a URI to a Config for SystemConsumer\\/SystemProducer. This is a bit of a chicken and egg problem. The control-job.sh script needs to know how to write to the ConfigStream, but in order to do that, it needs config for the SystemFactory.getConsumer() call. \\n\\n Two potential solutions that I can think of are: \\n\\n \\n\\t Introduce a SAMZA_HOME environment variable, which expects a conf\\/samza-site.properties configuration. \\n\\t Add a SystemFactory.getConfig(URI uri) interface. \\n \\n\\n\\n Introducing a SAMZA_HOME environment variable seems very heavy handed. It\'s going to have to be set on every node in the YARN cluster (since the job coordinator could run on any node), as well as the machine that control-job.sh is going to run on. This will be hard to operate, may be (Samza) version dependent, and seems kind of clunky. \\n\\n Adding a getConfig() API seems mildly hacky. The main problem with this approach is how to determine which SystemFactory to use based on the URI. We could do something as simple as Class.forName(uri.getScheme() + \\\"SystemFactory\\\").newInstance(). This seems a bit hacky and dangerous, but should work, and maintains pluggability. \\n\\n Does anyone else have any other ideas for this? \\n\\n How will this work in a dev environment? \\n\\n It\'s relatively easy to start a Samza job locally using the ThreadJobFactory or ProcessJobFactory right now. Config can be inserted via the constructor, and no Kafka grid is required to do this. 
In the new design proposal, it seems that developers will be required to have a Kafka grid (or some equivalent system implementation--hbase, or whatever) to store their configuration. There doesn\'t seem to be much of a way around this, unless the FileSystemConsumer\\/FileSystemProducer could be made to work as the backing system for the ConfigStream (which seems possible at first glance). \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14167637\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14167637_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14167637&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14167637\' class=\'commentdate_14167637_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'10\\/Oct\\/14 22:10\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-10T22:10:13+0000\'\u003e10\\/Oct\\/14 22:10\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n 
\u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cblockquote\u003e\u003cp\u003eWe could do something as simple as Class.forName(uri.getScheme() + \\\"SystemFactory\\\").newInstance(). This seems a bit hacky and dangerous, but should work, and maintains pluggability.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eRather than this, we could just add an extra switch to the CLI to provide a system factory, but provide built-in defaults for URIs with the \u003ca href=\\\"file:\\/\\/\\\" class=\\\"external-link\\\" target=\\\"_blank\\\" rel=\\\"nofollow noopener\\\"\u003efile:\\/\\/\u003c\\/a\u003e and kafka:\\/\\/ schemes. This seems a bit less hacky, and should still work out of the box, for most folks.\u003c\\/p\u003e\\n\\n\u003cp\u003eIn addition, because the control-job.sh script and job coordinator will want to both read and write to the ConfigStream, we\'ll have to provide both the broker metadata list, and the ZK path in the URI. This can probably be done with something like:\u003c\\/p\u003e\\n\\n\u003cdiv class=\\\"preformatted panel\\\" style=\\\"border-width: 1px;\\\"\u003e\u003cdiv class=\\\"preformattedContent panelContent\\\"\u003e\\n\u003cpre\u003ekafka:\\/\\/&lt;broker-list&gt;:&lt;broker ports&gt;?zk=&lt;zk-list&gt;:&lt;zk-port&gt;\\nkafka:\\/\\/192.168.0.1,192.168.0.2:9192?zk=192.168.0.1,192.168.0.2,192.168.0.3:2181\\n\u003c\\/pre\u003e\\n\u003c\\/div\u003e\u003c\\/div\u003e\\n\\n\u003cp\u003eIt\'s ugly, but it\'s not our fault. 
Neither of these systems are represented well in URI schemes.\u003c\\/p\u003e\\n\\n\u003cp\u003eEventually, the need for a ZK path should go away, when Kafka finishes moving all of its ZK dependencies behind a broker protocol.\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14167637_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14167637&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14167637\' class=\'commentdate_14167637_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'10\\/Oct\\/14 22:10\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-10T22:10:13+0000\'\u003e10\\/Oct\\/14 22:10\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e We could do something as simple as Class.forName(uri.getScheme() + \\\"SystemFactory\\\").newInstance(). This seems a bit hacky and dangerous, but should work, and maintains pluggability. 
\\n\\n Rather than this, we could just add an extra switch to the CLI to provide a system factory, but provide built-in defaults for URIs with the file:\\/\\/ and kafka:\\/\\/ schemes. This seems a bit less hacky, and should still work out of the box, for most folks. \\n\\n In addition, because the control-job.sh script and job cooridnator will want to both read and write to the ConfigStream, we\'ll have to provide both the broker metadata list, and the ZK path in the URI. This can probably be done with something like: \\n\\n \\n kafka:\\/\\/<broker-list>:<broker ports>?zk=<zk-list>:<zk-port>\\nkafka:\\/\\/192.168.0.1,192.168.0.2:9192?zk=192.168.0.1,192.168.0.2,192.168.0.3:2181\\n \\n \\n\\n It\'s ugly, but it\'s not our fault. Neither of these systems are represented well in URI schemes. \\n\\n Eventually, the need for a ZK path should go away, when Kafka finishes moving all of its ZK dependencies behind a broker protocol. \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14167644\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14167644_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris 
Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14167644&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14167644\' class=\'commentdate_14167644_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'10\\/Oct\\/14 22:13\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-10T22:13:37+0000\'\u003e10\\/Oct\\/14 22:13\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cp\u003eI see the work for this ticket breaking down into three main phases:\u003c\\/p\u003e\\n\\n\u003col\u003e\\n\\t\u003cli\u003eAdd ConfigStream to existing JobRunner\\/AM code, and add HTTP API to AM\\/SamzaContainer communication.\u003c\\/li\u003e\\n\\t\u003cli\u003eExtract job coordinator logic out of the Yarn AM\\/ThreadJobFactory\\/ProcessJobFactory, and into a single centralized execution-framework agnostic chunk of code.\u003c\\/li\u003e\\n\\t\u003cli\u003eWrite the control-job.sh script to manipulate the config stream.\u003c\\/li\u003e\\n\u003c\\/ol\u003e\\n\\n\\n\u003cp\u003eI propose ripping (2) and (3) out into separate tickets, and just focusing on implementing the ConfigStream (1) in this ticket.\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14167644_concise\\\" 
href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14167644&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14167644\' class=\'commentdate_14167644_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'10\\/Oct\\/14 22:13\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-10T22:13:37+0000\'\u003e10\\/Oct\\/14 22:13\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e I see the work for this ticket breaking down into three main phases: \\n\\n \\n\\t Add ConfigStream to existing JobRunner\\/AM code, and add HTTP API to AM\\/SamzaContainer communication. \\n\\t Extract job coordinator logic out of the Yarn AM\\/ThreadJobFactory\\/ProcessJobFactory, and into a single centralized execution-framework agnostic chunk of code. \\n\\t Write the control-job.sh script to manipulate the config stream. \\n \\n\\n\\n I propose ripping (2) and (3) out into separate tickets, and just focusing on implementing the ConfigStream (1) in this ticket. 
\u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14169366\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"martinkl\\\" id=\\\"commentauthor_14169366_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=martinkl\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"martinkl\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Martin Kleppmann\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14169366&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14169366\' class=\'commentdate_14169366_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'13\\/Oct\\/14 14:57\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-13T14:57:51+0000\'\u003e13\\/Oct\\/14 14:57\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cblockquote\u003e\u003cp\u003esupport a message payload format that is nested (K: {K:V, ...})\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eDo you have an estimate of how big these payloads are likely to get? 
Thinking about \u003ca href=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/ViewProfile.jspa?name=jonbringhurst\\\" class=\\\"user-hover\\\" rel=\\\"jonbringhurst\\\"\u003ejonbringhurst\u003c\\/a\u003e\'s remark about the Kinesis message size limit.\u003c\\/p\u003e\\n\\n\u003cp\u003eApart from that, I agree with your observations about the advantages and disadvantages of each message format. I mildly prefer the K:V format as it\'s a bit cleaner, and transactionality is much needed anyway. But I\'m ok either way.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eAdding a getConfig() API seems mildly hacky. The main problem with this approach is how to determine which SystemFactory to use based on the URI.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eWe could define that the scheme is the name of a system defined in the job configuration. So in order to use a kafka:\\/\\/ URI, you must include a systems.kafka.samza.factory=org.apache.samza.system.kafka.KafkaSystemFactory in the job configuration. That configuration parameter would need to be provided as a command-line argument.\u003c\\/p\u003e\\n\\n\u003cp\u003eSensible defaults (like defining \u003ca href=\\\"file:\\/\\/\\\" class=\\\"external-link\\\" target=\\\"_blank\\\" rel=\\\"nofollow noopener\\\"\u003efile:\\/\\/\u003c\\/a\u003e and kafka:\\/\\/ by default) would then be a nice usability improvement without sacrificing generality.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eHow will this work in a dev environment?\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eI think it would be very good if the Kafka runtime dependency is optional, so that Samza remains friendly to jobs which choose to use a different message broker. 
So I would be keen for Samza to be able to use files for checkpoints and config in dev.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003e\u003ctt\u003ekafka:\\/\\/<broker-list>:<broker ports>?zk=<zk-list>:<zk-port>\u003c\\/tt\u003e\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eNit: Kafka\'s configuration repeats the port for each ZK IP address, rather than specifying one port for all IP addresses. Might be better to stick with the same convention.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eI propose ripping (2) and (3) out into separate tickets, and just focusing on implementing the ConfigStream (1) in this ticket.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003e+1\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"martinkl\\\" id=\\\"commentauthor_14169366_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=martinkl\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"martinkl\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Martin Kleppmann\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14169366&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14169366\' class=\'commentdate_14169366_concise subText 
comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'13\\/Oct\\/14 14:57\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-13T14:57:51+0000\'\u003e13\\/Oct\\/14 14:57\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e support a message payload format that is nested (K: {K:V, ...}) \\n\\n Do you have an estimate of how big these payloads are likely to get? Thinking about jonbringhurst \'s remark about the Kinesis message size limit. \\n\\n Apart from that, I agree with your observations about the advantages and disadvantages of each message format. I mildly prefer the K:V format as it\'s a bit cleaner, and transactionality is much needed anyway. But I\'m ok either way. \\n\\n Adding a getConfig() API seems mildly hacky. The main problem with this approach is how to determine which SystemFactory to use based on the URI. \\n\\n We could define that the scheme is the name of a system defined in the job configuration. So in order to use a kafka:\\/\\/ URI, you must include a systems.kafka.samza.factory=org.apache.samza.system.kafka.KafkaSystemFactory in the job configuration. That configuration parameter would need to be provided as a command-line argument. \\n\\n Sensible defaults (like defining file:\\/\\/ and kafka:\\/\\/ by default) would then be a nice usability improvement without sacrificing generality. \\n\\n How will this work in a dev environment? \\n\\n I think it would be very good if the Kafka runtime dependency is optional, so that Samza remains friendly to jobs which choose to use a different message broker. So I would be keen for Samza to be able to use files for checkpoints and config in dev. \\n\\n kafka:\\/\\/<broker-list>:<broker ports>?zk=<zk-list>:<zk-port> \\n\\n Nit: Kafka\'s configuration repeats the port for each ZK IP address, rather than specifying one port for all IP addresses. Might be better to stick with the same convention. 
\\n\\n I propose ripping (2) and (3) out into separate tickets, and just focusing on implementing the ConfigStream (1) in this ticket. \\n\\n +1 \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14169429\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14169429_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14169429&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14169429\' class=\'commentdate_14169429_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'13\\/Oct\\/14 15:52\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-13T15:52:19+0000\'\u003e13\\/Oct\\/14 15:52\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cblockquote\u003e\u003cp\u003eDo you have an estimate of how big these payloads are likely to get?\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eThey will get above 50K for jobs with a lot 
of input SSPs\\/task. In general, they shouldn\'t be that large, though.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eI mildly prefer the K:V format as it\'s a bit cleaner, and transactionality is much needed anyway.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003ePersonally, I agree. I think we should switch to k\\/v once we have transactionality, but in the meantime, I\'m just keeping the proposal inline with what we already do (single message for all offsets for a single task).\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eWe could define that the scheme is the name of a system defined in the job configuration.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eThis seems to be the cleanest\\/most general way to do things. It seems not too great from a usability perspective, but if we provide defaults for the file\\/kafka systems, then it should be OK.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eI think it would be very good if the Kafka runtime dependency is optional.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eI agree.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eSo I would be keen for Samza to be able to use files for checkpoints and config in dev.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eSounds reasonable to me.\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14169429_concise\\\" 
href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14169429&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14169429\' class=\'commentdate_14169429_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'13\\/Oct\\/14 15:52\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-13T15:52:19+0000\'\u003e13\\/Oct\\/14 15:52\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e Do you have an estimate of how big these payloads are likely to get? \\n\\n They will get above 50K for jobs with a lot of input SSPs\\/task. In general, they shouldn\'t be that large, though. \\n\\n I mildly prefer the K:V format as it\'s a bit cleaner, and transactionality is much needed anyway. \\n\\n Personally, I agree. I think we should switch to k\\/v once we have transactionality, but in the meantime, I\'m just keeping the proposal inline with what we already do (single message for all offsets for a single task). \\n\\n We could define that the scheme is the name of a system defined in the job configuration. \\n\\n This seems to be the cleanest\\/most general way to do things. It seems not too great from a usability perspective, but if we provide defaults for the file\\/kafka systems, then it should be OK. \\n\\n I think it would be very good if the Kafka runtime dependency is optional. \\n\\n I agree. \\n\\n So I would be keen for Samza to be able to use files for checkpoints and config in dev. \\n\\n Sounds reasonable to me. 
\u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14169568\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14169568_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14169568&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14169568\' class=\'commentdate_14169568_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'13\\/Oct\\/14 17:17\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-13T17:17:58+0000\'\u003e13\\/Oct\\/14 17:17\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cblockquote\u003e\u003cp\u003eHow does the control-job.sh script use the SystemConsumer\\/SystemProducer?\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eA third solution to this problem would be to define the ConfigStream in code. 
If we move toward doing wiring in code (rather than config) as part of \u003ca href=\\\"https:\\/\\/issues.apache.org\\/jira\\/browse\\/SAMZA-40\\\" title=\\\"Refactor Samza configuration\\\" class=\\\"issue-link\\\" data-issue-key=\\\"SAMZA-40\\\"\u003eSAMZA-40\u003c\\/a\u003e, we could also define the ConfigStream in code. At first, this might sound rather distasteful, since it will require a full rebuild of code in order to change a ConfigStream, but if you bend your thinking a bit, the ConfigStream is actually closer to wiring than config, so it might be OK to require this.\u003c\\/p\u003e\\n\\n\u003cp\u003eThis would also require the control-job.sh script to download the job that it\'s controlling, or to have the job\'s jars on its classpath, so that it would know how to talk to the ConfigStream as well.\u003c\\/p\u003e\\n\\n\u003cp\u003eI like this approach, except for the complexity it introduces in control-job.sh. Not sure if we can solve this, though. If not, then SystemFactory.getConfig still seems the least distasteful.\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14169568_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" 
\\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14169568&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14169568\' class=\'commentdate_14169568_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'13\\/Oct\\/14 17:17\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-13T17:17:58+0000\'\u003e13\\/Oct\\/14 17:17\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e How does the control-job.sh script use the SystemConsumer\\/SystemProducer? \\n\\n A third solution to this problem would be to define the ConfigStream in code. If we move toward doing wiring in code (rather than config) as part of SAMZA-40 , we could also define the ConfigStream in code. At first, this might sound rather distasteful, since it will require a full rebuild of code in order to change a ConfigStream, but if you bend your thinking a bit, the ConfigStream is actually closer to wiring than config, so it might be OK to require this. \\n\\n This would also require the control-job.sh script to download the job that it\'s controlling, or to have the job\'s jars on its classpath, so that it would know how to talk to the ConfigStream as well. \\n\\n I like this approach, except for the complexity it introduces in control-job.sh. Not sure if we can solve this, though. If not, then SystemFactory.getConfig still seems the least distasteful. 
\u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14171484\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"martinkl\\\" id=\\\"commentauthor_14171484_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=martinkl\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"martinkl\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Martin Kleppmann\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14171484&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14171484\' class=\'commentdate_14171484_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'14\\/Oct\\/14 20:42\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-14T20:42:50+0000\'\u003e14\\/Oct\\/14 20:42\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cblockquote\u003e\u003cp\u003eA third solution to this problem would be to define the ConfigStream in code.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eI can\'t visualise what this would look like. 
Could you give an example?\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"martinkl\\\" id=\\\"commentauthor_14171484_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=martinkl\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"martinkl\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Martin Kleppmann\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14171484&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14171484\' class=\'commentdate_14171484_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'14\\/Oct\\/14 20:42\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-14T20:42:50+0000\'\u003e14\\/Oct\\/14 20:42\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e A third solution to this problem would be to define the ConfigStream in code. \\n\\n I can\'t visualise what this would look like. Could you give an example? 
\u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14171643\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14171643_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14171643&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14171643\' class=\'commentdate_14171643_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'14\\/Oct\\/14 22:30\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-14T22:30:06+0000\'\u003e14\\/Oct\\/14 22:30\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cp\u003eThe example would depend on how wiring is handled as part of \u003ca href=\\\"https:\\/\\/issues.apache.org\\/jira\\/browse\\/SAMZA-40\\\" title=\\\"Refactor Samza configuration\\\" class=\\\"issue-link\\\" data-issue-key=\\\"SAMZA-40\\\"\u003eSAMZA-40\u003c\\/a\u003e. 
For argument\'s sake, let\'s say we had a SamzaJobBuilder class, where you could do something like:\u003c\\/p\u003e\\n\\n\u003cdiv class=\\\"code panel\\\" style=\\\"border-width: 1px;\\\"\u003e\u003cdiv class=\\\"codeContent panelContent\\\"\u003e\\n\u003cpre class=\\\"code-java\\\"\u003eProperties props = \u003cspan class=\\\"code-keyword\\\"\u003enew\u003c\\/span\u003e Properties()\\n\u003cspan class=\\\"code-comment\\\"\u003e\\/\\/ Set Kafka consumer\\/producer properties\\n\u003c\\/span\u003eKafkaConfigStream configStream = \u003cspan class=\\\"code-keyword\\\"\u003enew\u003c\\/span\u003e KafkaConfigStream(\u003cspan class=\\\"code-keyword\\\"\u003enew\u003c\\/span\u003e KafkaConfig(props))\\nSamzaJob job = \u003cspan class=\\\"code-keyword\\\"\u003enew\u003c\\/span\u003e SamzaJobBuilder()\\n .setConfigStream(configStream)\\n .setFoo(foo)\\n .build\\n\u003c\\/pre\u003e\\n\u003c\\/div\u003e\u003c\\/div\u003e\\n\\n\u003cp\u003eAnother variation on this is:\u003c\\/p\u003e\\n\\n\u003cdiv class=\\\"code panel\\\" style=\\\"border-width: 1px;\\\"\u003e\u003cdiv class=\\\"codeContent panelContent\\\"\u003e\\n\u003cpre class=\\\"code-java\\\"\u003edef getJob(config: Config) = {\\n \u003cspan class=\\\"code-keyword\\\"\u003enew\u003c\\/span\u003e SamzaJobBuilder()\\n .setConfigStreamFactory(\u003cspan class=\\\"code-keyword\\\"\u003enew\u003c\\/span\u003e KafkaConfigStreamFactory)\\n .setFoo(foo)\\n .build\\n}\\n\u003c\\/pre\u003e\\n\u003c\\/div\u003e\u003c\\/div\u003e\\n\\n\u003cp\u003eThe main point is just that the config stream would be set in code.\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small 
aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14171643_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14171643&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14171643\' class=\'commentdate_14171643_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'14\\/Oct\\/14 22:30\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-14T22:30:06+0000\'\u003e14\\/Oct\\/14 22:30\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e The example would depend on how wiring is handled as part of SAMZA-40 . For argument\'s sake, let\'s say we had a SamzaJobBuilder class, where you could do something like: \\n\\n \\n Properties props = new Properties()\\n \\/\\/ Set Kafka consumer\\/producer properties\\n KafkaConfigStream configStream = new KafkaConfigStream( new KafkaConfig(props))\\nSamzaJob job = new SamzaJobBuilder()\\n .setConfigStream(configStream)\\n .setFoo(foo)\\n .build\\n \\n \\n\\n Another variation on this is: \\n\\n \\n def getJob(config: Config) = {\\n new SamzaJobBuilder()\\n .setConfigStreamFactory( new KafkaConfigStreamFactory)\\n .setFoo(foo)\\n .build\\n}\\n \\n \\n\\n The main point is just that the config stream would be set in code. 
\u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14172090\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"closeuris\\\" id=\\\"commentauthor_14172090_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=closeuris\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10442\\\" alt=\\\"closeuris\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Yan Fang\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14172090&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14172090\' class=\'commentdate_14172090_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'15\\/Oct\\/14 07:19\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-15T07:19:43+0000\'\u003e15\\/Oct\\/14 07:19\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cblockquote\u003e\\n\u003cp\u003eI think it\'s quite reasonable to expect that if you want to know the \'interpreted\' value of a config property (after default is filled in, and perhaps also after it is parsed or otherwise processed) then you need to ask the AM, either via the web interface or via a command-line tool that talks to an API. 
Then control-job.sh is concerned only with the config that is explicitly declared, and not what is inferred.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\u003cp\u003e1) when you guys talk about \\\"default\\\", does it mean the default configuration provided by the Samza or by the user when he starts the job first time ?\u003cbr\\/\u003e\\n2) there is another use case where the user wants to check the latest configuration used in a finished job and probably reuses the same configuration. Then querying the AM seems not working because the job is already finished. The only way of getting the latest configuration of a finished job is reading the whole ConfigStream. Or we can dump the latest configuration into a file when the job finishes? But the problem is that, at most time, the job is killed rather than finished gracefully by itself.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\\n\u003cp\u003eSo in order to use a kafka:\\/\\/ URI, you must include a systems.kafka.samza.factory=org.apache.samza.system.kafka.KafkaSystemFactory in the job configuration. That configuration parameter would need to be provided as a command-line argument.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\u003cp\u003eI do not quite get this. Do you mean something like\u003c\\/p\u003e\\n\u003cdiv class=\\\"code panel\\\" style=\\\"border-width: 1px;\\\"\u003e\u003cdiv class=\\\"codeContent panelContent\\\"\u003e\\n\u003cpre class=\\\"code-java\\\"\u003ekafka:\u003cspan class=\\\"code-comment\\\"\u003e\\/\\/<broker-list>:<broker ports>?zk=<zk-list>:<zk-port> systems.kafka.samza.factory=org.apache.samza.system.kafka.KafkaSystemFactory\u003c\\/span\u003e\\n\u003c\\/pre\u003e\\n\u003c\\/div\u003e\u003c\\/div\u003e \\n\u003cp\u003eIf yes, why can\'t this be a default setting when the URI starts with \\\"kafka:\\/\\/\\\"?\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\\n\u003cp\u003eThis seems to be the cleanest\\/most general way to do things. 
It seems not too great from a usability perspective, but if we provide defaults for the file\\/kafka systems, then it should be OK.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\u003cp\u003e+1. When we have other systems, we could add more. This should not scare people away.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\\n\u003cp\u003eHow does the control-job.sh script use the SystemConsumer\\/SystemProducer?\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\u003cp\u003eMaybe I misunderstood something. When we use the control-job.sh to control the job, do we still have the properties file ? If no, is it possible for the control-job.sh to accept a properties file to get the system information? Because the main goal of ConfigStream is to solve the problems such as dynamic changing config, bigger configuration size, not to get rid of the properties file. If yes, then why cannot we put the system information in the properties file?\u003c\\/p\u003e\\n\\n\u003cp\u003eAlso, adding a --file for the control-job.sh maybe helpful, like\u003c\\/p\u003e\\n\u003cdiv class=\\\"code panel\\\" style=\\\"border-width: 1px;\\\"\u003e\u003cdiv class=\\\"codeContent panelContent\\\"\u003e\\n\u003cpre class=\\\"code-java\\\"\u003econtrol-job.sh --location kafka:\u003cspan class=\\\"code-comment\\\"\u003e\\/\\/localhost:10251 --file \\/path\\/to\\/config\\/file\u003c\\/span\u003e\\n\u003c\\/pre\u003e\\n\u003c\\/div\u003e\u003c\\/div\u003e\\n\u003cp\u003ebecause when you start the job at the first time, you may input a lot of configurations. 
Having a file containing the starting properties is useful.\u003c\\/p\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"closeuris\\\" id=\\\"commentauthor_14172090_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=closeuris\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10442\\\" alt=\\\"closeuris\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Yan Fang\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14172090&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14172090\' class=\'commentdate_14172090_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'15\\/Oct\\/14 07:19\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-15T07:19:43+0000\'\u003e15\\/Oct\\/14 07:19\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \\n I think it\'s quite reasonable to expect that if you want to know the \'interpreted\' value of a config property (after default is filled in, and perhaps also after it is parsed or otherwise processed) then you need to ask the AM, either via the web interface or via a command-line tool that talks to an API. Then control-job.sh is concerned only with the config that is explicitly declared, and not what is inferred. 
\\n 1) when you guys talk about \\\"default\\\", does it mean the default configuration provided by the Samza or by the user when he starts the job first time ? \\n2) there is another use case where the user wants to check the latest configuration used in a finished job and probably reuses the same configuration. Then querying the AM seems not working because the job is already finished. The only way of getting the latest configuration of a finished job is reading the whole ConfigStream. Or we can dump the latest configuration into a file when the job finishes? But the problem is that, at most time, the job is killed rather than finished gracefully by itself. \\n\\n \\n So in order to use a kafka:\\/\\/ URI, you must include a systems.kafka.samza.factory=org.apache.samza.system.kafka.KafkaSystemFactory in the job configuration. That configuration parameter would need to be provided as a command-line argument. \\n I do not quite get this. Do you mean something like \\n \\n kafka: \\/\\/<broker-list>:<broker ports>?zk=<zk-list>:<zk-port> systems.kafka.samza.factory=org.apache.samza.system.kafka.KafkaSystemFactory \\n \\n \\n If yes, why can\'t this be a default setting when the URI starts with \\\"kafka:\\/\\/\\\"? \\n\\n \\n This seems to be the cleanest\\/most general way to do things. It seems not too great from a usability perspective, but if we provide defaults for the file\\/kafka systems, then it should be OK. \\n +1. When we have other systems, we could add more. This should not scare people away. \\n\\n \\n How does the control-job.sh script use the SystemConsumer\\/SystemProducer? \\n Maybe I misunderstood something. When we use the control-job.sh to control the job, do we still have the properties file ? If no, is it possible for the control-job.sh to accept a properties file to get the system information? 
Because the main goal of ConfigStream is to solve the problems such as dynamic changing config, bigger configuration size, not to get rid of the properties file. If yes, then why cannot we put the system information in the properties file? \\n\\n Also, adding a --file for the control-job.sh maybe helpful, like \\n \\n control-job.sh --location kafka: \\/\\/localhost:10251 --file \\/path\\/to\\/config\\/file \\n \\n \\n because when you start the job at the first time, you may input a lot of configurations. Having a file containing the starting properties is useful. \\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14172515\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14172515_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14172515&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14172515\' class=\'commentdate_14172515_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'15\\/Oct\\/14 16:09\'\u003e\u003ctime 
class=\'livestamp\' datetime=\'2014-10-15T16:09:43+0000\'\u003e15\\/Oct\\/14 16:09\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cblockquote\u003e\u003cp\u003e1) when you guys talk about \\\"default\\\", does it mean the default configuration provided by the Samza or by the user when he starts the job first time ?\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eWe\'re talking about the \\\"default\\\" value that\'s used when a developer has \u003cb\u003enot\u003c\\/b\u003e specified a value for a given config. This usually happens in the code when we do a .getOrElse() on a config object.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003e2) there is another use case where the user wants to check the latest configuration used in a finished job and probably reuses the same configuration. Then querying the AM seems not working because the job is already finished. The only way of getting the latest configuration of a finished job is reading the whole ConfigStream. Or we can dump the latest configuration into a file when the job finishes? But the problem is that, at most time, the job is killed rather than finished gracefully by itself.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eYea, I think you\'d have to read the whole config stream.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eIf yes, why can\'t this be a default setting when the URI starts with \\\"kafka:\\/\\/\\\"?\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eI agree, we should just default it to the proper system factory for known system types (kafka, file).\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eMaybe I misunderstood something. When we use the control-job.sh to control the job, do we still have the properties file ? 
\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eNo.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eIf no, is it possible for the control-job.sh to accept a properties file to get the system information? \u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eIt is possible, but it seems kind of hacky. Following this train of thought, the control-job.sh could receive a properties file with a system defined for the ConfigStream. The problem is that the coordinator (AM) will also need this file, and it\'ll be running elsewhere. This leads to the same problem we have today: how do you get the file to the AM? JSON encoded environment variable, HDFS, HTTP, local file system?\u003c\\/p\u003e\\n\\n\u003cp\u003eI feel like this idea inevitably leads to one of two outcomes:\u003c\\/p\u003e\\n\\n\u003col\u003e\\n\\t\u003cli\u003eA system like we have today, where we pass the static properties file between machines (in our case, via environment variable).\u003c\\/li\u003e\\n\\t\u003cli\u003eThe properties file is placed on every host, and an environment variable is defined so that control-job.sh and the job coordinator (AM) can both find it.\u003c\\/li\u003e\\n\u003c\\/ol\u003e\\n\\n\\n\u003cblockquote\u003e\u003cp\u003eBecause the main goal of ConfigStream is to solve the problems such as dynamic changing config, bigger configuration size, not to get rid of the properties file.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eTrue. I just want to make this as easy to use as possible. Having the kafka:\\/\\/ URI, with default system factories seems more usable than another properties file.\u003c\\/p\u003e\\n\\n\u003cblockquote\u003e\u003cp\u003eAlso, adding a --file for the control-job.sh maybe helpful\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eWe will certainly need a way to pipe config files into the ConfigStream. 
Something like {--file} makes a lot of sense.\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14172515_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14172515&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14172515\' class=\'commentdate_14172515_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'15\\/Oct\\/14 16:09\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-15T16:09:43+0000\'\u003e15\\/Oct\\/14 16:09\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e 1) when you guys talk about \\\"default\\\", does it mean the default configuration provided by the Samza or by the user when he starts the job first time ? \\n\\n We\'re talking about the \\\"default\\\" value that\'s used when a developer has not specified a value for a given config. This usually happens in the code when we do a .getOrElse() on a config object. 
\\n\\n 2) there is another use case where the user wants to check the latest configuration used in a finished job and probably reuses the same configuration. Then querying the AM seems not working because the job is already finished. The only way of getting the latest configuration of a finished job is reading the whole ConfigStream. Or we can dump the latest configuration into a file when the job finishes? But the problem is that, at most time, the job is killed rather than finished gracefully by itself. \\n\\n Yea, I think you\'d have to read the whole config stream. \\n\\n If yes, why can\'t this be a default setting when the URI starts with \\\"kafka:\\/\\/\\\"? \\n\\n I agree, we should just default it to the proper system factory for known system types (kafka, file). \\n\\n Maybe I misunderstood something. When we use the control-job.sh to control the job, do we still have the properties file ? \\n\\n No. \\n\\n If no, is it possible for the control-job.sh to accept a properties file to get the system information? \\n\\n It is possible, but it seems kind of hacky. Following this train of thought, the control-job.sh could receive a properties file with a system defined for the ConfigStream. The problem is that the coordinator (AM) will also need this file, and it\'ll be running elsewhere. This leads to the same problem we have today: how do you get the file to the AM? JSON encoded environment variable, HDFS, HTTP, local file system? \\n\\n I feel like this idea inevitably leads to one of two outcomes: \\n\\n \\n\\t A system like we have today, where we pass the static properties file between machines (in our case, via environment variable). \\n\\t The properties file is placed on every host, and an environment variable is defined so that control-job.sh and the job coordinator (AM) can both find it. 
\\n \\n\\n\\n Because the main goal of ConfigStream is to solve the problems such as dynamic changing config, bigger configuration size, not to get rid of the properties file. \\n\\n True. I just want to make this as easy to use as possible. Having the kafka:\\/\\/ URI, with default system factories seems more usable than another properties file. \\n\\n Also, adding a --file for the control-job.sh maybe helpful \\n\\n We will certainly need a way to pipe config files into the ConfigStream. Something like {--file} makes a lot of sense. \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14172579\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"closeuris\\\" id=\\\"commentauthor_14172579_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=closeuris\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10442\\\" alt=\\\"closeuris\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Yan Fang\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14172579&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14172579\' class=\'commentdate_14172579_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'15\\/Oct\\/14 16:47\'\u003e\u003ctime class=\'livestamp\' 
datetime=\'2014-10-15T16:47:33+0000\'\u003e15\\/Oct\\/14 16:47\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cblockquote\u003e\\n\u003cp\u003eTrue. I just want to make this as easy to use as possible. Having the kafka:\\/\\/ URI, with default system factories seems more usable than another properties file.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eTotally agree. When users define a new system, they may need to pass a lot of initial configs into the control-job.sh. But I guess, when we can pipe config files, it may not be a big deal because they can put the new system\'s configuration in a file.\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"closeuris\\\" id=\\\"commentauthor_14172579_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=closeuris\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10442\\\" alt=\\\"closeuris\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Yan Fang\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14172579&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14172579\' class=\'commentdate_14172579_concise subText 
comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'15\\/Oct\\/14 16:47\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-15T16:47:33+0000\'\u003e15\\/Oct\\/14 16:47\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \\n True. I just want to make this as easy to use as possible. Having the kafka:\\/\\/ URI, with default system factories seems more usable than another properties file. \\n\\n Totally agree. When users define a new system, they may need to pass a lot of initial configs into the control-job.sh. But I guess, when we can pipe config files, it may not be a big deal because they can put the new system\'s configuration in a file. \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14172783\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"closeuris\\\" id=\\\"commentauthor_14172783_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=closeuris\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10442\\\" alt=\\\"closeuris\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Yan Fang\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14172783&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14172783\' class=\'commentdate_14172783_verbose subText 
comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'15\\/Oct\\/14 18:59\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-15T18:59:24+0000\'\u003e15\\/Oct\\/14 18:59\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cblockquote\u003e\\n\u003cp\u003eWe will certainly need a way to pipe config files into the ConfigStream.\u003c\\/p\u003e\u003c\\/blockquote\u003e\\n\\n\u003cp\u003eAnother reason it is useful is that, if we allows the control.sh to pipe config files, we will have an easier life in debugging a Samza job with the same configuration:\u003cbr\\/\u003e\\n1. start a samza job with initial configurations.\u003cbr\\/\u003e\\n2. change configurations.\u003cbr\\/\u003e\\n3. kill the samza job.\u003cbr\\/\u003e\\n4. \u003cb\u003edump the ConfigStream into a config file.\u003c\\/b\u003e\u003cbr\\/\u003e\\n5. start a new samza job with the same configuration.\u003c\\/p\u003e\\n\\n\u003cp\u003eStep 4 gives three advantages: 1) give an intuitive view of what is used in the latest samza job 2) speed up the starting process because the AM does need to read the whole ConfigStream again in step 5. 
3) easily make changes in the config file.\u003c\\/p\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"closeuris\\\" id=\\\"commentauthor_14172783_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=closeuris\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10442\\\" alt=\\\"closeuris\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Yan Fang\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14172783&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14172783\' class=\'commentdate_14172783_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'15\\/Oct\\/14 18:59\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-15T18:59:24+0000\'\u003e15\\/Oct\\/14 18:59\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \\n We will certainly need a way to pipe config files into the ConfigStream. \\n\\n Another reason it is useful is that, if we allows the control.sh to pipe config files, we will have an easier life in debugging a Samza job with the same configuration: \\n1. start a samza job with initial configurations. \\n2. change configurations. \\n3. kill the samza job. \\n4. dump the ConfigStream into a config file. \\n5. 
start a new samza job with the same configuration. \\n\\n Step 4 gives three advantages: 1) give an intuitive view of what is used in the latest samza job 2) speed up the starting process because the AM does need to read the whole ConfigStream again in step 5. 3) easily make changes in the config file. \\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14174137\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14174137_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14174137&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14174137\' class=\'commentdate_14174137_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'16\\/Oct\\/14 19:35\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-16T19:35:16+0000\'\u003e16\\/Oct\\/14 19:35\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cp\u003eI am taking this up. 
Focus will be on adding an HTTP JSON server to the AM\\/local job factories, and writing the config to the stream.\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14174137_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14174137&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14174137\' class=\'commentdate_14174137_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'16\\/Oct\\/14 19:35\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-16T19:35:16+0000\'\u003e16\\/Oct\\/14 19:35\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e I am taking this up. Focus will be on adding an HTTP JSON server to the AM\\/local job factories, and writing the config to the stream. 
\u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14179106\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14179106_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14179106&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14179106\' class=\'commentdate_14179106_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'21\\/Oct\\/14 21:09\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-21T21:09:58+0000\'\u003e21\\/Oct\\/14 21:09\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cp\u003eI am taking a very incremental approach to this ticket. 
I\'ve broken the work down into:\u003c\\/p\u003e\\n\\n\u003col\u003e\\n\\t\u003cli\u003eConvert the AM:SamzaContainer communication to be HTTP\\/JSON.\u003c\\/li\u003e\\n\\t\u003cli\u003eConvert the job:coordinator (AM, process job, thread job) communication to be ConfigStream.\u003c\\/li\u003e\\n\\t\u003cli\u003eExtract shared logic between AM, process job, and thread job into single job coordinator.\u003c\\/li\u003e\\n\\t\u003cli\u003eWrite a control-job.sh script, and remove run-job.sh.\u003c\\/li\u003e\\n\u003c\\/ol\u003e\\n\\n\\n\u003cp\u003eI\'ve opened up \u003ca href=\\\"https:\\/\\/issues.apache.org\\/jira\\/browse\\/SAMZA-438\\\" title=\\\"Pass config via HTTP\\\" class=\\\"issue-link\\\" data-issue-key=\\\"SAMZA-438\\\"\u003e\u003cdel\u003eSAMZA-438\u003c\\/del\u003e\u003c\\/a\u003e for (1), and posted a draft patch to it. I\'m looking for feedback.\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"criccomini\\\" id=\\\"commentauthor_14179106_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"criccomini\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Chris Riccomini\u003c\\/a\u003e\\n added a comment - \u003ca 
href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14179106&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14179106\' class=\'commentdate_14179106_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'21\\/Oct\\/14 21:09\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-10-21T21:09:58+0000\'\u003e21\\/Oct\\/14 21:09\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e I am taking a very incremental approach to this ticket. I\'ve broken the work down into: \\n\\n \\n\\t Convert the AM:SamzaContainer communication to be HTTP\\/JSON. \\n\\t Convert the job:coordinator (AM, process job, thread job) communication to be ConfigStream. \\n\\t Extract shared logic between AM, process job, and thread job into single job coordinator. \\n\\t Write a control-job.sh script, and remove run-job.sh. \\n \\n\\n\\n I\'ve opened up SAMZA-438 for (1), and posted a draft patch to it. I\'m looking for feedback. \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-14211282\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"nickpan47\\\" id=\\\"commentauthor_14211282_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=nickpan47\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"nickpan47\\\" 
\\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Yi Pan\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14211282&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14211282\' class=\'commentdate_14211282_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'13\\/Nov\\/14 20:55\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-11-13T20:55:48+0000\'\u003e13\\/Nov\\/14 20:55\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cp\u003eHi, referring to Chris\'s comment before: \u003ca href=\\\"https:\\/\\/issues.apache.org\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14134172&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14134172\\\" class=\\\"external-link\\\" rel=\\\"nofollow\\\"\u003ehttps:\\/\\/issues.apache.org\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14134172&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14134172\u003c\\/a\u003e, we had a discussion on the potential race condition problem w\\/ the checkpoint messages from container. The specific issue in the discussion: when a new container was restarted, how does Samza AM knows that it have consumed all checkpoint messages from the container\'s last run? I think that the following solution would be a cheap and attractive one:\u003cbr\\/\u003e\\n1. Samza AM always assign a monotonically increasing generation # to each invocation of a specific container\u003cbr\\/\u003e\\n2. Each container will now associate its current generation # w\\/ the checkpoint messages published to the ConfigStream\u003cbr\\/\u003e\\n3. When a container failure is detected, Samza AM increment the generation # and bounce the container\u003cbr\\/\u003e\\n4. 
On restart, the container will first publish a start token w\\/ its current generation # to the ConfigStream\u003cbr\\/\u003e\\n5. Now, the Samza AM receiving messages from the ConfigStream can perform the following decisions:\u003cbr\\/\u003e\\n a. on reception of the new generation # of the container, Smaza AM knows that it has consumed all previous checkpoints and recovered the state. Hence, it can start serving the config to that new generation of container via HTTP API\u003cbr\\/\u003e\\n b. If there is any issue (e.g. network partition between the SamzaAM and the container) that makes the SamzaAM \\\"thinks\\\" the container has failed and restarted a new one, the checkpoint messages from the still running old generation of container can now be safely discarded after receiving the start token from the new generation of the same container.\u003c\\/p\u003e\\n\\n\u003cp\u003ePlease review and comment to see whether there are other issues I may miss.\u003c\\/p\u003e\\n\\n\u003cp\u003eThanks!\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"nickpan47\\\" id=\\\"commentauthor_14211282_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=nickpan47\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"nickpan47\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e 
Yi Pan\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14211282&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14211282\' class=\'commentdate_14211282_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'13\\/Nov\\/14 20:55\'\u003e\u003ctime class=\'livestamp\' datetime=\'2014-11-13T20:55:48+0000\'\u003e13\\/Nov\\/14 20:55\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e Hi, referring to Chris\'s comment before: https:\\/\\/issues.apache.org\\/jira\\/browse\\/SAMZA-348?focusedCommentId=14134172&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14134172 , we had a discussion on the potential race condition problem w\\/ the checkpoint messages from container. The specific issue in the discussion: when a new container was restarted, how does Samza AM knows that it have consumed all checkpoint messages from the container\'s last run? I think that the following solution would be a cheap and attractive one: \\n1. Samza AM always assign a monotonically increasing generation # to each invocation of a specific container \\n2. Each container will now associate its current generation # w\\/ the checkpoint messages published to the ConfigStream \\n3. When a container failure is detected, Samza AM increment the generation # and bounce the container \\n4. On restart, the container will first publish a start token w\\/ its current generation # to the ConfigStream \\n5. Now, the Samza AM receiving messages from the ConfigStream can perform the following decisions: \\n a. on reception of the new generation # of the container, Smaza AM knows that it has consumed all previous checkpoints and recovered the state. Hence, it can start serving the config to that new generation of container via HTTP API \\n b. If there is any issue (e.g. 
network partition between the SamzaAM and the container) that makes the SamzaAM \\\"thinks\\\" the container has failed and restarted a new one, the checkpoint messages from the still running old generation of container can now be safely discarded after receiving the start token from the new generation of the same container. \\n\\n Please review and comment to see whether there are other issues I may miss. \\n\\n Thanks! \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-15219855\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"alex.buck10\\\" id=\\\"commentauthor_15219855_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=alex.buck10\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"alex.buck10\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Alex Buck\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=15219855&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-15219855\' class=\'commentdate_15219855_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'31\\/Mar\\/16 13:22\'\u003e\u003ctime class=\'livestamp\' datetime=\'2016-03-31T13:22:48+0000\'\u003e31\\/Mar\\/16 13:22\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n 
\u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cp\u003eHi \u003ca href=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/ViewProfile.jspa?name=criccomini\\\" class=\\\"user-hover\\\" rel=\\\"criccomini\\\"\u003ecriccomini\u003c\\/a\u003e, I am interested in using explicit restarts as described in the design document. \u003c\\/p\u003e\\n\\n\u003cp\u003eI have had a go at implementing this locally by updating the ConfigManager to listen for restart messages and I would love to contribute. If this is ok, please would you create a subtask for me to do this?\u003c\\/p\u003e\\n\\n\u003cp\u003eI have also found a bug with the json deserialisation in the YarnUtil class that is used by ConfigManager to query the yarn webapp api for all the running applications so I would like to contribute a fix for that as well please. Should I email the dev mailing list about this or could you create a jira for that too?\u003c\\/p\u003e\\n\\n\u003cp\u003eThank you.\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"alex.buck10\\\" id=\\\"commentauthor_15219855_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=alex.buck10\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"alex.buck10\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Alex 
Buck\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=15219855&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-15219855\' class=\'commentdate_15219855_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'31\\/Mar\\/16 13:22\'\u003e\u003ctime class=\'livestamp\' datetime=\'2016-03-31T13:22:48+0000\'\u003e31\\/Mar\\/16 13:22\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e Hi criccomini , I am interested in using explicit restarts as described in the design document. \\n\\n I have had a go at implementing this locally by updating the ConfigManager to listen for restart messages and I would love to contribute. If this is ok, please would you create a subtask for me to do this? \\n\\n I have also found a bug with the json deserialisation in the YarnUtil class that is used by ConfigManager to query the yarn webapp api for all the running applications so I would like to contribute a fix for that as well please. Should I email the dev mailing list about this or could you create a jira for that too? \\n\\n Thank you. 
\u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \\n\\n\\n\u003cdiv id=\\\"comment-15220302\\\" class=\\\"issue-data-block activity-comment twixi-block expanded\\\"\u003e\\n \u003cdiv class=\\\"twixi-wrap verbose actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Collapse comment\\\" title=\\\"Collapse comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-expanded\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details\\\"\u003e \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"nickpan47\\\" id=\\\"commentauthor_15220302_verbose\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=nickpan47\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"nickpan47\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Yi Pan\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=15220302&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-15220302\' class=\'commentdate_15220302_verbose subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'31\\/Mar\\/16 17:44\'\u003e\u003ctime class=\'livestamp\' datetime=\'2016-03-31T17:44:12+0000\'\u003e31\\/Mar\\/16 17:44\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-body flooded\\\"\u003e\u003cp\u003eHi, \u003ca href=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/ViewProfile.jspa?name=alex.buck10\\\" class=\\\"user-hover\\\" rel=\\\"alex.buck10\\\"\u003ealex.buck10\u003c\\/a\u003e, I have created the sub-task \u003ca href=\\\"https:\\/\\/issues.apache.org\\/jira\\/browse\\/SAMZA-921\\\" title=\\\"Consolidate LocalityManager and 
TaskAssignmentManager\\\" class=\\\"issue-link\\\" data-issue-key=\\\"SAMZA-921\\\"\u003eSAMZA-921\u003c\\/a\u003e and assigned to you. There have been many updates regarding to the CoordinatorStream and the JobCoordinator recently. Please check \u003ca href=\\\"https:\\/\\/issues.apache.org\\/jira\\/browse\\/SAMZA-448\\\" title=\\\"Pass config from JobRunner to JobCoordinator via ConfigStream\\\" class=\\\"issue-link\\\" data-issue-key=\\\"SAMZA-448\\\"\u003e\u003cdel\u003eSAMZA-448\u003c\\/del\u003e\u003c\\/a\u003e for the implementation of CoordinatorStream. This might also be related to the refactoring of JobCoordinator that we are actively working on: \u003ca href=\\\"https:\\/\\/issues.apache.org\\/jira\\/browse\\/SAMZA-881\\\" title=\\\"Re-think the Samza Job Coordinator\\\" class=\\\"issue-link\\\" data-issue-key=\\\"SAMZA-881\\\"\u003eSAMZA-881\u003c\\/a\u003e. So, it would be good if the design\\/implementation of the dynamic re-config via restart can be compatible w\\/ the refactored JobCoordinator as well.\u003c\\/p\u003e\\n\\n\u003cp\u003eAs for the bugs you found in JSON deserialization, feel free to open a JIRA. 
Everyone should have the power to open JIRA in Samza.\u003c\\/p\u003e\\n\\n\u003cp\u003eThanks a lot!\u003c\\/p\u003e \u003c\\/div\u003e\\n \u003cdiv class=\\\"action-links action-comment-actions\\\"\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003cdiv class=\\\"twixi-wrap concise actionContainer\\\"\u003e\\n \u003cdiv class=\\\"action-head\\\"\u003e\\n \u003cbutton aria-label=\\\"Expand comment\\\" title=\\\"Expand comment\\\" class=\\\"twixi icon-default aui-icon aui-icon-small aui-iconfont-collapsed\\\"\u003e\u003c\\/button\u003e\\n \u003cdiv class=\\\"action-details flooded\\\"\u003e\\n \\n \\n \\n \\n \\n\\n \u003ca class=\\\"user-hover user-avatar\\\" rel=\\\"nickpan47\\\" id=\\\"commentauthor_15220302_concise\\\" href=\\\"\\/jira\\/secure\\/ViewProfile.jspa?name=nickpan47\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-xsmall\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"https:\\/\\/issues.apache.org\\/jira\\/secure\\/useravatar?size=xsmall&avatarId=10452\\\" alt=\\\"nickpan47\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e Yi Pan\u003c\\/a\u003e\\n added a comment - \u003ca href=\'\\/jira\\/browse\\/SAMZA-348?focusedCommentId=15220302&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-15220302\' class=\'commentdate_15220302_concise subText comment-created-date-link\'\u003e\u003cspan class=\'date user-tz\' title=\'31\\/Mar\\/16 17:44\'\u003e\u003ctime class=\'livestamp\' datetime=\'2016-03-31T17:44:12+0000\'\u003e31\\/Mar\\/16 17:44\u003c\\/time\u003e\u003c\\/span\u003e\u003c\\/a\u003e Hi, alex.buck10 , I have created the sub-task SAMZA-921 and assigned to you. There have been many updates regarding to the CoordinatorStream and the JobCoordinator recently. Please check SAMZA-448 for the implementation of CoordinatorStream. This might also be related to the refactoring of JobCoordinator that we are actively working on: SAMZA-881 . 
So, it would be good if the design\\/implementation of the dynamic re-config via restart can be compatible w\\/ the refactored JobCoordinator as well. \\n\\n As for the bugs you found in JSON deserialization, feel free to open a JIRA. Everyone should have the power to open JIRA in Samza. \\n\\n Thanks a lot! \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\u003c\\/div\u003e\\n \u003c\\/div\u003e\\n \u003c\\/div\u003e\\n\""; if(window.WRM._dataArrived)window.WRM._dataArrived();</script> <script> window.WRM=window.WRM||{};window.WRM._unparsedData=window.WRM._unparsedData||{};window.WRM._unparsedErrors=window.WRM._unparsedErrors||{}; WRM._unparsedData["scope-filter-data"]="{\"createScopeActions\":[],\"scopes\":[]}"; WRM._unparsedData["sidebar-collapsed-by-default"]="true"; WRM._unparsedData["com.atlassian.jira.projects.shortcuts:can-manage"]="false"; WRM._unparsedData["com.atlassian.jira.projects.shortcuts:with-icons"]="false"; WRM._unparsedData["com.atlassian.jira.projects.shortcuts:shortcuts"]="[]"; WRM._unparsedData["com.atlassian.jira.projects.shortcuts:project-id"]="12314526"; WRM._unparsedData["sidebar-id"]="\"\u003csection class=\\\"aui-sidebar projects-sidebar sidebar-pending\\\" \u003e\u003cdiv class=\\\"aui-sidebar-wrapper\\\"\u003e\u003cdiv class=\\\"aui-sidebar-body\\\"\u003e\u003cdiv class=\\\"aui-page-header\\\" \u003e\u003cdiv class=\\\"aui-page-header-inner\\\"\u003e\u003cdiv class=\\\"aui-page-header-image\\\" \u003e\u003ca href=\\\"\\/jira\\/projects\\/SAMZA\\/summary\\\" title=\\\"Samza\\\" class=\\\"jira-project-avatar\\\"\u003e\u003cspan class=\\\"aui-avatar aui-avatar-large aui-avatar-project\\\"\u003e\u003cspan class=\\\"aui-avatar-inner\\\"\u003e\u003cimg src=\\\"\\/jira\\/secure\\/projectavatar?pid=12314526&avatarId=36734\\\" alt=\\\"Samza\\\" \\/\u003e\u003c\\/span\u003e\u003c\\/span\u003e\u003cimg 
src=\\\"data:image\\/svg+xml;base64,PHN2ZyB3aWR0aD0iNzIiIGhlaWdodD0iNzIiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyI+PGcgZmlsbD0ibm9uZSIgZmlsbC1ydWxlPSJldmVub2RkIj48Y2lyY2xlIGZpbGw9IiNFRDZGMDAiIGN4PSIzNiIgY3k9IjM2IiByPSIzNiIvPjxwYXRoIGQ9Ik0yOS42OCA0OS42MTdhMy4xOTQgMy4xOTQgMCAwMS0yLjI2My0uOTM3TDE3LjExMyAzOC4zNzVhMy44MyAzLjgzIDAgMDEwLTUuNGwxMC4wNC0xMC4wNGEzLjIwOSAzLjIwOSAwIDAxNC41MjggMCAzLjIwNiAzLjIwNiAwIDAxMCA0LjUyOGwtOC4yMTUgOC4yMTMgOC40NzkgOC40OGEzLjIwMSAzLjIwMSAwIDAxLTIuMjY0IDUuNDZNNTAuNjYzIDM3LjQ5NmwuMDE2LjAxNi0uMDE2LS4wMTZ6bS04LjAzNSAxMi4xMmEzLjE5OCAzLjE5OCAwIDAxLTIuMjYyLTUuNDYxbDguNDc3LTguNDgtOC4yMS04LjIxNGEzLjIgMy4yIDAgMDEwLTQuNTI2IDMuMiAzLjIgMCAwMTQuNTIyLjAwMmwxMC4wNCAxMC4wNGEzLjc3NCAzLjc3NCAwIDAxMS4xMiAyLjY5IDMuNzg2IDMuNzg2IDAgMDEtMS4xMiAyLjcwOEw0NC44OSA0OC42OGEzLjE4NiAzLjE4NiAwIDAxLTIuMjYyLjkzN3oiIGZpbGw9IiNGRkYiLz48L2c+PC9zdmc+\\\" alt=\\\"Project Type: software\\\" class=\\\"jira-project-avatar-icon\\\" \\/\u003e\u003c\\/a\u003e\u003c\\/div\u003e\u003cdiv class=\\\"aui-page-header-main\\\" \u003e\u003ch1\u003e\u003cdiv class=\\\"aui-group aui-group-split\\\"\u003e\u003cdiv class=\\\"aui-item project-title\\\"\u003e\u003ca href=\\\"\\/jira\\/projects\\/SAMZA\\/summary\\\" title=\\\"Samza\\\"\u003eSamza\u003c\\/a\u003e\u003c\\/div\u003e\u003c\\/div\u003e\u003c\\/h1\u003e\u003c\\/div\u003e\u003cdiv class=\\\"aui-page-header-main scopeFilterContainer\\\" \u003e\u003c\\/div\u003e\u003c\\/div\u003e\u003c\\/div\u003e\u003cnav class=\\\"aui-navgroup aui-navgroup-vertical\\\"\u003e\u003cdiv class=\\\"aui-navgroup-inner sidebar-content-container jira-navigation\\\"\u003e\u003cdiv class=\\\"aui-sidebar-group aui-sidebar-group-tier-one\\\" data-id=\\\"sidebar-navigation-panel\\\"\u003e\u003cul class=\\\"aui-nav\\\"\u003e\u003cli class=\\\"aui-nav-selected\\\" \u003e\u003ca class=\\\"aui-nav-item \\\" href=\\\"\\/jira\\/projects\\/SAMZA\\/issues\\\" data-link-id=\\\"com.atlassian.jira.jira-projects-issue-navigator:sidebar-issue-navigator\\\" \u003e\u003cspan 
class=\\\"aui-icon aui-icon-large icon-sidebar-issues aui-iconfont-issues\\\"\u003e\u003c\\/span\u003e\u003cspan class=\\\"aui-nav-item-label\\\" title=\\\"Issues\\\"\u003eIssues\u003c\\/span\u003e\u003c\\/a\u003e\u003c\\/li\u003e\u003cli \u003e\u003ca class=\\\"aui-nav-item \\\" href=\\\"\\/jira\\/projects\\/SAMZA?selectedItem=com.atlassian.jira.jira-projects-plugin:report-page\\\" data-link-id=\\\"com.atlassian.jira.jira-projects-plugin:report-page\\\" \u003e\u003cspan class=\\\"aui-icon aui-icon-large agile-icon-report aui-iconfont-graph-line\\\"\u003e\u003c\\/span\u003e\u003cspan class=\\\"aui-nav-item-label\\\" title=\\\"Reports\\\"\u003eReports\u003c\\/span\u003e\u003c\\/a\u003e\u003c\\/li\u003e\u003cli \u003e\u003ca class=\\\"aui-nav-item \\\" href=\\\"\\/jira\\/projects\\/SAMZA?selectedItem=com.atlassian.jira.jira-projects-plugin:components-page\\\" data-link-id=\\\"com.atlassian.jira.jira-projects-plugin:components-page\\\" \u003e\u003cspan class=\\\"aui-icon aui-icon-large icon-sidebar-components\\\"\u003e\u003c\\/span\u003e\u003cspan class=\\\"aui-nav-item-label\\\" title=\\\"Components\\\"\u003eComponents\u003c\\/span\u003e\u003c\\/a\u003e\u003c\\/li\u003e\u003cli \u003e\u003ca class=\\\"aui-nav-item \\\" href=\\\"\\/jira\\/projects\\/SAMZA?selectedItem=biz.everit.jira.epic-roadmap:erfj-sidebar-roadmap\\\" data-link-id=\\\"biz.everit.jira.epic-roadmap:erfj-sidebar-roadmap\\\" \u003e\u003cspan class=\\\"aui-icon aui-icon-large erfj-icon\\\"\u003e\u003c\\/span\u003e\u003cspan class=\\\"aui-nav-item-label\\\" title=\\\"Roadmap\\\"\u003eRoadmap\u003c\\/span\u003e\u003c\\/a\u003e\u003c\\/li\u003e\u003c\\/ul\u003e\u003c\\/div\u003e\u003c\\/div\u003e\u003c\\/nav\u003e\u003c\\/div\u003e\u003cdiv class=\\\"aui-sidebar-footer\\\"\u003e\u003cbutton class=\\\"aui-button aui-button-subtle aui-sidebar-toggle aui-sidebar-footer-tipsy\\\" title=\\\"Expand sidebar ( [ )\\\" data-tooltip=\\\"Expand sidebar ( [ )\\\"\u003e\u003cspan class=\\\"aui-icon aui-icon-small 
aui-iconfont-chevron-double-left\\\"\u003e\u003c\\/span\u003e\u003c\\/button\u003e\u003c\\/div\u003e\u003c\\/div\u003e\u003c\\/section\u003e\""; if(window.WRM._dataArrived)window.WRM._dataArrived();</script> <script type="text/javascript" src="/jira/s/d41d8cd98f00b204e9800998ecf8427e-CDN/-lmkfjk/820010/13pdxe5/1.0/_/download/batch/jira.webresources:bigpipe-init/jira.webresources:bigpipe-init.js" data-wrm-key="jira.webresources:bigpipe-init" data-wrm-batch-type="resource" data-initially-rendered></script> <form id="jira_request_timing_info" class="dont-default-focus" > <fieldset class="parameters hidden"> <input type="hidden" title="jira.request.start.millis" value="1741943450880" /> <input type="hidden" title="jira.request.server.time" value="218" /> <input type="hidden" title="jira.request.id" value="550x23380235x3" /> <input type="hidden" title="jira.session.expiry.time" value="-" /> <input type="hidden" title="jira.session.expiry.in.mins" value="-" /> <input id="jiraConcurrentRequests" type="hidden" name="jira.request.concurrent.requests" value="3" /> <input type="hidden" title="db.reads.time.in.ms" value="8" /> <input type="hidden" title="db.conns.time.in.ms" value="17" /> </fieldset> </form> <!-- REQUEST ID : 550x23380235x3 REQUEST TIMESTAMP : [14/Mar/2025:09:10:50 +0000] REQUEST TIME : 0.2180 ASESSIONID : - CONCURRENT REQUESTS : 3 db.reads : OpSnapshot{name='db.reads', invocationCount=51, elapsedTotal=8142930, elapsedMin=72536, elapsedMax=1125509, resultSetSize=0, cpuTotal=0, cpuMin=0, cpuMax=0} db.conns : OpSnapshot{name='db.conns', invocationCount=58, elapsedTotal=17285782, elapsedMin=87235, elapsedMax=6208559, resultSetSize=0, cpuTotal=0, cpuMin=0, cpuMax=0} --> </body> </html>