CINXE.COM
Introducing Similarity Search at Flickr | code.flickr.com
<!DOCTYPE html> <!--[if IE 6]> <html id="ie6" lang="en-US"> <![endif]--> <!--[if IE 7]> <html id="ie7" lang="en-US"> <![endif]--> <!--[if IE 8]> <html id="ie8" lang="en-US"> <![endif]--> <!--[if !(IE 6) & !(IE 7) & !(IE 8)]><!--> <html lang="en-US"> <!--<![endif]--> <head> <meta charset="UTF-8" /> <meta name="viewport" content="width=device-width" /> <title> Introducing Similarity Search at Flickr | code.flickr.com </title> <link rel="profile" href="https://gmpg.org/xfn/11" /> <link rel="stylesheet" type="text/css" media="all" href="https://code.flickr.net/wp-content/themes/flickr-code/style.css?ver=20190507" /> <link rel="pingback" href="https://code.flickr.net/xmlrpc.php"> <!--[if lt IE 9]> <script src="https://code.flickr.net/wp-content/themes/twentyeleven/js/html5.js?ver=3.7.0" type="text/javascript"></script> <![endif]--> <meta name='robots' content='max-image-preview:large' /> <style>img:is([sizes="auto" i], [sizes^="auto," i]) { contain-intrinsic-size: 3000px 1500px }</style> <link rel='dns-prefetch' href='//stats.wp.com' /> <link rel="alternate" type="application/rss+xml" title="code.flickr.com » Feed" href="https://code.flickr.net/feed/" /> <link rel="alternate" type="application/rss+xml" title="code.flickr.com » Comments Feed" href="https://code.flickr.net/comments/feed/" /> <link rel="alternate" type="application/rss+xml" title="code.flickr.com » Introducing Similarity Search at Flickr Comments Feed" href="https://code.flickr.net/2017/03/07/introducing-similarity-search-at-flickr/feed/" /> <script type="text/javascript"> /* <![CDATA[ */ window._wpemojiSettings = {"baseUrl":"https:\/\/s.w.org\/images\/core\/emoji\/15.0.3\/72x72\/","ext":".png","svgUrl":"https:\/\/s.w.org\/images\/core\/emoji\/15.0.3\/svg\/","svgExt":".svg","source":{"concatemoji":"https:\/\/code.flickr.net\/wp-includes\/js\/wp-emoji-release.min.js?ver=6.7.1"}}; /*! This file is auto-generated */ !function(i,n){var o,s,e;function c(e){try{var t={supportTests:e,timestamp:(new Date).valueOf()};sessionStorage.setItem(o,JSON.stringify(t))}catch(e){}}function p(e,t,n){e.clearRect(0,0,e.canvas.width,e.canvas.height),e.fillText(t,0,0);var t=new Uint32Array(e.getImageData(0,0,e.canvas.width,e.canvas.height).data),r=(e.clearRect(0,0,e.canvas.width,e.canvas.height),e.fillText(n,0,0),new Uint32Array(e.getImageData(0,0,e.canvas.width,e.canvas.height).data));return t.every(function(e,t){return e===r[t]})}function u(e,t,n){switch(t){case"flag":return n(e,"\ud83c\udff3\ufe0f\u200d\u26a7\ufe0f","\ud83c\udff3\ufe0f\u200b\u26a7\ufe0f")?!1:!n(e,"\ud83c\uddfa\ud83c\uddf3","\ud83c\uddfa\u200b\ud83c\uddf3")&&!n(e,"\ud83c\udff4\udb40\udc67\udb40\udc62\udb40\udc65\udb40\udc6e\udb40\udc67\udb40\udc7f","\ud83c\udff4\u200b\udb40\udc67\u200b\udb40\udc62\u200b\udb40\udc65\u200b\udb40\udc6e\u200b\udb40\udc67\u200b\udb40\udc7f");case"emoji":return!n(e,"\ud83d\udc26\u200d\u2b1b","\ud83d\udc26\u200b\u2b1b")}return!1}function f(e,t,n){var r="undefined"!=typeof WorkerGlobalScope&&self instanceof WorkerGlobalScope?new OffscreenCanvas(300,150):i.createElement("canvas"),a=r.getContext("2d",{willReadFrequently:!0}),o=(a.textBaseline="top",a.font="600 32px Arial",{});return e.forEach(function(e){o[e]=t(a,e,n)}),o}function t(e){var t=i.createElement("script");t.src=e,t.defer=!0,i.head.appendChild(t)}"undefined"!=typeof Promise&&(o="wpEmojiSettingsSupports",s=["flag","emoji"],n.supports={everything:!0,everythingExceptFlag:!0},e=new Promise(function(e){i.addEventListener("DOMContentLoaded",e,{once:!0})}),new Promise(function(t){var n=function(){try{var e=JSON.parse(sessionStorage.getItem(o));if("object"==typeof e&&"number"==typeof e.timestamp&&(new Date).valueOf()<e.timestamp+604800&&"object"==typeof e.supportTests)return e.supportTests}catch(e){}return null}();if(!n){if("undefined"!=typeof Worker&&"undefined"!=typeof OffscreenCanvas&&"undefined"!=typeof URL&&URL.createObjectURL&&"undefined"!=typeof Blob)try{var e="postMessage("+f.toString()+"("+[JSON.stringify(s),u.toString(),p.toString()].join(",")+"));",r=new Blob([e],{type:"text/javascript"}),a=new Worker(URL.createObjectURL(r),{name:"wpTestEmojiSupports"});return void(a.onmessage=function(e){c(n=e.data),a.terminate(),t(n)})}catch(e){}c(n=f(s,u,p))}t(n)}).then(function(e){for(var t in e)n.supports[t]=e[t],n.supports.everything=n.supports.everything&&n.supports[t],"flag"!==t&&(n.supports.everythingExceptFlag=n.supports.everythingExceptFlag&&n.supports[t]);n.supports.everythingExceptFlag=n.supports.everythingExceptFlag&&!n.supports.flag,n.DOMReady=!1,n.readyCallback=function(){n.DOMReady=!0}}).then(function(){return e}).then(function(){var e;n.supports.everything||(n.readyCallback(),(e=n.source||{}).concatemoji?t(e.concatemoji):e.wpemoji&&e.twemoji&&(t(e.twemoji),t(e.wpemoji)))}))}((window,document),window._wpemojiSettings); /* ]]> */ </script> <style id='wp-emoji-styles-inline-css'> img.wp-smiley, img.emoji { display: inline !important; border: none !important; box-shadow: none !important; height: 1em !important; width: 1em !important; margin: 0 0.07em !important; vertical-align: -0.1em !important; background: none !important; padding: 0 !important; } </style> <link rel='stylesheet' id='all-css-2' href='https://code.flickr.net/wp-includes/css/dist/block-library/style.min.css?m=1732206022g' type='text/css' media='all' /> <style id='wp-block-library-theme-inline-css'> .wp-block-audio :where(figcaption){color:#555;font-size:13px;text-align:center}.is-dark-theme .wp-block-audio :where(figcaption){color:#ffffffa6}.wp-block-audio{margin:0 0 1em}.wp-block-code{border:1px solid #ccc;border-radius:4px;font-family:Menlo,Consolas,monaco,monospace;padding:.8em 1em}.wp-block-embed :where(figcaption){color:#555;font-size:13px;text-align:center}.is-dark-theme .wp-block-embed :where(figcaption){color:#ffffffa6}.wp-block-embed{margin:0 0 1em}.blocks-gallery-caption{color:#555;font-size:13px;text-align:center}.is-dark-theme .blocks-gallery-caption{color:#ffffffa6}:root :where(.wp-block-image figcaption){color:#555;font-size:13px;text-align:center}.is-dark-theme :root :where(.wp-block-image figcaption){color:#ffffffa6}.wp-block-image{margin:0 0 1em}.wp-block-pullquote{border-bottom:4px solid;border-top:4px solid;color:currentColor;margin-bottom:1.75em}.wp-block-pullquote cite,.wp-block-pullquote footer,.wp-block-pullquote__citation{color:currentColor;font-size:.8125em;font-style:normal;text-transform:uppercase}.wp-block-quote{border-left:.25em solid;margin:0 0 1.75em;padding-left:1em}.wp-block-quote cite,.wp-block-quote footer{color:currentColor;font-size:.8125em;font-style:normal;position:relative}.wp-block-quote:where(.has-text-align-right){border-left:none;border-right:.25em solid;padding-left:0;padding-right:1em}.wp-block-quote:where(.has-text-align-center){border:none;padding-left:0}.wp-block-quote.is-large,.wp-block-quote.is-style-large,.wp-block-quote:where(.is-style-plain){border:none}.wp-block-search .wp-block-search__label{font-weight:700}.wp-block-search__button{border:1px solid #ccc;padding:.375em .625em}:where(.wp-block-group.has-background){padding:1.25em 2.375em}.wp-block-separator.has-css-opacity{opacity:.4}.wp-block-separator{border:none;border-bottom:2px solid;margin-left:auto;margin-right:auto}.wp-block-separator.has-alpha-channel-opacity{opacity:1}.wp-block-separator:not(.is-style-wide):not(.is-style-dots){width:100px}.wp-block-separator.has-background:not(.is-style-dots){border-bottom:none;height:1px}.wp-block-separator.has-background:not(.is-style-wide):not(.is-style-dots){height:2px}.wp-block-table{margin:0 0 1em}.wp-block-table td,.wp-block-table th{word-break:normal}.wp-block-table :where(figcaption){color:#555;font-size:13px;text-align:center}.is-dark-theme .wp-block-table :where(figcaption){color:#ffffffa6}.wp-block-video :where(figcaption){color:#555;font-size:13px;text-align:center}.is-dark-theme .wp-block-video :where(figcaption){color:#ffffffa6}.wp-block-video{margin:0 0 1em}:root :where(.wp-block-template-part.has-background){margin-bottom:0;margin-top:0;padding:1.25em 2.375em} </style> <link rel='stylesheet' id='all-css-6' href='https://code.flickr.net/_static/??-eJzTLy/QzcxLzilNSS3WzyrWz01NyUxMzUnNTc0rQeEU5CRWphbp5qSmJyZX6uVm5uklFxfr6OPTDpRD5sM02efaGpobGxkZmBkYGQMARIMu1Q==' type='text/css' media='all' /> <style id='jetpack-sharing-buttons-style-inline-css'> .jetpack-sharing-buttons__services-list{display:flex;flex-direction:row;flex-wrap:wrap;gap:0;list-style-type:none;margin:5px;padding:0}.jetpack-sharing-buttons__services-list.has-small-icon-size{font-size:12px}.jetpack-sharing-buttons__services-list.has-normal-icon-size{font-size:16px}.jetpack-sharing-buttons__services-list.has-large-icon-size{font-size:24px}.jetpack-sharing-buttons__services-list.has-huge-icon-size{font-size:36px}@media print{.jetpack-sharing-buttons__services-list{display:none!important}}.editor-styles-wrapper .wp-block-jetpack-sharing-buttons{gap:0;padding-inline-start:0}ul.jetpack-sharing-buttons__services-list.has-background{padding:1.25em 2.375em} </style> <style id='classic-theme-styles-inline-css'> /*! This file is auto-generated */ .wp-block-button__link{color:#fff;background-color:#32373c;border-radius:9999px;box-shadow:none;text-decoration:none;padding:calc(.667em + 2px) calc(1.333em + 2px);font-size:1.125em}.wp-block-file__button{background:#32373c;color:#fff;text-decoration:none} </style> <style id='global-styles-inline-css'> :root{--wp--preset--aspect-ratio--square: 1;--wp--preset--aspect-ratio--4-3: 4/3;--wp--preset--aspect-ratio--3-4: 3/4;--wp--preset--aspect-ratio--3-2: 3/2;--wp--preset--aspect-ratio--2-3: 2/3;--wp--preset--aspect-ratio--16-9: 16/9;--wp--preset--aspect-ratio--9-16: 9/16;--wp--preset--color--black: #000;--wp--preset--color--cyan-bluish-gray: #abb8c3;--wp--preset--color--white: #fff;--wp--preset--color--pale-pink: #f78da7;--wp--preset--color--vivid-red: #cf2e2e;--wp--preset--color--luminous-vivid-orange: #ff6900;--wp--preset--color--luminous-vivid-amber: #fcb900;--wp--preset--color--light-green-cyan: #7bdcb5;--wp--preset--color--vivid-green-cyan: #00d084;--wp--preset--color--pale-cyan-blue: #8ed1fc;--wp--preset--color--vivid-cyan-blue: #0693e3;--wp--preset--color--vivid-purple: #9b51e0;--wp--preset--color--blue: #1982d1;--wp--preset--color--dark-gray: #373737;--wp--preset--color--medium-gray: #666;--wp--preset--color--light-gray: #e2e2e2;--wp--preset--gradient--vivid-cyan-blue-to-vivid-purple: linear-gradient(135deg,rgba(6,147,227,1) 0%,rgb(155,81,224) 100%);--wp--preset--gradient--light-green-cyan-to-vivid-green-cyan: linear-gradient(135deg,rgb(122,220,180) 0%,rgb(0,208,130) 100%);--wp--preset--gradient--luminous-vivid-amber-to-luminous-vivid-orange: linear-gradient(135deg,rgba(252,185,0,1) 0%,rgba(255,105,0,1) 100%);--wp--preset--gradient--luminous-vivid-orange-to-vivid-red: linear-gradient(135deg,rgba(255,105,0,1) 0%,rgb(207,46,46) 100%);--wp--preset--gradient--very-light-gray-to-cyan-bluish-gray: linear-gradient(135deg,rgb(238,238,238) 0%,rgb(169,184,195) 100%);--wp--preset--gradient--cool-to-warm-spectrum: linear-gradient(135deg,rgb(74,234,220) 0%,rgb(151,120,209) 20%,rgb(207,42,186) 40%,rgb(238,44,130) 60%,rgb(251,105,98) 80%,rgb(254,248,76) 100%);--wp--preset--gradient--blush-light-purple: linear-gradient(135deg,rgb(255,206,236) 0%,rgb(152,150,240) 100%);--wp--preset--gradient--blush-bordeaux: linear-gradient(135deg,rgb(254,205,165) 0%,rgb(254,45,45) 50%,rgb(107,0,62) 100%);--wp--preset--gradient--luminous-dusk: linear-gradient(135deg,rgb(255,203,112) 0%,rgb(199,81,192) 50%,rgb(65,88,208) 100%);--wp--preset--gradient--pale-ocean: linear-gradient(135deg,rgb(255,245,203) 0%,rgb(182,227,212) 50%,rgb(51,167,181) 100%);--wp--preset--gradient--electric-grass: linear-gradient(135deg,rgb(202,248,128) 0%,rgb(113,206,126) 100%);--wp--preset--gradient--midnight: linear-gradient(135deg,rgb(2,3,129) 0%,rgb(40,116,252) 100%);--wp--preset--font-size--small: 13px;--wp--preset--font-size--medium: 20px;--wp--preset--font-size--large: 36px;--wp--preset--font-size--x-large: 42px;--wp--preset--spacing--20: 0.44rem;--wp--preset--spacing--30: 0.67rem;--wp--preset--spacing--40: 1rem;--wp--preset--spacing--50: 1.5rem;--wp--preset--spacing--60: 2.25rem;--wp--preset--spacing--70: 3.38rem;--wp--preset--spacing--80: 5.06rem;--wp--preset--shadow--natural: 6px 6px 9px rgba(0, 0, 0, 0.2);--wp--preset--shadow--deep: 12px 12px 50px rgba(0, 0, 0, 0.4);--wp--preset--shadow--sharp: 6px 6px 0px rgba(0, 0, 0, 0.2);--wp--preset--shadow--outlined: 6px 6px 0px -3px rgba(255, 255, 255, 1), 6px 6px rgba(0, 0, 0, 1);--wp--preset--shadow--crisp: 6px 6px 0px rgba(0, 0, 0, 1);}:where(.is-layout-flex){gap: 0.5em;}:where(.is-layout-grid){gap: 0.5em;}body .is-layout-flex{display: flex;}.is-layout-flex{flex-wrap: wrap;align-items: center;}.is-layout-flex > :is(*, div){margin: 0;}body .is-layout-grid{display: grid;}.is-layout-grid > :is(*, div){margin: 0;}:where(.wp-block-columns.is-layout-flex){gap: 2em;}:where(.wp-block-columns.is-layout-grid){gap: 2em;}:where(.wp-block-post-template.is-layout-flex){gap: 1.25em;}:where(.wp-block-post-template.is-layout-grid){gap: 1.25em;}.has-black-color{color: var(--wp--preset--color--black) !important;}.has-cyan-bluish-gray-color{color: var(--wp--preset--color--cyan-bluish-gray) !important;}.has-white-color{color: var(--wp--preset--color--white) !important;}.has-pale-pink-color{color: var(--wp--preset--color--pale-pink) !important;}.has-vivid-red-color{color: var(--wp--preset--color--vivid-red) !important;}.has-luminous-vivid-orange-color{color: var(--wp--preset--color--luminous-vivid-orange) !important;}.has-luminous-vivid-amber-color{color: var(--wp--preset--color--luminous-vivid-amber) !important;}.has-light-green-cyan-color{color: var(--wp--preset--color--light-green-cyan) !important;}.has-vivid-green-cyan-color{color: var(--wp--preset--color--vivid-green-cyan) !important;}.has-pale-cyan-blue-color{color: var(--wp--preset--color--pale-cyan-blue) !important;}.has-vivid-cyan-blue-color{color: var(--wp--preset--color--vivid-cyan-blue) !important;}.has-vivid-purple-color{color: var(--wp--preset--color--vivid-purple) !important;}.has-black-background-color{background-color: var(--wp--preset--color--black) !important;}.has-cyan-bluish-gray-background-color{background-color: var(--wp--preset--color--cyan-bluish-gray) !important;}.has-white-background-color{background-color: var(--wp--preset--color--white) !important;}.has-pale-pink-background-color{background-color: var(--wp--preset--color--pale-pink) !important;}.has-vivid-red-background-color{background-color: var(--wp--preset--color--vivid-red) !important;}.has-luminous-vivid-orange-background-color{background-color: var(--wp--preset--color--luminous-vivid-orange) !important;}.has-luminous-vivid-amber-background-color{background-color: var(--wp--preset--color--luminous-vivid-amber) !important;}.has-light-green-cyan-background-color{background-color: var(--wp--preset--color--light-green-cyan) !important;}.has-vivid-green-cyan-background-color{background-color: var(--wp--preset--color--vivid-green-cyan) !important;}.has-pale-cyan-blue-background-color{background-color: var(--wp--preset--color--pale-cyan-blue) !important;}.has-vivid-cyan-blue-background-color{background-color: var(--wp--preset--color--vivid-cyan-blue) !important;}.has-vivid-purple-background-color{background-color: var(--wp--preset--color--vivid-purple) !important;}.has-black-border-color{border-color: var(--wp--preset--color--black) !important;}.has-cyan-bluish-gray-border-color{border-color: var(--wp--preset--color--cyan-bluish-gray) !important;}.has-white-border-color{border-color: var(--wp--preset--color--white) !important;}.has-pale-pink-border-color{border-color: var(--wp--preset--color--pale-pink) !important;}.has-vivid-red-border-color{border-color: var(--wp--preset--color--vivid-red) !important;}.has-luminous-vivid-orange-border-color{border-color: var(--wp--preset--color--luminous-vivid-orange) !important;}.has-luminous-vivid-amber-border-color{border-color: var(--wp--preset--color--luminous-vivid-amber) !important;}.has-light-green-cyan-border-color{border-color: var(--wp--preset--color--light-green-cyan) !important;}.has-vivid-green-cyan-border-color{border-color: var(--wp--preset--color--vivid-green-cyan) !important;}.has-pale-cyan-blue-border-color{border-color: var(--wp--preset--color--pale-cyan-blue) !important;}.has-vivid-cyan-blue-border-color{border-color: var(--wp--preset--color--vivid-cyan-blue) !important;}.has-vivid-purple-border-color{border-color: var(--wp--preset--color--vivid-purple) !important;}.has-vivid-cyan-blue-to-vivid-purple-gradient-background{background: var(--wp--preset--gradient--vivid-cyan-blue-to-vivid-purple) !important;}.has-light-green-cyan-to-vivid-green-cyan-gradient-background{background: var(--wp--preset--gradient--light-green-cyan-to-vivid-green-cyan) !important;}.has-luminous-vivid-amber-to-luminous-vivid-orange-gradient-background{background: var(--wp--preset--gradient--luminous-vivid-amber-to-luminous-vivid-orange) !important;}.has-luminous-vivid-orange-to-vivid-red-gradient-background{background: var(--wp--preset--gradient--luminous-vivid-orange-to-vivid-red) !important;}.has-very-light-gray-to-cyan-bluish-gray-gradient-background{background: var(--wp--preset--gradient--very-light-gray-to-cyan-bluish-gray) !important;}.has-cool-to-warm-spectrum-gradient-background{background: var(--wp--preset--gradient--cool-to-warm-spectrum) !important;}.has-blush-light-purple-gradient-background{background: var(--wp--preset--gradient--blush-light-purple) !important;}.has-blush-bordeaux-gradient-background{background: var(--wp--preset--gradient--blush-bordeaux) !important;}.has-luminous-dusk-gradient-background{background: var(--wp--preset--gradient--luminous-dusk) !important;}.has-pale-ocean-gradient-background{background: var(--wp--preset--gradient--pale-ocean) !important;}.has-electric-grass-gradient-background{background: var(--wp--preset--gradient--electric-grass) !important;}.has-midnight-gradient-background{background: var(--wp--preset--gradient--midnight) !important;}.has-small-font-size{font-size: var(--wp--preset--font-size--small) !important;}.has-medium-font-size{font-size: var(--wp--preset--font-size--medium) !important;}.has-large-font-size{font-size: var(--wp--preset--font-size--large) !important;}.has-x-large-font-size{font-size: var(--wp--preset--font-size--x-large) !important;} :where(.wp-block-post-template.is-layout-flex){gap: 1.25em;}:where(.wp-block-post-template.is-layout-grid){gap: 1.25em;} :where(.wp-block-columns.is-layout-flex){gap: 2em;}:where(.wp-block-columns.is-layout-grid){gap: 2em;} :root :where(.wp-block-pullquote){font-size: 1.5em;line-height: 1.6;} </style> <link rel='stylesheet' id='all-css-12' href='https://code.flickr.net/_static/??-eJzTLy/QTc7PK0nNK9EvyUjNTS3WLykHcipTc1LLUvP0i0sqc1L1kouLdfQxVablZCZnFwFFU1LxK0QxMiknPzm7GKTUPtfW0NzI0NDQxMDUEABaNTMI' type='text/css' media='all' /> <link rel="https://api.w.org/" href="https://code.flickr.net/wp-json/" /><link rel="alternate" title="JSON" type="application/json" href="https://code.flickr.net/wp-json/wp/v2/posts/3571" /><link rel="EditURI" type="application/rsd+xml" title="RSD" href="https://code.flickr.net/xmlrpc.php?rsd" /> <meta name="generator" content="WordPress 6.7.1" /> <link rel="canonical" href="https://code.flickr.net/2017/03/07/introducing-similarity-search-at-flickr/" /> <link rel='shortlink' href='https://code.flickr.net/?p=3571' /> <link rel="alternate" title="oEmbed (JSON)" type="application/json+oembed" href="https://code.flickr.net/wp-json/oembed/1.0/embed?url=https%3A%2F%2Fcode.flickr.net%2F2017%2F03%2F07%2Fintroducing-similarity-search-at-flickr%2F" /> <link rel="alternate" title="oEmbed (XML)" type="text/xml+oembed" href="https://code.flickr.net/wp-json/oembed/1.0/embed?url=https%3A%2F%2Fcode.flickr.net%2F2017%2F03%2F07%2Fintroducing-similarity-search-at-flickr%2F&format=xml" /> <style>img#wpstats{display:none}</style> <style type="text/css" id="twentyeleven-header-css"> #site-title, #site-description { position: absolute; clip: rect(1px 1px 1px 1px); /* IE6, IE7 */ clip: rect(1px, 1px, 1px, 1px); } </style> <style type="text/css" id="wp-custom-css"> body { background: #fff; } body,input,textarea { font-family: Arial, sans-serif; color: #404040; } #page { margin: 1em auto; max-width: 1000px; } a:link { text-decoration: none; color: #0063dc; } a:visited { text-decoration: none; color: #0063dc; } a:hover { text-decoration: none; background: #0063dc; color: #fff; } a:active { text-decoration: none; background-color: #0259c4; color: #fff; } #branding { border: none; } #branding .only-search #s,#branding .only-search #s:focus { width: 280px; background-color: transparent; border-color: #ddd; } #branding #searchform { right: 0; } nav#access { background: none; box-shadow: none; -webkit-box-shadow: none; -moz-box-shadow: none; margin: 0; } nav#access a { font-weight: bold; margin: 11px 28px 0 0; padding: 0; line-height: 21px; } nav#access a:link,nav#access a:visited { text-decoration: none; color: #888; } nav#access a:hover,nav#access li:hover > a,nav#access a:focus,nav#access a:active { text-decoration: none; color: #0063dc; background: none; } #branding .only-search + nav#access div { padding-right: 300px; } nav#access div,nav#access ul { margin: 0; } @media screen and (max-width: 768px) { #branding .only-search + nav#access div { padding-right: 0; } #branding #searchform { display: none; position: static; text-align: center; } #branding .with-image #searchform { max-width: 100%; } #branding .only-search #s,#branding .only-search #s:focus { width: 85%; float: none; } nav#access { margin: 0 0 0 5%; } } @media screen and (max-width: 480px) { nav#access { display: none; } } #content { margin: 0 20% 0 0; width: 80%; } #content .comments-link { display: none; } @media screen and (max-width: 800px) { #main { padding-top: 0; overflow: hidden; } #branding { padding-bottom: 0; } #main #content { margin: 0; } .entry-title,.entry-header .entry-meta { padding-right: 0; } } .singular #content,.left-sidebar.singular #content { margin: 0 10%; width: 80%; } .singular .entry-header,.singular .entry-content,.singular footer.entry-meta,.singular #comments-title { width: 100%; } .hentry,.no-results { border-bottom: 1px dotted #dadada; } .singular .hentry { padding-top: 1em; } .entry-title { padding-bottom: 0; } .entry-title,.entry-title a { font-size: 28px; color: #000; } .entry-title a:hover,.entry-title a:focus,.entry-title a:active { color: #0063dc; background-color: transparent; } @media screen and (max-width: 650px) { .singular .entry-title { padding-top: 0; line-height: 42px; } } .singular .entry-content { margin-top: 0; } #jp-post-flair { margin-top: 3em; } footer.entry-meta .cat-links,footer.entry-meta .sep,footer.entry-meta .tag-links { display: none; } .singular .entry-meta .edit-link a { position: static; } .singular footer.entry-meta { margin-top: 2em; } #comments { display: none; } #site-generator { background: transparent; border-top: none; padding: 0 .5em; } img#wpstats { position: absolute !important; clip: rect(1px 1px 1px 1px); } #secondary { margin-right: 0; width: 16%; text-align: right; } #secondary #s { display: none; } #secondary aside ul { list-style: none; } #secondary .widget-title { color: #999; font-size: 12px; letter-spacing: auto; text-transform: none; } .widget a { font-weight: normal; } .widget a:hover,.widget a:focus,.widget a:active { text-decoration: none; } #everything-after-this-line-is-a-post-level-style---yay-for-wordpress-stripping-comments-from-css { color: #000; } .entry-content p { margin-bottom: 1.3em; } .entry-content p.note { padding: 11px; background-color: #fffdeb; border-bottom: 1px solid #fff9c2; } .entry-content p.warning { padding: 11px; background-color: #fdf8f8; border-bottom: 1px solid #f7dedd; } .entry-content .aside { margin-bottom: 1.3em; padding: 11px; background-color: #f8fdf8; border-bottom: 1px solid #def6df; } .entry-content .aside p:last-child { margin-bottom: 0; } .entry-content h2,.entry-content h3,.entry-content h4 { font-weight: bold; color: #000; text-transform: none; } .entry-content h2 { font-size: 22px; } .entry-content h3 { font-size: 16px; letter-spacing: 0; line-height: 1.3em; margin-bottom: 1em; } .entry-content h4 { font-size: 13px; line-height: 1.4em; margin-bottom: 0; } .entry-content p.flickr-photo,.entry-content p.flickr-photo a,.entry-content p.figure { color: #999; font-size: 14px; } .entry-content p.flickr-photo a:hover,.entry-content p.flickr-photo a:active { color: #0063dc; background-color: transparent; } .entry-content p.flickr-photo .caption { display: block; padding-left: 22px; background: url('http://farm4.staticflickr.com/3329/favicons/72157601614001242_7730.png') no-repeat 0 3px; } .entry-content img { max-width: 100%; height: auto; } .undersized-image-container { text-align: center; } .entry-content code { color: #000; font-family: "Consolas", "Bitstream Vera Sans Mono", "Courier New", Courier, monospace; font-size: 14px; background-color: #F6FcFF; border-bottom: 1px solid #ebf5ff; } .entry-content ul { list-style: disc; } .entry-content ol ol { list-style: lower-alpha; } .entry-content table caption { font-size: 13px; font-weight: bold; color: #000; } .entry-content table tr.odd td,.entry-content table tr.odd th { background-color: #f8f8f8; } .entry-content table td,.entry-content table th { padding: 3px 10px; line-height: 1.4em; } .entry-content table tr th { color: #000; font-weight: bold; text-transform: none; font-size: 13px; letter-spacing: 0; border-top: 1px solid #ddd; } .entry-content table tr td { font-size: 14px; } .entry-content table.data tr td,.entry-content table.data thead tr th { text-align: right; } .entry-content table.tight td,.entry-content table.tight th { padding: 3px 6px; } .entry-content table.tight th { font-size: 12px; } .entry-content table.tight td { font-size: 13px; } @media screen and (max-width: 650px) { .entry-content table td,.entry-content table th { padding: 3px 5px; } .entry-content table tr th { font-size: 12px; } .entry-content table tr td { font-size: 13px; } .entry-content table.tight td,.entry-content table.tight th { padding: 3px 4px; } .entry-content table.tight th { font-size: 11px; } .entry-content table.tight td { font-size: 12px; } } .entry-content blockquote { font-family: inherit; font-style: normal; font-weight: normal; margin: 0 2em 1.3em; padding: 1em; background: #F9F9F9; border-bottom: 1px solid #ececec; } .entry-content blockquote p.source { margin-bottom: 0; } .entry-content .hiring-banner { position: relative; background: #ffebf5; padding: 11px 11px 11px 88px; border-bottom: 1px solid #ffc5e2; margin: 2em 0; } .entry-content .hiring-banner p { margin-bottom: 0; } .entry-content .hiring-banner .group-photo { position: absolute; left: -57px; top: -5px; width: 120px; padding: 6px; background: #fff; -webkit-box-shadow: 1px 1px 8px rgba(50,50,50,0.8); -moz-box-shadow: 1px 1px 8px rgba(50,50,50,0.8); box-shadow: 1px 1px 8px rgba(50,50,50,0.8); -webkit-transform: rotate(-11deg); -moz-transform: rotate(-11deg); -o-transform: rotate(-11deg); -ms-transform: rotate(-11deg); } .entry-content .hiring-banner .group-photo img { display: block; margin: 0; max-width: 100%; } @media screen and (max-width: 800px) { .entry-content .hiring-banner { padding-left: 11px; } .entry-content .hiring-banner .group-photo { display: none; } } #everything-after-this-line-is-for-the-syntaxhighlighter---all-rules-require-important { color: #000; } #main .syntaxhighlighter .lines { border-bottom: 1px solid #ebf5ff !important; } #main .syntaxhighlighter,#main .syntaxhighlighter div,#main .syntaxhighlighter code,#main .syntaxhighlighter table,#main .syntaxhighlighter table td,#main .syntaxhighlighter table tr,#main .syntaxhighlighter table tbody { font-size: 14px !important; } #main .syntaxhighlighter .line.alt1,#main .syntaxhighlighter .line.alt2 { background-color: #fafdff !important; } #main .syntaxhighlighter .line.highlighted { background-color: #fffbd6 !important; } #main .syntaxhighlighter .plain,#main .syntaxhighlighter .plain a { color: #000 !important; } #main .syntaxhighlighter .comments,#main .syntaxhighlighter .comments a { color: #999 !important; } #main .syntaxhighlighter .string,#main .syntaxhighlighter .string a { color: #ff52a9 !important; } #main .syntaxhighlighter .keyword { color: #0034fe !important; font-weight: normal !important; } #main .syntaxhighlighter .preprocessor { color: #417ba9 !important; } #main .syntaxhighlighter .variable { color: #b130c0 !important; } #main .syntaxhighlighter .value { color: #6b77f7 !important; } #main .syntaxhighlighter .functions { color: #002ad5 !important; } #main .syntaxhighlighter .constants { color: #d11e08 !important; } #main .syntaxhighlighter .script { background-color: yellow !important; } #main .syntaxhighlighter .color1,#main .syntaxhighlighter .color1 a { color: #808080 !important; } #main .syntaxhighlighter .color2,#main .syntaxhighlighter .color2 a { color: #ff1493 !important; } #main .syntaxhighlighter .color3,#main .syntaxhighlighter .color3 a { color: red !important; } a[data-flickr-embed] img { width: 800px; } </style> </head> <body class="post-template-default single single-post postid-3571 single-format-standard custom-background wp-embed-responsive jps-theme-flickr-code singular two-column right-sidebar"> <div class="skip-link"><a class="assistive-text" href="#content">Skip to primary content</a></div><div id="page" class="hfeed"> <header id="branding"> <hgroup> <h1 id="site-title"><span><a href="https://code.flickr.net/" rel="home">code.flickr.com</a></span></h1> <h2 id="site-description"></h2> </hgroup> <a href="https://code.flickr.net/"> <img src="https://wp.flickr.net/wp-content/uploads/sites/3/2012/09/code-flickr-com-drawn-header-grey-large.png" width="1000" height="157" alt="code.flickr.com" /> </a> <div class="only-search with-image"> <form method="get" id="searchform" action="https://code.flickr.net/"> <label for="s" class="assistive-text">Search</label> <input type="text" class="field" name="s" id="s" placeholder="Search" /> <input type="submit" class="submit" name="submit" id="searchsubmit" value="Search" /> </form> </div> <nav id="access"> <h3 class="assistive-text">Main menu</h3> <div class="menu-menu-container"><ul id="menu-menu" class="menu"><li id="menu-item-2084" class="menu-item menu-item-type-custom menu-item-object-custom menu-item-2084"><a href="http://www.flickr.com/">Flickr</a></li> <li id="menu-item-2085" class="menu-item menu-item-type-custom menu-item-object-custom menu-item-2085"><a href="http://blog.flickr.net/">Flickr Blog</a></li> <li id="menu-item-2250" class="menu-item menu-item-type-custom menu-item-object-custom menu-item-2250"><a href="http://twitter.com/flickr">@flickr</a></li> <li id="menu-item-2086" class="menu-item menu-item-type-custom menu-item-object-custom menu-item-2086"><a href="http://twitter.com/flickrapi">@flickrapi</a></li> <li id="menu-item-2087" class="menu-item menu-item-type-custom menu-item-object-custom menu-item-2087"><a href="https://www.flickr.com/services/developer/">Developer Guidelines</a></li> <li id="menu-item-2088" class="menu-item menu-item-type-custom menu-item-object-custom menu-item-2088"><a href="http://www.flickr.com/services/api/">API</a></li> <li id="menu-item-2089" class="menu-item menu-item-type-custom menu-item-object-custom menu-item-2089"><a href="http://www.flickr.com/jobs/">Jobs</a></li> </ul></div> </nav><!-- #access --> </header><!-- #branding --> <div id="main"> <div id="primary"> <div id="content" role="main"> <nav id="nav-single"> <h3 class="assistive-text">Post navigation</h3> <span class="nav-previous"><a href="https://code.flickr.net/2017/01/05/a-year-without-a-byte/" rel="prev"><span class="meta-nav">←</span> Previous</a></span> <span class="nav-next"><a href="https://code.flickr.net/2018/04/20/together/" rel="next">Next <span class="meta-nav">→</span></a></span> </nav><!-- #nav-single --> <article id="post-3571" class="post-3571 post type-post status-publish format-standard hentry category-uncategorized tag-machine-learning tag-machine-tags tag-neural-network tag-similarity-search tag-visual-similarity"> <header class="entry-header"> <h1 class="entry-title">Introducing Similarity Search at Flickr</h1> <div class="entry-meta"> <span class="sep">Posted on </span><a href="https://code.flickr.net/2017/03/07/introducing-similarity-search-at-flickr/" title="6:04 pm" rel="bookmark"><time class="entry-date" datetime="2017-03-07T18:04:36-08:00">March 7, 2017</time></a><span class="by-author"> <span class="sep"> by </span> <span class="author vcard"><a class="url fn n" href="https://code.flickr.net/author/claytonhoo/" title="View all posts by Clayton Mellina" rel="author">Clayton Mellina</a></span></span> </div><!-- .entry-meta --> </header><!-- .entry-header --> <div class="entry-content"> <p><span style="font-weight:400;">At Flickr, we understand that the value in our image corpus is only unlocked when our members can find photos and photographers that inspire them, so we strive to enable the discovery and appreciation of new photos.</span></p> <p><span style="font-weight:400;">To further that effort, today we are introducing </span><b>similarity search</b><span style="font-weight:400;"> on Flickr. If you hover over a photo on a search result page, you will reveal a “…” button that exposes a menu that gives you the option to search for photos similar to the photo you are currently viewing.</span></p> <p><span style="font-weight:400;">In many ways, photo search is very different from traditional web or text search. First, the goal of web search is usually to satisfy a particular information need, while with photo search the goal is often one of </span><i><span style="font-weight:400;">discovery</span></i><span style="font-weight:400;">; as such, it should be delightful as well as functional. We have taken this to heart throughout Flickr. For instance, our color search feature, which allows filtering by color scheme, and our style filters, which allow filtering by styles such as “minimalist” or “patterns,” encourage exploration. Second, in traditional web search, the goal is usually to match documents to a set of keywords in the query. That is, the query is in the same modality—text—as the documents being searched. Photo search usually matches </span><i><span style="font-weight:400;">across</span></i><span style="font-weight:400;"> modalities: text to image. Text querying is a necessary feature of a photo search engine, but, as the saying goes, a picture is worth a thousand words. And beyond saving people the effort of so much typing, many visual concepts genuinely defy accurate description. Now, we’re giving our community a way to easily explore those visual concepts with the “…” button, a feature we call the </span><b>similarity pivot</b><span style="font-weight:400;">.</span></p> <p><img fetchpriority="high" decoding="async" class="alignnone size-medium wp-image-3572" src="https://wp.flickr.net/wp-content/uploads/sites/3/2017/03/demo.gif?w=800" alt="" width="800" height="412" /></p> <p><span style="font-weight:400;">The similarity pivot is a significant addition to the Flickr experience because it offers our community an entirely new way to explore and discover the billions of incredible photos and millions of incredible photographers on Flickr. It allows people to look for </span><a href="https://www.flickr.com/search/?similarity_id=29327172003"><span style="font-weight:400;">images of a particular style</span></a><span style="font-weight:400;">, it gives people a view into </span><a href="https://www.flickr.com/search/?similarity_id=5742058855"><span style="font-weight:400;">universal behaviors</span></a><span style="font-weight:400;">, and even when it “messes up,” it can force people to look at the </span><a href="https://www.flickr.com/search/?similarity_id=14198128453"><span style="font-weight:400;">unexpected</span></a> <a href="https://www.flickr.com/search/?similarity_id=28863966765"><span style="font-weight:400;">commonalities</span></a><span style="font-weight:400;"> and </span><a href="https://www.flickr.com/search/?similarity_id=8002923505"><span style="font-weight:400;">oddities</span></a><span style="font-weight:400;"> of our visual world with a </span><a href="https://www.flickr.com/search/?similarity_id=13759436465"><span style="font-weight:400;">fresh</span></a> <a href="https://www.flickr.com/search/?similarity_id=15346205045"><span style="font-weight:400;">perspective</span></a><span style="font-weight:400;">.</span></p> <h2><span style="font-weight:400;">What is “similarity”?</span></h2> <p><span style="font-weight:400;">To understand how an experience like this is powered, we first need to understand what we mean by “similarity.” There are many ways photos can be similar to one another. Consider some examples.</span></p> <p><img decoding="async" class="alignnone size-medium wp-image-3573" src="https://wp.flickr.net/wp-content/uploads/sites/3/2017/03/color_sim.png?w=800" alt="" width="800" height="343" srcset="https://code.flickr.net/wp-content/uploads/sites/3/2017/03/color_sim.png 1033w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/color_sim.png?resize=150,64 150w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/color_sim.png?resize=800,343 800w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/color_sim.png?resize=768,329 768w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/color_sim.png?resize=1024,439 1024w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/color_sim.png?resize=500,214 500w" sizes="(max-width: 800px) 100vw, 800px" /></p> <p><img decoding="async" class="alignnone size-medium wp-image-3576" src="https://wp.flickr.net/wp-content/uploads/sites/3/2017/03/texture_sim.png?w=800" alt="" width="800" height="343" srcset="https://code.flickr.net/wp-content/uploads/sites/3/2017/03/texture_sim.png 1030w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/texture_sim.png?resize=150,64 150w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/texture_sim.png?resize=800,343 800w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/texture_sim.png?resize=768,330 768w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/texture_sim.png?resize=1024,439 1024w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/texture_sim.png?resize=500,215 500w" sizes="(max-width: 800px) 100vw, 800px" /></p> <p><img loading="lazy" decoding="async" class="alignnone size-medium wp-image-3575" src="https://wp.flickr.net/wp-content/uploads/sites/3/2017/03/semantic_sim.png?w=800" alt="" width="800" height="344" srcset="https://code.flickr.net/wp-content/uploads/sites/3/2017/03/semantic_sim.png 1029w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/semantic_sim.png?resize=150,65 150w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/semantic_sim.png?resize=800,344 800w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/semantic_sim.png?resize=768,331 768w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/semantic_sim.png?resize=1024,441 1024w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/semantic_sim.png?resize=500,215 500w" sizes="auto, (max-width: 800px) 100vw, 800px" /></p> <p><span style="font-weight:400;">It is apparent that all of these groups of photos illustrate some notion of “similarity,” but each is different. Roughly, they are: similarity of color, similarity of texture, and similarity of semantic category. And there are many others that you might imagine as well.</span></p> <p>What notion of similarity is best suited for a site like Flickr? Ideally, we’d like to be able to capture multiple types of similarity, but we decided early on that semantic similarity—similarity based on the semantic content of the photos—was vital to facilitate discovery on Flickr. This requires a deep understanding of image content for which we employ deep neural networks.</p> <p>We have been using deep neural networks at Flickr for a while for various tasks such as object recognition, NSFW prediction, and even prediction of aesthetic quality. For these tasks, we train a neural network to map the raw pixels of a photo into a set of relevant tags, as illustrated below.</p> <p><img loading="lazy" decoding="async" class="alignnone size-medium wp-image-3578" src="https://wp.flickr.net/wp-content/uploads/sites/3/2017/03/nn_tag.png?w=800" alt="" width="800" height="230" srcset="https://code.flickr.net/wp-content/uploads/sites/3/2017/03/nn_tag.png 1408w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/nn_tag.png?resize=150,43 150w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/nn_tag.png?resize=800,230 800w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/nn_tag.png?resize=768,220 768w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/nn_tag.png?resize=1024,294 1024w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/nn_tag.png?resize=1000,288 1000w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/nn_tag.png?resize=500,143 500w" sizes="auto, (max-width: 800px) 100vw, 800px" /></p> <p><span style="font-weight:400;">Internally, the neural network accomplishes this mapping incrementally by applying a series of transformations to the image, which can be thought of as a vector of numbers corresponding to the pixel intensities. Each transformation in the series produces another vector, which is in turn the input to the next transformation, until finally we have a vector that we specifically constrain to be a list of probabilities for each class we are trying to recognize in the image. To be able to go from raw pixels to a semantic label like “hot air balloon,” the network discards lots of information about the image, including information about appearance, such as the color of the balloon, its relative position in the sky, etc. Instead, we can extract an internal vector in the network before the final output.</span></p> <p><img loading="lazy" decoding="async" class="alignnone size-medium wp-image-3579" src="https://wp.flickr.net/wp-content/uploads/sites/3/2017/03/nn_feature.png?w=800" alt="" width="800" height="458" srcset="https://code.flickr.net/wp-content/uploads/sites/3/2017/03/nn_feature.png 1244w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/nn_feature.png?resize=150,86 150w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/nn_feature.png?resize=800,458 800w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/nn_feature.png?resize=768,440 768w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/nn_feature.png?resize=1024,586 1024w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/nn_feature.png?resize=500,286 500w" sizes="auto, (max-width: 800px) 100vw, 800px" /></p> <p><span style="font-weight:400;">For common neural network architectures, this vector—which we call a “feature vector”—has many hundreds or thousands of dimensions. We can’t necessarily say with certainty that any one of these dimensions means something in particular as we could at the final network output, whose dimensions correspond to tag probabilities. But these vectors have an important property: when you compute the </span><a href="https://en.wikipedia.org/wiki/Euclidean_distance"><span style="font-weight:400;">Euclidean distance</span></a><span style="font-weight:400;"> between these vectors, images containing similar content will tend to have feature vectors closer together than images containing dissimilar content. You can think of this as a way that the network has learned to organize information present in the image so that it can output the required class prediction. This is exactly what we are looking for: Euclidian distance in this high-dimensional feature space is a measure of semantic similarity. The graphic below illustrates this idea: points in the neighborhood around the query image are semantically similar to the query image, whereas points in neighborhoods further away are not.</span></p> <p><img loading="lazy" decoding="async" class="alignnone size-medium wp-image-3580" src="https://wp.flickr.net/wp-content/uploads/sites/3/2017/03/retrieval.png?w=800" alt="" width="800" height="366" srcset="https://code.flickr.net/wp-content/uploads/sites/3/2017/03/retrieval.png 1686w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/retrieval.png?resize=150,69 150w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/retrieval.png?resize=800,366 800w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/retrieval.png?resize=768,352 768w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/retrieval.png?resize=1024,469 1024w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/retrieval.png?resize=1536,703 1536w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/retrieval.png?resize=500,229 500w" sizes="auto, (max-width: 800px) 100vw, 800px" /></p> <p><span style="font-weight:400;">This measure of similarity is not perfect and cannot capture all possible notions of similarity—it will be constrained by the particular task the network was trained to perform, i.e., scene recognition. However, it is effective for our purposes, and, importantly, it contains information beyond merely the semantic content of the image, such as appearance, composition, and texture. Most importantly, it gives us a simple algorithm for finding visually similar photos: compute the distance in the feature space of a query image to each index image and return the images with lowest distance. Of course, there is much more work to do to make this idea work for billions of images.</span></p> <h2><span style="font-weight:400;">Large-scale approximate nearest neighbor search</span></h2> <p><span style="font-weight:400;">With an index as large as Flickr’s, computing distances exhaustively for each query is intractable. Additionally, storing a high-dimensional floating point feature vector for each of billions of images takes a large amount of disk space and poses even more difficulty if these features need to be in memory for fast ranking. To solve these two issues, we adopt a state-of-the-art approximate nearest neighbor algorithm called </span><a href="http://image.ntua.gr/iva/files/lopq.pdf"><span style="font-weight:400;">Locally Optimized Product Quantization</span></a><span style="font-weight:400;"> (LOPQ).</span></p> <p><span style="font-weight:400;">To understand LOPQ, it is useful to first look at a simple strategy. Rather than ranking all vectors in the index, we can first filter a set of good candidates and only do expensive distance computations on them. For example, we can use an algorithm like </span><a href="https://en.wikipedia.org/wiki/K-means_clustering"><i><span style="font-weight:400;">k</span></i><span style="font-weight:400;">-means</span></a><span style="font-weight:400;"> to cluster our index vectors, find the cluster to which each vector is assigned, and index the corresponding cluster id for each vector. At query time, we find the cluster that the query vector is assigned to and fetch the items that belong to the same cluster from the index. We can even expand this set if we like by fetching items from the next nearest cluster.</span></p> <p><span style="font-weight:400;">This idea will take us far, but not far enough for a billions-scale index. For example, with 1 billion photos, we need 1 million clusters so that each cluster contains an average of 1000 photos. At query time, we will have to compute the distance from the query to each of these 1 million cluster centroids in order to find the nearest clusters. This is quite a lot. We can do better, however, if we instead split our vectors in half by dimension and cluster each half separately. In this scheme, each vector will be assigned to a pair of cluster ids, one for each half of the vector. If we choose k = 1000 to cluster both halves, we have k<sup>2</sup>= 1000 * 1000 = 1e6 possible pairs. In other words, by clustering each half separately and assigning each item a pair of cluster ids, we can get the same granularity of partitioning (1 million clusters total) with only 2 * 1000 distance computations with half the number of dimensions for a total computational savings of 1000x. Conversely, for the same computational cost, we gain a factor of k more partitions of the data space, providing a much finer-grained index.</span></p> <p><span style="font-weight:400;">This idea of splitting vectors into subvectors and clustering each split separately is called </span><a href="https://lear.inrialpes.fr/pubs/2011/JDS11/jegou_searching_with_quantization.pdf"><i><span style="font-weight:400;">product quantization</span></i></a><span style="font-weight:400;">. When we use this idea to index a dataset it is called the </span><a href="http://cache-ash04.cdn.yandex.net/download.yandex.ru/company/cvpr2012.pdf"><i><span style="font-weight:400;">inverted multi-index</span></i></a><span style="font-weight:400;">, and it forms the basis for fast candidate retrieval in our similarity index. Typically the distribution of points over the clusters in a multi-index will be unbalanced as compared to a standard k-means index, but this unbalance is a fair trade for the much higher resolution partitioning that it buys us. In fact, a multi-index will only be balanced across clusters if the two halves of the vectors are perfectly statistically independent. This is not the case in most real world data, but some heuristic preprocessing—like </span><a href="https://en.wikipedia.org/wiki/Principal_component_analysis"><span style="font-weight:400;">PCA-ing</span></a><span style="font-weight:400;"> and permuting the dimensions so that the cumulative per-dimension variance is approximately balanced between the halves—helps in many cases. And just like the simple k-means index, there is a fast algorithm for finding a ranked list of clusters to a query if we need to expand the candidate set.</span></p> <p><span style="font-weight:400;">After we have a set of candidates, we must rank them. We could store the full vector in the index and use it to compute the distance for each candidate item, but this would incur a large memory overhead (for example, 256 dimensional vectors of 4 byte floats would require 1Tb for 1 billion photos) as well as a computational overhead. LOPQ solves these issues by performing another product quantization, this time on the </span><i><span style="font-weight:400;">residuals</span></i><span style="font-weight:400;"> of the data. The residual of a point is the difference vector between the point and its closest cluster centroid. Given a residual vector and the cluster indexes along with the corresponding centroids, we have enough information to reproduce the original vector exactly. Instead of storing the residuals, LOPQ product quantizes the residuals, usually with a higher number of splits, and stores only the cluster indexes in the index. For example, if we split the vector into 8 splits and each split is clustered with 256 centroids, we can store the compressed vector with only 8 bytes regardless of the number of dimensions to start (though certainly a higher number of dimensions will result in higher approximation error). With this </span><a href="https://en.wikipedia.org/wiki/Lossy_compression"><span style="font-weight:400;">lossy representation</span></a><span style="font-weight:400;"> we can produce a reconstruction of a vector from the 8 byte codes: we simply take each quantization code, look up the corresponding centroid, and concatenate these 8 centroids together to produce a reconstruction. Likewise, we can approximate the distance from the query to an index vector by computing the distance between the query and the reconstruction. We can do this computation quickly for many candidate points by computing the squared difference of each split of the query to all of the centroids for that split. After computing this table, we can compute the squared difference for an index point by looking up the precomputed squared difference for each of the 8 indexes and summing them together to get the total squared difference. This caching trick allows us to quickly rank many candidates without resorting to distance computations in the original vector space.</span></p> <p>LOPQ adds one final detail: for each cluster in the multi-index, LOPQ fits a local rotation to the residuals of the points that fall in that cluster. This rotation is simply a PCA that aligns the major directions of variation in the data to the axes followed by a permutation to heuristically balance the variance across the splits of the product quantization. Note that this is the exact preprocessing step that is usually performed at the top-level multi-index. It tends to make the approximate distance computations more accurate by mitigating errors introduced by assuming that each split of the vector in the production quantization is statistically independent from other splits. Additionally, since a rotation is fit for each cluster, they serve to fit the local data distribution better.</p> <p>Below is a diagram from the LOPQ paper that illustrates the core ideas of LOPQ. K-means (a) is very effective at allocating cluster centroids, illustrated as red points, that target the distribution of the data, but it has other drawbacks at scale as discussed earlier. In the 2d example shown, we can imagine product quantizing the space with 2 splits, each with 1 dimension. Product Quantization (b) clusters each dimension independently and cluster centroids are specified by pairs of cluster indexes, one for each split. This is effectively a grid over the space. Since the splits are treated as if they were statistically independent, we will, unfortunately, get many clusters that are “wasted” by not targeting the data distribution. We can improve on this situation by rotating the data such that the main dimensions of variation are axis-aligned. This version, called Optimized Product Quantization (c), does a better job of making sure each centroid is useful. LOPQ (d) extends this idea by first coarsely clustering the data and then doing a separate instance of OPQ for each cluster, allowing highly targeted centroids while still reaping the benefits of product quantization in terms of scalability.</p> <p><img loading="lazy" decoding="async" class="alignnone size-medium wp-image-3581" src="https://wp.flickr.net/wp-content/uploads/sites/3/2017/03/lopq.png?w=800" alt="" width="800" height="689" srcset="https://code.flickr.net/wp-content/uploads/sites/3/2017/03/lopq.png 1022w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/lopq.png?resize=150,129 150w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/lopq.png?resize=800,689 800w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/lopq.png?resize=768,661 768w, https://code.flickr.net/wp-content/uploads/sites/3/2017/03/lopq.png?resize=348,300 348w" sizes="auto, (max-width: 800px) 100vw, 800px" /></p> <p><span style="font-weight:400;">LOPQ is state-of-the-art for quantization methods, and you can find more information about the algorithm, as well as benchmarks, </span><a href="http://image.ntua.gr/iva/research/lopq/"><span style="font-weight:400;">here</span></a><span style="font-weight:400;">. Additionally, we provide an </span><a href="https://github.com/yahoo/lopq"><span style="font-weight:400;">open-source implementation</span></a><span style="font-weight:400;"> in Python and Spark which you can apply to your own datasets. The algorithm produces a set of cluster indexes that can be queried efficiently in an inverted index, as described. We have also explored use cases that use these indexes as a hash for fast deduplication of images and large-scale clustering. These extended use cases are studied </span><a href="https://arxiv.org/abs/1604.06480"><span style="font-weight:400;">here</span></a><span style="font-weight:400;">.</span></p> <h2><span style="font-weight:400;">Conclusion</span></h2> <p><span style="font-weight:400;">We have described our system for large-scale visual similarity search at Flickr. Techniques for producing high-quality vector representations for images with deep learning are constantly improving, enabling new ways to search and explore large multimedia collections. These techniques are being applied in other domains as well to, for example, produce vector representations for </span><a href="https://en.wikipedia.org/wiki/Word2vec"><span style="font-weight:400;">text</span></a><span style="font-weight:400;">, </span><a href="https://arxiv.org/pdf/1502.04681.pdf"><span style="font-weight:400;">video</span></a><span style="font-weight:400;">, and even </span><a href="https://arxiv.org/pdf/1603.00856.pdf"><span style="font-weight:400;">molecules</span></a><span style="font-weight:400;">. Large-scale approximate nearest neighbor search has importance and potential application in these domains as well as many others. Though these techniques are in their infancy, we hope similarity search provides a useful new way to appreciate the amazing collection of images at Flickr and surface photos of interest that may have previously gone undiscovered. We are excited about the future of this technology at Flickr and beyond.</span></p> <p><b>Acknowledgements</b></p> <p><span style="font-weight:400;">Yannis Kalantidis, Huy Nguyen, Stacey Svetlichnaya, Arel Cordero. Special thanks to the rest of the Computer Vision and Machine Learning team and the Vespa search team who manages Yahoo’s internal search engine.</span></p> </div><!-- .entry-content --> <footer class="entry-meta"> This entry was posted in <a href="https://code.flickr.net/category/uncategorized/" rel="category tag">Uncategorized</a> and tagged <a href="https://code.flickr.net/tag/machine-learning/" rel="tag">machine learning</a>, <a href="https://code.flickr.net/tag/machine-tags/" rel="tag">machine tags</a>, <a href="https://code.flickr.net/tag/neural-network/" rel="tag">neural network</a>, <a href="https://code.flickr.net/tag/similarity-search/" rel="tag">similarity search</a>, <a href="https://code.flickr.net/tag/visual-similarity/" rel="tag">visual similarity</a> by <a href="https://code.flickr.net/author/claytonhoo/">Clayton Mellina</a>. Bookmark the <a href="https://code.flickr.net/2017/03/07/introducing-similarity-search-at-flickr/" title="Permalink to Introducing Similarity Search at Flickr" rel="bookmark">permalink</a>. <div id="author-info"> <div id="author-avatar"> <img alt='' src='https://secure.gravatar.com/avatar/d6af1d2ecf8c2cd494df41e9a7bad31b?s=68&d=identicon&r=g' srcset='https://secure.gravatar.com/avatar/d6af1d2ecf8c2cd494df41e9a7bad31b?s=136&d=identicon&r=g 2x' class='avatar avatar-68 photo' height='68' width='68' loading='lazy' decoding='async'/> </div><!-- #author-avatar --> <div id="author-description"> <h2> About Clayton Mellina </h2> I work on large-scale computer vision and machine learning at Yahoo. <div id="author-link"> <a href="https://code.flickr.net/author/claytonhoo/" rel="author"> View all posts by Clayton Mellina <span class="meta-nav">→</span> </a> </div><!-- #author-link --> </div><!-- #author-description --> </div><!-- #author-info --> </footer><!-- .entry-meta --> </article><!-- #post-3571 --> <div id="comments"> </div><!-- #comments --> </div><!-- #content --> </div><!-- #primary --> </div><!-- #main --> <footer id="colophon" role="contentinfo"> <div id="site-generator"> © 2024 Flickr, Inc. All rights reserved. | Powered by <a href="https://wpvip.com/?utm_source=vip_powered_wpcom&utm_medium=web&utm_campaign=VIP%20Footer%20Credit&utm_term=code.flickr.net" rel="generator nofollow" class="powered-by-wpcom">WordPress VIP</a> </div> </footer><!-- #colophon --> </div><!-- #page --> <div class="jetpack-instant-search__widget-area" style="display: none"> <div id="jetpack-search-filters-2" class="widget jetpack-filters widget_search"> <div id="jetpack-search-filters-2-wrapper" class="jetpack-instant-search-wrapper"> </div></div> </div> <script type="text/javascript" src="https://code.flickr.net/_static/??/wp-includes/js/comment-reply.min.js,/wp-includes/js/dist/hooks.min.js?m=1732206023j" ></script><script type="text/javascript" src="https://code.flickr.net/wp-includes/js/dist/i18n.min.js?ver=5e580eb46a90c2b997e6" id="wp-i18n-js"></script> <script type="text/javascript" id="wp-i18n-js-after"> /* <![CDATA[ */ wp.i18n.setLocaleData( { 'text direction\u0004ltr': [ 'ltr' ] } ); /* ]]> */ </script> <script type="text/javascript" src="https://code.flickr.net/wp-content/mu-plugins/jetpack-14.0/jetpack_vendor/automattic/jetpack-assets/build/i18n-loader.js?minify=true&ver=becd7d9884bc1b331e45" id="wp-jp-i18n-loader-js"></script> <script type="text/javascript" id="wp-jp-i18n-loader-js-after"> /* <![CDATA[ */ wp.jpI18nLoader.state = {"baseUrl":"https://code.flickr.net/wp-content/languages/","locale":"en_US","domainMap":{"jetpack-admin-ui":"plugins/jetpack","jetpack-assets":"plugins/jetpack","jetpack-backup-pkg":"plugins/jetpack","jetpack-blaze":"plugins/jetpack","jetpack-boost-core":"plugins/jetpack","jetpack-boost-speed-score":"plugins/jetpack","jetpack-classic-theme-helper":"plugins/jetpack","jetpack-compat":"plugins/jetpack","jetpack-config":"plugins/jetpack","jetpack-connection":"plugins/jetpack","jetpack-explat":"plugins/jetpack","jetpack-forms":"plugins/jetpack","jetpack-image-cdn":"plugins/jetpack","jetpack-import":"plugins/jetpack","jetpack-ip":"plugins/jetpack","jetpack-jitm":"plugins/jetpack","jetpack-licensing":"plugins/jetpack","jetpack-masterbar":"plugins/jetpack","jetpack-my-jetpack":"plugins/jetpack","jetpack-password-checker":"plugins/jetpack","jetpack-plugins-installer":"plugins/jetpack","jetpack-post-list":"plugins/jetpack","jetpack-protect-models":"plugins/jetpack","jetpack-protect-status":"plugins/jetpack","jetpack-publicize-pkg":"plugins/jetpack","jetpack-search-pkg":"plugins/jetpack","jetpack-stats":"plugins/jetpack","jetpack-stats-admin":"plugins/jetpack","jetpack-sync":"plugins/jetpack","jetpack-videopress-pkg":"plugins/jetpack","jetpack-waf":"plugins/jetpack","jetpack-wordads":"plugins/jetpack","woocommerce-analytics":"plugins/jetpack"},"domainPaths":{"jetpack-admin-ui":"jetpack_vendor/automattic/jetpack-admin-ui/","jetpack-assets":"jetpack_vendor/automattic/jetpack-assets/","jetpack-backup-pkg":"jetpack_vendor/automattic/jetpack-backup/","jetpack-blaze":"jetpack_vendor/automattic/jetpack-blaze/","jetpack-boost-core":"jetpack_vendor/automattic/jetpack-boost-core/","jetpack-boost-speed-score":"jetpack_vendor/automattic/jetpack-boost-speed-score/","jetpack-classic-theme-helper":"jetpack_vendor/automattic/jetpack-classic-theme-helper/","jetpack-compat":"jetpack_vendor/automattic/jetpack-compat/","jetpack-config":"jetpack_vendor/automattic/jetpack-config/","jetpack-connection":"jetpack_vendor/automattic/jetpack-connection/","jetpack-explat":"jetpack_vendor/automattic/jetpack-explat/","jetpack-forms":"jetpack_vendor/automattic/jetpack-forms/","jetpack-image-cdn":"jetpack_vendor/automattic/jetpack-image-cdn/","jetpack-import":"jetpack_vendor/automattic/jetpack-import/","jetpack-ip":"jetpack_vendor/automattic/jetpack-ip/","jetpack-jitm":"jetpack_vendor/automattic/jetpack-jitm/","jetpack-licensing":"jetpack_vendor/automattic/jetpack-licensing/","jetpack-masterbar":"jetpack_vendor/automattic/jetpack-masterbar/","jetpack-my-jetpack":"jetpack_vendor/automattic/jetpack-my-jetpack/","jetpack-password-checker":"jetpack_vendor/automattic/jetpack-password-checker/","jetpack-plugins-installer":"jetpack_vendor/automattic/jetpack-plugins-installer/","jetpack-post-list":"jetpack_vendor/automattic/jetpack-post-list/","jetpack-protect-models":"jetpack_vendor/automattic/jetpack-protect-models/","jetpack-protect-status":"jetpack_vendor/automattic/jetpack-protect-status/","jetpack-publicize-pkg":"jetpack_vendor/automattic/jetpack-publicize/","jetpack-search-pkg":"jetpack_vendor/automattic/jetpack-search/","jetpack-stats":"jetpack_vendor/automattic/jetpack-stats/","jetpack-stats-admin":"jetpack_vendor/automattic/jetpack-stats-admin/","jetpack-sync":"jetpack_vendor/automattic/jetpack-sync/","jetpack-videopress-pkg":"jetpack_vendor/automattic/jetpack-videopress/","jetpack-waf":"jetpack_vendor/automattic/jetpack-waf/","jetpack-wordads":"jetpack_vendor/automattic/jetpack-wordads/","woocommerce-analytics":"jetpack_vendor/automattic/woocommerce-analytics/"}}; /* ]]> */ </script> <script type="text/javascript" src="https://code.flickr.net/_static/??/wp-includes/js/dist/vendor/wp-polyfill.min.js,/wp-includes/js/dist/url.min.js?m=1732206023j" ></script><script type="text/javascript" id="jetpack-instant-search-js-before"> /* <![CDATA[ */ var JetpackInstantSearchOptions=JSON.parse(decodeURIComponent("%7B%22overlayOptions%22%3A%7B%22colorTheme%22%3A%22light%22%2C%22enableInfScroll%22%3Atrue%2C%22enableFilteringOpensOverlay%22%3Atrue%2C%22enablePostDate%22%3Atrue%2C%22enableSort%22%3Atrue%2C%22highlightColor%22%3A%22%23FFC%22%2C%22overlayTrigger%22%3A%22submit%22%2C%22resultFormat%22%3A%22expanded%22%2C%22showPoweredBy%22%3Atrue%2C%22defaultSort%22%3A%22relevance%22%2C%22excludedPostTypes%22%3A%5B%5D%7D%2C%22homeUrl%22%3A%22https%3A%5C%2F%5C%2Fcode.flickr.net%22%2C%22locale%22%3A%22en-US%22%2C%22postsPerPage%22%3A10%2C%22siteId%22%3A185426273%2C%22postTypes%22%3A%7B%22post%22%3A%7B%22singular_name%22%3A%22Post%22%2C%22name%22%3A%22Posts%22%7D%2C%22page%22%3A%7B%22singular_name%22%3A%22Page%22%2C%22name%22%3A%22Pages%22%7D%2C%22attachment%22%3A%7B%22singular_name%22%3A%22Media%22%2C%22name%22%3A%22Media%22%7D%7D%2C%22webpackPublicPath%22%3A%22https%3A%5C%2F%5C%2Fcode.flickr.net%5C%2Fwp-content%5C%2Fmu-plugins%5C%2Fjetpack-14.0%5C%2Fjetpack_vendor%5C%2Fautomattic%5C%2Fjetpack-search%5C%2Fbuild%5C%2Finstant-search%5C%2F%22%2C%22isPhotonEnabled%22%3Afalse%2C%22isFreePlan%22%3Afalse%2C%22apiRoot%22%3A%22https%3A%5C%2F%5C%2Fcode.flickr.net%5C%2Fwp-json%5C%2F%22%2C%22apiNonce%22%3A%22647eae8b25%22%2C%22isPrivateSite%22%3Afalse%2C%22isWpcom%22%3Afalse%2C%22hasOverlayWidgets%22%3Atrue%2C%22widgets%22%3A%5B%7B%22filters%22%3A%5B%7B%22name%22%3A%22Bylines%22%2C%22type%22%3A%22taxonomy%22%2C%22taxonomy%22%3A%22byline%22%2C%22count%22%3A5%2C%22widget_id%22%3A%22jetpack-search-filters-2%22%2C%22filter_id%22%3A%22taxonomy_0%22%7D%2C%7B%22name%22%3A%22Categories%22%2C%22type%22%3A%22taxonomy%22%2C%22taxonomy%22%3A%22category%22%2C%22count%22%3A5%2C%22widget_id%22%3A%22jetpack-search-filters-2%22%2C%22filter_id%22%3A%22taxonomy_1%22%7D%2C%7B%22name%22%3A%22Tags%22%2C%22type%22%3A%22taxonomy%22%2C%22taxonomy%22%3A%22post_tag%22%2C%22count%22%3A5%2C%22widget_id%22%3A%22jetpack-search-filters-2%22%2C%22filter_id%22%3A%22taxonomy_2%22%7D%2C%7B%22name%22%3A%22Year%22%2C%22type%22%3A%22date_histogram%22%2C%22count%22%3A5%2C%22field%22%3A%22post_date%22%2C%22interval%22%3A%22year%22%2C%22widget_id%22%3A%22jetpack-search-filters-2%22%2C%22filter_id%22%3A%22date_histogram_3%22%7D%5D%2C%22widget_id%22%3A%22jetpack-search-filters-2%22%7D%5D%2C%22widgetsOutsideOverlay%22%3A%5B%7B%22filters%22%3A%5B%7B%22name%22%3A%22Bylines%22%2C%22type%22%3A%22taxonomy%22%2C%22taxonomy%22%3A%22byline%22%2C%22count%22%3A5%2C%22widget_id%22%3A%22jetpack-search-filters-3%22%2C%22filter_id%22%3A%22taxonomy_4%22%7D%2C%7B%22name%22%3A%22Categories%22%2C%22type%22%3A%22taxonomy%22%2C%22taxonomy%22%3A%22category%22%2C%22count%22%3A5%2C%22widget_id%22%3A%22jetpack-search-filters-3%22%2C%22filter_id%22%3A%22taxonomy_5%22%7D%2C%7B%22name%22%3A%22Tags%22%2C%22type%22%3A%22taxonomy%22%2C%22taxonomy%22%3A%22post_tag%22%2C%22count%22%3A5%2C%22widget_id%22%3A%22jetpack-search-filters-3%22%2C%22filter_id%22%3A%22taxonomy_6%22%7D%2C%7B%22name%22%3A%22Year%22%2C%22type%22%3A%22date_histogram%22%2C%22count%22%3A5%2C%22field%22%3A%22post_date%22%2C%22interval%22%3A%22year%22%2C%22widget_id%22%3A%22jetpack-search-filters-3%22%2C%22filter_id%22%3A%22date_histogram_7%22%7D%5D%2C%22widget_id%22%3A%22jetpack-search-filters-3%22%7D%5D%2C%22hasNonSearchWidgets%22%3Afalse%2C%22preventTrackingCookiesReset%22%3Afalse%7D")); /* ]]> */ </script> <script type="text/javascript" src="https://code.flickr.net/wp-content/mu-plugins/jetpack-14.0/jetpack_vendor/automattic/jetpack-search/build/instant-search/jp-search.js?minify=false&ver=2b91c6a8150537fa6728" id="jetpack-instant-search-js"></script> <script type="text/javascript" src="//stats.wp.com/w.js?ver=202448" id="jp-tracks-js"></script> <script type="text/javascript" src="https://stats.wp.com/e-202448.js" id="jetpack-stats-js" data-wp-strategy="defer"></script> <script type="text/javascript" id="jetpack-stats-js-after"> /* <![CDATA[ */ _stq = window._stq || []; _stq.push([ "view", JSON.parse("{\"v\":\"ext\",\"blog\":\"185426273\",\"post\":\"3571\",\"tz\":\"-8\",\"srv\":\"code.flickr.net\",\"hp\":\"vip\",\"j\":\"1:14.0\"}") ]); _stq.push([ "clickTrackerInit", "185426273", "3571" ]); /* ]]> */ </script> <script async src="https://embedr.flickr.com/assets/client-code.js" charset="utf-8"></script> </body> </html>