CINXE.COM
An Art Analysis by Mistral Pixtral 12B
<!doctype html> <html class=""> <head> <meta charset="utf-8" /> <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=no" /> <meta name="description" content="A Blog post by Pandora Langlais on Hugging Face" /> <meta property="fb:app_id" content="1321688464574422" /> <meta name="twitter:card" content="summary_large_image" /> <meta name="twitter:site" content="@huggingface" /> <meta name="twitter:image" content="https://cdn-thumbnails.huggingface.co/social-thumbnails/blog/PandorAI1995/art-analysis-mistral-pixtral12b.png" /> <meta property="og:title" content="An Art Analysis by Mistral Pixtral 12B" /> <meta property="og:type" content="website" /> <meta property="og:url" content="https://huggingface.co/blog/PandorAI1995/art-analysis-mistral-pixtral12b" /> <meta property="og:image" content="https://cdn-thumbnails.huggingface.co/social-thumbnails/blog/PandorAI1995/art-analysis-mistral-pixtral12b.png" /> <link rel="stylesheet" href="/front/build/kube-0ad51e9/style.css" /> <link rel="preconnect" href="https://fonts.gstatic.com" /> <link href="https://fonts.googleapis.com/css2?family=Source+Sans+Pro:ital,wght@0,200;0,300;0,400;0,600;0,700;0,900;1,200;1,300;1,400;1,600;1,700;1,900&display=swap" rel="stylesheet" /> <link href="https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;600;700&display=swap" rel="stylesheet" /> <link rel="preload" href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.12.0/katex.min.css" as="style" onload="this.onload=null;this.rel='stylesheet'" /> <noscript> <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.12.0/katex.min.css" /> </noscript> <link rel="canonical" href="https://huggingface.co/blog/PandorAI1995/art-analysis-mistral-pixtral12b"> <!-- HEAD_svelte-vwinwk_START --><link rel="alternate" type="application/rss+xml" href="/blog/feed.xml" title="Hugging Face Blog"><!-- HEAD_svelte-vwinwk_END --> <title>An Art Analysis by Mistral Pixtral 12B</title> <script defer 
data-domain="huggingface.co" event-loggedIn="false" src="/js/script.pageview-props.js" ></script> <script> window.plausible = window.plausible || function () { (window.plausible.q = window.plausible.q || []).push(arguments); }; </script> <script> window.hubConfig = {"features":{"signupDisabled":false},"sshGitUrl":"git@hf.co","moonHttpUrl":"https:\/\/huggingface.co","captchaApiKey":"bd5f2066-93dc-4bdd-a64b-a24646ca3859","captchaDisabledOnSignup":false,"datasetViewerPublicUrl":"https:\/\/datasets-server.huggingface.co","stripePublicKey":"pk_live_x2tdjFXBCvXo2FFmMybezpeM00J6gPCAAc","environment":"production","userAgent":"HuggingFace (production)","spacesIframeDomain":"hf.space","spacesApiUrl":"https:\/\/api.hf.space","docSearchKey":"ece5e02e57300e17d152c08056145326e90c4bff3dd07d7d1ae40cf1c8d39cb6","logoDev":{"apiUrl":"https:\/\/img.logo.dev\/","apiKey":"pk_UHS2HZOeRnaSOdDp7jbd5w"}}; </script> <script type="text/javascript" src="https://de5282c3ca0c.edge.sdk.awswaf.com/de5282c3ca0c/526cf06acb0d/challenge.js" defer></script> </head> <body class="flex flex-col min-h-dvh bg-white dark:bg-gray-950 text-black BlogPage"> <div class="flex min-h-dvh flex-col"> <div class="SVELTE_HYDRATER contents" data-target="MainHeader" data-props="{"classNames":"","isWide":false,"isZh":false}"><header class="border-b border-gray-100 "><div class="w-full px-4 container flex h-16 items-center"><div class="flex flex-1 items-center"><a class="mr-5 flex flex-none items-center lg:mr-6" href="/"><img alt="Hugging Face's logo" class="w-7 md:mr-2" src="/front/assets/huggingface_logo-noborder.svg"> <span class="hidden whitespace-nowrap text-lg font-bold md:block">Hugging Face</span></a> <div class="relative flex-1 lg:max-w-sm mr-2 sm:mr-4 md:mr-3 xl:mr-6"><input autocomplete="off" class="w-full dark:bg-gray-950 pl-8 form-input-alt h-9 pr-3 focus:shadow-xl " name="" placeholder="Search models, datasets, users..." 
spellcheck="false" type="text" value=""> <svg class="absolute left-2.5 text-gray-400 top-1/2 transform -translate-y-1/2" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M30 28.59L22.45 21A11 11 0 1 0 21 22.45L28.59 30zM5 14a9 9 0 1 1 9 9a9 9 0 0 1-9-9z" fill="currentColor"></path></svg> </div> <div class="flex flex-none items-center justify-center p-0.5 place-self-stretch lg:hidden"><button class="relative z-40 flex h-6 w-8 items-center justify-center" type="button"><svg width="1em" height="1em" viewBox="0 0 10 10" class="text-xl" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" preserveAspectRatio="xMidYMid meet" fill="currentColor"><path fill-rule="evenodd" clip-rule="evenodd" d="M1.65039 2.9999C1.65039 2.8066 1.80709 2.6499 2.00039 2.6499H8.00039C8.19369 2.6499 8.35039 2.8066 8.35039 2.9999C8.35039 3.1932 8.19369 3.3499 8.00039 3.3499H2.00039C1.80709 3.3499 1.65039 3.1932 1.65039 2.9999ZM1.65039 4.9999C1.65039 4.8066 1.80709 4.6499 2.00039 4.6499H8.00039C8.19369 4.6499 8.35039 4.8066 8.35039 4.9999C8.35039 5.1932 8.19369 5.3499 8.00039 5.3499H2.00039C1.80709 5.3499 1.65039 5.1932 1.65039 4.9999ZM2.00039 6.6499C1.80709 6.6499 1.65039 6.8066 1.65039 6.9999C1.65039 7.1932 1.80709 7.3499 2.00039 7.3499H8.00039C8.19369 7.3499 8.35039 7.1932 8.35039 6.9999C8.35039 6.8066 8.19369 6.6499 8.00039 6.6499H2.00039Z"></path></svg> </button> </div></div> <nav aria-label="Main" class="ml-auto hidden lg:block"><ul class="flex items-center space-x-1.5 2xl:space-x-2"><li class="hover:text-indigo-700"><a class="group flex items-center px-2 py-0.5 dark:hover:text-gray-400" href="/models"><svg class="mr-1.5 text-gray-400 group-hover:text-indigo-500" style="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" 
aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24"><path class="uim-quaternary" d="M20.23 7.24L12 12L3.77 7.24a1.98 1.98 0 0 1 .7-.71L11 2.76c.62-.35 1.38-.35 2 0l6.53 3.77c.29.173.531.418.7.71z" opacity=".25" fill="currentColor"></path><path class="uim-tertiary" d="M12 12v9.5a2.09 2.09 0 0 1-.91-.21L4.5 17.48a2.003 2.003 0 0 1-1-1.73v-7.5a2.06 2.06 0 0 1 .27-1.01L12 12z" opacity=".5" fill="currentColor"></path><path class="uim-primary" d="M20.5 8.25v7.5a2.003 2.003 0 0 1-1 1.73l-6.62 3.82c-.275.13-.576.198-.88.2V12l8.23-4.76c.175.308.268.656.27 1.01z" fill="currentColor"></path></svg> Models</a> </li><li class="hover:text-red-700"><a class="group flex items-center px-2 py-0.5 dark:hover:text-gray-400" href="/datasets"><svg class="mr-1.5 text-gray-400 group-hover:text-red-500" style="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 25 25"><ellipse cx="12.5" cy="5" fill="currentColor" fill-opacity="0.25" rx="7.5" ry="2"></ellipse><path d="M12.5 15C16.6421 15 20 14.1046 20 13V20C20 21.1046 16.6421 22 12.5 22C8.35786 22 5 21.1046 5 20V13C5 14.1046 8.35786 15 12.5 15Z" fill="currentColor" opacity="0.5"></path><path d="M12.5 7C16.6421 7 20 6.10457 20 5V11.5C20 12.6046 16.6421 13.5 12.5 13.5C8.35786 13.5 5 12.6046 5 11.5V5C5 6.10457 8.35786 7 12.5 7Z" fill="currentColor" opacity="0.5"></path><path d="M5.23628 12C5.08204 12.1598 5 12.8273 5 13C5 14.1046 8.35786 15 12.5 15C16.6421 15 20 14.1046 20 13C20 12.8273 19.918 12.1598 19.7637 12C18.9311 12.8626 15.9947 13.5 12.5 13.5C9.0053 13.5 6.06886 12.8626 5.23628 12Z" fill="currentColor"></path></svg> Datasets</a> </li><li class="hover:text-blue-700"><a class="group flex items-center px-2 py-0.5 dark:hover:text-gray-400" href="/spaces"><svg class="mr-1.5 text-gray-400 group-hover:text-blue-500" 
xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" viewBox="0 0 25 25"><path opacity=".5" d="M6.016 14.674v4.31h4.31v-4.31h-4.31ZM14.674 14.674v4.31h4.31v-4.31h-4.31ZM6.016 6.016v4.31h4.31v-4.31h-4.31Z" fill="currentColor"></path><path opacity=".75" fill-rule="evenodd" clip-rule="evenodd" d="M3 4.914C3 3.857 3.857 3 4.914 3h6.514c.884 0 1.628.6 1.848 1.414a5.171 5.171 0 0 1 7.31 7.31c.815.22 1.414.964 1.414 1.848v6.514A1.914 1.914 0 0 1 20.086 22H4.914A1.914 1.914 0 0 1 3 20.086V4.914Zm3.016 1.102v4.31h4.31v-4.31h-4.31Zm0 12.968v-4.31h4.31v4.31h-4.31Zm8.658 0v-4.31h4.31v4.31h-4.31Zm0-10.813a2.155 2.155 0 1 1 4.31 0 2.155 2.155 0 0 1-4.31 0Z" fill="currentColor"></path><path opacity=".25" d="M16.829 6.016a2.155 2.155 0 1 0 0 4.31 2.155 2.155 0 0 0 0-4.31Z" fill="currentColor"></path></svg> Spaces</a> </li><li class="hover:text-yellow-700 max-xl:hidden"><a class="group flex items-center px-2 py-0.5 dark:hover:text-gray-400" href="/posts"><svg class="mr-1.5 text-gray-400 group-hover:text-yellow-500 !text-yellow-500" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" viewBox="0 0 12 12" preserveAspectRatio="xMidYMid meet"><path fill="currentColor" fill-rule="evenodd" d="M3.73 2.4A4.25 4.25 0 1 1 6 10.26H2.17l-.13-.02a.43.43 0 0 1-.3-.43l.01-.06a.43.43 0 0 1 .12-.22l.84-.84A4.26 4.26 0 0 1 3.73 2.4Z" clip-rule="evenodd"></path></svg> Posts</a> </li><li class="hover:text-yellow-700"><a class="group flex items-center px-2 py-0.5 dark:hover:text-gray-400" href="/docs"><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="mr-1.5 text-gray-400 group-hover:text-yellow-500" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path opacity="0.5" d="M20.9022 5.10334L10.8012 10.8791L7.76318 
9.11193C8.07741 8.56791 8.5256 8.11332 9.06512 7.7914L15.9336 3.73907C17.0868 3.08811 18.5002 3.26422 19.6534 3.91519L19.3859 3.73911C19.9253 4.06087 20.5879 4.56025 20.9022 5.10334Z" fill="currentColor"></path><path d="M10.7999 10.8792V28.5483C10.2136 28.5475 9.63494 28.4139 9.10745 28.1578C8.5429 27.8312 8.074 27.3621 7.74761 26.7975C7.42122 26.2327 7.24878 25.5923 7.24756 24.9402V10.9908C7.25062 10.3319 7.42358 9.68487 7.74973 9.1123L10.7999 10.8792Z" fill="currentColor" fill-opacity="0.75"></path><path fill-rule="evenodd" clip-rule="evenodd" d="M21.3368 10.8499V6.918C21.3331 6.25959 21.16 5.61234 20.8346 5.03949L10.7971 10.8727L10.8046 10.874L21.3368 10.8499Z" fill="currentColor"></path><path opacity="0.5" d="M21.7937 10.8488L10.7825 10.8741V28.5486L21.7937 28.5234C23.3344 28.5234 24.5835 27.2743 24.5835 25.7335V13.6387C24.5835 12.0979 23.4365 11.1233 21.7937 10.8488Z" fill="currentColor"></path></svg> Docs</a> </li><li class="hover:text-green-700"><a class="group flex items-center px-2 py-0.5 dark:hover:text-gray-400" href="/enterprise"><svg class="mr-1.5 text-gray-400 group-hover:text-green-500" xmlns="http://www.w3.org/2000/svg" fill="none" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 33 27"><path fill="currentColor" fill-rule="evenodd" d="M13.5.7a8.7 8.7 0 0 0-7.7 5.7L1 20.6c-1 3.1.9 5.7 4.1 5.7h15c3.3 0 6.8-2.6 7.8-5.7l4.6-14.2c1-3.1-.8-5.7-4-5.7h-15Zm1.1 5.7L9.8 20.3h9.8l1-3.1h-5.8l.8-2.5h4.8l1.1-3h-4.8l.8-2.3H23l1-3h-9.5Z" clip-rule="evenodd"></path></svg> Enterprise</a> </li> <li><a class="group flex items-center px-2 py-0.5 hover:text-gray-500 dark:hover:text-gray-400" href="/pricing">Pricing </a></li> <li><div class="relative group"> <button class="px-2 py-0.5 hover:text-gray-500 dark:hover:text-gray-600 flex items-center " type="button"> <svg class=" text-gray-500 w-5 group-hover:text-gray-400 dark:text-gray-300 dark:group-hover:text-gray-400" 
xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" viewBox="0 0 32 18" preserveAspectRatio="xMidYMid meet"><path fill-rule="evenodd" clip-rule="evenodd" d="M14.4504 3.30221C14.4504 2.836 14.8284 2.45807 15.2946 2.45807H28.4933C28.9595 2.45807 29.3374 2.836 29.3374 3.30221C29.3374 3.76842 28.9595 4.14635 28.4933 4.14635H15.2946C14.8284 4.14635 14.4504 3.76842 14.4504 3.30221Z" fill="currentColor"></path><path fill-rule="evenodd" clip-rule="evenodd" d="M14.4504 9.00002C14.4504 8.53382 14.8284 8.15588 15.2946 8.15588H28.4933C28.9595 8.15588 29.3374 8.53382 29.3374 9.00002C29.3374 9.46623 28.9595 9.84417 28.4933 9.84417H15.2946C14.8284 9.84417 14.4504 9.46623 14.4504 9.00002Z" fill="currentColor"></path><path fill-rule="evenodd" clip-rule="evenodd" d="M14.4504 14.6978C14.4504 14.2316 14.8284 13.8537 15.2946 13.8537H28.4933C28.9595 13.8537 29.3374 14.2316 29.3374 14.6978C29.3374 15.164 28.9595 15.542 28.4933 15.542H15.2946C14.8284 15.542 14.4504 15.164 14.4504 14.6978Z" fill="currentColor"></path><path fill-rule="evenodd" clip-rule="evenodd" d="M1.94549 6.87377C2.27514 6.54411 2.80962 6.54411 3.13928 6.87377L6.23458 9.96907L9.32988 6.87377C9.65954 6.54411 10.194 6.54411 10.5237 6.87377C10.8533 7.20343 10.8533 7.73791 10.5237 8.06756L6.23458 12.3567L1.94549 8.06756C1.61583 7.73791 1.61583 7.20343 1.94549 6.87377Z" fill="currentColor"></path></svg> </button> </div></li> <li><hr class="h-5 w-0.5 border-none bg-gray-100 dark:bg-gray-800"></li> <li><a class="block cursor-pointer whitespace-nowrap px-2 py-0.5 hover:text-gray-500 dark:hover:text-gray-400" href="/login">Log In </a></li> <li><a class="whitespace-nowrap rounded-full border border-transparent bg-gray-900 px-3 py-1 leading-none text-white hover:border-black hover:bg-white hover:text-black" href="/join">Sign Up </a></li></ul></nav></div></header></div> <div class="SVELTE_HYDRATER contents" data-target="SSOBanner" 
data-props="{}"></div> <main class="flex flex-1 flex-col"><div class="container relative flex flex-row justify-center gap-4"><div class="max-w-3xl pb-16 pt-6 max-lg:overflow-hidden lg:flex-1 lg:pt-16 2xl:max-w-4xl"><div class="blog-content copiable-code-container prose mx-auto lg:prose-lg 2xl:prose-lg prose-h1:mb-3 lg:px-8 [&_h1]:!mr-0"><div class="SVELTE_HYDRATER contents" data-target="RepoCodeCopy" data-props="{}"><div></div></div> <div class="mb-4"><a href="/blog" class="flex items-center font-sans !text-gray-500 !no-underline hover:!underline"><svg class="mr-2 h-3 w-3" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M14 26l1.41-1.41L7.83 17H28v-2H7.83l7.58-7.59L14 6L4 16l10 10z" fill="currentColor"></path></svg> Back to Articles</a></div> <h1 class="group relative flex items-center"><!-- HTML_TAG_START --> <a rel="nofollow" href="#an-art-analysis-by-mistral-pixtral-12b" class="block pr-1.5 text-lg md:absolute md:p-1.5 md:opacity-0 md:group-hover:opacity-100 md:right-full" id="an-art-analysis-by-mistral-pixtral-12b"> <span class="header-link"><svg viewBox="0 0 256 256" preserveAspectRatio="xMidYMid meet" height="1em" width="1em" role="img" aria-hidden="true" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns="http://www.w3.org/2000/svg" class="text-gray-500 hover:text-black dark:hover:text-gray-200 w-4"><path fill="currentColor" d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 
11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z"></path></svg></span> </a> <span> An Art Analysis by Mistral Pixtral 12B </span> <!-- HTML_TAG_END --></h1> <div><div class="mb-6 flex items-center gap-x-4 text-base"><a href="/blog/community" class="flex items-center gap-x-1.5 rounded-full bg-yellow-500/10 px-4 py-1.5 font-sans text-sm font-semibold no-underline hover:bg-yellow-500/20"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M20.6081 3C21.7684 3 22.8053 3.49196 23.5284 4.38415C23.9756 4.93678 24.4428 5.82749 24.4808 7.16133C24.9674 7.01707 25.4353 6.93643 25.8725 6.93643C26.9833 6.93643 27.9865 7.37587 28.696 8.17411C29.6075 9.19872 30.0124 10.4579 29.8361 11.7177C29.7523 12.3177 29.5581 12.8555 29.2678 13.3534C29.8798 13.8646 30.3306 14.5763 30.5485 15.4322C30.719 16.1032 30.8939 17.5006 29.9808 18.9403C30.0389 19.0342 30.0934 19.1319 30.1442 19.2318C30.6932 20.3074 30.7283 21.5229 30.2439 22.6548C29.5093 24.3704 27.6841 25.7219 24.1397 27.1727C21.9347 28.0753 19.9174 28.6523 19.8994 28.6575C16.9842 29.4379 14.3477 29.8345 12.0653 29.8345C7.87017 29.8345 4.8668 28.508 3.13831 25.8921C0.356375 21.6797 0.754104 17.8269 4.35369 14.1131C6.34591 12.058 7.67023 9.02782 7.94613 8.36275C8.50224 6.39343 9.97271 4.20438 12.4172 4.20438H12.4179C12.6236 4.20438 12.8314 4.2214 13.0364 4.25468C14.107 4.42854 15.0428 5.06476 15.7115 6.02205C16.4331 5.09583 17.134 4.359 17.7682 3.94323C18.7242 3.31737 19.6794 3 20.6081 3ZM20.6081 5.95917C20.2427 5.95917 19.7963 6.1197 19.3039 6.44225C17.7754 7.44319 14.8258 12.6772 13.7458 14.7131C13.3839 15.3952 12.7655 15.6837 12.2086 15.6837C11.1036 15.6837 10.2408 14.5497 12.1076 13.1085C14.9146 10.9402 13.9299 7.39584 12.5898 7.1776C12.5311 7.16799 12.4731 7.16355 12.4172 7.16355C11.1989 7.16355 10.6615 9.33114 10.6615 9.33114C10.6615 9.33114 9.0863 
13.4148 6.38031 16.206C3.67434 18.998 3.5346 21.2388 5.50675 24.2246C6.85185 26.2606 9.42666 26.8753 12.0653 26.8753C14.8021 26.8753 17.6077 26.2139 19.1799 25.793C19.2574 25.7723 28.8193 22.984 27.6081 20.6107C27.4046 20.212 27.0693 20.0522 26.6471 20.0522C24.9416 20.0522 21.8393 22.6726 20.5057 22.6726C20.2076 22.6726 19.9976 22.5416 19.9116 22.222C19.3433 20.1173 28.552 19.2325 27.7758 16.1839C27.639 15.6445 27.2677 15.4256 26.746 15.4263C24.4923 15.4263 19.4358 19.5181 18.3759 19.5181C18.2949 19.5181 18.2368 19.4937 18.2053 19.4419C17.6743 18.557 17.9653 17.9394 21.7082 15.6009C25.4511 13.2617 28.0783 11.8545 26.5841 10.1752C26.4121 9.98141 26.1684 9.8956 25.8725 9.8956C23.6001 9.89634 18.2311 14.9403 18.2311 14.9403C18.2311 14.9403 16.7821 16.496 15.9057 16.496C15.7043 16.496 15.533 16.4139 15.4169 16.2112C14.7956 15.1296 21.1879 10.1286 21.5484 8.06535C21.7928 6.66715 21.3771 5.95917 20.6081 5.95917Z" fill="#FF9D00"></path><path d="M5.50686 24.2246C3.53472 21.2387 3.67446 18.9979 6.38043 16.206C9.08641 13.4147 10.6615 9.33111 10.6615 9.33111C10.6615 9.33111 11.2499 6.95933 12.59 7.17757C13.93 7.39581 14.9139 10.9401 12.1069 13.1084C9.29997 15.276 12.6659 16.7489 13.7459 14.713C14.8258 12.6772 17.7747 7.44316 19.304 6.44221C20.8326 5.44128 21.9089 6.00204 21.5484 8.06532C21.188 10.1286 14.795 15.1295 15.4171 16.2118C16.0391 17.2934 18.2312 14.9402 18.2312 14.9402C18.2312 14.9402 25.0907 8.49588 26.5842 10.1752C28.0776 11.8545 25.4512 13.2616 21.7082 15.6008C17.9646 17.9393 17.6744 18.557 18.2054 19.4418C18.7372 20.3266 26.9998 13.1351 27.7759 16.1838C28.5513 19.2324 19.3434 20.1173 19.9117 22.2219C20.48 24.3274 26.3979 18.2382 27.6082 20.6107C28.8193 22.9839 19.2574 25.7722 19.18 25.7929C16.0914 26.62 8.24723 28.3726 5.50686 24.2246Z" fill="#FFD21E"></path></svg> <span>Community Article</span></a> <span class="text-sm sm:text-base">Published November 27, 2024</span></div> </div> <div class="not-prose mb-6 lg:hidden"><div class="SVELTE_HYDRATER contents" 
data-target="UpvoteControl" data-props="{"maxShown":6,"apiUrlPrefix":"/api/blog/PandorAI1995/art-analysis-mistral-pixtral12b","postLoginRedirectUrl":"PandorAI1995/art-analysis-mistral-pixtral12b","size":"sm","style":"horizontal","color":"gray","upvotedColor":"orange","upvoted":false,"upvoters":[],"upvotes":0}"><div class="flex flex-wrap items-center gap-2.5 pt-1 "><a href="/login?next=PandorAI1995%2Fart-analysis-mistral-pixtral12b" class="self-start"><div class="shadow-alternate group flex h-9 cursor-pointer select-none items-center gap-2 rounded-lg border pl-3 pr-3.5 border-gray-300 bg-white dark:bg-gray-850"><input disabled type="checkbox" class="peer hidden"> <svg class="text-xs text-gray-500 peer-checked:text-gray-500 group-hover:text-gray-500" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 12 12"><path fill="currentColor" d="M5.19 2.67a.94.94 0 0 1 1.62 0l3.31 5.72a.94.94 0 0 1-.82 1.4H2.7a.94.94 0 0 1-.82-1.4l3.31-5.7v-.02Z"></path></svg> Upvote <div class="font-semibold text-orange-500">-</div></div> </a> </div> </div></div> <div class="not-prose"><div class="SVELTE_HYDRATER contents" data-target="BlogAuthorsByline" data-props="{"authors":[{"author":{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/noauth/xeYuLVacAjZhB_tkrT3f9.png","fullname":"Pandora Langlais","name":"PandorAI1995","type":"user","isPro":false,"isHf":false,"isMod":false,"followerCount":5}}],"translators":[],"proofreaders":[],"lang":"en"}"><div class="not-prose"><div class="mb-12 flex flex-wrap items-center gap-x-5 gap-y-3.5"> <span class="inline-block "><span class="contents"><a href="/PandorAI1995" class="flex items-center leading-tight"><img class="m-0 mr-2.5 size-9 !rounded-full sm:mr-3 sm:size-12" alt="Pandora Langlais's avatar" src="https://cdn-avatars.huggingface.co/v1/production/uploads/noauth/xeYuLVacAjZhB_tkrT3f9.png"> <div 
class="text-gray-900 dark:text-gray-300"><span class="block font-mono text-xs !leading-tight underline">PandorAI1995</span> <span class="fullname font-sans font-semibold max-sm:text-sm">Pandora Langlais</span> <div class="flex items-center"></div> </div></a> </span> </span></div> </div></div></div> <!-- HTML_TAG_START --> <h2> Introduction</h2> <p>Previous studies of the artistic abilities of multimodal VLMs such as Microsoft Florence-2-base, Alibaba Cloud Qwen2-VL-2B and DeepSeek Janus-1.3B have shown that while the results obtained by these models when analyzing and explaining artworks could be successful, there were still some mistakes. These were caused by the models’ difficulties in contextualizing and understanding the artworks, or by their tendency to hallucinate. For instance, the results of the art analysis by Qwen2-VL-2B went as far as seeing objects and figures that were not present, even while correctly identifying the names of paintings and artists such as Vincent Van Gogh’s <i>The Starry Night</i> or Claude Monet’s <i>Impression, Sunrise</i>.</p> <p>Pixtral 12B was released on September 17, 2024, as the first-ever Mistral multimodal model under the Apache 2.0 License. With a 400M parameter vision encoder trained from scratch and a 12B parameter multimodal decoder based on Mistral Nemo, Pixtral supports multiple images in the long context window of 128k tokens. As stated on the mistral.ai website: </p> <p>“Pixtral is trained to understand both natural images and documents, achieving 52.5% on the MMMU reasoning benchmark, surpassing a number of larger models. The model shows strong abilities in tasks such as chart and figure understanding, document question answering, multimodal reasoning and instruction following. Pixtral is able to ingest images at their natural resolution and aspect ratio, giving the user flexibility on the number of tokens used to process an image. 
Pixtral is also able to process any number of images in its long context window of 128K tokens. Unlike previous open-source models, Pixtral does not compromise on text benchmark performance to excel in multimodal tasks”.</p> <p>The aim of this new study is to see the results obtained when processing artworks with Pixtral 12B — to evaluate its abilities, as well as compare them with the results obtained with the other models, as the artworks used for this study are exactly the same as those used in our previous studies with other VLM models.</p> <h4>Analysis of Raphael’s <i>The School of Athens</i>, 1509-1511, Apostolic Palace, Vatican City</h4> <p>We have processed Raphael’s <i>The School of Athens</i> with Pixtral 12B.</p> <p><a rel="nofollow" href="https://cdn-uploads.huggingface.co/production/uploads/66dad7278506f9b6cf6af71b/hjHpGz1nDOlAqaAIE-rQy.jpeg"><img alt="image/jpeg" src="https://cdn-uploads.huggingface.co/production/uploads/66dad7278506f9b6cf6af71b/hjHpGz1nDOlAqaAIE-rQy.jpeg"></a></p> <p>Here are the results that were delivered with the prompt “Explain the image”, without adding any information, context or elements related to the fresco or its artist:</p> <p>“The image depicts a detailed fresco titled "The School of Athens," created by the renowned Italian artist Raphael. This masterpiece is part of the Stanze di Raffaello (Raphael's Rooms) in the Apostolic Palace in the Vatican. Key elements of the fresco include:</p> <ol> <li>Central Figures: At the center of the composition, two prominent figures are walking and conversing. These are traditionally identified as Plato (left, holding his hand upward) and Aristotle (right, with his hand extended horizontally). They are surrounded by other notable philosophers and scholars.</li> <li>Architectural Setting: The scene is set in an architectural framework reminiscent of classical Greek and Roman styles, with columns, arches, and detailed ornamentation. 
The background features a large archway leading to another building, adding depth to the composition.</li> <li>Surrounding Figures: Numerous figures are depicted engaged in various activities such as reading, writing, and conversing. They are dressed in classical attire, emphasizing the theme of ancient philosophy and learning.</li> <li>Sculptures and Decorations: The lower part of the fresco includes decorative elements and statues, adding to the grandeur and classical aesthetic of the scene.</li> <li>Perspective: The use of perspective creates a sense of depth and three-dimensionality, drawing the viewer's eye into the scene. The figures are arranged in a way that guides the viewer's gaze through the painting. "The School of Athens" is a celebration of classical philosophy and learning, embodying the Renaissance ideals of humanism and the pursuit of knowledge. It is considered one of the most important works of High Renaissance art”.</li> </ol> <p>Pixtral’s analysis of <i>The School of Athens</i> shows incredible accuracy, especially compared to Florence-2-base (which confused figures and statues), but also to Qwen2-VL-2B, which had difficulty depicting the precise activity shown in the fresco. While Janus-1.3B had been able to determine that the painting must have been made by a Renaissance artist, Pixtral was even able to correctly identify the exact name of the artwork and of its artist. The description, which is very wordy and uses the common vocabulary found in art history descriptions, is clear, precise and highly accurate, recognizing the names of the philosophers, identifying their activity (“emphasizing the theme of ancient philosophy and learning”), as well as fully analyzing the use of perspective.</p> <h4>The analysis of Wassily Kandinsky’s <i>Pastorale</i> (1911), Solomon R. Guggenheim Museum, New York</h4> <p>The second artwork we have submitted to Pixtral with the “Explain the image” prompt is an abstract artwork by Kandinsky. 
The results obtained were very intriguing…</p> <p><a rel="nofollow" href="https://cdn-uploads.huggingface.co/production/uploads/66dad7278506f9b6cf6af71b/lkAddTOpeRKfANj2YTDkz.png"><img alt="image/png" src="https://cdn-uploads.huggingface.co/production/uploads/66dad7278506f9b6cf6af71b/lkAddTOpeRKfANj2YTDkz.png"></a></p> <p>The first batch of results delivered by Pixtral was the following:</p> <p>“The image appears to be a modern, abstract interpretation of a classical painting. The original artwork being referenced seems to be Raphael's "The School of Athens," as evidenced by the architectural elements and the grouping of figures in the lower part of the image. However, this version has been heavily abstracted and overlaid with vibrant, swirling colors. Key features of the image include:</p> <ol> <li>Colorful Overlay: The painting is dominated by a bright, abstract overlay of colors, including yellows, greens, blues, pinks, and other hues. These colors create a dynamic and energetic visual effect that obscures much of the original detail.</li> <li>Architectural elements: Despite the abstraction, remnants of classical architectural elements, such as arches and columns, are still visible, hinting at the original scene’s setting.</li> <li>Figures: The lower part of the image retains some recognizable human figures, though they are partially obscured by the colorful overlay. These figures seem to be engaged in various activities, consistent with the original painting's theme of philosophical discourse and learning.</li> <li>Composition: The overall composition is circular, with the colorful abstraction creating a sense of movement and energy that radiates outward from the center. 
This abstract interpretation likely aims to juxtapose the classical themes of learning and philosophy with a modern, expressive style, creating a visually striking and thought-provoking piece.”</li> </ol> <p>The whole description of Kandinsky’s <i>Pastorale</i> seems to have been based (hence, biased) on the previous artwork processed by the model. From this hallucinatory setting, suggesting Kandinsky made this artwork as an interpretation of <i>The School of Athens</i>, Pixtral unwraps a list of references, mirroring elements between the two artworks and a comparison between the original version and its “abstract” counterpart. Indeed, Kandinsky’s abstract representation of a group of figures in a natural landscape has been misinterpreted by Pixtral as a “reinterpretation” of the classical architectural elements, indicating there are “arches” and “columns” in Kandinsky’s painting. The model goes as far as interpreting the group of characters as “figures … engaged in various activities, consistent with the original painting's theme of philosophical discourse and learning”. While the aesthetic description is more accurate in its analysis of colors, Pixtral pushes its theory that the artwork is a new version of Raphael’s Renaissance fresco, considering it “aims to juxtapose the classical themes of learning and philosophy with a modern, expressive style, creating a visually striking and thought-provoking piece”.</p> <p>As the results seem to have been less accurate than expected owing to the influence of Pixtral’s previous analysis of <i>The School of Athens</i>, we have reloaded the page and processed the same painting a second time — wondering if the interpretation of the model would be the same.</p> <p>Here is the second description by Pixtral for the <i>Pastorale</i>:</p> <p>“The image is an abstract painting characterized by bold, vibrant colors and dynamic brushstrokes. 
The composition features a variety of shapes and forms, including rounded, organic forms and more angular, geometric elements. The color palette is dominated by bright hues such as yellow, blue, pink, green, and red, which are used to create a sense of energy and movement. The painting appears to have a central focus on abstracted natural forms, possibly representing flowers or foliage, with some areas suggesting petals or leaves. The use of overlapping shapes and contrasting colors gives the artwork a sense of depth and complexity. The overall style and technique suggest influences from Expressionism or Fauvism, known for their emphasis on emotional impact and vivid color. The painting appears to have a central focus on abstracted natural forms, possibly representing flowers or foliage, with some areas suggesting petals or leaves. The use of overlapping shapes and contrasting colors gives the artwork a sense of depth and complexity. The overall style and technique suggest influences from Expressionism or Fauvism, known for their emphasis on emotional impact and vivid color. The signature "Dubuffet" is visible in the lower right corner, indicating that the artist is Jean Dubuffet, a notable French artist known for his raw, unrefined style and his contributions to the Art Brut movement”.</p> <p>The second result is aesthetically accurate, as Pixtral did not misinterpret the painting again as a tribute to Raphael. The depiction of natural elements and colors, recognizing flowers amid the objects, is much more correct. That being said, the model mistakenly identifies the artist as “Jean Dubuffet”, from the Art Brut movement, which is really not the case. 
The idea that the style and technique suggest “influences from Expressionism or Fauvism” is not entirely wrong, as Kandinsky’s artwork might have been influenced by these artistic movements.</p> <h4>Analysis of <i>The Lady and the Unicorn</i> tapestry by the Maître d’Anne de Bretagne (aka Jean d’Ypres), circa 1500, Musée de Cluny, Paris</h4> <p>The third artwork we have analyzed with Pixtral is a medieval tapestry we have already processed with the other models in our previous studies.</p> <p><a rel="nofollow" href="https://cdn-uploads.huggingface.co/production/uploads/66dad7278506f9b6cf6af71b/laZ7ZwxXkpbse-TgOrTxM.jpeg"><img alt="image/jpeg" src="https://cdn-uploads.huggingface.co/production/uploads/66dad7278506f9b6cf6af71b/laZ7ZwxXkpbse-TgOrTxM.jpeg"></a></p> <p>Here is the result obtained with the “Explain the image” prompt:</p> <p>“The image depicts a detailed and ornate tapestry, which is a form of textile art often used for decorative purposes. The tapestry features a richly colored and intricate design with multiple elements and figures. Key features of the tapestry include: Central Figures: In the center, there are several human figures, possibly representing historical or mythological characters. They are dressed in elaborate costumes, suggesting a scene from a significant event or story. Animals: Surrounding the central figures are various animals, including what appear to be lions and other mythical creatures. These animals are depicted in a stylized manner, contributing to the overall fantastical atmosphere of the tapestry. Background and Patterns: The background is filled with intricate patterns and designs, including floral motifs and possibly heraldic symbols. The use of red and blue colors dominates the background, creating a vibrant and dynamic setting. Flags and Banners: There are flags and banners present in the scene, some of which have inscriptions or emblems. 
These elements add to the sense of a ceremonial or celebratory event”.</p> <p>Compared to the results obtained with Florence-2-base (which contained several inaccuracies, including the inability to detect the presence of the unicorn even after several attempts) and to the overall more correct descriptions from Qwen2-VL-2B and Janus-1.3B, the analysis given by Pixtral is concise yet accurate. The model successfully identified the presence of several figures, including “mythological characters”. Pixtral’s aesthetical description is correct and precise, describing the heraldic symbols, floral motifs and inscriptions and emblems. The interpretation of a “ceremonial or celebratory event” is not exactly true but it is understandable which elements have led the model to this theory. The model has noticed the presence of mythological, fantastical elements but it did not indicate the presence of the unicorn in the artwork, even if it did recognize the figure of the lion. The historical period is not defined by Pixtral. </p> <h4>Analysis of Claude Monet's <i>Luncheon on the grass</i>, 1865-1866, Musée d’Orsay, Paris</h4> <p>In another attempt to evaluate Pixtral’s art analysis abilities, we have tasked the model to process and explain Claude Monet’s <i>Luncheon on the grass</i> from 1865-1866.</p> <p><a rel="nofollow" href="https://cdn-uploads.huggingface.co/production/uploads/66dad7278506f9b6cf6af71b/7MHqJV803CmVOqUs8UQKv.jpeg"><img alt="image/jpeg" src="https://cdn-uploads.huggingface.co/production/uploads/66dad7278506f9b6cf6af71b/7MHqJV803CmVOqUs8UQKv.jpeg"></a></p> <p>Here is the description offered by Pixtral:</p> <p>“The image is an abstract painting that appears to depict a forest scene. The painting is characterized by its use of vibrant colors and dynamic brushstrokes, which create a sense of depth and movement. Key features of the painting include:</p> <ol> <li>Forest Elements: The painting prominently features trees and foliage, represented by various shades of green. 
The trees are depicted with thick, textured brushstrokes, giving the scene a sense of density and naturalism.</li> <li>Human Figures: There are several human figures integrated into the forest scene. These figures are stylized and abstract, with simplified forms and minimal details. They seem to be engaged in various activities, possibly representing a communal or ritualistic event.</li> <li>Color Palette: The painting uses a rich palette of greens, browns, and other earth tones, along with splashes of brighter colors such as reds and blues. These colors contribute to the overall vibrancy and energy of the piece.</li> <li>Composition: The composition is balanced, with the forest elements and human figures distributed across the canvas. The use of color and form creates a sense of depth and perspective, drawing the viewer's eye into the scene.</li> <li>Style: The painting's style is reminiscent of Expressionism or Abstract Expressionism, characterized by its emphasis on emotional and subjective interpretation rather than realistic representation. The thick, visible brushstrokes and bold colors are typical of these movements. Overall, the painting captures the essence of a forest scene with a focus on the interplay between nature and human activity, using abstract and expressive techniques to convey its themes”.</li> </ol> <p>Let’s journey back: when the same painting had been processed by Florence-2-base, it had made relatively few object detections, mistaking the sitting figures for objects and the tablecloth and food on the floor for a table. Qwen2-VL-2B had correctly identified the time period (late 19th Century, as based on the clothes worn) as well as the setting, but the model did have some hallucinatory moments — for example, indicating the presence of a “child” nowhere to be seen in the actual painting. 
Janus-1.3B’s description of the artwork had been similar — more or less accurate and with the correct period found, but once more with the indication of a “child” among the group of figures, and with an interpretation of the artwork as made to look “highly realistic” — not really fitting with Monet’s painting style. In comparison, Pixtral’s aesthetical description is accurate but contains some mistakes. The color analysis made by Pixtral is well done, as well as the correct description of the forest setting and of the human figures. The interpretation of a style characterized by “its emphasis on emotional and subjective interpretation rather than realistic representation. The thick, visible brushstrokes and bold colors are typical of these movements” and the conclusion that the “painting captures the essence of a forest scene with a focus on the interplay between nature and human activity, using abstract and expressive techniques to convey its themes” show Pixtral’s abilities at analyzing painting style, brushstrokes and artistic content in general. On the other hand, the fact the model interprets the artwork as “reminiscent of Expressionism or Abstract Expressionism” — artistic movements which would not even start until decades later — indicates some mistakes are still made in Pixtral’s art analysis.</p> <h4>Analysis of Wassily Kandinsky's <i>Impression III (Concert)</i>, 1911, Lenbachhaus, Munich</h4> <p>We have processed another abstract artwork by Wassily Kandinsky with Pixtral, for the purposes of this study.</p> <p><a rel="nofollow" href="https://cdn-uploads.huggingface.co/production/uploads/66dad7278506f9b6cf6af71b/9NcoPge22aV3tLwRuYCcE.jpeg"><img alt="image/jpeg" src="https://cdn-uploads.huggingface.co/production/uploads/66dad7278506f9b6cf6af71b/9NcoPge22aV3tLwRuYCcE.jpeg"></a></p> <p>In this case, the artwork was <i>Impression III (Concert)</i>, which has already been analyzed by Florence-2-base and Janus-1.3B. 
The results obtained with these two other models show the difficulties VLMs currently face in analyzing abstract artistic content. While they both had made an attempt at identifying a subject in this artwork, the results had been inaccurate to say the least — with the abstract figures interpreted as flowers in some cases.</p> <p>Here are the results obtained with Pixtral:</p> <p>“The image is an abstract painting characterized by bold, vibrant colors and dynamic brushstrokes. The dominant background color is a bright yellow, which serves as a backdrop for various abstract shapes and forms. Key features of the painting include: Color Palette: The painting uses a vivid palette with prominent yellow, red, blue, and black colors. These colors are applied in thick, visible brushstrokes, giving the painting a textured and energetic appearance. Shapes and Forms: The painting contains several abstract shapes and forms, including: A large, dark green or black area in the upper right corner. A red and yellow area in the upper left corner. Blue and black lines and shapes in the center. A series of curved, black lines and shapes that resemble stylized figures or symbols. Composition: The composition is balanced but dynamic, with the various elements distributed across the canvas. The use of color and form creates a sense of movement and energy. Signature: In the bottom right corner, the painting is signed "Kandinsky 1911," indicating that it is a work by the renowned abstract artist Wassily Kandinsky, created in the year 1911. Overall, the painting exemplifies Kandinsky's early abstract style, characterized by its expressive use of color and form to convey emotion and movement”.</p> <p>Pixtral’s analysis of Kandinsky’s <i>Impression III (Concert)</i> was clearly the most successful. 
The model was able to identify the name of the artist, as well as the date of the painting from analyzing and correctly transcribing the signature — both Florence-2-base and Janus-1.3B had failed in identifying Kandinsky as the artist for this precise example. The aesthetical description of the shapes and forms is very accurate. The model tried to deliver its interpretation of the painting and contextualize it in Kandinsky’s career, indicating it “exemplifies Kandinsky's early abstract style, characterized by its expressive use of color and form to convey emotion and movement”. Interestingly enough, while its color theory is well thought out, Pixtral did not offer a real concrete interpretation of the abstract elements in the scene.</p> <h4>Analysis of <i>The Starry Night</i> by Vincent Van Gogh, 1889, MoMA, New York</h4> <p>The last painting we have processed for this art study with Pixtral is Vincent Van Gogh’s iconic <i>The Starry Night</i>.</p> <p><a rel="nofollow" href="https://cdn-uploads.huggingface.co/production/uploads/66dad7278506f9b6cf6af71b/n98y2PTHHmTBe9BmumI28.jpeg"><img alt="image/jpeg" src="https://cdn-uploads.huggingface.co/production/uploads/66dad7278506f9b6cf6af71b/n98y2PTHHmTBe9BmumI28.jpeg"></a></p> <p>The painting had already been processed with all three models used for our previous studies. While Qwen2-VL-2B and Janus-1.3B had successfully identified the name of the painting and of its artist, there were some remaining inaccuracies — due to Van Gogh’s almost abstract style of representation which might have made it harder to understand. In light of all this, the results were positive. If there were some hallucinations (Janus-1.3B interpreted the village represented as the homeland of Van Gogh), the aesthetic and object descriptions were correct, especially for Qwen2-VL-2B.</p> <p>Here is the result obtained with Pixtral:</p> <p>“The image is an abstract painting characterized by its vibrant colors and dynamic brushstrokes. 
The painting appears to depict a natural scene, possibly a landscape, with a focus on the interplay of light and color. Key features of the painting include:</p> <ol> <li>Color Palette: The painting uses a rich palette dominated by shades of blue, green, and yellow. These colors are applied in thick, visible brushstrokes, giving the painting a textured and energetic appearance.</li> <li>Light and Movement: The use of swirling and curved lines creates a sense of movement and energy, suggesting the dynamic nature of light and atmosphere in a landscape. The brushstrokes seem to capture the fluidity and vibrancy of natural elements.</li> <li>Composition: The composition is balanced, with the various elements distributed across the canvas. The painting features a central area with dense, overlapping brushstrokes, possibly representing trees or foliage, and a more open, lighter area to the right, which could symbolize the sky or a body of water.</li> <li>Style: The painting's style is reminiscent of Expressionism or Abstract Expressionism, characterized by its emphasis on emotional and subjective interpretation rather than realistic representation. The thick, visible brushstrokes and bold colors are typical of these movements.</li> <li>Signature: In the bottom right corner, the painting is signed "Kandinsky 1911," indicating that it is a work by the renowned abstract artist Wassily Kandinsky, created in the year 1911. Overall, the painting captures the essence of a natural landscape with a focus on the expressive and dynamic qualities of light and color, using abstract and energetic techniques to convey its themes”.</li> </ol> <p>Pixtral having processed Van Gogh’s <i>The Starry Night</i> right after its analysis of Kandinsky’s 1911 painting seems to have led to comprehension issues. In fact, the description of the elements in a “natural scene” such as trees, foliage, sky, etc. is correct, as well as the interpretation of the brushstrokes and colors. 
But the hallucination begins the moment Pixtral claims the painting is once again signed by Kandinsky in 1911, as indicated in the signature in the bottom right (?). Whether the analysis was done too quickly after the previous one or the model got mixed up in its images and descriptions is not clear. But its interpretation of the painting as — once again — reminiscent of “Expressionism or Abstract Expressionism” (which seem to be two of Pixtral’s favorite artistic movements) is wrong in any case.</p> <p>We have run the same prompt to “Explain the image” again after having re-started the system to see if the results would be different.</p> <p>Indeed, here is the description produced by Pixtral for this second round:</p> <p>“The image is a painting titled "Starry Night" by Vincent van Gogh. The artwork is one of his most famous pieces and is known for its swirling, dynamic sky filled with stars and a crescent moon. The sky is dominated by vibrant blues, yellows, and whites, creating a sense of movement and energy. Below the sky, there is a dark, tall cypress tree on the left side, which contrasts sharply with the bright sky. In the lower part of the painting, there is a small village with houses and a church with a tall steeple. The village is depicted with a series of rooftops and a few trees, giving a sense of a quiet, peaceful settlement. The overall composition and use of color create a sense of harmony and balance, making "Starry Night" a timeless masterpiece of Post-Impressionist art”.</p> <p>Although the description is much shorter, it is now completely accurate and much more precise. The name of the artist has been correctly identified by Pixtral, as well as the title and the description of all the elements in the artwork. 
The interpretation of the painting as “a timeless masterpiece of Post-Impressionist art” shows Pixtral’s strong abilities to analyze and process art — and is also much more accurate.</p> <h2>Conclusion</h2> <p>To conclude this study, we can attest that Pixtral 12B definitely has a high potential to process, analyze and describe works of art. The main issue during the analysis was the risk of hallucinations, which happened to be higher when two artworks were analyzed in a very short time period, one right after the other. In this case, the model seems to get highly confused between the two images and descriptions and either incorporate the wrong interpretation (for example, mistaking Kandinsky’s <i>Pastorale</i> for an abstract version of Raphael’s <i>The School of Athens</i>) or mistake one artwork for the other — for instance, when it stated <i>The Starry Night</i> was painted by Kandinsky in 1911 right after analyzing a painting by Kandinsky from this date.</p> <p>Pixtral 12B, much like Florence-2-base, Qwen2-VL-2B and Janus-1.3B, has a tendency to misinterpret images based on — and biased by — its previous knowledge, leading to hallucinations: for example when processing an abstract artwork, it indicated it was made by “Jean Dubuffet” for no clear reason whatsoever.</p> <p>The art history vocabulary used and the aesthetical and color descriptions by Pixtral were extremely accurate overall.</p> <p>All in all, Pixtral 12B has high potential, including for art analysis, and it would be interesting to see how its abilities fare when analyzing elements of text in images in a future study — and also to compare the results with those of other models for the same tasks.</p> <h2>Bibliography</h2> <ol> <li> Maître d’Anne de Bretagne (aka Jean d’Ypres), <cite>The Lady and the Unicorn</cite> (circa 1500), Musée de Cluny, Paris </li> <li> Raphael (1483-1520), <cite>The School of Athens</cite> (1509-1511), Apostolic Palace, Vatican City </li> <li> Claude Monet 
(1840-1926), <cite>Luncheon on the grass</cite> (1865-1866), Musée d’Orsay, Paris </li> <li> Vincent Van Gogh (1853-1890), <cite>The Starry Night</cite> (1889), MoMA, New York </li> <li> Wassily Kandinsky (1866-1944), <cite>Impression III (Concert)</cite> (1911), Lenbachhaus, Munich </li> <li> Wassily Kandinsky (1866-1944), <cite>Pastorale</cite> (1911), Solomon R. Guggenheim Museum, New York </li> </ol><!-- HTML_TAG_END --></div> </div> <div class="w-56 flex-none pt-28 max-lg:hidden"><div class="SVELTE_HYDRATER contents" data-target="UpvoteControl" data-props="{&quot;classNames&quot;:&quot;lg:max-w-60 lg:flex-col lg:!items-start&quot;,&quot;maxShown&quot;:12,&quot;apiUrlPrefix&quot;:&quot;/api/blog/PandorAI1995/art-analysis-mistral-pixtral12b&quot;,&quot;postLoginRedirectUrl&quot;:&quot;/blog/PandorAI1995/art-analysis-mistral-pixtral12b&quot;,&quot;style&quot;:&quot;horizontal&quot;,&quot;color&quot;:&quot;gray&quot;,&quot;upvotedColor&quot;:&quot;orange&quot;,&quot;upvoted&quot;:false,&quot;upvoters&quot;:[],&quot;upvotes&quot;:0}"><div class="flex flex-wrap items-center gap-2.5 pt-1 lg:max-w-60 lg:flex-col lg:!items-start"><a href="/login?next=%2Fblog%2FPandorAI1995%2Fart-analysis-mistral-pixtral12b" class="self-start"><div class="shadow-alternate group flex h-9 cursor-pointer select-none items-center gap-2 rounded-lg border pl-3 pr-3.5 border-gray-300 bg-white dark:bg-gray-850"><input disabled type="checkbox" class="peer hidden"> <svg class="text-xs text-gray-500 peer-checked:text-gray-500 group-hover:text-gray-500" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 12 12"><path fill="currentColor" d="M5.19 2.67a.94.94 0 0 1 1.62 0l3.31 5.72a.94.94 0 0 1-.82 1.4H2.7a.94.94 0 0 1-.82-1.4l3.31-5.7v-.02Z"></path></svg> Upvote <div class="font-semibold text-orange-500">-</div></div> </a> </div> </div></div></div></main> <footer class="b-12 mb-2 flex border-t border-gray-100 md:h-14"><nav class="container flex flex-col justify-between space-y-2 py-6 text-gray-500 md:flex-row md:items-center 
md:space-y-0 md:py-0 md:text-sm"><div class="font-semibold text-black md:hidden">Company</div> <div class="order-last pt-6 text-gray-400 md:order-none md:pt-0" href="Terms">© Hugging Face</div> <a class="hover:underline" href="/terms-of-service">TOS</a> <a class="hover:underline" href="/privacy">Privacy</a> <a class="hover:underline" href="/huggingface">About</a> <a class="hover:underline" href="https://apply.workable.com/huggingface/">Jobs</a> <a href="/" class="group order-first flex-none pb-6 md:order-none md:pb-0"><svg class="h-7 w-7 transition-transform group-hover:-translate-y-px" viewBox="0 0 95 88" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M47.2119 76.5C66.4037 76.5 81.9619 60.9419 81.9619 41.75C81.9619 22.5581 66.4037 7 47.2119 7C28.02 7 12.4619 22.5581 12.4619 41.75C12.4619 60.9419 28.02 76.5 47.2119 76.5Z" fill="#FFD21E"></path><path d="M81.9619 41.75C81.9619 22.5581 66.4037 7 47.2119 7C28.02 7 12.4619 22.5581 12.4619 41.75C12.4619 60.9419 28.02 76.5 47.2119 76.5C66.4037 76.5 81.9619 60.9419 81.9619 41.75ZM8.46185 41.75C8.46185 20.349 25.8108 3 47.2119 3C68.6129 3 85.9619 20.349 85.9619 41.75C85.9619 63.151 68.6129 80.5 47.2119 80.5C25.8108 80.5 8.46185 63.151 8.46185 41.75Z" fill="#FF9D0B"></path><path d="M58.5024 32.2915C59.7768 32.7415 60.2839 35.3615 61.5713 34.6769C64.0095 33.3805 64.9351 30.353 63.6387 27.9148C62.3423 25.4767 59.3148 24.5511 56.8766 25.8475C54.4384 27.1439 53.5128 30.1714 54.8092 32.6096C55.4211 33.7604 57.3632 31.8892 58.5024 32.2915Z" fill="#3A3B45"></path><path d="M34.9454 32.2915C33.671 32.7415 33.164 35.3615 31.8766 34.6769C29.4384 33.3805 28.5128 30.353 29.8092 27.9148C31.1056 25.4767 34.1331 24.5511 36.5713 25.8475C39.0095 27.1439 39.9351 30.1714 38.6387 32.6096C38.0268 33.7604 36.0846 31.8892 34.9454 32.2915Z" fill="#3A3B45"></path><path d="M46.9619 56.289C56.7903 56.289 59.9619 47.5261 59.9619 43.0262C59.9619 40.6875 58.3898 41.4236 55.8718 42.6702C53.5449 43.8222 50.4102 45.4101 46.9619 45.4101C39.7822 
45.4101 33.9619 38.5263 33.9619 43.0262C33.9619 47.5261 37.1334 56.289 46.9619 56.289Z" fill="#3A3B45"></path><mask id="mask0" mask-type="alpha" maskUnits="userSpaceOnUse" x="33" y="41" width="27" height="16"><path d="M46.9619 56.289C56.7903 56.289 59.9619 47.5261 59.9619 43.0262C59.9619 40.6875 58.3898 41.4236 55.8718 42.6702C53.5449 43.8222 50.4102 45.4101 46.9619 45.4101C39.7822 45.4101 33.9619 38.5263 33.9619 43.0262C33.9619 47.5261 37.1334 56.289 46.9619 56.289Z" fill="white"></path></mask><g mask="url(#mask0)"><path d="M47.2119 66.5C52.0018 66.5 55.8848 62.617 55.8848 57.8271C55.8848 54.0962 53.5291 50.9156 50.224 49.6915C50.1023 49.6464 49.9794 49.604 49.8553 49.5643C49.0219 49.2979 48.1337 52.1623 47.2119 52.1623C46.3506 52.1623 45.5186 49.2797 44.7332 49.5135C41.151 50.5799 38.5389 53.8984 38.5389 57.8271C38.5389 62.617 42.4219 66.5 47.2119 66.5Z" fill="#F94040"></path></g><path d="M70.7119 37C72.5068 37 73.9619 35.5449 73.9619 33.75C73.9619 31.9551 72.5068 30.5 70.7119 30.5C68.9169 30.5 67.4619 31.9551 67.4619 33.75C67.4619 35.5449 68.9169 37 70.7119 37Z" fill="#FF9D0B"></path><path d="M24.2119 37C26.0068 37 27.4619 35.5449 27.4619 33.75C27.4619 31.9551 26.0068 30.5 24.2119 30.5C22.4169 30.5 20.9619 31.9551 20.9619 33.75C20.9619 35.5449 22.4169 37 24.2119 37Z" fill="#FF9D0B"></path><path class="origin-bottom-right transition-transform group-hover:-rotate-6" d="M17.5238 48C15.9048 48 14.4578 48.665 13.4488 49.871C12.8248 50.618 12.1728 51.822 12.1198 53.625C11.4408 53.43 10.7878 53.321 10.1778 53.321C8.6278 53.321 7.2278 53.915 6.2378 54.994C4.9658 56.379 4.4008 58.081 4.6468 59.784C4.7638 60.595 5.0348 61.322 5.4398 61.995C4.5858 62.686 3.9568 63.648 3.6528 64.805C3.4148 65.712 3.1708 67.601 4.4448 69.547C4.3638 69.674 4.2878 69.806 4.2168 69.941C3.4508 71.395 3.4018 73.038 4.0778 74.568C5.1028 76.887 7.6498 78.714 12.5958 80.675C15.6728 81.895 18.4878 82.675 18.5128 82.682C22.5808 83.737 26.2598 84.273 29.4448 84.273C35.2988 84.273 39.4898 82.48 41.9018 
78.944C45.7838 73.25 45.2288 68.042 40.2058 63.022C37.4258 60.244 35.5778 56.148 35.1928 55.249C34.4168 52.587 32.3648 49.628 28.9538 49.628H28.9528C28.6658 49.628 28.3758 49.651 28.0898 49.696C26.5958 49.931 25.2898 50.791 24.3568 52.085C23.3498 50.833 22.3718 49.837 21.4868 49.275C20.1528 48.429 18.8198 48 17.5238 48ZM17.5238 52C18.0338 52 18.6568 52.217 19.3438 52.653C21.4768 54.006 25.5928 61.081 27.0998 63.833C27.6048 64.755 28.4678 65.145 29.2448 65.145C30.7868 65.145 31.9908 63.612 29.3858 61.664C25.4688 58.733 26.8428 53.942 28.7128 53.647C28.7948 53.634 28.8758 53.628 28.9538 53.628C30.6538 53.628 31.4038 56.558 31.4038 56.558C31.4038 56.558 33.6018 62.078 37.3778 65.851C41.1538 69.625 41.3488 72.654 38.5968 76.69C36.7198 79.442 33.1268 80.273 29.4448 80.273C25.6258 80.273 21.7108 79.379 19.5168 78.81C19.4088 78.782 6.0658 75.013 7.7558 71.805C8.0398 71.266 8.5078 71.05 9.0968 71.05C11.4768 71.05 15.8058 74.592 17.6668 74.592C18.0828 74.592 18.3758 74.415 18.4958 73.983C19.2888 71.138 6.4388 69.942 7.5218 65.821C7.7128 65.092 8.2308 64.796 8.9588 64.797C12.1038 64.797 19.1598 70.328 20.6388 70.328C20.7518 70.328 20.8328 70.295 20.8768 70.225C21.6178 69.029 21.2118 68.194 15.9888 65.033C10.7658 61.871 7.0998 59.969 9.1848 57.699C9.4248 57.437 9.7648 57.321 10.1778 57.321C13.3488 57.322 20.8408 64.14 20.8408 64.14C20.8408 64.14 22.8628 66.243 24.0858 66.243C24.3668 66.243 24.6058 66.132 24.7678 65.858C25.6348 64.396 16.7148 57.636 16.2118 54.847C15.8708 52.957 16.4508 52 17.5238 52Z" fill="#FF9D0B"></path><path class="origin-bottom-right transition-transform group-hover:-rotate-6" d="M38.5967 76.6898C41.3487 72.6538 41.1537 69.6248 37.3777 65.8508C33.6017 62.0778 31.4037 56.5578 31.4037 56.5578C31.4037 56.5578 30.5827 53.3518 28.7127 53.6468C26.8427 53.9418 25.4697 58.7328 29.3867 61.6638C33.3037 64.5938 28.6067 66.5848 27.0997 63.8328C25.5927 61.0808 21.4777 54.0058 19.3437 52.6528C17.2107 51.2998 15.7087 52.0578 16.2117 54.8468C16.7147 57.6358 25.6357 
64.3958 24.7677 65.8588C23.8997 67.3208 20.8407 64.1398 20.8407 64.1398C20.8407 64.1398 11.2687 55.4288 9.18465 57.6988C7.10065 59.9688 10.7657 61.8708 15.9887 65.0328C21.2127 68.1938 21.6177 69.0288 20.8767 70.2248C20.1347 71.4208 8.60465 61.6998 7.52165 65.8208C6.43965 69.9418 19.2887 71.1378 18.4957 73.9828C17.7027 76.8288 9.44465 68.5978 7.75565 71.8048C6.06565 75.0128 19.4087 78.7818 19.5167 78.8098C23.8267 79.9278 34.7727 82.2968 38.5967 76.6898Z" fill="#FFD21E"></path><path class="origin-bottom-left transition-transform group-hover:rotate-6" d="M77.3999 48C79.0189 48 80.4659 48.665 81.4749 49.871C82.0989 50.618 82.7509 51.822 82.8039 53.625C83.4829 53.43 84.1359 53.321 84.7459 53.321C86.2959 53.321 87.6959 53.915 88.6859 54.994C89.9579 56.379 90.5229 58.081 90.2769 59.784C90.1599 60.595 89.8889 61.322 89.4839 61.995C90.3379 62.686 90.9669 63.648 91.2709 64.805C91.5089 65.712 91.7529 67.601 90.4789 69.547C90.5599 69.674 90.6359 69.806 90.7069 69.941C91.4729 71.395 91.5219 73.038 90.8459 74.568C89.8209 76.887 87.2739 78.714 82.3279 80.675C79.2509 81.895 76.4359 82.675 76.4109 82.682C72.3429 83.737 68.6639 84.273 65.4789 84.273C59.6249 84.273 55.4339 82.48 53.0219 78.944C49.1399 73.25 49.6949 68.042 54.7179 63.022C57.4979 60.244 59.3459 56.148 59.7309 55.249C60.5069 52.587 62.5589 49.628 65.9699 49.628H65.9709C66.2579 49.628 66.5479 49.651 66.8339 49.696C68.3279 49.931 69.6339 50.791 70.5669 52.085C71.5739 50.833 72.5519 49.837 73.4369 49.275C74.7709 48.429 76.1039 48 77.3999 48ZM77.3999 52C76.8899 52 76.2669 52.217 75.5799 52.653C73.4469 54.006 69.3309 61.081 67.8239 63.833C67.3189 64.755 66.4559 65.145 65.6789 65.145C64.1369 65.145 62.9329 63.612 65.5379 61.664C69.4549 58.733 68.0809 53.942 66.2109 53.647C66.1289 53.634 66.0479 53.628 65.9699 53.628C64.2699 53.628 63.5199 56.558 63.5199 56.558C63.5199 56.558 61.3219 62.078 57.5459 65.851C53.7699 69.625 53.5749 72.654 56.3269 76.69C58.2039 79.442 61.7969 80.273 65.4789 80.273C69.2979 80.273 73.2129 79.379 
75.4069 78.81C75.5149 78.782 88.8579 75.013 87.1679 71.805C86.8839 71.266 86.4159 71.05 85.8269 71.05C83.4469 71.05 79.1179 74.592 77.2569 74.592C76.8409 74.592 76.5479 74.415 76.4279 73.983C75.6349 71.138 88.4849 69.942 87.4019 65.821C87.2109 65.092 86.6929 64.796 85.9649 64.797C82.8199 64.797 75.7639 70.328 74.2849 70.328C74.1719 70.328 74.0909 70.295 74.0469 70.225C73.3059 69.029 73.7119 68.194 78.9349 65.033C84.1579 61.871 87.8239 59.969 85.7389 57.699C85.4989 57.437 85.1589 57.321 84.7459 57.321C81.5749 57.322 74.0829 64.14 74.0829 64.14C74.0829 64.14 72.0609 66.243 70.8379 66.243C70.5569 66.243 70.3179 66.132 70.1559 65.858C69.2889 64.396 78.2089 57.636 78.7119 54.847C79.0529 52.957 78.4729 52 77.3999 52Z" fill="#FF9D0B"></path><path class="origin-bottom-left transition-transform group-hover:rotate-6" d="M56.3271 76.6898C53.5751 72.6538 53.7701 69.6248 57.5461 65.8508C61.3221 62.0778 63.5201 56.5578 63.5201 56.5578C63.5201 56.5578 64.3411 53.3518 66.2111 53.6468C68.0811 53.9418 69.4541 58.7328 65.5371 61.6638C61.6201 64.5938 66.3171 66.5848 67.8241 63.8328C69.3311 61.0808 73.4461 54.0058 75.5801 52.6528C77.7131 51.2998 79.2151 52.0578 78.7121 54.8468C78.2091 57.6358 69.2881 64.3958 70.1561 65.8588C71.0241 67.3208 74.0831 64.1398 74.0831 64.1398C74.0831 64.1398 83.6551 55.4288 85.7391 57.6988C87.8231 59.9688 84.1581 61.8708 78.9351 65.0328C73.7111 68.1938 73.3061 69.0288 74.0471 70.2248C74.7891 71.4208 86.3191 61.6998 87.4021 65.8208C88.4841 69.9418 75.6351 71.1378 76.4281 73.9828C77.2211 76.8288 85.4791 68.5978 87.1681 71.8048C88.8581 75.0128 75.5151 78.7818 75.4071 78.8098C71.0971 79.9278 60.1511 82.2968 56.3271 76.6898Z" fill="#FFD21E"></path></svg></a> <div class="pt-6 font-semibold text-black md:hidden md:pt-0">Website</div> <a class="hover:underline" href="/models">Models</a> <a class="hover:underline" href="/datasets">Datasets</a> <a class="hover:underline" href="/spaces">Spaces</a> <a class="hover:underline" href="/pricing">Pricing</a> <a 
class="hover:underline" href="/docs">Docs</a></nav></footer></div> <script> import("\/front\/build\/kube-0ad51e9\/index.js"); window.moonSha = "kube-0ad51e9\/"; window.__hf_deferred = {}; </script> <!-- Stripe --> <script> if (["hf.co", "huggingface.co"].includes(window.location.hostname)) { const script = document.createElement("script"); script.src = "https://js.stripe.com/v3/"; script.async = true; document.head.appendChild(script); } </script> </body> </html>