CINXE.COM

GPT-4 Technical Report | Papers With Code

<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">

  <!-- Top-level constants for this page; the analytics bootstrap further down reads the GTAG_* values. -->
  <script>
    const GTAG_ENABLED = true;
    const GTAG_TRACKING_ID = "UA-121182717-1";
    const SENTRY_DSN_FRONTEND = "".trim();
    const GLOBAL_CSRF_TOKEN = 'RMII7xlHUBDRcWKZNE5o0UAqj4ksHb7MtdqrUK8u3wcM4aK5d9gcwdMTsMgqfR1w';
    const MEDIA_URL = "https://production-media.paperswithcode.com/";
    const ASSETS_URL = "https://production-assets.paperswithcode.com";
    // Keep a queue that is already on window, otherwise start with an empty one.
    run_after_frontend_loaded = window.run_after_frontend_loaded || [];
  </script>

  <!-- Connection warm-up for the asset host and preloaded web fonts. -->
  <link href="https://production-assets.paperswithcode.com" rel="preconnect">
  <link href="https://production-assets.paperswithcode.com" rel="dns-prefetch">
  <link rel="preload" href="https://production-assets.paperswithcode.com/perf/fonts/65e877e527022735c1a1.woff2" as="font" type="font/woff2" crossorigin>
  <link rel="preload" href="https://production-assets.paperswithcode.com/perf/fonts/917632e36982ca7933c8.woff2" as="font" type="font/woff2" crossorigin>
  <link rel="preload" href="https://production-assets.paperswithcode.com/perf/fonts/f1405bd8a987c2ea8a67.woff2" as="font" type="font/woff2" crossorigin>

  <!--
    Analytics bootstrap. Always defines window.captureOutboundLink:
    - tracking enabled: injects the gtag.js loader, sets up window.dataLayer / window.gtag,
      and reports an "outbound" click event for the given label;
    - tracking disabled: assigns the given URL to document.location.
  -->
  <script>
    (() => {
      if (!GTAG_ENABLED) {
        window.captureOutboundLink = function (url) {
          document.location = url;
        };
        return;
      }

      // Pushes the raw `arguments` object (not an array copy) onto the data layer.
      function gtag() {
        window.dataLayer.push(arguments);
      }

      const loader = document.createElement("script");
      loader.src = `https://www.googletagmanager.com/gtag/js?id=${GTAG_TRACKING_ID}`;
      document.head.appendChild(loader);

      window.dataLayer = window.dataLayer || [];
      window.gtag = gtag;
      gtag("js", new Date());
      gtag("config", GTAG_TRACKING_ID);

      window.captureOutboundLink = function (label) {
        gtag("event", "click", {
          event_category: "outbound",
          event_label: label
        });
      };
    })();
  </script>

  <!-- Preloaded bundles, followed by the stylesheets themselves. -->
  <link rel="preload" href="https://production-assets.paperswithcode.com/perf/766.4af6b88b.js" as="script">
  <link rel="preload" href="https://production-assets.paperswithcode.com/perf/351.a22a9607.js" as="script">
  <link rel="preload" href="https://production-assets.paperswithcode.com/perf/918.c41196c3.css" as="style">
  <link rel="preload" href="https://production-assets.paperswithcode.com/perf/view_paper.05773d2b.css" as="style">
  <link rel="stylesheet" href="https://production-assets.paperswithcode.com/perf/918.c41196c3.css">
  <link rel="stylesheet" href="https://production-assets.paperswithcode.com/perf/view_paper.05773d2b.css">

  <!-- Page title and description -->
  <title>GPT-4 Technical Report | Papers With Code</title>
  <meta name="description" content="🏆 SOTA for Legal Reasoning on LegalBench (Rule-recall) (Balanced Accuracy metric)">

  <!-- Open Graph properties -->
  <meta property="og:title" content="Papers with Code - GPT-4 Technical Report">
  <meta property="og:description" content="🏆 SOTA for Legal Reasoning on LegalBench (Rule-recall) (Balanced Accuracy metric)">
  <meta property="og:image" content="https://raw.githubusercontent.com/unispac/visual-adversarial-examples-jailbreak-large-language-models/master/assets/human_race.png">
  <meta property="og:url" content="https://paperswithcode.com/paper/gpt-4-technical-report-1">

  <!-- Twitter card properties -->
  <meta name="twitter:card" content="summary_large_image">
  <meta name="twitter:site" content="@paperswithcode">
  <meta name="twitter:title" content="Papers with Code - GPT-4 Technical Report">
  <meta name="twitter:description" content="🏆 SOTA for Legal Reasoning on LegalBench (Rule-recall) (Balanced Accuracy metric)">
  <meta name="twitter:creator" content="@paperswithcode">
  <meta name="twitter:url" content="https://paperswithcode.com/paper/gpt-4-technical-report-1">
  <meta name="twitter:domain" content="paperswithcode.com">

  <!-- JSON-LD structured data; the object opened here continues on the following lines. -->
  <script type="application/ld+json">{ "@context": "http://schema.org", "@graph": { "@type": "ScholarlyArticle", "@id": "2303.08774", "name": "GPT-4 Technical Report", "description": "\ud83c\udfc6 SOTA for Legal Reasoning on LegalBench (Rule-recall) (Balanced Accuracy metric)", "url": "https://paperswithcode.com/paper/gpt-4-technical-report-1", "image": 
"https://raw.githubusercontent.com/unispac/visual-adversarial-examples-jailbreak-large-language-models/master/assets/human_race.png", "headline": "GPT-4 Technical Report", "abstract": "\ud83c\udfc6 SOTA for Legal Reasoning on LegalBench (Rule-recall) (Balanced Accuracy metric)", "author": [ { "@type": "Person", "@id": "#OpenAI", "name": "OpenAI", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#:", "name": ":", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Josh_Achiam", "name": "Josh Achiam", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Steven_Adler", "name": "Steven Adler", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Sandhini_Agarwal", "name": "Sandhini Agarwal", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Lama_Ahmad", "name": "Lama Ahmad", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Ilge_Akkaya", "name": "Ilge Akkaya", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Florencia_Leoni_Aleman", "name": "Florencia Leoni Aleman", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Diogo_Almeida", "name": "Diogo Almeida", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Janko_Altenschmidt", "name": "Janko Altenschmidt", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Sam_Altman", "name": "Sam Altman", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Shyamal_Anadkat", "name": "Shyamal Anadkat", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Red_Avila", "name": "Red Avila", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Igor_Babuschkin", "name": "Igor Babuschkin", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Suchir_Balaji", "name": 
"Suchir Balaji", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Valerie_Balcom", "name": "Valerie Balcom", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Paul_Baltescu", "name": "Paul Baltescu", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Haiming_Bao", "name": "Haiming Bao", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Mohammad_Bavarian", "name": "Mohammad Bavarian", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Jeff_Belgum", "name": "Jeff Belgum", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Irwan_Bello", "name": "Irwan Bello", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Jake_Berdine", "name": "Jake Berdine", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Gabriel_Bernadett-Shapiro", "name": "Gabriel Bernadett-Shapiro", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Christopher_Berner", "name": "Christopher Berner", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Lenny_Bogdonoff", "name": "Lenny Bogdonoff", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Oleg_Boiko", "name": "Oleg Boiko", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Madelaine_Boyd", "name": "Madelaine Boyd", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Anna-Luisa_Brakman", "name": "Anna-Luisa Brakman", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Greg_Brockman", "name": "Greg Brockman", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Tim_Brooks", "name": "Tim Brooks", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Miles_Brundage", "name": "Miles Brundage", "image": 
"https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Kevin_Button", "name": "Kevin Button", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Trevor_Cai", "name": "Trevor Cai", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Rosie_Campbell", "name": "Rosie Campbell", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Andrew_Cann", "name": "Andrew Cann", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Brittany_Carey", "name": "Brittany Carey", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Chelsea_Carlson", "name": "Chelsea Carlson", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Rory_Carmichael", "name": "Rory Carmichael", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Brooke_Chan", "name": "Brooke Chan", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Che_Chang", "name": "Che Chang", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Fotis_Chantzis", "name": "Fotis Chantzis", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Derek_Chen", "name": "Derek Chen", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Sully_Chen", "name": "Sully Chen", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Ruby_Chen", "name": "Ruby Chen", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Jason_Chen", "name": "Jason Chen", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Mark_Chen", "name": "Mark Chen", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Ben_Chess", "name": "Ben Chess", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Chester_Cho", "name": "Chester Cho", "image": 
"https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Casey_Chu", "name": "Casey Chu", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Hyung_Won_Chung", "name": "Hyung Won Chung", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Dave_Cummings", "name": "Dave Cummings", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Jeremiah_Currier", "name": "Jeremiah Currier", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Yunxing_Dai", "name": "Yunxing Dai", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Cory_Decareaux", "name": "Cory Decareaux", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Thomas_Degry", "name": "Thomas Degry", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Noah_Deutsch", "name": "Noah Deutsch", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Damien_Deville", "name": "Damien Deville", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Arka_Dhar", "name": "Arka Dhar", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#David_Dohan", "name": "David Dohan", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Steve_Dowling", "name": "Steve Dowling", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Sheila_Dunning", "name": "Sheila Dunning", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Adrien_Ecoffet", "name": "Adrien Ecoffet", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Atty_Eleti", "name": "Atty Eleti", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Tyna_Eloundou", "name": "Tyna Eloundou", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#David_Farhi", "name": 
"David Farhi", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Liam_Fedus", "name": "Liam Fedus", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Niko_Felix", "name": "Niko Felix", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Sim\u00f3n_Posada_Fishman", "name": "Sim\u00f3n Posada Fishman", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Juston_Forte", "name": "Juston Forte", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Isabella_Fulford", "name": "Isabella Fulford", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Leo_Gao", "name": "Leo Gao", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Elie_Georges", "name": "Elie Georges", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Christian_Gibson", "name": "Christian Gibson", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Vik_Goel", "name": "Vik Goel", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Tarun_Gogineni", "name": "Tarun Gogineni", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Gabriel_Goh", "name": "Gabriel Goh", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Rapha_Gontijo-Lopes", "name": "Rapha Gontijo-Lopes", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Jonathan_Gordon", "name": "Jonathan Gordon", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Morgan_Grafstein", "name": "Morgan Grafstein", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Scott_Gray", "name": "Scott Gray", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Ryan_Greene", "name": "Ryan Greene", "image": "https://paperswithcode.com/static/" }, { 
"@type": "Person", "@id": "#Joshua_Gross", "name": "Joshua Gross", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Shixiang_Shane_Gu", "name": "Shixiang Shane Gu", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Yufei_Guo", "name": "Yufei Guo", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Chris_Hallacy", "name": "Chris Hallacy", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Jesse_Han", "name": "Jesse Han", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Jeff_Harris", "name": "Jeff Harris", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Yuchen_He", "name": "Yuchen He", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Mike_Heaton", "name": "Mike Heaton", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Johannes_Heidecke", "name": "Johannes Heidecke", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Chris_Hesse", "name": "Chris Hesse", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Alan_Hickey", "name": "Alan Hickey", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Wade_Hickey", "name": "Wade Hickey", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Peter_Hoeschele", "name": "Peter Hoeschele", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Brandon_Houghton", "name": "Brandon Houghton", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Kenny_Hsu", "name": "Kenny Hsu", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Shengli_Hu", "name": "Shengli Hu", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Xin_Hu", "name": "Xin Hu", "image": "https://paperswithcode.com/static/" }, { 
"@type": "Person", "@id": "#Joost_Huizinga", "name": "Joost Huizinga", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Shantanu_Jain", "name": "Shantanu Jain", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Shawn_Jain", "name": "Shawn Jain", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Joanne_Jang", "name": "Joanne Jang", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Angela_Jiang", "name": "Angela Jiang", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Roger_Jiang", "name": "Roger Jiang", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Haozhun_Jin", "name": "Haozhun Jin", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Denny_Jin", "name": "Denny Jin", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Shino_Jomoto", "name": "Shino Jomoto", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Billie_Jonn", "name": "Billie Jonn", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Heewoo_Jun", "name": "Heewoo Jun", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Tomer_Kaftan", "name": "Tomer Kaftan", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#\u0141ukasz_Kaiser", "name": "\u0141ukasz Kaiser", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Ali_Kamali", "name": "Ali Kamali", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Ingmar_Kanitscheider", "name": "Ingmar Kanitscheider", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Nitish_Shirish_Keskar", "name": "Nitish Shirish Keskar", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Tabarak_Khan", "name": "Tabarak Khan", "image": 
"https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Logan_Kilpatrick", "name": "Logan Kilpatrick", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Jong_Wook_Kim", "name": "Jong Wook Kim", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Christina_Kim", "name": "Christina Kim", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Yongjik_Kim", "name": "Yongjik Kim", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Jan_Hendrik_Kirchner", "name": "Jan Hendrik Kirchner", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Jamie_Kiros", "name": "Jamie Kiros", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Matt_Knight", "name": "Matt Knight", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Daniel_Kokotajlo", "name": "Daniel Kokotajlo", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#\u0141ukasz_Kondraciuk", "name": "\u0141ukasz Kondraciuk", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Andrew_Kondrich", "name": "Andrew Kondrich", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Aris_Konstantinidis", "name": "Aris Konstantinidis", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Kyle_Kosic", "name": "Kyle Kosic", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Gretchen_Krueger", "name": "Gretchen Krueger", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Vishal_Kuo", "name": "Vishal Kuo", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Michael_Lampe", "name": "Michael Lampe", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Ikai_Lan", "name": "Ikai Lan", "image": "https://paperswithcode.com/static/" }, { 
"@type": "Person", "@id": "#Teddy_Lee", "name": "Teddy Lee", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Jan_Leike", "name": "Jan Leike", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Jade_Leung", "name": "Jade Leung", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Daniel_Levy", "name": "Daniel Levy", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Chak_Ming_Li", "name": "Chak Ming Li", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Rachel_Lim", "name": "Rachel Lim", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Molly_Lin", "name": "Molly Lin", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Stephanie_Lin", "name": "Stephanie Lin", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Mateusz_Litwin", "name": "Mateusz Litwin", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Theresa_Lopez", "name": "Theresa Lopez", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Ryan_Lowe", "name": "Ryan Lowe", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Patricia_Lue", "name": "Patricia Lue", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Anna_Makanju", "name": "Anna Makanju", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Kim_Malfacini", "name": "Kim Malfacini", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Sam_Manning", "name": "Sam Manning", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Todor_Markov", "name": "Todor Markov", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Yaniv_Markovski", "name": "Yaniv Markovski", "image": "https://paperswithcode.com/static/" }, { "@type": 
"Person", "@id": "#Bianca_Martin", "name": "Bianca Martin", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Katie_Mayer", "name": "Katie Mayer", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Andrew_Mayne", "name": "Andrew Mayne", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Bob_McGrew", "name": "Bob McGrew", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Scott_Mayer_McKinney", "name": "Scott Mayer McKinney", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Christine_McLeavey", "name": "Christine McLeavey", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Paul_McMillan", "name": "Paul McMillan", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Jake_McNeil", "name": "Jake McNeil", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#David_Medina", "name": "David Medina", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Aalok_Mehta", "name": "Aalok Mehta", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Jacob_Menick", "name": "Jacob Menick", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Luke_Metz", "name": "Luke Metz", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Andrey_Mishchenko", "name": "Andrey Mishchenko", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Pamela_Mishkin", "name": "Pamela Mishkin", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Vinnie_Monaco", "name": "Vinnie Monaco", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Evan_Morikawa", "name": "Evan Morikawa", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Daniel_Mossing", "name": "Daniel Mossing", "image": 
"https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Tong_Mu", "name": "Tong Mu", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Mira_Murati", "name": "Mira Murati", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Oleg_Murk", "name": "Oleg Murk", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#David_M\u00e9ly", "name": "David M\u00e9ly", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Ashvin_Nair", "name": "Ashvin Nair", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Reiichiro_Nakano", "name": "Reiichiro Nakano", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Rajeev_Nayak", "name": "Rajeev Nayak", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Arvind_Neelakantan", "name": "Arvind Neelakantan", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Richard_Ngo", "name": "Richard Ngo", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Hyeonwoo_Noh", "name": "Hyeonwoo Noh", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Long_Ouyang", "name": "Long Ouyang", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Cullen_O'Keefe", "name": "Cullen O'Keefe", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Jakub_Pachocki", "name": "Jakub Pachocki", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Alex_Paino", "name": "Alex Paino", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Joe_Palermo", "name": "Joe Palermo", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Ashley_Pantuliano", "name": "Ashley Pantuliano", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Giambattista_Parascandolo", 
"name": "Giambattista Parascandolo", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Joel_Parish", "name": "Joel Parish", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Emy_Parparita", "name": "Emy Parparita", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Alex_Passos", "name": "Alex Passos", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Mikhail_Pavlov", "name": "Mikhail Pavlov", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Andrew_Peng", "name": "Andrew Peng", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Adam_Perelman", "name": "Adam Perelman", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Filipe_de_Avila_Belbute_Peres", "name": "Filipe de Avila Belbute Peres", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Michael_Petrov", "name": "Michael Petrov", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Henrique_Ponde_de_Oliveira_Pinto", "name": "Henrique Ponde de Oliveira Pinto", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Michael", "name": "Michael", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Pokorny", "name": "Pokorny", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Michelle_Pokrass", "name": "Michelle Pokrass", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Vitchyr_H._Pong", "name": "Vitchyr H. 
Pong", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Tolly_Powell", "name": "Tolly Powell", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Alethea_Power", "name": "Alethea Power", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Boris_Power", "name": "Boris Power", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Elizabeth_Proehl", "name": "Elizabeth Proehl", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Raul_Puri", "name": "Raul Puri", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Alec_Radford", "name": "Alec Radford", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Jack_Rae", "name": "Jack Rae", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Aditya_Ramesh", "name": "Aditya Ramesh", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Cameron_Raymond", "name": "Cameron Raymond", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Francis_Real", "name": "Francis Real", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Kendra_Rimbach", "name": "Kendra Rimbach", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Carl_Ross", "name": "Carl Ross", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Bob_Rotsted", "name": "Bob Rotsted", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Henri_Roussez", "name": "Henri Roussez", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Nick_Ryder", "name": "Nick Ryder", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Mario_Saltarelli", "name": "Mario Saltarelli", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Ted_Sanders", "name": 
"Ted Sanders", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Shibani_Santurkar", "name": "Shibani Santurkar", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Girish_Sastry", "name": "Girish Sastry", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Heather_Schmidt", "name": "Heather Schmidt", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#David_Schnurr", "name": "David Schnurr", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#John_Schulman", "name": "John Schulman", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Daniel_Selsam", "name": "Daniel Selsam", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Kyla_Sheppard", "name": "Kyla Sheppard", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Toki_Sherbakov", "name": "Toki Sherbakov", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Jessica_Shieh", "name": "Jessica Shieh", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Sarah_Shoker", "name": "Sarah Shoker", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Pranav_Shyam", "name": "Pranav Shyam", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Szymon_Sidor", "name": "Szymon Sidor", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Eric_Sigler", "name": "Eric Sigler", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Maddie_Simens", "name": "Maddie Simens", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Jordan_Sitkin", "name": "Jordan Sitkin", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Katarina_Slama", "name": "Katarina Slama", "image": "https://paperswithcode.com/static/" }, { 
"@type": "Person", "@id": "#Ian_Sohl", "name": "Ian Sohl", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Benjamin_Sokolowsky", "name": "Benjamin Sokolowsky", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Yang_song", "name": "Yang song", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Natalie_Staudacher", "name": "Natalie Staudacher", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Felipe_Petroski_Such", "name": "Felipe Petroski Such", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Natalie_Summers", "name": "Natalie Summers", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Ilya_Sutskever", "name": "Ilya Sutskever", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Jie_Tang", "name": "Jie Tang", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Nikolas_Tezak", "name": "Nikolas Tezak", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Madeleine_B._Thompson", "name": "Madeleine B. 
Thompson", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Phil_Tillet", "name": "Phil Tillet", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Amin_Tootoonchian", "name": "Amin Tootoonchian", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Elizabeth_Tseng", "name": "Elizabeth Tseng", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Preston_Tuggle", "name": "Preston Tuggle", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Nick_Turley", "name": "Nick Turley", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Jerry_Tworek", "name": "Jerry Tworek", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Juan_Felipe_Cer\u00f3n_Uribe", "name": "Juan Felipe Cer\u00f3n Uribe", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Andrea_Vallone", "name": "Andrea Vallone", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Arun_Vijayvergiya", "name": "Arun Vijayvergiya", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Chelsea_Voss", "name": "Chelsea Voss", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Carroll_Wainwright", "name": "Carroll Wainwright", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Justin_Jay_Wang", "name": "Justin Jay Wang", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Alvin_Wang", "name": "Alvin Wang", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Ben_Wang", "name": "Ben Wang", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Jonathan_Ward", "name": "Jonathan Ward", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Jason_Wei", "name": "Jason Wei", "image": 
"https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#CJ_Weinmann", "name": "CJ Weinmann", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Akila_Welihinda", "name": "Akila Welihinda", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Peter_Welinder", "name": "Peter Welinder", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Jiayi_Weng", "name": "Jiayi Weng", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Lilian_Weng", "name": "Lilian Weng", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Matt_Wiethoff", "name": "Matt Wiethoff", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Dave_Willner", "name": "Dave Willner", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Clemens_Winter", "name": "Clemens Winter", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Samuel_Wolrich", "name": "Samuel Wolrich", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Hannah_Wong", "name": "Hannah Wong", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Lauren_Workman", "name": "Lauren Workman", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Sherwin_Wu", "name": "Sherwin Wu", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Jeff_Wu", "name": "Jeff Wu", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Michael_Wu", "name": "Michael Wu", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Kai_Xiao", "name": "Kai Xiao", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Tao_Xu", "name": "Tao Xu", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Sarah_Yoo", "name": "Sarah Yoo", "image": 
"https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Kevin_Yu", "name": "Kevin Yu", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Qiming_Yuan", "name": "Qiming Yuan", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Wojciech_Zaremba", "name": "Wojciech Zaremba", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Rowan_Zellers", "name": "Rowan Zellers", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Chong_Zhang", "name": "Chong Zhang", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Marvin_Zhang", "name": "Marvin Zhang", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Shengjia_Zhao", "name": "Shengjia Zhao", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Tianhao_Zheng", "name": "Tianhao Zheng", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Juntang_Zhuang", "name": "Juntang Zhuang", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#William_Zhuk", "name": "William Zhuk", "image": "https://paperswithcode.com/static/" }, { "@type": "Person", "@id": "#Barret_Zoph", "name": "Barret Zoph", "image": "https://paperswithcode.com/static/" } ], "workExample": [ { "@type": "SoftwareSourceCode", "@id": "https://github.com/openai/evals", "name": "evals", "description": "Evals is a framework for evaluating LLMs and LLM systems, and an open-source registry of benchmarks.", "url": "https://github.com/openai/evals", "image": "https://paperswithcode.com/static/", "headline": "evals", "codeRepository": "https://github.com/openai/evals", "contentRating": "15084" }, { "@type": "SoftwareSourceCode", "@id": "https://github.com/shmsw25/factscore", "name": "factscore", "description": "A package to evaluate factuality of long-form generation. 
Original implementation of our EMNLP 2023 paper \"FActScore: Fine-grained Atomic Evaluation of Factual Precision in Long Form Text Generation\"", "url": "https://github.com/shmsw25/factscore", "image": "https://paperswithcode.com/static/", "headline": "factscore", "codeRepository": "https://github.com/shmsw25/factscore", "contentRating": "291" }, { "@type": "SoftwareSourceCode", "@id": "https://github.com/unispac/visual-adversarial-examples-jailbreak-large-language-models", "name": "visual-adversarial-examples-jailbreak-large-language-models", "description": "Repository for the Paper (AAAI 2024, Oral) --- Visual Adversarial Examples Jailbreak Large Language Models", "url": "https://github.com/unispac/visual-adversarial-examples-jailbreak-large-language-models", "image": "https://paperswithcode.com/static/", "headline": "visual-adversarial-examples-jailbreak-large-language-models", "codeRepository": "https://github.com/unispac/visual-adversarial-examples-jailbreak-large-language-models", "contentRating": "183" }, { "@type": "SoftwareSourceCode", "@id": "https://github.com/gpt4life/alpagasus", "name": "alpagasus", "description": "Unofficial implementation of AlpaGasus", "url": "https://github.com/gpt4life/alpagasus", "image": "https://paperswithcode.com/static/", "headline": "alpagasus", "codeRepository": "https://github.com/gpt4life/alpagasus", "contentRating": "84" }, { "@type": "SoftwareSourceCode", "@id": "https://github.com/emrgnt-cmplxty/zero-shot-replication", "name": "zero-shot-replication", "description": "", "url": "https://github.com/emrgnt-cmplxty/zero-shot-replication", "image": "https://paperswithcode.com/static/", "headline": "zero-shot-replication", "codeRepository": "https://github.com/emrgnt-cmplxty/zero-shot-replication", "contentRating": "72" }, { "@type": "SoftwareSourceCode", "@id": "https://github.com/ethz-privsec/superhuman-ai-consistency", "name": "superhuman-ai-consistency", "description": "", "url": 
"https://github.com/ethz-privsec/superhuman-ai-consistency", "image": "https://paperswithcode.com/static/", "headline": "superhuman-ai-consistency", "codeRepository": "https://github.com/ethz-privsec/superhuman-ai-consistency", "contentRating": "28" }, { "@type": "SoftwareSourceCode", "@id": "https://github.com/ethz-spylab/superhuman-ai-consistency", "name": "superhuman-ai-consistency", "description": "", "url": "https://github.com/ethz-spylab/superhuman-ai-consistency", "image": "https://paperswithcode.com/static/", "headline": "superhuman-ai-consistency", "codeRepository": "https://github.com/ethz-spylab/superhuman-ai-consistency", "contentRating": "28" }, { "@type": "SoftwareSourceCode", "@id": "https://github.com/eternityyw/tram-benchmark", "name": "tram-benchmark", "description": "TRAM: Benchmarking Temporal Reasoning for Large Language Models (Findings of ACL 2024)", "url": "https://github.com/eternityyw/tram-benchmark", "image": "https://paperswithcode.com/static/", "headline": "tram-benchmark", "codeRepository": "https://github.com/eternityyw/tram-benchmark", "contentRating": "22" }, { "@type": "SoftwareSourceCode", "@id": "https://github.com/AUCOHL/RTL-Repo", "name": "RTL-Repo", "description": "RTL-Repo: A Benchmark for Evaluating LLMs on Large-Scale RTL Design Projects - IEEE LAD'24", "url": "https://github.com/AUCOHL/RTL-Repo", "image": "https://paperswithcode.com/static/", "headline": "RTL-Repo", "codeRepository": "https://github.com/AUCOHL/RTL-Repo", "contentRating": "5" }, { "@type": "SoftwareSourceCode", "@id": "https://github.com/zach-zhiling-zheng/reticular_chemist", "name": "reticular_chemist", "description": "", "url": "https://github.com/zach-zhiling-zheng/reticular_chemist", "image": "https://paperswithcode.com/static/", "headline": "reticular_chemist", "codeRepository": "https://github.com/zach-zhiling-zheng/reticular_chemist", "contentRating": "2" }, { "@type": "SoftwareSourceCode", "@id": "https://github.com/avhbench/avhbench", "name": 
"avhbench", "description": "", "url": "https://github.com/avhbench/avhbench", "image": "https://paperswithcode.com/static/", "headline": "avhbench", "codeRepository": "https://github.com/avhbench/avhbench", "contentRating": "0" } ], "datePublished": "2023-03-15" } }</script> <meta name="theme-color" content="#fff"/> <link rel="manifest" href="https://production-assets.paperswithcode.com/static/manifest.web.json"> </head> <body> <nav class="navbar navbar-expand-lg navbar-light header"> <a class="navbar-brand" href="/"> <span class=" icon-wrapper" data-name="pwc"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><path d="M88 128h48v256H88zm144 0h48v256h-48zm-72 16h48v224h-48zm144 0h48v224h-48zm72-16h48v256h-48z"/><path d="M104 104V56H16v400h88v-48H64V104zm304-48v48h40v304h-40v48h88V56z"/></svg></span> </a> <div class="navbar-mobile-twitter d-lg-none"> <a rel="noreferrer" href="https://twitter.com/paperswithcode"> <span class=" icon-wrapper icon-fa icon-fa-brands" data-name="twitter"><svg viewBox="0 0 512.001 515.25" xmlns="http://www.w3.org/2000/svg"><path d="M459.37 152.016c.326 4.548.326 9.097.326 13.645 0 138.72-105.583 298.558-298.559 298.558C101.685 464.22 46.457 447 0 417.114c8.447.973 16.568 1.298 25.34 1.298 49.054 0 94.213-16.568 130.274-44.832-46.132-.975-84.792-31.188-98.113-72.772 6.499.975 12.996 1.624 19.819 1.624 9.42 0 18.843-1.3 27.613-3.573-48.08-9.747-84.142-51.98-84.142-102.984v-1.3c13.968 7.798 30.213 12.67 47.43 13.32-28.263-18.843-46.78-51.006-46.78-87.391 0-19.492 5.196-37.36 14.294-52.954 51.654 63.674 129.3 105.258 216.364 109.807-1.624-7.797-2.599-15.918-2.599-24.04 0-57.827 46.782-104.934 104.934-104.934 30.214 0 57.502 12.67 76.671 33.136 23.715-4.548 46.455-13.319 66.599-25.34-7.798 24.367-24.366 44.834-46.132 57.828 21.117-2.274 41.584-8.122 60.426-16.244-14.292 20.791-32.161 39.309-52.628 54.253z"/></svg></span> </a> </div> <button class="navbar-toggler" type="button" data-toggle="collapse" data-bs-toggle="collapse" 
data-target="#top-menu" data-bs-target="#top-menu" aria-controls="top-menu" aria-expanded="false" aria-label="Toggle navigation" >
  <span class="navbar-toggler-icon"></span>
</button>
<!-- Collapsible menu; its id matches the toggler's data-target / aria-controls. -->
<div class="collapse navbar-collapse" id="top-menu">
  <ul class="navbar-nav mr-auto navbar-nav__left light-header">
    <li class="nav-item header-search">
      <!-- Global search: submits q, q_type and q_meta to /search via GET. -->
      <form action="/search" method="get" id="id_global_search_form" autocomplete="off">
        <!-- NOTE(review): q_meta is a text input hidden with inline CSS rather than type="hidden"; its purpose is not visible here, confirm before changing. -->
        <input type="text" name="q_meta" style="display:none" id="q_meta" />
        <input type="hidden" name="q_type" id="q_type" />
        <!-- A placeholder is not a label; aria-label gives the field an accessible name. -->
        <input id="id_global_search_input" autocomplete="off" value="" name="q" class="global-search" type="search" placeholder="Search" aria-label="Search"/>
        <!-- Icon-only submit button: the SVG has no text, so the name comes from aria-label. -->
        <button type="submit" class="icon" aria-label="Search"><span class=" icon-wrapper icon-fa icon-fa-light" data-name="search"><svg viewBox="0 0 512.025 520.146" xmlns="http://www.w3.org/2000/svg"><path d="M508.5 482.6c4.7 4.7 4.7 12.3 0 17l-9.9 9.9c-4.7 4.7-12.3 4.7-17 0l-129-129c-2.2-2.3-3.5-5.3-3.5-8.5v-10.2C312 396 262.5 417 208 417 93.1 417 0 323.9 0 209S93.1 1 208 1s208 93.1 208 208c0 54.5-21 104-55.3 141.1H371c3.2 0 6.2 1.2 8.5 3.5zM208 385c97.3 0 176-78.7 176-176S305.3 33 208 33 32 111.7 32 209s78.7 176 176 176z"/></svg></span></button>
      </form>
    </li>
    <li class="nav-item">
      <a class="nav-link" href="/sota"> Browse State-of-the-Art </a>
    </li>
    <li class="nav-item">
      <a class="nav-link" href="/datasets"> Datasets </a>
    </li>
    <li class="nav-item">
      <a class="nav-link" href="/methods">Methods</a>
    </li>
    <li class="nav-item dropdown">
      <!-- NOTE(review): this toggle is an <a> without href, so it is not keyboard-focusable by default; consider a <button> once the dropdown script is confirmed to support it. -->
      <a class="nav-link dropdown-toggle" role="button" id="navbarDropdownRepro" data-toggle="dropdown" data-bs-toggle="dropdown" aria-haspopup="true" aria-expanded="false" > More </a>
      <div class="dropdown-menu" aria-labelledby="navbarDropdownRepro">
        <a class="dropdown-item" href="/newsletter">Newsletter</a>
        <a class="dropdown-item" href="/rc2022">RC2022</a>
        <div class="dropdown-divider"></div>
        <a class="dropdown-item" href="/about">About</a>
        <a class="dropdown-item" href="/trends">Trends</a>
        <a
class="dropdown-item" href="https://portal.paperswithcode.com/"> Portals </a>
<a class="dropdown-item" href="/libraries"> Libraries </a>
</div>
</li>
</ul>
<!-- Right-hand navbar group: Twitter link and sign-in link. -->
<ul class="navbar-nav ml-auto navbar-nav__right navbar-subscribe justify-content-center align-items-center">
  <li class="nav-item">
    <!-- Icon-only link: the SVG has no text, so aria-label supplies the accessible name. -->
    <a class="nav-link" rel="noreferrer" href="https://twitter.com/paperswithcode" aria-label="Papers With Code on Twitter">
      <span class="nav-link-social-icon icon-wrapper icon-fa icon-fa-brands" data-name="twitter"><svg viewBox="0 0 512.001 515.25" xmlns="http://www.w3.org/2000/svg"><path d="M459.37 152.016c.326 4.548.326 9.097.326 13.645 0 138.72-105.583 298.558-298.559 298.558C101.685 464.22 46.457 447 0 417.114c8.447.973 16.568 1.298 25.34 1.298 49.054 0 94.213-16.568 130.274-44.832-46.132-.975-84.792-31.188-98.113-72.772 6.499.975 12.996 1.624 19.819 1.624 9.42 0 18.843-1.3 27.613-3.573-48.08-9.747-84.142-51.98-84.142-102.984v-1.3c13.968 7.798 30.213 12.67 47.43 13.32-28.263-18.843-46.78-51.006-46.78-87.391 0-19.492 5.196-37.36 14.294-52.954 51.654 63.674 129.3 105.258 216.364 109.807-1.624-7.797-2.599-15.918-2.599-24.04 0-57.827 46.782-104.934 104.934-104.934 30.214 0 57.502 12.67 76.671 33.136 23.715-4.548 46.455-13.319 66.599-25.34-7.798 24.367-24.366 44.834-46.132 57.828 21.117-2.274 41.584-8.122 60.426-16.244-14.292 20.791-32.161 39.309-52.628 54.253z"/></svg></span>
    </a>
  </li>
  <li class="nav-item">
    <!-- The next parameter carries this page's path (/paper/gpt-4-technical-report-1). -->
    <a id="signin-link" class="nav-link" href="/accounts/login?next=/paper/gpt-4-technical-report-1">Sign In</a>
  </li>
</ul>
</div>
</nav>
<!-- Page modals -->
<!-- Newsletter subscription dialog; its title element (#emailModalLabel) labels the dialog. -->
<div class="modal fade" id="emailModal" tabindex="-1" role="dialog" aria-labelledby="emailModalLabel" aria-hidden="true">
  <div class="modal-dialog" role="document">
    <div class="modal-content">
      <div class="modal-header">
        <h3 class="modal-title" id="emailModalLabel">Subscribe to the PwC Newsletter</h3>
        <!-- Carries both data-dismiss and data-bs-dismiss; presumably for two Bootstrap attribute conventions, confirm before removing either. -->
        <button type="button" class="close" data-dismiss="modal" data-bs-dismiss="modal" aria-label="Close">
          <span aria-hidden="true">&times;</span>
        </button>
      </div>
      <form
action="" method="post">
  <div class="modal-body">
    <div class="modal-body-info-text">
      Stay informed on the latest trending ML papers with code, research developments, libraries, methods, and datasets.<br/><br/>
      <a href="/newsletter">Read previous issues</a>
    </div>
    <input type="hidden" name="csrfmiddlewaretoken" value="RMII7xlHUBDRcWKZNE5o0UAqj4ksHb7MtdqrUK8u3wcM4aK5d9gcwdMTsMgqfR1w">
    <!-- The HTML attribute is maxlength; the previous max_length spelling is not recognised, so no limit was applied.
         aria-label names the field because the placeholder is not a label. -->
    <input placeholder="Enter your email" type="email" class="form-control pwc-email" name="address" id="id_address" maxlength="100" autocomplete="email" aria-label="Email address" required>
  </div>
  <div class="modal-footer">
    <button type="submit" class="btn btn-primary">Subscribe</button>
  </div>
</form>
</div>
</div>
</div>
<!-- Login -->
<!-- Dialog that points visitors to the login / registration pages; both links return to this paper via next=. -->
<div class="modal fade" id="loginModal" tabindex="-1" role="dialog" aria-labelledby="loginModalLabel" aria-hidden="true">
  <div class="modal-dialog" role="document">
    <div class="modal-content">
      <div class="modal-header">
        <h5 class="modal-title" id="loginModalLabel">Join the community</h5>
        <button type="button" class="close btn-close" data-dismiss="modal" data-bs-dismiss="modal" aria-label="Close">
          <span aria-hidden="true">&times;</span>
        </button>
      </div>
      <div class="login-modal-message">
        You need to <a href="/accounts/login?next=/paper/gpt-4-technical-report-1">log in</a> to edit.<br/>
        You can <a href="/accounts/register?next=/paper/gpt-4-technical-report-1">create a new account</a> if you don't have one.<br/><br/>
      </div>
    </div>
  </div>
</div>
<!-- All the modals go here -->
<!-- Content inside <template> is inert until a script clones it into the document. -->
<template id="modals-template">
  <!-- Edit Social Preview dialog -->
  <div class="modal fade" id="page-meta-modal">
    <div class="modal-dialog">
      <div class="modal-content">
        <div class="modal-header">
          <h5 class="modal-title">Edit Social Preview</h5>
          <button type="button" class="close btn-close" data-dismiss="modal" data-bs-dismiss="modal" aria-label="Close" >
            <span aria-hidden="true">&times;</span>
          </button>
        </div>
        <div id="page-meta-modal-body" class="modal-body">
          <input type="hidden" name="csrfmiddlewaretoken"
value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
<!-- Identify the record being edited (model id and model name). -->
<input type="hidden" id="page-meta-model-id-input" value="1174373" />
<input type="hidden" id="page-meta-model-name-input" value="Paper" />
<!-- Description mode: each radio's data-target names the id of one panel below.
     NOTE(review): presumably a script shows the panel matching the checked radio; confirm in the page's JS. -->
<div class="form-group">
  <label>Description</label><br />
  <div class="form-check form-check-inline">
    <input id="description-mode-default" class="form-check-input display-toggle-switch" checked type="radio" name="description-mode" value="default" data-target="display-description-default" />
    <label class="form-check-label" for="description-mode-default">Default</label>
  </div>
  <div class="form-check form-check-inline">
    <input id="description-mode-custom" class="form-check-input display-toggle-switch" type="radio" name="description-mode" value="custom" data-target="display-description-custom" />
    <label class="form-check-label" for="description-mode-custom">Custom</label>
  </div>
</div>
<div class="form-group">
  <div id="display-description-default" data-name="description-mode">
    <!-- Read-only default text. Neither textarea has an associated <label>, so aria-label names them. -->
    <textarea class="form-control" rows="3" aria-label="Default description" readonly>🏆 SOTA for Legal Reasoning on LegalBench (Rule-recall) (Balanced Accuracy metric)</textarea>
  </div>
  <div id="display-description-custom" data-name="description-mode">
    <textarea class="form-control" id="description-input" rows="3" aria-label="Custom description"></textarea>
  </div>
</div>
<!-- Image mode: same radio-to-panel wiring as the description group; its radios continue past this point. -->
<div class="form-group">
  <label>Image</label><br />
  <div class="form-group">
    <div class="form-check form-check-inline">
      <input id="image-mode-default" class="form-check-input display-toggle-switch" checked type="radio" name="image-mode" value="default" data-target="display-image-default" />
      <label class="form-check-label" for="image-mode-default">Default</label>
    </div>
    <div class="form-check form-check-inline">
      <input id="image-mode-custom" class="form-check-input display-toggle-switch" type="radio" name="image-mode" value="custom" data-target="display-image-custom" />
      <label class="form-check-label" for="image-mode-custom">Custom</label>
    </div>
    <div class="form-check
form-check-inline">
  <input id="image-mode-none" class="form-check-input display-toggle-switch" type="radio" name="image-mode" value="none" data-target="display-image-none" />
  <label class="form-check-label" for="image-mode-none">None</label>
</div>
</div>
</div>
<!-- One panel per image mode; the ids match the data-target values on the image-mode radios. -->
<div class="form-group">
  <div id="display-image-default" data-name="image-mode">
    <!-- alt added: the image previously had no text alternative. -->
    <img class="page-meta-media" src="https://raw.githubusercontent.com/unispac/visual-adversarial-examples-jailbreak-large-language-models/master/assets/human_race.png" alt="Default social preview image" />
  </div>
  <div id="display-image-custom" data-name="image-mode">
    <!-- Hidden by default via inline style. NOTE(review): presumably revealed by script when the chosen file exceeds a size limit; confirm in JS. -->
    <div id="file-too-large" style="display: none" class="alert alert-danger" role="alert"> File is too large </div>
    <p>
      Upload an image to customize your repository’s social media preview.<br />
      Images should be at least 640×320px (1280×640px for best display).
    </p>
    <!-- The file input has no <label>; aria-label gives it an accessible name. -->
    <input type="file" class="form-control-file" id="image-input" aria-label="Custom social preview image" />
  </div>
  <div id="display-image-none" data-name="image-mode"> </div>
</div>
</div>
<div class="modal-footer">
  <button type="button" class="btn btn-secondary" data-dismiss="modal" data-bs-dismiss="modal"> Close </button>
  <button type="button" id="page-meta-submit" class="btn btn-primary"> Save </button>
</div>
</div>
</div>
</div>
<!-- Add Code -->
<!-- Dialog for attaching a repository URL to this paper; the form posts back to the current URL (empty action). -->
<div class="modal fade" id="addCode" tabindex="-1" role="dialog" aria-labelledby="addCodeLabel" aria-hidden="true">
  <div class="modal-dialog" role="document">
    <div class="modal-content">
      <div class="modal-header">
        <h5 class="modal-title" id="addCodeLabel">Add a new code entry for this paper</h5>
        <!-- data-dismiss added next to data-bs-dismiss so this close button carries the same attribute pair as the Close button in the modal footer above. -->
        <button type="button" class="close btn-close" data-dismiss="modal" data-bs-dismiss="modal" aria-label="Close">
          <span aria-hidden="true">&times;</span>
        </button>
      </div>
      <form action="" method="post">
        <div class="modal-body">
          <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
          <div id="div_id_url" class="form-group">
            <label for="id_url" class=" requiredField"> GitHub, GitLab or BitBucket URL:<span
class="asteriskField">*</span> </label> <div class=""> <input type="url" name="url" class="urlinput form-control" required id="id_url"> </div> </div> <div class="form-group"> <div id="div_id_is_official" class="form-check"> <input type="checkbox" name="is_official" class="checkboxinput form-check-input" id="id_is_official"> <label for="id_is_official" class="form-check-label"> Official code from paper authors </label> </div> </div> </div> <div class="modal-footer"> <button type="submit" class="btn btn-primary">Submit </button> </div> </form> </div> </div> </div> <!-- Remove Code --> <div class="modal fade" id="removeCode" tabindex="-1" role="dialog" aria-labelledby="removeCodeLabel" aria-hidden="true"> <div class="modal-dialog modal-lg" role="document"> <div class="modal-content"> <div class="modal-header"> <h5 class="modal-title" id="removeCodeLabel">Remove a code repository from this paper</h5> <button type="button" class="close btn-close" data-bs-dismiss="modal" aria-label="Close"> <span aria-hidden="true">&times;</span> </button> </div> <form action="" method="post"> <div class="modal-body"> <div class="paper-implementations"> <div class="row"> <div class="col-md-6"> <div class="paper-impl-cell"> <a href="https://github.com/openai/evals" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 
.5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> openai/evals <span class="badge badge-info is-official-code"><span class=" icon-wrapper icon-ion" data-name="checkmark-circle-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M448 256c0-106-86-192-192-192S64 150 64 256s86 192 192 192 192-86 192-192z" fill="none" stroke="#000" stroke-miterlimit="10" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M352 176L217.6 336 160 272"/></svg></span> official</span> </a> </div> </div> <div class="col-md-3"> <div class="paper-impl-cell"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 15,084 </div> </div> <div class="col-md-2"> <div class="paper-impl-cell"> </div> </div> <div class="col-md-1"> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_code_pk" value="1339808"> <button type="submit" class="btn btn-danger">- </button> </form> </div> </div> <div class="row"> <div class="col-md-6"> <div class="paper-impl-cell"> <a href="https://github.com/shmsw25/factscore" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 
101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> shmsw25/factscore </a> </div> </div> <div class="col-md-3"> <div class="paper-impl-cell"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 291 </div> </div> <div class="col-md-2"> <div class="paper-impl-cell"> <img class="" src="https://production-assets.paperswithcode.com/perf/images/frameworks/pytorch-2fbf2cb9.png" /> </div> </div> <div class="col-md-1"> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_code_pk" value="1379780"> <button type="submit" class="btn btn-danger">- </button> </form> </div> </div> <div class="row"> <div class="col-md-6"> <div class="paper-impl-cell"> <a href="https://github.com/unispac/visual-adversarial-examples-jailbreak-large-language-models" class="code-table-link"> <span class=" icon-wrapper 
icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> unispac/visual-adversarial-examples-jailbreak-large-language-models </a> </div> </div> <div class="col-md-3"> <div class="paper-impl-cell"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 183 </div> </div> <div class="col-md-2"> <div class="paper-impl-cell"> <img class="" src="https://production-assets.paperswithcode.com/perf/images/frameworks/pytorch-2fbf2cb9.png" /> </div> </div> <div class="col-md-1"> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_code_pk" value="1359720"> <button type="submit" class="btn btn-danger">- </button> </form> </div> </div> <div 
class="row"> <div class="col-md-6"> <div class="paper-impl-cell"> <a href="https://github.com/gpt4life/alpagasus" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> gpt4life/alpagasus </a> </div> </div> <div class="col-md-3"> <div class="paper-impl-cell"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 84 </div> </div> <div class="col-md-2"> <div class="paper-impl-cell"> <img class="" src="https://production-assets.paperswithcode.com/perf/images/frameworks/pytorch-2fbf2cb9.png" /> </div> </div> <div class="col-md-1"> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" 
name="remove_code_pk" value="1376980"> <button type="submit" class="btn btn-danger">- </button> </form> </div> </div> <div class="row"> <div class="col-md-6"> <div class="paper-impl-cell"> <a href="https://github.com/emrgnt-cmplxty/zero-shot-replication" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> emrgnt-cmplxty/zero-shot-replication </a> </div> </div> <div class="col-md-3"> <div class="paper-impl-cell"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 72 </div> </div> <div class="col-md-2"> <div class="paper-impl-cell"> <img class="" src="https://production-assets.paperswithcode.com/perf/images/frameworks/pytorch-2fbf2cb9.png" /> </div> </div> <div class="col-md-1"> <form action="" 
method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_code_pk" value="1400489">
<!-- "-" alone does not say what is removed, so the button is named after the repository in its row. -->
<button type="submit" class="btn btn-danger" aria-label="Remove emrgnt-cmplxty/zero-shot-replication">- </button> </form> </div> </div>
<!-- Row: ethz-privsec/superhuman-ai-consistency. The GitHub logo is decorative; the star icon is named. -->
<div class="row"> <div class="col-md-6"> <div class="paper-impl-cell"> <a href="https://github.com/ethz-privsec/superhuman-ai-consistency" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> ethz-privsec/superhuman-ai-consistency </a> </div> </div>
<div class="col-md-3"> <div class="paper-impl-cell"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Stars"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 28 </div> </div>
<!-- Framework cell is empty for this row. -->
<div class="col-md-2"> <div class="paper-impl-cell"> </div> </div> 
<div class="col-md-1"> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_code_pk" value="1358532">
<!-- "-" alone does not say what is removed, so the button is named after the repository in its row. -->
<button type="submit" class="btn btn-danger" aria-label="Remove ethz-privsec/superhuman-ai-consistency">- </button> </form> </div> </div>
<!-- Row: ethz-spylab/superhuman-ai-consistency. The GitHub logo is decorative; the star icon is named. -->
<div class="row"> <div class="col-md-6"> <div class="paper-impl-cell"> <a href="https://github.com/ethz-spylab/superhuman-ai-consistency" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> ethz-spylab/superhuman-ai-consistency </a> </div> </div>
<div class="col-md-3"> <div class="paper-impl-cell"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Stars"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 28 </div> </div>
<div class="col-md-2"> <div 
class="paper-impl-cell"> </div> </div> <div class="col-md-1"> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_code_pk" value="1358740">
<!-- "-" alone does not say what is removed, so the button is named after the repository in its row. -->
<button type="submit" class="btn btn-danger" aria-label="Remove ethz-spylab/superhuman-ai-consistency">- </button> </form> </div> </div>
<!-- Row: eternityyw/tram-benchmark. The GitHub logo is decorative; the star icon is named. -->
<div class="row"> <div class="col-md-6"> <div class="paper-impl-cell"> <a href="https://github.com/eternityyw/tram-benchmark" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> eternityyw/tram-benchmark </a> </div> </div>
<div class="col-md-3"> <div class="paper-impl-cell"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Stars"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 22 </div> </div>
<div 
class="col-md-2"> <div class="paper-impl-cell"> </div> </div> <div class="col-md-1"> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_code_pk" value="1377429">
<!-- "-" alone does not say what is removed, so the button is named after the repository in its row. -->
<button type="submit" class="btn btn-danger" aria-label="Remove eternityyw/tram-benchmark">- </button> </form> </div> </div>
<!-- Row: AUCOHL/RTL-Repo. The GitHub logo is decorative; the star icon is named. -->
<div class="row"> <div class="col-md-6"> <div class="paper-impl-cell"> <a href="https://github.com/AUCOHL/RTL-Repo" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> AUCOHL/RTL-Repo </a> </div> </div>
<div class="col-md-3"> <div class="paper-impl-cell"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Stars"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 5 </div> </div>
<div 
class="col-md-2"> <div class="paper-impl-cell"> <img class="" src="https://production-assets.paperswithcode.com/perf/images/frameworks/pytorch-2fbf2cb9.png" alt="PyTorch"> </div> </div> <div class="col-md-1"> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_code_pk" value="1426518">
<!-- "-" alone does not say what is removed, so the button is named after the repository in its row. -->
<button type="submit" class="btn btn-danger" aria-label="Remove AUCOHL/RTL-Repo">- </button> </form> </div> </div>
<!-- Row: zach-zhiling-zheng/reticular_chemist. The GitHub logo is decorative; the star icon is named. -->
<div class="row"> <div class="col-md-6"> <div class="paper-impl-cell"> <a href="https://github.com/zach-zhiling-zheng/reticular_chemist" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> zach-zhiling-zheng/reticular_chemist </a> </div> </div>
<div class="col-md-3"> <div class="paper-impl-cell"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Stars"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 
9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 2 </div> </div> <div class="col-md-2"> <div class="paper-impl-cell"> </div> </div> <div class="col-md-1"> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_code_pk" value="1360363">
<!-- "-" alone does not say what is removed, so the button is named after the repository in its row. -->
<button type="submit" class="btn btn-danger" aria-label="Remove zach-zhiling-zheng/reticular_chemist">- </button> </form> </div> </div>
<!-- Row: avhbench/avhbench. The GitHub logo is decorative; the star icon is named. -->
<div class="row"> <div class="col-md-6"> <div class="paper-impl-cell"> <a href="https://github.com/avhbench/avhbench" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> avhbench/avhbench </a> </div> </div>
<div class="col-md-3"> <div class="paper-impl-cell"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Stars"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 
9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 0 </div> </div> <div class="col-md-2"> <div class="paper-impl-cell"> <img class="" src="https://production-assets.paperswithcode.com/perf/images/frameworks/pytorch-2fbf2cb9.png" alt="PyTorch"> </div> </div> <div class="col-md-1"> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_code_pk" value="1457258">
<!-- "-" alone does not say what is removed, so the button is named after the repository in its row. -->
<button type="submit" class="btn btn-danger" aria-label="Remove avhbench/avhbench">- </button> </form> </div> </div> </div> </div> </form> </div> </div> </div>
<!-- Change official code: modal listing every implementation with a radio (single choice) and a checkbox (multiple choice) per row. -->
<div class="modal fade" id="changeOfficialCode" tabindex="-1" role="dialog" aria-labelledby="changeOfficialCodeLabel" aria-hidden="true"> <div class="modal-dialog modal-lg" role="document"> <div class="modal-content"> <div class="modal-header"> <h5 class="modal-title" id="changeOfficialCodeLabel"> Mark the official implementation from paper authors </h5> <button type="button" class="close btn-close" data-bs-dismiss="modal" aria-label="Close"> <span aria-hidden="true">&times;</span> </button> </div>
<!-- official_pgr_ids has no value in the markup; NOTE(review): presumably filled by script before submit, confirm against the page JS. -->
<form action="" method="post" id="official-pgr-form"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="official_pgr_ids" id="official-pgr-ids"> <div class="modal-body"> <div class="paper-implementations">
<!-- Row: openai/evals. The GitHub logo is decorative because the repository name follows it. -->
<div class="row align-items-center"> <div class="col-md-5"> <div class="paper-impl-cell"> <a href="https://github.com/openai/evals" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 
0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> openai/evals <span class="badge badge-info is-official-code"><span class=" icon-wrapper icon-ion" data-name="checkmark-circle-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M448 256c0-106-86-192-192-192S64 150 64 256s86 192 192 192 192-86 192-192z" fill="none" stroke="#000" stroke-miterlimit="10" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M352 176L217.6 336 160 272"/></svg></span> official</span> </a> </div> </div>
<!-- Star count: the icon is the only cue for what the number means, so it gets a text alternative. -->
<div class="col-md-3"> <div class="paper-impl-cell"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Stars"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 15,084 </div> </div>
<div class="col-md-2"> <div class="paper-impl-cell"> </div> </div>
<!-- The selection controls have no visible label, so each is named after the repository in its row. -->
<div class="col-md-2 text-center"> <input type="radio" name="official-pgr-radio" class="official-pgr-input official-pgr-radio radios-version-element" value="2104372" checked aria-label="Mark openai/evals as the official implementation"> <input type="checkbox" class="official-pgr-input official-pgr-checkbox 
checkboxes-version-element" value="2104372" checked aria-label="Mark openai/evals as an official implementation"> </div> </div>
<!-- Row: shmsw25/factscore. The GitHub logo is decorative; the star icon is named. -->
<div class="row align-items-center"> <div class="col-md-5"> <div class="paper-impl-cell"> <a href="https://github.com/shmsw25/factscore" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> shmsw25/factscore </a> </div> </div>
<div class="col-md-3"> <div class="paper-impl-cell"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Stars"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 291 </div> </div>
<!-- Framework logo: alt text names the framework shown by the image. -->
<div class="col-md-2"> <div class="paper-impl-cell"> <img class="" src="https://production-assets.paperswithcode.com/perf/images/frameworks/pytorch-2fbf2cb9.png" alt="PyTorch"> </div> </div>
<div class="col-md-2 text-center"> <input type="radio" name="official-pgr-radio" class="official-pgr-input 
official-pgr-radio radios-version-element" value="2207929" aria-label="Mark shmsw25/factscore as the official implementation"> <input type="checkbox" class="official-pgr-input official-pgr-checkbox checkboxes-version-element" value="2207929" aria-label="Mark shmsw25/factscore as an official implementation"> </div> </div>
<!-- Row: unispac/visual-adversarial-examples-jailbreak-large-language-models. The GitHub logo is decorative; the star icon is named. -->
<div class="row align-items-center"> <div class="col-md-5"> <div class="paper-impl-cell"> <a href="https://github.com/unispac/visual-adversarial-examples-jailbreak-large-language-models" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> unispac/visual-adversarial-examples-jailbreak-large-language-models </a> </div> </div>
<div class="col-md-3"> <div class="paper-impl-cell"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Stars"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 183 </div> </div>
<div class="col-md-2"> <div class="paper-impl-cell"> <img class="" 
src="https://production-assets.paperswithcode.com/perf/images/frameworks/pytorch-2fbf2cb9.png" alt="PyTorch"> </div> </div>
<!-- The selection controls have no visible label, so each is named after the repository in its row. -->
<div class="col-md-2 text-center"> <input type="radio" name="official-pgr-radio" class="official-pgr-input official-pgr-radio radios-version-element" value="2159940" aria-label="Mark unispac/visual-adversarial-examples-jailbreak-large-language-models as the official implementation"> <input type="checkbox" class="official-pgr-input official-pgr-checkbox checkboxes-version-element" value="2159940" aria-label="Mark unispac/visual-adversarial-examples-jailbreak-large-language-models as an official implementation"> </div> </div>
<!-- Row: gpt4life/alpagasus. The GitHub logo is decorative; the star icon is named. -->
<div class="row align-items-center"> <div class="col-md-5"> <div class="paper-impl-cell"> <a href="https://github.com/gpt4life/alpagasus" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> gpt4life/alpagasus </a> </div> </div>
<div class="col-md-3"> <div class="paper-impl-cell"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Stars"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 
0 0 1 394 480z"/></svg></span> 84 </div> </div> <div class="col-md-2"> <div class="paper-impl-cell"> <img class="" src="https://production-assets.paperswithcode.com/perf/images/frameworks/pytorch-2fbf2cb9.png" alt="PyTorch"> </div> </div>
<!-- The selection controls have no visible label, so each is named after the repository in its row. -->
<div class="col-md-2 text-center"> <input type="radio" name="official-pgr-radio" class="official-pgr-input official-pgr-radio radios-version-element" value="2201277" aria-label="Mark gpt4life/alpagasus as the official implementation"> <input type="checkbox" class="official-pgr-input official-pgr-checkbox checkboxes-version-element" value="2201277" aria-label="Mark gpt4life/alpagasus as an official implementation"> </div> </div>
<!-- Row: emrgnt-cmplxty/zero-shot-replication. The GitHub logo is decorative; the star icon is named. -->
<div class="row align-items-center"> <div class="col-md-5"> <div class="paper-impl-cell"> <a href="https://github.com/emrgnt-cmplxty/zero-shot-replication" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> emrgnt-cmplxty/zero-shot-replication </a> </div> </div>
<div class="col-md-3"> <div class="paper-impl-cell"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Stars"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 
1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 72 </div> </div> <div class="col-md-2"> <div class="paper-impl-cell"> <img class="" src="https://production-assets.paperswithcode.com/perf/images/frameworks/pytorch-2fbf2cb9.png" alt="PyTorch"> </div> </div>
<!-- The selection controls have no visible label, so each is named after the repository in its row. -->
<div class="col-md-2 text-center"> <input type="radio" name="official-pgr-radio" class="official-pgr-input official-pgr-radio radios-version-element" value="2270784" aria-label="Mark emrgnt-cmplxty/zero-shot-replication as the official implementation"> <input type="checkbox" class="official-pgr-input official-pgr-checkbox checkboxes-version-element" value="2270784" aria-label="Mark emrgnt-cmplxty/zero-shot-replication as an official implementation"> </div> </div>
<!-- Row: ethz-privsec/superhuman-ai-consistency. The GitHub logo is decorative because the repository name follows it. -->
<div class="row align-items-center"> <div class="col-md-5"> <div class="paper-impl-cell"> <a href="https://github.com/ethz-privsec/superhuman-ai-consistency" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> ethz-privsec/superhuman-ai-consistency </a> </div> </div>
<div class="col-md-3"> <div class="paper-impl-cell"> <span class=" icon-wrapper icon-ion" 
data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Stars"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 28 </div> </div> <div class="col-md-2"> <div class="paper-impl-cell"> </div> </div>
<!-- The selection controls have no visible label, so each is named after the repository in its row. -->
<div class="col-md-2 text-center"> <input type="radio" name="official-pgr-radio" class="official-pgr-input official-pgr-radio radios-version-element" value="2156222" aria-label="Mark ethz-privsec/superhuman-ai-consistency as the official implementation"> <input type="checkbox" class="official-pgr-input official-pgr-checkbox checkboxes-version-element" value="2156222" aria-label="Mark ethz-privsec/superhuman-ai-consistency as an official implementation"> </div> </div>
<!-- Row: ethz-spylab/superhuman-ai-consistency. The GitHub logo is decorative because the repository name follows it. -->
<div class="row align-items-center"> <div class="col-md-5"> <div class="paper-impl-cell"> <a href="https://github.com/ethz-spylab/superhuman-ai-consistency" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> ethz-spylab/superhuman-ai-consistency </a> </div> </div>
<div class="col-md-3"> <div 
class="paper-impl-cell"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Stars"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 28 </div> </div> <div class="col-md-2"> <div class="paper-impl-cell"> </div> </div>
<!-- The selection controls have no visible label, so each is named after the repository in its row. -->
<div class="col-md-2 text-center"> <input type="radio" name="official-pgr-radio" class="official-pgr-input official-pgr-radio radios-version-element" value="2155290" aria-label="Mark ethz-spylab/superhuman-ai-consistency as the official implementation"> <input type="checkbox" class="official-pgr-input official-pgr-checkbox checkboxes-version-element" value="2155290" aria-label="Mark ethz-spylab/superhuman-ai-consistency as an official implementation"> </div> </div>
<!-- Row: eternityyw/tram-benchmark. The GitHub logo is decorative because the repository name follows it. -->
<div class="row align-items-center"> <div class="col-md-5"> <div class="paper-impl-cell"> <a href="https://github.com/eternityyw/tram-benchmark" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> eternityyw/tram-benchmark </a> </div> </div>
<div 
class="col-md-3"> <div class="paper-impl-cell"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Stars"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 22 </div> </div> <div class="col-md-2"> <div class="paper-impl-cell"> </div> </div>
<!-- The selection controls have no visible label, so each is named after the repository in its row. -->
<div class="col-md-2 text-center"> <input type="radio" name="official-pgr-radio" class="official-pgr-input official-pgr-radio radios-version-element" value="2203553" aria-label="Mark eternityyw/tram-benchmark as the official implementation"> <input type="checkbox" class="official-pgr-input official-pgr-checkbox checkboxes-version-element" value="2203553" aria-label="Mark eternityyw/tram-benchmark as an official implementation"> </div> </div>
<!-- Row: AUCOHL/RTL-Repo. The GitHub logo is decorative because the repository name follows it. -->
<div class="row align-items-center"> <div class="col-md-5"> <div class="paper-impl-cell"> <a href="https://github.com/AUCOHL/RTL-Repo" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> AUCOHL/RTL-Repo </a> </div> </div>
<div 
class="col-md-3"> <div class="paper-impl-cell"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Stars"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 5 </div> </div>
<!-- Framework logo: alt text names the framework shown by the image. -->
<div class="col-md-2"> <div class="paper-impl-cell"> <img class="" src="https://production-assets.paperswithcode.com/perf/images/frameworks/pytorch-2fbf2cb9.png" alt="PyTorch"> </div> </div>
<!-- The selection controls have no visible label, so each is named after the repository in its row. -->
<div class="col-md-2 text-center"> <input type="radio" name="official-pgr-radio" class="official-pgr-input official-pgr-radio radios-version-element" value="2349135" aria-label="Mark AUCOHL/RTL-Repo as the official implementation"> <input type="checkbox" class="official-pgr-input official-pgr-checkbox checkboxes-version-element" value="2349135" aria-label="Mark AUCOHL/RTL-Repo as an official implementation"> </div> </div>
<!-- Row: zach-zhiling-zheng/reticular_chemist. The GitHub logo is decorative because the repository name follows it. -->
<div class="row align-items-center"> <div class="col-md-5"> <div class="paper-impl-cell"> <a href="https://github.com/zach-zhiling-zheng/reticular_chemist" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 
19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> zach-zhiling-zheng/reticular_chemist </a> </div> </div> <div class="col-md-3"> <div class="paper-impl-cell"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 2 </div> </div> <div class="col-md-2"> <div class="paper-impl-cell"> </div> </div> <div class="col-md-2 text-center"> <input type="radio" name="official-pgr-radio" class="official-pgr-input official-pgr-radio radios-version-element" value="2179701" > <input type="checkbox" class="official-pgr-input official-pgr-checkbox checkboxes-version-element" value="2179701" > </div> </div> <div class="row align-items-center"> <div class="col-md-5"> <div class="paper-impl-cell"> <a href="https://github.com/avhbench/avhbench" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 
11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> avhbench/avhbench </a> </div> </div> <div class="col-md-3"> <div class="paper-impl-cell"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 0 </div> </div> <div class="col-md-2"> <div class="paper-impl-cell"> <img class="" src="https://production-assets.paperswithcode.com/perf/images/frameworks/pytorch-2fbf2cb9.png" /> </div> </div> <div class="col-md-2 text-center"> <input type="radio" name="official-pgr-radio" class="official-pgr-input official-pgr-radio radios-version-element" value="2447488" > <input type="checkbox" class="official-pgr-input official-pgr-checkbox checkboxes-version-element" value="2447488" > </div> </div> <div class="radios-version-element"> <hr/> <div class="row align-items-center justify-content-between"> <div class="col-md-5"> <div class="paper-impl-cell"> There is no official implementation </div> </div> <div class="col-md-2 text-center"> <input type="radio" name="official-pgr-radio" value="" class="official-pgr-input official-pgr-radio" > </div> </div> </div> <hr/> <div class="row align-items-center justify-content-between"> <div class="col-md-5"> <div class="paper-impl-cell"> Multiple official implementations </div> </div> <div class="col-md-2 text-center"> <div class="custom-control custom-switch"> <input type="checkbox" class="custom-control-input" id="official-pgr-multiple-switch"> <label class="custom-control-label" for="official-pgr-multiple-switch" id="official-pgr-multiple-switch-label"></label> </div> </div> </div> </div> </div> <div class="modal-footer"> <button type="submit" 
class="btn btn-primary">Submit </button> </div> </form> </div> </div> </div>
<!-- Add Row: modal dialog (#addRow) whose form POSTs a new evaluation result
     (task, dataset, model name, metric name and value) to the current URL. -->
<div class="modal fade" id="addRow" role="dialog" aria-labelledby="addRowLabel" aria-hidden="true">
  <div class="modal-dialog" role="document">
    <div class="modal-content">
      <div class="modal-header">
        <h5 class="modal-title" id="addRowLabel">Add a new evaluation result row</h5>
        <button type="button" class="close btn-close" data-bs-dismiss="modal" aria-label="Close">
          <span aria-hidden="true">&times;</span>
        </button>
      </div>
      <form action="" method="post">
        <div class="modal-body">
          <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
          <!-- Task: required; the empty first option forces an explicit choice. -->
          <div id="div_id_task" class="form-group">
            <label for="id_task" class=" requiredField"> Task:<span class="asteriskField">*</span> </label>
            <div class="">
              <select name="task" class="select form-control" required id="id_task">
                <option value="" selected>---------</option>
                <option value="5063">Legal Reasoning</option>
                <option value="5831">MMR total</option>
                <option value="697">Image Retrieval</option>
                <option value="158">Zero-Shot Learning</option>
                <option value="153">Few-Shot Learning</option>
                <option value="5550">Long-Context Understanding</option>
                <option value="2023">Sentence Completion</option>
                <option value="5455">Only Connect Walls Dataset Task 1 (Grouping)</option>
                <option value="5266">Math</option>
                <option value="5390">Factual Inconsistency Detection in Chart Captioning</option>
                <option value="3721">Arithmetic Reasoning</option>
                <option value="252">Common Sense Reasoning</option>
                <option value="3294">Multi-task Language Understanding</option>
                <option value="168">Visual Question Answering (VQA)</option>
                <option value="4750">Visual Question Answering</option>
                <option value="5184">Bug fixing</option>
                <option value="9">Question Answering</option>
                <option value="211">Code Generation</option>
              </select>
            </div>
          </div>
          <!-- Closes this modal and opens the #addTask modal. -->
          <div class="add-task-hint"> Not in the list?
            <a href="#addTask" data-bs-toggle="modal" data-bs-dismiss="modal">Add a task.</a>
          </div>
          <!-- Dataset: only the placeholder option is rendered; presumably the remaining
               options are loaded client-side from the data-autocomplete-light-url
               endpoint (TODO confirm). -->
          <div id="div_id_dataset" class="form-group">
            <label for="id_dataset" class=" requiredField"> Dataset:<span class="asteriskField">*</span> </label>
            <div class="">
              <select name="dataset" class="modelselect2 form-control" required id="id_dataset" data-autocomplete-light-language="en" data-autocomplete-light-url="/dataset-autocomplete/" data-autocomplete-light-function="select2">
                <option value="" selected>---------</option>
              </select>
            </div>
          </div>
          <div id="div_id_model_name" class="form-group">
            <label for="id_model_name" class=" requiredField"> Model name:<span class="asteriskField">*</span> </label>
            <div class="">
              <input type="text" name="model_name" class="textinput textInput form-control" required id="id_model_name">
            </div>
          </div>
          <!-- Metric name: same autocomplete wiring as the dataset field, different endpoint. -->
          <div id="div_id_metric" class="form-group">
            <label for="id_metric" class=" requiredField"> Metric name:<span class="asteriskField">*</span> </label>
            <div class="">
              <select name="metric" class="modelselect2 form-control" required id="id_metric" data-autocomplete-light-language="en" data-autocomplete-light-url="/metric-autocomplete/" data-autocomplete-light-function="select2">
                <option value="" selected>---------</option>
              </select>
            </div>
          </div>
          <!-- Empty container; presumably filled by script (TODO confirm). -->
          <div id="sota-metric-names"> </div>
          <div class="form-group">
            <div id="div_id_metric_higher_is_better" class="form-check">
              <input type="checkbox" name="metric_higher_is_better" class="checkboxinput form-check-input" id="id_metric_higher_is_better">
              <label for="id_metric_higher_is_better" class="form-check-label"> Higher is better (for the metric) </label>
            </div>
          </div>
          <div id="div_id_metric_value" class="form-group">
            <label for="id_metric_value" class=" requiredField"> Metric value:<span class="asteriskField">*</span> </label>
            <div class="">
              <input type="text" name="metric_value" class="textinput textInput form-control" required id="id_metric_value">
            </div>
          </div>
          <!-- Empty container; presumably filled by script (TODO confirm). -->
          <div id="sota-metric-values"> </div>
          <div class="form-group">
            <div id="div_id_uses_additional_data" class="form-check">
              <input type="checkbox" name="uses_additional_data" class="checkboxinput form-check-input" id="id_uses_additional_data">
              <label for="id_uses_additional_data" class="form-check-label"> Uses extra training data </label>
            </div>
          </div>
          <!-- Optional field, pre-filled with a date value. -->
          <div id="div_id_evaluated_on" class="form-group">
            <label for="id_evaluated_on" class=""> Date evaluated on </label>
            <div class="">
              <input type="text" name="evaluated_on" value="2023-03-15" autocomplete="off" class="dateinput form-control" id="id_evaluated_on">
            </div>
          </div>
        </div>
        <div class="modal-footer">
          <button type="submit" class="btn btn-primary"> Submit </button>
        </div>
      </form>
    </div>
  </div>
</div>
<!-- Remove Row: modal dialog (#removeRow) listing existing evaluation results.
     Each table row carries its own POST form with remove_row_pk and
     remove_metric_pk identifying the entry to delete. -->
<div class="modal fade" id="removeRow" role="dialog" aria-labelledby="removeRowLabel" aria-hidden="true">
  <div class="modal-dialog modal-lg" role="document">
    <div class="modal-content">
      <div class="modal-header">
        <h5 class="modal-title" id="removeRowLabel">Remove an evaluation result row</h5>
        <button type="button" class="close btn-close" data-bs-dismiss="modal" aria-label="Close">
          <span aria-hidden="true">&times;</span>
        </button>
      </div>
      <!-- NOTE(review): this outer form wraps the table below, whose rows each open
           their own form; HTML does not allow a form inside a form. The outer
           form's closing tag lies outside this section, so it is left in place.
           TODO: confirm the outer form can be dropped. -->
      <form action="" method="post">
        <div class="modal-body">
          <div class="sota-table">
            <table class="table-striped">
              <tr>
                <th scope="col">TASK</th>
                <th scope="col">DATASET</th>
                <th scope="col">MODEL</th>
                <th scope="col">METRIC NAME</th>
                <th scope="col">METRIC VALUE</th>
                <th scope="col">GLOBAL RANK</th>
                <th scope="col" class="text-center">EXTRA<br>DATA </th>
                <th scope="col">REMOVE</th>
              </tr>
              <tr>
                <td> Long-Context Understanding </td>
                <td> Ada-LEval (BestAnswer) </td>
                <td> GPT-4-Turbo-1106 </td>
                <td> 2k </td>
                <td> 73.5 </td>
                <td> # 1 </td>
                <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
                <td>
                  <form action="" method="post">
                    <input
type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120837"> <input type="hidden" name="remove_metric_pk" value="67069"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (BestAnswer) </td> <td> GPT-4-Turbo-1106 </td> <td> 4k </td> <td> 67.5 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120837"> <input type="hidden" name="remove_metric_pk" value="67070"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (BestAnswer) </td> <td> GPT-4-Turbo-1106 </td> <td> 8k </td> <td> 53.5 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120837"> <input type="hidden" name="remove_metric_pk" value="67071"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context 
Understanding </td> <td> Ada-LEval (BestAnswer) </td> <td> GPT-4-Turbo-1106 </td> <td> 16k </td> <td> 44.0 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120837"> <input type="hidden" name="remove_metric_pk" value="67072"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (BestAnswer) </td> <td> GPT-4-Turbo-1106 </td> <td> 1k </td> <td> 74.0 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120837"> <input type="hidden" name="remove_metric_pk" value="67090"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (BestAnswer) </td> <td> GPT-4-Turbo-1106 </td> <td> 6k </td> <td> 59.5 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 
351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120837"> <input type="hidden" name="remove_metric_pk" value="67091"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (BestAnswer) </td> <td> GPT-4-Turbo-1106 </td> <td> 12k </td> <td> 49.5 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120837"> <input type="hidden" name="remove_metric_pk" value="67092"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (BestAnswer) </td> <td> GPT-4-Turbo-1106 </td> <td> 32k </td> <td> 16.0 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" 
value="120837"> <input type="hidden" name="remove_metric_pk" value="67093"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (BestAnswer) </td> <td> GPT-4-Turbo-1106 </td> <td> 64k </td> <td> 0.0 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120837"> <input type="hidden" name="remove_metric_pk" value="67094"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (BestAnswer) </td> <td> GPT-4-Turbo-1106 </td> <td> 128k </td> <td> 0.0 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120837"> <input type="hidden" name="remove_metric_pk" value="67095"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (BestAnswer) </td> <td> GPT-4-Turbo-0125 </td> <td> 2k </td> <td> 73.5 </td> <td> # 1 </td> <td class="text-center"> <span 
class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120835"> <input type="hidden" name="remove_metric_pk" value="67069"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (BestAnswer) </td> <td> GPT-4-Turbo-0125 </td> <td> 4k </td> <td> 65.5 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120835"> <input type="hidden" name="remove_metric_pk" value="67070"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (BestAnswer) </td> <td> GPT-4-Turbo-0125 </td> <td> 8k </td> <td> 56.5 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> 
<input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120835"> <input type="hidden" name="remove_metric_pk" value="67071"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (BestAnswer) </td> <td> GPT-4-Turbo-0125 </td> <td> 16k </td> <td> 44.5 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120835"> <input type="hidden" name="remove_metric_pk" value="67072"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (BestAnswer) </td> <td> GPT-4-Turbo-0125 </td> <td> 1k </td> <td> 73.5 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120835"> <input type="hidden" name="remove_metric_pk" value="67090"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> 
Long-Context Understanding </td> <td> Ada-LEval (BestAnswer) </td> <td> GPT-4-Turbo-0125 </td> <td> 6k </td> <td> 63.0 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120835"> <input type="hidden" name="remove_metric_pk" value="67091"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (BestAnswer) </td> <td> GPT-4-Turbo-0125 </td> <td> 12k </td> <td> 52.0 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120835"> <input type="hidden" name="remove_metric_pk" value="67092"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (BestAnswer) </td> <td> GPT-4-Turbo-0125 </td> <td> 32k </td> <td> 30.0 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 
256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120835"> <input type="hidden" name="remove_metric_pk" value="67093"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (BestAnswer) </td> <td> GPT-4-Turbo-0125 </td> <td> 64k </td> <td> 0.0 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120835"> <input type="hidden" name="remove_metric_pk" value="67094"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (BestAnswer) </td> <td> GPT-4-Turbo-0125 </td> <td> 128k </td> <td> 0.0 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" 
name="remove_row_pk" value="120835"> <input type="hidden" name="remove_metric_pk" value="67095"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (TSort) </td> <td> GPT-4-Turbo-1106 </td> <td> 2k </td> <td> 18.5 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120836"> <input type="hidden" name="remove_metric_pk" value="67073"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (TSort) </td> <td> GPT-4-Turbo-1106 </td> <td> 4k </td> <td> 15.5 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120836"> <input type="hidden" name="remove_metric_pk" value="67074"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (TSort) </td> <td> GPT-4-Turbo-1106 </td> <td> 8k </td> <td> 7.5 </td> <td> # 2 </td> <td class="text-center"> 
<span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120836"> <input type="hidden" name="remove_metric_pk" value="67075"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (TSort) </td> <td> GPT-4-Turbo-1106 </td> <td> 16k </td> <td> 3.5 </td> <td> # 4 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120836"> <input type="hidden" name="remove_metric_pk" value="67076"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (TSort) </td> <td> GPT-4-Turbo-1106 </td> <td> 32k </td> <td> 6.0 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input 
type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120836"> <input type="hidden" name="remove_metric_pk" value="67087"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (TSort) </td> <td> GPT-4-Turbo-1106 </td> <td> 64k </td> <td> 6.0 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120836"> <input type="hidden" name="remove_metric_pk" value="67088"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (TSort) </td> <td> GPT-4-Turbo-1106 </td> <td> 128k </td> <td> 6.0 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120836"> <input type="hidden" name="remove_metric_pk" value="67089"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context 
Understanding </td> <td> Ada-LEval (TSort) </td> <td> GPT-4-Turbo-0125 </td> <td> 2k </td> <td> 15.5 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120838"> <input type="hidden" name="remove_metric_pk" value="67073"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (TSort) </td> <td> GPT-4-Turbo-0125 </td> <td> 4k </td> <td> 16.5 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120838"> <input type="hidden" name="remove_metric_pk" value="67074"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (TSort) </td> <td> GPT-4-Turbo-0125 </td> <td> 8k </td> <td> 8.5 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 
95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120838"> <input type="hidden" name="remove_metric_pk" value="67075"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (TSort) </td> <td> GPT-4-Turbo-0125 </td> <td> 16k </td> <td> 5.5 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120838"> <input type="hidden" name="remove_metric_pk" value="67076"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (TSort) </td> <td> GPT-4-Turbo-0125 </td> <td> 32k </td> <td> 2.0 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120838"> <input 
type="hidden" name="remove_metric_pk" value="67087"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (TSort) </td> <td> GPT-4-Turbo-0125 </td> <td> 64k </td> <td> 4.0 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120838"> <input type="hidden" name="remove_metric_pk" value="67088"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> Ada-LEval (TSort) </td> <td> GPT-4-Turbo-0125 </td> <td> 128k </td> <td> 2.0 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="120838"> <input type="hidden" name="remove_metric_pk" value="67089"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Common Sense Reasoning </td> <td> ARC (Challenge) </td> <td> GPT-4 (few-shot, k=25) </td> <td> Accuracy </td> <td> 96.4 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" 
data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="99242"> <input type="hidden" name="remove_metric_pk" value="5297"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Common Sense Reasoning </td> <td> ARC (Challenge) </td> <td> GPT-3.5 (few-shot, k=25) </td> <td> Accuracy </td> <td> 85.2 </td> <td> # 12 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="99243"> <input type="hidden" name="remove_metric_pk" value="5297"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Visual Question Answering </td> <td> BenchLMM </td> <td> GPT-4V </td> <td> GPT-3.5 score </td> <td> 58.37 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" 
value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="113615"> <input type="hidden" name="remove_metric_pk" value="64827"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Factual Inconsistency Detection in Chart Captioning </td> <td> CHOCOLATE-LLM </td> <td> GPT-4V </td> <td> Kendall&#x27;s Tau-c </td> <td> 0.205 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="115878"> <input type="hidden" name="remove_metric_pk" value="65632"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Visual Question Answering (VQA) </td> <td> CORE-MM </td> <td> GPT-4V </td> <td> Overall score </td> <td> 74.44 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="114226"> <input type="hidden" name="remove_metric_pk" value="65009"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Visual Question Answering (VQA) </td> 
<td> CORE-MM </td> <td> GPT-4V </td> <td> Deductive </td> <td> 74.86 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="114226"> <input type="hidden" name="remove_metric_pk" value="65010"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Visual Question Answering (VQA) </td> <td> CORE-MM </td> <td> GPT-4V </td> <td> Analogical </td> <td> 69.86 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="114226"> <input type="hidden" name="remove_metric_pk" value="65011"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Visual Question Answering (VQA) </td> <td> CORE-MM </td> <td> GPT-4V </td> <td> Params </td> <td> - </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 
34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="114226"> <input type="hidden" name="remove_metric_pk" value="65012"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Visual Question Answering (VQA) </td> <td> CORE-MM </td> <td> GPT-4V </td> <td> Abductive </td> <td> 77.88 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="114226"> <input type="hidden" name="remove_metric_pk" value="65013"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Question Answering </td> <td> DROP Test </td> <td> GPT-4 (few-shot, k=3) </td> <td> F1 </td> <td> 80.9 </td> <td> # 6 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="99248"> <input type="hidden" name="remove_metric_pk" value="5299"> <button type="submit" class="btn 
btn-danger"> - </button> </form> </td> </tr> <tr> <td> Question Answering </td> <td> DROP Test </td> <td> GPT 3.5 (few-shot, k=3) </td> <td> F1 </td> <td> 64.1 </td> <td> # 11 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="99249"> <input type="hidden" name="remove_metric_pk" value="5299"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Arithmetic Reasoning </td> <td> GSM8K </td> <td> GPT-3.5 (few-shot, k=5) </td> <td> Accuracy </td> <td> 57.1 </td> <td> # 115 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="99251"> <input type="hidden" name="remove_metric_pk" value="47389"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Sentence Completion </td> <td> HellaSwag </td> <td> GPT-4 (10-shot) </td> <td> Accuracy </td> <td> 95.3 </td> <td> # 4 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path 
d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="99240"> <input type="hidden" name="remove_metric_pk" value="5289"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Sentence Completion </td> <td> HellaSwag </td> <td> GPT-3.5 (10-shot) </td> <td> Accuracy </td> <td> 85.5 </td> <td> # 23 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="99241"> <input type="hidden" name="remove_metric_pk" value="5289"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Visual Question Answering (VQA) </td> <td> InfiMM-Eval </td> <td> GPT-4V </td> <td> Overall score </td> <td> 74.44 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" 
value="112622"> <input type="hidden" name="remove_metric_pk" value="64525"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Visual Question Answering (VQA) </td> <td> InfiMM-Eval </td> <td> GPT-4V </td> <td> Deductive </td> <td> 74.86 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="112622"> <input type="hidden" name="remove_metric_pk" value="64526"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Visual Question Answering (VQA) </td> <td> InfiMM-Eval </td> <td> GPT-4V </td> <td> Abductive </td> <td> 77.88 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="112622"> <input type="hidden" name="remove_metric_pk" value="64527"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Visual Question Answering (VQA) </td> <td> InfiMM-Eval </td> <td> GPT-4V </td> <td> Analogical </td> <td> 69.86 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper 
icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="112622"> <input type="hidden" name="remove_metric_pk" value="64528"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Legal Reasoning </td> <td> LegalBench (Rule-recall) </td> <td> GPT-4 </td> <td> Balanced Accuracy </td> <td> 59.2 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="108061"> <input type="hidden" name="remove_metric_pk" value="62825"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Zero-Shot Learning </td> <td> MedConceptsQA </td> <td> gpt-4-0125-preview </td> <td> Accuracy </td> <td> 52.489 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" 
name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="122047"> <input type="hidden" name="remove_metric_pk" value="67646"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Few-Shot Learning </td> <td> MedConceptsQA </td> <td> gpt-4-0125-preview </td> <td> Accuracy </td> <td> 61.911 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="122034"> <input type="hidden" name="remove_metric_pk" value="67644"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Multi-task Language Understanding </td> <td> MMLU </td> <td> GPT-3.5 Turbo </td> <td> Average (%) </td> <td> 70.0 </td> <td> # 38 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="114905"> <input type="hidden" name="remove_metric_pk" value="40964"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Multi-task Language Understanding </td> 
<td> MMLU </td> <td> GPT-4 (few-shot) </td> <td> Average (%) </td> <td> 86.4 </td> <td> # 8 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="114904"> <input type="hidden" name="remove_metric_pk" value="40964"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> MMNeedle </td> <td> GPT-4V </td> <td> 1 Image, 2*2 Stitching, Exact Accuracy </td> <td> 86.09 </td> <td> # 3 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="124088"> <input type="hidden" name="remove_metric_pk" value="68289"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> MMNeedle </td> <td> GPT-4V </td> <td> 1 Image, 4*4 Stitching, Exact Accuracy </td> <td> 54.72 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 
127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="124088"> <input type="hidden" name="remove_metric_pk" value="68290"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> MMNeedle </td> <td> GPT-4V </td> <td> 1 Image, 8*8 Stitching, Exact Accuracy </td> <td> 7.3 </td> <td> # 3 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="124088"> <input type="hidden" name="remove_metric_pk" value="68291"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> MMNeedle </td> <td> GPT-4V </td> <td> 10 Images, 1*1 Stitching, Exact Accuracy </td> <td> 72.36 </td> <td> # 3 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" 
name="remove_row_pk" value="124088"> <input type="hidden" name="remove_metric_pk" value="68303"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> MMNeedle </td> <td> GPT-4V </td> <td> 10 Images, 2*2 Stitching, Exact Accuracy </td> <td> 34.24 </td> <td> # 3 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="124088"> <input type="hidden" name="remove_metric_pk" value="68304"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> MMNeedle </td> <td> GPT-4V </td> <td> 10 Images, 4*4 Stitching, Exact Accuracy </td> <td> 7.58 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="124088"> <input type="hidden" name="remove_metric_pk" value="68305"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> MMNeedle </td> <td> GPT-4V </td> <td> 10 Images, 8*8 Stitching, Exact Accuracy 
</td> <td> 0 </td> <td> # 3 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="124088"> <input type="hidden" name="remove_metric_pk" value="68307"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> MMNeedle </td> <td> GPT-4o </td> <td> 1 Image, 2*2 Stitching, Exact Accuracy </td> <td> 94.6 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="124089"> <input type="hidden" name="remove_metric_pk" value="68289"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> MMNeedle </td> <td> GPT-4o </td> <td> 1 Image, 4*4 Stitching, Exact Accuracy </td> <td> 83 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 
34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="124089"> <input type="hidden" name="remove_metric_pk" value="68290"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> MMNeedle </td> <td> GPT-4o </td> <td> 1 Image, 8*8 Stitching, Exact Accuracy </td> <td> 19 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="124089"> <input type="hidden" name="remove_metric_pk" value="68291"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> MMNeedle </td> <td> GPT-4o </td> <td> 10 Images, 1*1 Stitching, Exact Accuracy </td> <td> 97 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="124089"> <input type="hidden" 
name="remove_metric_pk" value="68303"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> MMNeedle </td> <td> GPT-4o </td> <td> 10 Images, 2*2 Stitching, Exact Accuracy </td> <td> 81.8 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="124089"> <input type="hidden" name="remove_metric_pk" value="68304"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> MMNeedle </td> <td> GPT-4o </td> <td> 10 Images, 4*4 Stitching, Exact Accuracy </td> <td> 26.9 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="124089"> <input type="hidden" name="remove_metric_pk" value="68305"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Long-Context Understanding </td> <td> MMNeedle </td> <td> GPT-4o </td> <td> 10 Images, 8*8 Stitching, Exact Accuracy </td> <td> 1 </td> <td> # 1 </td> <td class="text-center"> 
<span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="124089"> <input type="hidden" name="remove_metric_pk" value="68307"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Visual Question Answering </td> <td> MM-Vet </td> <td> GPT-4o (gpt-4o-2024-05-13) </td> <td> GPT-4 score </td> <td> 69.3±0.1 </td> <td> # 8 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="122906"> <input type="hidden" name="remove_metric_pk" value="63393"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Visual Question Answering </td> <td> MM-Vet </td> <td> GPT-4V-Turbo-detail:low </td> <td> GPT-4 score </td> <td> 60.2±0.3 </td> <td> # 24 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" 
method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="112088"> <input type="hidden" name="remove_metric_pk" value="63393"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Visual Question Answering </td> <td> MM-Vet </td> <td> GPT-4V-Turbo-detail:high </td> <td> GPT-4 score </td> <td> 67.6±0.1 </td> <td> # 10 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="112087"> <input type="hidden" name="remove_metric_pk" value="63393"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Visual Question Answering </td> <td> MM-Vet </td> <td> GPT-4V </td> <td> GPT-4 score </td> <td> 67.7±0.3 </td> <td> # 9 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="110824"> <input type="hidden" name="remove_metric_pk" value="63393"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> 
Visual Question Answering </td> <td> MM-Vet v2 </td> <td> GPT-4o (gpt-4o-2024-05-13) </td> <td> GPT-4 score </td> <td> 71.0±0.2 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="127291"> <input type="hidden" name="remove_metric_pk" value="69311"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Visual Question Answering </td> <td> MM-Vet v2 </td> <td> GPT-4 Turbo (gpt-4-0125-preview) </td> <td> GPT-4 score </td> <td> 66.3±0.2 </td> <td> # 6 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="127299"> <input type="hidden" name="remove_metric_pk" value="69311"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (5-shot) </td> <td> Wasserstein Distance (WD) </td> <td> 80.6 </td> <td> # 6 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" 
height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117150"> <input type="hidden" name="remove_metric_pk" value="66164"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (5-shot) </td> <td> # Correct Groups </td> <td> 149 </td> <td> # 7 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117150"> <input type="hidden" name="remove_metric_pk" value="66166"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (5-shot) </td> <td> Fowlkes Mallows Score (FMS) </td> <td> 37.3 </td> <td> # 6 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" 
value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117150"> <input type="hidden" name="remove_metric_pk" value="66182"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (5-shot) </td> <td> Adjusted Rand Index (ARI) </td> <td> 22.0 </td> <td> # 6 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117150"> <input type="hidden" name="remove_metric_pk" value="66183"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (5-shot) </td> <td> Adjusted Mutual Information (AMI) </td> <td> 25.4 </td> <td> # 6 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117150"> <input type="hidden" name="remove_metric_pk" value="66184"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect 
Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (5-shot) </td> <td> # Solved Walls </td> <td> 2 </td> <td> # 7 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117150"> <input type="hidden" name="remove_metric_pk" value="66185"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (3-shot) </td> <td> Wasserstein Distance (WD) </td> <td> 80.9 </td> <td> # 7 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117149"> <input type="hidden" name="remove_metric_pk" value="66164"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (3-shot) </td> <td> # Correct Groups </td> <td> 140 </td> <td> # 8 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 
512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117149"> <input type="hidden" name="remove_metric_pk" value="66166"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (3-shot) </td> <td> Fowlkes Mallows Score (FMS) </td> <td> 36.8 </td> <td> # 7 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117149"> <input type="hidden" name="remove_metric_pk" value="66182"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (3-shot) </td> <td> Adjusted Rand Index (ARI) </td> <td> 21.3 </td> <td> # 7 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" 
value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117149"> <input type="hidden" name="remove_metric_pk" value="66183"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (3-shot) </td> <td> Adjusted Mutual Information (AMI) </td> <td> 24.7 </td> <td> # 7 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117149"> <input type="hidden" name="remove_metric_pk" value="66184"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (3-shot) </td> <td> # Solved Walls </td> <td> 0 </td> <td> # 10 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117149"> <input type="hidden" name="remove_metric_pk" value="66185"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset 
Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (5-shot) </td> <td> Wasserstein Distance (WD) </td> <td> 72.9 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117155"> <input type="hidden" name="remove_metric_pk" value="66164"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (5-shot) </td> <td> # Correct Groups </td> <td> 269 </td> <td> # 3 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117155"> <input type="hidden" name="remove_metric_pk" value="66166"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (5-shot) </td> <td> Fowlkes Mallows Score (FMS) </td> <td> 43.4 </td> <td> # 3 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" 
stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117155"> <input type="hidden" name="remove_metric_pk" value="66182"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (5-shot) </td> <td> Adjusted Rand Index (ARI) </td> <td> 29.1 </td> <td> # 3 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117155"> <input type="hidden" name="remove_metric_pk" value="66183"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (5-shot) </td> <td> Adjusted Mutual Information (AMI) </td> <td> 32.8 </td> <td> # 3 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" 
name="remove_row_pk" value="117155"> <input type="hidden" name="remove_metric_pk" value="66184"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (5-shot) </td> <td> # Solved Walls </td> <td> 7 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117155"> <input type="hidden" name="remove_metric_pk" value="66185"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (1-shot) </td> <td> Wasserstein Distance (WD) </td> <td> 82.3 </td> <td> # 9 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117148"> <input type="hidden" name="remove_metric_pk" value="66164"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (1-shot) </td> <td> # Correct Groups </td> <td> 123 
</td> <td> # 10 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117148"> <input type="hidden" name="remove_metric_pk" value="66166"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (1-shot) </td> <td> Fowlkes Mallows Score (FMS) </td> <td> 34.4 </td> <td> # 9 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117148"> <input type="hidden" name="remove_metric_pk" value="66182"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (1-shot) </td> <td> Adjusted Rand Index (ARI) </td> <td> 18.2 </td> <td> # 10 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 
128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117148"> <input type="hidden" name="remove_metric_pk" value="66183"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (1-shot) </td> <td> Adjusted Mutual Information (AMI) </td> <td> 21.2 </td> <td> # 10 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117148"> <input type="hidden" name="remove_metric_pk" value="66184"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (1-shot) </td> <td> # Solved Walls </td> <td> 0 </td> <td> # 10 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117148"> <input type="hidden" name="remove_metric_pk" 
value="66185"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (10-shot) </td> <td> Wasserstein Distance (WD) </td> <td> 81.2 </td> <td> # 8 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117151"> <input type="hidden" name="remove_metric_pk" value="66164"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (10-shot) </td> <td> # Correct Groups </td> <td> 137 </td> <td> # 9 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117151"> <input type="hidden" name="remove_metric_pk" value="66166"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (10-shot) </td> <td> Fowlkes Mallows Score (FMS) </td> <td> 36.1 </td> <td> # 8 </td> <td class="text-center"> <span 
class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117151"> <input type="hidden" name="remove_metric_pk" value="66182"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (10-shot) </td> <td> Adjusted Rand Index (ARI) </td> <td> 20.4 </td> <td> # 8 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117151"> <input type="hidden" name="remove_metric_pk" value="66183"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (10-shot) </td> <td> Adjusted Mutual Information (AMI) </td> <td> 24.0 </td> <td> # 8 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> 
<form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117151"> <input type="hidden" name="remove_metric_pk" value="66184"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (10-shot) </td> <td> # Solved Walls </td> <td> 2 </td> <td> # 7 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117151"> <input type="hidden" name="remove_metric_pk" value="66185"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (100-shot) </td> <td> Wasserstein Distance (WD) </td> <td> 73.6 </td> <td> # 3 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117156"> <input type="hidden" name="remove_metric_pk" value="66164"> <button type="submit" class="btn btn-danger"> - 
</button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (100-shot) </td> <td> # Correct Groups </td> <td> 249 </td> <td> # 5 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117156"> <input type="hidden" name="remove_metric_pk" value="66166"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (100-shot) </td> <td> Fowlkes Mallows Score (FMS) </td> <td> 42.8 </td> <td> # 4 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117156"> <input type="hidden" name="remove_metric_pk" value="66182"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (100-shot) </td> <td> Adjusted Rand Index (ARI) </td> <td> 28.5 </td> <td> # 4 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg 
xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117156"> <input type="hidden" name="remove_metric_pk" value="66183"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (100-shot) </td> <td> Adjusted Mutual Information (AMI) </td> <td> 32.3 </td> <td> # 4 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117156"> <input type="hidden" name="remove_metric_pk" value="66184"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (100-shot) </td> <td> # Solved Walls </td> <td> 3 </td> <td> # 6 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" 
value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117156"> <input type="hidden" name="remove_metric_pk" value="66185"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (3-shot) </td> <td> Wasserstein Distance (WD) </td> <td> 73.7 </td> <td> # 4 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117154"> <input type="hidden" name="remove_metric_pk" value="66164"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (3-shot) </td> <td> # Correct Groups </td> <td> 272 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117154"> <input type="hidden" name="remove_metric_pk" value="66166"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) 
</td> <td> OCW </td> <td> GPT-4 (3-shot) </td> <td> Fowlkes Mallows Score (FMS) </td> <td> 43.9 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117154"> <input type="hidden" name="remove_metric_pk" value="66182"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (3-shot) </td> <td> Adjusted Rand Index (ARI) </td> <td> 29.9 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117154"> <input type="hidden" name="remove_metric_pk" value="66183"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (3-shot) </td> <td> Adjusted Mutual Information (AMI) </td> <td> 33.6 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" 
stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117154"> <input type="hidden" name="remove_metric_pk" value="66184"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (3-shot) </td> <td> # Solved Walls </td> <td> 5 </td> <td> # 4 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117154"> <input type="hidden" name="remove_metric_pk" value="66185"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (1-shot) </td> <td> Wasserstein Distance (WD) </td> <td> 73.4 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" 
value="117153"> <input type="hidden" name="remove_metric_pk" value="66164"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (1-shot) </td> <td> # Correct Groups </td> <td> 262 </td> <td> # 4 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117153"> <input type="hidden" name="remove_metric_pk" value="66166"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (1-shot) </td> <td> Fowlkes Mallows Score (FMS) </td> <td> 43.7 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117153"> <input type="hidden" name="remove_metric_pk" value="66182"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (1-shot) </td> <td> Adjusted Rand Index (ARI) </td> <td> 29.7 </td> <td> # 2 </td> 
<td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117153"> <input type="hidden" name="remove_metric_pk" value="66183"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (1-shot) </td> <td> Adjusted Mutual Information (AMI) </td> <td> 33.5 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117153"> <input type="hidden" name="remove_metric_pk" value="66184"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (1-shot) </td> <td> # Solved Walls </td> <td> 4 </td> <td> # 5 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> 
<form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117153"> <input type="hidden" name="remove_metric_pk" value="66185"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (0-shot) </td> <td> Wasserstein Distance (WD) </td> <td> 75.8 </td> <td> # 5 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117152"> <input type="hidden" name="remove_metric_pk" value="66164"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (0-shot) </td> <td> # Correct Groups </td> <td> 239 </td> <td> # 6 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117152"> <input type="hidden" name="remove_metric_pk" value="66166"> <button type="submit" class="btn btn-danger"> - 
</button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (0-shot) </td> <td> Fowlkes Mallows Score (FMS) </td> <td> 41.5 </td> <td> # 5 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117152"> <input type="hidden" name="remove_metric_pk" value="66182"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (0-shot) </td> <td> Adjusted Rand Index (ARI) </td> <td> 27.2 </td> <td> # 5 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117152"> <input type="hidden" name="remove_metric_pk" value="66183"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (0-shot) </td> <td> Adjusted Mutual Information (AMI) </td> <td> 30.7 </td> <td> # 5 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg 
xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117152"> <input type="hidden" name="remove_metric_pk" value="66184"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-4 (0-shot) </td> <td> # Solved Walls </td> <td> 6 </td> <td> # 3 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117152"> <input type="hidden" name="remove_metric_pk" value="66185"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (0-shot) </td> <td> Wasserstein Distance (WD) </td> <td> 82.5 </td> <td> # 10 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" 
value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117147"> <input type="hidden" name="remove_metric_pk" value="66164"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (0-shot) </td> <td> # Correct Groups </td> <td> 114 </td> <td> # 11 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117147"> <input type="hidden" name="remove_metric_pk" value="66166"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (0-shot) </td> <td> Fowlkes Mallows Score (FMS) </td> <td> 34.0 </td> <td> # 10 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117147"> <input type="hidden" name="remove_metric_pk" value="66182"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset 
Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (0-shot) </td> <td> Adjusted Rand Index (ARI) </td> <td> 18.4 </td> <td> # 9 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117147"> <input type="hidden" name="remove_metric_pk" value="66183"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (0-shot) </td> <td> Adjusted Mutual Information (AMI) </td> <td> 21.6 </td> <td> # 9 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117147"> <input type="hidden" name="remove_metric_pk" value="66184"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Only Connect Walls Dataset Task 1 (Grouping) </td> <td> OCW </td> <td> GPT-3.5-turbo (0-shot) </td> <td> # Solved Walls </td> <td> 0 </td> <td> # 10 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 
512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="117147"> <input type="hidden" name="remove_metric_pk" value="66185"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Question Answering </td> <td> TriviaQA </td> <td> GPT-4-0613 (Zero-shot) </td> <td> EM </td> <td> 84.8 </td> <td> # 8 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="125376"> <input type="hidden" name="remove_metric_pk" value="539"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Question Answering </td> <td> TruthfulQA </td> <td> GPT-4 (RLHF) </td> <td> MC1 </td> <td> 0.59 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="110275"> <input 
type="hidden" name="remove_metric_pk" value="33115"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Visual Question Answering </td> <td> ViP-Bench </td> <td> GPT-4V-turbo-detail:high (Visual Prompt) </td> <td> GPT-4 score (bbox) </td> <td> 60.7 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="114160"> <input type="hidden" name="remove_metric_pk" value="64991"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Visual Question Answering </td> <td> ViP-Bench </td> <td> GPT-4V-turbo-detail:high (Visual Prompt) </td> <td> GPT-4 score (human) </td> <td> 59.9 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="114160"> <input type="hidden" name="remove_metric_pk" value="64992"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Visual Question Answering </td> <td> ViP-Bench </td> <td> GPT-4V-turbo-detail:low (Visual Prompt) </td> <td> GPT-4 score (bbox) </td> <td> 
52.8 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="114368"> <input type="hidden" name="remove_metric_pk" value="64991"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Visual Question Answering </td> <td> ViP-Bench </td> <td> GPT-4V-turbo-detail:low (Visual Prompt) </td> <td> GPT-4 score (human) </td> <td> 51.4 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="114368"> <input type="hidden" name="remove_metric_pk" value="64992"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Common Sense Reasoning </td> <td> WinoGrande </td> <td> GPT-3.5 (5-shot) </td> <td> Accuracy </td> <td> 81.6 </td> <td> # 14 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 
0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="99245"> <input type="hidden" name="remove_metric_pk" value="59124"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> <tr> <td> Common Sense Reasoning </td> <td> WinoGrande </td> <td> GPT-4 (5-shot) </td> <td> Accuracy </td> <td> 87.5 </td> <td> # 7 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <input type="hidden" name="remove_row_pk" value="99244"> <input type="hidden" name="remove_metric_pk" value="59124"> <button type="submit" class="btn btn-danger"> - </button> </form> </td> </tr> </table> </div> </div> </form> </div> </div> </div> <!-- Add Task --> <div class="modal fade" id="addTask" role="dialog" aria-labelledby="addTaskLabel" aria-hidden="true"> <div class="modal-dialog" role="document"> <div class="modal-content"> <div class="modal-header"> <h5 class="modal-title" id="addTaskLabel">Add a task</h5> <button type="button" class="close btn-close" data-bs-dismiss="modal" aria-label="Close"> <span aria-hidden="true">&times;</span> </button> </div> <div class="modal-body"> <div class="current-tasks-title">Attached tasks:</div> <ul class="list-unstyled"> <li> <a href="/task/arithmetic-reasoning"> <span class="badge badge-primary">ARITHMETIC REASONING</span> </a> </li> </ul> <ul class="list-unstyled"> <li> <a href="/task/bug-fixing"> <span class="badge 
badge-primary">BUG FIXING</span> </a> </li> </ul> <ul class="list-unstyled"> <li> <a href="/task/code-generation"> <span class="badge badge-primary">CODE GENERATION</span> </a> </li> </ul> <ul class="list-unstyled"> <li> <a href="/task/common-sense-reasoning"> <span class="badge badge-primary">COMMON SENSE REASONING</span> </a> </li> </ul> <ul class="list-unstyled"> <li> <a href="/task/factual-inconsistency-detection-in-chart"> <span class="badge badge-primary">FACTUAL INCONSISTENCY DETECTION IN CHART CAPTIONING</span> </a> </li> </ul> <ul class="list-unstyled"> <li> <a href="/task/few-shot-learning"> <span class="badge badge-primary">FEW-SHOT LEARNING</span> </a> </li> </ul> <ul class="list-unstyled"> <li> <a href="/task/image-retrieval"> <span class="badge badge-primary">IMAGE RETRIEVAL</span> </a> </li> </ul> <ul class="list-unstyled"> <li> <a href="/task/legal-reasoning"> <span class="badge badge-primary">LEGAL REASONING</span> </a> </li> </ul> <ul class="list-unstyled"> <li> <a href="/task/long-context-understanding"> <span class="badge badge-primary">LONG-CONTEXT UNDERSTANDING</span> </a> </li> </ul> <ul class="list-unstyled"> <li> <a href="/task/math"> <span class="badge badge-primary">MATH</span> </a> </li> </ul> <ul class="list-unstyled"> <li> <a href="/task/mmr-total"> <span class="badge badge-primary">MMR TOTAL</span> </a> </li> </ul> <ul class="list-unstyled"> <li> <a href="/task/multi-task-language-understanding"> <span class="badge badge-primary">MULTI-TASK LANGUAGE UNDERSTANDING</span> </a> </li> </ul> <ul class="list-unstyled"> <li> <a href="/task/task-1-grouping"> <span class="badge badge-primary">ONLY CONNECT WALLS DATASET TASK 1 (GROUPING)</span> </a> </li> </ul> <ul class="list-unstyled"> <li> <a href="/task/question-answering"> <span class="badge badge-primary">QUESTION ANSWERING</span> </a> </li> </ul> <ul class="list-unstyled"> <li> <a href="/task/sentence-completion"> <span class="badge badge-primary">SENTENCE COMPLETION</span> </a> </li> 
</ul> <ul class="list-unstyled"> <li> <a href="/task/visual-question-answering-1"> <span class="badge badge-primary">VISUAL QUESTION ANSWERING</span> </a> </li> </ul> <ul class="list-unstyled"> <li> <a href="/task/visual-question-answering"> <span class="badge badge-primary">VISUAL QUESTION ANSWERING (VQA)</span> </a> </li> </ul> <ul class="list-unstyled"> <li> <a href="/task/zero-shot-learning"> <span class="badge badge-primary">ZERO-SHOT LEARNING</span> </a> </li> </ul> <form action="" method="post"> <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7"> <div id="div_id_task" class="form-group"> <label for="id_task" class=""> Add: </label> <div class=""> <select name="task" class="modelselect2 form-control" id="id_task" data-autocomplete-light-language="en" data-autocomplete-light-url="/task-autocomplete/" data-autocomplete-light-function="select2"> <option value="" selected>---------</option> </select> </div> </div> <div class="modal-help-text"> Not in the list?<br/> <a href="#" id="new-task-form-toggle"> <span class=" icon-wrapper icon-ion" data-name="add"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M256 112v288m144-144H112"/></svg></span> Create a new task</a>. 
</div> <div id="new-task-form" style="display: none"> <div id="div_id_new_task_name" class="form-group"> <label for="id_new_task_name" class=""> New task name: </label> <div class=""> <input type="text" name="new_task_name" maxlength="200" class="textinput textInput form-control" id="id_new_task_name"> </div> </div> <div id="div_id_new_task_area" class="form-group"> <label for="id_new_task_area" class=""> Top-level area: </label> <div class=""> <select name="new_task_area" class="select form-control" id="id_new_task_area"> <option value="" selected>---------</option> <option value="17">Adversarial</option> <option value="18">Audio</option> <option value="11">Computer Code</option> <option value="3">Computer Vision</option> <option value="9">Graphs</option> <option value="15">Knowledge Base</option> <option value="7">Medical</option> <option value="6">Methodology</option> <option value="5">Miscellaneous</option> <option value="12">Music</option> <option value="4">Natural Language Processing</option> <option value="13">Playing Games</option> <option value="14">Reasoning</option> <option value="16">Robots</option> <option value="10">Speech</option> <option value="8">Time Series</option> </select> </div> </div> <div id="div_id_new_task_parent" class="form-group"> <label for="id_new_task_parent" class=""> Parent task (if any): </label> <div class=""> <select name="new_task_parent" class="modelselect2 form-control" id="id_new_task_parent" data-autocomplete-light-language="en" data-autocomplete-light-url="/task-and-tag-autocomplete/" data-autocomplete-light-function="select2"> <option value="" selected>---------</option> </select> </div> </div> <div id="div_id_new_task_desc" class="form-group"> <label for="id_new_task_desc" class=""> Description: </label> <div class=""> <textarea name="new_task_desc" cols="40" rows="3" class="textarea form-control" id="id_new_task_desc"> </textarea> </div> </div> </div> <div class="modal-footer"> <button type="submit" class="btn 
btn-primary"> Submit </button> </div> </form> </div> </div> </div> </div>

<!-- Remove Task -->
<!--
  Modal listing every task attached to this paper. Each row is its own POST
  form carrying the CSRF token plus the primary key of the task to detach
  (remove_task_pk). Forms are siblings, never nested: HTML forbids a form
  inside a form, and the parser silently drops or re-parents nested ones.
  Each form lives inside its li because ul may only contain li children.
-->
<div class="modal fade" id="removeTask" tabindex="-1" role="dialog" aria-labelledby="removeTaskLabel" aria-hidden="true">
  <div class="modal-dialog" role="document">
    <div class="modal-content">
      <div class="modal-header">
        <h5 class="modal-title" id="removeTaskLabel">Remove a task</h5>
        <button type="button" class="close btn-close" data-bs-dismiss="modal" aria-label="Close">
          <span aria-hidden="true">&times;</span>
        </button>
      </div>
      <div class="modal-body">
        <!-- Thumbnails are decorative (the task name follows as text), hence alt="". -->
        <ul class="list-unstyled paper-tasks">
          <li>
            <form action="" method="post">
              <a href="/task/arithmetic-reasoning">
                <span class="badge badge-primary">
                  <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
                  <span>Arithmetic Reasoning</span>
                </span>
              </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_task_pk" value="3721">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove Arithmetic Reasoning">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled paper-tasks">
          <li>
            <form action="" method="post">
              <a href="/task/bug-fixing">
                <span class="badge badge-primary">
                  <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
                  <span>Bug fixing</span>
                </span>
              </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_task_pk" value="5184">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove Bug fixing">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled paper-tasks">
          <li>
            <form action="" method="post">
              <a href="/task/code-generation">
                <span class="badge badge-primary">
                  <img src="https://production-media.paperswithcode.com/thumbnails/task/658fcf6a-52ca-4865-b111-28bf61c346dc.jpg" alt="">
                  <span>Code Generation</span>
                </span>
              </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_task_pk" value="211">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove Code Generation">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled paper-tasks">
          <li>
            <form action="" method="post">
              <a href="/task/common-sense-reasoning">
                <span class="badge badge-primary">
                  <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
                  <span>Common Sense Reasoning</span>
                </span>
              </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_task_pk" value="252">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove Common Sense Reasoning">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled paper-tasks">
          <li>
            <form action="" method="post">
              <a href="/task/factual-inconsistency-detection-in-chart">
                <span class="badge badge-primary">
                  <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
                  <span>Factual Inconsistency Detection in Chart Captioning</span>
                </span>
              </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_task_pk" value="5390">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove Factual Inconsistency Detection in Chart Captioning">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled paper-tasks">
          <li>
            <form action="" method="post">
              <a href="/task/few-shot-learning">
                <span class="badge badge-primary">
                  <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
                  <span>Few-Shot Learning</span>
                </span>
              </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_task_pk" value="153">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove Few-Shot Learning">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled paper-tasks">
          <li>
            <form action="" method="post">
              <a href="/task/image-retrieval">
                <span class="badge badge-primary">
                  <img src="https://production-media.paperswithcode.com/thumbnails/task/834263fd-0f2e-47a9-bda1-0fd3f44c71df.jpg" alt="">
                  <span>Image Retrieval</span>
                </span>
              </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_task_pk" value="697">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove Image Retrieval">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled paper-tasks">
          <li>
            <form action="" method="post">
              <a href="/task/legal-reasoning">
                <span class="badge badge-primary">
                  <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
                  <span>Legal Reasoning</span>
                </span>
              </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_task_pk" value="5063">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove Legal Reasoning">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled paper-tasks">
          <li>
            <form action="" method="post">
              <a href="/task/long-context-understanding">
                <span class="badge badge-primary">
                  <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
                  <span>Long-Context Understanding</span>
                </span>
              </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_task_pk" value="5550">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove Long-Context Understanding">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled paper-tasks">
          <li>
            <form action="" method="post">
              <a href="/task/math">
                <span class="badge badge-primary">
                  <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
                  <span>Math</span>
                </span>
              </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_task_pk" value="5266">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove Math">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled paper-tasks">
          <li>
            <form action="" method="post">
              <a href="/task/mmr-total">
                <span class="badge badge-primary">
                  <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
                  <span>MMR total</span>
                </span>
              </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_task_pk" value="5831">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove MMR total">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled paper-tasks">
          <li>
            <form action="" method="post">
              <a href="/task/multi-task-language-understanding">
                <span class="badge badge-primary">
                  <img src="https://production-media.paperswithcode.com/thumbnails/task/93c61892-0ee4-4896-8201-6f7497f6058e.jpg" alt="">
                  <span>Multi-task Language Understanding</span>
                </span>
              </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_task_pk" value="3294">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove Multi-task Language Understanding">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled paper-tasks">
          <li>
            <form action="" method="post">
              <a href="/task/task-1-grouping">
                <span class="badge badge-primary">
                  <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
                  <span>Only Connect Walls Dataset Task 1 (Grouping)</span>
                </span>
              </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_task_pk" value="5455">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove Only Connect Walls Dataset Task 1 (Grouping)">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled paper-tasks">
          <li>
            <form action="" method="post">
              <a href="/task/question-answering">
                <span class="badge badge-primary">
                  <img src="https://production-media.paperswithcode.com/thumbnails/task/56ae901a-265f-415f-b175-ce54133d648b.jpg" alt="">
                  <span>Question Answering</span>
                </span>
              </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_task_pk" value="9">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove Question Answering">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled paper-tasks">
          <li>
            <form action="" method="post">
              <a href="/task/sentence-completion">
                <span class="badge badge-primary">
                  <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
                  <span>Sentence Completion</span>
                </span>
              </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_task_pk" value="2023">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove Sentence Completion">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled paper-tasks">
          <li>
            <form action="" method="post">
              <a href="/task/visual-question-answering-1">
                <span class="badge badge-primary">
                  <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
                  <span>Visual Question Answering</span>
                </span>
              </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_task_pk" value="4750">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove Visual Question Answering">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled paper-tasks">
          <li>
            <form action="" method="post">
              <a href="/task/visual-question-answering">
                <span class="badge badge-primary">
                  <img src="https://production-media.paperswithcode.com/thumbnails/task/d0abcf9a-29fb-4295-9b2d-10d82222ccc9.jpg" alt="">
                  <span>Visual Question Answering (VQA)</span>
                </span>
              </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_task_pk" value="168">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove Visual Question Answering (VQA)">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled paper-tasks">
          <li>
            <form action="" method="post">
              <a href="/task/zero-shot-learning">
                <span class="badge badge-primary">
                  <img src="https://production-media.paperswithcode.com/thumbnails/task/task-0000000158-1446b13d_bCNDpWB.jpg" alt="">
                  <span>Zero-Shot Learning</span>
                </span>
              </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_task_pk" value="158">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove Zero-Shot Learning">- </button>
            </form>
          </li>
        </ul>
      </div>
    </div>
  </div>
</div>

<!-- Add Method -->
<div class="modal fade" id="addMethod" role="dialog" aria-labelledby="addMethodLabel" aria-hidden="true">
  <div class="modal-dialog" role="document">
    <div class="modal-content">
      <div class="modal-header">
        <h5 class="modal-title" id="addMethodLabel">Add a method</h5>
        <button type="button" class="close btn-close" data-bs-dismiss="modal" aria-label="Close">
          <span aria-hidden="true">&times;</span>
        </button>
      </div>
      <div class="modal-body">
        <div class="current-methods-title">Attached methods:</div>
        <ul class="list-unstyled"> <li> <a href="/method/absolute-position-encodings"> <span class="badge badge-primary">ABSOLUTE POSITION ENCODINGS</span> </a> </li> </ul>
        <ul class="list-unstyled"> <li> <a href="/method/adam"> <span class="badge badge-primary">ADAM</span> </a> </li> </ul>
        <ul class="list-unstyled"> <li> <a href="/method/bpe"> <span class="badge badge-primary">BPE</span> </a> </li> </ul>
        <ul class="list-unstyled"> <li> <a href="/method/dense-connections"> <span class="badge badge-primary">DENSE CONNECTIONS</span> </a> </li> </ul>
        <ul class="list-unstyled"> <li> <a href="/method/dropout"> <span class="badge badge-primary">DROPOUT</span> </a> </li> </ul>
        <ul class="list-unstyled"> <li> <a href="/method/gpt-4"> <span
class="badge badge-primary">GPT-4</span> </a> </li> </ul>
        <ul class="list-unstyled"> <li> <a href="/method/label-smoothing"> <span class="badge badge-primary">LABEL SMOOTHING</span> </a> </li> </ul>
        <ul class="list-unstyled"> <li> <a href="/method/layer-normalization"> <span class="badge badge-primary">LAYER NORMALIZATION</span> </a> </li> </ul>
        <ul class="list-unstyled"> <li> <a href="/method/linear-layer"> <span class="badge badge-primary">LINEAR LAYER</span> </a> </li> </ul>
        <ul class="list-unstyled"> <li> <a href="/method/multi-head-attention"> <span class="badge badge-primary">MULTI-HEAD ATTENTION</span> </a> </li> </ul>
        <ul class="list-unstyled"> <li> <a href="/method/position-wise-feed-forward-layer"> <span class="badge badge-primary">POSITION-WISE FEED-FORWARD LAYER</span> </a> </li> </ul>
        <ul class="list-unstyled"> <li> <a href="/method/residual-connection"> <span class="badge badge-primary">RESIDUAL CONNECTION</span> </a> </li> </ul>
        <ul class="list-unstyled"> <li> <a href="/method/scaled"> <span class="badge badge-primary">SCALED DOT-PRODUCT ATTENTION</span> </a> </li> </ul>
        <ul class="list-unstyled"> <li> <a href="/method/softmax"> <span class="badge badge-primary">SOFTMAX</span> </a> </li> </ul>
        <ul class="list-unstyled"> <li> <a href="/method/test"> <span class="badge badge-primary">TEST</span> </a> </li> </ul>
        <ul class="list-unstyled"> <li> <a href="/method/transformer"> <span class="badge badge-primary">TRANSFORMER</span> </a> </li> </ul>

        <!--
          Attach-a-method form. Posts the CSRF token plus either an existing
          method chosen in the "method" select, or the new_method_* fields
          from the initially hidden #new-method-form section below.
        -->
        <form action="" method="post">
          <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
          <div id="div_id_method" class="form-group">
            <label for="id_method" class=""> Add: </label>
            <div class="">
              <!-- Options are not rendered inline; the data-autocomplete-light-* attributes point at /method-autocomplete/. -->
              <select name="method" class="modelselect2 form-control" id="id_method" data-autocomplete-light-language="en" data-autocomplete-light-url="/method-autocomplete/" data-autocomplete-light-function="select2">
                <option value="" selected>---------</option>
              </select>
            </div>
          </div>
          <div class="modal-help-text">
            Not in the list?<br>
            <!-- NOTE(review): presumably a script bound to this id reveals #new-method-form; as an in-page action this would be better as a button once the bound JS/CSS is confirmed. -->
            <!-- The plus icon is decorative (the link text names the action), so it is hidden from assistive technology. -->
            <a href="#" id="new-method-form-toggle"> <span class=" icon-wrapper icon-ion" data-name="add"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true" focusable="false"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M256 112v288m144-144H112"/></svg></span> Create a new method</a>.
          </div>
          <div id="new-method-form" style="display: none">
            <div id="div_id_new_method_name" class="form-group">
              <label for="id_new_method_name" class=""> <b>New method name</b> (e.g. ReLU): </label>
              <div class="">
                <input type="text" name="new_method_name" maxlength="200" minlength="2" class="textinput textInput form-control" id="id_new_method_name">
              </div>
            </div>
            <div id="div_id_new_method_full_name" class="form-group">
              <label for="id_new_method_full_name" class=""> <b>New method full name</b> (e.g. Rectified Linear Unit): </label>
              <div class="">
                <input type="text" name="new_method_full_name" maxlength="200" minlength="2" class="textinput textInput form-control" id="id_new_method_full_name">
              </div>
            </div>
            <div id="div_id_new_method_paper" class="form-group">
              <label for="id_new_method_paper" class=""> <b>Paper where method was first introduced</b>: </label>
              <div class="">
                <select name="new_method_paper" class="modelselect2 form-control" id="id_new_method_paper" data-autocomplete-light-language="en" data-autocomplete-light-url="/paper-autocomplete/" data-autocomplete-light-function="select2">
                  <option value="" selected>---------</option>
                </select>
              </div>
            </div>
            <div id="div_id_new_method_collection" class="form-group">
              <label for="id_new_method_collection" class=""> <b>Method category</b> (e.g.
Activation Functions): <i>If no match, add something for now then you can add a new category afterwards.</i> </label>
              <div class="">
                <select name="new_method_collection" class="modelselect2 form-control" id="id_new_method_collection" data-autocomplete-light-language="en" data-autocomplete-light-url="/method-collection-autocomplete/" data-autocomplete-light-function="select2">
                  <option value="" selected>---------</option>
                </select>
              </div>
            </div>
            <div id="div_id_new_method_desc" class="form-group">
              <label for="id_new_method_desc" class=""> <b>Markdown description</b> (optional; $\LaTeX$ enabled): <i>You can edit this later, so feel free to start with something succinct.</i> </label>
              <div class="">
                <!-- Keep the tags adjacent: anything between them becomes the field's default value. -->
                <textarea name="new_method_desc" cols="40" rows="10" class="textarea form-control" id="id_new_method_desc"></textarea>
              </div>
            </div>
          </div>
          <div class="modal-footer">
            <button type="submit" class="btn btn-primary"> Submit </button>
          </div>
        </form>
      </div>
    </div>
  </div>
</div>

<!-- Remove Method -->
<!--
  Modal listing every method attached to this paper. Each row is its own POST
  form carrying the CSRF token plus the primary key of the method to detach
  (remove_method_pk). Forms are siblings, never nested: HTML forbids a form
  inside a form, and the parser silently drops or re-parents nested ones.
  Each form lives inside its li because ul may only contain li children.
-->
<div class="modal fade" id="removeMethod" tabindex="-1" role="dialog" aria-labelledby="removeMethodLabel" aria-hidden="true">
  <div class="modal-dialog" role="document">
    <div class="modal-content">
      <div class="modal-header">
        <h5 class="modal-title" id="removeMethodLabel">Remove a method</h5>
        <button type="button" class="close btn-close" data-bs-dismiss="modal" aria-label="Close">
          <span aria-hidden="true">&times;</span>
        </button>
      </div>
      <div class="modal-body">
        <ul class="list-unstyled">
          <li>
            <form action="" method="post">
              <a href="/method/absolute-position-encodings"> <span class="badge badge-primary">ABSOLUTE POSITION ENCODINGS</span> </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_method_pk" value="1604">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove ABSOLUTE POSITION ENCODINGS">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled">
          <li>
            <form action="" method="post">
              <a href="/method/adam"> <span class="badge badge-primary">ADAM</span> </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_method_pk" value="496">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove ADAM">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled">
          <li>
            <form action="" method="post">
              <a href="/method/bpe"> <span class="badge badge-primary">BPE</span> </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_method_pk" value="58">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove BPE">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled">
          <li>
            <form action="" method="post">
              <a href="/method/dense-connections"> <span class="badge badge-primary">DENSE CONNECTIONS</span> </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_method_pk" value="374">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove DENSE CONNECTIONS">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled">
          <li>
            <form action="" method="post">
              <a href="/method/dropout"> <span class="badge badge-primary">DROPOUT</span> </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_method_pk" value="169">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove DROPOUT">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled">
          <li>
            <form action="" method="post">
              <a href="/method/gpt-4"> <span class="badge badge-primary">GPT-4</span> </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_method_pk" value="2137">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove GPT-4">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled">
          <li>
            <form action="" method="post">
              <a href="/method/label-smoothing"> <span class="badge badge-primary">LABEL SMOOTHING</span> </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_method_pk" value="335">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove LABEL SMOOTHING">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled">
          <li>
            <form action="" method="post">
              <a href="/method/layer-normalization"> <span class="badge badge-primary">LAYER NORMALIZATION</span> </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_method_pk" value="386">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove LAYER NORMALIZATION">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled">
          <li>
            <form action="" method="post">
              <a href="/method/linear-layer"> <span class="badge badge-primary">LINEAR LAYER</span> </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_method_pk" value="57">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove LINEAR LAYER">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled">
          <li>
            <form action="" method="post">
              <a href="/method/multi-head-attention"> <span class="badge badge-primary">MULTI-HEAD ATTENTION</span> </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_method_pk" value="37">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove MULTI-HEAD ATTENTION">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled">
          <li>
            <form action="" method="post">
              <a href="/method/position-wise-feed-forward-layer"> <span class="badge badge-primary">POSITION-WISE FEED-FORWARD LAYER</span> </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_method_pk" value="1602">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove POSITION-WISE FEED-FORWARD LAYER">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled">
          <li>
            <form action="" method="post">
              <a href="/method/residual-connection"> <span class="badge badge-primary">RESIDUAL CONNECTION</span> </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_method_pk" value="9">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove RESIDUAL CONNECTION">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled">
          <li>
            <form action="" method="post">
              <a href="/method/scaled"> <span class="badge badge-primary">SCALED DOT-PRODUCT ATTENTION</span> </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_method_pk" value="6">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove SCALED DOT-PRODUCT ATTENTION">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled">
          <li>
            <form action="" method="post">
              <a href="/method/softmax"> <span class="badge badge-primary">SOFTMAX</span> </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_method_pk" value="537">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove SOFTMAX">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled">
          <li>
            <form action="" method="post">
              <a href="/method/test"> <span class="badge badge-primary">TEST</span> </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_method_pk" value="1114">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove TEST">- </button>
            </form>
          </li>
        </ul>
        <ul class="list-unstyled">
          <li>
            <form action="" method="post">
              <a href="/method/transformer"> <span class="badge badge-primary">TRANSFORMER</span> </a>
              <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
              <input type="hidden" name="remove_method_pk" value="50">
              <button type="submit" class="btn btn-danger" style="width:2.5em" aria-label="Remove TRANSFORMER">- </button>
            </form>
          </li>
        </ul>
      </div>
    </div>
  </div>
</div>

<!-- Badge Modal -->
<div class="modal fade" id="badgeModal" tabindex="-1" role="dialog" aria-labelledby="badgeModalLabel" aria-hidden="true" > <div class="modal-dialog modal-lg" role="document"> <div class="modal-content modal-badge"> <div class="row"> <div class="col-md-12 paper-evaluation-section-title"> <div class="paper-section-title"> <div class="row"> <div class="col-md-12 zero-padding"> <h1>🦡 Badges</h1> <hr/> </div> </div> </div> </div> </div> <div class="paper-evaluation-section" id="badges"> <div class="row"> <div class="col-md-12"> <p> Include the markdown at the top of your GitHub <code>README.md</code> file to showcase the performance of the model. </p> <p> Badges are live and will be dynamically updated with the latest ranking of this paper.
</p> <div class="sota-table badge-table"> <table class="table-striped"> <tr> <th> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> Badge </th> <th style="width: 50%">Markdown </th> </tr> <tr> <td> <img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/long-context-understanding-on-ada-leval" /> </td> <td> <code>[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/long-context-understanding-on-ada-leval)](https://paperswithcode.com/sota/long-context-understanding-on-ada-leval?p=gpt-4-technical-report-1)</code> </td> </tr> <tr> <td> <img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/long-context-understanding-on-ada-leval-tsort" /> </td> <td> 
<code>[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/long-context-understanding-on-ada-leval-tsort)](https://paperswithcode.com/sota/long-context-understanding-on-ada-leval-tsort?p=gpt-4-technical-report-1)</code> </td> </tr> <tr> <td> <img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/common-sense-reasoning-on-arc-challenge" /> </td> <td> <code>[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/common-sense-reasoning-on-arc-challenge)](https://paperswithcode.com/sota/common-sense-reasoning-on-arc-challenge?p=gpt-4-technical-report-1)</code> </td> </tr> <tr> <td> <img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/visual-question-answering-on-benchlmm" /> </td> <td> <code>[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/visual-question-answering-on-benchlmm)](https://paperswithcode.com/sota/visual-question-answering-on-benchlmm?p=gpt-4-technical-report-1)</code> </td> </tr> <tr> <td> <img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/factual-inconsistency-detection-in-chart-1" /> </td> <td> <code>[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/factual-inconsistency-detection-in-chart-1)](https://paperswithcode.com/sota/factual-inconsistency-detection-in-chart-1?p=gpt-4-technical-report-1)</code> </td> </tr> <tr> <td> <img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/visual-question-answering-vqa-on-core-mm-1" /> </td> <td> 
<code>[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/visual-question-answering-vqa-on-core-mm-1)](https://paperswithcode.com/sota/visual-question-answering-vqa-on-core-mm-1?p=gpt-4-technical-report-1)</code> </td> </tr> <tr> <td> <img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/visual-question-answering-vqa-on-core-mm" /> </td> <td> <code>[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/visual-question-answering-vqa-on-core-mm)](https://paperswithcode.com/sota/visual-question-answering-vqa-on-core-mm?p=gpt-4-technical-report-1)</code> </td> </tr> <tr> <td> <img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/legal-reasoning-on-legalbench-rule-recall" /> </td> <td> <code>[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/legal-reasoning-on-legalbench-rule-recall)](https://paperswithcode.com/sota/legal-reasoning-on-legalbench-rule-recall?p=gpt-4-technical-report-1)</code> </td> </tr> <tr> <td> <img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/zero-shot-learning-on-medconceptsqa" /> </td> <td> <code>[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/zero-shot-learning-on-medconceptsqa)](https://paperswithcode.com/sota/zero-shot-learning-on-medconceptsqa?p=gpt-4-technical-report-1)</code> </td> </tr> <tr> <td> <img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/few-shot-learning-on-medconceptsqa" /> </td> <td> 
<code>[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/few-shot-learning-on-medconceptsqa)](https://paperswithcode.com/sota/few-shot-learning-on-medconceptsqa?p=gpt-4-technical-report-1)</code> </td> </tr> <tr> <td> <img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/long-context-understanding-on-mmneedle" /> </td> <td> <code>[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/long-context-understanding-on-mmneedle)](https://paperswithcode.com/sota/long-context-understanding-on-mmneedle?p=gpt-4-technical-report-1)</code> </td> </tr> <tr> <td> <img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/task-1-grouping-on-ocw" /> </td> <td> <code>[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/task-1-grouping-on-ocw)](https://paperswithcode.com/sota/task-1-grouping-on-ocw?p=gpt-4-technical-report-1)</code> </td> </tr> <tr> <td> <img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/question-answering-on-truthfulqa" /> </td> <td> <code>[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/question-answering-on-truthfulqa)](https://paperswithcode.com/sota/question-answering-on-truthfulqa?p=gpt-4-technical-report-1)</code> </td> </tr> <tr> <td> <img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/visual-question-answering-on-vip-bench" /> </td> <td> <code>[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/visual-question-answering-on-vip-bench)](https://paperswithcode.com/sota/visual-question-answering-on-vip-bench?p=gpt-4-technical-report-1)</code> </td> </tr> <tr> <td> <img 
src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/visual-question-answering-on-mm-vet-v2" /> </td> <td> <code>[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/visual-question-answering-on-mm-vet-v2)](https://paperswithcode.com/sota/visual-question-answering-on-mm-vet-v2?p=gpt-4-technical-report-1)</code> </td> </tr> <tr> <td> <img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/sentence-completion-on-hellaswag" /> </td> <td> <code>[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/sentence-completion-on-hellaswag)](https://paperswithcode.com/sota/sentence-completion-on-hellaswag?p=gpt-4-technical-report-1)</code> </td> </tr> <tr> <td> <img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/question-answering-on-drop-test" /> </td> <td> <code>[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/question-answering-on-drop-test)](https://paperswithcode.com/sota/question-answering-on-drop-test?p=gpt-4-technical-report-1)</code> </td> </tr> <tr> <td> <img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/common-sense-reasoning-on-winogrande" /> </td> <td> <code>[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/common-sense-reasoning-on-winogrande)](https://paperswithcode.com/sota/common-sense-reasoning-on-winogrande?p=gpt-4-technical-report-1)</code> </td> </tr> <tr> <td> <img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/multi-task-language-understanding-on-mmlu" /> </td> <td> 
<code>[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/multi-task-language-understanding-on-mmlu)](https://paperswithcode.com/sota/multi-task-language-understanding-on-mmlu?p=gpt-4-technical-report-1)</code> </td> </tr> <tr> <td> <img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/visual-question-answering-on-mm-vet" /> </td> <td> <code>[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/visual-question-answering-on-mm-vet)](https://paperswithcode.com/sota/visual-question-answering-on-mm-vet?p=gpt-4-technical-report-1)</code> </td> </tr> <tr> <td> <img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/question-answering-on-triviaqa" /> </td> <td> <code>[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/question-answering-on-triviaqa)](https://paperswithcode.com/sota/question-answering-on-triviaqa?p=gpt-4-technical-report-1)</code> </td> </tr> <tr> <td> <img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/arithmetic-reasoning-on-gsm8k" /> </td> <td> <code>[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gpt-4-technical-report-1/arithmetic-reasoning-on-gsm8k)](https://paperswithcode.com/sota/arithmetic-reasoning-on-gsm8k?p=gpt-4-technical-report-1)</code> </td> </tr> </table> </div> </div> </div> </div> </div> </div> </div>
<!-- Edit Datasets: modal dialog holding a POST form with two multi-selects
     ("introduced" and "used" datasets) and a link to the new-dataset page. -->
<!-- NOTE(review): the modal root has no tabindex="-1"; presumably omitted so the
     select2 widgets inside can take focus. Confirm before adding it. -->
<div class="modal fade" id="editDatasets" role="dialog" aria-labelledby="editDatasetsLabel" aria-hidden="true">
  <div class="modal-dialog" role="document">
    <div class="modal-content">
      <div class="modal-header">
        <h5 class="modal-title" id="editDatasetsLabel">Edit Datasets</h5>
        <button type="button" class="close btn-close" data-bs-dismiss="modal" aria-label="Close">
          <span aria-hidden="true">&times;</span>
        </button>
      </div>
      <!-- No action attribute: the form posts back to the current page URL.
           (An empty action="" is non-conforming HTML.) -->
      <form method="post">
        <div class="modal-body paper-page-edit-dataset-modal">
          <input type="hidden" name="csrfmiddlewaretoken" value="AErvmiR3h25AZ81aDg3ngW3CfSyG4cnK97dpv8SVDIyA28udSPpnhUBdIUA2C3L7">
          <!-- Datasets introduced by this paper; rendered with no pre-selected options. -->
          <div id="div_id_introduced" class="form-group">
            <label for="id_introduced"> Add or remove datasets <b>introduced</b> in this paper: </label>
            <div>
              <!-- The inline width style and data-* attributes are kept exactly as rendered. -->
              <select name="introduced" data-container-css-class="" data-allow-clear="false" style="width: 100%" class="modelselect2multiple form-control" id="id_introduced" data-autocomplete-light-language="en" data-autocomplete-light-url="/dataset-autocomplete/" data-autocomplete-light-function="select2" multiple>
              </select><div style="display:none" class="dal-forward-conf" id="dal-forward-conf-for_id_introduced"><script type="text/dal-forward-conf">[{"type": "const", "val": true, "dst": "canonical_only"}, {"type": "const", "val": true, "dst": "disable_create_option"}]</script></div>
            </div>
          </div>
          <!-- Other datasets used by this paper; the current selection is pre-rendered as selected options. -->
          <div id="div_id_used" class="form-group">
            <label for="id_used"> Add or remove other datasets <b>used</b> in this paper: </label>
            <div>
              <select name="used" data-container-css-class="" data-allow-clear="false" style="width: 100%" class="modelselect2multiple form-control" id="id_used" data-autocomplete-light-language="en" data-autocomplete-light-url="/dataset-autocomplete/" data-autocomplete-light-function="select2" multiple>
                <option value="3676" selected>ARC (AI2 Reasoning Challenge)</option>
                <option value="17838" selected>BenchLMM</option>
                <option value="18180" selected>CHOCOLATE</option>
                <option value="17878" selected>CORE-MM</option>
                <option value="3437" selected>DROP</option>
                <option value="9042" selected>GSM8K</option>
                <option value="2421" selected>HellaSwag</option>
                <option value="8024" selected>HumanEval</option>
                <option value="17728" selected>InfiMM-Eval</option>
                <option value="19135" selected>MedConceptsQA</option>
                <option value="10103" selected>MMLU</option>
                <option value="19401" selected>MMNeedle</option>
                <option value="17355" selected>MM-Vet</option>
                <option value="19880" selected>MM-Vet v2</option>
                <option value="18206" selected>OCW</option>
                <option value="198" selected>TriviaQA</option>
                <option value="8643" selected>TruthfulQA</option>
                <option value="17872" selected>ViP-Bench</option>
                <option value="5619" selected>WinoGrande</option>
              </select><div style="display:none" class="dal-forward-conf" id="dal-forward-conf-for_id_used"><script type="text/dal-forward-conf">[{"type": "const", "val": true, "dst": "canonical_only"}, {"type": "const", "val": true, "dst": "disable_create_option"}]</script></div>
            </div>
          </div>
          <div style="display: inline-block; padding-bottom: 15px;font-size:14px;">
            Paper introduces a new dataset?
            <div style="padding-top:4px">
              <a href="/contribute/dataset/new">
                <!-- Decorative icon: the link text carries the meaning, so the SVG is hidden from assistive tech. -->
                <span class=" icon-wrapper icon-fa icon-fa-solid" data-name="plus-circle"><svg aria-hidden="true" viewBox="0 0 512 514.999" xmlns="http://www.w3.org/2000/svg"><path d="M256 9.998c137 0 248 111 248 248s-111 248-248 248-248-111-248-248 111-248 248-248zm144 276v-56c0-6.6-5.4-12-12-12h-92v-92c0-6.6-5.4-12-12-12h-56c-6.6 0-12 5.4-12 12v92h-92c-6.6 0-12 5.4-12 12v56c0 6.6 5.4 12 12 12h92v92c0 6.6 5.4 12 12 12h56c6.6 0 12-5.4 12-12v-92h92c6.6 0 12-5.4 12-12z"/></svg></span>
                Add a new dataset here
              </a>
            </div>
          </div>
        </div>
        <div class="modal-footer">
          <button type="submit" class="btn btn-primary" name="edit-datasets"> Save </button>
        </div>
      </form>
    </div>
  </div>
</div>
</template> <div class="container content content-buffer "> <main> <div class="paper-title"> <div class="row"> <div class="col-md-12"> <h1> GPT-4 Technical Report </h1> <div class="authors"> <p> <span class="item-conference-link"> <a href="/conference/preprint-2023-3"> Preprint 2023 </a> </span> &nbsp;·&nbsp; <span class="author-span"> <a href="/author/openai">OpenAI</a></span>, <span class="author-span"> <a href="/author/-1">:</a></span>, <span class="author-span"> <a
href="/author/josh-achiam">Josh Achiam</a></span>, <span class="author-span"> <a href="/author/steven-adler">Steven Adler</a></span>, <span class="author-span"> <a href="/author/sandhini-agarwal">Sandhini Agarwal</a></span>, <span class="author-span"> <a href="/author/lama-ahmad">Lama Ahmad</a></span>, <span class="author-span"> <a href="/author/ilge-akkaya">Ilge Akkaya</a></span>, <span class="author-span"> <a href="/author/florencia-leoni-aleman">Florencia Leoni Aleman</a></span>, <span class="author-span"> <a href="/author/diogo-almeida">Diogo Almeida</a></span>, <span class="author-span"> <a href="/author/janko-altenschmidt">Janko Altenschmidt</a></span>, <span class="author-span"> <a href="/author/sam-altman">Sam Altman</a></span>, <span class="author-span"> <a href="/author/shyamal-anadkat">Shyamal Anadkat</a></span>, <span class="author-span"> <a href="/author/red-avila">Red Avila</a></span>, <span class="author-span"> <a href="/author/igor-babuschkin">Igor Babuschkin</a></span>, <span class="author-span"> <a href="/author/suchir-balaji">Suchir Balaji</a></span>, <span class="author-span"> <a href="/author/valerie-balcom">Valerie Balcom</a></span>, <span class="author-span"> <a href="/author/paul-baltescu">Paul Baltescu</a></span>, <span class="author-span"> <a href="/author/haiming-bao">Haiming Bao</a></span>, <span class="author-span"> <a href="/author/mohammad-bavarian">Mohammad Bavarian</a></span>, <span class="author-span"> <a href="/author/jeff-belgum">Jeff Belgum</a></span>, <span class="author-span"> <a href="/author/irwan-bello">Irwan Bello</a></span>, <span class="author-span"> <a href="/author/jake-berdine">Jake Berdine</a></span>, <span class="author-span"> <a href="/author/gabriel-bernadett-shapiro">Gabriel Bernadett-Shapiro</a></span>, <span class="author-span"> <a href="/author/christopher-berner">Christopher Berner</a></span>, <span class="author-span"> <a href="/author/lenny-bogdonoff">Lenny Bogdonoff</a></span>, <span class="author-span"> 
<a href="/author/oleg-boiko">Oleg Boiko</a></span>, <span class="author-span"> <a href="/author/madelaine-boyd">Madelaine Boyd</a></span>, <span class="author-span"> <a href="/author/anna-luisa-brakman">Anna-Luisa Brakman</a></span>, <span class="author-span"> <a href="/author/greg-brockman">Greg Brockman</a></span>, <span class="author-span"> <a href="/author/tim-brooks">Tim Brooks</a></span>, <span class="author-span"> <a href="/author/miles-brundage">Miles Brundage</a></span>, <span class="author-span"> <a href="/author/kevin-button">Kevin Button</a></span>, <span class="author-span"> <a href="/author/trevor-cai">Trevor Cai</a></span>, <span class="author-span"> <a href="/author/rosie-campbell">Rosie Campbell</a></span>, <span class="author-span"> <a href="/author/andrew-cann">Andrew Cann</a></span>, <span class="author-span"> <a href="/author/brittany-carey">Brittany Carey</a></span>, <span class="author-span"> <a href="/author/chelsea-carlson">Chelsea Carlson</a></span>, <span class="author-span"> <a href="/author/rory-carmichael">Rory Carmichael</a></span>, <span class="author-span"> <a href="/author/brooke-chan">Brooke Chan</a></span>, <span class="author-span"> <a href="/author/che-chang">Che Chang</a></span>, <span class="author-span"> <a href="/author/fotis-chantzis">Fotis Chantzis</a></span>, <span class="author-span"> <a href="/author/derek-chen">Derek Chen</a></span>, <span class="author-span"> <a href="/author/sully-chen">Sully Chen</a></span>, <span class="author-span"> <a href="/author/ruby-chen">Ruby Chen</a></span>, <span class="author-span"> <a href="/author/jason-chen">Jason Chen</a></span>, <span class="author-span"> <a href="/author/mark-chen">Mark Chen</a></span>, <span class="author-span"> <a href="/author/ben-chess">Ben Chess</a></span>, <span class="author-span"> <a href="/author/chester-cho">Chester Cho</a></span>, <span class="author-span"> <a href="/author/casey-chu">Casey Chu</a></span>, <span class="author-span"> <a 
href="/author/hyung-won-chung">Hyung Won Chung</a></span>, <span class="author-span"> <a href="/author/dave-cummings">Dave Cummings</a></span>, <span class="author-span"> <a href="/author/jeremiah-currier">Jeremiah Currier</a></span>, <span class="author-span"> <a href="/author/yunxing-dai">Yunxing Dai</a></span>, <span class="author-span"> <a href="/author/cory-decareaux">Cory Decareaux</a></span>, <span class="author-span"> <a href="/author/thomas-degry">Thomas Degry</a></span>, <span class="author-span"> <a href="/author/noah-deutsch">Noah Deutsch</a></span>, <span class="author-span"> <a href="/author/damien-deville">Damien Deville</a></span>, <span class="author-span"> <a href="/author/arka-dhar">Arka Dhar</a></span>, <span class="author-span"> <a href="/author/david-dohan">David Dohan</a></span>, <span class="author-span"> <a href="/author/steve-dowling">Steve Dowling</a></span>, <span class="author-span"> <a href="/author/sheila-dunning">Sheila Dunning</a></span>, <span class="author-span"> <a href="/author/adrien-ecoffet">Adrien Ecoffet</a></span>, <span class="author-span"> <a href="/author/atty-eleti">Atty Eleti</a></span>, <span class="author-span"> <a href="/author/tyna-eloundou">Tyna Eloundou</a></span>, <span class="author-span"> <a href="/author/david-farhi">David Farhi</a></span>, <span class="author-span"> <a href="/author/liam-fedus">Liam Fedus</a></span>, <span class="author-span"> <a href="/author/niko-felix">Niko Felix</a></span>, <span class="author-span"> <a href="/author/simon-posada-fishman">Simón Posada Fishman</a></span>, <span class="author-span"> <a href="/author/juston-forte">Juston Forte</a></span>, <span class="author-span"> <a href="/author/isabella-fulford">Isabella Fulford</a></span>, <span class="author-span"> <a href="/author/leo-gao">Leo Gao</a></span>, <span class="author-span"> <a href="/author/elie-georges">Elie Georges</a></span>, <span class="author-span"> <a href="/author/christian-gibson">Christian Gibson</a></span>, 
<span class="author-span"> <a href="/author/vik-goel">Vik Goel</a></span>, <span class="author-span"> <a href="/author/tarun-gogineni">Tarun Gogineni</a></span>, <span class="author-span"> <a href="/author/gabriel-goh">Gabriel Goh</a></span>, <span class="author-span"> <a href="/author/rapha-gontijo-lopes-1">Rapha Gontijo-Lopes</a></span>, <span class="author-span"> <a href="/author/jonathan-gordon">Jonathan Gordon</a></span>, <span class="author-span"> <a href="/author/morgan-grafstein">Morgan Grafstein</a></span>, <span class="author-span"> <a href="/author/scott-gray">Scott Gray</a></span>, <span class="author-span"> <a href="/author/ryan-greene">Ryan Greene</a></span>, <span class="author-span"> <a href="/author/joshua-gross">Joshua Gross</a></span>, <span class="author-span"> <a href="/author/shixiang-shane-gu">Shixiang Shane Gu</a></span>, <span class="author-span"> <a href="/author/yufei-guo">Yufei Guo</a></span>, <span class="author-span"> <a href="/author/chris-hallacy">Chris Hallacy</a></span>, <span class="author-span"> <a href="/author/jesse-han">Jesse Han</a></span>, <span class="author-span"> <a href="/author/jeff-harris">Jeff Harris</a></span>, <span class="author-span"> <a href="/author/yuchen-he">Yuchen He</a></span>, <span class="author-span"> <a href="/author/mike-heaton">Mike Heaton</a></span>, <span class="author-span"> <a href="/author/johannes-heidecke">Johannes Heidecke</a></span>, <span class="author-span"> <a href="/author/chris-hesse">Chris Hesse</a></span>, <span class="author-span"> <a href="/author/alan-hickey">Alan Hickey</a></span>, <span class="author-span"> <a href="/author/wade-hickey">Wade Hickey</a></span>, <span class="author-span"> <a href="/author/peter-hoeschele">Peter Hoeschele</a></span>, <span class="author-span"> <a href="/author/brandon-houghton">Brandon Houghton</a></span>, <span class="author-span"> <a href="/author/kenny-hsu">Kenny Hsu</a></span>, <span class="author-span"> <a href="/author/shengli-hu">Shengli 
Hu</a></span>, <span class="author-span"> <a href="/author/xin-hu">Xin Hu</a></span>, <span class="author-span"> <a href="/author/joost-huizinga">Joost Huizinga</a></span>, <span class="author-span"> <a href="/author/shantanu-jain">Shantanu Jain</a></span>, <span class="author-span"> <a href="/author/shawn-jain">Shawn Jain</a></span>, <span class="author-span"> <a href="/author/joanne-jang">Joanne Jang</a></span>, <span class="author-span"> <a href="/author/angela-jiang">Angela Jiang</a></span>, <span class="author-span"> <a href="/author/roger-jiang">Roger Jiang</a></span>, <span class="author-span"> <a href="/author/haozhun-jin">Haozhun Jin</a></span>, <span class="author-span"> <a href="/author/denny-jin">Denny Jin</a></span>, <span class="author-span"> <a href="/author/shino-jomoto">Shino Jomoto</a></span>, <span class="author-span"> <a href="/author/billie-jonn">Billie Jonn</a></span>, <span class="author-span"> <a href="/author/heewoo-jun">Heewoo Jun</a></span>, <span class="author-span"> <a href="/author/tomer-kaftan">Tomer Kaftan</a></span>, <span class="author-span"> <a href="/author/lukasz-kaiser-1">Łukasz Kaiser</a></span>, <span class="author-span"> <a href="/author/ali-kamali">Ali Kamali</a></span>, <span class="author-span"> <a href="/author/ingmar-kanitscheider">Ingmar Kanitscheider</a></span>, <span class="author-span"> <a href="/author/nitish-shirish-keskar">Nitish Shirish Keskar</a></span>, <span class="author-span"> <a href="/author/tabarak-khan">Tabarak Khan</a></span>, <span class="author-span"> <a href="/author/logan-kilpatrick">Logan Kilpatrick</a></span>, <span class="author-span"> <a href="/author/jong-wook-kim">Jong Wook Kim</a></span>, <span class="author-span"> <a href="/author/christina-kim">Christina Kim</a></span>, <span class="author-span"> <a href="/author/yongjik-kim">Yongjik Kim</a></span>, <span class="author-span"> <a href="/author/jan-hendrik-kirchner">Jan Hendrik Kirchner</a></span>, <span class="author-span"> <a 
href="/author/jamie-kiros">Jamie Kiros</a></span>, <span class="author-span"> <a href="/author/matt-knight">Matt Knight</a></span>, <span class="author-span"> <a href="/author/daniel-kokotajlo">Daniel Kokotajlo</a></span>, <span class="author-span"> <a href="/author/lukasz-kondraciuk">Łukasz Kondraciuk</a></span>, <span class="author-span"> <a href="/author/andrew-kondrich">Andrew Kondrich</a></span>, <span class="author-span"> <a href="/author/aris-konstantinidis">Aris Konstantinidis</a></span>, <span class="author-span"> <a href="/author/kyle-kosic">Kyle Kosic</a></span>, <span class="author-span"> <a href="/author/gretchen-krueger">Gretchen Krueger</a></span>, <span class="author-span"> <a href="/author/vishal-kuo">Vishal Kuo</a></span>, <span class="author-span"> <a href="/author/michael-lampe">Michael Lampe</a></span>, <span class="author-span"> <a href="/author/ikai-lan">Ikai Lan</a></span>, <span class="author-span"> <a href="/author/teddy-lee">Teddy Lee</a></span>, <span class="author-span"> <a href="/author/jan-leike">Jan Leike</a></span>, <span class="author-span"> <a href="/author/jade-leung">Jade Leung</a></span>, <span class="author-span"> <a href="/author/daniel-levy">Daniel Levy</a></span>, <span class="author-span"> <a href="/author/chak-ming-li">Chak Ming Li</a></span>, <span class="author-span"> <a href="/author/rachel-lim">Rachel Lim</a></span>, <span class="author-span"> <a href="/author/molly-lin">Molly Lin</a></span>, <span class="author-span"> <a href="/author/stephanie-lin">Stephanie Lin</a></span>, <span class="author-span"> <a href="/author/mateusz-litwin">Mateusz Litwin</a></span>, <span class="author-span"> <a href="/author/theresa-lopez">Theresa Lopez</a></span>, <span class="author-span"> <a href="/author/ryan-lowe">Ryan Lowe</a></span>, <span class="author-span"> <a href="/author/patricia-lue">Patricia Lue</a></span>, <span class="author-span"> <a href="/author/anna-makanju">Anna Makanju</a></span>, <span class="author-span"> <a 
href="/author/kim-malfacini">Kim Malfacini</a></span>, <span class="author-span"> <a href="/author/sam-manning">Sam Manning</a></span>, <span class="author-span"> <a href="/author/todor-markov">Todor Markov</a></span>, <span class="author-span"> <a href="/author/yaniv-markovski">Yaniv Markovski</a></span>, <span class="author-span"> <a href="/author/bianca-martin">Bianca Martin</a></span>, <span class="author-span"> <a href="/author/katie-mayer">Katie Mayer</a></span>, <span class="author-span"> <a href="/author/andrew-mayne">Andrew Mayne</a></span>, <span class="author-span"> <a href="/author/bob-mcgrew">Bob McGrew</a></span>, <span class="author-span"> <a href="/author/scott-mayer-mckinney">Scott Mayer McKinney</a></span>, <span class="author-span"> <a href="/author/christine-mcleavey">Christine McLeavey</a></span>, <span class="author-span"> <a href="/author/paul-mcmillan">Paul McMillan</a></span>, <span class="author-span"> <a href="/author/jake-mcneil">Jake McNeil</a></span>, <span class="author-span"> <a href="/author/david-medina">David Medina</a></span>, <span class="author-span"> <a href="/author/aalok-mehta">Aalok Mehta</a></span>, <span class="author-span"> <a href="/author/jacob-menick">Jacob Menick</a></span>, <span class="author-span"> <a href="/author/luke-metz">Luke Metz</a></span>, <span class="author-span"> <a href="/author/andrey-mishchenko">Andrey Mishchenko</a></span>, <span class="author-span"> <a href="/author/pamela-mishkin">Pamela Mishkin</a></span>, <span class="author-span"> <a href="/author/vinnie-monaco">Vinnie Monaco</a></span>, <span class="author-span"> <a href="/author/evan-morikawa">Evan Morikawa</a></span>, <span class="author-span"> <a href="/author/daniel-mossing">Daniel Mossing</a></span>, <span class="author-span"> <a href="/author/tong-mu">Tong Mu</a></span>, <span class="author-span"> <a href="/author/mira-murati">Mira Murati</a></span>, <span class="author-span"> <a href="/author/oleg-murk">Oleg Murk</a></span>, <span 
class="author-span"> <a href="/author/david-mely-1">David Mély</a></span>, <span class="author-span"> <a href="/author/ashvin-nair">Ashvin Nair</a></span>, <span class="author-span"> <a href="/author/reiichiro-nakano">Reiichiro Nakano</a></span>, <span class="author-span"> <a href="/author/rajeev-nayak">Rajeev Nayak</a></span>, <span class="author-span"> <a href="/author/arvind-neelakantan">Arvind Neelakantan</a></span>, <span class="author-span"> <a href="/author/richard-ngo">Richard Ngo</a></span>, <span class="author-span"> <a href="/author/hyeonwoo-noh">Hyeonwoo Noh</a></span>, <span class="author-span"> <a href="/author/long-ouyang">Long Ouyang</a></span>, <span class="author-span"> <a href="/author/cullen-o-keefe">Cullen O&#x27;Keefe</a></span>, <span class="author-span"> <a href="/author/jakub-pachocki">Jakub Pachocki</a></span>, <span class="author-span"> <a href="/author/alex-paino">Alex Paino</a></span>, <span class="author-span"> <a href="/author/joe-palermo">Joe Palermo</a></span>, <span class="author-span"> <a href="/author/ashley-pantuliano">Ashley Pantuliano</a></span>, <span class="author-span"> <a href="/author/giambattista-parascandolo">Giambattista Parascandolo</a></span>, <span class="author-span"> <a href="/author/joel-parish">Joel Parish</a></span>, <span class="author-span"> <a href="/author/emy-parparita">Emy Parparita</a></span>, <span class="author-span"> <a href="/author/alex-passos">Alex Passos</a></span>, <span class="author-span"> <a href="/author/mikhail-pavlov">Mikhail Pavlov</a></span>, <span class="author-span"> <a href="/author/andrew-peng">Andrew Peng</a></span>, <span class="author-span"> <a href="/author/adam-perelman">Adam Perelman</a></span>, <span class="author-span"> <a href="/author/filipe-de-avila-belbute-peres-1">Filipe de Avila Belbute Peres</a></span>, <span class="author-span"> <a href="/author/michael-petrov">Michael Petrov</a></span>, <span class="author-span"> <a 
href="/author/henrique-ponde-de-oliveira-pinto-1">Henrique Ponde de Oliveira Pinto</a></span>, <span class="author-span"> <a href="/author/michael">Michael</a></span>, <span class="author-span"> <a href="/author/pokorny">Pokorny</a></span>, <span class="author-span"> <a href="/author/michelle-pokrass">Michelle Pokrass</a></span>, <span class="author-span"> <a href="/author/vitchyr-h-pong">Vitchyr H. Pong</a></span>, <span class="author-span"> <a href="/author/tolly-powell">Tolly Powell</a></span>, <span class="author-span"> <a href="/author/alethea-power">Alethea Power</a></span>, <span class="author-span"> <a href="/author/boris-power">Boris Power</a></span>, <span class="author-span"> <a href="/author/elizabeth-proehl">Elizabeth Proehl</a></span>, <span class="author-span"> <a href="/author/raul-puri">Raul Puri</a></span>, <span class="author-span"> <a href="/author/alec-radford">Alec Radford</a></span>, <span class="author-span"> <a href="/author/jack-rae">Jack Rae</a></span>, <span class="author-span"> <a href="/author/aditya-ramesh">Aditya Ramesh</a></span>, <span class="author-span"> <a href="/author/cameron-raymond">Cameron Raymond</a></span>, <span class="author-span"> <a href="/author/francis-real">Francis Real</a></span>, <span class="author-span"> <a href="/author/kendra-rimbach">Kendra Rimbach</a></span>, <span class="author-span"> <a href="/author/carl-ross">Carl Ross</a></span>, <span class="author-span"> <a href="/author/bob-rotsted">Bob Rotsted</a></span>, <span class="author-span"> <a href="/author/henri-roussez">Henri Roussez</a></span>, <span class="author-span"> <a href="/author/nick-ryder">Nick Ryder</a></span>, <span class="author-span"> <a href="/author/mario-saltarelli">Mario Saltarelli</a></span>, <span class="author-span"> <a href="/author/ted-sanders">Ted Sanders</a></span>, <span class="author-span"> <a href="/author/shibani-santurkar">Shibani Santurkar</a></span>, <span class="author-span"> <a href="/author/girish-sastry">Girish 
Sastry</a></span>, <span class="author-span"> <a href="/author/heather-schmidt">Heather Schmidt</a></span>, <span class="author-span"> <a href="/author/david-schnurr">David Schnurr</a></span>, <span class="author-span"> <a href="/author/john-schulman">John Schulman</a></span>, <span class="author-span"> <a href="/author/daniel-selsam">Daniel Selsam</a></span>, <span class="author-span"> <a href="/author/kyla-sheppard">Kyla Sheppard</a></span>, <span class="author-span"> <a href="/author/toki-sherbakov">Toki Sherbakov</a></span>, <span class="author-span"> <a href="/author/jessica-shieh">Jessica Shieh</a></span>, <span class="author-span"> <a href="/author/sarah-shoker">Sarah Shoker</a></span>, <span class="author-span"> <a href="/author/pranav-shyam">Pranav Shyam</a></span>, <span class="author-span"> <a href="/author/szymon-sidor">Szymon Sidor</a></span>, <span class="author-span"> <a href="/author/eric-sigler">Eric Sigler</a></span>, <span class="author-span"> <a href="/author/maddie-simens">Maddie Simens</a></span>, <span class="author-span"> <a href="/author/jordan-sitkin">Jordan Sitkin</a></span>, <span class="author-span"> <a href="/author/katarina-slama">Katarina Slama</a></span>, <span class="author-span"> <a href="/author/ian-sohl">Ian Sohl</a></span>, <span class="author-span"> <a href="/author/benjamin-sokolowsky">Benjamin Sokolowsky</a></span>, <span class="author-span"> <a href="/author/yang-song-1">Yang Song</a></span>, <span class="author-span"> <a href="/author/natalie-staudacher">Natalie Staudacher</a></span>, <span class="author-span"> <a href="/author/felipe-petroski-such">Felipe Petroski Such</a></span>, <span class="author-span"> <a href="/author/natalie-summers">Natalie Summers</a></span>, <span class="author-span"> <a href="/author/ilya-sutskever">Ilya Sutskever</a></span>, <span class="author-span"> <a href="/author/jie-tang">Jie Tang</a></span>, <span class="author-span"> <a href="/author/nikolas-tezak">Nikolas Tezak</a></span>, <span
class="author-span"> <a href="/author/madeleine-b-thompson">Madeleine B. Thompson</a></span>, <span class="author-span"> <a href="/author/phil-tillet">Phil Tillet</a></span>, <span class="author-span"> <a href="/author/amin-tootoonchian">Amin Tootoonchian</a></span>, <span class="author-span"> <a href="/author/elizabeth-tseng">Elizabeth Tseng</a></span>, <span class="author-span"> <a href="/author/preston-tuggle">Preston Tuggle</a></span>, <span class="author-span"> <a href="/author/nick-turley">Nick Turley</a></span>, <span class="author-span"> <a href="/author/jerry-tworek">Jerry Tworek</a></span>, <span class="author-span"> <a href="/author/juan-felipe-ceron-uribe">Juan Felipe Cerón Uribe</a></span>, <span class="author-span"> <a href="/author/andrea-vallone">Andrea Vallone</a></span>, <span class="author-span"> <a href="/author/arun-vijayvergiya">Arun Vijayvergiya</a></span>, <span class="author-span"> <a href="/author/chelsea-voss">Chelsea Voss</a></span>, <span class="author-span"> <a href="/author/carroll-wainwright">Carroll Wainwright</a></span>, <span class="author-span"> <a href="/author/justin-jay-wang">Justin Jay Wang</a></span>, <span class="author-span"> <a href="/author/alvin-wang">Alvin Wang</a></span>, <span class="author-span"> <a href="/author/ben-wang">Ben Wang</a></span>, <span class="author-span"> <a href="/author/jonathan-ward">Jonathan Ward</a></span>, <span class="author-span"> <a href="/author/jason-wei">Jason Wei</a></span>, <span class="author-span"> <a href="/author/cj-weinmann">CJ Weinmann</a></span>, <span class="author-span"> <a href="/author/akila-welihinda">Akila Welihinda</a></span>, <span class="author-span"> <a href="/author/peter-welinder">Peter Welinder</a></span>, <span class="author-span"> <a href="/author/jiayi-weng">Jiayi Weng</a></span>, <span class="author-span"> <a href="/author/lilian-weng">Lilian Weng</a></span>, <span class="author-span"> <a href="/author/matt-wiethoff">Matt Wiethoff</a></span>, <span 
class="author-span"> <a href="/author/dave-willner">Dave Willner</a></span>, <span class="author-span"> <a href="/author/clemens-winter">Clemens Winter</a></span>, <span class="author-span"> <a href="/author/samuel-wolrich">Samuel Wolrich</a></span>, <span class="author-span"> <a href="/author/hannah-wong">Hannah Wong</a></span>, <span class="author-span"> <a href="/author/lauren-workman">Lauren Workman</a></span>, <span class="author-span"> <a href="/author/sherwin-wu">Sherwin Wu</a></span>, <span class="author-span"> <a href="/author/jeff-wu">Jeff Wu</a></span>, <span class="author-span"> <a href="/author/michael-wu">Michael Wu</a></span>, <span class="author-span"> <a href="/author/kai-xiao">Kai Xiao</a></span>, <span class="author-span"> <a href="/author/tao-xu">Tao Xu</a></span>, <span class="author-span"> <a href="/author/sarah-yoo">Sarah Yoo</a></span>, <span class="author-span"> <a href="/author/kevin-yu">Kevin Yu</a></span>, <span class="author-span"> <a href="/author/qiming-yuan">Qiming Yuan</a></span>, <span class="author-span"> <a href="/author/wojciech-zaremba">Wojciech Zaremba</a></span>, <span class="author-span"> <a href="/author/rowan-zellers">Rowan Zellers</a></span>, <span class="author-span"> <a href="/author/chong-zhang">Chong Zhang</a></span>, <span class="author-span"> <a href="/author/marvin-zhang">Marvin Zhang</a></span>, <span class="author-span"> <a href="/author/shengjia-zhao">Shengjia Zhao</a></span>, <span class="author-span"> <a href="/author/tianhao-zheng">Tianhao Zheng</a></span>, <span class="author-span"> <a href="/author/juntang-zhuang">Juntang Zhuang</a></span>, <span class="author-span"> <a href="/author/william-zhuk">William Zhuk</a></span>, <span class="author-span"> <a href="/author/barret-zoph">Barret Zoph</a></span> <span class="hidden-element">· </span><button type="button" class="badge-edit" data-bs-toggle="modal" data-bs-target="#loginModal"> <span class=" icon-wrapper icon-fa icon-fa-solid"
data-name="edit"><svg viewBox="0 0 576 514.999" xmlns="http://www.w3.org/2000/svg"><path d="M402.6 85.198l90.2 90.2c3.8 3.8 3.8 10 0 13.8l-218.399 218.4-92.8 10.3c-12.4 1.4-22.9-9.1-21.5-21.5l10.3-92.8 218.4-218.4c3.799-3.8 10-3.8 13.799 0zm162-22.9c15.2 15.2 15.2 39.9 0 55.2l-35.4 35.4c-3.8 3.8-10 3.8-13.8 0l-90.2-90.2c-3.8-3.8-3.8-10 0-13.8l35.4-35.4c15.3-15.2 40-15.2 55.2 0zM384 348.198c0-3.2 1.3-6.2 3.5-8.5l40-40c7.6-7.5 20.5-2.2 20.5 8.5v157.8c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48v-352c0-26.5 21.5-48 48-48h285.8c10.7 0 16.1 12.9 8.5 20.5l-40 40c-2.3 2.2-5.3 3.5-8.5 3.5H64v320h320v-101.8z"/></svg></span> <span>Edit social preview</span> </button> </p> </div> </div> </div> </div> <div class="paper-abstract"> <div class="row"> <div class="col-md-12"> <p> We report the development of GPT-4, a large-scale, multimodal model which can accept image and text inputs and produce text outputs. While less capable than humans in many real-world scenarios, GPT-4 exhibits human-level performance on various professional and academic benchmarks, including passing a simulated bar exam with a score around the top 10% of test takers. GPT-4 is a Transformer-based model pre-trained to predict the next token in a document. The post-training alignment process results in improved performance on measures of factuality and adherence to desired behavior. A core component of this project was developing infrastructure and optimization methods that behave predictably across a wide range of scales. This allowed us to accurately predict some aspects of GPT-4&#x27;s performance based on models trained with no more than 1/1,000th the compute of GPT-4. 
</p> <a href="https://arxiv.org/pdf/2303.08774v5.pdf" onclick="captureOutboundLink('https://arxiv.org/pdf/2303.08774v5.pdf'); return true;" class="badge badge-light "> <span class=" icon-wrapper icon-fa icon-fa-regular" data-name="file-pdf"><svg viewBox="0 0 384 513.795" xmlns="http://www.w3.org/2000/svg"><path d="M369.9 98.88c9 9 14.1 21.3 14.1 34v332.1c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48v-416c0-26.5 21.5-48 48-48.1h204.1c12.7 0 24.9 5.1 33.9 14.1zm-37.8 30.1L256 52.88v76.1h76.1zM48 464.98h288v-288H232c-13.3 0-24-10.7-24-24v-104H48v416zm250.2-143.7c10.5 10.5 8 38.7-17.5 38.7-14.8 0-36.9-6.8-55.8-17-21.6 3.6-46 12.7-68.4 20.1-50.1 86.4-79.4 47-76.1 31.2 4-20 31-35.9 51-46.2 10.5-18.4 25.4-50.5 35.4-74.4-7.4-28.6-11.4-51-7-67.1 4.8-17.7 38.4-20.3 42.6 5.9 4.7 15.4-1.5 39.9-5.4 56 8.1 21.3 19.6 35.8 36.8 46.3 17.4-2.2 52.2-5.5 64.4 6.5zm-198.1 77.8c0 .7 11.4-4.7 30.4-35-5.9 5.5-25.299 21.3-30.4 35zm81.6-190.6c-2.5 0-2.6 26.9 1.8 40.8 4.9-8.7 5.6-40.8-1.8-40.8zm-24.4 136.6c15.9-6.1 34-14.9 54.8-19.2-11.199-8.3-21.8-20.4-30.1-35.5-6.7 17.7-15 37.8-24.7 54.7zm131.6-5c3.6-2.4-2.2-10.4-37.3-7.8 32.3 13.8 37.3 7.8 37.3 7.8z"/></svg></span> <span>PDF</span> </a> <a href="https://arxiv.org/abs/2303.08774v5" onclick="captureOutboundLink('https://arxiv.org/abs/2303.08774v5'); return true;" class="badge badge-light "> <span class=" icon-wrapper icon-fa icon-fa-regular" data-name="file"><svg viewBox="0 0 384 513.795" xmlns="http://www.w3.org/2000/svg"><path d="M369.9 98.88c9 9 14.1 21.3 14.1 34v332.1c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48v-416c0-26.5 21.5-48 48-48.1h204.1c12.7 0 24.9 5.1 33.9 14.1zm-37.8 30.1L256 52.88v76.1h76.1zM48 464.98h288v-288H232c-13.3 0-24-10.7-24-24v-104H48v416z"/></svg></span> <span>Abstract</span> </a> <a href="https://cdn.openai.com/papers/gpt-4.pdf" onclick="captureOutboundLink('https://cdn.openai.com/papers/gpt-4.pdf'); return true;" class="badge badge-light "> <span class=" icon-wrapper icon-fa icon-fa-regular" 
data-name="file-pdf"><svg viewBox="0 0 384 513.795" xmlns="http://www.w3.org/2000/svg"><path d="M369.9 98.88c9 9 14.1 21.3 14.1 34v332.1c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48v-416c0-26.5 21.5-48 48-48.1h204.1c12.7 0 24.9 5.1 33.9 14.1zm-37.8 30.1L256 52.88v76.1h76.1zM48 464.98h288v-288H232c-13.3 0-24-10.7-24-24v-104H48v416zm250.2-143.7c10.5 10.5 8 38.7-17.5 38.7-14.8 0-36.9-6.8-55.8-17-21.6 3.6-46 12.7-68.4 20.1-50.1 86.4-79.4 47-76.1 31.2 4-20 31-35.9 51-46.2 10.5-18.4 25.4-50.5 35.4-74.4-7.4-28.6-11.4-51-7-67.1 4.8-17.7 38.4-20.3 42.6 5.9 4.7 15.4-1.5 39.9-5.4 56 8.1 21.3 19.6 35.8 36.8 46.3 17.4-2.2 52.2-5.5 64.4 6.5zm-198.1 77.8c0 .7 11.4-4.7 30.4-35-5.9 5.5-25.299 21.3-30.4 35zm81.6-190.6c-2.5 0-2.6 26.9 1.8 40.8 4.9-8.7 5.6-40.8-1.8-40.8zm-24.4 136.6c15.9-6.1 34-14.9 54.8-19.2-11.199-8.3-21.8-20.4-30.1-35.5-6.7 17.7-15 37.8-24.7 54.7zm131.6-5c3.6-2.4-2.2-10.4-37.3-7.8 32.3 13.8 37.3 7.8 37.3 7.8z"/></svg></span> <span> Preprint 2023 PDF </span> </a> <a href="https://openai.com/research/gpt-4" onclick="captureOutboundLink('https://openai.com/research/gpt-4'); return true;" class="badge badge-light "> <span class=" icon-wrapper icon-fa icon-fa-regular" data-name="file"><svg viewBox="0 0 384 513.795" xmlns="http://www.w3.org/2000/svg"><path d="M369.9 98.88c9 9 14.1 21.3 14.1 34v332.1c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48v-416c0-26.5 21.5-48 48-48.1h204.1c12.7 0 24.9 5.1 33.9 14.1zm-37.8 30.1L256 52.88v76.1h76.1zM48 464.98h288v-288H232c-13.3 0-24-10.7-24-24v-104H48v416z"/></svg></span> <span>Preprint 2023 Abstract</span> </a> </div> </div> </div> <div class="row"> <div class="col-md-7 paper-section-first" id="code"> <div class="paper-section-title"> <div class="row"> <div class="col-md-12"> <h2>Code <div class="float-right"> <div class="dropdown edit-button"> <button class="dropdown-toggle badge badge-edit" type="button" id="codeEditMenu" data-bs-toggle="dropdown" aria-haspopup="true" aria-expanded="false"> <span class=" icon-wrapper icon-fa 
icon-fa-solid" data-name="edit"><svg viewBox="0 0 576 514.999" xmlns="http://www.w3.org/2000/svg"><path d="M402.6 85.198l90.2 90.2c3.8 3.8 3.8 10 0 13.8l-218.399 218.4-92.8 10.3c-12.4 1.4-22.9-9.1-21.5-21.5l10.3-92.8 218.4-218.4c3.799-3.8 10-3.8 13.799 0zm162-22.9c15.2 15.2 15.2 39.9 0 55.2l-35.4 35.4c-3.8 3.8-10 3.8-13.8 0l-90.2-90.2c-3.8-3.8-3.8-10 0-13.8l35.4-35.4c15.3-15.2 40-15.2 55.2 0zM384 348.198c0-3.2 1.3-6.2 3.5-8.5l40-40c7.6-7.5 20.5-2.2 20.5 8.5v157.8c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48v-352c0-26.5 21.5-48 48-48h285.8c10.7 0 16.1 12.9 8.5 20.5l-40 40c-2.3 2.2-5.3 3.5-8.5 3.5H64v320h320v-101.8z"/></svg></span> Edit </button> <div class="dropdown-menu dropdown-menu-end" aria-labelledby="codeEditMenu"> <a class="dropdown-item" href="#loginModal" data-bs-toggle="modal"> <span class=" icon-wrapper icon-ion" data-name="add"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M256 112v288m144-144H112"/></svg></span> Add</a> <a class="dropdown-item" href="#loginModal" data-bs-toggle="modal"> <span class=" icon-wrapper icon-ion" data-name="remove"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M400 256H112"/></svg></span> Remove</a> <a class="dropdown-item" href="#loginModal" data-bs-toggle="modal"> <span class=" icon-wrapper icon-ion" data-name="checkmark-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> Mark official</a> </div> </div> </div> </h2> <hr/> </div> </div> </div> <div class="paper-implementations code-table"> <div id="implementations-short-list"> <div class="row"> <div class="col-sm-7"> 
<div class="paper-impl-cell"> <a href="https://github.com/openai/evals" onclick="captureOutboundLink('https://github.com/openai/evals'); return true;" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> openai/evals <span class="badge badge-info is-official-code"><span class=" icon-wrapper icon-ion" data-name="checkmark-circle-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M448 256c0-106-86-192-192-192S64 150 64 256s86 192 192 192 192-86 192-192z" fill="none" stroke="#000" stroke-miterlimit="10" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M352 176L217.6 336 160 272"/></svg></span> official</span> </a> </div> </div> <div class="col-3"> <div class="paper-impl-cell text-nowrap"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 
221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 15,084 </div> </div> <div class="col-2"> <div class="paper-impl-cell text-center"> </div> </div> </div> <div class="row"> <div class="col-sm-7"> <div class="paper-impl-cell"> <a href="https://github.com/shmsw25/factscore" onclick="captureOutboundLink('https://github.com/shmsw25/factscore'); return true;" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> shmsw25/factscore </a> </div> </div> <div class="col-3"> <div class="paper-impl-cell text-nowrap"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 291 </div> </div> <div class="col-2"> <div 
class="paper-impl-cell text-center">
<!-- Framework logo; alt names the framework shown by the image file -->
<img class="" src="https://production-assets.paperswithcode.com/perf/images/frameworks/pytorch-2fbf2cb9.png" alt="PyTorch" />
</div> </div> </div>
<!-- Implementation row: unispac/visual-adversarial-examples-jailbreak-large-language-models -->
<div class="row"> <div class="col-sm-7"> <div class="paper-impl-cell">
<a href="https://github.com/unispac/visual-adversarial-examples-jailbreak-large-language-models" onclick="captureOutboundLink('https://github.com/unispac/visual-adversarial-examples-jailbreak-large-language-models'); return true;" class="code-table-link">
<span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span>
unispac/visual-adversarial-examples…
</a> </div>
<div class="additional-options">
<!-- This table only positions the "Quickstart in" label next to the badge, so it is
     marked role="presentation" to keep it from being announced as a data table. -->
<table role="presentation"> <tr> <td>↳ Quickstart in </td> <td> <div class="hf-demo-logo">
<a href="https://huggingface.co/spaces/Vision-CAIR/minigpt4" onclick="captureOutboundLink('https://huggingface.co/spaces/Vision-CAIR/minigpt4'); return true;" class="badge badge-primary">
<!-- Decorative logo: the link text after the image names the destination, so alt is empty -->
<img alt=""
src="data:image/svg+xml;utf8,%3Csvg%20class%3D%22w-14%20h-14%20mr-1.5%22%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20aria-hidden%3D%22true%22%20width%3D%221em%22%20height%3D%221em%22%20viewBox%3D%220%200%2032%2032%22%3E%3Cpath%20d%3D%22M7.81%2018.746v5.445h5.444v-5.445H7.809z%22%20fill%3D%22%23FF3270%22/%3E%3Cpath%20d%3D%22M18.746%2018.746v5.445h5.444v-5.445h-5.444z%22%20fill%3D%22%23861FFF%22/%3E%3Cpath%20d%3D%22M7.81%207.81v5.444h5.444V7.81H7.809z%22%20fill%3D%22%23097EFF%22/%3E%3Cpath%20fill-rule%3D%22evenodd%22%20clip-rule%3D%22evenodd%22%20d%3D%22M4%206.418A2.418%202.418%200%20016.418%204h8.228c1.117%200%202.057.757%202.334%201.786a6.532%206.532%200%20019.234%209.234A2.419%202.419%200%200128%2017.355v8.227A2.418%202.418%200%200125.582%2028H6.417A2.418%202.418%200%20014%2025.582V6.417zM7.81%207.81v5.444h5.444V7.81H7.81zm0%2016.38v-5.444h5.444v5.445H7.81zm10.936%200v-5.444h5.445v5.445h-5.445zm0-13.658a2.722%202.722%200%20115.445%200%202.722%202.722%200%2001-5.445%200z%22/%3E%3Cpath%20d%3D%22M21.468%207.81a2.722%202.722%200%20100%205.444%202.722%202.722%200%20000-5.444z%22%20fill%3D%22%23FFD702%22/%3E%3C/svg%3E"> Spaces </a> </div> </td> </tr> </table> </div> </div>
<!-- Star icon and the number shown beside it (presumably the repository's star count) -->
<div class="col-3"> <div class="paper-impl-cell text-nowrap">
<span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 183
</div> </div>
<!-- Framework logo; alt names the framework shown by the image file -->
<div class="col-2"> <div class="paper-impl-cell text-center">
<img class="" src="https://production-assets.paperswithcode.com/perf/images/frameworks/pytorch-2fbf2cb9.png" alt="PyTorch" />
</div> </div> </div>
<!-- Implementation row: gpt4life/alpagasus -->
<div class="row"> <div class="col-sm-7"> <div class="paper-impl-cell">
<a href="https://github.com/gpt4life/alpagasus"
onclick="captureOutboundLink('https://github.com/gpt4life/alpagasus'); return true;" class="code-table-link">
<span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span>
gpt4life/alpagasus
</a> </div> </div>
<!-- Star icon and the number shown beside it (presumably the repository's star count) -->
<div class="col-3"> <div class="paper-impl-cell text-nowrap">
<span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 84
</div> </div>
<!-- Framework logo; alt names the framework shown by the image file -->
<div class="col-2"> <div class="paper-impl-cell text-center">
<img class="" src="https://production-assets.paperswithcode.com/perf/images/frameworks/pytorch-2fbf2cb9.png" alt="PyTorch" />
</div> </div> </div>
<!-- Implementation row: emrgnt-cmplxty/zero-shot-replication -->
<div class="row"> <div class="col-sm-7"> <div class="paper-impl-cell">
<a href="https://github.com/emrgnt-cmplxty/zero-shot-replication"
onclick="captureOutboundLink('https://github.com/emrgnt-cmplxty/zero-shot-replication'); return true;" class="code-table-link">
<span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span>
emrgnt-cmplxty/zero-shot-replication
</a> </div> </div>
<!-- Star icon and the number shown beside it (presumably the repository's star count) -->
<div class="col-3"> <div class="paper-impl-cell text-nowrap">
<span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 72
</div> </div>
<!-- Framework logo; alt names the framework shown by the image file -->
<div class="col-2"> <div class="paper-impl-cell text-center">
<img class="" src="https://production-assets.paperswithcode.com/perf/images/frameworks/pytorch-2fbf2cb9.png" alt="PyTorch" />
</div> </div> </div>
<!-- NOTE(review): this href="#" link behaves like a button. Presumably a script bound to
     #implementations-see-more-trigger reveals #implementations-full-list; confirm the
     script and styling hooks before converting it to <button type="button">. -->
<div class="table-options"> <a href="#" id="implementations-see-more-trigger">See all 11</a> implementations </div>
</div>
<div id="implementations-full-list"
style="display:none"> <div class="row"> <div class="col-sm-7"> <div class="paper-impl-cell"> <a href="https://github.com/openai/evals" onclick="captureOutboundLink('https://github.com/openai/evals'); return true;" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> openai/evals <span class="badge badge-info is-official-code"><span class=" icon-wrapper icon-ion" data-name="checkmark-circle-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M448 256c0-106-86-192-192-192S64 150 64 256s86 192 192 192 192-86 192-192z" fill="none" stroke="#000" stroke-miterlimit="10" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M352 176L217.6 336 160 272"/></svg></span> official</span> </a> </div> </div> <div class="col-3"> <div class="paper-impl-cell text-nowrap"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 
1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 15,084 </div> </div> <div class="col-2"> <div class="paper-impl-cell text-center"> </div> </div> </div> <div class="row"> <div class="col-sm-7"> <div class="paper-impl-cell"> <a href="https://github.com/shmsw25/factscore" onclick="captureOutboundLink('https://github.com/shmsw25/factscore'); return true;" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> shmsw25/factscore </a> </div> </div> <div class="col-3"> <div class="paper-impl-cell text-nowrap"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 
394 480z"/></svg></span> 291 </div> </div> <div class="col-2"> <div class="paper-impl-cell text-center"> <img class="" src="https://production-assets.paperswithcode.com/perf/images/frameworks/pytorch-2fbf2cb9.png" /> </div> </div> </div> <div class="row"> <div class="col-sm-7"> <div class="paper-impl-cell"> <a href="https://github.com/unispac/visual-adversarial-examples-jailbreak-large-language-models" onclick="captureOutboundLink('https://github.com/unispac/visual-adversarial-examples-jailbreak-large-language-models'); return true;" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> unispac/visual-adversarial-examples… </a> </div> </div> <div class="col-3"> <div class="paper-impl-cell text-nowrap"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 
310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 183 </div> </div> <div class="col-2"> <div class="paper-impl-cell text-center"> <img class="" src="https://production-assets.paperswithcode.com/perf/images/frameworks/pytorch-2fbf2cb9.png" /> </div> </div> </div> <div class="row"> <div class="col-sm-7"> <div class="paper-impl-cell"> <a href="https://github.com/gpt4life/alpagasus" onclick="captureOutboundLink('https://github.com/gpt4life/alpagasus'); return true;" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> gpt4life/alpagasus </a> </div> </div> <div class="col-3"> <div class="paper-impl-cell text-nowrap"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 84 </div> </div> <div 
class="col-2"> <div class="paper-impl-cell text-center"> <img class="" src="https://production-assets.paperswithcode.com/perf/images/frameworks/pytorch-2fbf2cb9.png" /> </div> </div> </div> <div class="row"> <div class="col-sm-7"> <div class="paper-impl-cell"> <a href="https://github.com/emrgnt-cmplxty/zero-shot-replication" onclick="captureOutboundLink('https://github.com/emrgnt-cmplxty/zero-shot-replication'); return true;" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> emrgnt-cmplxty/zero-shot-replication </a> </div> </div> <div class="col-3"> <div class="paper-impl-cell text-nowrap"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 72 </div> </div> <div class="col-2"> <div 
class="paper-impl-cell text-center"> <img class="" src="https://production-assets.paperswithcode.com/perf/images/frameworks/pytorch-2fbf2cb9.png" /> </div> </div> </div> <div class="row"> <div class="col-sm-7"> <div class="paper-impl-cell"> <a href="https://github.com/ethz-privsec/superhuman-ai-consistency" onclick="captureOutboundLink('https://github.com/ethz-privsec/superhuman-ai-consistency'); return true;" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> ethz-privsec/superhuman-ai-consiste… </a> </div> </div> <div class="col-3"> <div class="paper-impl-cell text-nowrap"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 28 </div> </div> <div class="col-2"> <div class="paper-impl-cell 
text-center"> </div> </div> </div> <div class="row"> <div class="col-sm-7"> <div class="paper-impl-cell"> <a href="https://github.com/ethz-spylab/superhuman-ai-consistency" onclick="captureOutboundLink('https://github.com/ethz-spylab/superhuman-ai-consistency'); return true;" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> ethz-spylab/superhuman-ai-consisten… </a> </div> </div> <div class="col-3"> <div class="paper-impl-cell text-nowrap"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 28 </div> </div> <div class="col-2"> <div class="paper-impl-cell text-center"> </div> </div> </div> <div class="row"> <div class="col-sm-7"> <div class="paper-impl-cell"> <a 
href="https://github.com/eternityyw/tram-benchmark" onclick="captureOutboundLink('https://github.com/eternityyw/tram-benchmark'); return true;" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> eternityyw/tram-benchmark </a> </div> </div> <div class="col-3"> <div class="paper-impl-cell text-nowrap"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 22 </div> </div> <div class="col-2"> <div class="paper-impl-cell text-center"> </div> </div> </div> <div class="row"> <div class="col-sm-7"> <div class="paper-impl-cell"> <a href="https://github.com/AUCOHL/RTL-Repo" onclick="captureOutboundLink('https://github.com/AUCOHL/RTL-Repo'); return true;" class="code-table-link"> <span class=" icon-wrapper 
icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> AUCOHL/RTL-Repo </a> </div> </div> <div class="col-3"> <div class="paper-impl-cell text-nowrap"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 5 </div> </div> <div class="col-2"> <div class="paper-impl-cell text-center"> <img class="" src="https://production-assets.paperswithcode.com/perf/images/frameworks/pytorch-2fbf2cb9.png" /> </div> </div> </div> <div class="row"> <div class="col-sm-7"> <div class="paper-impl-cell"> <a href="https://github.com/zach-zhiling-zheng/reticular_chemist" onclick="captureOutboundLink('https://github.com/zach-zhiling-zheng/reticular_chemist'); return true;" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg 
xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> zach-zhiling-zheng/reticular_chemist </a> </div> </div> <div class="col-3"> <div class="paper-impl-cell text-nowrap"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 2 </div> </div> <div class="col-2"> <div class="paper-impl-cell text-center"> </div> </div> </div> <div class="row"> <div class="col-sm-7"> <div class="paper-impl-cell"> <a href="https://github.com/avhbench/avhbench" onclick="captureOutboundLink('https://github.com/avhbench/avhbench'); return true;" class="code-table-link"> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 
11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> avhbench/avhbench </a> </div> </div> <div class="col-3"> <div class="paper-impl-cell text-nowrap"> <span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 0 </div> </div> <div class="col-2"> <div class="paper-impl-cell text-center"> <img class="" src="https://production-assets.paperswithcode.com/perf/images/frameworks/pytorch-2fbf2cb9.png" /> </div> </div> </div> <div class="table-options"> <a href="#" id="implementations-see-less-trigger">Collapse 11</a> implementations </div> </div> </div> </div> <div class="col-md-5 paper-section" id="tasks"> <div class="paper-section-title"> <div class="row"> <div class="col-md-12"> <h2>Tasks <div class="float-right"> <div class="dropdown edit-button"> <button class="dropdown-toggle badge badge-edit" type="button" id="taskEditMenu" data-bs-toggle="dropdown" aria-haspopup="true" aria-expanded="false"> <span class=" icon-wrapper icon-fa icon-fa-solid" data-name="edit"><svg viewBox="0 0 576 
514.999" xmlns="http://www.w3.org/2000/svg"><path d="M402.6 85.198l90.2 90.2c3.8 3.8 3.8 10 0 13.8l-218.399 218.4-92.8 10.3c-12.4 1.4-22.9-9.1-21.5-21.5l10.3-92.8 218.4-218.4c3.799-3.8 10-3.8 13.799 0zm162-22.9c15.2 15.2 15.2 39.9 0 55.2l-35.4 35.4c-3.8 3.8-10 3.8-13.8 0l-90.2-90.2c-3.8-3.8-3.8-10 0-13.8l35.4-35.4c15.3-15.2 40-15.2 55.2 0zM384 348.198c0-3.2 1.3-6.2 3.5-8.5l40-40c7.6-7.5 20.5-2.2 20.5 8.5v157.8c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48v-352c0-26.5 21.5-48 48-48h285.8c10.7 0 16.1 12.9 8.5 20.5l-40 40c-2.3 2.2-5.3 3.5-8.5 3.5H64v320h320v-101.8z"/></svg></span> Edit </button>
<div class="dropdown-menu dropdown-menu-end" aria-labelledby="taskEditMenu">
  <!-- Both menu entries point at #loginModal with data-bs-toggle="modal". The icons are decorative (the visible label follows), so they are hidden from assistive technology. -->
  <a class="dropdown-item" href="#loginModal" data-bs-toggle="modal">
    <span class=" icon-wrapper icon-ion" data-name="add"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M256 112v288m144-144H112"/></svg></span> Add</a>
  <a class="dropdown-item" href="#loginModal" data-bs-toggle="modal">
    <span class=" icon-wrapper icon-ion" data-name="remove"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M400 256H112"/></svg></span> Remove</a>
</div>
</div>
</div>
</h2>
<hr>
</div>
</div>
<div class="paper-tasks">
  <div class="row">
    <div class="col-md-12">
      <!-- Task badges: each links to /task/<slug>. The thumbnail is decorative because the task name follows as text, hence the empty alt. -->
      <a href="/task/arithmetic-reasoning">
        <span class="badge badge-primary">
          <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
          <span>Arithmetic Reasoning</span>
        </span>
      </a>
      <a href="/task/bug-fixing">
        <span class="badge badge-primary">
          <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
          <span>Bug fixing</span>
        </span>
      </a>
      <a href="/task/code-generation">
        <span class="badge badge-primary">
          <img
src="https://production-media.paperswithcode.com/thumbnails/task/658fcf6a-52ca-4865-b111-28bf61c346dc.jpg" alt="">
    <span>Code Generation</span>
  </span>
</a>
<!-- Task badges (continued): every thumbnail is decorative because the task name follows as text, so each img carries an empty alt rather than none. -->
<a href="/task/common-sense-reasoning">
  <span class="badge badge-primary">
    <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
    <span>Common Sense Reasoning</span>
  </span>
</a>
<a href="/task/factual-inconsistency-detection-in-chart">
  <span class="badge badge-primary">
    <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
    <span>Factual Inconsistency Detection in Chart Captioning</span>
  </span>
</a>
<a href="/task/few-shot-learning">
  <span class="badge badge-primary">
    <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
    <span>Few-Shot Learning</span>
  </span>
</a>
<a href="/task/image-retrieval">
  <span class="badge badge-primary">
    <img src="https://production-media.paperswithcode.com/thumbnails/task/834263fd-0f2e-47a9-bda1-0fd3f44c71df.jpg" alt="">
    <span>Image Retrieval</span>
  </span>
</a>
<a href="/task/legal-reasoning">
  <span class="badge badge-primary">
    <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
    <span>Legal Reasoning</span>
  </span>
</a>
<a href="/task/long-context-understanding">
  <span class="badge badge-primary">
    <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
    <span>Long-Context Understanding</span>
  </span>
</a>
<a href="/task/math">
  <span class="badge badge-primary">
    <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
    <span>Math</span>
  </span>
</a>
<a href="/task/mmr-total">
  <span class="badge badge-primary">
    <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
    <span>MMR total</span>
  </span>
</a>
<a href="/task/multi-task-language-understanding">
  <span class="badge badge-primary">
    <img src="https://production-media.paperswithcode.com/thumbnails/task/93c61892-0ee4-4896-8201-6f7497f6058e.jpg" alt="">
    <span>Multi-task Language Understanding</span>
  </span>
</a>
<a
href="/task/task-1-grouping">
  <span class="badge badge-primary">
    <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
    <span>Only Connect Walls Dataset Task 1 (Grouping)</span>
  </span>
</a>
<!-- Task badges (continued): thumbnails are decorative next to the visible task name, so alt is empty. -->
<a href="/task/question-answering">
  <span class="badge badge-primary">
    <img src="https://production-media.paperswithcode.com/thumbnails/task/56ae901a-265f-415f-b175-ce54133d648b.jpg" alt="">
    <span>Question Answering</span>
  </span>
</a>
<a href="/task/sentence-completion">
  <span class="badge badge-primary">
    <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
    <span>Sentence Completion</span>
  </span>
</a>
<a href="/task/visual-question-answering-1">
  <span class="badge badge-primary">
    <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
    <span>Visual Question Answering</span>
  </span>
</a>
<a href="/task/visual-question-answering">
  <span class="badge badge-primary">
    <img src="https://production-media.paperswithcode.com/thumbnails/task/d0abcf9a-29fb-4295-9b2d-10d82222ccc9.jpg" alt="">
    <span>Visual Question Answering (VQA)</span>
  </span>
</a>
<a href="/task/zero-shot-learning">
  <span class="badge badge-primary">
    <img src="https://production-media.paperswithcode.com/thumbnails/task/task-0000000158-1446b13d_bCNDpWB.jpg" alt="">
    <span>Zero-Shot Learning</span>
  </span>
</a>
</div>
</div>
</div>
</div>
</div>
</div>
<div class="row">
  <div class="col-md-12 paper-section paper-evaluation-section-title" id="datasets">
    <div class="paper-section-title">
      <div class="row">
        <div class="col-md-12 zero-padding-datasets">
          <h2>Datasets
            <div class="float-right">
              <div class="dropdown edit-button">
                <!-- This button targets #loginModal via data-bs-toggle="modal", so it announces a dialog popup rather than a menu. The hard-coded aria-expanded="false" was dropped; NOTE(review): confirm no script or stylesheet relies on that attribute. -->
                <button class="dropdown-toggle badge badge-edit" type="button" id="datasetEditMenu" data-bs-toggle="modal" data-bs-target="#loginModal" aria-haspopup="dialog">
                  <span class=" icon-wrapper icon-fa icon-fa-solid" data-name="edit"><svg viewBox="0 0 576 514.999" xmlns="http://www.w3.org/2000/svg" aria-hidden="true"><path d="M402.6 85.198l90.2 90.2c3.8 3.8 3.8 10 0
13.8l-218.399 218.4-92.8 10.3c-12.4 1.4-22.9-9.1-21.5-21.5l10.3-92.8 218.4-218.4c3.799-3.8 10-3.8 13.799 0zm162-22.9c15.2 15.2 15.2 39.9 0 55.2l-35.4 35.4c-3.8 3.8-10 3.8-13.8 0l-90.2-90.2c-3.8-3.8-3.8-10 0-13.8l35.4-35.4c15.3-15.2 40-15.2 55.2 0zM384 348.198c0-3.2 1.3-6.2 3.5-8.5l40-40c7.6-7.5 20.5-2.2 20.5 8.5v157.8c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48v-352c0-26.5 21.5-48 48-48h285.8c10.7 0 16.1 12.9 8.5 20.5l-40 40c-2.3 2.2-5.3 3.5-8.5 3.5H64v320h320v-101.8z"/></svg></span> Edit </button>
</div>
</div>
</h2>
<hr>
</div>
</div>
<div class="paper-datasets">
  <div class="row">
    <div class="col-md-12">
      <!-- Dataset badges: each links to /dataset/<slug>. The thumbnail is decorative because the dataset name follows inside the same link, hence the empty alt. -->
      <span class="badge badge-primary">
        <a href="/dataset/mmlu">
          <img class="dataset-list-image" src="https://production-media.paperswithcode.com/thumbnails/dataset-small/d5a66a59-f54e-4c88-afec-3a8bcbe5a172.jpg" alt="">
          MMLU
        </a>
      </span>
      <span class="badge badge-primary">
        <a href="/dataset/gsm8k">
          <img class="dataset-list-image" src="https://production-media.paperswithcode.com/thumbnails/dataset-small/29872a9e-055e-4aa0-b201-7b768933c944.jpg" alt="">
          GSM8K
        </a>
      </span>
      <span class="badge badge-primary">
        <a href="/dataset/triviaqa">
          <img class="dataset-list-image" src="https://production-media.paperswithcode.com/thumbnails/dataset-small/dataset-0000000198-605bbc16.jpg" alt="">
          TriviaQA
        </a>
      </span>
      <span class="badge badge-primary">
        <a href="/dataset/humaneval">
          <img class="dataset-list-image" src="https://production-media.paperswithcode.com/thumbnails/dataset-small/fb954d4f-19de-4767-8b70-d9da28333bb5.jpg" alt="">
          HumanEval
        </a>
      </span>
      <span class="badge badge-primary">
        <a href="/dataset/hellaswag">
          <img class="dataset-list-image" src="https://production-media.paperswithcode.com/thumbnails/dataset-small/dataset-0000002421-d81253e7.jpg" alt="">
          HellaSwag
        </a>
      </span>
      <span class="badge badge-primary">
        <a href="/dataset/winogrande">
          <img class="dataset-list-image" src="https://production-media.paperswithcode.com/thumbnails/dataset-small/dataset-0000005619-56320ec6.jpg" alt="">
          WinoGrande
        </a>
      </span>
      <span
class="badge badge-primary">
  <a href="/dataset/truthfulqa">
    <img class="dataset-list-image" src="https://production-media.paperswithcode.com/thumbnails/dataset-small/dataset-0000008643-b262942e.jpg" alt="">
    TruthfulQA
  </a>
</span>
<!-- Dataset badges (continued): thumbnails and icons are decorative next to the visible dataset name, so images get an empty alt and the inline SVG is hidden from assistive technology. -->
<span class="badge badge-primary">
  <a href="/dataset/drop">
    <img class="dataset-list-image" src="https://production-media.paperswithcode.com/thumbnails/dataset-small/dataset-0000003437-e26aff77.jpg" alt="">
    DROP
  </a>
</span>
<span class="badge badge-primary">
  <a href="/dataset/mm-vet">
    <img class="dataset-list-image" src="https://production-media.paperswithcode.com/thumbnails/dataset-small/38f3ba2a-0f2e-45be-8941-91379c91b736.jpg" alt="">
    MM-Vet
  </a>
</span>
<span class="badge badge-primary">
  <a href="/dataset/arc">
    <img class="dataset-list-image" src="https://production-media.paperswithcode.com/thumbnails/dataset-small/dataset-0000003676-c65dedaf.jpg" alt="">
    ARC (AI2 Reasoning Challenge)
  </a>
</span>
<span class="badge badge-primary">
  <a href="/dataset/core-mm">
    <img class="dataset-list-image" src="https://production-media.paperswithcode.com/thumbnails/dataset-small/16169316-30a0-46ff-b634-d803cd35647b.jpg" alt="">
    InfiMM-Eval
  </a>
</span>
<span class="badge badge-primary">
  <a href="/dataset/mm-vet-v2">
    <img class="dataset-list-image" src="https://production-media.paperswithcode.com/thumbnails/dataset-small/848a87fc-9ce9-42f2-841c-5c623d7da18d.jpg" alt="">
    MM-Vet v2
  </a>
</span>
<span class="badge badge-primary">
  <a href="/dataset/mmneedle">
    <img class="dataset-list-image" src="https://production-media.paperswithcode.com/thumbnails/dataset-small/4b9d60aa-b68c-4f12-853a-c315789ff1c4.jpg" alt="">
    MMNeedle
  </a>
</span>
<span class="badge badge-primary">
  <a href="/dataset/medconceptsqa">
    <span class="icon-list-image" style="opacity:0.4;color:#CD933C"><span class=" icon-wrapper icon-fa icon-fa-solid" data-name="font-case"><svg viewBox="0 0 640 514.999" xmlns="http://www.w3.org/2000/svg" aria-hidden="true"><path d="M229.88 87.688l121.28 341.2c.462 1.372.838 3.661.838 5.11 0 8.83-7.167 15.999-15.998
16h-50.62c-6.343-.001-13.134-4.88-15.16-10.89l-19.07-53.11h-150.3l-19.07 53.11c-2.026 6.01-8.817 10.889-15.16 10.89H16c-8.825-.007-15.989-7.175-15.989-16 0-1.448.376-3.738.838-5.11l121.271-341.2c4.075-11.973 17.647-21.69 30.294-21.69h47.172c12.648 0 26.22 9.717 30.294 21.69zm-100.3 218.31h92.84L176 176.738zm494.42-144c8.832 0 16 7.168 16 16v256c0 8.832-7.168 16-16 16h-32c-8.832 0-16-7.168-16-16v-1.81c-18.9 11.03-40.58 17.81-64 17.81-70.58-.077-127.923-57.42-128-128v-32c.077-70.579 57.42-127.923 128-128 23.42 0 45.1 6.81 64 17.81v-1.81c0-8.832 7.168-16 16-16h32zm-64 160v-32c0-26.496-21.504-48-48-48s-48 21.504-48 48v32c0 26.496 21.504 48 48 48s48-21.504 48-48z"/></svg></span></span>
    MedConceptsQA
  </a>
</span>
<!-- Dataset badges (continued): thumbnails and icons are decorative next to the visible dataset name, so images get an empty alt and the inline SVG is hidden from assistive technology. -->
<span class="badge badge-primary">
  <a href="/dataset/benchlmm">
    <img class="dataset-list-image" src="https://production-media.paperswithcode.com/thumbnails/dataset-small/e45614d5-a797-49e7-b01f-e9709304621e.jpg" alt="">
    BenchLMM
  </a>
</span>
<span class="badge badge-primary">
  <a href="/dataset/only-connect-wall-ocw-dataset">
    <img class="dataset-list-image" src="https://production-media.paperswithcode.com/thumbnails/dataset-small/ff813fbe-3790-4ac9-b86d-0d198ff442f1.jpg" alt="">
    OCW
  </a>
</span>
<span class="badge badge-primary">
  <a href="/dataset/vip-bench">
    <img class="dataset-list-image" src="https://production-media.paperswithcode.com/thumbnails/dataset-small/de4de7f2-0a58-45bf-b5bd-d8db1d0b3191.jpg" alt="">
    ViP-Bench
  </a>
</span>
<span class="badge badge-primary">
  <a href="/dataset/chocolate">
    <img class="dataset-list-image" src="https://production-media.paperswithcode.com/thumbnails/dataset-small/e86da084-7764-4d77-bf30-7af3f9e267b8.jpg" alt="">
    CHOCOLATE
  </a>
</span>
<span class="badge badge-primary">
  <a href="/dataset/mmlu-sr">
    <span class="icon-list-image" style="opacity:0.4;color:#A59F78"><span class=" icon-wrapper icon-fa icon-fa-solid" data-name="file"><svg viewBox="0 0 384 514.999" xmlns="http://www.w3.org/2000/svg" aria-hidden="true"><path d="M224 137.998c0 13.2 10.8 24 24 24h136v328c0 13.3-10.7
24-24 24H24c-13.3 0-24-10.7-24-24v-464c0-13.3 10.7-24 24-24h200v136zm160-14.1v6.1H256v-128h6.1c6.4 0 12.5 2.5 17 7l97.9 98c4.5 4.5 7 10.6 7 16.9z"/></svg></span></span>
    MMLU-SR
  </a>
</span>
<span class="badge badge-primary">
  <a href="/dataset/core-mm-1">
    <span class="icon-list-image" style="opacity:0.4;color:#A59F78"><span class=" icon-wrapper icon-fa icon-fa-solid" data-name="file"><svg viewBox="0 0 384 514.999" xmlns="http://www.w3.org/2000/svg" aria-hidden="true"><path d="M224 137.998c0 13.2 10.8 24 24 24h136v328c0 13.3-10.7 24-24 24H24c-13.3 0-24-10.7-24-24v-464c0-13.3 10.7-24 24-24h200v136zm160-14.1v6.1H256v-128h6.1c6.4 0 12.5 2.5 17 7l97.9 98c4.5 4.5 7 10.6 7 16.9z"/></svg></span></span>
    CORE-MM
  </a>
</span>
</div>
</div>
</div>
</div>
</div>
</div>
<!-- End portal_name if -->
<div class="row">
  <div id="results" class="col-md-12 paper-evaluation-section-title">
    <div class="paper-section-title">
      <div class="row">
        <div class="col-md-12 zero-padding">
          <h2>Results from the Paper
            <div class="float-right">
              <div class="edit-button">
                <!-- The Edit control is a real link to the paper's review page. Its icon is decorative (the label "Edit" follows), so it is hidden from assistive technology. -->
                <a class="dropdown-toggle badge badge-edit" id="evalEditMenu" href="/paper/gpt-4-technical-report-1/review/">
                  <span class=" icon-wrapper icon-fa icon-fa-solid" data-name="edit"><svg viewBox="0 0 576 514.999" xmlns="http://www.w3.org/2000/svg" aria-hidden="true"><path d="M402.6 85.198l90.2 90.2c3.8 3.8 3.8 10 0 13.8l-218.399 218.4-92.8 10.3c-12.4 1.4-22.9-9.1-21.5-21.5l10.3-92.8 218.4-218.4c3.799-3.8 10-3.8 13.799 0zm162-22.9c15.2 15.2 15.2 39.9 0 55.2l-35.4 35.4c-3.8 3.8-10 3.8-13.8 0l-90.2-90.2c-3.8-3.8-3.8-10 0-13.8l35.4-35.4c15.3-15.2 40-15.2 55.2 0zM384 348.198c0-3.2 1.3-6.2 3.5-8.5l40-40c7.6-7.5 20.5-2.2 20.5 8.5v157.8c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48v-352c0-26.5 21.5-48 48-48h285.8c10.7 0 16.1 12.9 8.5 20.5l-40 40c-2.3 2.2-5.3 3.5-8.5 3.5H64v320h320v-101.8z"/></svg></span>
                  Edit
                </a>
              </div>
            </div>
          </h2>
          <hr>
          <div class="paper-evaluation-badge">
            <div class="sota">
              <p>
                <a href="/sota/legal-reasoning-on-legalbench-rule-recall">
                  <img
style="height:20px;width:35px;position:relative;top:1px;" src="https://production-media.paperswithcode.com/sota-thumbs/legal-reasoning-on-legalbench-rule-recall-small_c4b8f370.png"/> </a> &nbsp;Ranked #1 on <a class="sota-task" href="/sota/legal-reasoning-on-legalbench-rule-recall"> Legal Reasoning on LegalBench (Rule-recall) </a> </p> </div> &nbsp;&nbsp;&nbsp;&nbsp; <a href="#" class="get-badge-button float-right" data-bs-toggle="modal" data-bs-target="#badgeModal"> <span class=" icon-wrapper icon-fa icon-fa-light" data-name="arrow-to-right"><svg viewBox="0 0 448 520.146" xmlns="http://www.w3.org/2000/svg"><path d="M215 100.5c4.7-4.7 12.3-4.7 17 0l148.6 148c4.7 4.7 4.7 12.3 0 17l-148.5 148c-4.7 4.7-12.3 4.7-17 0l-7.1-7.1c-4.7-4.7-4.7-12.3 0-17L323.9 274H12c-6.6 0-12-5.4-12-12v-10c0-6.6 5.4-12 12-12h311.9l-116-115.4a12.01 12.01 0 0 1 0-17zM448 77v360c0 6.6-5.4 12-12 12h-8c-6.6 0-12-5.4-12-12V77c0-6.6 5.4-12 12-12h8c6.6 0 12 5.4 12 12z"/></svg></span> Get a GitHub badge </a> </div> </div> </div> </div> </div> </div> <div class="paper-evaluation-section" id="evaluation"> <div class="row"> <div class="col-md-12"> <div class="sota-table table-responsive"> <table class="table-striped"> <tr> <th>Task</th> <th>Dataset</th> <th>Model</th> <th>Metric Name</th> <th>Metric Value</th> <th>Global Rank</th> <th class="text-center">Uses Extra<br/>Training Data </th> <th>Result</th> <th>Benchmark</th> </tr> <tr> <td rowspan="10" class="rowspan-td"> Long-Context Understanding </td> <td rowspan="10" class="rowspan-td"> Ada-LEval (BestAnswer) </td> <td rowspan="10" class="rowspan-td model-col"> GPT-4-Turbo-1106 </td> <td style="vertical-align: top; padding-top: 18px;"> 2k </td> <td> 73.5 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 
95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120837"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> 4k </td> <td> 67.5 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120837"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval" style="text-decoration: 
underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> 8k </td> <td> 53.5 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120837"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> 16k </td> <td> 44.0 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120837"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 
40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> 1k </td> <td> 74.0 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120837"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> 6k </td> <td> 59.5 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 
24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120837" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<!--
  Result rows. Each row holds: metric name, metric value, rank, a flag icon,
  a link to the result's review page, and a "Compare" link to the leaderboard.
  Accessibility: the review link contains only an icon, so it carries an
  aria-label and its arrow icon is aria-hidden. The flag icon is exposed as
  role="img" with a text label.
  NOTE(review): the "No" / "Yes" labels assume the close / checkmark icon is a
  boolean flag for its column; the column header is not visible in this part
  of the file, so confirm the wording against it.
-->
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> 12k </td> <td> 49.5 </td> <td> # 2 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120837" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> 32k </td> <td> 16.0 </td> <td> # 2 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120837" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> 64k </td> <td> 0.0 </td> <td> # 2 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120837" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> 128k </td> <td> 0.0 </td> <td> # 1 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120837" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<!-- Ada-LEval (BestAnswer), model GPT-4-Turbo-0125: ten metric rows share the three rowspan="10" cells. -->
<tr>
  <td rowspan="10" class="rowspan-td"> Long-Context Understanding </td>
  <td rowspan="10" class="rowspan-td"> Ada-LEval (BestAnswer) </td>
  <td rowspan="10" class="rowspan-td model-col"> GPT-4-Turbo-0125 </td>
  <td style="vertical-align: top; padding-top: 18px;"> 2k </td> <td> 73.5 </td> <td> # 1 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120835" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> 4k </td> <td> 65.5 </td> <td> # 2 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120835" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> 8k </td> <td> 56.5 </td> <td> # 1 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120835" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> 16k </td> <td> 44.5 </td> <td> # 1 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120835" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> 1k </td> <td> 73.5 </td> <td> # 2 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120835" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> 6k </td> <td> 63.0 </td> <td> # 1 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120835" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> 12k </td> <td> 52.0 </td> <td> # 1 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120835" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> 32k </td> <td> 30.0 </td> <td> # 1 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120835" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> 64k </td> <td> 0.0 </td> <td> # 2 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120835" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> 128k </td> <td> 0.0 </td> <td> # 1 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120835" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<!-- Ada-LEval (TSort), model GPT-4-Turbo-1106: seven metric rows share the three rowspan="7" cells. -->
<tr>
  <td rowspan="7" class="rowspan-td"> Long-Context Understanding </td>
  <td rowspan="7" class="rowspan-td"> Ada-LEval (TSort) </td>
  <td rowspan="7" class="rowspan-td model-col"> GPT-4-Turbo-1106 </td>
  <td style="vertical-align: top; padding-top: 18px;"> 2k </td> <td> 18.5 </td> <td> # 1 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120836" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval-tsort" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> 4k </td> <td> 15.5 </td> <td> # 2 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120836" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval-tsort" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> 8k </td> <td> 7.5 </td> <td> # 2 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120836" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval-tsort" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> 16k </td> <td> 3.5 </td> <td> # 4 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120836" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval-tsort" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> 32k </td> <td> 6.0 </td> <td> # 1 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120836" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval-tsort" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> 64k </td> <td> 6.0 </td> <td> # 1 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120836" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval-tsort" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> 128k </td> <td> 6.0 </td> <td> # 1 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120836" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval-tsort" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<!-- Ada-LEval (TSort), model GPT-4-Turbo-0125: seven metric rows share the three rowspan="7" cells. -->
<tr>
  <td rowspan="7" class="rowspan-td"> Long-Context Understanding </td>
  <td rowspan="7" class="rowspan-td"> Ada-LEval (TSort) </td>
  <td rowspan="7" class="rowspan-td model-col"> GPT-4-Turbo-0125 </td>
  <td style="vertical-align: top; padding-top: 18px;"> 2k </td> <td> 15.5 </td> <td> # 2 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120838" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval-tsort" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> 4k </td> <td> 16.5 </td> <td> # 1 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120838" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval-tsort" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> 8k </td> <td> 8.5 </td> <td> # 1 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120838" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval-tsort" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> 16k </td> <td> 5.5 </td> <td> # 1 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120838" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval-tsort" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> 32k </td> <td> 2.0 </td> <td> # 2 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120838" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval-tsort" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> 64k </td> <td> 4.0 </td> <td> # 2 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120838" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval-tsort" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> 128k </td> <td> 2.0 </td> <td> # 2 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=120838" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-ada-leval-tsort" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<!-- ARC (Challenge): one row per model, a single metric each. -->
<tr>
  <td rowspan="1" class="rowspan-td"> Common Sense Reasoning </td>
  <td rowspan="1" class="rowspan-td"> ARC (Challenge) </td>
  <td rowspan="1" class="rowspan-td model-col"> GPT-4 (few-shot, k=25) </td>
  <td style="vertical-align: top; padding-top: 18px;"> Accuracy </td> <td> 96.4 </td> <td> # 1 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=99242" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/common-sense-reasoning-on-arc-challenge" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td rowspan="1" class="rowspan-td"> Common Sense Reasoning </td>
  <td rowspan="1" class="rowspan-td"> ARC (Challenge) </td>
  <td rowspan="1" class="rowspan-td model-col"> GPT-3.5 (few-shot, k=25) </td>
  <td style="vertical-align: top; padding-top: 18px;"> Accuracy </td> <td> 85.2 </td> <td> # 12 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="No"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=99243" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/common-sense-reasoning-on-arc-challenge" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<!-- BenchLMM: the only row in this run whose flag icon is a checkmark rather than a close mark. -->
<tr>
  <td rowspan="1" class="rowspan-td"> Visual Question Answering </td>
  <td rowspan="1" class="rowspan-td"> BenchLMM </td>
  <td rowspan="1" class="rowspan-td model-col"> GPT-4V </td>
  <td style="vertical-align: top; padding-top: 18px;"> GPT-3.5 score </td> <td> 58.37 </td> <td> # 1 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=113615" aria-label="Review result"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/visual-question-answering-on-benchlmm" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr> <td rowspan="1" class="rowspan-td"> Factual Inconsistency Detection in Chart Captioning </td> <td rowspan="1" class="rowspan-td"> CHOCOLATE-LLM </td> <td rowspan="1" class="rowspan-td model-col"> GPT-4V 
</td> <td style="vertical-align: top; padding-top: 18px;"> Kendall&#x27;s Tau-c </td> <td> 0.205 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=115878"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/factual-inconsistency-detection-in-chart-1" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="5" class="rowspan-td"> Visual Question Answering (VQA) </td> <td rowspan="5" class="rowspan-td"> CORE-MM </td> <td rowspan="5" class="rowspan-td model-col"> GPT-4V </td> <td style="vertical-align: top; padding-top: 18px;"> Overall score </td> <td> 74.44 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=114226"> <span class=" icon-wrapper icon-ion" 
data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/visual-question-answering-vqa-on-core-mm-1" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> Deductive </td> <td> 74.86 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=114226"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/visual-question-answering-vqa-on-core-mm-1" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> Analogical </td> <td> 69.86 </td> <td> # 1 </td> <td 
class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=114226"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/visual-question-answering-vqa-on-core-mm-1" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> Params </td> <td> - </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=114226"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" 
stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/visual-question-answering-vqa-on-core-mm-1" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> Abductive </td> <td> 77.88 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=114226"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/visual-question-answering-vqa-on-core-mm-1" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="1" class="rowspan-td"> Question Answering </td> <td rowspan="1" class="rowspan-td"> DROP Test </td> <td rowspan="1" class="rowspan-td model-col"> GPT-4 (few-shot, k=3) </td> <td style="vertical-align: top; padding-top: 18px;"> F1 </td> <td> 80.9 </td> <td> # 6 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 
512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=99248"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/question-answering-on-drop-test" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="1" class="rowspan-td"> Question Answering </td> <td rowspan="1" class="rowspan-td"> DROP Test </td> <td rowspan="1" class="rowspan-td model-col"> GPT 3.5 (few-shot, k=3) </td> <td style="vertical-align: top; padding-top: 18px;"> F1 </td> <td> 64.1 </td> <td> # 11 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=99249"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path 
fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/question-answering-on-drop-test" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="1" class="rowspan-td"> Arithmetic Reasoning </td> <td rowspan="1" class="rowspan-td"> GSM8K </td> <td rowspan="1" class="rowspan-td model-col"> GPT-3.5 (few-shot, k=5) </td> <td style="vertical-align: top; padding-top: 18px;"> Accuracy </td> <td> 57.1 </td> <td> # 115 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=99251"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/arithmetic-reasoning-on-gsm8k" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="1" class="rowspan-td"> Sentence Completion </td> <td rowspan="1" class="rowspan-td"> HellaSwag </td> <td rowspan="1" class="rowspan-td model-col"> GPT-4 (10-shot) </td> <td style="vertical-align: top; padding-top: 18px;"> Accuracy </td> <td> 95.3 </td> <td> # 4 
</td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=99240"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/sentence-completion-on-hellaswag" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="1" class="rowspan-td"> Sentence Completion </td> <td rowspan="1" class="rowspan-td"> HellaSwag </td> <td rowspan="1" class="rowspan-td model-col"> GPT-3.5 (10-shot) </td> <td style="vertical-align: top; padding-top: 18px;"> Accuracy </td> <td> 85.5 </td> <td> # 23 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=99241"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 
40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/sentence-completion-on-hellaswag" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="4" class="rowspan-td"> Visual Question Answering (VQA) </td> <td rowspan="4" class="rowspan-td"> InfiMM-Eval </td> <td rowspan="4" class="rowspan-td model-col"> GPT-4V </td> <td style="vertical-align: top; padding-top: 18px;"> Overall score </td> <td> 74.44 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=112622"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/visual-question-answering-vqa-on-core-mm" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> Deductive </td> <td> 74.86 
</td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=112622"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/visual-question-answering-vqa-on-core-mm" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> Abductive </td> <td> 77.88 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=112622"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" 
stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/visual-question-answering-vqa-on-core-mm" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> Analogical </td> <td> 69.86 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=112622"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/visual-question-answering-vqa-on-core-mm" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="1" class="rowspan-td"> Legal Reasoning </td> <td rowspan="1" class="rowspan-td"> LegalBench (Rule-recall) </td> <td rowspan="1" class="rowspan-td model-col"> GPT-4 </td> <td style="vertical-align: top; padding-top: 18px;"> Balanced Accuracy </td> <td> 59.2 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" 
width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=108061"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/legal-reasoning-on-legalbench-rule-recall" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="1" class="rowspan-td"> Zero-Shot Learning </td> <td rowspan="1" class="rowspan-td"> MedConceptsQA </td> <td rowspan="1" class="rowspan-td model-col"> gpt-4-0125-preview </td> <td style="vertical-align: top; padding-top: 18px;"> Accuracy </td> <td> 52.489 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=122047"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" 
stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/zero-shot-learning-on-medconceptsqa" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="1" class="rowspan-td"> Few-Shot Learning </td> <td rowspan="1" class="rowspan-td"> MedConceptsQA </td> <td rowspan="1" class="rowspan-td model-col"> gpt-4-0125-preview </td> <td style="vertical-align: top; padding-top: 18px;"> Accuracy </td> <td> 61.911 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=122034"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/few-shot-learning-on-medconceptsqa" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="1" class="rowspan-td"> Multi-task Language Understanding </td> <td rowspan="1" class="rowspan-td"> MMLU </td> <td rowspan="1" class="rowspan-td model-col"> GPT-3.5 Turbo </td> <td 
style="vertical-align: top; padding-top: 18px;"> Average (%) </td> <td> 70.0 </td> <td> # 38 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=114905"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/multi-task-language-understanding-on-mmlu" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="1" class="rowspan-td"> Multi-task Language Understanding </td> <td rowspan="1" class="rowspan-td"> MMLU </td> <td rowspan="1" class="rowspan-td model-col"> GPT-4 (few-shot) </td> <td style="vertical-align: top; padding-top: 18px;"> Average (%) </td> <td> 86.4 </td> <td> # 8 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=114904"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg 
xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/multi-task-language-understanding-on-mmlu" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="7" class="rowspan-td"> Long-Context Understanding </td> <td rowspan="7" class="rowspan-td"> MMNeedle </td> <td rowspan="7" class="rowspan-td model-col"> GPT-4V </td> <td style="vertical-align: top; padding-top: 18px;"> 1 Image, 2*2 Stitching, Exact Accuracy </td> <td> 86.09 </td> <td> # 3 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=124088"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-mmneedle" 
style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> 1 Image, 4*4 Stitching, Exact Accuracy </td> <td> 54.72 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=124088"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-mmneedle" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> 1 Image, 8*8 Stitching, Exact Accuracy </td> <td> 7.3 </td> <td> # 3 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=124088"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 
512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-mmneedle" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> 10 Images, 1*1 Stitching, Exact Accuracy </td> <td> 72.36 </td> <td> # 3 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=124088"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-mmneedle" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> 10 Images, 2*2 Stitching, Exact Accuracy </td> <td> 34.24 </td> <td> # 3 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" 
data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=124088"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-mmneedle" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> 10 Images, 4*4 Stitching, Exact Accuracy </td> <td> 7.58 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=124088"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" 
stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-mmneedle" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> 10 Images, 8*8 Stitching, Exact Accuracy </td> <td> 0 </td> <td> # 3 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=124088"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-mmneedle" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="7" class="rowspan-td"> Long-Context Understanding </td> <td rowspan="7" class="rowspan-td"> MMNeedle </td> <td rowspan="7" class="rowspan-td model-col"> GPT-4o </td> <td style="vertical-align: top; padding-top: 18px;"> 1 Image, 2*2 Stitching, Exact Accuracy </td> <td> 94.6 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" 
height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=124089"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-mmneedle" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> 1 Image, 4*4 Stitching, Exact Accuracy </td> <td> 83 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=124089"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 
256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-mmneedle" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> 1 Image, 8*8 Stitching, Exact Accuracy </td> <td> 19 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=124089"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-mmneedle" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> 10 Images, 1*1 Stitching, Exact Accuracy </td> <td> 97 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a 
href="/paper/gpt-4-technical-report-1/review/?hl=124089"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-mmneedle" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> 10 Images, 2*2 Stitching, Exact Accuracy </td> <td> 81.8 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=124089"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-mmneedle" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> 
<td style="vertical-align: top; padding-top: 18px;"> 10 Images, 4*4 Stitching, Exact Accuracy </td> <td> 26.9 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=124089"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-mmneedle" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> 10 Images, 8*8 Stitching, Exact Accuracy </td> <td> 1 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=124089"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 
40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/long-context-understanding-on-mmneedle" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="1" class="rowspan-td"> Visual Question Answering </td> <td rowspan="1" class="rowspan-td"> MM-Vet </td> <td rowspan="1" class="rowspan-td model-col"> GPT-4o (gpt-4o-2024-05-13) </td> <td style="vertical-align: top; padding-top: 18px;"> GPT-4 score </td> <td> 69.3±0.1 </td> <td> # 8 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=122906"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/visual-question-answering-on-mm-vet" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="1" class="rowspan-td"> Visual Question Answering </td> <td rowspan="1" 
class="rowspan-td"> MM-Vet </td> <td rowspan="1" class="rowspan-td model-col"> GPT-4V-Turbo-detail:low </td> <td style="vertical-align: top; padding-top: 18px;"> GPT-4 score </td> <td> 60.2±0.3 </td> <td> # 24 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=112088"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/visual-question-answering-on-mm-vet" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="1" class="rowspan-td"> Visual Question Answering </td> <td rowspan="1" class="rowspan-td"> MM-Vet </td> <td rowspan="1" class="rowspan-td model-col"> GPT-4V-Turbo-detail:high </td> <td style="vertical-align: top; padding-top: 18px;"> GPT-4 score </td> <td> 67.6±0.1 </td> <td> # 10 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a 
href="/paper/gpt-4-technical-report-1/review/?hl=112087"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/visual-question-answering-on-mm-vet" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="1" class="rowspan-td"> Visual Question Answering </td> <td rowspan="1" class="rowspan-td"> MM-Vet </td> <td rowspan="1" class="rowspan-td model-col"> GPT-4V </td> <td style="vertical-align: top; padding-top: 18px;"> GPT-4 score </td> <td> 67.7±0.3 </td> <td> # 9 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=110824"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a 
class="btn btn-primary" href="/sota/visual-question-answering-on-mm-vet" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="1" class="rowspan-td"> Visual Question Answering </td> <td rowspan="1" class="rowspan-td"> MM-Vet v2 </td> <td rowspan="1" class="rowspan-td model-col"> GPT-4o (gpt-4o-2024-05-13) </td> <td style="vertical-align: top; padding-top: 18px;"> GPT-4 score </td> <td> 71.0±0.2 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=127291"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/visual-question-answering-on-mm-vet-v2" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="1" class="rowspan-td"> Visual Question Answering </td> <td rowspan="1" class="rowspan-td"> MM-Vet v2 </td> <td rowspan="1" class="rowspan-td model-col"> GPT-4 Turbo (gpt-4-0125-preview) </td> <td style="vertical-align: top; padding-top: 18px;"> GPT-4 score </td> <td> 66.3±0.2 </td> <td> # 6 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" 
width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=127299"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/visual-question-answering-on-mm-vet-v2" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="6" class="rowspan-td"> Only Connect Walls Dataset Task 1 (Grouping) </td> <td rowspan="6" class="rowspan-td"> OCW </td> <td rowspan="6" class="rowspan-td model-col"> GPT-3.5-turbo (5-shot) </td> <td style="vertical-align: top; padding-top: 18px;"> Wasserstein Distance (WD) </td> <td> 80.6 </td> <td> # 6 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117150"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" 
stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> # Correct Groups </td> <td> 149 </td> <td> # 7 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117150"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> Fowlkes Mallows Score (FMS) </td> <td> 37.3 </td> <td> # 6 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 
128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117150"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> Adjusted Rand Index (ARI) </td> <td> 22.0 </td> <td> # 6 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117150"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> 
</td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> Adjusted Mutual Information (AMI) </td> <td> 25.4 </td> <td> # 6 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117150"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> # Solved Walls </td> <td> 2 </td> <td> # 7 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117150"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" 
fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="6" class="rowspan-td"> Only Connect Walls Dataset Task 1 (Grouping) </td> <td rowspan="6" class="rowspan-td"> OCW </td> <td rowspan="6" class="rowspan-td model-col"> GPT-3.5-turbo (3-shot) </td> <td style="vertical-align: top; padding-top: 18px;"> Wasserstein Distance (WD) </td> <td> 80.9 </td> <td> # 7 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117149"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> # Correct Groups </td> <td> 140 </td> <td> # 8 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" 
data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117149"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> Fowlkes Mallows Score (FMS) </td> <td> 36.8 </td> <td> # 7 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117149"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" 
d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> Adjusted Rand Index (ARI) </td> <td> 21.3 </td> <td> # 7 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117149"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> Adjusted Mutual Information (AMI) </td> <td> 24.7 </td> <td> # 7 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117149"> <span class=" icon-wrapper icon-ion" 
data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr>
<tr> <td style="vertical-align: top; padding-top: 18px;"> # Solved Walls </td> <td> 0 </td> <td> # 10 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117149"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr>
<!--
  Result rows for model "GPT-4 (5-shot)". The task, dataset and model cells
  are shared by the rows of the group via rowspan="6".
  Accessibility: the review link holds only an icon, so the link carries an
  aria-label (model + metric) and its SVG is aria-hidden. The checkmark SVG is
  exposed as an image with a text alternative.
  NOTE(review): the header of the checkmark column is not visible next to
  these rows; confirm that "Yes" is the right text alternative.
-->
<tr>
  <td rowspan="6" class="rowspan-td"> Only Connect Walls Dataset Task 1 (Grouping) </td>
  <td rowspan="6" class="rowspan-td"> OCW </td>
  <td rowspan="6" class="rowspan-td model-col"> GPT-4 (5-shot) </td>
  <td style="vertical-align: top; padding-top: 18px;"> Wasserstein Distance (WD) </td>
  <td> 72.9 </td>
  <td> # 1 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117155" aria-label="View result on review page: GPT-4 (5-shot), Wasserstein Distance (WD)"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> # Correct Groups </td>
  <td> 269 </td>
  <td> # 3 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117155" aria-label="View result on review page: GPT-4 (5-shot), # Correct Groups"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> Fowlkes Mallows Score (FMS) </td>
  <td> 43.4 </td>
  <td> # 3 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117155" aria-label="View result on review page: GPT-4 (5-shot), Fowlkes Mallows Score (FMS)"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> Adjusted Rand Index (ARI) </td>
  <td> 29.1 </td>
  <td> # 3 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117155" aria-label="View result on review page: GPT-4 (5-shot), Adjusted Rand Index (ARI)"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> Adjusted Mutual Information (AMI) </td>
  <td> 32.8 </td>
  <td> # 3 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117155" aria-label="View result on review page: GPT-4 (5-shot), Adjusted Mutual Information (AMI)"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" 
style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<!--
  Last row ("# Solved Walls") of the "GPT-4 (5-shot)" group, followed by the
  result rows for model "GPT-3.5-turbo (1-shot)". The task, dataset and model
  cells are shared by the rows of a group via rowspan="6".
  Accessibility: the review link holds only an icon, so the link carries an
  aria-label (model + metric) and its SVG is aria-hidden. The checkmark SVG is
  exposed as an image with a text alternative.
  NOTE(review): the header of the checkmark column is not visible next to
  these rows; confirm that "Yes" is the right text alternative.
-->
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> # Solved Walls </td>
  <td> 7 </td>
  <td> # 2 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117155" aria-label="View result on review page: GPT-4 (5-shot), # Solved Walls"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td rowspan="6" class="rowspan-td"> Only Connect Walls Dataset Task 1 (Grouping) </td>
  <td rowspan="6" class="rowspan-td"> OCW </td>
  <td rowspan="6" class="rowspan-td model-col"> GPT-3.5-turbo (1-shot) </td>
  <td style="vertical-align: top; padding-top: 18px;"> Wasserstein Distance (WD) </td>
  <td> 82.3 </td>
  <td> # 9 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117148" aria-label="View result on review page: GPT-3.5-turbo (1-shot), Wasserstein Distance (WD)"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> # Correct Groups </td>
  <td> 123 </td>
  <td> # 10 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117148" aria-label="View result on review page: GPT-3.5-turbo (1-shot), # Correct Groups"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> Fowlkes Mallows Score (FMS) </td>
  <td> 34.4 </td>
  <td> # 9 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117148" aria-label="View result on review page: GPT-3.5-turbo (1-shot), Fowlkes Mallows Score (FMS)"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> Adjusted Rand Index (ARI) </td>
  <td> 18.2 </td>
  <td> # 10 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117148" aria-label="View result on review page: GPT-3.5-turbo (1-shot), Adjusted Rand Index (ARI)"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> Adjusted Mutual Information (AMI) </td>
  <td> 21.2 </td>
  <td> # 10 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117148" aria-label="View result on review page: GPT-3.5-turbo (1-shot), Adjusted Mutual Information (AMI)"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> # Solved Walls </td>
  <td> 0 </td>
  <td> # 10 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a 
href="/paper/gpt-4-technical-report-1/review/?hl=117148" aria-label="View result on review page: GPT-3.5-turbo (1-shot), # Solved Walls"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<!--
  Result rows for model "GPT-3.5-turbo (10-shot)" (the link completed above
  is the "# Solved Walls" link of the "GPT-3.5-turbo (1-shot)" group). The
  task, dataset and model cells are shared by the rows of a group via
  rowspan="6".
  Accessibility: the review link holds only an icon, so the link carries an
  aria-label (model + metric) and its SVG is aria-hidden. The checkmark SVG is
  exposed as an image with a text alternative.
  NOTE(review): the header of the checkmark column is not visible next to
  these rows; confirm that "Yes" is the right text alternative.
-->
<tr>
  <td rowspan="6" class="rowspan-td"> Only Connect Walls Dataset Task 1 (Grouping) </td>
  <td rowspan="6" class="rowspan-td"> OCW </td>
  <td rowspan="6" class="rowspan-td model-col"> GPT-3.5-turbo (10-shot) </td>
  <td style="vertical-align: top; padding-top: 18px;"> Wasserstein Distance (WD) </td>
  <td> 81.2 </td>
  <td> # 8 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117151" aria-label="View result on review page: GPT-3.5-turbo (10-shot), Wasserstein Distance (WD)"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> # Correct Groups </td>
  <td> 137 </td>
  <td> # 9 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117151" aria-label="View result on review page: GPT-3.5-turbo (10-shot), # Correct Groups"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> Fowlkes Mallows Score (FMS) </td>
  <td> 36.1 </td>
  <td> # 8 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117151" aria-label="View result on review page: GPT-3.5-turbo (10-shot), Fowlkes Mallows Score (FMS)"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> Adjusted Rand Index (ARI) </td>
  <td> 20.4 </td>
  <td> # 8 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117151" aria-label="View result on review page: GPT-3.5-turbo (10-shot), Adjusted Rand Index (ARI)"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> Adjusted Mutual Information (AMI) </td>
  <td> 24.0 </td>
  <td> # 8 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117151" aria-label="View result on review page: GPT-3.5-turbo (10-shot), Adjusted Mutual Information (AMI)"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> # Solved Walls </td>
  <td> 2 </td>
  <td> # 7 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117151" aria-label="View result on review page: GPT-3.5-turbo (10-shot), # Solved Walls"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 
256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<!--
  Result rows for model "GPT-4 (100-shot)". The task, dataset and model cells
  are shared by the rows of the group via rowspan="6".
  Accessibility: the review link holds only an icon, so the link carries an
  aria-label (model + metric) and its SVG is aria-hidden. The checkmark SVG is
  exposed as an image with a text alternative.
  NOTE(review): the header of the checkmark column is not visible next to
  these rows; confirm that "Yes" is the right text alternative.
-->
<tr>
  <td rowspan="6" class="rowspan-td"> Only Connect Walls Dataset Task 1 (Grouping) </td>
  <td rowspan="6" class="rowspan-td"> OCW </td>
  <td rowspan="6" class="rowspan-td model-col"> GPT-4 (100-shot) </td>
  <td style="vertical-align: top; padding-top: 18px;"> Wasserstein Distance (WD) </td>
  <td> 73.6 </td>
  <td> # 3 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117156" aria-label="View result on review page: GPT-4 (100-shot), Wasserstein Distance (WD)"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> # Correct Groups </td>
  <td> 249 </td>
  <td> # 5 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117156" aria-label="View result on review page: GPT-4 (100-shot), # Correct Groups"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> Fowlkes Mallows Score (FMS) </td>
  <td> 42.8 </td>
  <td> # 4 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117156" aria-label="View result on review page: GPT-4 (100-shot), Fowlkes Mallows Score (FMS)"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> Adjusted Rand Index (ARI) </td>
  <td> 28.5 </td>
  <td> # 4 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117156" aria-label="View result on review page: GPT-4 (100-shot), Adjusted Rand Index (ARI)"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> Adjusted Mutual Information (AMI) </td>
  <td> 32.3 </td>
  <td> # 4 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117156" aria-label="View result on review page: GPT-4 (100-shot), Adjusted Mutual Information (AMI)"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 
1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<!--
  Last row ("# Solved Walls") of the "GPT-4 (100-shot)" group, followed by the
  result rows for model "GPT-4 (3-shot)". The task, dataset and model cells
  are shared by the rows of a group via rowspan="6".
  Accessibility: the review link holds only an icon, so the link carries an
  aria-label (model + metric) and its SVG is aria-hidden. The checkmark SVG is
  exposed as an image with a text alternative.
  NOTE(review): the header of the checkmark column is not visible next to
  these rows; confirm that "Yes" is the right text alternative.
-->
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> # Solved Walls </td>
  <td> 3 </td>
  <td> # 6 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117156" aria-label="View result on review page: GPT-4 (100-shot), # Solved Walls"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td rowspan="6" class="rowspan-td"> Only Connect Walls Dataset Task 1 (Grouping) </td>
  <td rowspan="6" class="rowspan-td"> OCW </td>
  <td rowspan="6" class="rowspan-td model-col"> GPT-4 (3-shot) </td>
  <td style="vertical-align: top; padding-top: 18px;"> Wasserstein Distance (WD) </td>
  <td> 73.7 </td>
  <td> # 4 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117154" aria-label="View result on review page: GPT-4 (3-shot), Wasserstein Distance (WD)"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> # Correct Groups </td>
  <td> 272 </td>
  <td> # 2 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117154" aria-label="View result on review page: GPT-4 (3-shot), # Correct Groups"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> Fowlkes Mallows Score (FMS) </td>
  <td> 43.9 </td>
  <td> # 1 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117154" aria-label="View result on review page: GPT-4 (3-shot), Fowlkes Mallows Score (FMS)"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> Adjusted Rand Index (ARI) </td>
  <td> 29.9 </td>
  <td> # 1 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117154" aria-label="View result on review page: GPT-4 (3-shot), Adjusted Rand Index (ARI)"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr>
  <td style="vertical-align: top; padding-top: 18px;"> Adjusted Mutual Information (AMI) </td>
  <td> 33.6 </td>
  <td> # 1 </td>
  <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Yes"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td>
  <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117154" aria-label="View result on review page: GPT-4 (3-shot), Adjusted Mutual Information (AMI)"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td>
  <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td>
</tr>
<tr> <td style="vertical-align: top; padding-top: 18px;"> # Solved Walls </td> <td> 5 </td> <td> # 4 </td> <td class="text-center"> <span class=" 
icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117154"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="6" class="rowspan-td"> Only Connect Walls Dataset Task 1 (Grouping) </td> <td rowspan="6" class="rowspan-td"> OCW </td> <td rowspan="6" class="rowspan-td model-col"> GPT-4 (1-shot) </td> <td style="vertical-align: top; padding-top: 18px;"> Wasserstein Distance (WD) </td> <td> 73.4 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117153"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 
40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> # Correct Groups </td> <td> 262 </td> <td> # 4 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117153"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> Fowlkes Mallows Score (FMS) </td> <td> 43.7 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" 
stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117153"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> Adjusted Rand Index (ARI) </td> <td> 29.7 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117153"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" 
style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> Adjusted Mutual Information (AMI) </td> <td> 33.5 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117153"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> # Solved Walls </td> <td> 4 </td> <td> # 5 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117153"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 
40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="6" class="rowspan-td"> Only Connect Walls Dataset Task 1 (Grouping) </td> <td rowspan="6" class="rowspan-td"> OCW </td> <td rowspan="6" class="rowspan-td model-col"> GPT-4 (0-shot) </td> <td style="vertical-align: top; padding-top: 18px;"> Wasserstein Distance (WD) </td> <td> 75.8 </td> <td> # 5 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117152"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> # Correct Groups </td> <td> 239 </td> <td> # 6 </td> <td 
class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117152"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> Fowlkes Mallows Score (FMS) </td> <td> 41.5 </td> <td> # 5 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117152"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" 
stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> Adjusted Rand Index (ARI) </td> <td> 27.2 </td> <td> # 5 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117152"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> Adjusted Mutual Information (AMI) </td> <td> 30.7 </td> <td> # 5 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a 
href="/paper/gpt-4-technical-report-1/review/?hl=117152"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> # Solved Walls </td> <td> 6 </td> <td> # 3 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117152"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="6" class="rowspan-td"> Only Connect Walls Dataset Task 1 
(Grouping) </td> <td rowspan="6" class="rowspan-td"> OCW </td> <td rowspan="6" class="rowspan-td model-col"> GPT-3.5-turbo (0-shot) </td> <td style="vertical-align: top; padding-top: 18px;"> Wasserstein Distance (WD) </td> <td> 82.5 </td> <td> # 10 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117147"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> # Correct Groups </td> <td> 114 </td> <td> # 11 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117147"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 
512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> Fowlkes Mallows Score (FMS) </td> <td> 34.0 </td> <td> # 10 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117147"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> Adjusted Rand Index (ARI) </td> <td> 18.4 </td> <td> # 9 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" 
height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117147"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> Adjusted Mutual Information (AMI) </td> <td> 21.6 </td> <td> # 9 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117147"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div 
class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> # Solved Walls </td> <td> 0 </td> <td> # 10 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="checkmark"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M416 128L192 384l-96-96"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=117147"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/task-1-grouping-on-ocw" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="1" class="rowspan-td"> Question Answering </td> <td rowspan="1" class="rowspan-td"> TriviaQA </td> <td rowspan="1" class="rowspan-td model-col"> GPT-4-0613 (Zero-shot) </td> <td style="vertical-align: top; padding-top: 18px;"> EM </td> <td> 84.8 </td> <td> # 8 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a 
href="/paper/gpt-4-technical-report-1/review/?hl=125376"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/question-answering-on-triviaqa" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="1" class="rowspan-td"> Question Answering </td> <td rowspan="1" class="rowspan-td"> TruthfulQA </td> <td rowspan="1" class="rowspan-td model-col"> GPT-4 (RLHF) </td> <td style="vertical-align: top; padding-top: 18px;"> MC1 </td> <td> 0.59 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=110275"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn 
btn-primary" href="/sota/question-answering-on-truthfulqa" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="2" class="rowspan-td"> Visual Question Answering </td> <td rowspan="2" class="rowspan-td"> ViP-Bench </td> <td rowspan="2" class="rowspan-td model-col"> GPT-4V-turbo-detail:high (Visual Prompt) </td> <td style="vertical-align: top; padding-top: 18px;"> GPT-4 score (bbox) </td> <td> 60.7 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=114160"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/visual-question-answering-on-vip-bench" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> GPT-4 score (human) </td> <td> 59.9 </td> <td> # 1 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> 
</td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=114160"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/visual-question-answering-on-vip-bench" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="2" class="rowspan-td"> Visual Question Answering </td> <td rowspan="2" class="rowspan-td"> ViP-Bench </td> <td rowspan="2" class="rowspan-td model-col"> GPT-4V-turbo-detail:low (Visual Prompt) </td> <td style="vertical-align: top; padding-top: 18px;"> GPT-4 score (bbox) </td> <td> 52.8 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=114368"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 
336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/visual-question-answering-on-vip-bench" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td style="vertical-align: top; padding-top: 18px;"> GPT-4 score (human) </td> <td> 51.4 </td> <td> # 2 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=114368"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/visual-question-answering-on-vip-bench" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="1" class="rowspan-td"> Common Sense Reasoning </td> <td rowspan="1" class="rowspan-td"> WinoGrande </td> <td rowspan="1" class="rowspan-td model-col"> GPT-3.5 (5-shot) </td> <td style="vertical-align: top; padding-top: 18px;"> Accuracy </td> <td> 81.6 </td> <td> # 14 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 
95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=99245"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/common-sense-reasoning-on-winogrande" style="text-decoration: underline"> Compare</a> </div> </td> </tr> <tr> <td rowspan="1" class="rowspan-td"> Common Sense Reasoning </td> <td rowspan="1" class="rowspan-td"> WinoGrande </td> <td rowspan="1" class="rowspan-td model-col"> GPT-4 (5-shot) </td> <td style="vertical-align: top; padding-top: 18px;"> Accuracy </td> <td> 87.5 </td> <td> # 7 </td> <td class="text-center"> <span class=" icon-wrapper icon-ion" data-name="close"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M289.94 256l95-95A24 24 0 0 0 351 127l-95 95-95-95a24 24 0 0 0-34 34l95 95-95 95a24 24 0 1 0 34 34l95-95 95 95a24 24 0 0 0 34-34z"/></svg></span> </td> <td class="results-icon"> <a href="/paper/gpt-4-technical-report-1/review/?hl=99244"> <span class=" icon-wrapper icon-ion" data-name="enter-outline"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M176 176v-40a40 40 0 0 1 40-40h208a40 40 0 0 1 40 40v240a40 40 0 0 1-40 40H216a40 40 0 0 1-40-40v-40" fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32"/><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" 
stroke-width="32" d="M272 336l80-80-80-80M48 256h288"/></svg></span> </a> </td> <td> <div class="sota-table-link"> <a class="btn btn-primary" href="/sota/common-sense-reasoning-on-winogrande" style="text-decoration: underline"> Compare</a> </div> </td> </tr> </table> </div> </div> </div> </div>
<!-- Methods section: heading (anchor target "methods") with its Edit dropdown, then the list of method links.
     The dropdown entries point at #loginModal via data-bs-toggle="modal". Icons next to a visible text label
     are decorative and therefore hidden from assistive technology. -->
<div class="row">
  <div id="methods" class="col-md-12 paper-evaluation-section-title">
    <div class="paper-section-title">
      <div class="row">
        <div class="col-md-12 zero-padding">
          <h2>
            Methods
            <div class="float-right">
              <div class="dropdown edit-button">
                <button class="dropdown-toggle badge badge-edit" type="button" id="methodEditMenu" data-bs-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
                  <span class=" icon-wrapper icon-fa icon-fa-solid" data-name="edit"><svg aria-hidden="true" viewBox="0 0 576 514.999" xmlns="http://www.w3.org/2000/svg"><path d="M402.6 85.198l90.2 90.2c3.8 3.8 3.8 10 0 13.8l-218.399 218.4-92.8 10.3c-12.4 1.4-22.9-9.1-21.5-21.5l10.3-92.8 218.4-218.4c3.799-3.8 10-3.8 13.799 0zm162-22.9c15.2 15.2 15.2 39.9 0 55.2l-35.4 35.4c-3.8 3.8-10 3.8-13.8 0l-90.2-90.2c-3.8-3.8-3.8-10 0-13.8l35.4-35.4c15.3-15.2 40-15.2 55.2 0zM384 348.198c0-3.2 1.3-6.2 3.5-8.5l40-40c7.6-7.5 20.5-2.2 20.5 8.5v157.8c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48v-352c0-26.5 21.5-48 48-48h285.8c10.7 0 16.1 12.9 8.5 20.5l-40 40c-2.3 2.2-5.3 3.5-8.5 3.5H64v320h320v-101.8z"/></svg></span>
                  Edit
                </button>
                <div class="dropdown-menu dropdown-menu-end" aria-labelledby="methodEditMenu">
                  <a class="dropdown-item" href="#loginModal" data-bs-toggle="modal">
                    <span class=" icon-wrapper icon-ion" data-name="add"><svg aria-hidden="true" xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M256 112v288m144-144H112"/></svg></span>
                    Add</a>
                  <a class="dropdown-item" href="#loginModal" data-bs-toggle="modal">
                    <span class=" icon-wrapper icon-ion" data-name="remove"><svg aria-hidden="true" xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M400 256H112"/></svg></span>
                    Remove</a>
                </div>
              </div>
            </div>
          </h2>
          <hr>
        </div>
      </div>
    </div>
  </div>
</div>
<!-- Method links. This container used to repeat id="methods"; ids must be unique per document and the
     heading column above already owns that id, so the duplicate was removed (the class is kept for styling). -->
<div class="method-section">
  <a href="/method/absolute-position-encodings"> Absolute Position Encodings</a> •
  <a href="/method/adam"> Adam</a> •
  <a href="/method/bpe"> BPE</a> •
  <a href="/method/dense-connections"> Dense Connections</a> •
  <a href="/method/dropout"> Dropout</a> •
  <a href="/method/gpt-4"> GPT-4</a> •
  <a href="/method/label-smoothing"> Label Smoothing</a> •
  <a href="/method/layer-normalization"> Layer Normalization</a> •
  <a href="/method/linear-layer"> Linear Layer</a> •
  <a href="/method/multi-head-attention"> Multi-Head Attention</a> •
  <a href="/method/position-wise-feed-forward-layer"> Position-Wise Feed-Forward Layer</a> •
  <a href="/method/residual-connection"> Residual Connection</a> •
  <a href="/method/scaled"> Scaled Dot-Product Attention</a> •
  <a href="/method/softmax"> Softmax</a> •
  <a href="/method/test"> Test</a> •
  <a href="/method/transformer"> Transformer</a>
</div>
<!-- End portal_name if -->
</div>
</div>
<!-- Footer: contact address first; the licence note and policy links follow on the next lines. -->
<div class="footer">
  <div class="footer-contact">
    <span class="footer-contact-item">Contact us on:</span>
    <a class="footer-contact-item" href="mailto:hello@paperswithcode.com">
      <span class=" icon-wrapper icon-ion" data-name="mail"><svg aria-hidden="true" xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M424 80H88a56.06 56.06 0 0 0-56 56v240a56.06 56.06 0 0 0 56 56h336a56.06 56.06 0 0 0 56-56V136a56.06 56.06 0 0 0-56-56zm-14.18 92.63l-144 112a16 16 0 0 1-19.64 0l-144-112a16 16 0 1 1 19.64-25.26L256 251.73l134.18-104.36a16 16 0 0 1 19.64 25.26z"/></svg></span>
      hello@paperswithcode.com
    </a>.
<span class="footer-contact-item"> Papers With Code is a free resource with all data licensed under <a rel="noreferrer" href="https://creativecommons.org/licenses/by-sa/4.0/">CC-BY-SA</a>. </span> </div> <div class="footer-links"> <a href="/site/terms">Terms</a> <a href="/site/data-policy">Data policy</a> <a href="/site/cookies-policy">Cookies policy</a> <a href="/about#team" class="fair-logo"> from <img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAANAAAAAgCAMAAABU6AZfAAAABGdBTUEAALGPC/xhBQAAAAFzUkdCAK7OHOkAAAAJcEhZcwAAFiUAABYlAUlSJPAAAABFUExURUdwTBwqMhwqMxsqMhkqMxsqMhwqMgCA+hwrMxJIgBsrMxsqMgJ28AF58wF38BsqMwB58hsqMwF17wF07hwrMwRm4QJz7Wj6SIIAAAAUdFJOUwDP87wcPIT+4A1tVti1Ta0smZVzG3JP8wAABR9JREFUWMO1memWpCoMgF0QxX1//0e9kCAkAadq5tzKjzndQmM+szNFEWQ9puu6xn02BXm4j23bTsdapKJAMguFgRVT/Ejyx4uH5hgvL1PUfm69jEd6bN05GTJvXF5X/hfRcPyWe2kTLDFdRA4ENVMbZZJGMt3ppEttNMDC2X/Qa7MK1OrveZoKz2/445I+U4znuvaExxKZLFCqtym/A6rzn+OjbHj8ubwDmfESslvtgWea13WeckQPUKJTf/4USHkDnVXzCrT74DnmeX+8rjgcxA4QBmPpyAKdOm+5XwFpgHH/bG9AMzLMqM9DxxCQaM0qLr7U4xE/AgIDVRBHlcoDeYd7lFee6GZOBvaaskD8S6nut0Dg0ItZEt+IQAfjseIzRDvS/WCxWQJ17phqEGqepQBS/VaXZa0H/4XUYMVt6nr309DEjYvduPT2gWELQTr0iQbC1+SADOg/kjVvspGqX6zSRAgEKbqOf6zgd82AVB+8s0YNm5NL6Y8MGzttwKt0krP9+9A/+hzQTALoUX5MnxW7iCIEUmD7IVZb8G0G1HRE9UqbWKkEUFPSR0MWqH5eB65XmgzQdN3WGjxReROxPD2LROeBIEiD7UGLraBAjMcS9W9AquTPckBgoMqEWG1SIGN57otn5KO9Y30N4rq6MQFC5TX1cEWBfJLY+mbQ5ZMUm8UK7F1A9GNc90T3enkpCZhCdUzfdQq0Wp774gnZao55YU3SgkmAVBez1eDfR4BABd/XqY36ichyaLUnyJZ8jatimUBjqQTouK2M3OGs4miiiduN5bkHCL15C9Zw7heBRMHYSMRxIGyYFsPqpwTqactT8w0P0OSA9iRY9jQvrDyIAhCoAjrrR90I1PNCpcivHEh+cATUmS5xoCaNB3ggMzqgRO/RYPIb1WviDkB4sv22kB8ghQcgUIFWzyUmaQ6kpf5DCoTFh5fwQQCt493e9ypD5Xjq7S5cMQeEubpBf2oKCoSMohPzduBAi2yimhRIc3NvrOd+gCxPexvhcGPM3SRoJpbmIhAGSudTNgNCR+qIRL05UCebsxTIiAYOX6sEkONphRkw9A9ZjADIZIDg857we5MBSiQHVMlWJgXyeTBIyVpGD4RttHC4yVtENHn7K5ASdeM3QGX2sKcKBCBmITYmrGii9TOQT7JYwxOgrhbyby4XJrvs54kuR8vlCg4XEgEOEs8Q8R5DYZboCwEESpTmi/Hhc1Lo8zxPlghZjpbLqWVGUGxSes1y4W2lkkC+Wf0C6GPaxtZo0VQW4nOhsJLqAg01HXqgGN0+0
83MegKoYLdisbDqzHVG1iZJYe0EUDoB+dj149gDRCCgt2lZ1zA5nhvCyEwvrc/b3N/HiZlMgINmZaR/aX3MJluf7Kepo8+F5tRfUh1wR0odzg8Srnm9w7L5SyB/p6H9Ptt0Vj310ngAlDHbnLo3mGc00sJiQ+4KEM+I8xC7fWv5VGcz3Y0C2ZCa70sgf0tXbnbY1jXpln3W6jYXDG4jNthdrfVWn8n4gAVAZe+0GgaEaeGFx4XRQyTM9yWQnNuIAy5/HPAWPuDJ8Yc66sYvSeY/8dhlYqH0kuQzkFQ03nnHCyI/gtc0GfM7BVPmL5J0yHPkXm6d3u6v/TLw3GL5ayDr6WW47awHYmS1VC+XJOVQcCCZBPk13SCvgmcb8uI/UqjqdvlOlk3j5OU20C0putdO1ZWNo0a8oumXslx0vMYaNrfPURt2hnp5G2rhtsEP5j/3Wqt0fQd1YgAAAABJRU5ErkJggg==" alt="Meta AI"> </a>
<!-- The logo image is the only content of the link besides the word "from"; its alt text completes the link's accessible name. NOTE(review): confirm the alt wording against the rendered logo. -->
</div> </div>
<script>
  // MathJax: the configuration object has to exist on window before the loader script runs.
  // Keep this comment on its own line; a "//" comment runs to the end of the physical line,
  // so sharing a line with the statements below would comment them out.
  window.MathJax = {
    tex: {
      inlineMath: [
        ["$", "$"],
        ["\\(", "\\)"],
      ],
    },
  };
  // The loader is injected from script rather than referenced by a static script tag.
  const mathjaxScript = document.createElement("script");
  mathjaxScript.src = "https://production-assets.paperswithcode.com/static/js/mathjax/tex-chtml.js";
  document.head.appendChild(mathjaxScript);
</script>
<script src="https://production-assets.paperswithcode.com/perf/766.4af6b88b.js" defer></script><script src="https://production-assets.paperswithcode.com/perf/351.a22a9607.js" defer></script>
<script>/* Minified webpack bundle. Module 73487 starts with the global search box: it fetches /api/search-autocomplete/ (debounced) and renders the grouped results into #result-box. */(()=>{"use strict";var e,t,n,r,a={73487:(e,t,n)=>{n(26029),n(96869),n(22696),n(89527),n(7233),n(80591);var r=n(23279),a=n.n(r);const o=["tasks","leaderboards","papers","datasets","methods"],l=document.getElementById("id_global_search_form"),s=document.getElementById("id_global_search_input"),i=document.getElementById("q_meta"),d=document.getElementById("q_type"),c=document.createElement("ul");c.id="result-box",l.appendChild(c);let u=0,m=!1,p=[],f=null,g="";const h=a()((function(e){const t=e.target.value;if(t.length<=1)return c.classList.remove("show"),m=!1,p=[],void(f=null);(async e=>{const n=await fetch(`/api/search-autocomplete/?q=${encodeURIComponent(t)}`),r=await n.json();e===u&&function(e){if(e=function(e){let t=o.reduce(((e,t)=>(e[t]=[],e)),{}),n=12;for(let r=0;r<5;r++){for(const a of o)if(e[a].length>r&&(t[a].push(e[a][r]),n--,n<=0))break;if(n<=0)break}return n<12?t:null}(e),f=null,!e)return c.classList.remove("show"),m=!1,void(p=[]);let t="";for(const n of 
o)if(e[n].length){t+=`<li class='category-name'>${b(n[0].toUpperCase()+n.substring(1))}</li>`;for(const r of e[n]){let e="";["leaderboards","datasets"].includes(n)&&(e=r.slug),t+=`<li class='search-item' data-category="${b(n)}" data-meta="${b(e)}" data-label="${b(r.name||r.title)}"><div class='search-item-inner'>`,r.image?(r.image.startsWith("media")&&(r.image="/"+r.image),t+=`<img src="${b(r.image)}">`):"papers"!==n&&(t+=`<img src='${MEDIA_URL}tasks/default.gif'>`),t+=`<span>${b(r.name||r.title)}</span></div></li>`}}c.innerHTML=t,c.classList.add("show"),m=!0,p=[...document.getElementsByClassName("search-item")]}(r)})(++u)}),250,{maxWait:1e3});function y(e){if(!e)return void l.submit();const t=e.dataset.meta,n=e.dataset.category,r=e.dataset.label;GTAG_ENABLED&&window.gtag("event","SiteActions",{event_category:"Search",event_label:n}),s.value=r,t?i.value=t:(i.value="",i.removeAttribute("name")),d.value=n,l.submit()}function v(e){if(null!==e&&e>=p.length)throw Error("idx out of bound");f=e;for(const e of p)e.classList.remove("selected");null!==e?(p[e].classList.add("selected"),s.value=p[e].dataset.label):s.value=g}function b(e){return 
e.replace(/&/g,"&amp;").replace(/</g,"&lt;").replace(/>/g,"&gt;").replace(/"/g,"&quot;").replace(/'/g,"&#039;")}s.addEventListener("input",h),document.body.addEventListener("click",(()=>{c.classList.remove("show"),m=!1})),s.addEventListener("click",(e=>{e.stopPropagation()})),s.addEventListener("input",(()=>{g=s.value})),s.addEventListener("keydown",(e=>{if("Escape"===e.key&&m&&(e.preventDefault(),c.classList.remove("show"),m=!1,v(null)),"ArrowDown"===e.key){if(e.preventDefault(),!p.length)return;p.length&&(c.classList.add("show"),m=!0),null===f?v(0):f>=p.length-1?v(null):v(f+1)}if("ArrowUp"===e.key){if(e.preventDefault(),!p.length)return;p.length&&(c.classList.add("show"),m=!0),v(null===f?p.length-1:f<=0?null:f-1)}})),c.addEventListener("click",(e=>{e.stopPropagation(),y(e.target.closest(".search-item"))})),l.addEventListener("submit",(e=>{y(p[f])}));var E=n(179);""!==SENTRY_DSN_FRONTEND&&E.S1({dsn:SENTRY_DSN_FRONTEND});var w=n(45852);let k=!1;const L=document.getElementsByClassName("read-more-toggle")[0],_=document.getElementsByClassName("read-more-dots")[0],x=document.getElementsByClassName("read-more-rest")[0];L&&L.addEventListener("click",(e=>{e.preventDefault(),k?(_.style.display="",x.style.display="",L.text="read more"):(_.style.display="none",x.style.display="inline",L.text="(read less)"),k=!k}));const C=document.getElementById("implementations-see-more-trigger"),B=document.getElementById("implementations-see-less-trigger"),N=document.getElementById("implementations-short-list"),S=document.getElementById("implementations-full-list");C&&C.addEventListener("click",(e=>{e.preventDefault(),N.style.display="none",S.style.display=""})),B&&B.addEventListener("click",(e=>{e.preventDefault(),N.style.display="",S.style.display="none"})),(()=>{const e=[...document.querySelectorAll(".modal-body")];let t=!1;for(const t of e)t.style.opacity=0;window.addEventListener("click",(r=>{if(r.target.closest('[data-bs-toggle="modal"]')){const 
r=document.getElementById("modals-template");document.body.appendChild(r.content);let a=(0,w.Z)("csrftoken");for(const e of[...document.querySelectorAll("input[name='csrfmiddlewaretoken']")])e.value=a;!async function(){t||(t=!0,Promise.all([n.e(2),n.e(109),n.e(702),n.e(90)]).then(n.bind(n,56090)).then((()=>{n.e(43).then(n.bind(n,36043));for(const t of e)t.style.opacity=""})))}(),(()=>{const e=document.getElementById("new-method-form"),t=document.getElementById("new-method-form-toggle");let n=!1;t.addEventListener("click",(t=>{t.preventDefault(),e.style.display=n?"none":"",n=!n}))})(),(()=>{const e=document.getElementById("new-task-form"),t=document.getElementById("new-task-form-toggle");let n=!1;t.addEventListener("click",(t=>{t.preventDefault(),e.style.display=n?"none":"",n=!n}))})()}}),!0)})()},45852:(e,t,n)=>{n.d(t,{Z:()=>r});const r=e=>{var t=null;if(document.cookie&&""!==document.cookie)for(var n=document.cookie.split(";"),r=0;r<n.length;r++){var a=n[r].trim();if(a.substring(0,e.length+1)===e+"="){t=decodeURIComponent(a.substring(e.length+1));break}}return t}}},o={};function l(e){if(o[e])return o[e].exports;var t=o[e]={id:e,loaded:!1,exports:{}};return a[e](t,t.exports,l),t.loaded=!0,t.exports}l.m=a,l.x=e=>{},l.n=e=>{var t=e&&e.__esModule?()=>e.default:()=>e;return l.d(t,{a:t}),t},l.d=(e,t)=>{for(var n in t)l.o(t,n)&&!l.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},l.f={},l.e=e=>Promise.all(Object.keys(l.f).reduce(((t,n)=>(l.f[n](e,t),t)),[])),l.u=e=>e+"."+{2:"6da00df7",43:"b3f6a007",90:"ead00655",109:"5aa180f0",702:"c05a3709"}[e]+".js",l.miniCssF=e=>e+"."+{43:"b2664180",90:"d7a7e4c6",109:"6ee1c62e",918:"c41196c3"}[e]+".css",l.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(e){if("object"==typeof window)return window}}(),l.hmd=e=>((e=Object.create(e)).children||(e.children=[]),Object.defineProperty(e,"exports",{enumerable:!0,set:()=>{throw new Error("ES Modules may not assign 
module.exports or exports.*, Use ESM export syntax, instead: "+e.id)}}),e),l.o=(e,t)=>Object.prototype.hasOwnProperty.call(e,t),e={},t="perf_frontend:",l.l=(n,r,a,o)=>{if(e[n])e[n].push(r);else{var s,i;if(void 0!==a)for(var d=document.getElementsByTagName("script"),c=0;c<d.length;c++){var u=d[c];if(u.getAttribute("src")==n||u.getAttribute("data-webpack")==t+a){s=u;break}}s||(i=!0,(s=document.createElement("script")).charset="utf-8",s.timeout=120,l.nc&&s.setAttribute("nonce",l.nc),s.setAttribute("data-webpack",t+a),s.src=n),e[n]=[r];var m=(t,r)=>{s.onerror=s.onload=null,clearTimeout(p);var a=e[n];if(delete e[n],s.parentNode&&s.parentNode.removeChild(s),a&&a.forEach((e=>e(r))),t)return t(r)},p=setTimeout(m.bind(null,void 0,{type:"timeout",target:s}),12e4);s.onerror=m.bind(null,s.onerror),s.onload=m.bind(null,s.onload),i&&document.head.appendChild(s)}},l.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},l.p="https://production-assets.paperswithcode.com/perf/",n=e=>new Promise(((t,n)=>{var r=l.miniCssF(e),a=l.p+r;if(((e,t)=>{for(var n=document.getElementsByTagName("link"),r=0;r<n.length;r++){var a=(l=n[r]).getAttribute("data-href")||l.getAttribute("href");if("stylesheet"===l.rel&&(a===e||a===t))return l}var o=document.getElementsByTagName("style");for(r=0;r<o.length;r++){var l;if((a=(l=o[r]).getAttribute("data-href"))===e||a===t)return l}})(r,a))return t();((e,t,n,r)=>{var a=document.createElement("link");a.rel="stylesheet",a.type="text/css",a.onerror=a.onload=o=>{if(a.onerror=a.onload=null,"load"===o.type)n();else{var l=o&&("load"===o.type?"missing":o.type),s=o&&o.target&&o.target.href||t,i=new Error("Loading CSS chunk "+e+" 
failed.\n("+s+")");i.code="CSS_CHUNK_LOAD_FAILED",i.type=l,i.request=s,a.parentNode.removeChild(a),r(i)}},a.href=t,document.head.appendChild(a)})(e,a,t,n)})),r={645:0},l.f.miniCss=(e,t)=>{r[e]?t.push(r[e]):0!==r[e]&&{43:1,90:1,109:1}[e]&&t.push(r[e]=n(e).then((()=>{r[e]=0}),(t=>{throw delete r[e],t})))},(()=>{var e={645:0},t=[[73487,766,351]];l.f.j=(t,n)=>{var r=l.o(e,t)?e[t]:void 0;if(0!==r)if(r)n.push(r[2]);else if(918!=t){var a=new Promise(((n,a)=>{r=e[t]=[n,a]}));n.push(r[2]=a);var o=l.p+l.u(t),s=new Error;l.l(o,(n=>{if(l.o(e,t)&&(0!==(r=e[t])&&(e[t]=void 0),r)){var a=n&&("load"===n.type?"missing":n.type),o=n&&n.target&&n.target.src;s.message="Loading chunk "+t+" failed.\n("+a+": "+o+")",s.name="ChunkLoadError",s.type=a,s.request=o,r[1](s)}}),"chunk-"+t,t)}else e[t]=0};var n=e=>{},r=(r,a)=>{for(var o,s,[i,d,c,u]=a,m=0,p=[];m<i.length;m++)s=i[m],l.o(e,s)&&e[s]&&p.push(e[s][0]),e[s]=0;for(o in d)l.o(d,o)&&(l.m[o]=d[o]);for(c&&c(l),r&&r(a);p.length;)p.shift()();return u&&t.push.apply(t,u),n()},a=self.webpackChunkperf_frontend=self.webpackChunkperf_frontend||[];function o(){for(var n,r=0;r<t.length;r++){for(var a=t[r],o=!0,s=1;s<a.length;s++){var i=a[s];0!==e[i]&&(o=!1)}o&&(t.splice(r--,1),n=l(l.s=a[0]))}return 0===t.length&&(l.x(),l.x=e=>{}),n}a.forEach(r.bind(null,0)),a.push=r.bind(null,a.push.bind(a));var s=l.x;l.x=()=>(l.x=s||(e=>{}),(n=o)())})(),l.x()})();</script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10