CINXE.COM

<!doctype html> <html lang="en" dir="ltr" class="docs-wrapper docs-doc-page docs-version-current plugin-docs plugin-id-default docs-doc-id-concepts/colossalai_overview"> <head> <meta charset="UTF-8"> <meta name="generator" content="Docusaurus v2.3.1"> <title data-rh="true">Colossal-AI Overview | Colossal-AI</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:image" content="https://colossalai.org/img/social-card.png"><meta data-rh="true" name="twitter:image" content="https://colossalai.org/img/social-card.png"><meta data-rh="true" property="og:url" content="https://colossalai.org/docs/concepts/colossalai_overview"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="current"><meta data-rh="true" name="docusaurus_tag" content="docs-default-current"><meta data-rh="true" name="docsearch:version" content="current"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-current"><meta data-rh="true" property="og:title" content="Colossal-AI Overview | Colossal-AI"><meta data-rh="true" name="description" content="Author: Shenggui Li, Siqi Mai"><meta data-rh="true" property="og:description" content="Author: Shenggui Li, Siqi Mai"><link data-rh="true" rel="icon" href="/img/favicon.ico"><link data-rh="true" rel="canonical" href="https://colossalai.org/docs/concepts/colossalai_overview"><link data-rh="true" rel="alternate" href="https://colossalai.org/docs/concepts/colossalai_overview" hreflang="en"><link data-rh="true" rel="alternate" href="https://colossalai.org/zh-Hans/docs/concepts/colossalai_overview" hreflang="zh-Hans"><link data-rh="true" rel="alternate" href="https://colossalai.org/docs/concepts/colossalai_overview" hreflang="x-default"><link data-rh="true" rel="preconnect" href="https://XP2V2KAOVI-dsn.algolia.net" crossorigin="anonymous"><link rel="alternate" type="application/rss+xml" href="/blog/rss.xml" title="Colossal-AI RSS Feed"> <link rel="alternate" type="application/atom+xml" href="/blog/atom.xml" title="Colossal-AI Atom Feed"> <link rel="preconnect" href="https://www.google-analytics.com"> <link rel="preconnect" href="https://www.googletagmanager.com"> <script async src="https://www.googletagmanager.com/gtag/js?id=G-1XKZVCCKRZ"></script> <script>function gtag(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],gtag("js",new Date),gtag("config","G-1XKZVCCKRZ",{anonymize_ip:!0})</script> <link rel="search" type="application/opensearchdescription+xml" title="Colossal-AI" href="/opensearch.xml"> <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.13.24/dist/katex.min.css" integrity="sha384-odtC+0UGzzFL/6PNoE8rX/SPcQDXBJ+uRepguP4QkPCm2LBxH3FA3y+fKSiJ+AmM" crossorigin="anonymous"> <script src="https://js-eu1.hs-scripts.com/26563514.js" id="hs-script-loader" async defer="defer"></script><link rel="stylesheet" href="/assets/css/styles.d9a9b3d5.css"> <link rel="preload" href="/assets/js/runtime~main.6b0f350a.js" as="script"> <link rel="preload" href="/assets/js/main.c6c31bc7.js" as="script"> </head> <body class="navigation-with-keyboard"> <script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}()</script><div id="__docusaurus"> <div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#docusaurus_skipToContent_fallback">Skip to main content</a></div><nav aria-label="Main" class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><div class="navbar__logo"><img src="/img/logo.svg" alt="project-logo" class="themedImage_ToTc themedImage--light_HNdA"><img src="/img/logo.svg" alt="project-logo" class="themedImage_ToTc themedImage--dark_i4oU"></div><b class="navbar__title text--truncate">Colossal-AI</b></a><a class="navbar__item navbar__link" href="/docs/get_started/installation">Tutorials</a><a href="https://github.com/hpcaitech/ColossalAI/tree/main/examples" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">Examples</a><a href="https://www.hpc-ai.tech/blog" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">Blogs</a></div><div class="navbar__items navbar__items--right"><a class="navbar__item navbar__link" href="/docs/get_started/installation">Next</a><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link"><svg viewBox="0 0 24 24" width="20" height="20" aria-hidden="true" class="iconLanguage_nlXk"><path fill="currentColor" d="M12.87 15.07l-2.54-2.51.03-.03c1.74-1.94 2.98-4.17 3.71-6.53H17V4h-7V2H8v2H1v1.99h11.17C11.5 7.92 10.44 9.75 9 11.35 8.07 10.32 7.3 9.19 6.69 8h-2c.73 1.63 1.73 3.17 2.98 4.56l-5.09 5.02L4 19l5-5 3.11 3.11.76-2.04zM18.5 10h-2L12 22h2l1.12-3h4.75L21 22h2l-4.5-12zm-2.62 7l1.62-4.33L19.12 17h-3.24z"></path></svg>English</a><ul class="dropdown__menu"><li><a href="/docs/concepts/colossalai_overview" target="_self" rel="noopener noreferrer" class="dropdown__link dropdown__link--active" lang="en">English</a></li><li><a href="/zh-Hans/docs/concepts/colossalai_overview" target="_self" rel="noopener noreferrer" class="dropdown__link" lang="zh-Hans">简体中文</a></li></ul></div><a href="https://github.com/hpcaitech/ColossalAI" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-github-link"></a><div class="searchBox_ZlJk"><button type="button" class="DocSearch DocSearch-Button" aria-label="Search"><span class="DocSearch-Button-Container"><svg width="20" height="20" class="DocSearch-Search-Icon" viewBox="0 0 20 20"><path d="M14.386 14.386l4.0877 4.0877-4.0877-4.0877c-2.9418 2.9419-7.7115 2.9419-10.6533 0-2.9419-2.9418-2.9419-7.7115 0-10.6533 2.9418-2.9419 7.7115-2.9419 10.6533 0 2.9419 2.9418 2.9419 7.7115 0 10.6533z" stroke="currentColor" fill="none" fill-rule="evenodd" stroke-linecap="round" stroke-linejoin="round"></path></svg><span class="DocSearch-Button-Placeholder">Search</span></span><span class="DocSearch-Button-Keys"></span></button></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="docusaurus_skipToContent_fallback" class="main-wrapper mainWrapper_z2l0 docsWrapper_BCFX"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type="button"></button><div class="docPage__5DB"><aside class="theme-doc-sidebar-container docSidebarContainer_b6E3"><div class="sidebarViewport_Xe31"><div class="sidebar_njMd"><nav aria-label="Docs sidebar" class="menu thin-scrollbar menu_SIkG"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/get_started/installation">Get started</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret menu__link--active" aria-expanded="true" href="/docs/concepts/distributed_training">Concepts</a></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/concepts/distributed_training">Distributed Training</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/concepts/paradigms_of_parallelism">Paradigms of Parallelism</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link menu__link--active" aria-current="page" tabindex="0" href="/docs/concepts/colossalai_overview">Colossal-AI Overview</a></li></ul></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/basics/command_line_tool">Basics</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/features/shardformer">Features</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/advanced_tutorials/train_vit_with_hybrid_parallelism">Advanced Tutorials</a></div></li></ul></nav></div></div></aside><main class="docMainContainer_gTbr"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_VOVn"><div class="docItemContainer_Djhp"><article><nav class="theme-doc-breadcrumbs breadcrumbsContainer_Z_bl" aria-label="Breadcrumbs"><ul class="breadcrumbs" itemscope="" itemtype="https://schema.org/BreadcrumbList"><li class="breadcrumbs__item"><a aria-label="Home page" class="breadcrumbs__link" href="/"><svg viewBox="0 0 24 24" class="breadcrumbHomeIcon_YNFT"><path d="M10 19v-5h4v5c0 .55.45 1 1 1h3c.55 0 1-.45 1-1v-7h1.7c.46 0 .68-.57.33-.87L12.67 3.6c-.38-.34-.96-.34-1.34 0l-8.36 7.53c-.34.3-.13.87.33.87H5v7c0 .55.45 1 1 1h3c.55 0 1-.45 1-1z" fill="currentColor"></path></svg></a></li><li class="breadcrumbs__item"><span class="breadcrumbs__link">Concepts</span><meta itemprop="position" content="1"></li><li itemscope="" itemprop="itemListElement" itemtype="https://schema.org/ListItem" class="breadcrumbs__item breadcrumbs__item--active"><span class="breadcrumbs__link" itemprop="name">Colossal-AI Overview</span><meta itemprop="position" content="2"></li></ul></nav><div class="tocCollapsible_ETCw theme-doc-toc-mobile tocMobile_ITEo"><button type="button" class="clean-btn tocCollapsibleButton_TO0P">On this page</button></div><div class="theme-doc-markdown markdown"><h1>Colossal-AI Overview</h1><p>Author: Shenggui Li, Siqi Mai</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="about-colossal-ai">About Colossal-AI<a href="#about-colossal-ai" class="hash-link" aria-label="Direct link to heading" title="Direct link to heading">​</a></h2><p>With the development of deep learning model size, it is important to shift to a new training paradigm. The traditional training method with no parallelism and optimization became a thing of the past and new training methods are the key to make training large-scale models efficient and cost-effective.</p><p>Colossal-AI is designed to be a unified system to provide an integrated set of training skills and utilities to the user. You can find the common training utilities such as mixed precision training and gradient accumulation. Besides, we provide an array of parallelism including data, tensor and pipeline parallelism. We optimize tensor parallelism with different multi-dimensional distributed matrix-matrix multiplication algorithm. We also provided different pipeline parallelism methods to allow the user to scale their model across nodes efficiently. More advanced features such as offloading can be found in this tutorial documentation in detail as well.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="general-usage">General Usage<a href="#general-usage" class="hash-link" aria-label="Direct link to heading" title="Direct link to heading">​</a></h2><p>We aim to make Colossal-AI easy to use and non-intrusive to user code. There is a simple general workflow if you want to use Colossal-AI.</p><figure style="text-align:center"><img loading="lazy" src="https://s2.loli.net/2022/01/28/ZK7ICWzbMsVuJof.png" class="img_ev3q"><figcaption>Workflow</figcaption></figure><ol><li>Prepare a configuration file where specifies the features you want to use and your parameters.</li><li>Initialize distributed backend with <code>colossalai.launch</code></li><li>Inject the training features into your training components (e.g. model, optimizer) with <code>colossalai.booster</code>.</li><li>Run training and testing</li></ol><p>We will cover the whole workflow in the <code>basic tutorials</code> section.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="future-development">Future Development<a href="#future-development" class="hash-link" aria-label="Direct link to heading" title="Direct link to heading">​</a></h2><p>The Colossal-AI system will be expanded to include more training skills, these new developments may include but are not limited to:</p><ol><li>optimization of distributed operations</li><li>optimization of training on heterogenous system</li><li>implementation of training utilities to reduce model size and speed up training while preserving model performance</li><li>expansion of existing parallelism methods</li></ol><p>We welcome ideas and contribution from the community and you can post your idea for future development in our forum.</p></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="theme-doc-footer-edit-meta-row row"><div class="col"><a href="https://github.com/facebook/docusaurus/tree/main/packages/create-docusaurus/templates/shared/docs/concepts/colossalai_overview.md" target="_blank" rel="noreferrer noopener" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_Z9Sw" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div><div class="col lastUpdated_vwxv"></div></div></footer></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages navigation"><a class="pagination-nav__link pagination-nav__link--prev" href="/docs/concepts/paradigms_of_parallelism"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">Paradigms of Parallelism</div></a><a class="pagination-nav__link pagination-nav__link--next" href="/docs/basics/command_line_tool"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">Command Line Tool</div></a></nav></div></div><div class="col col--3"><div class="tableOfContents_bqdL thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#about-colossal-ai" class="table-of-contents__link toc-highlight">About Colossal-AI</a></li><li><a href="#general-usage" class="table-of-contents__link toc-highlight">General Usage</a></li><li><a href="#future-development" class="table-of-contents__link toc-highlight">Future Development</a></li></ul></div></div></div></div></main></div></div><footer class="footer"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">Resources</div><ul class="footer__items clean-list"><li class="footer__item"><a class="footer__link-item" href="/docs/get_started/installation">Tutorials</a></li><li class="footer__item"><a href="https://github.com/hpcaitech/ColossalAI/tree/main/examples" target="_blank" rel="noopener noreferrer" class="footer__link-item">Examples</a></li><li class="footer__item"><a href="https://github.com/hpcaitech/ColossalAI/discussions" target="_blank" rel="noopener noreferrer" class="footer__link-item">Forum</a></li></ul></div><div class="col footer__col"><div class="footer__title">Community</div><ul class="footer__items clean-list"><li class="footer__item"><a href="https://github.com/hpcaitech/ColossalAI" target="_blank" rel="noopener noreferrer" class="footer__link-item">GitHub</a></li><li class="footer__item"><a href="https://www.hpc-ai.tech/blog" target="_blank" rel="noopener noreferrer" class="footer__link-item">Blog</a></li><li class="footer__item"><a href="https://twitter.com/HPCAITech" target="_blank" rel="noopener noreferrer" class="footer__link-item">Twitter</a></li></ul></div><div class="col footer__col"><div class="footer__title">About</div><ul class="footer__items clean-list"><li class="footer__item"><a href="https://www.hpc-ai.tech/" target="_blank" rel="noopener noreferrer" class="footer__link-item">Company</a></li><li class="footer__item"><a href="https://www.hpc-ai.tech/services" target="_blank" rel="noopener noreferrer" class="footer__link-item">Services</a></li><li class="footer__item"><a href="https://www.hpc-ai.tech/customers" target="_blank" rel="noopener noreferrer" class="footer__link-item">Customers</a></li></ul></div></div><div class="footer__bottom text--center"><div class="footer__copyright">Copyright © 2024 All Rights Reserved by HPC-AI Technology Inc.</div></div></div></footer></div> <script src="/assets/js/runtime~main.6b0f350a.js"></script> <script src="/assets/js/main.c6c31bc7.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10