[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"project-72288":3},{"id":4,"name":5,"fullName":6,"owner":7,"repo":5,"description":8,"homepage":9,"htmlUrl":10,"language":11,"languages":10,"totalLinesOfCode":10,"stars":12,"forks":13,"watchers":14,"openIssues":15,"contributorsCount":16,"subscribersCount":16,"size":16,"stars1d":17,"stars7d":15,"stars30d":18,"stars90d":16,"forks30d":16,"starsTrendScore":19,"compositeScore":20,"rankGlobal":10,"rankLanguage":10,"license":21,"archived":22,"fork":22,"defaultBranch":23,"hasWiki":22,"hasPages":22,"topics":24,"createdAt":10,"pushedAt":10,"updatedAt":32,"readmeContent":33,"aiSummary":34,"trendingCount":16,"starSnapshotCount":16,"syncStatus":35,"lastSyncTime":36,"discoverSource":37},72288,"MiniMax-01","MiniMax-AI\u002FMiniMax-01","MiniMax-AI","The official repo of MiniMax-Text-01 and MiniMax-VL-01, large-language-model & vision-language-model based on Linear Attention","https:\u002F\u002Fwww.minimax.io\u002F",null,"Python",3428,329,46,7,0,3,10,9,29.56,"MIT License",false,"main",[25,26,27,28,29,30,31],"large-language-models","llm","llms","minimax-text-01","minimax-vl-01","vision-language-model","vlm","2026-06-12 02:03:01","\u003Cdiv align=\"center\">\n  \u003Cpicture>\n    \u003Csource srcset=\"figures\u002FMiniMaxLogo-Dark.png\" media=\"(prefers-color-scheme: dark)\">\n      \u003Cimg src=\"figures\u002FMiniMaxLogo-Light.png\" width=\"60%\" alt=\"MiniMax\">\n    \u003C\u002Fsource>\n  \u003C\u002Fpicture>\n\u003C\u002Fdiv>\n\u003Chr>\n\n\u003Cdiv align=\"center\" style=\"line-height: 1;\">\n  \u003Ca href=\"https:\u002F\u002Fwww.minimax.io\" target=\"_blank\" style=\"margin: 2px; color: var(--fgColor-default);\">\n    \u003Cimg alt=\"Homepage\" 
src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F_Homepage-MiniMax-FF4040?style=flat-square&labelColor=2C3E50&logo=data:image\u002Fsvg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHhtbG5zOnhsaW5rPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5L3hsaW5rIiB2aWV3Qm94PSIwIDAgNDkwLjE2IDQxMS43Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2ZmZjt9PC9zdHlsZT48L2RlZnM+PHBhdGggY2xhc3M9ImNscy0xIiBkPSJNMjMzLjQ1LDQwLjgxYTE3LjU1LDE3LjU1LDAsMSwwLTM1LjEsMFYzMzEuNTZhNDAuODIsNDAuODIsMCwwLDEtODEuNjMsMFYxNDVhMTcuNTUsMTcuNTUsMCwxLDAtMzUuMDksMHY3OS4wNmE0MC44Miw0MC44MiwwLDAsMS04MS42MywwVjE5NS40MmExMS42MywxMS42MywwLDAsMSwyMy4yNiwwdjI4LjY2YTE3LjU1LDE3LjU1LDAsMCwwLDM1LjEsMFYxNDVBNDAuODIsNDAuODIsMCwwLDEsMTQwLDE0NVYzMzEuNTZhMTcuNTUsMTcuNTUsMCwwLDAsMzUuMSwwVjIxNy41aDBWNDAuODFhNDAuODEsNDAuODEsMCwxLDEsODEuNjIsMFYyODEuNTZhMTEuNjMsMTEuNjMsMCwxLDEtMjMuMjYsMFptMjE1LjksNjMuNEE0MC44Niw0MC44NiwwLDAsMCw0MDguNTMsMTQ1VjMwMC44NWExNy41NSwxNy41NSwwLDAsMS0zNS4wOSwwdi0yNjBhNDAuODIsNDAuODIsMCwwLDAtODEuNjMsMFYzNzAuODlhMTcuNTUsMTcuNTUsMCwwLDEtMzUuMSwwVjMzMGExMS42MywxMS42MywwLDEsMC0yMy4yNiwwdjQwLjg2YTQwLjgxLDQwLjgxLDAsMCwwLDgxLjYyLDBWNDAuODFhMTcuNTUsMTcuNTUsMCwwLDEsMzUuMSwwdjI2MGE0MC44Miw0MC44MiwwLDAsMCw4MS42MywwVjE0NWExNy41NSwxNy41NSwwLDEsMSwzNS4xLDBWMjgxLjU2YTExLjYzLDExLjYzLDAsMCwwLDIzLjI2LDBWMTQ1QTQwLjg1LDQwLjg1LDAsMCwwLDQ0OS4zNSwxMDQuMjFaIi8+PC9zdmc+&logoWidth=20\" style=\"display: inline-block; vertical-align: middle;\"\u002F>\n  \u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Farxiv.org\u002Fabs\u002F2501.08313\" target=\"_blank\" style=\"margin: 2px;\">\n    \u003Cimg alt=\"Paper\" src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F📖_Paper-MiniMax--01-FF4040?style=flat-square&labelColor=2C3E50\" style=\"display: inline-block; vertical-align: middle;\"\u002F>\n  \u003C\u002Fa>\n   \u003Ca href=\"https:\u002F\u002Fchat.minimax.io\u002F\" target=\"_blank\" style=\"margin: 2px;\">\n    \u003Cimg alt=\"Chat\" 
src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F_MiniMax_Chat-FF4040?style=flat-square&labelColor=2C3E50&logo=data:image\u002Fsvg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHhtbG5zOnhsaW5rPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5L3hsaW5rIiB2aWV3Qm94PSIwIDAgNDkwLjE2IDQxMS43Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2ZmZjt9PC9zdHlsZT48L2RlZnM+PHBhdGggY2xhc3M9ImNscy0xIiBkPSJNMjMzLjQ1LDQwLjgxYTE3LjU1LDE3LjU1LDAsMSwwLTM1LjEsMFYzMzEuNTZhNDAuODIsNDAuODIsMCwwLDEtODEuNjMsMFYxNDVhMTcuNTUsMTcuNTUsMCwxLDAtMzUuMDksMHY3OS4wNmE0MC44Miw0MC44MiwwLDAsMS04MS42MywwVjE5NS40MmExMS42MywxMS42MywwLDAsMSwyMy4yNiwwdjI4LjY2YTE3LjU1LDE3LjU1LDAsMCwwLDM1LjEsMFYxNDVBNDAuODIsNDAuODIsMCwwLDEsMTQwLDE0NVYzMzEuNTZhMTcuNTUsMTcuNTUsMCwwLDAsMzUuMSwwVjIxNy41aDBWNDAuODFhNDAuODEsNDAuODEsMCwxLDEsODEuNjIsMFYyODEuNTZhMTEuNjMsMTEuNjMsMCwxLDEtMjMuMjYsMFptMjE1LjksNjMuNEE0MC44Niw0MC44NiwwLDAsMCw0MDguNTMsMTQ1VjMwMC44NWExNy41NSwxNy41NSwwLDAsMS0zNS4wOSwwdi0yNjBhNDAuODIsNDAuODIsMCwwLDAtODEuNjMsMFYzNzAuODlhMTcuNTUsMTcuNTUsMCwwLDEtMzUuMSwwVjMzMGExMS42MywxMS42MywwLDEsMC0yMy4yNiwwdjQwLjg2YTQwLjgxLDQwLjgxLDAsMCwwLDgxLjYyLDBWNDAuODFhMTcuNTUsMTcuNTUsMCwwLDEsMzUuMSwwdjI2MGE0MC44Miw0MC44MiwwLDAsMCw4MS42MywwVjE0NWExNy41NSwxNy41NSwwLDEsMSwzNS4xLDBWMjgxLjU2YTExLjYzLDExLjYzLDAsMCwwLDIzLjI2LDBWMTQ1QTQwLjg1LDQwLjg1LDAsMCwwLDQ0OS4zNSwxMDQuMjFaIi8+PC9zdmc+&logoWidth=20\" style=\"display: inline-block; vertical-align: middle;\"\u002F>\n  \u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fwww.minimax.io\u002Fplatform\" style=\"margin: 2px;\">\n    \u003Cimg alt=\"API\" src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F⚡_API-Platform-FF4040?style=flat-square&labelColor=2C3E50\" style=\"display: inline-block; vertical-align: middle;\"\u002F>\n  \u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FMiniMax-AI\u002FMiniMax-MCP\" style=\"margin: 2px;\">\n    \u003Cimg alt=\"MCP\" src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F🚀_MCP-MiniMax_MCP-FF4040?style=flat-square&labelColor=2C3E50\" 
style=\"display: inline-block; vertical-align: middle;\"\u002F>\n  \u003C\u002Fa> \n\u003C\u002Fdiv>\n\u003Cdiv align=\"center\" style=\"line-height: 1;\">\n  \u003Ca href=\"https:\u002F\u002Fhuggingface.co\u002FMiniMaxAI\" target=\"_blank\" style=\"margin: 2px;\">\n    \u003Cimg alt=\"Hugging Face\" src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F🤗_Hugging_Face-MiniMax-FF4040?style=flat-square&labelColor=2C3E50\" style=\"display: inline-block; vertical-align: middle;\"\u002F>\n  \u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FMiniMax-AI\u002FMiniMax-AI.github.io\u002Fblob\u002Fmain\u002Fimages\u002Fwechat-qrcode.jpeg\" target=\"_blank\" style=\"margin: 2px;\">\n    \u003Cimg alt=\"WeChat\" src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F_WeChat-MiniMax-FF4040?style=flat-square&labelColor=2C3E50\" style=\"display: inline-block; vertical-align: middle;\"\u002F>\n  \u003C\u002Fa>\n\u003C\u002Fdiv>\n\u003Cdiv align=\"center\" style=\"line-height: 1;\">\n  \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FMiniMax-AI\u002FMiniMax-01\u002Fblob\u002Fmain\u002FLICENSE-MODEL\" style=\"margin: 2px;\">\n    \u003Cimg alt=\"Model License\" src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F_Model_License-Model_Agreement-FF4040?style=flat-square&labelColor=2C3E50\" style=\"display: inline-block; vertical-align: middle;\"\u002F>\n  \u003C\u002Fa>\n   \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FMiniMax-AI\u002FMiniMax-01\u002Fblob\u002Fmain\u002FLICENSE-CODE\" style=\"margin: 2px;\">\n    \u003Cimg alt=\"Code License\" src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F_Code_License-MIT-FF4040?style=flat-square&labelColor=2C3E50\" style=\"display: inline-block; vertical-align: middle;\"\u002F>\n  \u003C\u002Fa>\n\u003C\u002Fdiv>\n\n\n# MiniMax-01\n\n## 1. 
Introduction\nWe are delighted to introduce two remarkable models, **MiniMax-Text-01** and **MiniMax-VL-01**.\nMiniMax-Text-01 is a powerful language model boasting 456 billion total parameters, with 45.9 billion activated per token. To unlock its long-context capabilities, it adopts a hybrid architecture integrating Lightning Attention, Softmax Attention, and Mixture-of-Experts (MoE). Leveraging advanced parallel strategies like Linear Attention Sequence Parallelism Plus (LASP+), varlen ring attention, and Expert Tensor Parallel (ETP), its training context length extends to 1 million tokens, and it can handle up to 4 million tokens during inference. Consequently, MiniMax-Text-01 showcases top-tier performance on various academic benchmarks.\nBuilding on MiniMax-Text-01's prowess, we developed MiniMax-VL-01 for enhanced visual capabilities. It uses the \"ViT-MLP-LLM\" framework common in multimodal LLMs. It is initialized and trained using three key components: a 303-million-parameter Vision Transformer (ViT) for visual encoding, a randomly initialized two-layer MLP projector for image adaptation, and MiniMax-Text-01 as the base LLM. This model features a dynamic resolution mechanism. Input images are resized according to a pre-set grid, with resolutions ranging from 336×336 to 2016×2016, while maintaining a 336×336 thumbnail. The resized images are split into non-overlapping patches of the same size. These patches and the thumbnail are encoded separately and then combined to form a full image representation. As a result, MiniMax-VL-01 has achieved top-level performance on multimodal leaderboards, demonstrating its edge in complex multimodal tasks.\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"100%\" src=\"figures\u002FTextBench.png\">\n\u003C\u002Fp>\n\u003Cp align=\"center\">\n  \u003Cimg width=\"100%\" src=\"figures\u002FVisionBench.png\">\n\u003C\u002Fp>\n\n## 2. 
Model Architecture\n\nThe architecture of MiniMax-Text-01 is briefly described as follows:\n- Total Parameters: 456B\n- Activated Parameters per Token: 45.9B\n- Number of Layers: 80\n- Hybrid Attention: a softmax attention is positioned after every 7 lightning attention layers.\n  - Number of attention heads: 64\n  - Attention head dimension: 128\n- Mixture of Experts:\n  - Number of experts: 32\n  - Expert hidden dimension: 9216\n  - Top-2 routing strategy\n- Positional Encoding: Rotary Position Embedding (RoPE) applied to half of the attention head dimension with a base frequency of 10,000,000\n- Hidden Size: 6144\n- Vocab Size: 200,064\n\nFor MiniMax-VL-01, the additional ViT architecture details are as follows:\n- Total Parameters: 303M\n- Number of layers: 24\n- Patch size: 14\n- Hidden size: 1024\n- FFN hidden size: 4096\n- Number of heads: 16\n- Attention head dimension: 64\n\n## 3. Evaluation\n### Text Benchmarks\n\n#### Core Academic Benchmarks\n\n| **Tasks**                     | **GPT-4o (11-20)** | **Claude-3.5-Sonnet (10-22)** | **Gemini-1.5-Pro (002)** | **Gemini-2.0-Flash (exp)** | **Qwen2.5-72B-Inst.** | **DeepSeek-V3** | **Llama-3.1-405B-Inst.** | **MiniMax-Text-01** |\n|-------------------------------|--------------------|-------------------------------|--------------------------|----------------------------|-----------------------|-----------------|--------------------------|---------------------|\n| **General**                   |                    |                               |                          |                            |                       |                 |                          |                     |\n| MMLU\u003Csup>*\u003C\u002Fsup>                      | 85.7               | 88.3                          | 86.8                     | 86.5                       | 86.1                  | 88.5        | **88.6**                 | 88.5                |\n| MMLU-Pro\u003Csup>*\u003C\u002Fsup>                  | 74.4               | 
**78.0**                      | 75.8                     | 76.4                       | 71.1                  | 75.9            | 73.3                     | 75.7                |\n| SimpleQA                      | **39.0**           | 28.1                          | 23.4                     | 26.6                       | 10.3                  | 24.9            | 23.2                     | 23.7                |\n| C-SimpleQA                    | 64.6               | 56.8                          | 59.4                     | 63.3                       | 52.2                  | 64.8            | 54.7                     | **67.4**            |\n| IFEval _(avg)_                | 84.1               | **90.1**                      | 89.4                     | 88.4                       | 87.2                  | 87.3            | 86.4                     | 89.1                |\n| Arena-Hard                    | **92.4**           | 87.6                          | 85.3                     | 72.7                       | 81.2                  | 91.4            | 63.5                     | 89.1                |\n| **Reasoning**                 |                    |                               |                          |                            |                       |                 |                          |                     |\n| GPQA\u003Csup>*\u003C\u002Fsup> _(diamond)_          | 46.0               | **65.0**                      | 59.1                     | 62.1                       | 49.0                  | 59.1            | 50.7                     | 54.4                |\n| DROP\u003Csup>*\u003C\u002Fsup> _(F1)_               | 89.2               | 88.8                          | 89.2                     | 89.3                       | 85.0                  | 91.0        | **92.5**                 | 87.8                |\n| **Mathematics**               |                    |                               |                          |                    
        |                       |                 |                          |                     |\n| GSM8k\u003Csup>*\u003C\u002Fsup>                     | 95.6               | **96.9**                      | 95.2                     | 95.4                       | 95.8                  | 96.7            | 96.7                     | 94.8                |\n| MATH\u003Csup>*\u003C\u002Fsup>                      | 76.6               | 74.1                          | **84.6**                 | 83.9                       | 81.8                  | **84.6**        | 73.8                     | 77.4                |\n| **Coding**                    |                    |                               |                          |                            |                       |                 |                          |                     |\n| MBPP +                        | 76.2               | 75.1                          | 75.4                     | 75.9                       | 77.0              | **78.8**        | 73.0                     | 71.7                |\n| HumanEval                     | 90.2               | **93.7**                      | 86.6                     | 89.6                       | 86.6                  | 92.1            | 89.0                     | 86.9                |\n\n\u003Csup>*\u003C\u002Fsup> Evaluated following a _0-shot CoT_ setting.\n\n#### Long Benchmarks\n**4M Needle In A Haystack Test**\n\u003Cp align=\"center\">\n  \u003Cimg width=\"90%\" src=\"figures\u002Fniah.png\">\n\u003C\u002Fp>\n\n**Ruler**\n| Model | 4k | 8k | 16k | 32k | 64k | 128k | 256k | 512k | 1M |\n|-------|----|----|-----|-----|-----|------|------|------|----|\n| **GPT-4o (11-20)** | **0.970** | 0.921 | 0.890 | 0.888 | 0.884 | - | - | - | - |\n| **Claude-3.5-Sonnet (10-22)** | 0.965 | 0.960 | 0.957 | 0.950 | **0.952** | 0.938 | - | - | - |\n| **Gemini-1.5-Pro (002)** | 0.962 | 0.960 | **0.960** | **0.958** | 0.938 | 0.917 | 0.916 | 0.861 | 0.850 |\n| 
**Gemini-2.0-Flash (exp)** | 0.960 | 0.960 | 0.951 | 0.957 | 0.937 | 0.860 | 0.797 | 0.709 | - |\n| **MiniMax-Text-01** | 0.963 | **0.961** | 0.953 | 0.954 | 0.943 | **0.947** | **0.945** | **0.928** | **0.910** |\n\n**LongBench v2**\n| **Model**                  | **overall** | **easy** | **hard** | **short** | **medium** | **long** |\n|----------------------------|-------------|----------|----------|------------|------------|----------|\n| Human                      | 53.7        | 100.0    | 25.1     | 47.2       | 59.1       | 53.7     |\n| **w\u002F CoT**                 |             |          |          |            |            |          |\n| GPT-4o (11-20)             | 51.4        | 54.2     | 49.7     | 59.6       | 48.6       | 43.5     |\n| Claude-3.5-Sonnet (10-22)  | 46.7        | 55.2     | 41.5     | 53.9       | 41.9       | 44.4     |\n| Deepseek-V3                | -           | -        | -        | -          | -          | -        |\n| Qwen2.5-72B-Inst.          | 43.5        | 47.9     | 40.8     | 48.9       | 40.9       | 39.8     |\n| **MiniMax-Text-01**        | **56.5**    | **66.1** | **50.5** | **61.7**   | **56.7**   | **47.2** |\n| **w\u002Fo CoT**                |             |          |          |            |            |          |\n| GPT-4o (11-20)             | 50.1        | 57.4     | 45.6     | 53.3       | 52.4       | 40.2     |\n| Claude-3.5-Sonnet (10-22)  | 41.0        | 46.9     | 37.3     | 46.1       | 38.6       | 37.0     |\n| Deepseek-V3                | 48.7        | -        | -        | -          | -          | -        |\n| Qwen2.5-72B-Inst.          
| 42.1        | 42.7     | 41.8     | 45.6       | 38.1       | **44.4** |\n| **MiniMax-Text-01**        | **52.9**    | **60.9** | **47.9** | **58.9**   | **52.6**   | 43.5     |\n\n**MTOB**\n| **Context Type** | **no context** | **half book** | **full book** | **Δ half book** | **Δ full book** |\n|------------------|----------------|---------------|---------------|------------------|-----------------|\n| **eng → kalam (ChrF)** | | | | | |\n| GPT-4o (11-20) | 9.90 | **54.30** | - | 44.40 | - |\n| Claude-3.5-Sonnet (10-22) | 20.22 | 53.62 | 55.65 | 33.39 | 35.42 |\n| Gemini-1.5-Pro (002) | 16.79 | 53.68 | **57.90** | 36.89 | 41.11 |\n| Gemini-2.0-Flash (exp) | 12.20 | 49.50 | 53.30 | 37.30 | 41.10 |\n| Qwen-Long | 16.55 | 48.48 | 45.94 | 31.92 | 29.39 |\n| **MiniMax-Text-01** | 6.0 | 51.74 | 51.60 | **45.7** | **45.6** |\n| **kalam → eng (BLEURT)** | | | | | |\n| GPT-4o (11-20) | 33.20 | 58.30 | - | 25.10 | - |\n| Claude-3.5-Sonnet (10-22) | 31.42 | 59.70 | 62.30 | 28.28 | 30.88 |\n| Gemini-1.5-Pro (002) | 32.02 | **61.52** | **63.09** | **29.50** | **31.07** |\n| Gemini-2.0-Flash (exp) | 33.80 | 57.50 | 57.00 | 23.70 | 23.20 |\n| Qwen-Long | 30.13 | 53.14 | 32.15 | 23.01 | 2.02 |\n| **MiniMax-Text-01** | 33.65 | 57.10 | 58.00 | 23.45 | 24.35 |\n\n### Vision Benchmarks\n\n| Tasks | GPT-4o\u003Cbr>(11-20) | Claude-3.5-Sonnet (10-22) | Gemini-1.5-Pro (002) | Gemini-2.0-Flash (exp) | Qwen2-VL-72B-Inst. 
| InternVL2.5-78B | LLama-3.2-90B | MiniMax-VL-01 |\n| ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- |\n| **Knowledge** |  |  |  |  |  |  |  |  |\n| MMMU\u003Csup>*\u003C\u002Fsup> | 63.5 | **72.0** | 68.4  | 70.6  | 64.5 | 66.5 | 62.1 | 68.5 |\n| MMMU-Pro\u003Csup>*\u003C\u002Fsup>  |  54.5 | 54.7 | 50.9 | **57.0**  | 43.2 | 47.3 | 36.0 | 52.7 |\n| **Visual Q&A** |  |  |  |  |  |  |  |  |\n| ChartQA\u003Csup>*\u003C\u002Fsup>\u003Csub>relaxed\u003C\u002Fsub> | 88.1 | 90.8 | 88.7 | 88.3 | 91.2 | 91.5 | 85.5 | **91.7** |\n| DocVQA\u003Csup>*\u003C\u002Fsup>  | 91.1 | 94.2 | 91.5 | 92.9 | **97.1** | 96.1 | 90.1 | 96.4 |\n| OCRBench | 806 | 790 | 800 | 846  | 856 | 847 | 805 | **865** |\n| **Mathematics & Sciences** ||  |  |  |  |  |  |  |\n| AI2D\u003Csup>*\u003C\u002Fsup> | 83.1 | 82.0 | 80.9 | 85.1 | 84.4 | **86.8** | 78.9 | 83.3 |\n| MathVista\u003Csup>*\u003C\u002Fsup>  | 62.1 | 65.4 | 70.6 | **73.1** | 69.6 | 68.4 | 57.3 | 68.6 |\n| OlympiadBench\u003Csub>full\u003C\u002Fsub> | 25.2 | 28.4 | 32.1 | **46.1** | 21.9 | 25.1 | 19.3 | 24.2 |\n|**Long Context**|||||\n|M-LongDoc\u003Csub>acc\u003C\u002Fsub>| **41.4** | 31.4 | 26.2 | 31.4 | 11.6 | 19.7 | 13.9 | 32.5 |\n|**Comprehensive**|||||\n|MEGA-Bench\u003Csub>macro\u003C\u002Fsub> | 49.4 | 51.4 | 45.9 | **53.9** | 46.8 | 45.3 | 19.9 | 47.4 |\n|**User Experience**|||||\n|In-house Benchmark | 62.3 | 47.0 | 49.2 | **72.1** | 40.6 | 34.8 | 13.6 | 56.6 |\n\n\u003Csup>*\u003C\u002Fsup> Evaluated following a _0-shot CoT_ setting.\n\n\n## 4. 
Quickstart\nHere, we provide a simple example  to demonstrate how to use MiniMax-Text-01 and MiniMax-VL-01 respectively.\n\n### MiniMax-Text-01\n```python\nfrom transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, QuantoConfig, GenerationConfig\n\n# load hf config\nhf_config = AutoConfig.from_pretrained(\"MiniMaxAI\u002FMiniMax-Text-01\", trust_remote_code=True)\n\n# quantization config, int8 is recommended\nquantization_config =  QuantoConfig(\n            weights=\"int8\",\n            modules_to_not_convert=[\n                \"lm_head\",\n                \"embed_tokens\",\n            ] + [f\"model.layers.{i}.coefficient\" for i in range(hf_config.num_hidden_layers)]\n            + [f\"model.layers.{i}.block_sparse_moe.gate\" for i in range(hf_config.num_hidden_layers)]\n        )\n\n# assume 8 GPUs\nworld_size = 8\nlayers_per_device = hf_config.num_hidden_layers \u002F\u002F world_size\n# set device map\ndevice_map = {\n    'model.embed_tokens': 'cuda:0',\n    'model.norm': f'cuda:{world_size - 1}',\n    'lm_head': f'cuda:{world_size - 1}'\n}\nfor i in range(world_size):\n    for j in range(layers_per_device):\n        device_map[f'model.layers.{i * layers_per_device + j}'] = f'cuda:{i}'\n\n# load tokenizer\ntokenizer = AutoTokenizer.from_pretrained(\"MiniMaxAI\u002FMiniMax-Text-01\")\nprompt = \"Hello!\"\nmessages = [\n    {\"role\": \"system\", \"content\": [{\"type\": \"text\", \"text\": \"You are a helpful assistant created by MiniMax based on MiniMax-Text-01 model.\"}]},\n    {\"role\": \"user\", \"content\": [{\"type\": \"text\", \"text\": prompt}]},\n]\ntext = tokenizer.apply_chat_template(\n    messages,\n    tokenize=False,\n    add_generation_prompt=True\n)\n# tokenize and move to device\nmodel_inputs = tokenizer(text, return_tensors=\"pt\").to(\"cuda\")\n\n# load bfloat16 model, move to device, and apply quantization\nquantized_model = AutoModelForCausalLM.from_pretrained(\n    \"MiniMaxAI\u002FMiniMax-Text-01\",\n    
torch_dtype=\"bfloat16\",\n    device_map=device_map,\n    quantization_config=quantization_config,\n    trust_remote_code=True,\n    offload_buffers=True,\n)\n\n# generate response\ngeneration_config = GenerationConfig(\n    max_new_tokens=20,\n    eos_token_id=200020,\n    use_cache=True,\n)\ngenerated_ids = quantized_model.generate(**model_inputs, generation_config=generation_config)\nprint(f\"generated_ids: {generated_ids}\")\ngenerated_ids = [\n    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n]\nresponse = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]\n```\n\n### MiniMax-VL-01\n```python\nfrom transformers import AutoModelForCausalLM, AutoProcessor, AutoConfig, QuantoConfig, GenerationConfig\nimport torch\nimport json\nimport os\nfrom PIL import Image\n\n# load hf config\nhf_config = AutoConfig.from_pretrained(\"MiniMaxAI\u002FMiniMax-VL-01\", trust_remote_code=True)\n\n# quantization config, int8 is recommended\nquantization_config =  QuantoConfig(\n            weights=\"int8\",\n            modules_to_not_convert=[\n                \"vision_tower\",\n                \"image_newline\",\n                \"multi_modal_projector\",\n                \"lm_head\",\n                \"embed_tokens\",\n            ] + [f\"model.layers.{i}.coefficient\" for i in range(hf_config.text_config.num_hidden_layers)]\n            + [f\"model.layers.{i}.block_sparse_moe.gate\" for i in range(hf_config.text_config.num_hidden_layers)]\n        )\n\n# set device map\nmodel_safetensors_index_path = os.path.join(\"MiniMax-VL-01\", \"model.safetensors.index.json\")\nwith open(model_safetensors_index_path, \"r\") as f:\n    model_safetensors_index = json.load(f)\nweight_map = model_safetensors_index['weight_map']\nvision_map = {}\nfor key, value in weight_map.items():\n    if 'vision_tower' in key or 'image_newline' in key or 'multi_modal_projector' in key:\n        new_key = 
key.replace('.weight','').replace('.bias','')\n        if new_key not in vision_map:\n            vision_map[new_key] = value\n# assume 8 GPUs\nworld_size = 8\ndevice_map = {\n    'language_model.model.embed_tokens': 'cuda:0',\n    'language_model.model.norm': f'cuda:{world_size - 1}',\n    'language_model.lm_head': f'cuda:{world_size - 1}'\n}\nfor key, value in vision_map.items():\n    device_map[key] = f'cuda:0'\ndevice_map['vision_tower.vision_model.post_layernorm'] = f'cuda:0'\nlayers_per_device = hf_config.text_config.num_hidden_layers \u002F\u002F world_size\nfor i in range(world_size):\n    for j in range(layers_per_device):\n        device_map[f'language_model.model.layers.{i * layers_per_device + j}'] = f'cuda:{i}'\n\n# load processor\nprocessor = AutoProcessor.from_pretrained(\"MiniMaxAI\u002FMiniMax-VL-01\", trust_remote_code=True)\nmessages = [\n    {\"role\": \"system\", \"content\": [{\"type\": \"text\", \"text\": \"You are a helpful assistant created by MiniMax based on MiniMax-VL-01 model.\"}]},\n    {\"role\": \"user\", \"content\": [{\"type\": \"image\", \"image\": \"placeholder\"},{\"type\": \"text\", \"text\": \"Describe this image.\"}]},\n]\nprompt = processor.tokenizer.apply_chat_template(\n    messages, tokenize=False, add_generation_prompt=True\n)\nraw_image = Image.open(\"figures\u002Fimage.jpg\")\n# tokenize and move to device\nmodel_inputs = processor(images=[raw_image], text=prompt, return_tensors='pt').to('cuda').to(torch.bfloat16)\n\n# load bfloat16 model, move to device, and apply quantization\nquantized_model = AutoModelForCausalLM.from_pretrained(\n    \"MiniMaxAI\u002FMiniMax-VL-01\",\n    torch_dtype=\"bfloat16\",\n    device_map=device_map,\n    quantization_config=quantization_config,\n    trust_remote_code=True,\n    offload_buffers=True,\n)\ngeneration_config = GenerationConfig(\n    max_new_tokens=100,\n    eos_token_id=200020,\n    use_cache=True,\n)\n\n# generate response\ngenerated_ids = 
quantized_model.generate(**model_inputs, generation_config=generation_config)\nprint(f\"generated_ids: {generated_ids}\")\ngenerated_ids = [\n    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n]\nresponse = processor.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]\n```\n\n## 5. Deployment Guide\n\nFor production deployment, we recommend using [vLLM](https:\u002F\u002Fdocs.vllm.ai\u002Fen\u002Flatest\u002F) to serve MiniMax-Text-01 and MiniMax-VL-01. vLLM provides excellent performance for serving large language models with the following features:\n\n- 🔥 Outstanding service throughput performance\n- ⚡ Efficient and intelligent memory management\n- 📦 Powerful batch request processing capability\n- ⚙️ Deeply optimized underlying performance\n\nFor detailed vLLM deployment instructions, please refer to our [vLLM Deployment Guide](docs\u002Fvllm_deployment_guide.md).\n\nAlternatively, you can also deploy using Transformers directly. For detailed Transformers deployment instructions, you can see our [MiniMax-Text-01 Transformers Deployment Guide](docs\u002Ftransformers_deployment_guide.md).\n\n## 6. 
Citation\n\n```\n@misc{minimax2025minimax01scalingfoundationmodels,\n      title={MiniMax-01: Scaling Foundation Models with Lightning Attention}, \n      author={MiniMax and Aonian Li and Bangwei Gong and Bo Yang and Boji Shan and Chang Liu and Cheng Zhu and Chunhao Zhang and Congchao Guo and Da Chen and Dong Li and Enwei Jiao and Gengxin Li and Guojun Zhang and Haohai Sun and Houze Dong and Jiadai Zhu and Jiaqi Zhuang and Jiayuan Song and Jin Zhu and Jingtao Han and Jingyang Li and Junbin Xie and Junhao Xu and Junjie Yan and Kaishun Zhang and Kecheng Xiao and Kexi Kang and Le Han and Leyang Wang and Lianfei Yu and Liheng Feng and Lin Zheng and Linbo Chai and Long Xing and Meizhi Ju and Mingyuan Chi and Mozhi Zhang and Peikai Huang and Pengcheng Niu and Pengfei Li and Pengyu Zhao and Qi Yang and Qidi Xu and Qiexiang Wang and Qin Wang and Qiuhui Li and Ruitao Leng and Shengmin Shi and Shuqi Yu and Sichen Li and Songquan Zhu and Tao Huang and Tianrun Liang and Weigao Sun and Weixuan Sun and Weiyu Cheng and Wenkai Li and Xiangjun Song and Xiao Su and Xiaodong Han and Xinjie Zhang and Xinzhu Hou and Xu Min and Xun Zou and Xuyang Shen and Yan Gong and Yingjie Zhu and Yipeng Zhou and Yiran Zhong and Yongyi Hu and Yuanxiang Fan and Yue Yu and Yufeng Yang and Yuhao Li and Yunan Huang and Yunji Li and Yunpeng Huang and Yunzhi Xu and Yuxin Mao and Zehan Li and Zekang Li and Zewei Tao and Zewen Ying and Zhaoyang Cong and Zhen Qin and Zhenhua Fan and Zhihang Yu and Zhuo Jiang and Zijia Wu},\n      year={2025},\n      eprint={2501.08313},\n      archivePrefix={arXiv},\n      primaryClass={cs.CL},\n      url={https:\u002F\u002Farxiv.org\u002Fabs\u002F2501.08313}, \n}\n```\n\n## 7. Chatbot & API\nFor general use and evaluation, we provide a [Chatbot](https:\u002F\u002Fchat.minimax.io\u002F) with online search capabilities and the [online API](https:\u002F\u002Fwww.minimax.io\u002Fplatform) for developers. 
We also provide the [MiniMax MCP Server](https:\u002F\u002Fgithub.com\u002FMiniMax-AI\u002FMiniMax-MCP), which offers video generation, image generation, speech synthesis, and voice cloning for developers.\n\nContact us at [model@minimax.io](mailto:model@minimax.io).\n","MiniMax-01 是一个基于线性注意力机制的大型语言模型和视觉-语言模型项目。该项目的核心功能包括处理文本生成与理解任务，以及结合视觉信息进行多模态分析。技术上，它采用了先进的线性注意力机制以提高在大规模数据集上的训练效率和性能表现。适用于需要高效自然语言处理能力或跨模态内容理解的应用场景，如智能客服、内容创作辅助工具及多媒体信息检索系统等。",2,"2026-06-11 03:41:12","high_star"]