[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"project-72240":3},{"id":4,"name":5,"fullName":6,"owner":7,"repo":5,"description":8,"homepage":9,"htmlUrl":10,"language":11,"languages":10,"totalLinesOfCode":10,"stars":12,"forks":13,"watchers":14,"openIssues":15,"contributorsCount":16,"subscribersCount":16,"size":16,"stars1d":17,"stars7d":18,"stars30d":19,"stars90d":16,"forks30d":16,"starsTrendScore":20,"compositeScore":21,"rankGlobal":10,"rankLanguage":10,"license":22,"archived":23,"fork":23,"defaultBranch":24,"hasWiki":23,"hasPages":23,"topics":25,"createdAt":10,"pushedAt":10,"updatedAt":29,"readmeContent":30,"aiSummary":31,"trendingCount":16,"starSnapshotCount":16,"syncStatus":32,"lastSyncTime":33,"discoverSource":34},72240,"MAGI-1","SandAI-org\u002FMAGI-1","SandAI-org","MAGI-1: Autoregressive Video Generation at Scale","https:\u002F\u002Fsand.ai",null,"Python",3706,237,44,38,0,3,7,22,9,70.83,"Apache License 2.0",false,"main",[26,27,28],"autoregressive","diffusion-models","video-generation","2026-06-12 04:01:04","![magi-logo](figures\u002Flogo_black.png)\n\n\n-----\n\n\u003Cp align=\"center\">\n    \u003Ca href=\"https:\u002F\u002Farxiv.org\u002Fabs\u002F2505.13211\">\u003Cimg alt=\"paper\" src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FPaper-arXiv-B31B1B?logo=arxiv\">\u003C\u002Fa>\n    \u003Ca href=\"https:\u002F\u002Fsand.ai\">\u003Cimg alt=\"blog\" 
src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FSand%20AI-Homepage-333333.svg?logo=data:image\u002Fsvg%2bxml;base64,PHN2ZyB3aWR0aD0iODAwIiBoZWlnaHQ9IjgwMCIgdmlld0JveD0iMCAwIDgwMCA4MDAiIGZpbGw9Im5vbmUiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyI+CjxwYXRoIGZpbGwtcnVsZT0iZXZlbm9kZCIgY2xpcC1ydWxlPSJldmVub2RkIiBkPSJNMjI3IDIyNS4wODVDMjI3IDIwMi4zMDMgMjI3IDE5MC45MTIgMjMxLjQzNyAxODIuMjExQzIzNS4zMzkgMTc0LjU1NyAyNDEuNTY2IDE2OC4zMzQgMjQ5LjIyNiAxNjQuNDM0QzI1Ny45MzMgMTYwIDI2OS4zMzIgMTYwIDI5Mi4xMjkgMTYwSDUwNy44NzFDNTA5LjI5NSAxNjAgNTEwLjY3NiAxNjAgNTEyLjAxNCAxNjAuMDAxQzUzMi4wODIgMTYwLjAxNyA1NDIuNjExIDE2MC4yNzcgNTUwLjc3NCAxNjQuNDM0QzU1OC40MzQgMTY4LjMzNCA1NjQuNjYxIDE3NC41NTcgNTY4LjU2MyAxODIuMjExQzU3MyAxOTAuOTEyIDU3MyAyMDIuMzAzIDU3MyAyMjUuMDg1VjI1Ni41NThDNTczIDI5MS4zMTkgNTczIDMwOC43IDU2NS4wMzUgMzIzLjI3OUM1NTguNzU2IDMzNC43NzIgNTQzLjU2NSAzNDYuMTEgNTIzLjA3OCAzNTkuNjA1QzUxNC42NzQgMzY1LjE0MSA1MTAuNDcyIDM2Ny45MDkgNTA1LjYzOSAzNjcuOTM2QzUwMC44MDYgMzY3Ljk2NCA0OTYuNTAzIDM2NS4yIDQ4Ny44OTYgMzU5LjY3MUw0ODcuODk2IDM1OS42N0w0NjYuNDY5IDM0NS45MDVDNDU2Ljg3NSAzMzkuNzQyIDQ1Mi4wNzggMzM2LjY2IDQ1Mi4wNzggMzMyLjIxOEM0NTIuMDc4IDMyNy43NzcgNDU2Ljg3NSAzMjQuNjk1IDQ2Ni40NjkgMzE4LjUzMUw1MjYuNzgyIDI3OS43ODVDNTM1LjI5MSAyNzQuMzE5IDU0MC40MzUgMjY0LjkwMyA1NDAuNDM1IDI1NC43OTRDNTQwLjQzNSAyMzguMzg2IDUyNy4xMjUgMjI1LjA4NSA1MTAuNzA1IDIyNS4wODVIMjg5LjI5NUMyNzIuODc1IDIyNS4wODUgMjU5LjU2NSAyMzguMzg2IDI1OS41NjUgMjU0Ljc5NEMyNTkuNTY1IDI2NC45MDMgMjY0LjcwOSAyNzQuMzE5IDI3My4yMTggMjc5Ljc4NUw1MTMuMTggNDMzLjk0MUM1NDIuNDQxIDQ1Mi43MzggNTU3LjA3MSA0NjIuMTM3IDU2NS4wMzUgNDc2LjcxNkM1NzMgNDkxLjI5NCA1NzMgNTA4LjY3NSA1NzMgNTQzLjQzNlY1NzQuOTE1QzU3MyA1OTcuNjk3IDU3MyA2MDkuMDg4IDU2OC41NjMgNjE3Ljc4OUM1NjQuNjYxIDYyNS40NDQgNTU4LjQzNCA2MzEuNjY2IDU1MC43NzQgNjM1LjU2NkM1NDIuMDY3IDY0MCA1MzAuNjY4IDY0MCA1MDcuODcxIDY0MEgyOTIuMTI5QzI2OS4zMzIgNjQwIDI1Ny45MzMgNjQwIDI0OS4yMjYgNjM1LjU2NkMyNDEuNTY2IDYzMS42NjYgMjM1LjMzOSA2MjUuNDQ0IDIzMS40MzcgNjE3Ljc4OUMyMjcgNjA5LjA4OCAyMjcgNTk3LjY5NyAyMjcgNTc0LjkxNVY1NDMuNDM2QzIyNyA1MDguNjc1IDIyNyA0OTEuMjk0IDIzNC45NjUgNDc2LjcxNkMyN
DEuMjQ0IDQ2NS4yMjIgMjU2LjQzMyA0NTMuODg2IDI3Ni45MTggNDQwLjM5MkMyODUuMzIyIDQzNC44NTYgMjg5LjUyNSA0MzIuMDg4IDI5NC4zNTcgNDMyLjA2QzI5OS4xOSA0MzIuMDMyIDMwMy40OTQgNDM0Ljc5NyAzMTIuMSA0NDAuMzI2TDMzMy41MjcgNDU0LjA5MUMzNDMuMTIyIDQ2MC4yNTQgMzQ3LjkxOSA0NjMuMzM2IDM0Ny45MTkgNDY3Ljc3OEMzNDcuOTE5IDQ3Mi4yMiAzNDMuMTIyIDQ3NS4zMDEgMzMzLjUyOCA0ODEuNDY1TDMzMy41MjcgNDgxLjQ2NUwyNzMuMjIgNTIwLjIwOEMyNjQuNzA5IDUyNS42NzUgMjU5LjU2NSA1MzUuMDkxIDI1OS41NjUgNTQ1LjIwMkMyNTkuNTY1IDU2MS42MTIgMjcyLjg3NyA1NzQuOTE1IDI4OS4yOTkgNTc0LjkxNUg1MTAuNzAxQzUyNy4xMjMgNTc0LjkxNSA1NDAuNDM1IDU2MS42MTIgNTQwLjQzNSA1NDUuMjAyQzU0MC40MzUgNTM1LjA5MSA1MzUuMjkxIDUyNS42NzUgNTI2Ljc4IDUyMC4yMDhMMjg2LjgyIDM2Ni4wNTNDMjU3LjU2IDM0Ny4yNTYgMjQyLjkyOSAzMzcuODU3IDIzNC45NjUgMzIzLjI3OUMyMjcgMzA4LjcgMjI3IDI5MS4zMTkgMjI3IDI1Ni41NThWMjI1LjA4NVoiIGZpbGw9IiNGRkZGRkYiLz4KPC9zdmc+Cg==\">\u003C\u002Fa>\n    \u003Ca href=\"https:\u002F\u002Fmagi.sand.ai\">\u003Cimg alt=\"product\" src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FMagi-Product-logo.svg?logo=data:image\u002Fsvg%2bxml;base64,PHN2ZyB3aWR0aD0iODAwIiBoZWlnaHQ9IjgwMCIgdmlld0JveD0iMCAwIDgwMCA4MDAiIGZpbGw9Im5vbmUiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyI+CjxwYXRoIGZpbGwtcnVsZT0iZXZlbm9kZCIgY2xpcC1ydWxlPSJldmVub2RkIiBkPSJNNDY5LjAyNyA1MDcuOTUxVjE4MC4zNjRDNDY5LjAyNyAxNjguNDE2IDQ2OS4wMjcgMTYyLjQ0MiA0NjUuMjQ0IDE2MC41MTlDNDYxLjQ2MSAxNTguNTk2IDQ1Ni42NTkgMTYyLjEzIDQ0Ny4wNTYgMTY5LjE5OEwzNjEuMDQ4IDIzMi40OTZDMzQ2LjI5NiAyNDMuMzUzIDMzOC45MjEgMjQ4Ljc4MSAzMzQuOTQ3IDI1Ni42NUMzMzAuOTczIDI2NC41MTggMzMwLjk3MyAyNzMuNjk1IDMzMC45NzMgMjkyLjA0OVY2MTkuNjM2QzMzMC45NzMgNjMxLjU4NCAzMzAuOTczIDYzNy41NTggMzM0Ljc1NiA2MzkuNDgxQzMzOC41MzkgNjQxLjQwNCAzNDMuMzQxIDYzNy44NyAzNTIuOTQ0IDYzMC44MDJMNDM4Ljk1MiA1NjcuNTA0QzQ1My43MDQgNTU2LjY0OCA0NjEuMDggNTUxLjIxOSA0NjUuMDUzIDU0My4zNUM0NjkuMDI3IDUzNS40ODIgNDY5LjAyNyA1MjYuMzA1IDQ2OS4wMjcgNTA3Ljk1MVpNMjg3LjkwNyA0OTQuMTU1VjIyMS45M0MyODcuOTA3IDIxNC4wMDIgMjg3LjkwNyAyMTAuMDM5IDI4NS4zOTQgMjA4Ljc1NEMyODIuODgxIDIwNy40NyAyNzkuNjg0IDIwOS44MDEgMjczLjI5MiAyMTQuNDYyTDIwOS40MjEgMjYxLjAzMkMxO
TguMjYyIDI2OS4xNjggMTkyLjY4MyAyNzMuMjM2IDE4OS42NzUgMjc5LjE2QzE4Ni42NjcgMjg1LjA4NCAxODYuNjY3IDI5Mi4wMDMgMTg2LjY2NyAzMDUuODQxVjU3OC4wNjdDMTg2LjY2NyA1ODUuOTk0IDE4Ni42NjcgNTg5Ljk1OCAxODkuMTggNTkxLjI0MkMxOTEuNjkzIDU5Mi41MjYgMTk0Ljg4OSA1OTAuMTk2IDIwMS4yODIgNTg1LjUzNUwyNjUuMTUyIDUzOC45NjVDMjc2LjMxMSA1MzAuODI5IDI4MS44OSA1MjYuNzYxIDI4NC44OTkgNTIwLjgzN0MyODcuOTA3IDUxNC45MTMgMjg3LjkwNyA1MDcuOTk0IDI4Ny45MDcgNDk0LjE1NVpNNjEzLjMzMyAyMjEuOTNWNDk0LjE1NUM2MTMuMzMzIDUwNy45OTQgNjEzLjMzMyA1MTQuOTEzIDYxMC4zMjUgNTIwLjgzN0M2MDcuMzE3IDUyNi43NjEgNjAxLjczOCA1MzAuODI5IDU5MC41NzkgNTM4Ljk2NUw1MjYuNzA4IDU4NS41MzVDNTIwLjMxNiA1OTAuMTk2IDUxNy4xMTkgNTkyLjUyNiA1MTQuNjA2IDU5MS4yNDJDNTEyLjA5MyA1ODkuOTU4IDUxMi4wOTMgNTg1Ljk5NCA1MTIuMDkzIDU3OC4wNjdWMzA1Ljg0MUM1MTIuMDkzIDI5Mi4wMDMgNTEyLjA5MyAyODUuMDg0IDUxNS4xMDIgMjc5LjE2QzUxOC4xMSAyNzMuMjM2IDUyMy42ODkgMjY5LjE2OCA1MzQuODQ4IDI2MS4wMzJMNTk4LjcxOSAyMTQuNDYyQzYwNS4xMTEgMjA5LjgwMSA2MDguMzA3IDIwNy40NyA2MTAuODIgMjA4Ljc1NEM2MTMuMzMzIDIxMC4wMzkgNjEzLjMzMyAyMTQuMDAyIDYxMy4zMzMgMjIxLjkzWiIgZmlsbD0iI0ZGRkZGRiIgc2hhcGUtcmVuZGVyaW5nPSJjcmlzcEVkZ2VzIi8+Cjwvc3ZnPgo=&color=DCBE7E\">\u003C\u002Fa>\n    \u003Ca href=\"https:\u002F\u002Fhuggingface.co\u002Fsand-ai\">\u003Cimg alt=\"Hugging Face\"\n    src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F%F0%9F%A4%97%20Hugging%20Face-Sand AI-ffc107?color=ffc107&logoColor=white\"\u002F>\u003C\u002Fa>\n     \u003Ca href=\"https:\u002F\u002Fx.com\u002FSandAI_HQ\">\u003Cimg alt=\"Twitter Follow\"\n    src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FTwitter-Sand%20AI-white?logo=x&logoColor=white\"\u002F>\u003C\u002Fa>\n    \u003Ca href=\"https:\u002F\u002Fdiscord.gg\u002FhgaZ86D7Wv\">\u003Cimg alt=\"Discord\"\n    src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FDiscord-Sand%20AI-7289da?logo=discord&logoColor=white&color=7289da\"\u002F>\u003C\u002Fa>\n    \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FSandAI-org\u002FMAGI-1\u002FLICENSE\">\u003Cimg alt=\"license\" 
src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FLicense-Apache2.0-green?logo=Apache\">\u003C\u002Fa>\n\u003C\u002Fp>\n\n# MAGI-1: Autoregressive Video Generation at Scale\n\nThis repository contains the code for the MAGI-1 model, pre-trained weights and inference code. You can find more information on our [technical report](https:\u002F\u002Fstatic.magi.world\u002Fstatic\u002Ffiles\u002FMAGI_1.pdf) or directly create magic with MAGI-1 [here](http:\u002F\u002Fsand.ai) . 🚀✨\n\n\n## 🔥🔥🔥 Latest News\n\n- May 30, 2025: Support for ComfyUI is added 🎉 — the custom nodes for MAGI-1 are now available. Try them out in your workflows!\n- May 26, 2025: MAGI-1 4.5B distill and distill+quant models have been released 🎉 — we’ve updated the model weights - check it out!\n- May 14, 2025: Added Dify DSL for prompt enhancement 🎉 — import it into Dify to boost prompt quality!\n- Apr 30, 2025: MAGI-1 4.5B model has been released 🎉. We've updated the model weights — check it out!\n- Apr 21, 2025: MAGI-1 is here 🎉. We've released the model weights and inference code — check it out!\n\n\n## 1. About\n\nWe present MAGI-1, a world model that generates videos by ***autoregressively*** predicting a sequence of video chunks, defined as fixed-length segments of consecutive frames. Trained to denoise per-chunk noise that increases monotonically over time, MAGI-1 enables causal temporal modeling and naturally supports streaming generation. It achieves strong performance on image-to-video (I2V) tasks conditioned on text instructions, providing high temporal consistency and scalability, which are made possible by several algorithmic innovations and a dedicated infrastructure stack. MAGI-1 further supports controllable generation via chunk-wise prompting, enabling smooth scene transitions, long-horizon synthesis, and fine-grained text-driven control. 
We believe MAGI-1 offers a promising direction for unifying high-fidelity video generation with flexible instruction control and real-time deployment.\n\n\u003Cdiv align=\"center\">\n  \u003Cvideo src=\"https:\u002F\u002Fgithub.com\u002Fuser-attachments\u002Fassets\u002F5cfa90e0-f6ed-476b-a194-71f1d309903a\" width=\"70%\" poster=\"\"> \u003C\u002Fvideo>\n\u003C\u002Fdiv>\n\n\n## 2. Model Summary\n\n### Transformer-based VAE\n\n- Variational autoencoder (VAE) with transformer-based architecture, 8x spatial and 4x temporal compression.\n- Fastest average decoding time and highly competitive reconstruction quality\n\n### Auto-Regressive Denoising Algorithm\n\nMAGI-1 is an autoregressive denoising video generation model generating videos chunk-by-chunk instead of as a whole. Each chunk (24 frames) is denoised holistically, and the generation of the next chunk begins as soon as the current one reaches a certain level of denoising. This pipeline design enables concurrent processing of up to four chunks for efficient video generation.\n\n![auto-regressive denoising algorithm](figures\u002Falgorithm.png)\n\n### Diffusion Model Architecture\n\nMAGI-1 is built upon the Diffusion Transformer, incorporating several key innovations to enhance training efficiency and stability at scale. These advancements include Block-Causal Attention, Parallel Attention Block, QK-Norm and GQA, Sandwich Normalization in FFN, SwiGLU, and Softcap Modulation. For more details, please refer to the [technical report.](https:\u002F\u002Fstatic.magi.world\u002Fstatic\u002Ffiles\u002FMAGI_1.pdf)\n\u003Cdiv align=\"center\">\n\u003Cimg src=\"figures\u002Fdit_architecture.png\" alt=\"diffusion model architecture\" width=\"500\" \u002F>\n\u003C\u002Fdiv>\n\n### Distillation Algorithm\n\nWe adopt a shortcut distillation approach that trains a single velocity-based model to support variable inference budgets. 
By enforcing a self-consistency constraint—equating one large step with two smaller steps—the model learns to approximate flow-matching trajectories across multiple step sizes. During training, step sizes are cyclically sampled from {64, 32, 16, 8}, and classifier-free guidance distillation is incorporated to preserve conditional alignment. This enables efficient inference with minimal loss in fidelity.\n\n\n## 3. Model Zoo\n\nWe provide the pre-trained weights for MAGI-1, including the 24B and 4.5B models, as well as the corresponding distill and distill+quant models. The model weight links are shown in the table.\n\n| Model                         | Link                                                                 | Recommend Machine             |\n| ------------------------------ | -------------------------------------------------------------------- | ------------------------------- |\n| T5                             | [T5](https:\u002F\u002Fhuggingface.co\u002Fsand-ai\u002FMAGI-1\u002Ftree\u002Fmain\u002Fckpt\u002Ft5)        | -                               |\n| MAGI-1-VAE                     | [MAGI-1-VAE](https:\u002F\u002Fhuggingface.co\u002Fsand-ai\u002FMAGI-1\u002Ftree\u002Fmain\u002Fckpt\u002Fvae) | -                               |\n| MAGI-1-24B                     | [MAGI-1-24B](https:\u002F\u002Fhuggingface.co\u002Fsand-ai\u002FMAGI-1\u002Ftree\u002Fmain\u002Fckpt\u002Fmagi\u002F24B_base) | H100\u002FH800 × 8                   |\n| MAGI-1-24B-distill              | [MAGI-1-24B-distill](https:\u002F\u002Fhuggingface.co\u002Fsand-ai\u002FMAGI-1\u002Ftree\u002Fmain\u002Fckpt\u002Fmagi\u002F24B_distill) | H100\u002FH800 × 8                   |\n| MAGI-1-24B-distill+fp8_quant    | [MAGI-1-24B-distill+quant](https:\u002F\u002Fhuggingface.co\u002Fsand-ai\u002FMAGI-1\u002Ftree\u002Fmain\u002Fckpt\u002Fmagi\u002F24B_distill_quant) | H100\u002FH800 × 4 or RTX 4090 × 8    |\n| MAGI-1-4.5B                    | 
[MAGI-1-4.5B](https:\u002F\u002Fhuggingface.co\u002Fsand-ai\u002FMAGI-1\u002Ftree\u002Fmain\u002Fckpt\u002Fmagi\u002F4.5B_base) | RTX 4090 × 1                    |\n| MAGI-1-4.5B-distill             | [MAGI-1-4.5B-distill](https:\u002F\u002Fhuggingface.co\u002Fsand-ai\u002FMAGI-1\u002Ftree\u002Fmain\u002Fckpt\u002Fmagi\u002F4.5B_distill) | RTX 4090 × 1                    |\n| MAGI-1-4.5B-distill+fp8_quant   | [MAGI-1-4.5B-distill+quant](https:\u002F\u002Fhuggingface.co\u002Fsand-ai\u002FMAGI-1\u002Ftree\u002Fmain\u002Fckpt\u002Fmagi\u002F4.5B_distill_quant) | RTX 4090 × 1                    |\n\n> [!NOTE]\n>\n> For 4.5B models, any machine with at least 24GB of GPU memory is sufficient.\n> If GPU memory is more constrained, you can instead run the 4.5B-distill+fp8_quant model by setting the `window_size` parameter to 1 in the `4.5B_distill_quant_config.json` file. This configuration works on GPUs with at least 12GB of memory.\n\n## 4. Evaluation\n\n### In-house Human Evaluation\n\nMAGI-1 achieves state-of-the-art performance among open-source models like Wan-2.1 and HunyuanVideo and closed-source models like Hailuo (i2v-01), particularly excelling in instruction following and motion quality, positioning it as a strong potential competitor to closed-source commercial models such as Kling.\n\n![inhouse human evaluation](figures\u002Finhouse_human_evaluation.png)\n\n### Physical Evaluation\n\nThanks to the natural advantages of autoregressive architecture, Magi achieves far superior precision in predicting physical behavior on the [Physics-IQ benchmark](https:\u002F\u002Fgithub.com\u002Fgoogle-deepmind\u002Fphysics-IQ-benchmark) through video continuation—significantly outperforming all existing models.\n\n| Model          | Phys. 
IQ Score ↑ | Spatial IoU ↑ | Spatio Temporal ↑ | Weighted Spatial IoU ↑ | MSE ↓  |\n|----------------|------------------|---------------|-------------------|-------------------------|--------|\n| **V2V Models** |                  |               |                   |                         |        |\n| **Magi-24B (V2V)** | **56.02**        | **0.367**     | **0.270**         | **0.304**               | **0.005** |\n| **Magi-4.5B (V2V)** | **42.44**        | **0.234**     | **0.285**         | **0.188**               | **0.007** |\n| VideoPoet (V2V)| 29.50            | 0.204         | 0.164             | 0.137                   | 0.010  |\n| **I2V Models** |                  |               |                   |                         |        |\n| **Magi-24B (I2V)** | **30.23**        | **0.203**     | **0.151**         | **0.154**               | **0.012** |\n| Kling1.6 (I2V) | 23.64            | 0.197         | 0.086             | 0.144                   | 0.025  |\n| VideoPoet (I2V)| 20.30            | 0.141         | 0.126             | 0.087                   | 0.012  |\n| Gen 3 (I2V)    | 22.80            | 0.201         | 0.115             | 0.116                   | 0.015  |\n| Wan2.1 (I2V)   | 20.89            | 0.153         | 0.100             | 0.112                   | 0.023  |\n| Sora (I2V)     | 10.00            | 0.138         | 0.047             | 0.063                   | 0.030  |\n| **GroundTruth**| **100.0**        | **0.678**     | **0.535**         | **0.577**               | **0.002** |\n\n\n## 5. 
How to run\n\n### Environment Preparation\n\nWe provide two ways to run MAGI-1, with the Docker environment being the recommended option.\n\n**Run with Docker Environment (Recommended)**\n\n```bash\ndocker pull sandai\u002Fmagi:latest\n\ndocker run -it --gpus all --privileged --shm-size=32g --name magi --net=host --ipc=host --ulimit memlock=-1 --ulimit stack=6710886 sandai\u002Fmagi:latest \u002Fbin\u002Fbash\n```\n\n**Run with Source Code**\n\n```bash\n# Create a new environment\nconda create -n magi python==3.10.12\n\n# Install pytorch\nconda install pytorch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 pytorch-cuda=12.4 -c pytorch -c nvidia\n\n# Install other dependencies\npip install -r requirements.txt\n\n# Install ffmpeg\nconda install -c conda-forge ffmpeg=4.4\n\n# For GPUs based on the Hopper architecture (e.g., H100\u002FH800), it is recommended to install MagiAttention(https:\u002F\u002Fgithub.com\u002FSandAI-org\u002FMagiAttention) for acceleration. For non-Hopper GPUs, installing MagiAttention is not necessary.\ngit clone git@github.com:SandAI-org\u002FMagiAttention.git\ncd MagiAttention\ngit submodule update --init --recursive\npip install --no-build-isolation .\n```\n\n### Inference Command\n\nTo run the `MagiPipeline`, you can control the input and output by modifying the parameters in the `example\u002F24B\u002Frun.sh` or `example\u002F4.5B\u002Frun.sh` script. Below is an explanation of the key parameters:\n\n#### Parameter Descriptions\n\n- `--config_file`: Specifies the path to the configuration file, which contains model configuration parameters, e.g., `example\u002F24B\u002F24B_config.json`.\n- `--mode`: Specifies the mode of operation. 
Available options are:\n  - `t2v`: Text to Video\n  - `i2v`: Image to Video\n  - `v2v`: Video to Video\n- `--prompt`: The text prompt used for video generation, e.g., `\"Good Boy\"`.\n- `--image_path`: Path to the image file, used only in `i2v` mode.\n- `--prefix_video_path`: Path to the prefix video file, used only in `v2v` mode.\n- `--output_path`: Path where the generated video file will be saved.\n\n#### Bash Script\n\n```bash\n#!\u002Fbin\u002Fbash\n# Run 24B MAGI-1 model\nbash example\u002F24B\u002Frun.sh\n\n# Run 4.5B MAGI-1 model\nbash example\u002F4.5B\u002Frun.sh\n```\n\n#### Customizing Parameters\n\nYou can modify the parameters in `run.sh` as needed. For example:\n\n- To use the Image to Video mode (`i2v`), set `--mode` to `i2v` and provide `--image_path`:\n  ```bash\n  --mode i2v \\\n  --image_path example\u002Fassets\u002Fimage.jpeg \\\n  ```\n\n- To use the Video to Video mode (`v2v`), set `--mode` to `v2v` and provide `--prefix_video_path`:\n  ```bash\n  --mode v2v \\\n  --prefix_video_path example\u002Fassets\u002Fprefix_video.mp4 \\\n  ```\n\nBy adjusting these parameters, you can flexibly control the input and output to meet different requirements.\n\n### Some Useful Configs (for config.json)\n\n> [!NOTE]\n>\n> - If you are running 24B model with RTX 4090 \\* 8, please set `pp_size:2 cp_size: 4`.\n>\n> - Our model supports arbitrary resolutions. 
To accelerate inference process, the default resolution for the 4.5B model is set to 720×720 in the `4.5B_config.json`.\n\n| Config         | Help                                                         |\n| -------------- | ------------------------------------------------------------ |\n| seed           | Random seed used for video generation                        |\n| video_size_h   | Height of the video                                          |\n| video_size_w   | Width of the video                                           |\n| num_frames     | Controls the duration of generated video                     |\n| fps            | Frames per second, 4 video frames correspond to 1 latent_frame |\n| cfg_number     | Base model uses cfg_number==3, distill and quant model uses cfg_number=1 |\n| load           | Directory containing a model checkpoint.                     |\n| t5_pretrained  | Path to load pretrained T5 model                             |\n| vae_pretrained | Path to load pretrained VAE model                            |\n\n## 6. Prompt Enhancement\n\nTo improve prompt quality, we provide a [Dify DSL](\u002Fassets\u002Fprompt_enhancement_dify_dsl.yml) file that can be imported directly into [Dify](https:\u002F\u002Fdify.ai\u002F) to set up a prompt enhancement pipeline. If you’re new to Dify, see [how to create an app from a DSL file](https:\u002F\u002Fdocs.dify.ai\u002Fen\u002Fguides\u002Fapplication-orchestrate\u002Fcreating-an-application#creating-from-a-dsl-file) to get started.\n\n## 7. License\n\nThis project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details.\n\n## 8. Citation\n\nIf you find our code or model useful in your research, please cite:\n\n```bibtex\n@misc{ai2025magi1autoregressivevideogeneration,\n      title={MAGI-1: Autoregressive Video Generation at Scale},\n      author={Sand. 
ai and Hansi Teng and Hongyu Jia and Lei Sun and Lingzhi Li and Maolin Li and Mingqiu Tang and Shuai Han and Tianning Zhang and W. Q. Zhang and Weifeng Luo and Xiaoyang Kang and Yuchen Sun and Yue Cao and Yunpeng Huang and Yutong Lin and Yuxin Fang and Zewei Tao and Zheng Zhang and Zhongshu Wang and Zixun Liu and Dai Shi and Guoli Su and Hanwen Sun and Hong Pan and Jie Wang and Jiexin Sheng and Min Cui and Min Hu and Ming Yan and Shucheng Yin and Siran Zhang and Tingting Liu and Xianping Yin and Xiaoyu Yang and Xin Song and Xuan Hu and Yankai Zhang and Yuqiao Li},\n      year={2025},\n      eprint={2505.13211},\n      archivePrefix={arXiv},\n      primaryClass={cs.CV},\n      url={https:\u002F\u002Farxiv.org\u002Fabs\u002F2505.13211},\n}\n```\n\n## 9. Contact\n\nIf you have any questions, please feel free to raise an issue or contact us at [research@sand.ai](mailto:research@sand.ai) .\n","MAGI-1 是一个用于大规模自回归视频生成的项目。它利用先进的自回归模型和扩散模型技术，能够根据给定条件生成高质量的视频内容。该项目采用Python语言开发，具有良好的可扩展性和灵活性。适用于需要生成连续且连贯视频序列的应用场景，如影视特效制作、虚拟现实内容生成等。开源许可为Apache License 2.0，便于研究者和开发者使用与贡献。",2,"2026-06-11 03:40:59","high_star"]