[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"project-2490":3},{"id":4,"name":5,"fullName":6,"owner":7,"repo":5,"description":8,"homepage":9,"htmlUrl":10,"language":11,"languages":10,"totalLinesOfCode":10,"stars":12,"forks":13,"watchers":14,"openIssues":15,"contributorsCount":16,"subscribersCount":16,"size":16,"stars1d":17,"stars7d":18,"stars30d":19,"stars90d":16,"forks30d":16,"starsTrendScore":20,"compositeScore":21,"rankGlobal":10,"rankLanguage":10,"license":22,"archived":23,"fork":23,"defaultBranch":24,"hasWiki":23,"hasPages":23,"topics":25,"createdAt":10,"pushedAt":10,"updatedAt":28,"readmeContent":29,"aiSummary":30,"trendingCount":16,"starSnapshotCount":16,"syncStatus":31,"lastSyncTime":32,"discoverSource":33},2490,"RAG-Anything","HKUDS\u002FRAG-Anything","HKUDS","\"RAG-Anything: All-in-One RAG Framework\"","http:\u002F\u002Farxiv.org\u002Fabs\u002F2510.12323",null,"Python",21203,2466,112,97,0,58,257,1138,271,45,"MIT License",false,"main",[26,27],"multi-modal-rag","retrieval-augmented-generation","2026-06-12 02:00:41","\u003Cdiv align=\"center\">\n\n\u003Cdiv style=\"margin: 20px 0;\">\n  \u003Cimg src=\".\u002Fassets\u002Flogo.png\" width=\"120\" height=\"120\" alt=\"RAG-Anything Logo\" style=\"border-radius: 20px; box-shadow: 0 8px 32px rgba(0, 217, 255, 0.3);\">\n\u003C\u002Fdiv>\n\n# 🚀 RAG-Anything: All-in-One RAG Framework\n\n\u003Ca href=\"https:\u002F\u002Ftrendshift.io\u002Frepositories\u002F14959\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Ftrendshift.io\u002Fapi\u002Fbadge\u002Frepositories\u002F14959\" alt=\"HKUDS%2FRAG-Anything | Trendshift\" style=\"width: 250px; height: 55px;\" width=\"250\" height=\"55\"\u002F>\u003C\u002Fa>\n\n\u003Cdiv align=\"center\">\n  \u003Cimg src=\"https:\u002F\u002Freadme-typing-svg.herokuapp.com?font=Orbitron&size=24&duration=3000&pause=1000&color=00D9FF&center=true&vCenter=true&width=600&lines=Welcome+to+RAG-Anything;Next-Gen+Multimodal+RAG+System;Powered+by+Advanced+AI+Technology\" alt=\"Typing Animation\" \u002F>\n\u003C\u002Fdiv>\n\n\u003Cdiv align=\"center\">\n  \u003Cdiv style=\"background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 15px; padding: 25px; text-align: center;\">\n    \u003Cp>\n      \u003Ca href='https:\u002F\u002Fgithub.com\u002FHKUDS\u002FRAG-Anything'>\u003Cimg src='https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F🔥Project-Page-00d9ff?style=for-the-badge&logo=github&logoColor=white&labelColor=1a1a2e'>\u003C\u002Fa>\n      \u003Ca href='https:\u002F\u002Farxiv.org\u002Fabs\u002F2510.12323'>\u003Cimg src='https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F📄arXiv-2510.12323-ff6b6b?style=for-the-badge&logo=arxiv&logoColor=white&labelColor=1a1a2e'>\u003C\u002Fa>\n      \u003Ca href='https:\u002F\u002Fgithub.com\u002FHKUDS\u002FLightRAG'>\u003Cimg src='https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F⚡Based%20on-LightRAG-4ecdc4?style=for-the-badge&logo=lightning&logoColor=white&labelColor=1a1a2e'>\u003C\u002Fa>\n    \u003C\u002Fp>\n    \u003Cp>\n      \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FHKUDS\u002FRAG-Anything\u002Fstargazers\">\u003Cimg src='https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FHKUDS\u002FRAG-Anything?color=00d9ff&style=for-the-badge&logo=star&logoColor=white&labelColor=1a1a2e' \u002F>\u003C\u002Fa>\n      \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F🐍Python-3.10-4ecdc4?style=for-the-badge&logo=python&logoColor=white&labelColor=1a1a2e\">\n      \u003Ca href=\"https:\u002F\u002Fpypi.org\u002Fproject\u002Fraganything\u002F\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fpypi\u002Fv\u002Fraganything.svg?style=for-the-badge&logo=pypi&logoColor=white&labelColor=1a1a2e&color=ff6b6b\">\u003C\u002Fa>\n      \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fastral-sh\u002Fuv\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F⚡uv-Ready-ff6b6b?style=for-the-badge&logo=python&logoColor=white&labelColor=1a1a2e\">\u003C\u002Fa>\n    \u003C\u002Fp>\n    \u003Cp>\n      \u003Ca href=\"https:\u002F\u002Fdiscord.gg\u002FyF2MmDJyGJ\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F💬Discord-Community-7289da?style=for-the-badge&logo=discord&logoColor=white&labelColor=1a1a2e\">\u003C\u002Fa>\n      \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FHKUDS\u002FRAG-Anything\u002Fissues\u002F7\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F💬WeChat-Group-07c160?style=for-the-badge&logo=wechat&logoColor=white&labelColor=1a1a2e\">\u003C\u002Fa>\n    \u003C\u002Fp>\n    \u003Cp>\n      \u003Ca href=\"README_zh.md\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F🇨🇳中文版-1a1a2e?style=for-the-badge\">\u003C\u002Fa>\n      \u003Ca href=\"README.md\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F🇺🇸English-1a1a2e?style=for-the-badge\">\u003C\u002Fa>\n    \u003C\u002Fp>\n  \u003C\u002Fdiv>\n\u003C\u002Fdiv>\n\n\u003C\u002Fdiv>\n\n\u003Cdiv align=\"center\">\n  \u003Cdiv style=\"width: 100%; height: 2px; margin: 20px 0; background: linear-gradient(90deg, transparent, #00d9ff, transparent);\">\u003C\u002Fdiv>\n\u003C\u002Fdiv>\n\n\u003Cdiv align=\"center\">\n  \u003Ca href=\"#-quick-start\" style=\"text-decoration: none;\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FQuick%20Start-Get%20Started%20Now-00d9ff?style=for-the-badge&logo=rocket&logoColor=white&labelColor=1a1a2e\">\n  \u003C\u002Fa>\n\u003C\u002Fdiv>\n\n---\n\n\u003Cdiv align=\"center\">\n  \u003Ctable>\n    \u003Ctr>\n      \u003Ctd style=\"vertical-align: middle;\">\n        \u003Cimg src=\".\u002Fassets\u002FLiteWrite.png\"\n             width=\"56\"\n             height=\"56\"\n             alt=\"LiteWrite\"\n             style=\"border-radius: 12px;\" \u002F>\n      \u003C\u002Ftd>\n      \u003Ctd style=\"vertical-align: middle; padding-left: 12px;\">\n        \u003Ca href=\"https:\u002F\u002Flitewrite.ai\">\n          \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F🚀%20LiteWrite-AI%20Native%20LaTeX%20Editor-ff6b6b?style=for-the-badge&logoColor=white&labelColor=1a1a2e\">\n        \u003C\u002Fa>\n      \u003C\u002Ftd>\n    \u003C\u002Ftr>\n  \u003C\u002Ftable>\n\u003C\u002Fdiv>\n\n---\n\n## 🎉 News\n- [X] [2025.10]🎯📢 🚀 We have released the technical report of [RAG-Anything](http:\u002F\u002Farxiv.org\u002Fabs\u002F2510.12323). Access it now to explore our latest research findings.\n- [X] [2025.08]🎯📢 🔍 RAG-Anything now features **VLM-Enhanced Query** mode! When documents include images, the system seamlessly integrates them into VLM for advanced multimodal analysis, combining visual and textual context for deeper insights.\n- [X] [2025.07]🎯📢 RAG-Anything now features a [context configuration module](docs\u002Fcontext_aware_processing.md), enabling intelligent integration of relevant contextual information to enhance multimodal content processing.\n- [X] [2025.07]🎯📢 🚀 RAG-Anything now supports multimodal query capabilities, enabling enhanced RAG with seamless processing of text, images, tables, and equations.\n- [X] [2025.07]🎯📢 🎉 RAG-Anything has reached 1k🌟 stars on GitHub! Thank you for your incredible support and valuable contributions to the project.\n\n---\n\n## 🌟 System Overview\n\n*Next-Generation Multimodal Intelligence*\n\n\u003Cdiv style=\"background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%); border-radius: 15px; padding: 25px; margin: 20px 0; border: 2px solid #00d9ff; box-shadow: 0 0 30px rgba(0, 217, 255, 0.3);\">\n\nModern documents increasingly contain diverse multimodal content—text, images, tables, equations, charts, and multimedia—that traditional text-focused RAG systems cannot effectively process. **RAG-Anything** addresses this challenge as a comprehensive **All-in-One Multimodal Document Processing RAG system** built on [LightRAG](https:\u002F\u002Fgithub.com\u002FHKUDS\u002FLightRAG).\n\nAs a unified solution, RAG-Anything **eliminates the need for multiple specialized tools**. It provides **seamless processing and querying across all content modalities** within a single integrated framework. Unlike conventional RAG approaches that struggle with non-textual elements, our all-in-one system delivers **comprehensive multimodal retrieval capabilities**.\n\nUsers can query documents containing **interleaved text**, **visual diagrams**, **structured tables**, and **mathematical formulations** through **one cohesive interface**. This consolidated approach makes RAG-Anything particularly valuable for academic research, technical documentation, financial reports, and enterprise knowledge management where rich, mixed-content documents demand a **unified processing framework**.\n\n\u003Cimg src=\"assets\u002Frag_anything_framework.png\" alt=\"RAG-Anything\" \u002F>\n\n\u003C\u002Fdiv>\n\n### 🎯 Key Features\n\n\u003Cdiv style=\"background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); border-radius: 15px; padding: 25px; margin: 20px 0;\">\n\n- **🔄 End-to-End Multimodal Pipeline** - Complete workflow from document ingestion and parsing to intelligent multimodal query answering\n- **📄 Universal Document Support** - Seamless processing of PDFs, Office documents, images, and diverse file formats\n- **🧠 Specialized Content Analysis** - Dedicated processors for images, tables, mathematical equations, and heterogeneous content types\n- **🔗 Multimodal Knowledge Graph** - Automatic entity extraction and cross-modal relationship discovery for enhanced understanding\n- **⚡ Adaptive Processing Modes** - Flexible MinerU-based parsing or direct multimodal content injection workflows\n- **📋 Direct Content List Insertion** - Bypass document parsing by directly inserting pre-parsed content lists from external sources\n- **🎯 Hybrid Intelligent Retrieval** - Advanced search capabilities spanning textual and multimodal content with contextual understanding\n\n\u003C\u002Fdiv>\n\n---\n\n## 🏗️ Algorithm & Architecture\n\n\u003Cdiv style=\"background: linear-gradient(135deg, #0f0f23 0%, #1a1a2e 100%); border-radius: 15px; padding: 25px; margin: 20px 0; border-left: 5px solid #00d9ff;\">\n\n### Core Algorithm\n\n**RAG-Anything** implements an effective **multi-stage multimodal pipeline** that fundamentally extends traditional RAG architectures to seamlessly handle diverse content modalities through intelligent orchestration and cross-modal understanding.\n\n\u003C\u002Fdiv>\n\n\u003Cdiv align=\"center\">\n  \u003Cdiv style=\"width: 100%; max-width: 600px; margin: 20px auto; padding: 20px; background: linear-gradient(135deg, rgba(0, 217, 255, 0.1) 0%, rgba(0, 217, 255, 0.05) 100%); border-radius: 15px; border: 1px solid rgba(0, 217, 255, 0.2);\">\n    \u003Cdiv style=\"display: flex; justify-content: space-around; align-items: center; flex-wrap: wrap; gap: 20px;\">\n      \u003Cdiv style=\"text-align: center;\">\n        \u003Cdiv style=\"font-size: 24px; margin-bottom: 10px;\">📄\u003C\u002Fdiv>\n        \u003Cdiv style=\"font-size: 14px; color: #00d9ff;\">Document Parsing\u003C\u002Fdiv>\n      \u003C\u002Fdiv>\n      \u003Cdiv style=\"font-size: 20px; color: #00d9ff;\">→\u003C\u002Fdiv>\n      \u003Cdiv style=\"text-align: center;\">\n        \u003Cdiv style=\"font-size: 24px; margin-bottom: 10px;\">🧠\u003C\u002Fdiv>\n        \u003Cdiv style=\"font-size: 14px; color: #00d9ff;\">Content Analysis\u003C\u002Fdiv>\n      \u003C\u002Fdiv>\n      \u003Cdiv style=\"font-size: 20px; color: #00d9ff;\">→\u003C\u002Fdiv>\n      \u003Cdiv style=\"text-align: center;\">\n        \u003Cdiv style=\"font-size: 24px; margin-bottom: 10px;\">🔍\u003C\u002Fdiv>\n        \u003Cdiv style=\"font-size: 14px; color: #00d9ff;\">Knowledge Graph\u003C\u002Fdiv>\n      \u003C\u002Fdiv>\n      \u003Cdiv style=\"font-size: 20px; color: #00d9ff;\">→\u003C\u002Fdiv>\n      \u003Cdiv style=\"text-align: center;\">\n        \u003Cdiv style=\"font-size: 24px; margin-bottom: 10px;\">🎯\u003C\u002Fdiv>\n        \u003Cdiv style=\"font-size: 14px; color: #00d9ff;\">Intelligent Retrieval\u003C\u002Fdiv>\n      \u003C\u002Fdiv>\n    \u003C\u002Fdiv>\n  \u003C\u002Fdiv>\n\u003C\u002Fdiv>\n\n### 1. Document Parsing Stage\n\n\u003Cdiv style=\"background: linear-gradient(90deg, #1a1a2e 0%, #16213e 100%); border-radius: 10px; padding: 20px; margin: 15px 0; border-left: 4px solid #4ecdc4;\">\n\nThe system provides high-fidelity document extraction through adaptive content decomposition. It intelligently segments heterogeneous elements while preserving contextual relationships. Universal format compatibility is achieved via specialized optimized parsers.\n\n**Key Components:**\n\n- **⚙️ MinerU Integration**: Leverages [MinerU](https:\u002F\u002Fgithub.com\u002Fopendatalab\u002FMinerU) for high-fidelity document structure extraction and semantic preservation across complex layouts.\n\n- **🧩 Adaptive Content Decomposition**: Automatically segments documents into coherent text blocks, visual elements, structured tables, mathematical equations, and specialized content types while preserving contextual relationships.\n\n- **📁 Universal Format Support**: Provides comprehensive handling of PDFs, Office documents (DOC\u002FDOCX\u002FPPT\u002FPPTX\u002FXLS\u002FXLSX), images, and emerging formats through specialized parsers with format-specific optimization.\n\n\u003C\u002Fdiv>\n\n### 2. Multi-Modal Content Understanding & Processing\n\n\u003Cdiv style=\"background: linear-gradient(90deg, #16213e 0%, #0f3460 100%); border-radius: 10px; padding: 20px; margin: 15px 0; border-left: 4px solid #ff6b6b;\">\n\nThe system automatically categorizes and routes content through optimized channels. It uses concurrent pipelines for parallel text and multimodal processing. Document hierarchy and relationships are preserved during transformation.\n\n**Key Components:**\n\n- **🎯 Autonomous Content Categorization and Routing**: Automatically identify, categorize, and route different content types through optimized execution channels.\n\n- **⚡ Concurrent Multi-Pipeline Architecture**: Implements concurrent execution of textual and multimodal content through dedicated processing pipelines. This approach maximizes throughput efficiency while preserving content integrity.\n\n- **🏗️ Document Hierarchy Extraction**: Extracts and preserves original document hierarchy and inter-element relationships during content transformation.\n\n\u003C\u002Fdiv>\n\n### 3. Multimodal Analysis Engine\n\n\u003Cdiv style=\"background: linear-gradient(90deg, #0f3460 0%, #1a1a2e 100%); border-radius: 10px; padding: 20px; margin: 15px 0; border-left: 4px solid #00d9ff;\">\n\nThe system deploys modality-aware processing units for heterogeneous data modalities:\n\n**Specialized Analyzers:**\n\n- **🔍 Visual Content Analyzer**:\n  - Integrate vision model for image analysis.\n  - Generates context-aware descriptive captions based on visual semantics.\n  - Extracts spatial relationships and hierarchical structures between visual elements.\n\n- **📊 Structured Data Interpreter**:\n  - Performs systematic interpretation of tabular and structured data formats.\n  - Implements statistical pattern recognition algorithms for data trend analysis.\n  - Identifies semantic relationships and dependencies across multiple tabular datasets.\n\n- **📐 Mathematical Expression Parser**:\n  - Parses complex mathematical expressions and formulas with high accuracy.\n  - Provides native LaTeX format support for seamless integration with academic workflows.\n  - Establishes conceptual mappings between mathematical equations and domain-specific knowledge bases.\n\n- **🔧 Extensible Modality Handler**:\n  - Provides configurable processing framework for custom and emerging content types.\n  - Enables dynamic integration of new modality processors through plugin architecture.\n  - Supports runtime configuration of processing pipelines for specialized use cases.\n\n\u003C\u002Fdiv>\n\n### 4. Multimodal Knowledge Graph Index\n\n\u003Cdiv style=\"background: linear-gradient(90deg, #1a1a2e 0%, #16213e 100%); border-radius: 10px; padding: 20px; margin: 15px 0; border-left: 4px solid #4ecdc4;\">\n\nThe multi-modal knowledge graph construction module transforms document content into structured semantic representations. It extracts multimodal entities, establishes cross-modal relationships, and preserves hierarchical organization. The system applies weighted relevance scoring for optimized knowledge retrieval.\n\n**Core Functions:**\n\n- **🔍 Multi-Modal Entity Extraction**: Transforms significant multimodal elements into structured knowledge graph entities. The process includes semantic annotations and metadata preservation.\n\n- **🔗 Cross-Modal Relationship Mapping**: Establishes semantic connections and dependencies between textual entities and multimodal components. This is achieved through automated relationship inference algorithms.\n\n- **🏗️ Hierarchical Structure Preservation**: Maintains original document organization through \"belongs_to\" relationship chains. These chains preserve logical content hierarchy and sectional dependencies.\n\n- **⚖️ Weighted Relationship Scoring**: Assigns quantitative relevance scores to relationship types. Scoring is based on semantic proximity and contextual significance within the document structure.\n\n\u003C\u002Fdiv>\n\n### 5. Modality-Aware Retrieval\n\n\u003Cdiv style=\"background: linear-gradient(90deg, #16213e 0%, #0f3460 100%); border-radius: 10px; padding: 20px; margin: 15px 0; border-left: 4px solid #ff6b6b;\">\n\nThe hybrid retrieval system combines vector similarity search with graph traversal algorithms for comprehensive content retrieval. It implements modality-aware ranking mechanisms and maintains relational coherence between retrieved elements to ensure contextually integrated information delivery.\n\n**Retrieval Mechanisms:**\n\n- **🔀 Vector-Graph Fusion**: Integrates vector similarity search with graph traversal algorithms. This approach leverages both semantic embeddings and structural relationships for comprehensive content retrieval.\n\n- **📊 Modality-Aware Ranking**: Implements adaptive scoring mechanisms that weight retrieval results based on content type relevance. The system adjusts rankings according to query-specific modality preferences.\n\n- **🔗 Relational Coherence Maintenance**: Maintains semantic and structural relationships between retrieved elements. This ensures coherent information delivery and contextual integrity.\n\n\u003C\u002Fdiv>\n\n---\n\n## 🚀 Quick Start\n\n*Initialize Your AI Journey*\n\n\u003Cdiv align=\"center\">\n  \u003Cimg src=\"https:\u002F\u002Fuser-images.githubusercontent.com\u002F74038190\u002F212284158-e840e285-664b-44d7-b79b-e264b5e54825.gif\" width=\"400\">\n\u003C\u002Fdiv>\n\n### Installation\n\n#### Option 1: Install from PyPI (Recommended)\n\n```bash\n# Basic installation\npip install raganything\n\n# With optional dependencies for extended format support:\npip install 'raganything[all]'              # All optional features\npip install 'raganything[image]'            # Image format conversion (BMP, TIFF, GIF, WebP)\npip install 'raganything[text]'             # Text file processing (TXT, MD)\npip install 'raganything[image,text]'       # Multiple features\n```\n\n#### Option 2: Install from Source\n```bash\n# Install uv (if not already installed)\ncurl -LsSf https:\u002F\u002Fastral.sh\u002Fuv\u002Finstall.sh | sh\n\n# Clone and setup the project with uv\ngit clone https:\u002F\u002Fgithub.com\u002FHKUDS\u002FRAG-Anything.git\ncd RAG-Anything\n\n# Install the package and dependencies in a virtual environment\nuv sync\n\n# If you encounter network timeouts (especially for opencv packages):\n# UV_HTTP_TIMEOUT=120 uv sync\n\n# Run commands directly with uv (recommended approach)\nuv run python examples\u002Fraganything_example.py --help\n\n# Install with optional dependencies\nuv sync --extra image --extra text  # Specific extras\nuv sync --all-extras                 # All optional features\n```\n\n#### Optional Dependencies\n\n- **`[image]`** - Enables processing of BMP, TIFF, GIF, WebP image formats (requires Pillow)\n- **`[text]`** - Enables processing of TXT and MD files (requires ReportLab)\n- **`[all]`** - Includes all Python optional dependencies\n\n> **⚠️ Office Document Processing Requirements:**\n> - Office documents (.doc, .docx, .ppt, .pptx, .xls, .xlsx) require **LibreOffice** installation\n> - Download from [LibreOffice official website](https:\u002F\u002Fwww.libreoffice.org\u002Fdownload\u002Fdownload\u002F)\n> - **Windows**: Download installer from official website\n> - **macOS**: `brew install --cask libreoffice`\n> - **Ubuntu\u002FDebian**: `sudo apt-get install libreoffice`\n> - **CentOS\u002FRHEL**: `sudo yum install libreoffice`\n\n**Check MinerU installation:**\n\n```bash\n# Verify installation\nmineru --version\n\n# Check if properly configured\npython -c \"from raganything import RAGAnything; rag = RAGAnything(); print('✅ MinerU installed properly' if rag.check_parser_installation() else '❌ MinerU installation issue')\"\n```\n\nModels are downloaded automatically on first use. For manual download, refer to [MinerU Model Source Configuration](https:\u002F\u002Fgithub.com\u002Fopendatalab\u002FMinerU\u002Fblob\u002Fmaster\u002FREADME.md#22-model-source-configuration).\n\n### Usage Examples\n\n#### 1. End-to-End Document Processing\n\n```python\nimport asyncio\nfrom functools import partial\nfrom raganything import RAGAnything, RAGAnythingConfig\nfrom lightrag.llm.openai import openai_complete_if_cache, openai_embed\nfrom lightrag.utils import EmbeddingFunc\n\nasync def main():\n    # Set up API configuration\n    api_key = \"your-api-key\"\n    base_url = \"your-base-url\"  # Optional\n\n    # Create RAGAnything configuration\n    config = RAGAnythingConfig(\n        working_dir=\".\u002Frag_storage\",\n        parser=\"mineru\",  # Parser selection: mineru, docling, or paddleocr\n        parse_method=\"auto\",  # Parse method: auto, ocr, or txt\n        enable_image_processing=True,\n        enable_table_processing=True,\n        enable_equation_processing=True,\n    )\n\n    # Define LLM model function\n    def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):\n        return openai_complete_if_cache(\n            \"gpt-4o-mini\",\n            prompt,\n            system_prompt=system_prompt,\n            history_messages=history_messages,\n            api_key=api_key,\n            base_url=base_url,\n            **kwargs,\n        )\n\n    # Define vision model function for image processing\n    def vision_model_func(\n        prompt, system_prompt=None, history_messages=[], image_data=None, messages=None, **kwargs\n    ):\n        # If messages format is provided (for multimodal VLM enhanced query), use it directly\n        if messages:\n            return openai_complete_if_cache(\n                \"gpt-4o\",\n                \"\",\n                system_prompt=None,\n                history_messages=[],\n                messages=messages,\n                api_key=api_key,\n                base_url=base_url,\n                **kwargs,\n            )\n        # Traditional single image format\n        elif image_data:\n            return openai_complete_if_cache(\n                \"gpt-4o\",\n                \"\",\n                system_prompt=None,\n                history_messages=[],\n                messages=[\n                    {\"role\": \"system\", \"content\": system_prompt}\n                    if system_prompt\n                    else None,\n                    {\n                        \"role\": \"user\",\n                        \"content\": [\n                            {\"type\": \"text\", \"text\": prompt},\n                            {\n                                \"type\": \"image_url\",\n                                \"image_url\": {\n                                    \"url\": f\"data:image\u002Fjpeg;base64,{image_data}\"\n                                },\n                            },\n                        ],\n                    }\n                    if image_data\n                    else {\"role\": \"user\", \"content\": prompt},\n                ],\n                api_key=api_key,\n                base_url=base_url,\n                **kwargs,\n            )\n        # Pure text format\n        else:\n            return llm_model_func(prompt, system_prompt, history_messages, **kwargs)\n\n    # Define embedding function\n    embedding_func = EmbeddingFunc(\n        embedding_dim=3072,\n        max_token_size=8192,\n        func=partial(\n            openai_embed.func,\n            model=\"text-embedding-3-large\",\n            api_key=api_key,\n            base_url=base_url,\n        ),\n    )\n\n    # Initialize RAGAnything\n    rag = RAGAnything(\n        config=config,\n        llm_model_func=llm_model_func,\n        vision_model_func=vision_model_func,\n        embedding_func=embedding_func,\n    )\n\n    # Process a document\n    await rag.process_document_complete(\n        file_path=\"path\u002Fto\u002Fyour\u002Fdocument.pdf\",\n        output_dir=\".\u002Foutput\",\n        parse_method=\"auto\"\n    )\n\n    # Query the processed content\n    # Pure text query - for basic knowledge base search\n    text_result = await rag.aquery(\n        \"What are the main findings shown in the figures and tables?\",\n        mode=\"hybrid\"\n    )\n    print(\"Text query result:\", text_result)\n\n    # Multimodal query with specific multimodal content\n    multimodal_result = await rag.aquery_with_multimodal(\n    \"Explain this formula and its relevance to the document content\",\n    multimodal_content=[{\n        \"type\": \"equation\",\n        \"latex\": \"P(d|q) = \\\\frac{P(q|d) \\\\cdot P(d)}{P(q)}\",\n        \"equation_caption\": \"Document relevance probability\"\n    }],\n    mode=\"hybrid\"\n)\n    print(\"Multimodal query result:\", multimodal_result)\n\nif __name__ == \"__main__\":\n    asyncio.run(main())\n```\n\n#### 2. Direct Multimodal Content Processing\n\n```python\nimport asyncio\nfrom functools import partial\nfrom lightrag import LightRAG\nfrom lightrag.llm.openai import openai_complete_if_cache, openai_embed\nfrom lightrag.utils import EmbeddingFunc\nfrom raganything.modalprocessors import ImageModalProcessor, TableModalProcessor\n\nasync def process_multimodal_content():\n    # Set up API configuration\n    api_key = \"your-api-key\"\n    base_url = \"your-base-url\"  # Optional\n\n    # Initialize LightRAG\n    rag = LightRAG(\n        working_dir=\".\u002Frag_storage\",\n        llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(\n            \"gpt-4o-mini\",\n            prompt,\n            system_prompt=system_prompt,\n            history_messages=history_messages,\n            api_key=api_key,\n            base_url=base_url,\n            **kwargs,\n        ),\n        embedding_func=EmbeddingFunc(\n            embedding_dim=3072,\n            max_token_size=8192,\n            func=partial(\n                openai_embed.func,\n                model=\"text-embedding-3-large\",\n                api_key=api_key,\n                base_url=base_url,\n            ),\n        )\n    )\n    await rag.initialize_storages()\n\n    # Process an image\n    image_processor = ImageModalProcessor(\n        lightrag=rag,\n        modal_caption_func=lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache(\n            \"gpt-4o\",\n            \"\",\n            system_prompt=None,\n            history_messages=[],\n            messages=[\n                {\"role\": \"system\", \"content\": system_prompt} if system_prompt else None,\n                {\"role\": \"user\", \"content\": [\n                    {\"type\": \"text\", \"text\": prompt},\n                    {\"type\": \"image_url\", \"image_url\": {\"url\": f\"data:image\u002Fjpeg;base64,{image_data}\"}}\n                ]} if image_data else {\"role\": \"user\", \"content\": prompt}\n            ],\n            api_key=api_key,\n            base_url=base_url,\n            **kwargs,\n        ) if image_data else openai_complete_if_cache(\n            \"gpt-4o-mini\",\n            prompt,\n            system_prompt=system_prompt,\n            history_messages=history_messages,\n            api_key=api_key,\n            base_url=base_url,\n            **kwargs,\n        )\n    )\n\n    image_content = {\n        \"img_path\": \"path\u002Fto\u002Fimage.jpg\",\n        \"image_caption\": [\"Figure 1: Experimental results\"],\n        \"image_footnote\": [\"Data collected in 2024\"]\n    }\n\n    description, entity_info = await image_processor.process_multimodal_content(\n        modal_content=image_content,\n        content_type=\"image\",\n        file_path=\"research_paper.pdf\",\n        entity_name=\"Experimental Results Figure\"\n    )\n\n    # Process a table\n    table_processor = TableModalProcessor(\n        lightrag=rag,\n        modal_caption_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(\n            \"gpt-4o-mini\",\n            prompt,\n            system_prompt=system_prompt,\n            history_messages=history_messages,\n            api_key=api_key,\n            base_url=base_url,\n            **kwargs,\n        )\n    )\n\n    table_content = {\n        \"table_body\": \"\"\"\n        | Method | Accuracy | F1-Score |\n        |--------|----------|----------|\n        | RAGAnything | 95.2% | 0.94 |\n        | Baseline | 87.3% | 0.85 |\n        \"\"\",\n        \"table_caption\": [\"Performance Comparison\"],\n        \"table_footnote\": [\"Results on test dataset\"]\n    }\n\n    description, entity_info = await table_processor.process_multimodal_content(\n        modal_content=table_content,\n        content_type=\"table\",\n        file_path=\"research_paper.pdf\",\n        entity_name=\"Performance Results Table\"\n    )\n\nif __name__ == \"__main__\":\n    asyncio.run(process_multimodal_content())\n```\n\n#### 3. Batch Processing\n\n```python\n# Process multiple documents\nawait rag.process_folder_complete(\n    folder_path=\".\u002Fdocuments\",\n    output_dir=\".\u002Foutput\",\n    file_extensions=[\".pdf\", \".docx\", \".pptx\"],\n    recursive=True,\n    max_workers=4\n)\n```\n\n#### 4. Custom Modal Processors\n\n```python\nfrom raganything.modalprocessors import GenericModalProcessor\n\nclass CustomModalProcessor(GenericModalProcessor):\n    async def process_multimodal_content(self, modal_content, content_type, file_path, entity_name):\n        # Your custom processing logic\n        enhanced_description = await self.analyze_custom_content(modal_content)\n        entity_info = self.create_custom_entity(enhanced_description, entity_name)\n        return await self._create_entity_and_chunk(enhanced_description, entity_info, file_path)\n```\n\n#### 5. Query Options\n\nRAG-Anything provides three types of query methods:\n\n**Pure Text Queries** - Direct knowledge base search using LightRAG:\n```python\n# Different query modes for text queries\ntext_result_hybrid = await rag.aquery(\"Your question\", mode=\"hybrid\")\ntext_result_local = await rag.aquery(\"Your question\", mode=\"local\")\ntext_result_global = await rag.aquery(\"Your question\", mode=\"global\")\ntext_result_naive = await rag.aquery(\"Your question\", mode=\"naive\")\n\n# Synchronous version\nsync_text_result = rag.query(\"Your question\", mode=\"hybrid\")\n```\n\n**VLM Enhanced Queries** - Automatically analyze images in retrieved context using VLM:\n```python\n# VLM enhanced query (automatically enabled when vision_model_func is provided)\nvlm_result = await rag.aquery(\n    \"Analyze the charts and figures in the document\",\n    mode=\"hybrid\"\n    # vlm_enhanced=True is automatically set when vision_model_func is available\n)\n\n# Manually control VLM enhancement\nvlm_enabled = await rag.aquery(\n    \"What do the images show in this document?\",\n    mode=\"hybrid\",\n    vlm_enhanced=True  # Force enable VLM enhancement\n)\n\nvlm_disabled = await rag.aquery(\n    \"What do the images show in this document?\",\n    mode=\"hybrid\",\n    vlm_enhanced=False  # Force disable VLM enhancement\n)\n\n# When documents contain images, VLM can see and analyze them directly\n# The system will automatically:\n# 1. Retrieve relevant context containing image paths\n# 2. Load and encode images as base64\n# 3. Send both text context and images to VLM for comprehensive analysis\n```\n\n**Multimodal Queries** - Enhanced queries with specific multimodal content analysis:\n```python\n# Query with table data\ntable_result = await rag.aquery_with_multimodal(\n    \"Compare these performance metrics with the document content\",\n    multimodal_content=[{\n        \"type\": \"table\",\n        \"table_data\": \"\"\"Method,Accuracy,Speed\n                        RAGAnything,95.2%,120ms\n                        Traditional,87.3%,180ms\"\"\",\n        \"table_caption\": \"Performance comparison\"\n    }],\n    mode=\"hybrid\"\n)\n\n# Query with equation content\nequation_result = await rag.aquery_with_multimodal(\n    \"Explain this formula and its relevance to the document content\",\n    multimodal_content=[{\n        \"type\": \"equation\",\n        \"latex\": \"P(d|q) = \\\\frac{P(q|d) \\\\cdot P(d)}{P(q)}\",\n        \"equation_caption\": \"Document relevance probability\"\n    }],\n    mode=\"hybrid\"\n)\n```\n\n#### 6. Loading Existing LightRAG Instance\n\n```python\nimport asyncio\nfrom functools import partial\nfrom raganything import RAGAnything, RAGAnythingConfig\nfrom lightrag import LightRAG\nfrom lightrag.llm.openai import openai_complete_if_cache, openai_embed\nfrom lightrag.kg.shared_storage import initialize_pipeline_status\nfrom lightrag.utils import EmbeddingFunc\nimport os\n\nasync def load_existing_lightrag():\n    # Set up API configuration\n    api_key = \"your-api-key\"\n    base_url = \"your-base-url\"  # Optional\n\n    # First, create or load existing LightRAG instance\n    lightrag_working_dir = \".\u002Fexisting_lightrag_storage\"\n\n    # Check if previous LightRAG instance exists\n    if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):\n        print(\"✅ Found existing LightRAG instance, loading...\")\n    else:\n        print(\"❌ No existing LightRAG instance found, will create new one\")\n\n    # Create\u002Fload LightRAG instance with your configuration\n    lightrag_instance = LightRAG(\n        working_dir=lightrag_working_dir,\n        llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(\n            \"gpt-4o-mini\",\n            prompt,\n            system_prompt=system_prompt,\n            history_messages=history_messages,\n            api_key=api_key,\n            base_url=base_url,\n            **kwargs,\n        ),\n        embedding_func=EmbeddingFunc(\n            embedding_dim=3072,\n            max_token_size=8192,\n            func=partial(\n                openai_embed.func,\n                model=\"text-embedding-3-large\",\n                api_key=api_key,\n                base_url=base_url,\n            ),\n        )\n    )\n\n    # Initialize storage (this will load existing data if available)\n    await lightrag_instance.initialize_storages()\n    await initialize_pipeline_status()\n\n    # Define vision model function for image processing\n    def vision_model_func(\n        prompt, system_prompt=None, history_messages=[], image_data=None, messages=None, **kwargs\n    ):\n        # If messages format is provided (for multimodal VLM enhanced query), use it directly\n        if messages:\n            return openai_complete_if_cache(\n                \"gpt-4o\",\n                \"\",\n                system_prompt=None,\n                history_messages=[],\n                messages=messages,\n                api_key=api_key,\n                base_url=base_url,\n                **kwargs,\n            )\n        # Traditional single image format\n        elif image_data:\n            return openai_complete_if_cache(\n                \"gpt-4o\",\n                \"\",\n                system_prompt=None,\n                history_messages=[],\n                messages=[\n                    {\"role\": \"system\", \"content\": system_prompt}\n                    if system_prompt\n                    else None,\n                    {\n                        \"role\": \"user\",\n                        \"content\": [\n                            {\"type\": \"text\", \"text\": prompt},\n                            {\n                                \"type\": \"image_url\",\n                                \"image_url\": {\n                                    \"url\": f\"data:image\u002Fjpeg;base64,{image_data}\"\n                                },\n                            },\n                        ],\n                    }\n                    if image_data\n                    else {\"role\": \"user\", \"content\": prompt},\n                ],\n                api_key=api_key,\n                base_url=base_url,\n                **kwargs,\n            )\n        # Pure text format\n        else:\n            return lightrag_instance.llm_model_func(prompt, system_prompt, history_messages, **kwargs)\n\n    # Now use existing LightRAG instance to initialize RAGAnything\n    rag = RAGAnything(\n        lightrag=lightrag_instance,  # Pass existing LightRAG instance\n        vision_model_func=vision_model_func,\n        # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance\n    )\n\n    # Query existing knowledge base\n    result = await rag.aquery(\n        \"What data has been processed in this LightRAG instance?\",\n        mode=\"hybrid\"\n    )\n    print(\"Query result:\", result)\n\n    # Add new multimodal document to existing LightRAG instance\n    await rag.process_document_complete(\n        file_path=\"path\u002Fto\u002Fnew\u002Fmultimodal_document.pdf\",\n        output_dir=\".\u002Foutput\"\n    )\n\nif __name__ == \"__main__\":\n    asyncio.run(load_existing_lightrag())\n```\n\n#### 7. Direct Content List Insertion\n\nFor scenarios where you already have a pre-parsed content list (e.g., from external parsers or previous processing), you can directly insert it into RAGAnything without document parsing:\n\n```python\nimport asyncio\nfrom functools import partial\nfrom raganything import RAGAnything, RAGAnythingConfig\nfrom lightrag.llm.openai import openai_complete_if_cache, openai_embed\nfrom lightrag.utils import EmbeddingFunc\n\nasync def insert_content_list_example():\n    # Set up API configuration\n    api_key = \"your-api-key\"\n    base_url = \"your-base-url\"  # Optional\n\n    # Create RAGAnything configuration\n    config = RAGAnythingConfig(\n        working_dir=\".\u002Frag_storage\",\n        enable_image_processing=True,\n        enable_table_processing=True,\n        enable_equation_processing=True,\n    )\n\n    # Define model functions\n    def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):\n        return openai_complete_if_cache(\n            \"gpt-4o-mini\",\n            prompt,\n            system_prompt=system_prompt,\n            history_messages=history_messages,\n            api_key=api_key,\n            base_url=base_url,\n            **kwargs,\n        )\n\n    def vision_model_func(prompt, system_prompt=None, history_messages=[], image_data=None, messages=None, **kwargs):\n        # If messages format is provided (for multimodal VLM enhanced query), use it directly\n        if messages:\n            return openai_complete_if_cache(\n                \"gpt-4o\",\n                \"\",\n                system_prompt=None,\n                history_messages=[],\n                messages=messages,\n                api_key=api_key,\n                base_url=base_url,\n                **kwargs,\n            )\n        # Traditional single image format\n        elif image_data:\n            return openai_complete_if_cache(\n                \"gpt-4o\",\n                \"\",\n                system_prompt=None,\n                history_messages=[],\n                messages=[\n                    {\"role\": \"system\", \"content\": system_prompt} if system_prompt else None,\n                    {\n                        \"role\": \"user\",\n                        \"content\": [\n                            {\"type\": \"text\", \"text\": prompt},\n                            {\"type\": \"image_url\", \"image_url\": {\"url\": f\"data:image\u002Fjpeg;base64,{image_data}\"}}\n                        ],\n                    } if image_data else {\"role\": \"user\", \"content\": prompt},\n                ],\n                api_key=api_key,\n                base_url=base_url,\n                **kwargs,\n            )\n        # Pure text format\n        else:\n            return llm_model_func(prompt, system_prompt, history_messages, **kwargs)\n\n    embedding_func = EmbeddingFunc(\n        embedding_dim=3072,\n        max_token_size=8192,\n        func=partial(\n            openai_embed.func,\n            model=\"text-embedding-3-large\",\n            api_key=api_key,\n            base_url=base_url,\n        ),\n    )\n\n    # Initialize RAGAnything\n    rag = RAGAnything(\n        config=config,\n        llm_model_func=llm_model_func,\n        vision_model_func=vision_model_func,\n        embedding_func=embedding_func,\n    )\n\n    # Example: Pre-parsed content list from external source\n    content_list = [\n        {\n            \"type\": \"text\",\n            \"text\": \"This is the introduction section of our research paper.\",\n            \"page_idx\": 0  # Page number where this content appears\n        },\n        {\n            \"type\": \"image\",\n            \"img_path\": \"\u002Fabsolute\u002Fpath\u002Fto\u002Ffigure1.jpg\",  # IMPORTANT: Use absolute path\n            \"image_caption\": [\"Figure 1: System Architecture\"],\n            \"image_footnote\": [\"Source: Authors' original design\"],\n            \"page_idx\": 1  # Page number where this image appears\n        },\n        {\n            \"type\": \"table\",\n            \"table_body\": \"| Method | Accuracy | F1-Score |\\n|--------|----------|----------|\\n| Ours | 95.2% | 0.94 |\\n| Baseline | 87.3% | 0.85 |\",\n            \"table_caption\": [\"Table 1: Performance Comparison\"],\n            \"table_footnote\": [\"Results on test dataset\"],\n            \"page_idx\": 2  # Page number where this table appears\n        },\n        {\n            \"type\": \"equation\",\n            \"latex\": \"P(d|q) = \\\\frac{P(q|d) \\\\cdot P(d)}{P(q)}\",\n            \"text\": \"Document relevance probability formula\",\n            \"page_idx\": 3  # Page number where this equation appears\n        },\n        {\n            \"type\": \"text\",\n            \"text\": \"In conclusion, our method demonstrates superior performance across all metrics.\",\n            \"page_idx\": 4  # Page number where this content appears\n        }\n    ]\n\n    # Insert the content list directly\n    await rag.insert_content_list(\n        content_list=content_list,\n        file_path=\"research_paper.pdf\",  # Reference file name for citation\n        split_by_character=None,         # Optional text splitting\n        split_by_character_only=False,   # Optional text splitting mode\n        doc_id=None,                     # Optional custom document ID (will be auto-generated if not provided)\n        display_stats=True               # Show content statistics\n    )\n\n    # Query the inserted content\n    result = await rag.aquery(\n        \"What are the key findings and performance metrics mentioned in the research?\",\n        mode=\"hybrid\"\n    )\n    print(\"Query result:\", result)\n\n    # You can also insert multiple content lists with different document IDs\n    another_content_list = [\n        {\n            \"type\": \"text\",\n            \"text\": \"This is content from another document.\",\n            \"page_idx\": 0  # Page number where this content appears\n        },\n        {\n            \"type\": \"table\",\n            \"table_body\": \"| Feature | Value |\\n|---------|-------|\\n| Speed | Fast |\\n| Accuracy | High |\",\n            \"table_caption\": [\"Feature Comparison\"],\n            \"page_idx\": 1  # Page number where this table appears\n        }\n    ]\n\n    await rag.insert_content_list(\n        content_list=another_content_list,\n        file_path=\"another_document.pdf\",\n        doc_id=\"custom-doc-id-123\"  # Custom document ID\n    )\n\nif __name__ == \"__main__\":\n    asyncio.run(insert_content_list_example())\n```\n\n**Content List Format:**\n\nThe `content_list` should follow the standard format with each item being a dictionary containing:\n\n- **Text content**: `{\"type\": \"text\", \"text\": \"content text\", \"page_idx\": 0}`\n- **Image content**: `{\"type\": \"image\", \"img_path\": \"\u002Fabsolute\u002Fpath\u002Fto\u002Fimage.jpg\", \"image_caption\": [\"caption\"], \"image_footnote\": [\"note\"], \"page_idx\": 1}`\n- **Table content**: `{\"type\": \"table\", \"table_body\": \"markdown table\", \"table_caption\": [\"caption\"], \"table_footnote\": [\"note\"], \"page_idx\": 2}`\n- **Equation content**: `{\"type\": \"equation\", \"latex\": \"LaTeX formula\", \"text\": \"description\", \"page_idx\": 3}`\n- **Generic content**: `{\"type\": \"custom_type\", \"content\": \"any content\", \"page_idx\": 4}`\n\n**Important Notes:**\n- **`img_path`**: Must be an absolute path to the image file (e.g., `\u002Fhome\u002Fuser\u002Fimages\u002Fchart.jpg` or `C:\\Users\\user\\images\\chart.jpg`)\n- **`page_idx`**: Represents the page number where the content appears in the original document (0-based indexing)\n- **Content ordering**: Items are processed in the order they appear in the list\n\nThis method is particularly useful when:\n- You have content from external parsers (non-MinerU\u002FDocling)\n- You want to process programmatically generated content\n- You need to insert content from multiple sources into a single knowledge base\n- You have cached parsing results that you want to reuse\n\n---\n\n## 🛠️ Examples\n\n*Practical Implementation Demos*\n\n\u003Cdiv align=\"center\">\n  \u003Cimg src=\"https:\u002F\u002Fuser-images.githubusercontent.com\u002F74038190\u002F212257455-13e3e01e-d6a6-45dc-bb92-3ab87b12dfc1.gif\" width=\"300\">\n\u003C\u002Fdiv>\n\nThe `examples\u002F` directory contains comprehensive usage examples:\n\n- **`raganything_example.py`**: End-to-end document processing with MinerU\n- **`modalprocessors_example.py`**: Direct multimodal content processing\n- **`office_document_test.py`**: Office document parsing test with MinerU (no API key required)\n- **`image_format_test.py`**: Image format parsing test with MinerU (no API key required)\n- **`text_format_test.py`**: Text format parsing test with MinerU (no API key required)\n\n**Run examples:**\n\n```bash\n# End-to-end processing with parser selection\npython examples\u002Fraganything_example.py path\u002Fto\u002Fdocument.pdf --api-key YOUR_API_KEY --parser mineru\n\n# Direct modal processing\npython examples\u002Fmodalprocessors_example.py --api-key YOUR_API_KEY\n\n# Office document parsing test (MinerU only)\npython examples\u002Foffice_document_test.py --file path\u002Fto\u002Fdocument.docx\n\n# Image format parsing test (MinerU only)\npython examples\u002Fimage_format_test.py --file path\u002Fto\u002Fimage.bmp\n\n# Text format parsing test (MinerU only)\npython examples\u002Ftext_format_test.py --file path\u002Fto\u002Fdocument.md\n\n# Check LibreOffice installation\npython examples\u002Foffice_document_test.py --check-libreoffice --file dummy\n\n# Check PIL\u002FPillow installation\npython examples\u002Fimage_format_test.py --check-pillow --file dummy\n\n# Check ReportLab installation\npython examples\u002Ftext_format_test.py --check-reportlab --file dummy\n```\n\n---\n\n## 🔧 Configuration\n\n*System Optimization Parameters*\n\n### Environment Variables\n\nCreate a `.env` file (refer to `.env.example`):\n\n```bash\nOPENAI_API_KEY=your_openai_api_key\nOPENAI_BASE_URL=your_base_url  # Optional\nOUTPUT_DIR=.\u002Foutput             # Default output directory for parsed documents\nPARSER=mineru                   # Parser selection: mineru, docling, or paddleocr\nPARSE_METHOD=auto              # Parse method: auto, ocr, or txt\n```\n\n**Note:** For backward compatibility, legacy environment variable names are still supported:\n- `MINERU_PARSE_METHOD` is deprecated, please use `PARSE_METHOD`\n\n> **Note**: API keys are only required for full RAG processing with LLM integration. The parsing test files (`office_document_test.py` and `image_format_test.py`) only test parser functionality and do not require API keys.\n\n### Parser Configuration\n\nRAGAnything now supports multiple parsers, each with specific advantages:\n\n#### MinerU Parser\n- Supports PDF, images, Office documents, and more formats\n- Powerful OCR and table extraction capabilities\n- GPU acceleration support\n\n#### Docling Parser\n- Optimized for Office documents and HTML files\n- Better document structure preservation\n- Native support for multiple Office formats\n\n#### PaddleOCR Parser\n- OCR-focused parser for images and PDFs\n- Produces text blocks compatible with existing `content_list` processing\n- Supports optional Office\u002FTXT\u002FMD parsing by converting to PDF first\n\nInstall PaddleOCR parser extras:\n\n```bash\npip install -e \".[paddleocr]\"\n# or\nuv sync --extra paddleocr\n```\n\n> **Note**: PaddleOCR also requires `paddlepaddle` (CPU\u002FGPU package varies by platform). Install it with the official guide: https:\u002F\u002Fwww.paddlepaddle.org.cn\u002Finstall\u002Fquick\n\n### MinerU Configuration\n\n```bash\n# MinerU 2.0 uses command-line parameters instead of config files\n# Check available options:\nmineru --help\n\n# Common configurations:\nmineru -p input.pdf -o output_dir -m auto    # Automatic parsing mode\nmineru -p input.pdf -o output_dir -m ocr     # OCR-focused parsing\nmineru -p input.pdf -o output_dir -b pipeline --device cuda  # GPU acceleration\n```\n\nYou can also configure parsing through RAGAnything parameters:\n\n```python\n# Basic parsing configuration with parser selection\nawait rag.process_document_complete(\n    file_path=\"document.pdf\",\n    output_dir=\".\u002Foutput\u002F\",\n    parse_method=\"auto\",          # or \"ocr\", \"txt\"\n    parser=\"mineru\"               # Optional: \"mineru\", \"docling\", or \"paddleocr\"\n)\n\n# Advanced parsing configuration with special parameters\nawait rag.process_document_complete(\n    file_path=\"document.pdf\",\n    output_dir=\".\u002Foutput\u002F\",\n    parse_method=\"auto\",          # Parsing method: \"auto\", \"ocr\", \"txt\"\n    parser=\"mineru\",              # Parser selection: \"mineru\", \"docling\", or \"paddleocr\"\n\n    # MinerU special parameters - all supported kwargs:\n    lang=\"ch\",                   # Document language for OCR optimization (e.g., \"ch\", \"en\", \"ja\")\n    device=\"cuda:0\",             # Inference device: \"cpu\", \"cuda\", \"cuda:0\", \"npu\", \"mps\"\n    start_page=0,                # Starting page number (0-based, for PDF)\n    end_page=10,                 # Ending page number (0-based, for PDF)\n    formula=True,                # Enable formula parsing\n    table=True,                  # Enable table parsing\n    backend=\"pipeline\",          # Parsing backend: pipeline|hybrid-auto-engine|hybrid-http-client|vlm-auto-engine|vlm-http-client.\n    source=\"huggingface\",        # Model source: \"huggingface\", \"modelscope\", \"local\"\n    # vlm_url=\"http:\u002F\u002F127.0.0.1:3000\" # Service address when using backend=vlm-http-client\n\n    # Standard RAGAnything parameters\n    display_stats=True,          # Display content statistics\n    split_by_character=None,     # Optional character to split text by\n    doc_id=None                  # Optional document ID\n)\n```\n\n> **Note**: MinerU 2.0 no longer uses the `magic-pdf.json` configuration file. All settings are now passed as command-line parameters or function arguments. RAG-Anything supports multiple document parsers, including MinerU, Docling, and PaddleOCR.\n\n### Processing Requirements\n\nDifferent content types require specific optional dependencies:\n\n- **Office Documents** (.doc, .docx, .ppt, .pptx, .xls, .xlsx): Install [LibreOffice](https:\u002F\u002Fwww.libreoffice.org\u002Fdownload\u002Fdownload\u002F)\n- **Extended Image Formats** (.bmp, .tiff, .gif, .webp): Install with `pip install raganything[image]`\n- **Text Files** (.txt, .md): Install with `pip install raganything[text]`\n- **PaddleOCR Parser** (`parser=\"paddleocr\"`): Install with `pip install raganything[paddleocr]`, then install `paddlepaddle` for your platform\n\n> **📋 Quick Install**: Use `pip install raganything[all]` to enable all format support (Python dependencies only - LibreOffice still needs separate installation)\n\n---\n\n## 🧪 Supported Content Types\n\n### Document Formats\n\n- **PDFs** - Research papers, reports, presentations\n- **Office Documents** - DOC, DOCX, PPT, PPTX, XLS, XLSX\n- **Images** - JPG, PNG, BMP, TIFF, GIF, WebP\n- **Text Files** - TXT, MD\n\n### Multimodal Elements\n\n- **Images** - Photographs, diagrams, charts, screenshots\n- **Tables** - Data tables, comparison charts, statistical summaries\n- **Equations** - Mathematical formulas in LaTeX format\n- **Generic Content** - Custom content types via extensible processors\n\n*For installation of format-specific dependencies, see the [Configuration](#-configuration) section.*\n\n---\n\n## 📖 Citation\n\n*Academic Reference*\n\n\u003Cdiv align=\"center\">\n  \u003Cdiv style=\"width: 60px; height: 60px; margin: 20px auto; position: relative;\">\n    \u003Cdiv style=\"width: 100%; height: 100%; border: 2px solid #00d9ff; border-radius: 50%; position: relative;\">\n      \u003Cdiv style=\"position: absolute; top: 50%; left: 50%; transform: translate(-50%, -50%); font-size: 24px; color: #00d9ff;\">📖\u003C\u002Fdiv>\n    \u003C\u002Fdiv>\n    \u003Cdiv style=\"position: absolute; bottom: -5px; left: 50%; transform: translateX(-50%); width: 20px; height: 20px; background: white; border-right: 2px solid #00d9ff; border-bottom: 2px solid #00d9ff; transform: rotate(45deg);\">\u003C\u002Fdiv>\n  \u003C\u002Fdiv>\n\u003C\u002Fdiv>\n\nIf you find RAG-Anything useful in your research, please cite our paper:\n\n```bibtex\n@misc{guo2025raganythingallinoneragframework,\n      title={RAG-Anything: All-in-One RAG Framework},\n      author={Zirui Guo and Xubin Ren and Lingrui Xu and Jiahao Zhang and Chao Huang},\n      year={2025},\n      eprint={2510.12323},\n      archivePrefix={arXiv},\n      primaryClass={cs.AI},\n      url={https:\u002F\u002Farxiv.org\u002Fabs\u002F2510.12323},\n}\n```\n\n---\n\n## 🔗 Related Projects\n\n*Ecosystem & Extensions*\n\n\u003Cdiv align=\"center\">\n  \u003Ctable>\n    \u003Ctr>\n      \u003Ctd align=\"center\">\n        \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FHKUDS\u002FLightRAG\">\n          \u003Cdiv style=\"width: 100px; height: 100px; background: linear-gradient(135deg, rgba(0, 217, 255, 0.1) 0%, rgba(0, 217, 255, 0.05) 100%); border-radius: 15px; border: 1px solid rgba(0, 217, 255, 0.2); display: flex; align-items: center; justify-content: center; margin-bottom: 10px;\">\n            \u003Cspan style=\"font-size: 32px;\">⚡\u003C\u002Fspan>\n          \u003C\u002Fdiv>\n          \u003Cb>LightRAG\u003C\u002Fb>\u003Cbr>\n          \u003Csub>Simple and Fast RAG\u003C\u002Fsub>\n        \u003C\u002Fa>\n      \u003C\u002Ftd>\n      \u003Ctd align=\"center\">\n        \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FHKUDS\u002FVideoRAG\">\n          \u003Cdiv style=\"width: 100px; height: 100px; background: linear-gradient(135deg, rgba(0, 217, 255, 0.1) 0%, rgba(0, 217, 255, 0.05) 100%); border-radius: 15px; border: 1px solid rgba(0, 217, 255, 0.2); display: flex; align-items: center; justify-content: center; margin-bottom: 10px;\">\n            \u003Cspan style=\"font-size: 32px;\">🎥\u003C\u002Fspan>\n          \u003C\u002Fdiv>\n          \u003Cb>VideoRAG\u003C\u002Fb>\u003Cbr>\n          \u003Csub>Extreme Long-Context Video RAG\u003C\u002Fsub>\n        \u003C\u002Fa>\n      \u003C\u002Ftd>\n      \u003Ctd align=\"center\">\n        \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FHKUDS\u002FMiniRAG\">\n          \u003Cdiv style=\"width: 100px; height: 100px; background: linear-gradient(135deg, rgba(0, 217, 255, 0.1) 0%, rgba(0, 217, 255, 0.05) 100%); border-radius: 15px; border: 1px solid rgba(0, 217, 255, 0.2); display: flex; align-items: center; justify-content: center; margin-bottom: 10px;\">\n            \u003Cspan style=\"font-size: 32px;\">✨\u003C\u002Fspan>\n          \u003C\u002Fdiv>\n          \u003Cb>MiniRAG\u003C\u002Fb>\u003Cbr>\n          \u003Csub>Extremely Simple RAG\u003C\u002Fsub>\n        \u003C\u002Fa>\n      \u003C\u002Ftd>\n    \u003C\u002Ftr>\n  \u003C\u002Ftable>\n\u003C\u002Fdiv>\n\n---\n\n## ⭐ Star History\n\n*Community Growth Trajectory*\n\n\u003Cdiv align=\"center\">\n  \u003Ca href=\"https:\u002F\u002Fstar-history.com\u002F#HKUDS\u002FRAG-Anything&Date\">\n    \u003Cpicture>\n      \u003Csource media=\"(prefers-color-scheme: dark)\" srcset=\"https:\u002F\u002Fapi.star-history.com\u002Fsvg?repos=HKUDS\u002FRAG-Anything&type=Date&theme=dark\" \u002F>\n      \u003Csource media=\"(prefers-color-scheme: light)\" srcset=\"https:\u002F\u002Fapi.star-history.com\u002Fsvg?repos=HKUDS\u002FRAG-Anything&type=Date\" \u002F>\n      \u003Cimg alt=\"Star History Chart\" src=\"https:\u002F\u002Fapi.star-history.com\u002Fsvg?repos=HKUDS\u002FRAG-Anything&type=Date\" style=\"border-radius: 15px; box-shadow: 0 0 30px rgba(0, 217, 255, 0.3);\" \u002F>\n    \u003C\u002Fpicture>\n  \u003C\u002Fa>\n\u003C\u002Fdiv>\n\n---\n\n## 🤝 Contribution\n\n*Join the Innovation*\n\n\u003Cdiv align=\"center\">\n  We thank all our contributors for their valuable contributions.\n\u003C\u002Fdiv>\n\n\u003Cdiv align=\"center\">\n  \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FHKUDS\u002FRAG-Anything\u002Fgraphs\u002Fcontributors\">\n    \u003Cimg src=\"https:\u002F\u002Fcontrib.rocks\u002Fimage?repo=HKUDS\u002FRAG-Anything\" style=\"border-radius: 15px; box-shadow: 0 0 20px rgba(0, 217, 255, 0.3);\" \u002F>\n  \u003C\u002Fa>\n\u003C\u002Fdiv>\n\n---\n\n\u003Cdiv align=\"center\" style=\"background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 15px; padding: 30px; margin: 30px 0;\">\n  \u003Cdiv>\n    \u003Cimg src=\"https:\u002F\u002Fuser-images.githubusercontent.com\u002F74038190\u002F212284100-561aa473-3905-4a80-b561-0d28506553ee.gif\" width=\"500\">\n  \u003C\u002Fdiv>\n  \u003Cdiv style=\"margin-top: 20px;\">\n    \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FHKUDS\u002FRAG-Anything\" style=\"text-decoration: none;\">\n      \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F⭐%20Star%20us%20on%20GitHub-1a1a2e?style=for-the-badge&logo=github&logoColor=white\">\n    \u003C\u002Fa>\n    \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FHKUDS\u002FRAG-Anything\u002Fissues\" style=\"text-decoration: none;\">\n      \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F🐛%20Report%20Issues-ff6b6b?style=for-the-badge&logo=github&logoColor=white\">\n    \u003C\u002Fa>\n    \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FHKUDS\u002FRAG-Anything\u002Fdiscussions\" style=\"text-decoration: none;\">\n      \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F💬%20Discussions-4ecdc4?style=for-the-badge&logo=github&logoColor=white\">\n    \u003C\u002Fa>\n  \u003C\u002Fdiv>\n\u003C\u002Fdiv>\n\n\u003Cdiv align=\"center\">\n  \u003Cdiv style=\"width: 100%; max-width: 600px; margin: 20px auto; padding: 20px; background: linear-gradient(135deg, rgba(0, 217, 255, 0.1) 0%, rgba(0, 217, 255, 0.05) 100%); border-radius: 15px; border: 1px solid rgba(0, 217, 255, 0.2);\">\n    \u003Cdiv style=\"display: flex; justify-content: center; align-items: center; gap: 15px;\">\n      \u003Cspan style=\"font-size: 24px;\">⭐\u003C\u002Fspan>\n      \u003Cspan style=\"color: #00d9ff; font-size: 18px;\">Thank you for visiting RAG-Anything!\u003C\u002Fspan>\n      \u003Cspan style=\"font-size: 24px;\">⭐\u003C\u002Fspan>\n    \u003C\u002Fdiv>\n    \u003Cdiv style=\"margin-top: 10px; color: #00d9ff; font-size: 16px;\">Building the Future of Multimodal AI\u003C\u002Fdiv>\n  \u003C\u002Fdiv>\n\u003C\u002Fdiv>\n","RAG-Anything 是一个全功能的检索增强生成（RAG）框架，旨在简化多模态数据处理和生成任务。它支持多种类型的输入数据（如文本、图像等），并通过高效的检索机制来增强模型的生成能力。基于先进的AI技术构建，RAG-Anything 提供了灵活易用的API接口，使得开发者能够轻松集成到自己的项目中。此框架特别适用于需要结合外部知识库进行内容创作或对话系统的场景，比如智能客服、自动写作助手等领域。",2,"2026-06-11 02:50:08","top_language"]