graph TB
%% End-to-end RAG pipeline, drawn top to bottom in three stages:
%% offline indexing, online retrieval, and generation.
%% Stage 1 - offline indexing: raw documents are parsed, chunked,
%% embedded, and stored as an index in the vector database.
subgraph "离线索引 (Indexing)"
A[原始文档] --> B[文档解析<br/>PDF/HTML/Markdown]
B --> C[文档分块<br/>Chunking]
C --> D[Embedding 生成]
D --> E[向量数据库<br/>存储索引]
end
%% Stage 2 - online retrieval: the user query is rewritten/expanded,
%% embedded, matched by vector similarity, and the hits are re-ranked.
subgraph "在线检索 (Retrieval)"
F[用户查询] --> G[Query 重写/扩展]
G --> H[Query Embedding]
H --> I[向量相似度检索]
%% The vector database built offline (E) feeds the similarity search (I).
E --> I
I --> J[重排序 Re-ranking]
end
%% Stage 3 - generation: re-ranked results are combined with the query into
%% a prompt, the LLM generates the answer, and citations are traced back.
subgraph "生成 (Generation)"
J --> K[Prompt 构建<br/>Context + Query]
K --> L[LLM 生成回答]
L --> M[引用溯源]
end
%% Colour highlights: raw documents (A), vector database (E), LLM answer (L).
style A fill:#3498db,color:#fff
style E fill:#e74c3c,color:#fff
style L fill:#2ecc71,color:#fff
文档分块策略
分块质量直接影响检索精度和生成质量。不同的分块策略适用于不同场景:
固定大小分块
1 2 3 4 5 6 7 8 9 10 11
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Basic fixed-size chunking with overlap
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=512,  # Characters per chunk
    chunk_overlap=50,  # Overlap between chunks
    length_function=len,  # Measure chunk length with len(), i.e. in characters
    # Candidate split points, listed from coarsest (paragraph break) to
    # finest (empty string).
    separators=["\n\n", "\n", ". ", " ", ""],
)

# NOTE: `document_text` is not defined in this snippet; it is expected to
# hold the document's text, supplied by the surrounding code.
chunks = text_splitter.split_text(document_text)
语义分块
1 2 3 4 5 6 7 8 9 10 11
from langchain_experimental.text_splitter import SemanticChunker
from langchain_openai import OpenAIEmbeddings

# Semantic chunking: split at points where meaning shifts
semantic_splitter = SemanticChunker(
    OpenAIEmbeddings(),  # Embedding model handed to the chunker
    breakpoint_threshold_type="percentile",  # or "standard_deviation"
    breakpoint_threshold_amount=95,  # Threshold value for the type chosen above
)
# Split every parent chunk into child chunks and tag each child with the id
# of the parent it came from.
# NOTE: `parent_chunks` and `child_splitter` are not defined in this snippet;
# they are expected to come from the surrounding code.
for parent in parent_chunks:
    children = child_splitter.split_documents([parent])
    for child in children:
        # .get("id") yields None when the parent's metadata has no "id" key.
        child.metadata["parent_id"] = parent.metadata.get("id")
        # Store child for retrieval, but use parent for context
graph TD
%% Parent/child chunking: the raw document becomes a 2000-token parent chunk,
%% which is divided into 400-token child chunks.
A[原始文档] --> B[Parent Chunk<br/>2000 tokens]
B --> C1[Child 1<br/>400 tokens]
B --> C2[Child 2<br/>400 tokens]
B --> C3[Child 3<br/>400 tokens]
%% When a child chunk is hit during retrieval (Child 2 here), the parent
%% chunk is returned instead so that the full context is available.
C2 -->|检索命中| D[返回 Parent Chunk<br/>提供完整上下文]
%% Colour highlights: the matched child (C2) and the returned parent (D).
style C2 fill:#e74c3c,color:#fff
style D fill:#2ecc71,color:#fff
# Imports for hybrid retrieval: ensemble retriever, BM25 retriever, and the
# Chroma vector store.
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever
from langchain_community.vectorstores import Chroma
graph LR
%% Hybrid retrieval: the user query is sent to both a BM25 keyword search
%% and a vector similarity search.
A[用户查询] --> B[BM25 关键词检索]
A --> C[向量相似度检索]
%% Each branch yields its own result list; the labels give the weights
%% (BM25 0.3, vector 0.7).
B --> D[BM25 结果<br/>权重 0.3]
C --> E[向量结果<br/>权重 0.7]
%% Both result lists are merged with Reciprocal Rank Fusion into the
%% Top-K candidate documents.
D --> F[Reciprocal Rank Fusion<br/>结果合并]
E --> F
F --> G[Top-K 候选文档]
%% Colour highlights: BM25 search (B), vector search (C), fusion step (F).
style B fill:#f39c12,color:#000
style C fill:#3498db,color:#fff
style F fill:#9b59b6,color:#fff
# Imports for the generation step: chat model, string output parser, and the
# passthrough runnable.
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
graph TD
%% RAG evaluation dimensions, split into retrieval quality, generation
%% quality, and end-to-end quality.
A[RAG 评估维度] --> B[检索质量]
A --> C[生成质量]
A --> D[端到端质量]
%% Retrieval-quality metrics.
B --> B1[Recall@K]
B --> B2[MRR - Mean Reciprocal Rank]
B --> B3[NDCG]
%% Generation-quality metrics: faithfulness, answer relevancy,
%% hallucination detection.
C --> C1[Faithfulness 忠实度]
C --> C2[Answer Relevancy 答案相关性]
C --> C3[Hallucination 幻觉检测]
%% End-to-end quality measures.
D --> D1[RAGAS Score]
D --> D2[Human Evaluation]
%% Colour highlights for the three top-level dimensions (B, C, D).
style B fill:#3498db,color:#fff
style C fill:#e74c3c,color:#fff
style D fill:#2ecc71,color:#fff
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall,
)

# Evaluate RAG pipeline with RAGAS
# NOTE: `eval_dataset` is not defined in this snippet; it is expected to be
# prepared by the surrounding code.
result = evaluate(
    dataset=eval_dataset,
    metrics=[
        faithfulness,  # Is the answer grounded in context?
        answer_relevancy,  # Is the answer relevant to the question?
        context_precision,  # Are retrieved docs relevant?
        context_recall,  # Are all relevant docs retrieved?
    ],
)