AI · #llm#langchain#ai#chain

LangChain实战:构建AI应用链

2025.07.30 6 min 2.6k
// 目录 · contents

引言

LangChain 是构建 LLM 应用的主流框架,提供了从简单的 Prompt 链到复杂的自主 Agent 的全套抽象。本文将系统介绍 LangChain 的核心概念——Chain、Agent、Tool、Memory 和 Output Parser,通过实际代码示例展示如何构建不同类型的 AI 应用。

LangChain 核心架构

graph TB
    A[LangChain 核心组件] --> B[Model I/O]
    A --> C[Retrieval]
    A --> D[Agents]
    A --> E[Chains / LCEL]

    B --> B1[Prompt Templates]
    B --> B2[LLM / Chat Models]
    B --> B3[Output Parsers]

    C --> C1[Document Loaders]
    C --> C2[Text Splitters]
    C --> C3[Vector Stores]
    C --> C4[Retrievers]

    D --> D1[Agent Executor]
    D --> D2[Tools]
    D --> D3[Agent Types]

    E --> E1[RunnableSequence]
    E --> E2[RunnableParallel]
    E --> E3[RunnableBranch]

    style A fill:#1c3d5a,color:#fff
    style B fill:#3498db,color:#fff
    style C fill:#2ecc71,color:#fff
    style D fill:#e74c3c,color:#fff
    style E fill:#f39c12,color:#000

LCEL — LangChain Expression Language

LCEL 是 LangChain 的核心编排方式,使用管道符 | 组合组件:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Basic LCEL chain: prompt template -> chat model -> string output parser.
# The template exposes two variables, {domain} and {question}.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "你是一个专业的{domain}专家,用简洁的中文回答问题。"),
        ("human", "{question}"),
    ]
)

model = ChatOpenAI(model="gpt-4o", temperature=0.7)
output_parser = StrOutputParser()

# The pipe operator composes the three runnables into one RunnableSequence.
chain = prompt | model | output_parser

# Single call: both template variables are supplied in one dict.
result = chain.invoke({"domain": "机器学习", "question": "什么是梯度下降?"})
print(result)

# Streaming: print each chunk as it arrives, without adding newlines.
for chunk in chain.stream({"domain": "机器学习", "question": "什么是反向传播?"}):
    print(chunk, end="", flush=True)

# Batch processing: several inputs in one call, capped by max_concurrency.
results = chain.batch(
    [
        {"domain": "前端", "question": "什么是虚拟DOM?"},
        {"domain": "后端", "question": "什么是微服务?"},
    ],
    config={"max_concurrency": 5},
)

RunnableParallel — 并行执行

1
2
3
4
5
6
7
8
9
10
11
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

# Run multiple chains in parallel on the same input.
# NOTE(review): prompt_summary, prompt_keywords and prompt_sentiment are not
# defined in this article; presumably prompt templates taking {text}.
# Define them before running this snippet.
summary_chain = prompt_summary | model | output_parser
keywords_chain = prompt_keywords | model | output_parser
sentiment_chain = prompt_sentiment | model | output_parser

# Each keyword becomes the key of that sub-chain's output in the result dict.
analysis_chain = RunnableParallel(
    summary=summary_chain,
    keywords=keywords_chain,
    sentiment=sentiment_chain,
)

result = analysis_chain.invoke({"text": "LangChain是一个优秀的LLM应用框架..."})
# result = {"summary": "...", "keywords": "...", "sentiment": "..."}

RunnableBranch — 条件路由

1
2
3
4
5
6
7
8
9
10
from langchain_core.runnables import RunnableBranch

# Route to different chains based on the text of the incoming question.
# NOTE(review): code_chain, math_chain and general_chain are not defined in
# this article; they must exist before this snippet runs.
def _asks_for_code(x):
    """Return True when the question mentions code."""
    return "代码" in x["question"]


def _asks_for_math(x):
    """Return True when the question mentions math."""
    return "数学" in x["question"]


router = RunnableBranch(
    (_asks_for_code, code_chain),
    (_asks_for_math, math_chain),
    general_chain,  # Default fallback when no predicate matches
)

result = router.invoke({"question": "写一段Python代码实现快速排序"})

Prompt Templates

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from langchain_core.prompts import (
ChatPromptTemplate,
FewShotChatMessagePromptTemplate,
MessagesPlaceholder,
)

# Few-shot prompt template: worked input/output pairs shown to the model
# before the real question.
examples = [
    {"input": "什么是Python?", "output": "Python是一种高级编程语言,以简洁清晰著称。"},
    {"input": "什么是Docker?", "output": "Docker是一个容器化平台,用于打包和运行应用。"},
]

# How a single example is rendered: one human turn followed by one AI turn.
example_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{input}"),
        ("ai", "{output}"),
    ]
)

few_shot_prompt = FewShotChatMessagePromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
)

# Final layout: system instruction, the rendered examples, then the user input.
final_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "你是一个技术术语解释器,用一句话简洁回答。"),
        few_shot_prompt,
        ("human", "{input}"),
    ]
)

chain = final_prompt | model | output_parser
result = chain.invoke({"input": "什么是Kubernetes?"})

Output Parsers — 结构化输出

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from langchain_core.output_parsers import JsonOutputParser
# Import from pydantic directly (as the StructuredTool example in this article
# does) rather than the deprecated langchain_core.pydantic_v1 shim.
from pydantic import BaseModel, Field
from typing import List


# Define output schema
# No class docstring on purpose: a docstring would be added to the JSON schema
# and therefore to the format instructions sent to the model.
class CodeReview(BaseModel):
    issues: List[str] = Field(description="发现的代码问题列表")
    severity: str = Field(description="严重程度: low/medium/high")
    suggestions: List[str] = Field(description="改进建议列表")
    score: int = Field(description="代码质量分数 0-100")

# The parser both produces the format instructions and parses the reply.
parser = JsonOutputParser(pydantic_object=CodeReview)

review_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "你是一个代码审查专家。分析以下代码并给出结构化的审查结果。\n{format_instructions}"),
        ("human", "请审查这段代码:\n```python\n{code}\n```"),
    ]
)

# Pre-fill {format_instructions} so callers only have to pass {code}.
format_instructions = parser.get_format_instructions()
review_chain = (
    review_prompt.partial(format_instructions=format_instructions)
    | model
    | parser
)

result = review_chain.invoke({"code": "def add(a,b): return a+b"})
# result is a dict matching CodeReview schema

使用 with_structured_output

1
2
3
4
5
6
7
8
9
10
# More robust: use model's native function calling
class ExtractedInfo(BaseModel):
    """Extracted information from the text."""

    # The docstring and Field descriptions are part of the schema handed to
    # the model, so they are kept verbatim.
    name: str = Field(description="人名")
    age: int = Field(description="年龄")
    occupation: str = Field(description="职业")


# Bind the schema to the model; invoke() then yields an ExtractedInfo instance.
structured_llm = model.with_structured_output(ExtractedInfo)
sample_text = "张三今年30岁,是一名软件工程师。"
result = structured_llm.invoke(sample_text)
# result = ExtractedInfo(name='张三', age=30, occupation='软件工程师')

Memory Systems

graph LR
    A[Memory Types] --> B[ConversationBufferMemory<br/>完整历史]
    A --> C[ConversationBufferWindowMemory<br/>窗口历史]
    A --> D[ConversationSummaryMemory<br/>摘要历史]
    A --> E[ConversationTokenBufferMemory<br/>Token限制]
    A --> F[VectorStoreRetrieverMemory<br/>向量存储]

    style B fill:#3498db,color:#fff
    style D fill:#e74c3c,color:#fff
    style F fill:#2ecc71,color:#fff
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# Chat history store: maps a session id to that session's message history.
store = {}


def get_session_history(session_id: str):
    """Return the history object for ``session_id``, creating it on first use."""
    try:
        return store[session_id]
    except KeyError:
        # First time this session is seen: start an empty in-memory history.
        history = InMemoryChatMessageHistory()
        store[session_id] = history
        return history

# Prompt with a placeholder where earlier turns of the conversation go.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "你是一个友好的AI助手。"),
        MessagesPlaceholder(variable_name="history"),
        ("human", "{input}"),
    ]
)

chain = prompt | model | output_parser

# Wrap with message history: "input" is the key of the new user message and
# "history" is the prompt variable that receives the stored messages.
chain_with_history = RunnableWithMessageHistory(
    chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="history",
)

# Both calls use the same session id, so they share one history.
config = {"configurable": {"session_id": "user_123"}}

r1 = chain_with_history.invoke({"input": "我叫张三"}, config=config)
r2 = chain_with_history.invoke({"input": "我叫什么名字?"}, config=config)
# r2 will remember: "你叫张三"

Tools — 工具定义与使用

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from langchain_core.tools import tool
from langchain_community.tools import DuckDuckGoSearchRun
import httpx
import ast

# Define custom tools using decorator
@tool
def calculate(expression: str) -> str:
    """计算数学表达式。输入应该是一个有效的Python数学表达式。"""
    # The docstring above is left verbatim; comments carry the English notes.
    #
    # Evaluate by walking the parsed AST and allowing only numeric literals
    # and arithmetic operators. Names, calls, attribute access and every
    # other construct are rejected, so eval()/exec() is never used on the
    # model-supplied text. (ast.literal_eval cannot be used here: it does not
    # evaluate arithmetic such as "15 * 24".)
    import operator  # local import: only this function needs it

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    max_exponent = 1000  # keeps `**` from building enormous integers

    def evaluate(node):
        """Recursively compute the value of a whitelisted AST node."""
        if isinstance(node, ast.Expression):
            return evaluate(node.body)
        if isinstance(node, ast.Constant):
            value = node.value
            # bool is a subclass of int; exclude it so "True + 1" is rejected.
            if isinstance(value, (int, float)) and not isinstance(value, bool):
                return value
            raise ValueError(f"unsupported literal: {value!r}")
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            left = evaluate(node.left)
            right = evaluate(node.right)
            if isinstance(node.op, ast.Pow) and abs(right) > max_exponent:
                raise ValueError("exponent too large")
            return binary_ops[type(node.op)](left, right)
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        raise ValueError(f"unsupported expression: {type(node).__name__}")

    try:
        tree = ast.parse(expression, mode="eval")
        return str(evaluate(tree))
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising, as the
        # original did, so the caller receives a readable error message.
        return f"计算错误: {e}"

@tool
def get_weather(city: str) -> str:
    """获取指定城市的当前天气信息。"""
    # The docstring above is left verbatim; comments carry the English notes.
    try:
        response = httpx.get(
            "https://api.weatherapi.com/v1/current.json",
            # "YOUR_API_KEY" is a placeholder; supply a real key before use.
            params={"key": "YOUR_API_KEY", "q": city, "lang": "zh"},
            timeout=10.0,  # do not let a slow API stall the caller
        )
        # Turn 4xx/5xx replies into an exception instead of failing later
        # with a KeyError on an error payload.
        response.raise_for_status()
        current = response.json()["current"]
        return f"{city}: {current['condition']['text']}, 温度{current['temp_c']}°C"
    except (httpx.HTTPError, KeyError, ValueError) as e:
        # Network/HTTP failure, unexpected payload shape, or invalid JSON:
        # report it as text, the same way calculate() reports its errors.
        return f"天气查询失败: {e}"

@tool
def search_docs(query: str) -> str:
    """在内部知识库中搜索相关文档。"""
    # Uses the module-level `vectorstore` (the FAISS index built in the
    # retrieval example); the original referenced an undefined `vector_store`.
    results = vectorstore.similarity_search(query, k=3)
    # Join the text of the top 3 matching documents, one per line.
    return "\n".join(doc.page_content for doc in results)

# Built-in tools
# Ready-made DuckDuckGo search tool; it is added to the agent's tool list
# alongside the custom @tool functions.
search_tool = DuckDuckGoSearchRun()

Agents — 自主决策

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from langchain.agents import create_tool_calling_agent, AgentExecutor

# Tools the agent may call.
tools = [calculate, get_weather, search_docs, search_tool]

# Agent prompt: system instructions, optional prior conversation, the user
# input, and a scratchpad placeholder for the agent's intermediate steps.
agent_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", """你是一个智能助手,可以使用以下工具来帮助用户:
- calculate: 进行数学计算
- get_weather: 查询天气
- search_docs: 搜索内部文档
- duckduckgo_search: 搜索互联网

根据用户的问题,决定使用哪些工具,然后综合结果回答。"""),
        MessagesPlaceholder(variable_name="chat_history", optional=True),
        ("human", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

# Create agent
agent = create_tool_calling_agent(model, tools, agent_prompt)

# The executor drives the agent loop.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,  # Print reasoning steps
    max_iterations=5,  # Prevent infinite loops
    handle_parsing_errors=True,
)

# Run agent on a request that needs two different tools.
user_request = "北京今天天气怎么样?另外帮我算一下 (15 * 24) + 360"
result = agent_executor.invoke({"input": user_request})
sequenceDiagram
    participant U as 用户
    participant A as Agent
    participant L as LLM
    participant T as Tools

    U->>A: "北京天气怎么样?算一下15*24+360"
    A->>L: 分析用户意图
    L-->>A: 计划: 1.查天气 2.做计算

    A->>T: get_weather("北京")
    T-->>A: "北京: 晴, 28°C"

    A->>T: calculate("15*24+360")
    T-->>A: "720"

    A->>L: 综合工具结果生成回答
    L-->>A: 最终回答
    A->>U: "北京今天晴天28°C。15*24+360=720"

Retrieval Chains

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# Load the source page, split it into overlapping chunks, and index them.
loader = WebBaseLoader("https://docs.example.com/guide")
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(splits, embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

# QA prompt: the retrieved documents are injected through {context}.
system_prompt = """基于以下上下文回答用户的问题。如果不确定,说"我不知道"。

上下文:
{context}"""

qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

# Combine: retrieve documents for the input, then answer from them.
question_answer_chain = create_stuff_documents_chain(model, qa_prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# Use with history (empty here, i.e. the first turn of a conversation).
rag_input = {
    "input": "LangChain支持哪些向量数据库?",
    "chat_history": [],
}
result = rag_chain.invoke(rag_input)

print(result["answer"])
print(result["context"])  # Retrieved source documents

Custom Tools with Pydantic Schema

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
from langchain_core.tools import StructuredTool
from pydantic import BaseModel, Field

# Argument schema for the database_query tool. Documented with comments rather
# than a class docstring so the generated schema stays exactly as before.
class DatabaseQueryInput(BaseModel):
    # Name of the table to query.
    table: str = Field(description="要查询的数据表名")
    # Filter expression placed after WHERE.
    conditions: str = Field(description="查询条件,如 'age > 25'")
    # Maximum number of rows to return; defaults to 10.
    limit: int = Field(default=10, description="返回结果数量限制")

def query_database(table: str, conditions: str, limit: int = 10) -> str:
    """Build a SELECT statement and return a simulated execution report.

    Args:
        table: Table name; a plain identifier, optionally schema-qualified
            with dots (for example ``users`` or ``public.users``).
        conditions: Filter expression placed after ``WHERE``.
        limit: Maximum number of rows; a non-negative integer.

    Returns:
        A string containing the generated query and a placeholder result.

    Raises:
        ValueError: If ``table`` is not a valid identifier or ``limit`` is
            not a non-negative integer.
    """
    # Table names cannot be bound as SQL parameters, so reject anything that
    # is not a bare (optionally dotted) identifier before interpolating it.
    if not all(part.isidentifier() for part in table.split(".")):
        raise ValueError(f"invalid table name: {table!r}")
    # bool is a subclass of int; exclude it explicitly.
    if isinstance(limit, bool) or not isinstance(limit, int) or limit < 0:
        raise ValueError(f"limit must be a non-negative integer, got {limit!r}")

    # SECURITY: `conditions` is still spliced in as raw SQL. That is tolerable
    # only because this query is simulated; before executing against a real
    # database, replace it with structured filters and bound parameters.
    query = f"SELECT * FROM {table} WHERE {conditions} LIMIT {limit}"
    return f"Query executed: {query}\nResults: [...]"

# Wrap query_database as a tool with an explicit name, description and
# argument schema (DatabaseQueryInput) instead of relying on the function's
# own signature and docstring.
db_tool = StructuredTool.from_function(
    func=query_database,
    name="database_query",
    description="查询数据库中的数据",
    args_schema=DatabaseQueryInput,
)

LangSmith 调试与追踪

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import os

# Enable LangSmith tracing by setting its three environment variables.
# "your-api-key" is a placeholder to be replaced with a real key.
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_API_KEY": "your-api-key",
        "LANGCHAIN_PROJECT": "my-rag-project",
    }
)

# All chain invocations are now traced
# View traces at https://smith.langchain.com

# Add custom metadata and tags to the trace of a single invocation.
# NOTE(review): assumes `chain` is the basic LCEL chain whose prompt uses
# {domain} and {question}. That prompt needs both variables, so "domain" is
# supplied here; the original passed only "question".
result = chain.invoke(
    {"domain": "AI 应用开发", "question": "什么是LangChain?"},
    config={
        "metadata": {"user_id": "user_123", "session": "abc"},
        "tags": ["production", "rag"],
    },
)

总结

LangChain 通过 LCEL 管道式编排、结构化输出、Memory 系统和 Agent 机制,提供了构建 LLM 应用的完整工具链。核心建议:

  1. 优先使用 LCEL 而非 Legacy Chain 类
  2. 结构化输出 使用 with_structured_output 比 OutputParser 更可靠
  3. Agent 设计 要控制迭代次数、处理错误、记录日志
  4. 开启 LangSmith 追踪,对调试和优化至关重要
  5. 关注 LangGraph 作为下一代 Agent 编排框架
作者 · author: zt
发布 · date: 2025-07-30
篇幅 · length: 2.6k 字 · 6 min
许可 · license: CC BY-SA 4.0
$ echo "comments" · 评论