Skip to content

3.7 Structured Output

本节介绍 LangChain 中的结构化输出功能。


什么是 Structured Output?

结构化输出(Structured Output) 让 Agent 能够返回特定、可预测格式的数据,而不是需要解析的自然语言文本。

"Structured output allows agents to return data in a specific, predictable format."

这使得应用程序可以直接消费 JSON 对象、Pydantic 模型或 dataclass。


两种实现策略

1. Provider Strategy(提供商策略)

使用模型原生的结构化输出 API(OpenAI、Anthropic、Google 等):

python
from langchain.agents import create_agent
from langchain.agents.structured_output import ProviderStrategy
from pydantic import BaseModel


class WeatherReport(BaseModel):
    """Schema that the agent's final answer must conform to."""
    city: str
    temperature: float
    condition: str
    humidity: int


# NOTE: `get_weather` is a tool that is assumed to be defined elsewhere.
agent = create_agent(
    "gpt-4o",
    tools=[get_weather],
    # create_agent receives the output strategy through `response_format`;
    # ProviderStrategy asks for the provider's native structured output.
    response_format=ProviderStrategy(WeatherReport),
)

优点:最可靠,由模型原生支持

2. Tool Strategy(工具策略)

通过工具调用实现结构化输出:

python
from langchain.agents import create_agent
from langchain.agents.structured_output import ToolStrategy

# NOTE: `WeatherReport` and `get_weather` are not defined in this snippet;
# they are assumed to come from the Provider Strategy example.
agent = create_agent(
    "gpt-4o",
    tools=[get_weather],
    # create_agent receives the output strategy through `response_format`;
    # ToolStrategy obtains the structured result via a tool call.
    response_format=ToolStrategy(WeatherReport),
)

优点:兼容更多模型


支持的 Schema 类型

1. Pydantic BaseModel

最常用的方式,支持验证:

python
from pydantic import BaseModel, Field
from typing import List, Optional

class Person(BaseModel):
    """Information about a person."""
    # Required; no default is supplied.
    name: str = Field(description="姓名")
    # Required; validated to the inclusive range 0-150 (ge=0, le=150).
    age: int = Field(description="年龄", ge=0, le=150)
    # Optional; falls back to None when not provided.
    email: Optional[str] = Field(default=None, description="邮箱地址")
    # Optional; default_factory=list gives every instance its own empty list.
    skills: List[str] = Field(default_factory=list, description="技能列表")

2. Python dataclass

python
from dataclasses import dataclass
from typing import List

@dataclass
class Product:
    """Product information."""
    # All four fields are required: none of them declares a default value.
    name: str
    price: float
    category: str
    tags: List[str]

3. TypedDict

python
from typing import TypedDict, List

class OrderInfo(TypedDict):
    """Dictionary shape describing an order.

    A TypedDict only describes keys and value types for type checkers;
    it performs no validation at runtime.
    """
    order_id: str
    items: List[str]
    total: float
    status: str

4. JSON Schema

python
# Raw JSON Schema for an object with three properties.
# Only "name" and "price" are listed under "required"; "in_stock" is optional.
json_schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string", "description": "产品名称"},
        "price": {"type": "number", "description": "价格"},
        "in_stock": {"type": "boolean", "description": "是否有库存"}
    },
    "required": ["name", "price"]
}

5. Union 类型

支持多种可能的输出格式:

python
from typing import Union

# NOTE: `BaseModel` is not imported in this snippet; it is assumed to be in
# scope from an earlier `from pydantic import BaseModel`.
class SuccessResponse(BaseModel):
    """Output shape for a successful result."""
    # Plain str with a default of "success"; the value itself is not enforced.
    status: str = "success"
    data: dict

class ErrorResponse(BaseModel):
    """Output shape for a failed result."""
    # Plain str with a default of "error"; the value itself is not enforced.
    status: str = "error"
    message: str

# Either of the two models is an acceptable output.
ResponseType = Union[SuccessResponse, ErrorResponse]

基本用法

模型级结构化输出

python
from langchain_openai import ChatOpenAI
from pydantic import BaseModel

class Sentiment(BaseModel):
    """Sentiment analysis result."""
    text: str
    # Expected values: positive, negative, neutral. The field is a plain str,
    # so these values are a convention and are not validated.
    sentiment: str  # positive, negative, neutral
    confidence: float

# Wrap the chat model so that its output is parsed into the Sentiment schema.
model = ChatOpenAI(model="gpt-4o")
structured_model = model.with_structured_output(Sentiment)

result = structured_model.invoke("分析这句话的情感:今天天气真好,心情很愉快!")
print(result)
# Example output (actual values depend on the model's answer):
# Sentiment(text='今天天气真好,心情很愉快!', sentiment='positive', confidence=0.95)

Agent 级结构化输出

python
from langchain.agents import create_agent
from langchain.agents.structured_output import ToolStrategy
from pydantic import BaseModel
from typing import List


class AnalysisResult(BaseModel):
    """Analysis result returned by the agent."""
    summary: str
    key_points: List[str]
    recommendations: List[str]
    confidence_score: float


# NOTE: `analyze_data` and `search` are tools assumed to be defined elsewhere.
agent = create_agent(
    "gpt-4o",
    tools=[analyze_data, search],
    # create_agent receives the output strategy through `response_format`.
    response_format=ToolStrategy(AnalysisResult),
)

result = agent.invoke(
    {"messages": [{"role": "user", "content": "分析最近的销售数据"}]}
)

# The parsed schema instance is stored under the "structured_response" key.
analysis = result["structured_response"]
print(f"摘要: {analysis.summary}")
print(f"关键点: {analysis.key_points}")

错误处理

LangChain 提供智能重试机制:

多输出错误

当模型错误地调用多个结构化工具时:

python
from langchain.agents import create_agent
from langchain.agents.structured_output import ToolStrategy

# NOTE: `my_tools` and `MySchema` are placeholders assumed to be defined
# elsewhere.
agent = create_agent(
    "gpt-4o",
    tools=[my_tools],
    # create_agent receives the output strategy through `response_format`.
    response_format=ToolStrategy(
        MySchema,
        # Enable error handling: the error is reported back to the model so
        # it can retry. ToolStrategy has no `max_retries` option; error
        # behaviour is configured only through `handle_errors`.
        handle_errors=True,
    ),
)

验证错误

Schema 不匹配时触发重试:

python
from pydantic import BaseModel, Field, field_validator


class StrictOutput(BaseModel):
    """Output schema whose score must lie in the inclusive range 0-100."""
    score: int = Field(ge=0, le=100)

    # `field_validator` is the Pydantic v2 replacement for the deprecated
    # `validator` decorator; it must be stacked on top of `@classmethod`.
    @field_validator('score')
    @classmethod
    def validate_score(cls, v: int) -> int:
        """Reject scores outside 0-100.

        Args:
            v: The candidate value for ``score``.

        Returns:
            The unchanged value when it is within range.

        Raises:
            ValueError: If the value is below 0 or above 100.
        """
        # The same bounds are already declared on the Field (ge/le); this
        # explicit check shows how a custom validation error is raised.
        if v < 0 or v > 100:
            raise ValueError('分数必须在 0-100 之间')
        return v

自定义错误处理

python
from langchain.agents import create_agent
from langchain.agents.structured_output import ToolStrategy


def custom_error_handler(error, state=None):
    """Build the feedback message for a structured-output failure.

    Args:
        error: The exception describing why the structured output failed.
        state: Unused. Kept, with a default, so existing two-argument calls
            keep working while the handler can also be invoked with the
            exception alone.

    Returns:
        The error message string to report back.
    """
    return f"输出格式错误: {error}。请确保返回正确的 JSON 格式。"


# NOTE: `my_tools` and `MySchema` are placeholders assumed to be defined
# elsewhere.
agent = create_agent(
    "gpt-4o",
    tools=[my_tools],
    # create_agent receives the output strategy through `response_format`.
    response_format=ToolStrategy(
        MySchema,
        # A custom handler is supplied through `handle_errors`; ToolStrategy
        # has no `error_handler` keyword.
        handle_errors=custom_error_handler,
    ),
)

复杂结构示例

嵌套结构

python
from pydantic import BaseModel
from typing import List, Optional

class Address(BaseModel):
    """Postal address; all four fields are required strings."""
    street: str
    city: str
    country: str
    postal_code: str

class ContactInfo(BaseModel):
    """Contact details that embed an Address model."""
    email: str
    # Optional; defaults to None when no phone number is given.
    phone: Optional[str] = None
    # Nested model: validated against the Address schema.
    address: Address

class Customer(BaseModel):
    """Customer record that embeds a ContactInfo model."""
    id: str
    name: str
    # Nested model: ContactInfo in turn contains an Address.
    contact: ContactInfo
    tags: List[str]
    is_active: bool

枚举类型

python
from enum import Enum
from pydantic import BaseModel

class Priority(str, Enum):
    """Closed set of priority levels.

    Mixing in ``str`` makes each member a string equal to its value.
    """
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"

class Task(BaseModel):
    """Task description; all fields are required."""
    title: str
    description: str
    # Restricted to the members of the Priority enum.
    priority: Priority
    assignee: str

完整示例

python
from langchain.agents import create_agent
from langchain.agents.structured_output import ToolStrategy
from langchain_openai import ChatOpenAI
from langchain_core.tools import tool
from pydantic import BaseModel, Field
from typing import List
from enum import Enum


# Closed set of article categories.
class Category(str, Enum):
    TECH = "technology"
    BUSINESS = "business"
    SCIENCE = "science"
    OTHER = "other"


# Output structure the agent must return.
class ArticleAnalysis(BaseModel):
    """Article analysis result."""
    title: str = Field(description="文章标题")
    summary: str = Field(description="内容摘要,不超过100字")
    category: Category = Field(description="文章分类")
    keywords: List[str] = Field(description="关键词列表,3-5个")
    sentiment: str = Field(description="情感倾向:positive/negative/neutral")
    reading_time_minutes: int = Field(description="预估阅读时间(分钟)")


# Tool available to the agent.
@tool
def fetch_article(url: str) -> str:
    """Fetch the content of an article."""
    # Stub for the example: `url` is ignored and a fixed text is returned.
    return "这是一篇关于人工智能最新发展的技术文章..."


# Create the agent. The output strategy is passed through `response_format`.
agent = create_agent(
    ChatOpenAI(model="gpt-4o"),
    tools=[fetch_article],
    response_format=ToolStrategy(ArticleAnalysis),
    system_prompt="你是专业的内容分析师,擅长分析和总结文章。"
)

# Run the analysis.
result = agent.invoke({
    "messages": [{
        "role": "user",
        "content": "分析这篇文章:https://example.com/ai-article"
    }]
})

# The parsed ArticleAnalysis is stored under the "structured_response" key.
analysis = result["structured_response"]

print(f"标题: {analysis.title}")
print(f"摘要: {analysis.summary}")
print(f"分类: {analysis.category.value}")
print(f"关键词: {', '.join(analysis.keywords)}")
print(f"情感: {analysis.sentiment}")
print(f"阅读时间: {analysis.reading_time_minutes} 分钟")

最佳实践

| 实践 | 说明 |
| --- | --- |
| 使用 Pydantic | 获得类型验证和更好的 IDE 支持 |
| 添加 Field 描述 | 帮助模型理解每个字段的含义 |
| 设置合理的默认值 | 处理可选字段 |
| 使用枚举 | 限制字段的可选值 |
| 启用错误处理 | 提高输出可靠性 |

结构化输出 vs 普通输出

| 方面 | 结构化输出 | 普通输出 |
| --- | --- | --- |
| 格式 | 固定的 JSON/对象 | 自由文本 |
| 解析 | 直接使用 | 需要解析 |
| 可靠性 | 更高 | 可能格式不一致 |
| 灵活性 | 受限于 Schema | 完全自由 |
| 适用场景 | API、数据处理 | 对话、创作 |

上一节:3.6 Streaming

下一章:4.0 Advanced Usage

基于 MIT 许可证发布。内容版权归作者所有。