DSPy 使用速查表

以下是 DSPy 常见使用模式的代码片段,帮助您快速上手和高效开发。


📦 DSPy 数据加载器

导入并初始化 DataLoader 对象

import dspy
from dspy.datasets import DataLoader

# One loader instance; the loading examples that follow all call methods on it.
dl = DataLoader()

从 HuggingFace 数据集加载

# Load the dataset by its HuggingFace name; no split is requested here.
code_alpaca = dl.from_huggingface("HuggingFaceH4/CodeAlpaca_20K")

访问特定数据集分割:

# Index the loaded object by split name to pull out each split.
train_dataset = code_alpaca['train']
test_dataset = code_alpaca['test']

加载特定分割

# Request several splits in one call by passing a list of split names.
code_alpaca = dl.from_huggingface(
    "HuggingFaceH4/CodeAlpaca_20K",
    split=["train", "test"],
)

# The result exposes .keys(), which is used here to list the loaded splits.
print(f"Splits in dataset: {code_alpaca.keys()}")

加载单一分割:

# Request one split by passing its name as a plain string.
code_alpaca = dl.from_huggingface(
    "HuggingFaceH4/CodeAlpaca_20K",
    split="train",
)

# With a single split the result is measured directly with len().
print(f"Number of examples in split: {len(code_alpaca)}")

切片分割

# A slice expression is appended to the split name.
# NOTE(review): "train[:80%]" presumably selects the first 80% of the train
# split (HuggingFace split-slicing syntax) — confirm against the datasets docs.
code_alpaca_80 = dl.from_huggingface(
    "HuggingFaceH4/CodeAlpaca_20K",
    split="train[:80%]",
)

print(f"Number of examples in split: {len(code_alpaca_80)}")

从 CSV 文件加载

# Load a local CSV file; the path is relative to the working directory.
dolly_100_dataset = dl.from_csv("dolly_subset_100_rows.csv")

选择特定列:

# Same CSV, restricted to named columns.
# NOTE(review): `fields` presumably picks the columns to keep and `input_keys`
# marks which of them are inputs (leaving "response" as the label) — confirm
# against the DataLoader documentation.
dolly_100_dataset = dl.from_csv(
    "dolly_subset_100_rows.csv",
    fields=("instruction", "context", "response"),
    input_keys=("instruction", "context")
)

🧠 DSPy 程序

定义 Signature

# Signature with one input field (`question`) and one output field (`answer`).
# NOTE(review): DSPy presumably passes the class docstring and the `desc`
# string to the language model as instructions, so treat both as runtime text
# rather than ordinary documentation — confirm before rewording either.
class BasicQA(dspy.Signature):
    """Answer questions with short factoid answers."""

    # Declared with InputField(): supplied by the caller.
    question = dspy.InputField()
    # Declared with OutputField(): produced by the program; `desc` carries a
    # length hint.
    answer = dspy.OutputField(desc="often between 1 and 5 words")

使用 ChainOfThought

# Build a ChainOfThought module from the BasicQA signature.
generate_answer = dspy.ChainOfThought(BasicQA)

# The module is called with the signature's input field as a keyword argument.
question = 'What is the color of the sky?'
pred = generate_answer(question=question)

使用 ProgramOfThought

# Build a ProgramOfThought module from the same BasicQA signature.
pot = dspy.ProgramOfThought(BasicQA)

# Arithmetic word problem used as the example input.
question = 'Sarah has 5 apples. She buys 7 more apples. How many apples does she have?'
result = pot(question=question)

# The output field declared on the signature is read back as an attribute.
print(f"Question: {question}")
print(f"Final Predicted Answer: {result.answer}")

📏 DSPy 评估

自定义 Metric

def gsm8k_metric(gold, pred, trace=None) -> bool:
    """Return whether the predicted answer equals the gold answer as an integer.

    Both answers are read from the ``.answer`` attribute, stripped of
    surrounding whitespace and parsed with ``int()``, so only plain integer
    strings (e.g. ``"12"``, ``" -3 "``) are recognised.

    Args:
        gold: Reference example; ``gold.answer`` must be an integer string.
        pred: Prediction to score; ``pred.answer`` is the candidate answer.
        trace: Accepted for compatibility with the metric call signature;
            not used by this function.

    Returns:
        ``True`` when both answers parse to the same integer, ``False``
        otherwise — including when the prediction is not a valid integer.

    Raises:
        ValueError: If ``gold.answer`` is not an integer string. That is a
            data problem and is deliberately not hidden.
    """
    expected = int(gold.answer.strip())
    try:
        predicted = int(pred.answer.strip())
    except ValueError:
        # A non-numeric prediction is just a wrong answer; it should not
        # abort the evaluation run.
        return False
    return expected == predicted

使用 Evaluate

from dspy.evaluate import Evaluate

# Configure an evaluator. `devset`, `your_defined_metric` and `NUM_THREADS`
# are placeholders that this snippet does not define.
evaluate_program = Evaluate(
    devset=devset,
    metric=your_defined_metric,
    num_threads=NUM_THREADS,
    display_progress=True,
    display_table=5
)

# The configured evaluator is itself callable: pass it the program to evaluate.
evaluate_program(your_dspy_program)

🔧 DSPy 优化器

使用 LabeledFewShot

from dspy.teleprompt import LabeledFewShot

# Optimizer configured with k=8.
# NOTE(review): `k` is presumably the number of labeled examples used as
# demonstrations — confirm against the LabeledFewShot documentation.
labeled_fewshot_optimizer = LabeledFewShot(k=8)
# compile() takes the program as `student` plus the training examples;
# both names are placeholders not defined in this snippet.
your_dspy_program_compiled = labeled_fewshot_optimizer.compile(
    student=your_dspy_program,
    trainset=trainset
)

使用 BootstrapFewShot

from dspy.teleprompt import BootstrapFewShot

# Optimizer configured with a metric and limits on demos, rounds and errors.
# `your_defined_metric` is a placeholder not defined in this snippet.
fewshot_optimizer = BootstrapFewShot(
    metric=your_defined_metric,
    max_bootstrapped_demos=4,
    max_labeled_demos=16,
    max_rounds=1,
    max_errors=5
)

# compile() takes the program as `student` plus the training examples.
your_dspy_program_compiled = fewshot_optimizer.compile(
    student=your_dspy_program,
    trainset=trainset
)

🚀 DSPy 高级功能

使用 MIPRO 优化器

from dspy.teleprompt import MIPRO

# Build the optimizer from a metric and two search settings.
# `your_defined_metric` is a placeholder not defined in this snippet.
# NOTE(review): recent DSPy releases appear to ship MIPROv2 in place of
# MIPRO — confirm the import against the installed version.
teleprompter = MIPRO(
    metric=your_defined_metric,
    num_candidates=10,
    init_temperature=0.7
)

# The program to optimise is passed as `student`, the same keyword the
# LabeledFewShot and BootstrapFewShot examples use; compile() has no
# `program` parameter.
optimized_program = teleprompter.compile(
    student=your_dspy_program,
    trainset=trainset
)

使用 MIPROv2(最新版本)

from dspy.teleprompt import MIPROv2

# Build the optimizer from a metric and the "light" auto preset.
# `your_defined_metric` is a placeholder not defined in this snippet.
teleprompter = MIPROv2(
    metric=your_defined_metric,
    auto="light"
)

# The program to optimise is passed as `student`, the same keyword the
# LabeledFewShot and BootstrapFewShot examples use; compile() has no
# `program` parameter.
optimized_program = teleprompter.compile(
    student=your_dspy_program,
    trainset=trainset,
    max_bootstrapped_demos=3,
    max_labeled_demos=4
)

🛡️ DSPy 断言

定义断言

# Arguments: the result of a validation call, a message string, and the name
# of a target module. `your_validation_fn` and `model_outputs` are
# placeholders not defined in this snippet.
# NOTE(review): the message is presumably reported when the check is falsy —
# confirm against the DSPy assertions documentation.
dspy.Assert(
    your_validation_fn(model_outputs),
    "Validation failed!",
    target_module="YourDSPyModule"
)

激活断言

from dspy.primitives.assertions import assert_transform_module, backtrack_handler

# Wrap a fresh ProgramWithAssertions instance together with backtrack_handler.
# `ProgramWithAssertions` is a placeholder class not defined in this snippet.
program_with_assertions = assert_transform_module(
    ProgramWithAssertions(),
    backtrack_handler
)

💾 保存与加载

保存优化后的程序

# Save the optimized program to a JSON path relative to the working directory.
save_path = './optimized_program.json'
optimized_program.save(save_path)

加载程序

# Create an instance first, then call load() on it with the saved path.
# `YourProgramClass` is a placeholder not defined in this snippet.
loaded_program = YourProgramClass()
loaded_program.load(path=save_path)

🌟 总结

DSPy 提供了强大的工具集,帮助开发者轻松构建、优化和部署语言模型程序。通过本速查表,您可以快速掌握 DSPy 的核心功能并应用到实际项目中。如果需要更详细的文档,请参考 DSPy 的官方指南或相关教程!

《DSPy 使用速查表》有3条评论

发表评论

人生梦想 - 关注前沿的计算机技术 acejoy.com 🐾 步子哥の博客 🐾 背多分论坛 🐾 知差(chai)网 🐾 DeepracticeX 社区 🐾 老薛主机 🐾 智柴论坛 🐾