以下是 DSPy 常见使用模式的代码片段,帮助您快速上手和高效开发。
📦 DSPy 数据加载器
导入并初始化 DataLoader 对象
import dspy
from dspy.datasets import DataLoader
# Create the DataLoader instance; the snippets below call its from_* loaders.
dl = DataLoader()
从 HuggingFace 数据集加载
# Load the dataset identified by its HuggingFace repository id.
code_alpaca = dl.from_huggingface("HuggingFaceH4/CodeAlpaca_20K")
访问特定数据集分割:
# Index the loaded result by split name to get each subset.
train_dataset = code_alpaca['train']
test_dataset = code_alpaca['test']
加载特定分割
# Pass a list to `split` to request several named splits in one call.
code_alpaca = dl.from_huggingface(
    "HuggingFaceH4/CodeAlpaca_20K",
    split=["train", "test"],
)
# The result is keyed by split name, so keys() lists what was loaded.
print(f"Splits in dataset: {code_alpaca.keys()}")
加载单一分割:
# Pass a single split name as a string; len() is then taken on the result directly.
code_alpaca = dl.from_huggingface("HuggingFaceH4/CodeAlpaca_20K", split="train")
print(f"Number of examples in split: {len(code_alpaca)}")
切片分割
# The split string carries a slice expression: here, the first 80% of "train".
code_alpaca_80 = dl.from_huggingface(
    "HuggingFaceH4/CodeAlpaca_20K",
    split="train[:80%]",
)
print(f"Number of examples in split: {len(code_alpaca_80)}")
从 CSV 文件加载
# Load every column of the CSV file using the loader's defaults.
dolly_100_dataset = dl.from_csv("dolly_subset_100_rows.csv")
选择特定列:
# `fields` names the CSV columns to keep; `input_keys` names the subset of
# those columns passed as inputs (here everything except "response").
dolly_100_dataset = dl.from_csv(
    "dolly_subset_100_rows.csv",
    fields=("instruction", "context", "response"),
    input_keys=("instruction", "context"),
)
🧠 DSPy 程序
定义 Signature
class BasicQA(dspy.Signature):
    """Answer questions with short factoid answers."""

    # NOTE: the class docstring above is part of the signature (DSPy uses it
    # as the task instructions), so it is kept verbatim.

    # Input field: the question to answer.
    question = dspy.InputField()
    # Output field: the answer, with a length hint given via `desc`.
    answer = dspy.OutputField(desc="often between 1 and 5 words")
使用 ChainOfThought
# Wrap the BasicQA signature in a ChainOfThought module.
generate_answer = dspy.ChainOfThought(BasicQA)
question = 'What is the color of the sky?'
# Call the module with the signature's input field as a keyword argument.
pred = generate_answer(question=question)
使用 ProgramOfThought
# Build a ProgramOfThought module from the same BasicQA signature.
pot = dspy.ProgramOfThought(BasicQA)
question = 'Sarah has 5 apples. She buys 7 more apples. How many apples does she have?'
result = pot(question=question)
print(f"Question: {question}")
# The result exposes the signature's output field as an attribute.
print(f"Final Predicted Answer: {result.answer}")
📏 DSPy 评估
自定义 Metric
def gsm8k_metric(gold, pred, trace=None) -> bool:
    """Return whether the predicted answer equals the gold answer as an integer.

    Args:
        gold: Reference example; its ``answer`` attribute is a string holding
            an integer.
        pred: Prediction; its ``answer`` attribute is the string to check.
        trace: Accepted for compatibility with the metric call signature;
            not used here.

    Returns:
        True when both answers parse to the same integer. False when they
        differ or when the predicted answer is not a bare integer.

    Raises:
        ValueError: If the gold answer itself is not an integer. That is a
            dataset problem and is deliberately not hidden.
    """
    expected = int(gold.answer.strip())
    try:
        predicted = int(pred.answer.strip())
    except ValueError:
        # Free-form model output (e.g. "12 apples") counts as a wrong answer
        # instead of aborting the whole evaluation run.
        return False
    return expected == predicted
使用 Evaluate
from dspy.evaluate import Evaluate
# Build a reusable evaluator bound to the dev set, the metric and the
# display/threading options.
evaluate_program = Evaluate(
    devset=devset,
    metric=your_defined_metric,
    num_threads=NUM_THREADS,
    display_progress=True,
    display_table=5,
)
# Calling the evaluator on a program runs the evaluation.
evaluate_program(your_dspy_program)
🔧 DSPy 优化器
使用 LabeledFewShot
from dspy.teleprompt import LabeledFewShot
# Configure the optimizer with k=8, then compile the program against the
# training set.
labeled_fewshot_optimizer = LabeledFewShot(k=8)
your_dspy_program_compiled = labeled_fewshot_optimizer.compile(
    student=your_dspy_program,
    trainset=trainset,
)
使用 BootstrapFewShot
from dspy.teleprompt import BootstrapFewShot
# Configure the bootstrapping optimizer: the metric plus demo, round and
# error limits.
fewshot_optimizer = BootstrapFewShot(
    metric=your_defined_metric,
    max_bootstrapped_demos=4,
    max_labeled_demos=16,
    max_rounds=1,
    max_errors=5,
)
# Compile the program (the "student") against the training set.
your_dspy_program_compiled = fewshot_optimizer.compile(
    student=your_dspy_program,
    trainset=trainset,
)
🚀 DSPy 高级功能
使用 MIPRO 优化器
from dspy.teleprompt import MIPRO
# Configure the MIPRO optimizer with the metric and its search settings.
teleprompter = MIPRO(
    metric=your_defined_metric,
    num_candidates=10,
    init_temperature=0.7,
)
# The program to optimize is passed as `student` (not `program`), matching
# the LabeledFewShot / BootstrapFewShot compile() calls in this cheat sheet.
# NOTE(review): some releases of MIPRO.compile also require num_trials,
# max_bootstrapped_demos, max_labeled_demos and eval_kwargs — confirm
# against the installed DSPy version.
optimized_program = teleprompter.compile(
    student=your_dspy_program,
    trainset=trainset,
)
使用 MIPROv2(最新版本)
from dspy.teleprompt import MIPROv2
# Configure MIPROv2 with the metric and the "light" auto preset.
teleprompter = MIPROv2(
    metric=your_defined_metric,
    auto="light",
)
# The program to optimize is passed as `student` (not `program`), matching
# the LabeledFewShot / BootstrapFewShot compile() calls in this cheat sheet.
optimized_program = teleprompter.compile(
    student=your_dspy_program,
    trainset=trainset,
    max_bootstrapped_demos=3,
    max_labeled_demos=4,
)
🛡️ DSPy 断言
定义断言
# Assert on the validation function's result; the string is the message
# supplied alongside the check, and target_module names the module it
# applies to.
dspy.Assert(
    your_validation_fn(model_outputs),
    "Validation failed!",
    target_module="YourDSPyModule",
)
激活断言
from dspy.primitives.assertions import assert_transform_module, backtrack_handler
# Wrap a program instance together with the backtracking handler so that
# its assertions are activated.
program_with_assertions = assert_transform_module(
    ProgramWithAssertions(),
    backtrack_handler,
)
💾 保存与加载
保存优化后的程序
# Persist the optimized program to the given path.
save_path = './optimized_program.json'
optimized_program.save(save_path)
加载程序
# Create a fresh instance of the program class, then load the saved file into it.
loaded_program = YourProgramClass()
loaded_program.load(path=save_path)
🌟 总结
DSPy 提供了强大的工具集,帮助开发者轻松构建、优化和部署语言模型程序。通过本速查表,您可以快速掌握 DSPy 的核心功能并应用到实际项目中。如果需要更详细的文档,请参考 DSPy 的官方指南或相关教程!
注意这里的写法
注:`from_csv` 的 `input_keys` 参数指明了哪些列作为输入字段(并非数据库意义上的"主键")。
注:同一个签名类(如 `BasicQA`)可以传入不同的推理模块,例如 `dspy.ChainOfThought`(CoT)或 `dspy.ProgramOfThought`。