Python 量化回测
Python 量化回测全流程实战指南
量化回测是策略开发的核心环节,通过历史数据模拟交易,评估策略有效性。Python 提供了多种回测框架,从事件驱动到向量化,应有尽有。以下是完整回测体系的设计与实现。
1. 回测框架对比与选择
| 框架 | 类型 | 特点 | 适用场景 | 学习曲线 |
|---|---|---|---|---|
| Backtrader | 事件驱动 | 灵活、支持多资产、多时间框架 | 复杂策略、实盘过渡 | 中等 |
| Zipline | 事件驱动 | Quantopian生态、Pipeline API | 因子策略、美股 | 中等 |
| VectorBT | 向量化 | 超高性能、GPU支持 | 简单策略、参数优化 | 低 |
| QSTrader | 事件驱动 | 模块化、易扩展 | 自定义需求 | 中等 |
| vnpy | 事件驱动 | 国内实盘框架 | 期货/股票实盘 | 高 |
2. Backtrader 完整回测系统
基础架构搭建
import backtrader as bt
import backtrader.analyzers as btanalyzers
import backtrader.feeds as btfeeds
import yfinance as yf
import pandas as pd
from datetime import datetime
import numpy as np
class MultiAssetData(btfeeds.PandasData):
    """Pandas feed that maps capitalised OHLCV columns onto backtrader lines.

    ``PandasData`` already declares the ``open``/``high``/``low``/``close``/
    ``volume``/``openinterest`` lines, so only the column mapping is customised
    here. Declaring ``lines`` again in a subclass appends extra lines instead of
    overriding the inherited ones, which is why no ``lines`` tuple is defined.
    """

    params = (
        ('datetime', None),       # None: the DataFrame index holds the timestamps
        ('high', 'High'),
        ('low', 'Low'),
        ('open', 'Open'),
        ('close', 'Close'),
        ('volume', 'Volume'),
        ('openinterest', None),   # None: the DataFrame has no such column
    )
class AdvancedMAStrategy(bt.Strategy):
    """Moving-average crossover strategy with ATR-based exit levels.

    Entry: buy ``size`` units when the fast SMA crosses above the slow SMA.
    Exit: sell when the close falls to the stop level, rises to the target
    level, or the fast SMA crosses back below the slow SMA.

    Params:
        fast / slow: SMA periods.
        atr_period: ATR look-back.
        stop_loss / take_profit: ATR multiples below / above the fill price.
        size: number of units per order.
    """

    params = (
        ('fast', 10),
        ('slow', 30),
        ('atr_period', 14),
        ('stop_loss', 2.0),
        ('take_profit', 3.0),
        ('size', 100),
    )

    def log(self, txt, dt=None):
        """Print ``txt`` prefixed with ``dt`` (defaults to the current bar date)."""
        dt = dt or self.datas[0].datetime.date(0)
        print(f'{dt.isoformat()}, {txt}')

    def __init__(self):
        self.fast_ma = bt.indicators.SMA(self.data.close, period=self.p.fast)
        self.slow_ma = bt.indicators.SMA(self.data.close, period=self.p.slow)
        self.atr = bt.indicators.ATR(self.data, period=self.p.atr_period)
        self.crossover = bt.indicators.CrossOver(self.fast_ma, self.slow_ma)
        self.order = None        # pending order, if any
        self.stop_price = None   # set when a buy fills
        self.take_price = None   # set when a buy fills

    def notify_order(self, order):
        """Track order state and derive exit levels from the buy fill price."""
        if order.status in (order.Submitted, order.Accepted):
            # Order is still working: keep self.order set so next() does not
            # stack a second order on top of it.
            return

        if order.status == order.Completed:
            if order.isbuy():
                self.log(f'买入执行, 价格: {order.executed.price:.2f}')
                self.stop_price = order.executed.price - self.atr[0] * self.p.stop_loss
                self.take_price = order.executed.price + self.atr[0] * self.p.take_profit
            elif order.issell():
                self.log(f'卖出执行, 价格: {order.executed.price:.2f}')
                self.stop_price = None
                self.take_price = None
        elif order.status in (order.Canceled, order.Margin, order.Rejected):
            self.log('订单取消/保证金不足/被拒绝')

        # The order reached a final state: allow new orders again.
        self.order = None

    def next(self):
        """Submit at most one order per bar according to the entry/exit rules."""
        if self.order:
            return

        if not self.position:
            if self.crossover > 0:  # golden cross: open a long position
                self.order = self.buy(size=self.p.size)
            return

        price = self.data.close[0]
        # Exit levels only exist after a buy fill; guard against None so a
        # position opened by other means cannot raise a TypeError here.
        hit_stop = self.stop_price is not None and price <= self.stop_price
        hit_target = self.take_price is not None and price >= self.take_price
        if hit_stop or hit_target or self.crossover < 0:
            self.order = self.sell(size=self.p.size)
def run_backtest(symbols, start_date, end_date, initial_cash=100000):
    """Download data, run ``AdvancedMAStrategy`` and print a performance report.

    Args:
        symbols: iterable of ticker symbols; one data feed is added per symbol.
        start_date: first date to download (``YYYY-MM-DD``).
        end_date: end date passed to ``yf.download`` (``YYYY-MM-DD``).
        initial_cash: starting broker cash.

    Returns:
        Tuple ``(cerebro, results)`` where ``results`` is the list of strategy
        instances returned by ``cerebro.run()``.

    Raises:
        ValueError: if no price data could be downloaded for a symbol.
    """
    cerebro = bt.Cerebro()
    cerebro.broker.setcash(initial_cash)
    cerebro.broker.setcommission(commission=0.001)  # 0.1% commission

    # Data feeds
    for symbol in symbols:
        df = yf.download(symbol, start=start_date, end=end_date, auto_adjust=True)
        if df.empty:
            raise ValueError(f'No price data downloaded for {symbol}')
        # yfinance may return (field, ticker) MultiIndex columns even for a
        # single ticker; PandasData needs flat Open/High/Low/Close/Volume names.
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = df.columns.get_level_values(0)
        cerebro.adddata(bt.feeds.PandasData(dataname=df), name=symbol)

    # Strategy
    cerebro.addstrategy(AdvancedMAStrategy)

    # Analyzers
    cerebro.addanalyzer(btanalyzers.SharpeRatio, _name='sharpe', timeframe=bt.TimeFrame.Days)
    cerebro.addanalyzer(btanalyzers.DrawDown, _name='drawdown')
    cerebro.addanalyzer(btanalyzers.TradeAnalyzer, _name='trades')
    cerebro.addanalyzer(btanalyzers.Returns, _name='returns')
    cerebro.addanalyzer(btanalyzers.PyFolio, _name='pyfolio')

    # Run
    results = cerebro.run()
    strat = results[0]

    # Report
    final_value = cerebro.broker.getvalue()
    print(f"初始资金: {initial_cash:,.0f}")
    print(f"最终资金: {final_value:,.0f}")
    print(f"总收益率: {((final_value/initial_cash)-1)*100:.2f}%")

    sharpe = strat.analyzers.sharpe.get_analysis()
    drawdown = strat.analyzers.drawdown.get_analysis()
    trades = strat.analyzers.trades.get_analysis()

    print(f"夏普比率: {sharpe.get('sharperatio', 'N/A')}")
    # Only apply the numeric format when a value exists; formatting the 'N/A'
    # placeholder with ':.2f' would raise ValueError.
    max_dd = drawdown.get('max', {}).get('drawdown')
    max_dd_text = f"{max_dd:.2f}%" if max_dd is not None else 'N/A'
    print(f"最大回撤: {max_dd_text}")
    print(f"总交易次数: {trades.get('total', {}).get('total', 0)}")

    # PyFolio report: get_pf_items() returns a 4-tuple, not a mapping.
    pf_returns, _positions, _transactions, _gross_lev = strat.analyzers.pyfolio.get_pf_items()
    try:
        import pyfolio as pf
    except ImportError:
        print('pyfolio 未安装, 跳过 tear sheet')
    else:
        pf.create_simple_tear_sheet(pf_returns)

    cerebro.plot(style='candlestick')
    return cerebro, results
# Run the backtest for the sample universe and date range
symbols = ['AAPL', 'MSFT']
start_date, end_date = '2020-01-01', '2025-01-01'
cerebro, results = run_backtest(symbols, start_date, end_date)
3. VectorBT 高性能向量化回测
超快参数优化
import vectorbt as vbt
import numpy as np
import pandas as pd
import yfinance as yf
from itertools import product
# Data preparation
data = yf.download('AAPL', start='2020-01-01', end='2025-01-01')['Close']
# Depending on the yfinance version, ['Close'] is either a Series or a
# one-column DataFrame (MultiIndex columns); normalise to a 1-D Series.
close = data.iloc[:, 0] if isinstance(data, pd.DataFrame) else pd.Series(data)
# 信号生成函数
def generate_signals(close, fast_window, slow_window):
    """Build SMA-crossover entry and exit signals.

    Args:
        close: ``pandas.Series`` of closing prices.
        fast_window: rolling window of the fast SMA.
        slow_window: rolling window of the slow SMA.

    Returns:
        Tuple ``(entries, exits)`` of boolean Series aligned with ``close``.
        ``entries`` is True on the bar where the fast SMA moves from below to
        above the slow SMA; ``exits`` is True on the opposite cross. Bars
        where either SMA is still NaN never produce a signal.
    """
    fast_sma = close.rolling(fast_window).mean()
    slow_sma = close.rolling(slow_window).mean()

    # Comparisons involving NaN are False, so warm-up bars count as neither
    # "above" nor "below" and cannot trigger a cross.
    above = fast_sma > slow_sma
    below = fast_sma < slow_sma

    # pandas has no crosses_above/crosses_below; a cross is "was on the other
    # side on the previous bar, is on this side now".
    entries = above & below.shift(1, fill_value=False)
    exits = below & above.shift(1, fill_value=False)
    return entries, exits
# Parameter grid
fast_windows = range(5, 21)
slow_windows = range(20, 51)
# Keep only pairs where the fast window is strictly shorter than the slow one;
# fast == slow gives two identical averages that can never cross.
parameter_combinations = [
    (fast, slow)
    for fast, slow in product(fast_windows, slow_windows)
    if fast < slow
]

# Batch backtest: one portfolio per parameter pair
all_portfolios = []
for fast, slow in parameter_combinations:
    entries, exits = generate_signals(close, fast, slow)

    # Vectorised backtest
    pf = vbt.Portfolio.from_signals(
        close, entries, exits,
        init_cash=100000,
        fees=0.001,
        freq='1D'
    )

    all_portfolios.append({
        'fast_window': fast,
        'slow_window': slow,
        'total_return': pf.total_return(),
        'sharpe': pf.sharpe_ratio(),
        'max_drawdown': pf.max_drawdown(),
        # The win rate is a per-trade statistic and lives on the trades
        # accessor, not on the portfolio itself.
        'win_rate': pf.trades.win_rate()
    })

# Result analysis
results_df = pd.DataFrame(all_portfolios)
best_params = results_df.loc[results_df['sharpe'].idxmax()]
print("最佳参数:", best_params)

# Visualise the optimisation results
fig = results_df.plot(x='fast_window', y='sharpe', kind='scatter')
# results_df is a plain DataFrame, so use the pandas scatter-matrix helper.
pd.plotting.scatter_matrix(results_df[['total_return', 'sharpe', 'max_drawdown']])
并行优化与蒙特卡洛测试
from multiprocessing import Pool
import warnings
warnings.filterwarnings('ignore')
def optimize_params(params):
    """Backtest one ``(fast, slow)`` pair; intended as a ``Pool.map`` worker.

    Args:
        params: tuple ``(fast_window, slow_window)``.

    Returns:
        Dict with keys ``fast``, ``slow``, ``sharpe``, ``return`` and
        ``drawdown``, or ``None`` if the backtest raised an exception.
    """
    fast, slow = params
    try:
        entries, exits = generate_signals(close, fast, slow)
        # freq is passed explicitly (as in the sequential grid search) so the
        # annualised metrics do not depend on index frequency inference.
        pf = vbt.Portfolio.from_signals(close, entries, exits, fees=0.001, freq='1D')
        return {
            'fast': fast, 'slow': slow,
            'sharpe': pf.sharpe_ratio(),
            'return': pf.total_return(),
            'drawdown': pf.max_drawdown()
        }
    except Exception:
        # Best effort: a failing combination is skipped by the caller. Unlike a
        # bare ``except:``, this lets KeyboardInterrupt/SystemExit propagate so
        # the pool can still be interrupted.
        return None
# Parallel evaluation of the parameter grid.
# The __main__ guard is required: with the "spawn" start method (the default on
# Windows and macOS) every worker re-imports this module, and an unguarded
# Pool() at import time would try to start new pools recursively.
if __name__ == '__main__':
    with Pool() as pool:
        results = pool.map(optimize_params, parameter_combinations)
    results_df = pd.DataFrame([r for r in results if r is not None])
    print(results_df.nlargest(5, 'sharpe'))
4. 滑点与交易成本建模
真实交易成本模拟
class RealisticCosts:
    """Per-unit trading cost model: commission + slippage + market impact.

    Args:
        commission: commission as a fraction of price.
        slippage: slippage as a fraction of price.
        market_impact: impact coefficient, scaled by the square root of the
            order's share of average daily volume.
    """

    def __init__(self, commission=0.001, slippage=0.0005, market_impact=0.0002):
        self.commission = commission        # commission rate
        self.slippage = slippage            # slippage rate
        self.market_impact = market_impact  # market-impact coefficient

    def calculate_cost(self, price, volume, avg_daily_volume):
        """Return the total cost per unit traded, in price units.

        Args:
            price: execution price.
            volume: order size; the sign is ignored so sell orders (negative
                sizes) are costed like buys instead of producing NaN.
            avg_daily_volume: average daily traded volume; must be positive.

        Returns:
            ``slippage + impact + commission`` expressed per unit.
        """
        # Proportional slippage
        slippage_cost = self.slippage * price
        # Market impact grows with the square root of the participation ratio
        volume_ratio = abs(volume) / avg_daily_volume
        impact_cost = self.market_impact * price * np.sqrt(volume_ratio)
        # Commission
        commission_cost = self.commission * price
        return slippage_cost + impact_cost + commission_cost
# 集成到Backtrader
class CostAwareBroker(bt.BrokerBase):
    """Broker that prices trades with ``RealisticCosts``.

    NOTE(review): ``bt.BrokerBase`` is the abstract broker interface; confirm
    whether this should extend the simulated back-testing broker instead, and
    whether backtrader actually routes commission through ``getcommission`` on
    the broker (commission schemes are normally supplied as ``CommInfoBase``
    objects) - TODO confirm before relying on this class.
    """

    def __init__(self):
        super().__init__()
        self.cost_model = RealisticCosts()

    def getcommission(self, data, size, price, pseudoexec):
        """Return the total cost of trading ``size`` units at ``price``.

        Args:
            data: data feed of the traded asset (its ``volume`` line is read).
            size: signed order size.
            price: execution price.
            pseudoexec: unused here.
        """
        # get(size=100) returns the window of recent volumes, not their mean;
        # average it so the cost model receives a scalar.
        recent_volumes = data.volume.get(size=100)
        if len(recent_volumes):
            avg_volume = float(np.mean(recent_volumes))
        else:
            # Not enough history for a window yet: fall back to the current bar.
            avg_volume = float(data.volume[0])
        cost = self.cost_model.calculate_cost(price, abs(size), avg_volume)
        return cost * abs(size)


# Usage
cerebro.broker = CostAwareBroker()
5. 多时间框架与多资产策略
市场微观结构与订单簿模拟
class MarketMicrostructure(bt.Strategy):
    """Trade only when volume is unusually high and the spread proxy is tight.

    The "spread" is a proxy, not a real bid/ask spread: the distance between
    the upper Bollinger band and the close.
    """

    def __init__(self):
        self.order_book = {}
        bands = bt.indicators.BollingerBands(self.data.close, period=20, devfactor=2)
        self.spread = bands.top - self.data.close
        # Indicators must be declared here so backtrader registers them and
        # applies their warm-up; creating them inside next() does not work.
        # NOTE(review): the original referenced a non-existent
        # ``self.spread.sma``; a 20-bar SMA is assumed to match the other
        # windows - confirm the intended period.
        self.spread_sma = bt.indicators.SMA(self.spread, period=20)
        self.volume_sma = bt.indicators.SMA(self.data.volume, period=20)

    def next(self):
        avg_volume = self.volume_sma[0]
        if not avg_volume:
            return  # no volume history to compare against (avoids dividing by zero)

        # Liquidity check: current volume relative to its 20-bar average
        liquidity_score = self.data.volume[0] / avg_volume

        # Trade only with high liquidity and a tighter-than-average spread proxy
        if liquidity_score > 1.5 and self.spread[0] < self.spread_sma[0]:
            if not self.position:
                self.buy()
            else:
                self.sell()
6. 滚动前推测试(Walk-Forward Analysis)
滚动优化避免过拟合
def walk_forward_optimization(data, train_periods=252, test_periods=63, optimizer=None):
    """Walk-forward analysis: optimise on a rolling window, test on the next one.

    Args:
        data: ``pandas.Series`` of closing prices.
        train_periods: number of bars in each training window.
        test_periods: number of bars in each out-of-sample window (also the
            step between consecutive splits).
        optimizer: callable ``optimizer(train_data) -> (fast, slow)``. Defaults
            to the module-level ``optimize_ma_params``, which must be defined
            elsewhere; a ``NameError`` is raised at call time if it is not.

    Returns:
        ``pandas.DataFrame`` with one row per split: window boundaries, the
        out-of-sample Sharpe ratio and total return, and the chosen params.
    """
    if optimizer is None:
        optimizer = optimize_ma_params

    results = []
    # "+ 1" so the final window that ends exactly on the last bar is included.
    for split in range(train_periods, len(data) - test_periods + 1, test_periods):
        train_data = data.iloc[split - train_periods:split]
        test_data = data.iloc[split:split + test_periods]

        # Parameter optimisation on the training window only
        best_params = optimizer(train_data)

        # Compute signals on train + test so the moving averages are already
        # warmed up at the first test bar, then keep only the test part. Only
        # past prices are used, so this introduces no look-ahead.
        window = data.iloc[split - train_periods:split + test_periods]
        entries, exits = generate_signals(window, *best_params)
        entries = entries.iloc[train_periods:]
        exits = exits.iloc[train_periods:]

        # Out-of-sample backtest; freq is given explicitly for annualisation.
        pf = vbt.Portfolio.from_signals(test_data, entries, exits, freq='1D')

        results.append({
            'train_start': train_data.index[0],
            'train_end': train_data.index[-1],
            'test_start': test_data.index[0],
            'test_end': test_data.index[-1],
            'sharpe': pf.sharpe_ratio(),
            'total_return': pf.total_return(),
            'params': best_params
        })
    return pd.DataFrame(results)
# Run the walk-forward analysis and inspect the out-of-sample Sharpe ratios
wf_results = walk_forward_optimization(close)
wf_sharpe = wf_results['sharpe']
print("走走测试平均夏普比率:", wf_sharpe.mean())
wf_sharpe.plot(title='走走测试夏普比率稳定性')
7. 蒙特卡洛模拟与稳健性测试
策略稳健性验证
def monte_carlo_robustness(returns, n_simulations=1000, block_size=252, seed=None):
    """Block-bootstrap robustness test of a return series.

    The series is cut into consecutive, non-overlapping blocks of
    ``block_size`` returns (a trailing partial block is dropped). Each
    simulation draws as many blocks as exist, with replacement, and
    concatenates them into a synthetic return path.

    Args:
        returns: periodic simple returns (``pandas.Series`` or array-like);
            NaN values are dropped.
        n_simulations: number of resampled paths.
        block_size: number of consecutive returns per block.
        seed: optional seed for reproducible resampling.

    Returns:
        ``pandas.DataFrame`` with one row per simulation and columns
        ``sharpe`` (annualised with 252 periods, zero risk-free rate; NaN when
        volatility is zero), ``max_drawdown`` (<= 0) and ``total_return``.

    Raises:
        ValueError: if ``block_size`` is not positive or there are fewer
            returns than one full block.
    """
    if block_size < 1:
        raise ValueError('block_size must be a positive integer')

    values = pd.Series(returns).dropna().to_numpy(dtype=float)
    n_blocks = len(values) // block_size
    if n_blocks == 0:
        raise ValueError(
            f'need at least {block_size} returns for one block, got {len(values)}'
        )

    # One row per block; sampling row indices avoids np.random.choice, which
    # only accepts 1-D inputs and cannot draw whole Series objects.
    blocks = values[:n_blocks * block_size].reshape(n_blocks, block_size)
    rng = np.random.default_rng(seed)

    simulation_results = []
    for _ in range(n_simulations):
        # Resample blocks with replacement
        picks = rng.integers(0, n_blocks, size=n_blocks)
        sim_returns = blocks[picks].ravel()

        # Performance of the synthetic path
        wealth = np.cumprod(1.0 + sim_returns)
        # Include the starting wealth of 1.0 so an initial loss is a drawdown.
        running_peak = np.maximum.accumulate(np.concatenate(([1.0], wealth)))[1:]
        volatility = sim_returns.std(ddof=1) if sim_returns.size > 1 else 0.0
        if volatility > 0:
            sharpe = sim_returns.mean() / volatility * np.sqrt(252)
        else:
            sharpe = np.nan

        simulation_results.append({
            'sharpe': float(sharpe),
            'max_drawdown': float((wealth / running_peak - 1.0).min()),
            'total_return': float(wealth[-1] - 1.0)
        })
    return pd.DataFrame(simulation_results)
# Apply to the strategy
strategy_returns = pf.returns()  # returns of the most recent vectorbt backtest
mc_results = monte_carlo_robustness(strategy_returns)
print("蒙特卡洛测试 - 夏普比率95%置信区间:")
# A 95% interval spans the 2.5% and 97.5% quantiles (0.05/0.95 would be 90%).
print(mc_results['sharpe'].quantile([0.025, 0.5, 0.975]))
8. 综合绩效评估系统
多维度绩效分析
class PerformanceAnalyzer:
    """Collection of performance metrics for daily return series."""

    @staticmethod
    def calculate_metrics(returns):
        """Compute core performance metrics.

        Args:
            returns: ``pandas.Series`` of daily simple returns; NaN values
                (e.g. the first row of ``pct_change``) are dropped.

        Returns:
            Dict of metric name -> value. ``sharpe_ratio`` uses a 2% annual
            risk-free rate; ``profit_factor`` is ``inf`` when there are gains
            but no losses and NaN when there are neither.

        Raises:
            ValueError: if no non-NaN returns are supplied.
        """
        from empyrical import (
            sharpe_ratio, sortino_ratio, max_drawdown,
            calmar_ratio, stability_of_timeseries
        )

        returns = returns.dropna()
        if returns.empty:
            raise ValueError('returns contains no data')

        growth = (1 + returns).prod()
        gains = returns[returns > 0].sum()
        losses = abs(returns[returns < 0].sum())
        if losses > 0:
            profit_factor = gains / losses
        else:
            profit_factor = float('inf') if gains > 0 else float('nan')

        metrics = {
            'total_return': growth - 1,
            'annual_return': growth ** (252/len(returns)) - 1,
            'annual_volatility': returns.std() * np.sqrt(252),
            # empyrical subtracts risk_free from every periodic return, so the
            # 2% annual rate has to be converted to a daily rate first.
            'sharpe_ratio': sharpe_ratio(returns, risk_free=0.02 / 252),
            'sortino_ratio': sortino_ratio(returns),
            'max_drawdown': max_drawdown(returns),
            'calmar_ratio': calmar_ratio(returns),
            'stability': stability_of_timeseries(returns),
            'win_rate': (returns > 0).mean(),
            'profit_factor': profit_factor
        }
        return metrics

    @staticmethod
    def benchmark_comparison(strategy_returns, benchmark_returns):
        """Compare a strategy with a benchmark.

        Args:
            strategy_returns: daily strategy returns (``pandas.Series``).
            benchmark_returns: daily benchmark returns (``pandas.Series``).

        Returns:
            Dict with ``Strategy_Sharpe``, ``Benchmark_Sharpe`` and
            ``Information_Ratio`` (annualised mean active return divided by
            tracking error, on the dates both series share; NaN if undefined).
        """
        metrics = {}
        for name, rets in [('Strategy', strategy_returns), ('Benchmark', benchmark_returns)]:
            metrics[f'{name}_Sharpe'] = PerformanceAnalyzer.calculate_metrics(rets)['sharpe_ratio']

        # The information ratio is based on active returns, not on the
        # difference between the two Sharpe ratios.
        active = (strategy_returns - benchmark_returns).dropna()
        tracking_error = active.std() if len(active) > 1 else 0.0
        if tracking_error > 0:
            metrics['Information_Ratio'] = active.mean() / tracking_error * np.sqrt(252)
        else:
            metrics['Information_Ratio'] = np.nan
        return metrics
# Usage example
perf_metrics = PerformanceAnalyzer.calculate_metrics(strategy_returns)
# Same date range as the strategy data so both series cover the same period.
benchmark_close = yf.download('SPY', start='2020-01-01', end='2025-01-01')['Close']
# Depending on the yfinance version, ['Close'] may be a one-column DataFrame.
if isinstance(benchmark_close, pd.DataFrame):
    benchmark_close = benchmark_close.iloc[:, 0]
# pct_change leaves NaN in the first row; drop it before computing metrics.
benchmark_rets = benchmark_close.pct_change().dropna()
comparison = PerformanceAnalyzer.benchmark_comparison(strategy_returns, benchmark_rets)
9. 实盘过渡与前瞻测试
Paper Trading 模拟实盘
class PaperTrading:
    """Paper-trading loop: polls a data source and simulates fills in memory.

    Args:
        strategy: object exposing ``generate_signal(latest_data)`` that returns
            a dict with ``action`` (``'BUY'``/``'SELL'``), ``symbol`` and
            ``quantity``, or a falsy value for "no action".
        live_data_source: object exposing ``get_latest()`` and
            ``get_price(symbol)``.
    """

    FEE_RATE = 0.001  # proportional fee applied to every simulated fill

    def __init__(self, strategy, live_data_source):
        self.strategy = strategy
        self.data_source = live_data_source
        self.positions = {}   # symbol -> held quantity
        self.cash = 100000
        self.trades = []      # executed trades, in order

    def run_live(self, poll_interval=60, max_iterations=None):
        """Poll, act on signals and log until ``max_iterations`` is reached.

        Args:
            poll_interval: seconds to sleep between polls (default: 1 minute).
            max_iterations: stop after this many polls; ``None`` runs forever.
        """
        import time  # local import: the module does not import time at file level

        completed = 0
        while max_iterations is None or completed < max_iterations:
            # Latest market data
            latest_data = self.data_source.get_latest()
            # Strategy signal
            signal = self.strategy.generate_signal(latest_data)
            action = signal.get('action') if signal else None
            # Execute
            if action == 'BUY':
                self.execute_buy(signal['symbol'], signal['quantity'])
            elif action == 'SELL':
                self.execute_sell(signal['symbol'], signal['quantity'])
            # Log
            self.log_performance()
            completed += 1
            # Do not sleep after the final iteration.
            if max_iterations is None or completed < max_iterations:
                time.sleep(poll_interval)

    def execute_buy(self, symbol, quantity):
        """Simulate a buy; ignored when cash cannot cover price plus fee."""
        price = self.data_source.get_price(symbol)
        cost = price * quantity * (1 + self.FEE_RATE)  # fee included
        if cost <= self.cash:
            self.cash -= cost
            self.positions[symbol] = self.positions.get(symbol, 0) + quantity
            self.trades.append({'action': 'BUY', 'symbol': symbol, 'quantity': quantity, 'price': price})

    def execute_sell(self, symbol, quantity):
        """Simulate a sell; ignored when the held quantity is insufficient."""
        held = self.positions.get(symbol, 0)
        if quantity <= 0 or quantity > held:
            return  # no short selling in this simulation
        price = self.data_source.get_price(symbol)
        self.cash += price * quantity * (1 - self.FEE_RATE)  # fee deducted
        remaining = held - quantity
        if remaining:
            self.positions[symbol] = remaining
        else:
            del self.positions[symbol]
        self.trades.append({'action': 'SELL', 'symbol': symbol, 'quantity': quantity, 'price': price})

    def log_performance(self):
        """Print the current cash balance, positions and trade count."""
        print(f'现金: {self.cash:,.2f}, 持仓: {self.positions}, 交易次数: {len(self.trades)}')
10. 最佳实践与注意事项
回测陷阱与解决方案
- 前视偏差(Look-ahead Bias):
- 确保只使用当时可获得的数据
- 对信号做滞后处理(例如用当日收盘价生成的信号在下一交易日才执行)
- 幸存者偏差(Survivorship Bias):
- 使用完整股票池(包含退市股票)
- 验证数据源完整性
- 过度优化(Overfitting):
- 使用滚动前推测试(Walk-Forward)
- 限制参数空间
- 样本外验证
代码质量控制
import pytest
from typing import Dict, Any
class BacktestValidator:
    """Sanity checks for a backtest result dictionary."""

    @staticmethod
    def validate_results(results: Dict[str, Any]) -> None:
        """Validate that backtest results are plausible.

        Args:
            results: dict with ``total_return``, ``sharpe_ratio`` and
                ``max_drawdown`` (numbers) and ``trades`` (a sequence of dicts
                that each carry an ``action`` of ``'BUY'`` or ``'SELL'``).

        Raises:
            AssertionError: if any check fails. Raised explicitly so the
                checks still run under ``python -O``.
            KeyError: if a required key is missing.
        """
        def _require(condition: bool, message: str) -> None:
            if not condition:
                raise AssertionError(message)

        _require(results['total_return'] > -1, "总收益率不能为-100%以下")
        _require(results['sharpe_ratio'] > -2, "夏普比率异常")
        _require(results['max_drawdown'] < 1, "最大回撤异常")

        # 'trades' is a sequence of trade records, so check its length;
        # comparing the sequence itself with 0 raises TypeError.
        trades = results['trades']
        _require(len(trades) > 0, "没有交易记录")

        # Consistency: at most one unmatched trade (a position still open)
        buy_trades = sum(1 for t in trades if t['action'] == 'BUY')
        sell_trades = sum(1 for t in trades if t['action'] == 'SELL')
        _require(abs(buy_trades - sell_trades) <= 1, "买卖交易不匹配")
# 自动化测试
def test_strategy_backtest():
    """Integration test: run the AAPL backtest and validate its summary.

    ``run_backtest`` returns ``(cerebro, strategies)``, so the metrics dict
    expected by ``BacktestValidator.validate_results`` is assembled here from
    the broker value and the analyzers attached by ``run_backtest``.
    """
    initial_cash = 100000
    cerebro, strategies = run_backtest(
        ['AAPL'], '2020-01-01', '2025-01-01', initial_cash=initial_cash
    )
    strat = strategies[0]

    sharpe = strat.analyzers.sharpe.get_analysis().get('sharperatio')
    # The DrawDown analyzer reports percentages; the validator expects a fraction.
    drawdown_pct = strat.analyzers.drawdown.get_analysis().get('max', {}).get('drawdown', 0.0)
    trade_totals = strat.analyzers.trades.get_analysis().get('total', {})
    closed = trade_totals.get('closed', 0)
    still_open = trade_totals.get('open', 0)

    # TradeAnalyzer exposes counts, not individual fills. For this long-only
    # strategy every trade starts with a BUY and every closed trade ends with
    # a SELL, so expand the counts into the records the validator iterates.
    trades = [{'action': 'BUY'}] * (closed + still_open) + [{'action': 'SELL'}] * closed

    results = {
        'total_return': cerebro.broker.getvalue() / initial_cash - 1,
        'sharpe_ratio': sharpe if sharpe is not None else 0.0,
        'max_drawdown': drawdown_pct / 100.0,
        'trades': trades,
    }
    validator = BacktestValidator()
    validator.validate_results(results)
生产部署检查清单
- [ ] 数据一致性验证
- [ ] 交易成本真实建模
- [ ] 风险限额设置
- [ ] 异常处理机制
- [ ] 实时监控告警
- [ ] 回测/实盘参数一致
- [ ] 灾难恢复方案
需要特定策略的完整回测代码、参数优化算法或实盘接口对接,请告诉我具体需求!