SciPy 教程
Python SciPy 全面教程:科学计算核心库
SciPy(Scientific Python)是Python科学计算生态的核心库,基于NumPy构建,提供高性能的数学、科学和工程计算工具。它包含优化、积分、统计、信号处理、线性代数等专业模块,是量化金融、数据科学和工程分析的必备工具。
1. SciPy 核心模块概览
模块 | 功能 | 量化金融应用 |
---|---|---|
scipy.optimize | 优化算法(最小化、最大化) | 投资组合优化、参数拟合 |
scipy.integrate | 数值积分 | 期权定价、蒙特卡洛积分 |
scipy.stats | 统计分布与假设检验 | 风险分析、因子模型 |
scipy.linalg | 线性代数运算 | 协方差矩阵、PCA降维 |
scipy.signal | 信号处理 | 技术指标平滑、滤波 |
scipy.interpolate | 插值与拟合 | 收益率曲线插值 |
scipy.fft | 快速傅里叶变换 | 时间序列频域分析 |
scipy.special | 特殊函数 | Black-Scholes公式 |
2. 安装与环境配置
# Conda安装(推荐)
conda install scipy numpy matplotlib pandas
# Pip安装
pip install scipy numpy matplotlib pandas
# 验证安装
python -c "import scipy; print(scipy.__version__)"
完整开发环境
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
from scipy import optimize, integrate, stats, linalg, signal
import warnings
warnings.filterwarnings('ignore')
print(f"SciPy版本: {sp.__version__}")
print(f"NumPy版本: {np.__version__}")
3. 优化模块(scipy.optimize)
投资组合优化示例
from scipy.optimize import minimize, differential_evolution
import numpy as np
# 模拟资产收益与协方差
np.random.seed(42)
n_assets = 5
returns = np.random.normal(0.1, 0.2, n_assets) # 年化收益
cov_matrix = np.random.rand(n_assets, n_assets)
cov_matrix = cov_matrix @ cov_matrix.T * 0.1 # 协方差矩阵
def portfolio_performance(weights, returns, cov_matrix):
"""投资组合绩效函数"""
portfolio_return = np.dot(weights, returns)
portfolio_vol = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))
return -portfolio_return / portfolio_vol # 负夏普比率(最小化)
# 约束条件
constraints = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1}) # 权重和为1
bounds = tuple((0, 1) for _ in range(n_assets)) # 无空头
# 初始权重
initial_weights = np.array([1/n_assets] * n_assets)
# 方法1:SLSQP(顺序二次规划)
result_slsp = minimize(portfolio_performance, initial_weights,
args=(returns, cov_matrix),
method='SLSQP', bounds=bounds, constraints=constraints)
# 方法2:全局优化(差分进化)
result_de = differential_evolution(portfolio_performance,
bounds=bounds,
args=(returns, cov_matrix),
constraints=constraints)
print("SLSQP最优权重:", result_slsp.x.round(4))
print("差分进化最优权重:", result_de.x.round(4))
# 可视化有效前沿
target_returns = np.linspace(returns.min(), returns.max(), 50)
frontier_vols = []
for target in target_returns:
cons = (
{'type': 'eq', 'fun': lambda x: np.sum(x) - 1},
{'type': 'eq', 'fun': lambda x: np.dot(x, returns) - target}
)
res = minimize(lambda x: np.sqrt(np.dot(x.T, np.dot(cov_matrix, x))),
initial_weights, method='SLSQP', bounds=bounds, constraints=cons)
frontier_vols.append(res.fun)
plt.figure(figsize=(10, 6))
plt.plot(frontier_vols, target_returns, 'b-', label='有效前沿')
plt.scatter(np.sqrt(np.diag(cov_matrix)), returns, c='red', s=50, label='单资产')
plt.xlabel('波动率')
plt.ylabel('预期收益')
plt.title('Markowitz有效前沿')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()
非线性方程求解
from scipy.optimize import fsolve, root
def equations(vars):
"""非线性方程组:Black-Scholes隐含波动率"""
S, K, T, r, price, sigma = vars
d1 = (np.log(S/K) + (r + 0.5*sigma**2)*T) / (sigma*np.sqrt(T))
d2 = d1 - sigma*np.sqrt(T)
theoretical_price = S*stats.norm.cdf(d1) - K*np.exp(-r*T)*stats.norm.cdf(d2)
return [theoretical_price - price, 0, 0, 0, 0, 0] # 简化示例
# 求解隐含波动率
initial_guess = [100, 100, 0.25, 0.05, 10, 0.2] # [S,K,T,r,price,sigma]
solution = fsolve(equations, initial_guess)
print("隐含波动率:", solution[5])
4. 数值积分(scipy.integrate)
Black-Scholes 期权定价(积分形式)
from scipy.integrate import quad
def black_scholes_call_integral(S, K, T, r, sigma):
"""Black-Scholes看涨期权积分定价"""
def integrand(x):
d1 = (np.log(S/K) + (r + 0.5*sigma**2)*T) / (sigma*np.sqrt(T))
return np.exp(-0.5*x**2)/np.sqrt(2*np.pi) * (S/K)**(0.5*(2*r/sigma**2 + 1)) * \
stats.norm.cdf((np.log(S/K) + (r + 0.5*sigma**2)*T - x*sigma*np.sqrt(T))/
(sigma*np.sqrt(T)))
result, error = quad(integrand, -np.inf, np.inf)
return K * np.exp(-r*T) * result
# 示例计算
S, K, T, r, sigma = 100, 95, 0.25, 0.05, 0.2
bs_price = black_scholes_call_integral(S, K, T, r, sigma)
print(f"看涨期权价格: {bs_price:.4f}")
# 验证:与解析解对比
from scipy.stats import norm
d1 = (np.log(S/K) + (r + 0.5*sigma**2)*T) / (sigma*np.sqrt(T))
d2 = d1 - sigma*np.sqrt(T)
analytical_price = S*norm.cdf(d1) - K*np.exp(-r*T)*norm.cdf(d2)
print(f"解析解: {analytical_price:.4f}, 误差: {abs(bs_price - analytical_price):.6f}")
蒙特卡洛积分加速
from scipy.integrate import nquad
def monte_carlo_european_put(S, K, T, r, sigma, n_paths=100000):
"""蒙特卡洛欧洲看跌期权定价"""
dt = T
# 几何布朗运动
Z = np.random.standard_normal(n_paths)
ST = S * np.exp((r - 0.5*sigma**2)*dt + sigma*np.sqrt(dt)*Z)
payoff = np.maximum(K - ST, 0)
return np.exp(-r*dt) * np.mean(payoff)
# 多维积分(路径依赖期权简化)
def path_dependent_option(S, K, T, r, sigma):
def integrand(x1, x2):
# 模拟两期路径
S1 = S * np.exp((r - 0.5*sigma**2)*T/2 + sigma*np.sqrt(T/2)*x1)
S2 = S1 * np.exp((r - 0.5*sigma**2)*T/2 + sigma*np.sqrt(T/2)*x2)
payoff = np.maximum(K - S2, 0)
return np.exp(-r*T) * payoff / (2*np.pi)
result, error = nquad(integrand, [[-np.inf, np.inf], [-np.inf, np.inf]])
return result
5. 统计模块(scipy.stats)
分布拟合与风险分析
from scipy import stats
import matplotlib.pyplot as plt
# 生成样本数据
np.random.seed(42)
returns = np.random.normal(0.001, 0.02, 1000) # 日收益率
# 分布拟合
rv = stats.norm.fit(returns) # 正态分布拟合
print(f"正态分布参数: μ={rv[0]:.4f}, σ={rv[1]:.4f}")
# Kolmogorov-Smirnov检验
ks_stat, p_value = stats.kstest(returns, 'norm', args=rv)
print(f"KS检验: 统计量={ks_stat:.4f}, p值={p_value:.4f}")
# VaR计算
confidence = 0.95
var_analytical = stats.norm.ppf(1-confidence, rv[0], rv[1])
var_historical = np.percentile(returns, (1-confidence)*100)
print(f"95% VaR: 解析={var_analytical:.4f}, 历史={var_historical:.4f}")
# 绘制分布对比
x = np.linspace(returns.min(), returns.max(), 100)
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.hist(returns, bins=50, density=True, alpha=0.7, label='历史收益率')
plt.plot(x, stats.norm.pdf(x, *rv), 'r-', lw=2, label='拟合正态分布')
plt.axvline(var_analytical, color='red', linestyle='--', label=f'VaR {confidence*100}%')
plt.legend()
plt.title('收益率分布拟合')
# QQ图
plt.subplot(1, 2, 2)
stats.probplot(returns, dist="norm", plot=plt)
plt.title('正态分布QQ图')
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
多元正态分布与协方差分析
from scipy.stats import multivariate_normal
# 多资产协方差
n_assets = 3
mu = np.array([0.1, 0.08, 0.12]) # 年化收益
cov = np.array([[0.04, 0.01, 0.005],
[0.01, 0.03, 0.008],
[0.005, 0.008, 0.05]]) # 协方差矩阵
mvn = multivariate_normal(mean=mu, cov=cov)
# 生成样本
samples = mvn.rvs(size=10000)
# 概率密度
x, y = np.mgrid[-0.2:0.3:30j, -0.2:0.3:30j]
pos = np.dstack((x, y))
rv = multivariate_normal([mu[0], mu[1]], cov[:2, :2])
z = rv.pdf(pos)
plt.figure(figsize=(10, 4))
plt.subplot(1, 2, 1)
plt.contourf(x, y, z)
plt.title('多元正态分布密度')
plt.colorbar()
plt.subplot(1, 2, 2)
plt.scatter(samples[:, 0], samples[:, 1], alpha=0.5)
plt.scatter(mu[0], mu[1], color='red', s=100, label='真实均值')
plt.legend()
plt.title('蒙特卡洛样本')
plt.tight_layout()
plt.show()
6. 线性代数(scipy.linalg)
主成分分析(PCA)
from scipy.linalg import eigh, svd
import pandas as pd
# 模拟因子数据
np.random.seed(42)
n_factors = 10
n_stocks = 100
factors = np.random.randn(n_factors, n_stocks)
# 协方差矩阵特征分解
cov_matrix = np.cov(factors)
eigenvalues, eigenvectors = eigh(cov_matrix)
# 按特征值排序
idx = np.argsort(eigenvalues)[::-1]
eigenvalues = eigenvalues[idx]
eigenvectors = eigenvectors[:, idx]
# 主成分
n_components = 3
pca_components = eigenvectors[:, :n_components]
explained_variance = eigenvalues[:n_components] / eigenvalues.sum()
print("解释方差比:", explained_variance.round(4))
# 重构数据
scores = np.dot(pca_components.T, factors)
reconstructed = np.dot(pca_components, scores)
# SVD分解(数值更稳定)
U, s, Vt = svd(factors, full_matrices=False)
pca_svd = Vt.T[:, :n_components]
plt.figure(figsize=(12, 4))
plt.subplot(1, 3, 1)
plt.plot(explained_variance.cumsum(), 'bo-')
plt.xlabel('主成分数量')
plt.ylabel('累积解释方差')
plt.title('PCA累积解释方差')
plt.subplot(1, 3, 2)
plt.scatter(scores[0], scores[1], alpha=0.6)
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.title('主成分得分')
plt.subplot(1, 3, 3)
plt.plot(s[:20], 'ro-')
plt.xlabel('奇异值索引')
plt.ylabel('奇异值')
plt.title('SVD奇异值')
plt.tight_layout()
plt.show()
矩阵分解与求逆
from scipy.linalg import lu, cholesky, solve
# LU分解
A = np.random.rand(5, 5)
P, L, U = lu(A)
print("LU分解验证:", np.allclose(A, P @ L @ U))
# Cholesky分解(对称正定矩阵)
A_posdef = A @ A.T + np.eye(5)
L_chol = cholesky(A_posdef, lower=True)
print("Cholesky验证:", np.allclose(A_posdef, L_chol @ L_chol.T))
# 线性方程组求解
b = np.random.rand(5)
x_exact = solve(A, b)
x_lu = solve(L, solve(U, solve(P.T, b))) # 使用LU分解
print("解的精度:", np.allclose(x_exact, x_lu))
7. 信号处理(scipy.signal)
技术指标平滑与滤波
from scipy import signal
import yfinance as yf
# 获取真实数据
df = yf.download('AAPL', start='2024-01-01', end='2025-01-01')
close = df['Close'].values
# 移动平均(FIR滤波器)
b, a = signal.firwin(20, cutoff=0.3, window='hamming', pass_zero=True), 1
ma_fir = signal.lfilter(b, a, close)
# 卡尔曼滤波(简化版)
def kalman_filter(data, process_var=1e-3, measurement_var=1e-2):
"""简单一维卡尔曼滤波"""
n = len(data)
filtered = np.zeros(n)
filtered[0] = data[0]
for t in range(1, n):
# 预测
pred = filtered[t-1]
# 更新
kalman_gain = process_var / (process_var + measurement_var)
filtered[t] = pred + kalman_gain * (data[t] - pred)
process_var = (1 - kalman_gain) * process_var
return filtered
kf_smooth = kalman_filter(close)
# 绘制对比
plt.figure(figsize=(15, 6))
plt.plot(close, label='原始收盘价', alpha=0.7)
plt.plot(ma_fir, label='FIR滤波(20期)', linewidth=2)
plt.plot(kf_smooth, label='卡尔曼滤波', linewidth=2)
plt.title('价格序列平滑对比')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()
傅里叶变换分析
from scipy.fft import fft, fftfreq
# FFT分析周期性
N = len(close)
yf = fft(close)
xf = fftfreq(N, 1)[:N//2]
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(close)
plt.title('原始时间序列')
plt.subplot(1, 2, 2)
plt.plot(xf[1:N//2], 2.0/N * np.abs(yf[1:N//2]))
plt.title('功率谱密度')
plt.xlabel('频率')
plt.ylabel('幅度')
plt.grid(True)
plt.tight_layout()
plt.show()
# 识别主要周期
dominant_freq = xf[1:N//2][np.argmax(2.0/N * np.abs(yf[1:N//2]))]
dominant_period = 1 / dominant_freq
print(f"主导周期: {dominant_period:.1f} 个交易日")
8. 插值与曲线拟合(scipy.interpolate)
收益率曲线插值
from scipy.interpolate import interp1d, UnivariateSpline, BPoly
# 模拟收益率曲线数据
maturities = np.array([0.1, 0.5, 1.0, 2.0, 5.0, 10.0])
rates = np.array([0.02, 0.025, 0.03, 0.035, 0.04, 0.045])
# 线性插值
linear_interp = interp1d(maturities, rates, kind='linear',
bounds_error=False, fill_value='extrapolate')
# 样条插值
spline_interp = UnivariateSpline(maturities, rates, s=0, ext=1)
# B样条
bp = BPoly.from_derivatives(maturities,
np.column_stack([rates, np.gradient(rates, maturities)]))
# 对比不同插值方法
test_maturities = np.linspace(0.1, 10, 100)
linear_rates = linear_interp(test_maturities)
spline_rates = spline_interp(test_maturities)
bp_rates = bp(test_maturities)
plt.figure(figsize=(10, 6))
plt.plot(maturities, rates, 'ro', markersize=8, label='观测点')
plt.plot(test_maturities, linear_rates, 'b-', label='线性插值')
plt.plot(test_maturities, spline_rates, 'g-', label='样条插值')
plt.plot(test_maturities, bp_rates, 'orange', label='B样条')
plt.xlabel('期限(年)')
plt.ylabel('收益率')
plt.title('收益率曲线插值对比')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()
9. 特殊函数(scipy.special)
金融特殊函数
from scipy.special import erf, erfinv, gammainc, iv
# 误差函数(Black-Scholes核心)
def black_scholes_d1(S, K, T, r, sigma):
"""Black-Scholes d1计算"""
return (np.log(S/K) + (r + 0.5*sigma**2)*T) / (sigma*np.sqrt(T))
# 使用erf实现累积正态分布
def norm_cdf(x):
return (1 + erf(x / np.sqrt(2))) / 2
S, K, T, r, sigma = 100, 95, 0.25, 0.05, 0.2
d1 = black_scholes_d1(S, K, T, r, sigma)
d2 = d1 - sigma * np.sqrt(T)
# erf实现
N_d1_erf = norm_cdf(d1)
N_d2_erf = norm_cdf(d2)
# scipy.stats验证
N_d1_stats = stats.norm.cdf(d1)
N_d2_stats = stats.norm.cdf(d2)
print(f"d1: {d1:.4f}, N(d1): {N_d1_erf:.4f} (erf) vs {N_d1_stats:.4f} (stats)")
print(f"d2: {d2:.4f}, N(d2): {N_d2_erf:.4f} (erf) vs {N_d2_stats:.4f} (stats)")
# 修改贝塞尔函数(债券期权)
kappa = 0.5 # 波动率参数
nu = 1.0 # 订单
z = 2.0 * kappa * np.sqrt(T)
I_nu = iv(nu, z)
print(f"修改贝塞尔函数 I_{nu}({z}): {I_nu:.6f}")
10. 高级应用:稳健优化与约束
带约束的稳健优化
def robust_optimization(returns, cov_matrix, risk_aversion=3):
"""稳健投资组合优化(CVaR约束)"""
def objective(weights):
expected_return = np.dot(weights, returns)
portfolio_risk = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))
return -expected_return + risk_aversion * portfolio_risk
# CVaR约束(简化)
def cvar_constraint(weights):
portfolio_returns = np.dot(returns, weights)
return -np.mean(np.sort(portfolio_returns)[:int(0.05*len(portfolio_returns))])
constraints = [
{'type': 'eq', 'fun': lambda x: np.sum(x) - 1}, # 权重约束
{'type': 'ineq', 'fun': cvar_constraint} # CVaR约束
]
bounds = [(0, 1)] * len(returns)
result = minimize(objective, np.ones(len(returns))/len(returns),
method='SLSQP', bounds=bounds, constraints=constraints)
return result.x, -result.fun
# 应用
optimal_weights, utility = robust_optimization(returns, cov_matrix)
print("稳健最优权重:", optimal_weights.round(4))
print("效用值:", utility)
11. 性能优化与并行计算
向量化与JIT加速
from numba import jit, prange
from scipy.optimize import minimize
@jit(nopython=True, parallel=True)
def vectorized_portfolio_vol(weights, cov_matrix):
"""向量化波动率计算"""
return np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))
# 加速蒙特卡洛模拟
@jit(nopython=True, parallel=True)
def fast_monte_carlo(S, K, T, r, sigma, n_simulations):
dt = T
payoffs = np.zeros(n_simulations)
for i in prange(n_simulations):
Z = np.random.normal()
ST = S * np.exp((r - 0.5*sigma**2)*dt + sigma*np.sqrt(dt)*Z)
payoffs[i] = max(ST - K, 0)
return np.mean(payoffs) * np.exp(-r*dt)
# 性能对比
import time
start = time.time()
price_fast = fast_monte_carlo(100, 100, 1, 0.05, 0.2, 1000000)
print(f"加速蒙特卡洛: {price_fast:.4f}, 时间: {time.time()-start:.2f}s")
12. 最佳实践与调试技巧
数值稳定性
# 避免数值不稳定
def stable_norm_cdf(x):
"""数值稳定的正态累积分布"""
return 0.5 * (1 + np.tanh(np.sqrt(2/np.pi) * x +
np.tanh(np.sqrt(2/np.pi) * x)**3 / 3))
# 条件数检查(矩阵病态检测)
def check_matrix_condition(A, threshold=1e10):
"""检查矩阵条件数"""
cond = linalg.cond(A)
if cond > threshold:
print(f"警告:矩阵条件数过大 {cond:.2e},可能数值不稳定")
return cond
A = np.random.rand(100, 100)
cond_num = check_matrix_condition(A)
错误处理与验证
def safe_optimize(func, x0, bounds=None, constraints=None, method='SLSQP'):
"""安全的优化函数"""
try:
result = minimize(func, x0, bounds=bounds, constraints=constraints,
method=method, options={'maxiter': 1000})
if not result.success:
print(f"优化失败: {result.message}")
return None
# 验证约束满足
if constraints:
for cons in constraints:
if cons['type'] == 'eq':
violation = abs(cons['fun'](result.x))
if violation > 1e-6:
print(f"约束违反: {violation:.2e}")
return result
except Exception as e:
print(f"优化异常: {e}")
return None
13. 学习资源与进阶
官方文档与示例
- SciPy官方文档:https://docs.scipy.org/doc/scipy/
- 教程与Gallery:https://scipy-lectures.org/
- GitHub源码:https://github.com/scipy/scipy
推荐书籍
- 《SciPy and NumPy》(Elliot、Oliphant)
- 《Python for Finance》(Hilpisch)
进阶主题
- 稀疏矩阵运算(
scipy.sparse
) - 微分方程求解(
scipy.integrate.odeint
) - 贝叶斯优化(
scipy.optimize.bayesopt
) - GPU加速(CuPy + SciPy接口)
SciPy是量化金融计算的基础设施,掌握其核心模块能显著提升模型开发效率。需要特定模块的深度示例(如贝叶斯优化、稀疏矩阵)或与Pandas/QuantLib的集成,请告诉我具体需求!