SciPy 模块列表
SciPy 完整模块列表与量化金融应用
SciPy 包含丰富的科学计算模块,每个模块都有特定的数学和工程应用场景。以下是完整模块列表及其在量化金融中的实际应用。
1. 核心模块(Subpackages)
模块 | 描述 | 量化金融应用 | 核心函数 |
---|---|---|---|
scipy.cluster | 聚类算法 | 资产分类、K-means投资组合 | kmeans , hierarchy , linkage |
scipy.constants | 物理常数 | 利率计算、时间单位转换 | c , h , physical_constants |
scipy.fft | 快速傅里叶变换 | 周期分析、技术指标频域 | fft , ifft , rfft , fft2 |
scipy.integrate | 数值积分与ODE求解 | 期权定价、蒙特卡洛积分 | quad , dblquad , odeint , solve_ivp |
scipy.interpolate | 插值与样条拟合 | 收益率曲线、价格插值 | interp1d , UnivariateSpline , griddata |
scipy.io | 数据I/O | MAT文件、WAV、ARFF读取 | loadmat , savemat , wavfile |
scipy.linalg | 线性代数运算 | 协方差矩阵、PCA分解 | eigh , svd , solve , lstsq |
scipy.ndimage | N维图像处理 | 技术指标平滑、形态学 | gaussian_filter , binary_dilation |
scipy.odr | 正交距离回归 | 误差传播、曲线拟合 | ODR , Model , RealData |
scipy.optimize | 优化算法 | 投资组合优化、参数拟合 | minimize , fsolve , differential_evolution |
scipy.signal | 信号处理 | 价格滤波、指标平滑 | butter , lfilter , find_peaks , hilbert |
scipy.sparse | 稀疏矩阵运算 | 大规模协方差、因子模型 | csr_matrix , spdiags , diags |
scipy.sparse.linalg | 稀疏线性代数 | 大型系统求解、隐式波动率 | spsolve , eigsh , svds , lsqr |
scipy.special | 特殊数学函数 | Black-Scholes、累积分布 | erf , gammainc , iv , kv |
scipy.spatial | 空间数据结构 | KD树、Voronoi图、距离计算 | KDTree , distance , ConvexHull |
scipy.stats | 统计分布与检验 | 风险度量、假设检验 | norm , t , kstest , chi2_contingency |
2. 详细模块功能与示例
scipy.cluster – 聚类分析
from scipy.cluster import hierarchy, vq
from scipy.spatial.distance import pdist
import numpy as np

# Cluster assets by the similarity of their return series.
returns = np.random.randn(100, 10)  # 100 days x 10 assets
# pdist / kmeans treat every ROW as one observation, so transpose to get
# one row per asset; otherwise the 100 days are clustered instead.
asset_returns = returns.T
condensed_dist = pdist(asset_returns, metric='correlation')

# Hierarchical clustering. 'average' linkage is used because Ward linkage
# is only correctly defined for Euclidean distances, not correlation distance.
Z = hierarchy.linkage(condensed_dist, method='average')
clusters = hierarchy.fcluster(Z, t=2, criterion='maxclust')

# K-means clustering of the same per-asset vectors.
centroids, distortion = vq.kmeans(asset_returns, 3)
labels, _ = vq.vq(asset_returns, centroids)
scipy.constants – 物理与金融常数
import math

from scipy import constants

# Time-unit conversion: seconds in a 365.25-day year, built from
# constants.minute (the number of seconds in one minute).
seconds_per_year = constants.minute * 60 * 24 * 365.25
print(f"秒/年: {seconds_per_year:.0f}")
# Base of the natural logarithm.
# NOTE: scipy.constants.e is the elementary charge (~1.602e-19 C), NOT
# Euler's number, so the value is taken from the math module instead.
e = math.e
print(f"e: {e}")
scipy.fft – 频域分析
from scipy.fft import fft, fftfreq, rfft, irfft
import numpy as np

# Periodicity analysis of a simulated price path (random walk around 100).
price = 100 + np.random.randn(1024).cumsum()
N = price.size
half = N // 2

# Spectrum and the matching non-negative frequency axis (unit sample spacing).
yf = fft(price)
xf = fftfreq(N, 1)[:half]

# Scaled spectral magnitude; the strongest bin is searched from index 1
# onwards so the DC component (index 0) is ignored.
power = np.abs(yf[:half]) * (2.0 / N)
peak_index = 1 + np.argmax(power[1:])
dominant_freq = xf[peak_index]
print(f"主导频率: {dominant_freq:.4f}, 周期: {1/dominant_freq:.1f}")
scipy.integrate – 数值积分
from scipy.integrate import quad, dblquad, odeint, solve_ivp
import numpy as np
from scipy import stats


# Black-Scholes call pricing by numerical integration.
def bs_call_integrand(k, S, T, r, sigma):
    """Return the risk-neutral density of the terminal price at level ``k``.

    Under Black-Scholes the terminal price is lognormal, so the density is
    ``phi(d2) / (k * sigma * sqrt(T))`` with
    ``d2 = (ln(S/k) + (r - sigma**2 / 2) * T) / (sigma * sqrt(T))``.

    Parameters: ``k`` terminal price level (> 0), ``S`` spot, ``T`` time to
    maturity in years, ``r`` risk-free rate, ``sigma`` volatility.
    """
    vol_sqrt_t = sigma * np.sqrt(T)
    d2 = (np.log(S / k) + (r - 0.5 * sigma**2) * T) / vol_sqrt_t
    return stats.norm.pdf(d2) / (k * vol_sqrt_t)


def bs_call_price(S, T, r, sigma, K=None):
    """Price a European call as the discounted integral of its payoff.

    Computes ``exp(-r*T) * integral_K^inf (s - K) * f(s) ds`` where ``f`` is
    the density returned by ``bs_call_integrand``.

    Parameters: ``S`` spot, ``T`` maturity in years, ``r`` risk-free rate,
    ``sigma`` volatility, ``K`` strike (> 0). ``K`` defaults to ``S``
    (at-the-money), which is the case the original formula reduced to.

    Returns the call price as a float.
    """
    strike = S if K is None else K
    vol_sqrt_t = sigma * np.sqrt(T)
    # Truncate the upper limit 10 standard deviations above the log-mean:
    # the neglected tail is numerically zero, and a finite range keeps quad
    # well scaled compared with integrating to infinity.
    upper = S * np.exp((r - 0.5 * sigma**2) * T + 10.0 * vol_sqrt_t)
    if strike >= upper:
        return 0.0

    def discounted_free_payoff(s):
        return (s - strike) * bs_call_integrand(s, S, T, r, sigma)

    expected_payoff, _ = quad(discounted_free_payoff, strike, upper)
    return np.exp(-r * T) * expected_payoff
# ODE solve (interest-rate model): deterministic mean path of a CIR short rate.
def cir_process(y, t, kappa, theta, sigma):
    """Return the drift of the CIR short-rate model, ``kappa * (theta - r)``.

    An ODE solver cannot integrate the stochastic term
    ``sigma * sqrt(r) * dW``, so only the mean-reverting drift is returned.
    ``sigma`` and ``t`` are accepted for signature compatibility but unused.

    Parameters: ``y`` state vector whose first entry is the rate ``r``,
    ``t`` time, ``kappa`` mean-reversion speed, ``theta`` long-run level,
    ``sigma`` volatility.

    Returns a one-element list holding ``dr/dt``.
    """
    r = y[0]
    return [kappa * (theta - r)]


t = np.linspace(0, 1, 252)
# solve_ivp calls fun(t, y); cir_process uses the odeint-style (y, t) order,
# so a small adapter swaps the arguments and binds the model parameters.
sol = solve_ivp(
    lambda time, state: cir_process(state, time, 0.1, 0.05, 0.01),
    [0, 1],
    [0.05],
    t_eval=t,
)
scipy.interpolate – 曲线插值
from scipy.interpolate import interp1d, UnivariateSpline, RegularGridInterpolator
from scipy.interpolate import RBFInterpolator, griddata

# Yield-curve interpolation: zero rates observed at a handful of maturities.
maturities = np.array([0.1, 0.5, 1, 2, 5, 10])
zero_rates = np.array([0.02, 0.025, 0.03, 0.035, 0.04, 0.045])

# Different interpolation methods.
linear_interp = interp1d(maturities, zero_rates, kind='linear',
                         bounds_error=False, fill_value='extrapolate')
# s=0 forces the spline through every point; ext=1 returns 0 outside the
# observed maturity range instead of extrapolating.
cubic_spline = UnivariateSpline(maturities, zero_rates, s=0, ext=1)

# Radial-basis-function interpolation (unstructured data).
# RBFInterpolator expects coordinates with shape (n_points, n_dims), so the
# 1-D maturities are passed as a column; [maturities] would be read as a
# single 6-dimensional point and rejected.
rbf = RBFInterpolator(maturities[:, np.newaxis], zero_rates,
                      kernel='thin_plate_spline')

test_mats = np.linspace(0.1, 10, 100)
print("插值结果:", cubic_spline(test_mats[:5]))
scipy.io – 数据导入导出
from scipy.io import loadmat, savemat, wavfile
import pandas as pd
# MATLAB file handling (interoperability with quant models kept in .mat files).
mat_data = loadmat(file_name='model.mat')
weights = mat_data['weights']  # pull one MATLAB variable out of the loaded dict
savemat(
    file_name='results.mat',
    mdict={'optimized_weights': weights, 'returns': returns},
)
# WAV file (alternative data source): read() yields (sample rate, samples).
sample_rate, audio_data = wavfile.read(filename='market_noise.wav')
# Spectral analysis of the market noise would follow from here.
scipy.linalg – 高级线性代数
from scipy.linalg import lu, cholesky, schur, norm, solve_triangular
from scipy.linalg.lapack import get_lapack_funcs

# LU decomposition and solve.
A = np.random.rand(100, 100)
b = np.random.rand(100)
# scipy.linalg.lu returns a permutation MATRIX P with A = P @ L @ U,
# so A x = b becomes L (U x) = P.T @ b.
P, L, U = lu(A)
y_forward = solve_triangular(L, P.T @ b, lower=True)  # forward substitution
x_lu = solve_triangular(U, y_forward)                 # back substitution

# Cholesky decomposition (positive-definite matrix).
cov_posdef = np.random.rand(50, 50)
# M @ M.T is positive semi-definite; adding the identity makes it
# strictly positive definite.
cov_posdef = cov_posdef @ cov_posdef.T + np.eye(50)
L_chol = cholesky(cov_posdef, lower=True)

# Schur decomposition (eigenvalue problems): cov_posdef = Z @ T @ Z.T.
T, Z = schur(cov_posdef)
print("Schur分解上三角:", T[:3, :3])
scipy.ndimage – 图像与信号处理
from scipy import ndimage
import matplotlib.pyplot as plt
# Candlestick-chart pattern detection (image processing).
def detect_candlestick_patterns(image):
    """Label connected edge regions in a 2-D chart image (simplified).

    Steps: Gaussian smoothing, Sobel gradient magnitude, thresholding,
    binary closing, then connected-component labelling.

    Parameters: ``image`` is a 2-D array-like of pixel intensities (the
    structuring element is built for rank 2).

    Returns ``(labeled, num_features)`` as produced by ``ndimage.label``.
    """
    # Work in floating point so smoothing is not truncated for integer input.
    pixels = np.asarray(image, dtype=float)
    # Gaussian filter to suppress noise.
    smoothed = ndimage.gaussian_filter(pixels, sigma=1)
    # Edge detection: combine the Sobel responses along both axes into a
    # magnitude. A single-axis, signed response would miss horizontal edges
    # and every falling (bright-to-dark) edge.
    grad_rows = ndimage.sobel(smoothed, axis=0)
    grad_cols = ndimage.sobel(smoothed, axis=1)
    edges = np.hypot(grad_rows, grad_cols)
    # Morphological closing to bridge small gaps in the edge mask.
    structure = ndimage.generate_binary_structure(2, 1)
    closed = ndimage.binary_closing(edges > 0.1, structure)
    # Label connected regions.
    labeled, num_features = ndimage.label(closed)
    return labeled, num_features
# Smooth a (random) price heat map with a 3x3 median filter.
heatmap_shape = (50, 50)
price_heatmap = np.random.rand(*heatmap_shape)
smoothed_heatmap = ndimage.median_filter(input=price_heatmap, size=3)
scipy.odr – 正交距离回归
from scipy.odr import ODR, Model, RealData
# Error-propagation model (e.g. volatility fitting): orthogonal distance
# regression, where both x and y carry measurement error.
def linear_model(B, x):
    """Straight line ``B[0] * x + B[1]`` in the (beta, x) form ODR expects."""
    return B[0] * x + B[1]


# Synthetic sample so the snippet runs on its own; replace xdata / ydata /
# sx / sy with real observations and their standard errors.
_rng = np.random.default_rng(0)
_x_true = np.linspace(0.0, 10.0, 50)
sx = np.full(_x_true.size, 0.1)  # standard error of each x observation
sy = np.full(_x_true.size, 0.2)  # standard error of each y observation
xdata = _x_true + _rng.normal(0.0, sx)
ydata = 2.0 * _x_true + 1.0 + _rng.normal(0.0, sy)

data = RealData(xdata, ydata, sx=sx, sy=sy)  # data with uncertainties
model = Model(linear_model)
odr = ODR(data, model, beta0=[1., 2.])
output = odr.run()
print("ODR参数:", output.beta)
print("参数协方差:", output.cov_beta)
scipy.sparse – 稀疏矩阵运算
from scipy.sparse import csr_matrix, diags, bsr_matrix
from scipy.sparse.linalg import spsolve, eigsh
# Large sparse (tridiagonal) covariance matrix.
n = 10000
# Rows of `diagonals` correspond to offsets [-1, 0, 1]: row 1 is the MAIN
# diagonal, rows 0 and 2 are the sub- and super-diagonal.
diagonals = np.ones((3, n))
# Unit variances on the main diagonal, 0.5 covariance between neighbours.
# Writing 0.5 into row 1 instead would shrink the variances below the
# covariances and make the matrix indefinite.
diagonals[[0, 2], :] = 0.5
sparse_cov = diags(diagonals, [-1, 0, 1], shape=(n, n)).tocsc()
# Solve the sparse linear system sparse_cov @ x = b for a random right-hand side.
b = np.random.rand(n)
x = spsolve(A=sparse_cov, b=b)
# Sparse eigen-decomposition: the k=10 eigenvalues of largest magnitude.
eigen_pairs = eigsh(sparse_cov, k=10, which='LM')
eigenvalues, eigenvectors = eigen_pairs
# The last entry is reported as the largest eigenvalue.
largest_eigenvalue = eigenvalues[-1]
print("最大特征值:", largest_eigenvalue)
scipy.special – 特殊函数
from scipy.special import erf, erfinv, gammaln, digamma, iv, kv, ellipk
# Black-Scholes N(d) implementation.
def norm_cdf(x):
    """Standard normal CDF expressed through erf: (1 + erf(x / sqrt(2))) / 2."""
    scaled = x / np.sqrt(2)
    return (1 + erf(scaled)) * 0.5
def implied_vol(S, K, T, r, market_price, option_type='call'):
    """Solve for the Black-Scholes implied volatility with Brent's method.

    Parameters: ``S`` spot, ``K`` strike, ``T`` maturity in years, ``r``
    risk-free rate, ``market_price`` observed option price, ``option_type``
    ``'call'`` for a call; any other value is priced as a put.

    Returns the volatility in the bracket [0.001, 2.0] at which the model
    price matches ``market_price``. ``brentq`` raises ``ValueError`` when the
    pricing error has the same sign at both ends of the bracket.
    """
    from scipy.optimize import brentq

    is_call = option_type == 'call'

    def pricing_error(vol):
        # Model price minus market price for a trial volatility.
        vol_sqrt_t = vol * np.sqrt(T)
        d1 = (np.log(S / K) + (r + 0.5 * vol**2) * T) / vol_sqrt_t
        d2 = d1 - vol_sqrt_t
        discounted_strike = K * np.exp(-r * T)
        if is_call:
            model_price = S * norm_cdf(d1) - discounted_strike * norm_cdf(d2)
        else:
            model_price = discounted_strike * norm_cdf(-d2) - S * norm_cdf(-d1)
        return model_price - market_price

    return brentq(pricing_error, 0.001, 2.0)
# Gamma function (option pricing): gammaln returns log(Gamma(x)).
log_gamma = gammaln(5)
print("log Γ(5):", log_gamma)
# Modified Bessel functions (interest-rate models): first kind I0 and
# second kind K0, both evaluated at 1.
I_0, K_0 = (bessel(0, 1.0) for bessel in (iv, kv))
print(f"I₀(1)={I_0:.6f}, K₀(1)={K_0:.6f}")
scipy.spatial – 空间计算
from scipy.spatial import KDTree, cKDTree, Voronoi, distance
from scipy.spatial.distance import cdist, pdist, squareform

# Spatial clustering of assets by return correlation.
returns = np.random.randn(100, 10)  # 100 days x 10 assets
# cdist and cKDTree treat every ROW as one point, so use one row per asset;
# with the untransposed array the "neighbours" would be trading days.
asset_vectors = returns.T
dist_matrix = cdist(asset_vectors, asset_vectors, metric='correlation')

# KD-tree nearest-neighbour search: the 5 closest assets to asset 0.
# The first hit is asset 0 itself (distance 0).
tree = cKDTree(asset_vectors)
distances, indices = tree.query(asset_vectors[0], k=5)

# Voronoi diagram (market segmentation) over random 2-D asset coordinates.
points = np.random.rand(50, 2) * 100
vor = Voronoi(points)
print("Voronoi区域数:", len(vor.point_region))
scipy.stats – 统计分析
from scipy import stats
from scipy.stats import multivariate_normal, wishart

# Distribution tests on a standard-normal sample.
data = np.random.normal(0, 1, 1000)
shapiro_stat, shapiro_p = stats.shapiro(data)
ks_stat, ks_p = stats.kstest(data, 'norm')

# Multivariate statistics: draw from a bivariate normal.
mu = np.array([0.1, 0.08])
cov = np.array([[0.04, 0.01], [0.01, 0.03]])
mvn = multivariate_normal(mu, cov)
samples = mvn.rvs(10000)

# Covariance estimation (Wishart distribution): draw one plausible sample
# covariance for a hypothetical window of n_obs observations.
n_obs = 100
sample_cov = np.cov(samples.T)
# A Wishart draw has mean df * scale. Dividing by the same df used in the
# draw keeps that mean equal to sample_cov; dividing by n_obs would bias it
# low by a factor of (n_obs - 1) / n_obs.
wishart_df = n_obs - 1
wishart_scale = sample_cov / wishart_df
wishart_cov = wishart.rvs(df=wishart_df, scale=wishart_scale, size=1)
3. 实用工具函数
scipy.misc(已废弃,部分功能已移至 scipy.special)
# from scipy.misc import imread, imsave # deprecated
# Use imageio as the replacement for reading images.
import imageio
img = imageio.imread('chart.png')
scipy.version – 版本信息
from scipy import version, show_config
print("SciPy版本:", version.version)
# show_config() writes the build configuration to stdout itself and returns
# None by default, so wrapping it in print() would emit a stray "None".
# Print the label first, then call it directly.
print("构建配置:")
show_config()
4. 模块导入最佳实践
按需导入(推荐)
# 避免 from scipy import *
from scipy.optimize import minimize, fsolve, differential_evolution
from scipy.stats import norm, t, chi2, kstest
from scipy.linalg import svd, eigh, cholesky
from scipy.integrate import quad, solve_ivp
from scipy.signal import butter, filtfilt, find_peaks
# 别名导入
import scipy.optimize as opt
import scipy.stats as stats
import scipy.linalg as la
版本兼容性检查
import importlib

import scipy

required_version = '1.10.0'


def check_module_version(module_name, min_version):
    """Raise ImportError when a SciPy subpackage is older than ``min_version``.

    SciPy subpackages do not carry their own ``__version__``; they ship with
    the SciPy distribution. The subpackage is therefore imported to confirm it
    exists, and its version falls back to ``scipy.__version__``. Checking only
    a per-module attribute would make the check a silent no-op.

    Parameters: ``module_name`` is the subpackage name without the ``scipy.``
    prefix (e.g. ``'optimize'``); ``min_version`` is the minimum acceptable
    version string.

    Raises ImportError if the version is too low. A missing subpackage raises
    ModuleNotFoundError, which is a subclass of ImportError.
    """
    from packaging import version as pkg_version

    module = importlib.import_module(f"scipy.{module_name}")
    version = getattr(module, '__version__', scipy.__version__)
    if pkg_version.parse(version) < pkg_version.parse(min_version):
        raise ImportError(f"{module_name}版本{version}过低,需要{min_version}")


check_module_version('optimize', '1.8.0')
5. 模块依赖关系图
NumPy (基础层)
↓
SciPy Core (linalg, fft, special)
↓
├── optimize (依赖linalg)
├── integrate (依赖special)
├── stats (依赖special, linalg)
├── signal (依赖fft, linalg)
├── interpolate (依赖linalg)
├── sparse (依赖linalg)
└── spatial (依赖linalg)
6. 性能关键模块
高性能模块
scipy.linalg.blas / scipy.linalg.lapack:底层BLAS/LAPACK绑定
scipy.sparse.linalg:稀疏矩阵求解器
scipy.fft._pocketfft:快速FFT实现
scipy.special.cython_special:Cython加速特殊函数
并行计算支持
# Multi-threaded BLAS (automatic): cap the OpenMP and MKL thread counts at 4
# through their environment variables.
import os
os.environ.update(OMP_NUM_THREADS='4', MKL_NUM_THREADS='4')
# Sparse LU factorization (reusable for repeated solves via lu.solve(rhs)).
from scipy.sparse.linalg import splu
# splu does not accept `use_umfpack` (that keyword belongs to spsolve) and
# raises TypeError when it is passed.
# NOTE(review): `sparse_matrix` is not defined in this snippet; presumably a
# square sparse matrix built earlier - confirm, and prefer CSC format.
lu = splu(sparse_matrix)
7. 量化金融常用模块组合
投资组合优化
# 核心模块组合
from scipy.optimize import minimize
from scipy.linalg import cholesky
from scipy.stats import norm
from scipy.sparse.linalg import eigsh
期权定价
from scipy.integrate import quad
from scipy.special import erf
from scipy.optimize import brentq
from scipy.stats import norm
风险管理
from scipy.stats import multivariate_normal, wishart
from scipy.linalg import svd
from scipy.optimize import differential_evolution
掌握这些模块及其相互关系,能构建完整的量化金融计算框架。需要特定模块的深度教程或应用示例,请告诉我!