SciPy 教程

Python SciPy 全面教程：科学计算核心库

SciPy（Scientific Python）是Python科学计算生态的核心库，基于NumPy构建，提供高性能的数学、科学和工程计算工具。它包含优化、积分、统计、信号处理、线性代数等专业模块，是量化金融、数据科学和工程分析的必备工具。

1. SciPy 核心模块概览

模块	功能	量化金融应用
scipy.optimize	优化算法（最小化、最大化）	投资组合优化、参数拟合
scipy.integrate	数值积分	期权定价、蒙特卡洛积分
scipy.stats	统计分布与假设检验	风险分析、因子模型
scipy.linalg	线性代数运算	协方差矩阵、PCA降维
scipy.signal	信号处理	技术指标平滑、滤波
scipy.interpolate	插值与拟合	收益率曲线插值
scipy.fft	快速傅里叶变换	时间序列频域分析
scipy.special	特殊函数	Black-Scholes公式

2. 安装与环境配置

# Conda安装（推荐）
conda install scipy numpy matplotlib pandas

# Pip安装
pip install scipy numpy matplotlib pandas

# 验证安装
python -c "import scipy; print(scipy.__version__)"

完整开发环境

import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
from scipy import optimize, integrate, stats, linalg, signal
import warnings
warnings.filterwarnings('ignore')

print(f"SciPy版本: {sp.__version__}")
print(f"NumPy版本: {np.__version__}")

3. 优化模块（scipy.optimize）

投资组合优化示例

from scipy.optimize import minimize, differential_evolution
import numpy as np

# 模拟资产收益与协方差
np.random.seed(42)
n_assets = 5
returns = np.random.normal(0.1, 0.2, n_assets)  # 年化收益
cov_matrix = np.random.rand(n_assets, n_assets)
cov_matrix = cov_matrix @ cov_matrix.T * 0.1  # 协方差矩阵

def portfolio_performance(weights, returns, cov_matrix):
    """投资组合绩效函数"""
    portfolio_return = np.dot(weights, returns)
    portfolio_vol = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))
    return -portfolio_return / portfolio_vol  # 负夏普比率（最小化）

# 约束条件
constraints = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})  # 权重和为1
bounds = tuple((0, 1) for _ in range(n_assets))  # 无空头

# 初始权重
initial_weights = np.array([1/n_assets] * n_assets)

# 方法1：SLSQP（顺序二次规划）
result_slsp = minimize(portfolio_performance, initial_weights,
                      args=(returns, cov_matrix),
                      method='SLSQP', bounds=bounds, constraints=constraints)

# 方法2：全局优化（差分进化）
result_de = differential_evolution(portfolio_performance,
                                  bounds=bounds,
                                  args=(returns, cov_matrix),
                                  constraints=constraints)

print("SLSQP最优权重:", result_slsp.x.round(4))
print("差分进化最优权重:", result_de.x.round(4))

# 可视化有效前沿
target_returns = np.linspace(returns.min(), returns.max(), 50)
frontier_vols = []

for target in target_returns:
    cons = (
        {'type': 'eq', 'fun': lambda x: np.sum(x) - 1},
        {'type': 'eq', 'fun': lambda x: np.dot(x, returns) - target}
    )
    res = minimize(lambda x: np.sqrt(np.dot(x.T, np.dot(cov_matrix, x))),
                  initial_weights, method='SLSQP', bounds=bounds, constraints=cons)
    frontier_vols.append(res.fun)

plt.figure(figsize=(10, 6))
plt.plot(frontier_vols, target_returns, 'b-', label='有效前沿')
plt.scatter(np.sqrt(np.diag(cov_matrix)), returns, c='red', s=50, label='单资产')
plt.xlabel('波动率')
plt.ylabel('预期收益')
plt.title('Markowitz有效前沿')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

非线性方程求解

from scipy.optimize import fsolve, root

def equations(vars):
    """非线性方程组：Black-Scholes隐含波动率"""
    S, K, T, r, price, sigma = vars
    d1 = (np.log(S/K) + (r + 0.5*sigma**2)*T) / (sigma*np.sqrt(T))
    d2 = d1 - sigma*np.sqrt(T)
    theoretical_price = S*stats.norm.cdf(d1) - K*np.exp(-r*T)*stats.norm.cdf(d2)
    return [theoretical_price - price, 0, 0, 0, 0, 0]  # 简化示例

# 求解隐含波动率
initial_guess = [100, 100, 0.25, 0.05, 10, 0.2]  # [S,K,T,r,price,sigma]
solution = fsolve(equations, initial_guess)
print("隐含波动率:", solution[5])

4. 数值积分（scipy.integrate）

Black-Scholes 期权定价（积分形式）

from scipy.integrate import quad

def black_scholes_call_integral(S, K, T, r, sigma):
    """Black-Scholes看涨期权积分定价"""

    def integrand(x):
        d1 = (np.log(S/K) + (r + 0.5*sigma**2)*T) / (sigma*np.sqrt(T))
        return np.exp(-0.5*x**2)/np.sqrt(2*np.pi) * (S/K)**(0.5*(2*r/sigma**2 + 1)) * \
               stats.norm.cdf((np.log(S/K) + (r + 0.5*sigma**2)*T - x*sigma*np.sqrt(T))/ 
                             (sigma*np.sqrt(T)))

    result, error = quad(integrand, -np.inf, np.inf)
    return K * np.exp(-r*T) * result

# 示例计算
S, K, T, r, sigma = 100, 95, 0.25, 0.05, 0.2
bs_price = black_scholes_call_integral(S, K, T, r, sigma)
print(f"看涨期权价格: {bs_price:.4f}")

# 验证：与解析解对比
from scipy.stats import norm
d1 = (np.log(S/K) + (r + 0.5*sigma**2)*T) / (sigma*np.sqrt(T))
d2 = d1 - sigma*np.sqrt(T)
analytical_price = S*norm.cdf(d1) - K*np.exp(-r*T)*norm.cdf(d2)
print(f"解析解: {analytical_price:.4f}, 误差: {abs(bs_price - analytical_price):.6f}")

蒙特卡洛积分加速

from scipy.integrate import nquad

def monte_carlo_european_put(S, K, T, r, sigma, n_paths=100000):
    """蒙特卡洛欧洲看跌期权定价"""
    dt = T
    # 几何布朗运动
    Z = np.random.standard_normal(n_paths)
    ST = S * np.exp((r - 0.5*sigma**2)*dt + sigma*np.sqrt(dt)*Z)
    payoff = np.maximum(K - ST, 0)
    return np.exp(-r*dt) * np.mean(payoff)

# 多维积分（路径依赖期权简化）
def path_dependent_option(S, K, T, r, sigma):
    def integrand(x1, x2):
        # 模拟两期路径
        S1 = S * np.exp((r - 0.5*sigma**2)*T/2 + sigma*np.sqrt(T/2)*x1)
        S2 = S1 * np.exp((r - 0.5*sigma**2)*T/2 + sigma*np.sqrt(T/2)*x2)
        payoff = np.maximum(K - S2, 0)
        return np.exp(-r*T) * payoff / (2*np.pi)

    result, error = nquad(integrand, [[-np.inf, np.inf], [-np.inf, np.inf]])
    return result

5. 统计模块（scipy.stats）

分布拟合与风险分析

from scipy import stats
import matplotlib.pyplot as plt

# 生成样本数据
np.random.seed(42)
returns = np.random.normal(0.001, 0.02, 1000)  # 日收益率

# 分布拟合
rv = stats.norm.fit(returns)  # 正态分布拟合
print(f"正态分布参数: μ={rv[0]:.4f}, σ={rv[1]:.4f}")

# Kolmogorov-Smirnov检验
ks_stat, p_value = stats.kstest(returns, 'norm', args=rv)
print(f"KS检验: 统计量={ks_stat:.4f}, p值={p_value:.4f}")

# VaR计算
confidence = 0.95
var_analytical = stats.norm.ppf(1-confidence, rv[0], rv[1])
var_historical = np.percentile(returns, (1-confidence)*100)
print(f"95% VaR: 解析={var_analytical:.4f}, 历史={var_historical:.4f}")

# 绘制分布对比
x = np.linspace(returns.min(), returns.max(), 100)
plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
plt.hist(returns, bins=50, density=True, alpha=0.7, label='历史收益率')
plt.plot(x, stats.norm.pdf(x, *rv), 'r-', lw=2, label='拟合正态分布')
plt.axvline(var_analytical, color='red', linestyle='--', label=f'VaR {confidence*100}%')
plt.legend()
plt.title('收益率分布拟合')

# QQ图
plt.subplot(1, 2, 2)
stats.probplot(returns, dist="norm", plot=plt)
plt.title('正态分布QQ图')
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

多元正态分布与协方差分析

from scipy.stats import multivariate_normal

# 多资产协方差
n_assets = 3
mu = np.array([0.1, 0.08, 0.12])  # 年化收益
cov = np.array([[0.04, 0.01, 0.005],
                [0.01, 0.03, 0.008],
                [0.005, 0.008, 0.05]])  # 协方差矩阵

mvn = multivariate_normal(mean=mu, cov=cov)

# 生成样本
samples = mvn.rvs(size=10000)

# 概率密度
x, y = np.mgrid[-0.2:0.3:30j, -0.2:0.3:30j]
pos = np.dstack((x, y))
rv = multivariate_normal([mu[0], mu[1]], cov[:2, :2])
z = rv.pdf(pos)

plt.figure(figsize=(10, 4))
plt.subplot(1, 2, 1)
plt.contourf(x, y, z)
plt.title('多元正态分布密度')
plt.colorbar()

plt.subplot(1, 2, 2)
plt.scatter(samples[:, 0], samples[:, 1], alpha=0.5)
plt.scatter(mu[0], mu[1], color='red', s=100, label='真实均值')
plt.legend()
plt.title('蒙特卡洛样本')
plt.tight_layout()
plt.show()

6. 线性代数（scipy.linalg）

主成分分析（PCA）

from scipy.linalg import eigh, svd
import pandas as pd

# 模拟因子数据
np.random.seed(42)
n_factors = 10
n_stocks = 100
factors = np.random.randn(n_factors, n_stocks)

# 协方差矩阵特征分解
cov_matrix = np.cov(factors)
eigenvalues, eigenvectors = eigh(cov_matrix)

# 按特征值排序
idx = np.argsort(eigenvalues)[::-1]
eigenvalues = eigenvalues[idx]
eigenvectors = eigenvectors[:, idx]

# 主成分
n_components = 3
pca_components = eigenvectors[:, :n_components]
explained_variance = eigenvalues[:n_components] / eigenvalues.sum()

print("解释方差比:", explained_variance.round(4))

# 重构数据
scores = np.dot(pca_components.T, factors)
reconstructed = np.dot(pca_components, scores)

# SVD分解（数值更稳定）
U, s, Vt = svd(factors, full_matrices=False)
pca_svd = Vt.T[:, :n_components]

plt.figure(figsize=(12, 4))
plt.subplot(1, 3, 1)
plt.plot(explained_variance.cumsum(), 'bo-')
plt.xlabel('主成分数量')
plt.ylabel('累积解释方差')
plt.title('PCA累积解释方差')

plt.subplot(1, 3, 2)
plt.scatter(scores[0], scores[1], alpha=0.6)
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.title('主成分得分')

plt.subplot(1, 3, 3)
plt.plot(s[:20], 'ro-')
plt.xlabel('奇异值索引')
plt.ylabel('奇异值')
plt.title('SVD奇异值')
plt.tight_layout()
plt.show()

矩阵分解与求逆

from scipy.linalg import lu, cholesky, solve

# LU分解
A = np.random.rand(5, 5)
P, L, U = lu(A)
print("LU分解验证:", np.allclose(A, P @ L @ U))

# Cholesky分解（对称正定矩阵）
A_posdef = A @ A.T + np.eye(5)
L_chol = cholesky(A_posdef, lower=True)
print("Cholesky验证:", np.allclose(A_posdef, L_chol @ L_chol.T))

# 线性方程组求解
b = np.random.rand(5)
x_exact = solve(A, b)
x_lu = solve(L, solve(U, solve(P.T, b)))  # 使用LU分解
print("解的精度:", np.allclose(x_exact, x_lu))

7. 信号处理（scipy.signal）

技术指标平滑与滤波

from scipy import signal
import yfinance as yf

# 获取真实数据
df = yf.download('AAPL', start='2024-01-01', end='2025-01-01')
close = df['Close'].values

# 移动平均（FIR滤波器）
b, a = signal.firwin(20, cutoff=0.3, window='hamming', pass_zero=True), 1
ma_fir = signal.lfilter(b, a, close)

# 卡尔曼滤波（简化版）
def kalman_filter(data, process_var=1e-3, measurement_var=1e-2):
    """简单一维卡尔曼滤波"""
    n = len(data)
    filtered = np.zeros(n)
    filtered[0] = data[0]

    for t in range(1, n):
        # 预测
        pred = filtered[t-1]
        # 更新
        kalman_gain = process_var / (process_var + measurement_var)
        filtered[t] = pred + kalman_gain * (data[t] - pred)
        process_var = (1 - kalman_gain) * process_var

    return filtered

kf_smooth = kalman_filter(close)

# 绘制对比
plt.figure(figsize=(15, 6))
plt.plot(close, label='原始收盘价', alpha=0.7)
plt.plot(ma_fir, label='FIR滤波(20期)', linewidth=2)
plt.plot(kf_smooth, label='卡尔曼滤波', linewidth=2)
plt.title('价格序列平滑对比')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

傅里叶变换分析

from scipy.fft import fft, fftfreq

# FFT分析周期性
N = len(close)
yf = fft(close)
xf = fftfreq(N, 1)[:N//2]

plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(close)
plt.title('原始时间序列')

plt.subplot(1, 2, 2)
plt.plot(xf[1:N//2], 2.0/N * np.abs(yf[1:N//2]))
plt.title('功率谱密度')
plt.xlabel('频率')
plt.ylabel('幅度')
plt.grid(True)
plt.tight_layout()
plt.show()

# 识别主要周期
dominant_freq = xf[1:N//2][np.argmax(2.0/N * np.abs(yf[1:N//2]))]
dominant_period = 1 / dominant_freq
print(f"主导周期: {dominant_period:.1f} 个交易日")

8. 插值与曲线拟合（scipy.interpolate）

收益率曲线插值

from scipy.interpolate import interp1d, UnivariateSpline, BPoly

# 模拟收益率曲线数据
maturities = np.array([0.1, 0.5, 1.0, 2.0, 5.0, 10.0])
rates = np.array([0.02, 0.025, 0.03, 0.035, 0.04, 0.045])

# 线性插值
linear_interp = interp1d(maturities, rates, kind='linear', 
                        bounds_error=False, fill_value='extrapolate')

# 样条插值
spline_interp = UnivariateSpline(maturities, rates, s=0, ext=1)

# B样条
bp = BPoly.from_derivatives(maturities, 
                           np.column_stack([rates, np.gradient(rates, maturities)]))

# 对比不同插值方法
test_maturities = np.linspace(0.1, 10, 100)
linear_rates = linear_interp(test_maturities)
spline_rates = spline_interp(test_maturities)
bp_rates = bp(test_maturities)

plt.figure(figsize=(10, 6))
plt.plot(maturities, rates, 'ro', markersize=8, label='观测点')
plt.plot(test_maturities, linear_rates, 'b-', label='线性插值')
plt.plot(test_maturities, spline_rates, 'g-', label='样条插值')
plt.plot(test_maturities, bp_rates, 'orange', label='B样条')
plt.xlabel('期限（年）')
plt.ylabel('收益率')
plt.title('收益率曲线插值对比')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

9. 特殊函数（scipy.special）

金融特殊函数

from scipy.special import erf, erfinv, gammainc, iv

# 误差函数（Black-Scholes核心）
def black_scholes_d1(S, K, T, r, sigma):
    """Black-Scholes d1计算"""
    return (np.log(S/K) + (r + 0.5*sigma**2)*T) / (sigma*np.sqrt(T))

# 使用erf实现累积正态分布
def norm_cdf(x):
    return (1 + erf(x / np.sqrt(2))) / 2

S, K, T, r, sigma = 100, 95, 0.25, 0.05, 0.2
d1 = black_scholes_d1(S, K, T, r, sigma)
d2 = d1 - sigma * np.sqrt(T)

# erf实现
N_d1_erf = norm_cdf(d1)
N_d2_erf = norm_cdf(d2)

# scipy.stats验证
N_d1_stats = stats.norm.cdf(d1)
N_d2_stats = stats.norm.cdf(d2)

print(f"d1: {d1:.4f}, N(d1): {N_d1_erf:.4f} (erf) vs {N_d1_stats:.4f} (stats)")
print(f"d2: {d2:.4f}, N(d2): {N_d2_erf:.4f} (erf) vs {N_d2_stats:.4f} (stats)")

# 修改贝塞尔函数（债券期权）
kappa = 0.5  # 波动率参数
nu = 1.0     # 订单
z = 2.0 * kappa * np.sqrt(T)
I_nu = iv(nu, z)
print(f"修改贝塞尔函数 I_{nu}({z}): {I_nu:.6f}")

10. 高级应用：稳健优化与约束

带约束的稳健优化

def robust_optimization(returns, cov_matrix, risk_aversion=3):
    """稳健投资组合优化（CVaR约束）"""

    def objective(weights):
        expected_return = np.dot(weights, returns)
        portfolio_risk = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))
        return -expected_return + risk_aversion * portfolio_risk

    # CVaR约束（简化）
    def cvar_constraint(weights):
        portfolio_returns = np.dot(returns, weights)
        return -np.mean(np.sort(portfolio_returns)[:int(0.05*len(portfolio_returns))])

    constraints = [
        {'type': 'eq', 'fun': lambda x: np.sum(x) - 1},  # 权重约束
        {'type': 'ineq', 'fun': cvar_constraint}         # CVaR约束
    ]

    bounds = [(0, 1)] * len(returns)
    result = minimize(objective, np.ones(len(returns))/len(returns),
                     method='SLSQP', bounds=bounds, constraints=constraints)

    return result.x, -result.fun

# 应用
optimal_weights, utility = robust_optimization(returns, cov_matrix)
print("稳健最优权重:", optimal_weights.round(4))
print("效用值:", utility)

11. 性能优化与并行计算

向量化与JIT加速

from numba import jit, prange
from scipy.optimize import minimize

@jit(nopython=True, parallel=True)
def vectorized_portfolio_vol(weights, cov_matrix):
    """向量化波动率计算"""
    return np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))

# 加速蒙特卡洛模拟
@jit(nopython=True, parallel=True)
def fast_monte_carlo(S, K, T, r, sigma, n_simulations):
    dt = T
    payoffs = np.zeros(n_simulations)

    for i in prange(n_simulations):
        Z = np.random.normal()
        ST = S * np.exp((r - 0.5*sigma**2)*dt + sigma*np.sqrt(dt)*Z)
        payoffs[i] = max(ST - K, 0)

    return np.mean(payoffs) * np.exp(-r*dt)

# 性能对比
import time
start = time.time()
price_fast = fast_monte_carlo(100, 100, 1, 0.05, 0.2, 1000000)
print(f"加速蒙特卡洛: {price_fast:.4f}, 时间: {time.time()-start:.2f}s")

12. 最佳实践与调试技巧

数值稳定性

# 避免数值不稳定
def stable_norm_cdf(x):
    """数值稳定的正态累积分布"""
    return 0.5 * (1 + np.tanh(np.sqrt(2/np.pi) * x + 
                    np.tanh(np.sqrt(2/np.pi) * x)**3 / 3))

# 条件数检查（矩阵病态检测）
def check_matrix_condition(A, threshold=1e10):
    """检查矩阵条件数"""
    cond = linalg.cond(A)
    if cond > threshold:
        print(f"警告：矩阵条件数过大 {cond:.2e}，可能数值不稳定")
    return cond

A = np.random.rand(100, 100)
cond_num = check_matrix_condition(A)

错误处理与验证

def safe_optimize(func, x0, bounds=None, constraints=None, method='SLSQP'):
    """安全的优化函数"""
    try:
        result = minimize(func, x0, bounds=bounds, constraints=constraints, 
                         method=method, options={'maxiter': 1000})

        if not result.success:
            print(f"优化失败: {result.message}")
            return None

        # 验证约束满足
        if constraints:
            for cons in constraints:
                if cons['type'] == 'eq':
                    violation = abs(cons['fun'](result.x))
                    if violation > 1e-6:
                        print(f"约束违反: {violation:.2e}")

        return result

    except Exception as e:
        print(f"优化异常: {e}")
        return None

13. 学习资源与进阶

官方文档与示例

SciPy官方文档：https://docs.scipy.org/doc/scipy/
教程与Gallery：https://scipy-lectures.org/
GitHub源码：https://github.com/scipy/scipy

推荐书籍

《SciPy and NumPy》（Elliot、Oliphant）
《Python for Finance》（Hilpisch）

进阶主题

稀疏矩阵运算（scipy.sparse）
微分方程求解（scipy.integrate.odeint）
贝叶斯优化（scipy.optimize.bayesopt）
GPU加速（CuPy + SciPy接口）

SciPy是量化金融计算的基础设施，掌握其核心模块能显著提升模型开发效率。需要特定模块的深度示例（如贝叶斯优化、稀疏矩阵）或与Pandas/QuantLib的集成，请告诉我具体需求！

Python SciPy 全面教程：科学计算核心库

1. SciPy 核心模块概览

2. 安装与环境配置

3. 优化模块（scipy.optimize）

4. 数值积分（scipy.integrate）

5. 统计模块（scipy.stats）

6. 线性代数（scipy.linalg）

7. 信号处理（scipy.signal）

8. 插值与曲线拟合（scipy.interpolate）

9. 特殊函数（scipy.special）

10. 高级应用：稳健优化与约束

11. 性能优化与并行计算

12. 最佳实践与调试技巧

13. 学习资源与进阶

likuolei

发表回复取消回复

归档

分类

2025 年 12 月
一	二	三	四	五	六	日
1	2	3	4	5	6	7
8	9	10	11	12	13	14
15	16	17	18	19	20	21
22	23	24	25	26	27	28
29	30	31

Python SciPy 全面教程：科学计算核心库

1. SciPy 核心模块概览

2. 安装与环境配置

3. 优化模块（scipy.optimize）

4. 数值积分（scipy.integrate）

5. 统计模块（scipy.stats）

6. 线性代数（scipy.linalg）

7. 信号处理（scipy.signal）

8. 插值与曲线拟合（scipy.interpolate）

9. 特殊函数（scipy.special）

10. 高级应用：稳健优化与约束

11. 性能优化与并行计算

12. 最佳实践与调试技巧

13. 学习资源与进阶

likuolei

发表回复 取消回复

相关文章

发表回复取消回复