Pandas DateOffset 对象
Pandas DateOffset 对象完整参考手册
1. DateOffset 基础概念
1.1 什么是 DateOffset
import pandas as pd
from pandas.tseries.offsets import DateOffset, BDay, MonthEnd
from datetime import datetime
import numpy as np
# DateOffset is the core pandas class for shifting dates and times.
# It underpins pandas date arithmetic and supports calendar-aware rules.
# Basic example: move a timestamp forward by five calendar days.
date = pd.Timestamp('2023-01-15')
offset = DateOffset(days=5)
result = date + offset

# Collect the report lines first, then emit them in one go.
report_lines = [
    "基础DateOffset:",
    f"原日期: {date}",
    f"加5天: {result}",
    f"类型: {type(offset)}",
]
print("\n".join(report_lines))
1.2 DateOffset vs Timedelta
# DateOffset (calendar-aware) versus Timedelta (fixed-length interval).
date = pd.Timestamp('2023-01-28')

# Timedelta: a fixed span of 24 hours.
fixed_day = pd.Timedelta(days=1)
timedelta_result = date + fixed_day
print("Timedelta结果:", timedelta_result)  # 2023-01-29

# BDay: jump to the next business day.
next_bday = pd.offsets.BDay()
business_offset = date + next_bday
print("BDay结果:", business_offset)  # 2023-01-30 (the weekend is skipped)

# DateOffset follows calendar rules such as differing month lengths.
date_end_month = pd.Timestamp('2023-01-31')
one_month = DateOffset(months=1)
offset_end = date_end_month + one_month
print("月末+1月:", offset_end)  # 2023-02-28, not the 31st
2. 基本 DateOffset 类
2.1 DateOffset 基础参数
# DateOffset accepts several time-unit keyword arguments at once.
offset_units = {
    "years": 1,
    "months": 2,
    "days": 3,
    "hours": 4,
    "minutes": 5,
    "seconds": 6,
    "microseconds": 7,
}
offset = DateOffset(**offset_units)

date = pd.Timestamp('2023-01-15 10:20:30')
result = date + offset

print("复杂DateOffset:")
print(f"原时间: {date}")
print(f"结果: {result}")
# Show each component of the shifted timestamp separately.
print(f"年月日时分秒: {result.year}-{result.month}-{result.day} {result.hour}:{result.minute}:{result.second}")
2.2 常用基础偏移
date = pd.Timestamp('2023-01-15')

# Combined offset: several units applied together.
combo = DateOffset(years=1, months=1, days=1)

# Each entry pairs the printed label with the offset being demonstrated.
labelled_offsets = [
    # Year, month and day shifts.
    ("年偏移:", DateOffset(years=1)),
    ("月偏移:", DateOffset(months=1)),
    ("日偏移:", DateOffset(days=10)),
    # Time units: more than 24 hours carries over into the next day.
    ("小时偏移:", DateOffset(hours=25)),
    ("周偏移:", DateOffset(weeks=1)),
    ("组合偏移:", combo),
]
for label, shift in labelled_offsets:
    print(label, date + shift)
3. 日历感知偏移(Calendar-Aware Offsets)
3.1 工作日偏移(BDay)
# BDay: business days (Monday through Friday).
date = pd.Timestamp('2023-01-27')  # a Friday
print("BDay偏移:")

following_bday = date + BDay(1)
print("下一个工作日:", following_bday)  # 2023-01-30 (Monday)

preceding_bday = date - BDay(1)
print("上一个工作日:", preceding_bday)  # 2023-01-26 (Thursday)

three_bdays_later = date + BDay(3)
print("3个工作日后:", three_bdays_later)  # 2023-02-01 (Wednesday)

# Behaviour when starting from a weekend date.
weekend = pd.Timestamp('2023-01-28')  # a Saturday
print("周末+1工作日:", weekend + BDay(1))  # 2023-01-30 (the next Monday)
3.2 月末和月初偏移
# MonthEnd: anchored on the last calendar day of a month.
date_me = pd.Timestamp('2023-01-15')
print("MonthEnd:")

# n=0 rolls forward to the nearest month end (a month-end date stays put).
current_month_end = date_me + MonthEnd(0)
print("到月末:", current_month_end)  # 2023-01-31

# From a mid-month date, MonthEnd(1) only reaches the end of the *current*
# month. Anchor on the current month end first, then step one month further.
next_month_end = date_me + MonthEnd(0) + MonthEnd(1)
print("下月末:", next_month_end)  # 2023-02-28

previous_month_end = date_me - MonthEnd(1)
print("上月末:", previous_month_end)  # 2022-12-31

# MonthBegin: anchored on the first calendar day of a month.
from pandas.tseries.offsets import MonthBegin
print("\nMonthBegin:")

# Adding MonthBegin(0) would roll *forward* to 2023-02-01; rollback() is the
# operation that yields the first day of the current month.
current_month_begin = MonthBegin().rollback(date_me)
print("到月初:", current_month_begin)  # 2023-01-01

next_month_begin = date_me + MonthBegin(1)
print("下月初:", next_month_begin)  # 2023-02-01

# Relative month shift: DateOffset(months=n) keeps the day of the month.
date_mo = pd.Timestamp('2023-01-15')
print("MonthOffset(2个月):", date_mo + DateOffset(months=2))  # 2023-03-15
3.3 季度偏移
# QuarterEnd / QuarterBegin: anchored on quarter boundaries.
from pandas.tseries.offsets import QuarterEnd, QuarterBegin
date_q = pd.Timestamp('2023-02-15')
print("季度偏移:")

# n=0 rolls forward to the nearest quarter end.
current_quarter_end = date_q + QuarterEnd(0)
print("当前季度末:", current_quarter_end)  # 2023-03-31

# QuarterEnd(1) from inside a quarter only reaches the current quarter end,
# so anchor on it first and then advance one more quarter.
next_quarter_end = date_q + QuarterEnd(0) + QuarterEnd(1)
print("下一季度末:", next_quarter_end)  # 2023-06-30

# Subtracting QuarterEnd(0) is the same as adding it (n stays 0), so the
# previous quarter end needs n=1.
previous_quarter_end = date_q - QuarterEnd(1)
print("上一季度末:", previous_quarter_end)  # 2022-12-31

# QuarterBegin defaults to startingMonth=3 and addition rolls forward, so
# request calendar quarters explicitly and roll *back* to the quarter start.
current_quarter_begin = QuarterBegin(startingMonth=1).rollback(date_q)
print("当前季度初:", current_quarter_begin)  # 2023-01-01
3.4 年末和年初偏移
# YearEnd and YearBegin: anchored on year boundaries.
from pandas.tseries.offsets import YearEnd, YearBegin
date_y = pd.Timestamp('2023-06-15')
print("年度偏移:")

# n=0 rolls forward to the nearest year end.
current_year_end = date_y + YearEnd(0)
print("当前年终:", current_year_end)  # 2023-12-31

# YearEnd(1) from mid-year only reaches the current year end; anchor on it
# first and then advance one more year.
next_year_end = date_y + YearEnd(0) + YearEnd(1)
print("下一年终:", next_year_end)  # 2024-12-31

# Adding YearBegin(0) would roll forward to 2024-01-01; rollback() returns
# the first day of the current year instead.
current_year_begin = YearBegin().rollback(date_y)
print("当前年初:", current_year_begin)  # 2023-01-01
4. 自定义日历规则偏移
4.1 自定义工作日(CustomBusinessDay)
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay

# Business-day offset that skips US federal holidays.
us_holidays = USFederalHolidayCalendar()
cbday_us = CustomBusinessDay(calendar=us_holidays)

date = pd.Timestamp('2023-07-04')  # US Independence Day (a holiday)
print("自定义工作日:")
print("普通BDay:", date + BDay(1))  # 2023-07-05

# Offset objects are added to timestamps, not called like functions; use
# `offset * n` when more than one step is needed.
# Starting on the holiday itself, the next custom business day is also
# 2023-07-05.
print("自定义(跳过假期):", date + cbday_us)  # 2023-07-05

# Custom holiday list passed straight to the offset.
custom_cal = pd.tseries.offsets.CustomBusinessDay(
    holidays=['2023-01-01', '2023-12-25']
)
print("自定义假期:", pd.Timestamp('2023-12-25') + custom_cal)  # 2023-12-26
4.2 周偏移(Week)
from pandas.tseries.offsets import Week

# Week offsets anchored on a given weekday.
week_monday = Week(weekday=0)  # 0 = Monday
week_friday = Week(weekday=4)  # 4 = Friday

date = pd.Timestamp('2023-01-15')  # a Sunday
print("周偏移:")
for label, anchored_week in (("到下周一:", week_monday), ("到下周五:", week_friday)):
    print(label, date + anchored_week)

# From a weekend date to the next working week.
weekend = pd.Timestamp('2023-01-14')  # a Saturday
weekend_to_monday = weekend + Week(weekday=0)
print("周末到周一:", weekend_to_monday)
4.3 每月第N个星期几偏移(Nth Weekday)
from pandas.tseries.offsets import SemiMonthEnd, SemiMonthBegin
# A specific weekday within each month.
from pandas.tseries.offsets import MonthEnd

# Third Friday of every month. Week(weekday=4, n=3) would mean "three Fridays
# ahead"; WeekOfMonth is the offset anchored on the N-th weekday of a month.
# Its `week` argument counts from 0, so week=2 selects the third occurrence.
third_friday = pd.offsets.WeekOfMonth(week=2, weekday=4)
print("每月第3个周五示例:")
base = pd.Timestamp('2023-01-01')
# Start at 1: WeekOfMonth does not accept n=0.
third_fridays = [base + third_friday * i for i in range(1, 13)]
for expiry in third_fridays:
    print(expiry)
5. 高级 DateOffset 功能
5.1 偏移规范化(normalize)
# The normalize flag resets the time of the result to 00:00:00.
date_time = pd.Timestamp('2023-01-15 14:30:00')
offset_not_normal = DateOffset(days=5, normalize=False)
offset_normal = DateOffset(days=5, normalize=True)

print("normalize对比:")
kept_time = date_time + offset_not_normal
print("带时间原日期:", kept_time)  # time of day is preserved
midnight = date_time + offset_normal
print("规范化:", midnight)  # 00:00:00

# normalize works the same way on MonthEnd.
month_end_midnight = date_time + MonthEnd(0, normalize=True)
print("MonthEnd normalize:", month_end_midnight)
5.2 偏移规则组合
# Combining rules. Two anchored offsets cannot be added to each other
# (MonthEnd(0) + BDay(5) raises an error); instead apply them to the
# timestamp one after another, left to right.
date = pd.Timestamp('2023-01-15')

# Combination 1: the fifth business day after the month end.
result_rule1 = date + MonthEnd(0) + BDay(5)
print("月末+5工作日:", result_rule1)  # 2023-02-07

# Combination 2: the third trading day after the quarter end.
result_rule2 = date + QuarterEnd(0) + BDay(3)
print("季度末+3工作日:", result_rule2)  # 2023-04-05

# Chained offsets: next month end, two business days on, then 09:00.
date_chain = pd.Timestamp('2023-01-31')
result_chain = date_chain + MonthEnd(1) + BDay(2) + DateOffset(hours=9)
print("链式偏移:", result_chain)  # 2023-03-02 09:00:00
5.3 条件偏移
def next_business_day(date):
    """Return the business day that follows ``date``.

    Args:
        date: Timestamp to shift.

    Returns:
        ``date`` moved forward by one ``BDay``.
    """
    single_business_day = BDay(1)
    return date + single_business_day
def next_month_end_trading_day(date):
    """Return the trading day for the end of the month after ``date``'s month.

    Args:
        date: Reference timestamp.

    Returns:
        The last calendar day of the following month, or, when that day falls
        on a weekend, the next business day after it.
    """
    # MonthEnd(1) applied to a mid-month date only reaches the end of the
    # current month, so anchor on the current month end before stepping.
    month_end = date + MonthEnd(0) + MonthEnd(1)
    # weekday() is 5 for Saturday and 6 for Sunday.
    if month_end.weekday() >= 5:
        return month_end + BDay(1)
    return month_end
# Quick check of the conditional helper.
test_date = pd.Timestamp('2023-01-27')  # a Friday
print("条件偏移:")
month_end_trading_day = next_month_end_trading_day(test_date)
print("下月末交易日:", month_end_trading_day)
6. 实际应用场景
6.1 财务日期计算
# Earnings-report date arithmetic.
def earnings_date_calculation(report_date):
    """Derive the key dates around an earnings report.

    Args:
        report_date: Timestamp inside the quarter in which the report is
            published.

    Returns:
        dict with three timestamps:
            ``prev_quarter_end``: the last quarter end strictly before
                ``report_date``;
            ``earnings_release``: the first business day after that quarter
                end;
            ``post_earnings``: 30 business days after the release.
    """
    # Subtracting QuarterEnd(0) rolls *forward* (n stays 0 under negation),
    # so step back one full quarter end with n=1.
    prev_quarter_end = report_date - QuarterEnd(1)
    # Next trading day (earnings release).
    earnings_release = prev_quarter_end + BDay(1)
    # Thirtieth trading day after the release.
    post_earnings = earnings_release + BDay(30)
    return {
        'prev_quarter_end': prev_quarter_end,
        'earnings_release': earnings_release,
        'post_earnings': post_earnings,
    }
# Example: the 2023 Q1 report (usually released in mid-April).
q1_report = pd.Timestamp('2023-04-15')
dates = earnings_date_calculation(q1_report)
for key in dates:
    date = dates[key]
    print(f"{key}: {date.date()}")
6.2 期权到期日计算
def option_expiry_dates(base_date):
    """Compute option expiry dates relative to ``base_date``.

    Args:
        base_date: Reference timestamp.

    Returns:
        dict with:
            ``monthly_third_friday``: the first third-Friday-of-a-month on or
                after ``base_date``;
            ``weekly``: ``base_date`` shifted by one Friday-anchored week;
            ``type``: ``"Quarterly"`` when the monthly expiry falls in March,
                June, September or December, otherwise ``"Monthly"``.
    """
    # Week(weekday=4, n=3) would mean "three Fridays ahead". WeekOfMonth is
    # anchored on the N-th weekday of a month; `week` counts from 0, so
    # week=2 / weekday=4 is the third Friday.
    third_friday_rule = pd.offsets.WeekOfMonth(week=2, weekday=4)
    # rollforward keeps a date that already is a third Friday.
    third_friday = third_friday_rule.rollforward(base_date)

    # Quarterly options expire on the third Friday of Mar/Jun/Sep/Dec.
    quarter_months = (3, 6, 9, 12)
    expiry_type = "Quarterly" if third_friday.month in quarter_months else "Monthly"

    # Weekly options (every Friday).
    weekly = base_date + pd.offsets.Week(weekday=4)
    return {
        'monthly_third_friday': third_friday,
        'weekly': weekly,
        'type': expiry_type,
    }
base = pd.Timestamp('2023-03-15')
print("期权到期日:")
expiry_info = option_expiry_dates(base)
for key in expiry_info:
    value = expiry_info[key]
    print(f"{key}: {value}")
6.3 支付和结算日期
def payment_settlement_dates(invoice_date, terms_days=30):
    """Compute payment and settlement dates for an invoice.

    Args:
        invoice_date: Timestamp on which the invoice was issued.
        terms_days: Payment term in calendar days.

    Returns:
        dict with ``invoice_date``, ``due_date`` (invoice date plus the
        term), ``settlement_date`` (the due date, pulled back to the previous
        business day when it lands on a weekend) and ``t2_settlement`` (two
        business days after the settlement date).
    """
    due_date = invoice_date + DateOffset(days=terms_days)

    # weekday() is 5 for Saturday and 6 for Sunday.
    due_on_weekend = due_date.weekday() >= 5
    settlement_date = due_date - BDay(1) if due_on_weekend else due_date

    # T+2 settlement: two business days after the settlement date.
    t2_settlement = settlement_date + BDay(2)

    return dict(
        invoice_date=invoice_date,
        due_date=due_date,
        settlement_date=settlement_date,
        t2_settlement=t2_settlement,
    )
invoice = pd.Timestamp('2023-12-29')  # a Friday
payment_dates = payment_settlement_dates(invoice)
for key in payment_dates:
    date = payment_dates[key]
    # Print the date followed by the abbreviated weekday name.
    print(f"{key}: {date.date()} ({date.day_name()[:3]})")
7. 批量操作和向量化
7.1 DateOffset 向量化
# Apply offsets to a whole array of dates.
dates = pd.date_range('2023-01-01', periods=5, freq='B')
print("批量偏移:")
print("原日期:", dates)

# A different offset per date. A single DateOffset cannot carry an array of
# values (DateOffset(days=np.arange(5)) is rejected), so pair each date with
# its own offset. For plain day counts, `dates + pd.to_timedelta(...)` is a
# fully vectorised alternative.
offsets = [DateOffset(days=i) for i in range(5)]
batch_results = pd.DatetimeIndex(
    [stamp + shift for stamp, shift in zip(dates, offsets)]
)
print("批量结果:", batch_results)

# Series / DataFrame: one offset applied to a whole column.
df = pd.DataFrame({'date': dates, 'value': range(5)})
df['shifted'] = df['date'] + BDay(1)
print("\nDataFrame批量偏移:")
print(df[['date', 'shifted']])
7.2 条件批量偏移
def apply_conditional_offset(dates, condition_func, offset_func):
    """Shift only the dates that satisfy a condition.

    Args:
        dates: Iterable of timestamps.
        condition_func: Callable returning a truthy value for dates that
            should be shifted.
        offset_func: Callable mapping a date to its shifted value; it is only
            invoked for dates accepted by ``condition_func``.

    Returns:
        ``pd.DatetimeIndex`` in input order, with rejected dates unchanged.
    """
    shifted = [
        offset_func(stamp) if condition_func(stamp) else stamp
        for stamp in dates
    ]
    return pd.DatetimeIndex(shifted)
# Example: every date is shifted -- weekdays by one calendar day, weekend
# dates forward to the following Monday.
dates_mixed = pd.date_range('2023-01-13', periods=5)  # Friday through Tuesday


def is_weekday(date):
    """Return True when ``date`` falls on Monday through Friday."""
    return date.weekday() < 5


def offset_func(date):
    """Shift a weekday by one day; move a weekend date to the next Monday."""
    if is_weekday(date):
        return date + DateOffset(days=1)
    # weekday() is 5 for Saturday and 6 for Sunday, so 7 - weekday() is the
    # number of days until Monday (a flat +3 would overshoot it).
    return date + DateOffset(days=7 - date.weekday())


# The condition has to accept every date: apply_conditional_offset returns
# dates that fail the condition unchanged, so passing is_weekday here would
# mean the weekend branch of offset_func never runs.
result = apply_conditional_offset(dates_mixed, lambda _date: True, offset_func)
print("条件批量偏移:")
for orig, new in zip(dates_mixed, result):
    print(f"{orig.date()} -> {new.date()}")
8. 与 Period 和频率的集成
8.1 DateOffset 与 PeriodIndex
# Using DateOffset together with Period objects.
periods = pd.period_range('2023Q1', periods=4, freq='Q')
print("Period与DateOffset:")
print("Period:", periods)

# Convert the periods to timestamps first, then apply the offset.
ts_periods = periods.to_timestamp()
quarter_end_roll = QuarterEnd(0)
offsetted = ts_periods + quarter_end_roll
print("偏移后Timestamp:", offsetted)

# And back to quarterly periods again.
round_tripped = offsetted.to_period('Q')
print("转换回Period:", round_tripped)
8.2 频率字符串与 DateOffset
# Offset classes that correspond to frequency strings.
date = pd.Timestamp('2023-01-15')

# Each label names the frequency alias the offset class stands for.
print("频率字符串等价:")
alias_examples = [
    ("D (日):", pd.offsets.Day(5)),
    ("B (工作日):", pd.offsets.BDay(5)),
    ("M (月末):", pd.offsets.MonthEnd(2)),
    ("Q (季度末):", pd.offsets.QuarterEnd(1)),
]
for label, alias_offset in alias_examples:
    print(label, date + alias_offset)

# A custom step built from several units.
day_and_a_half = pd.DateOffset(days=1, hours=12)
custom_freq = date + day_and_a_half
print("自定义频率:", custom_freq)
9. 性能优化和最佳实践
9.1 性能对比
import time
n = 100000
dates = pd.date_range('2023-01-01', periods=n)
# DateOffset vs Timedelta性能
start = time.time()
for i in range(100):
_ = dates + DateOffset(days=1)
dateoffset_time = time.time() - start
start = time.time()
for i in range(100):
_ = dates + pd.Timedelta(days=1)
timedelta_time = time.time() - start
print("性能对比 (100k日期, 100次循环):")
print(f"DateOffset: {dateoffset_time:.3f}s")
print(f"Timedelta: {timedelta_time:.3f}s")
print(f"DateOffset较慢: {dateoffset_time/timedelta_time:.1%}")
9.2 内存优化
# Pre-build frequently used offsets once and reuse them.
class OffsetCache:
    """Cache of offset objects keyed by their specification string.

    Recognised specifications:
        ``'business_day'``: a ``BDay()`` offset;
        ``'month_end'``: a ``MonthEnd()`` offset;
        anything else: a Python dict *literal* of ``DateOffset`` keyword
        arguments, e.g. ``"{'days': 2}"``.
    """

    # Factories for the named shortcuts.
    _NAMED_FACTORIES = {
        'business_day': BDay,
        'month_end': MonthEnd,
    }

    def __init__(self):
        self._cache = {}

    @staticmethod
    def _parse_kwargs(offset_spec):
        """Turn a dict-literal string into DateOffset keyword arguments.

        The string is parsed with ``ast.literal_eval`` rather than ``eval``
        so that a specification can never execute arbitrary code.

        Raises:
            ValueError: if the string is not a dict literal with string keys.
        """
        import ast

        try:
            parsed = ast.literal_eval(offset_spec)
        except (ValueError, SyntaxError, TypeError) as err:
            raise ValueError(f"无法解析偏移参数: {offset_spec!r}") from err
        if not isinstance(parsed, dict) or not all(isinstance(k, str) for k in parsed):
            raise ValueError(f"无法解析偏移参数: {offset_spec!r}")
        return parsed

    def get_offset(self, offset_spec):
        """Return the offset for ``offset_spec``, building it on first use.

        Args:
            offset_spec: One of the specification strings described on the
                class.

        Returns:
            The cached offset object; repeated calls with the same
            specification return the same instance.

        Raises:
            ValueError: if a non-shortcut specification is not a dict literal.
        """
        if offset_spec not in self._cache:
            factory = self._NAMED_FACTORIES.get(offset_spec)
            if factory is not None:
                built = factory()
            else:
                built = DateOffset(**self._parse_kwargs(offset_spec))
            self._cache[offset_spec] = built
        return self._cache[offset_spec]
# Use the cache: fetch the offset once and apply it to the whole range.
cache = OffsetCache()
cached_offset = cache.get_offset('business_day')
dates = pd.date_range('2023-01-01', periods=1000)
result = dates + cached_offset
print("使用偏移缓存避免重复创建")
9.3 最佳实践
def _parse_offset_keywords(offset_spec):
    """Parse ``"days=1, hours=2"`` into a keyword dict without running code."""
    import ast

    call = ast.parse(f"dict({offset_spec})", mode="eval").body
    is_plain_dict_call = (
        isinstance(call, ast.Call)
        and isinstance(call.func, ast.Name)
        and call.func.id == "dict"
        and not call.args
    )
    if not is_plain_dict_call:
        raise ValueError(f"无法解析偏移参数: {offset_spec!r}")
    keywords = {}
    for keyword in call.keywords:
        if keyword.arg is None:  # a ``**mapping`` expansion is not a literal
            raise ValueError(f"无法解析偏移参数: {offset_spec!r}")
        # literal_eval accepts only literals, so no expression is executed.
        keywords[keyword.arg] = ast.literal_eval(keyword.value)
    return keywords


def _business_day_offset(holidays):
    """Build a one-business-day offset that honours optional holidays."""
    if holidays is None:
        return BDay()
    # BDay has no holiday support; CustomBusinessDay does.
    from pandas.tseries.offsets import CustomBusinessDay

    if hasattr(holidays, "holidays"):
        # Calendar-like object (exposes a ``holidays`` attribute).
        return CustomBusinessDay(calendar=holidays)
    return CustomBusinessDay(holidays=holidays)


def robust_date_offset(date, offset_spec, holidays=None):
    """Apply an offset to ``date``, falling back to ``date`` on failure.

    Args:
        date: Timestamp to shift.
        offset_spec: Either an offset object, or a string: ``'business'``
            (one business day), ``'monthend'`` (``MonthEnd()``), or
            ``DateOffset`` keyword arguments such as ``"days=1, hours=2"``.
        holidays: Optional holiday dates, or a holiday calendar object, to
            skip when ``offset_spec`` is ``'business'``.

    Returns:
        The shifted timestamp. If anything goes wrong, the failure is printed
        and the original ``date`` is returned unchanged.
    """
    try:
        if isinstance(offset_spec, str):
            # Resolve the string specification to an offset object.
            if offset_spec == 'business':
                offset = _business_day_offset(holidays)
            elif offset_spec == 'monthend':
                offset = MonthEnd()
            else:
                offset = DateOffset(**_parse_offset_keywords(offset_spec))
        else:
            offset = offset_spec
        # Apply the offset.
        result = date + offset
        # Validate the outcome.
        if pd.isna(result):
            raise ValueError("偏移结果无效")
        return result
    except Exception as e:
        # Deliberate best-effort: report the problem and hand back the input.
        print(f"偏移失败: {e}")
        return date
# Try the helper on a couple of inputs.
test_cases = [
    ('2023-01-27', 'business'),  # a Friday
    ('2023-01-28', 'monthend'),  # a Saturday
]
for date_str, spec in test_cases:
    start_stamp = pd.Timestamp(date_str)
    result = robust_date_offset(start_stamp, spec)
    print(f"{date_str} + {spec} = {result}")
10. 错误处理和边界情况
10.1 常见错误
# Error 1: unsupported keyword argument.
# NOTE(review): the exception type for an unknown keyword appears to differ
# between pandas releases (TypeError vs ValueError), so both are caught.
try:
    offset = DateOffset(invalid_param=1)
except (TypeError, ValueError) as e:
    print("参数错误:", e)

# Error 2: mixing tz-naive and tz-aware timestamps.
naive_date = pd.Timestamp('2023-01-01')
tz_date = naive_date.tz_localize('UTC')
try:
    # Subtracting a tz-naive timestamp from a tz-aware one raises TypeError.
    result = (tz_date + BDay(1)) - naive_date
except TypeError as e:
    print("时区错误:", e)

# Fix: keep every operand in the same timezone state.
result_safe = naive_date + BDay(1)
print("安全偏移:", result_safe)
10.2 边界日期处理
# Leap day: 29 February in a leap year.
leap_day = pd.Timestamp('2024-02-29')
print("闰年处理:")
leap_plus_month = leap_day + DateOffset(months=1)
print("加1月:", leap_plus_month)  # 2024-03-29
leap_month_end = leap_day + MonthEnd(0)
print("到月末:", leap_month_end)  # 2024-02-29

# Last day of February in a non-leap year (28 February).
non_leap = pd.Timestamp('2023-02-28')
non_leap_plus_month = non_leap + DateOffset(months=1)
print("非闰年+1月:", non_leap_plus_month)  # 2023-03-28
non_leap_next_month_end = non_leap + MonthEnd(1)
print("到下月末:", non_leap_next_month_end)  # 2023-03-31
10.3 国际化日历
# Example holiday calendar for China.
from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday


class ChineseHolidayCalendar(AbstractHolidayCalendar):
    """Illustrative holiday calendar with two sample rules.

    The Chinese New Year entry is pinned to a single year (2023) purely as an
    example; Labor Day recurs on 1 May every year.
    """

    rules = [
        Holiday('Chinese New Year', year=2023, month=1, day=22),  # sample entry
        Holiday('Labor Day', month=5, day=1),
    ]
# Use the custom calendar for business-day arithmetic.
ch_cal = ChineseHolidayCalendar()
cbday_ch = CustomBusinessDay(calendar=ch_cal)
print("中国工作日偏移示例:")
date = pd.Timestamp('2023-01-22')  # Chinese New Year
# Offset objects are added to timestamps rather than called; use
# `cbday_ch * n` to move by more than one business day.
print("春节+1工作日:", date + cbday_ch)
Pandas DateOffset 提供了强大的日期时间算术功能,特别是日历感知偏移(BDay, MonthEnd等)在金融、商业场景中非常有用。关键是理解DateOffset与Timedelta的区别,选择合适的偏移类型,并结合自定义日历处理特殊假期。在批量操作中,注意性能优化和错误处理,确保日期计算的准确性和健壮性。