|

Pandas DateOffset 对象

Pandas DateOffset 对象完整参考手册

1. DateOffset 基础概念

1.1 什么是 DateOffset

import pandas as pd
from pandas.tseries.offsets import DateOffset, BDay, MonthEnd
from datetime import datetime
import numpy as np

# DateOffset is the core pandas class for shifting dates and times.
# It underpins pandas date arithmetic and understands calendar rules.

# Minimal example: move a timestamp forward by five calendar days.
offset = DateOffset(days=5)
date = pd.Timestamp('2023-01-15')
result = date + offset

print("基础DateOffset:")
for label, shown in (
    ("原日期", date),
    ("加5天", result),
    ("类型", type(offset)),
):
    print(f"{label}: {shown}")

1.2 DateOffset vs Timedelta

# Calendar-aware DateOffset versus fixed-length Timedelta.
date = pd.Timestamp('2023-01-28')

# A Timedelta always adds an exact amount of elapsed time (24 hours here).
one_day = pd.Timedelta(days=1)
timedelta_result = date + one_day
print("Timedelta结果:", timedelta_result)  # 2023-01-29

# BDay moves to the next business day instead.
next_bday = pd.offsets.BDay()
business_offset = date + next_bday
print("BDay结果:", business_offset)  # 2023-01-30 (weekend skipped)

# Month arithmetic clamps to the last valid day of the target month.
date_end_month = pd.Timestamp('2023-01-31')
one_month = DateOffset(months=1)
offset_end = date_end_month + one_month
print("月末+1月:", offset_end)  # 2023-02-28, not the 31st

2. 基本 DateOffset 类

2.1 DateOffset 基础参数

# DateOffset accepts several calendar and clock units in a single object.
offset_parts = {
    'years': 1,
    'months': 2,
    'days': 3,
    'hours': 4,
    'minutes': 5,
    'seconds': 6,
    'microseconds': 7,
}
offset = DateOffset(**offset_parts)

date = pd.Timestamp('2023-01-15 10:20:30')
result = date + offset
print("复杂DateOffset:")
print(f"原时间: {date}")
print(f"结果: {result}")
# Split the formatted summary into a date part and a time part.
ymd = f"{result.year}-{result.month}-{result.day}"
hms = f"{result.hour}:{result.minute}:{result.second}"
print(f"年月日时分秒: {ymd} {hms}")

2.2 常用基础偏移

date = pd.Timestamp('2023-01-15')

# Single-unit shifts: calendar units first, then clock and week units.
# hours=25 is longer than a day, so the result carries into the next date.
single_unit_demos = (
    ("年偏移:", DateOffset(years=1)),
    ("月偏移:", DateOffset(months=1)),
    ("日偏移:", DateOffset(days=10)),
    ("小时偏移:", DateOffset(hours=25)),
    ("周偏移:", DateOffset(weeks=1)),
)
for caption, shift in single_unit_demos:
    print(caption, date + shift)

# Several units combined in one offset.
combo = DateOffset(years=1, months=1, days=1)
combined = date + combo
print("组合偏移:", combined)

3. 日历感知偏移(Calendar-Aware Offsets)

3.1 工作日偏移(BDay)

# BDay counts business days (Monday through Friday).
date = pd.Timestamp('2023-01-27')  # Friday

print("BDay偏移:")
# A negative count steps backwards, which is the same as subtracting BDay(1).
bday_demos = (
    ("下一个工作日:", 1),   # 2023-01-30 (Monday)
    ("上一个工作日:", -1),  # 2023-01-26 (Thursday)
    ("3个工作日后:", 3),    # 2023-02-01 (Wednesday)
)
for caption, count in bday_demos:
    print(caption, date + BDay(count))

# Starting on a weekend, one business day lands on the following Monday.
weekend = pd.Timestamp('2023-01-28')  # Saturday
monday_after = weekend + BDay(1)
print("周末+1工作日:", monday_after)  # 2023-01-30

3.2 月末和月初偏移

# MonthEnd anchors to the last calendar day of a month.
# From a date that is not itself a month end, n=0 and n=1 both roll forward to
# the end of the *current* month, so "next month end" takes a second step.
date_me = pd.Timestamp('2023-01-15')
month_end_current = date_me + MonthEnd(0)         # 2023-01-31
month_end_next = month_end_current + MonthEnd(1)  # 2023-02-28
month_end_prev = date_me - MonthEnd(1)            # 2022-12-31
print("MonthEnd:")
print("到月末:", month_end_current)
print("下月末:", month_end_next)
print("上月末:", month_end_prev)

# MonthBegin anchors to the first calendar day of a month.
from pandas.tseries.offsets import MonthBegin
# Adding MonthBegin(0) would roll *forward* to 2023-02-01; rollback() is what
# returns the first day of the month the date is already in.
month_begin_current = MonthBegin().rollback(date_me)  # 2023-01-01
month_begin_next = date_me + MonthBegin(1)            # 2023-02-01
print("\nMonthBegin:")
print("到月初:", month_begin_current)
print("下月初:", month_begin_next)

# Relative month shift: DateOffset(months=...) keeps the day of month.
date_mo = pd.Timestamp('2023-01-15')
print("MonthOffset(2个月):", date_mo + DateOffset(months=2))  # 2023-03-15

3.3 季度偏移

# QuarterEnd:季度末
from pandas.tseries.offsets import QuarterEnd, QuarterBegin

date_q = pd.Timestamp('2023-02-15')
# From a date inside a quarter, QuarterEnd(0) and QuarterEnd(1) both roll
# forward to the end of the current quarter.
quarter_end_current = date_q + QuarterEnd(0)            # 2023-03-31
quarter_end_next = quarter_end_current + QuarterEnd(1)  # 2023-06-30
# Negating an n=0 offset leaves n=0, so subtracting QuarterEnd(0) would still
# roll forward; n=1 is required to step back to the previous quarter end.
quarter_end_prev = date_q - QuarterEnd(1)               # 2022-12-31
print("季度偏移:")
print("当前季度末:", quarter_end_current)
print("下一季度末:", quarter_end_next)
print("上一季度末:", quarter_end_prev)

# QuarterBegin
# The default QuarterBegin is anchored to March/June/September/December, so
# calendar quarters need startingMonth=1; rollback() returns the quarter start
# on or before the date instead of rolling forward.
quarter_begin_current = QuarterBegin(startingMonth=1).rollback(date_q)  # 2023-01-01
print("当前季度初:", quarter_begin_current)

3.4 年末和年初偏移

# YearEnd和YearBegin
from pandas.tseries.offsets import YearEnd, YearBegin

date_y = pd.Timestamp('2023-06-15')
# From a mid-year date, YearEnd(0) and YearEnd(1) both roll forward to the end
# of the current year, so the following year end takes a second step.
year_end_current = date_y + YearEnd(0)         # 2023-12-31
year_end_next = year_end_current + YearEnd(1)  # 2024-12-31
# Adding YearBegin(0) would roll forward to 2024-01-01; rollback() returns the
# first day of the year the date is already in.
year_begin_current = YearBegin().rollback(date_y)  # 2023-01-01
print("年度偏移:")
print("当前年终:", year_end_current)
print("下一年终:", year_end_next)
print("当前年初:", year_begin_current)

4. 自定义日历规则偏移

4.1 自定义工作日(CustomBusinessDay)

from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay

# Business days that also skip US federal holidays.
us_holidays = USFederalHolidayCalendar()
cbday_us = CustomBusinessDay(calendar=us_holidays)

date = pd.Timestamp('2023-07-04')  # US Independence Day (a holiday)
print("自定义工作日:")
plain_next = date + BDay(1)  # 2023-07-05
# Offsets are applied with "+"; an offset object is not callable. Use
# cbday_us * n (or CustomBusinessDay(n, ...)) for more than one day.
# Starting from 2023-07-03 the two rules differ: BDay gives 07-04, while
# cbday_us skips the holiday and gives 07-05.
holiday_aware_next = date + cbday_us  # 2023-07-05
print("普通BDay:", plain_next)
print("自定义(跳过假期):", holiday_aware_next)

# Holidays can also be listed explicitly.
custom_cal = pd.tseries.offsets.CustomBusinessDay(
    holidays=['2023-01-01', '2023-12-25']
)
after_christmas = pd.Timestamp('2023-12-25') + custom_cal  # 2023-12-26
print("自定义假期:", after_christmas)

4.2 周偏移(Week)

from pandas.tseries.offsets import Week

# Week offsets anchored to a particular weekday (0 = Monday, 4 = Friday).
week_monday, week_friday = Week(weekday=0), Week(weekday=4)

date = pd.Timestamp('2023-01-15')  # Sunday
print("周偏移:")
for caption, anchored_week in (
    ("到下周一:", week_monday),
    ("到下周五:", week_friday),
):
    print(caption, date + anchored_week)

# From a Saturday, the Monday-anchored week lands on the following Monday.
weekend = pd.Timestamp('2023-01-14')  # Saturday
monday_after = weekend + Week(weekday=0)
print("周末到周一:", monday_after)

4.3 每月第N个星期几偏移(Nth Weekday)

from pandas.tseries.offsets import SemiMonthEnd, SemiMonthBegin

# 每月特定星期几
from pandas.tseries.offsets import MonthEnd

# Third Friday of every month.
# Week(weekday=4, n=3) means "three Fridays ahead of the start date", not
# "the third Friday of the month". WeekOfMonth is the month-anchored rule;
# its week argument is zero-based, so week=2 selects the third occurrence.
third_friday = pd.offsets.WeekOfMonth(week=2, weekday=4)
print("每月第3个周五示例:")
base = pd.Timestamp('2023-01-01')
# date_range walks the rule month by month, starting with the first match
# on or after base.
third_fridays = pd.date_range(base, periods=12, freq=third_friday)
for expiry in third_fridays:
    print(expiry)

5. 高级 DateOffset 功能

5.1 偏移规范化(normalize)

# normalize=True resets the time of day of the result to 00:00:00.
date_time = pd.Timestamp('2023-01-15 14:30:00')

offset_not_normal = DateOffset(days=5, normalize=False)
offset_normal = DateOffset(days=5, normalize=True)

print("normalize对比:")
kept_time = date_time + offset_not_normal
print("带时间原日期:", kept_time)  # time of day preserved
at_midnight = date_time + offset_normal
print("规范化:", at_midnight)  # 00:00:00

# Anchored offsets such as MonthEnd accept normalize as well.
month_end_midnight = MonthEnd(0, normalize=True)
print("MonthEnd normalize:", date_time + month_end_midnight)

5.2 偏移规则组合

# Combining several rules.
# Anchored offsets of different kinds cannot be summed into a single offset
# object (MonthEnd(0) + BDay(5) raises), so each rule is applied to the date
# in turn, left to right.
date = pd.Timestamp('2023-01-15')

# Rule 1: fifth business day after the month end.
rule1_result = date + MonthEnd(0) + BDay(5)
print("月末+5工作日:", rule1_result)  # 2023-02-07

# Rule 2: third business day after the quarter end.
rule2_result = date + QuarterEnd(0) + BDay(3)
print("季度末+3工作日:", rule2_result)  # 2023-04-05

# Chained shifts: next month end, two business days later, at 09:00.
date_chain = pd.Timestamp('2023-01-31')
result_chain = date_chain + MonthEnd(1) + BDay(2) + DateOffset(hours=9)
print("链式偏移:", result_chain)  # 2023-03-02 09:00:00

5.3 条件偏移

def next_business_day(date):
    """Return ``date`` shifted forward by one business day."""
    one_business_day = BDay(n=1)
    return date + one_business_day

def next_month_end_trading_day(date):
    """Return the month end reached by ``MonthEnd(1)``, moved off weekends.

    ``date + MonthEnd(1)`` is the end of the current month, or the end of the
    following month when ``date`` is already a month end. If that day is a
    Saturday or Sunday, the next business day is returned instead.
    """
    candidate = date + MonthEnd(1)
    lands_on_weekend = candidate.weekday() in (5, 6)
    return candidate + BDay(1) if lands_on_weekend else candidate

# Try the helper on a Friday late in January.
test_date = pd.Timestamp('2023-01-27')  # Friday
print("条件偏移:")
month_end_trading_day = next_month_end_trading_day(test_date)
print("下月末交易日:", month_end_trading_day)

6. 实际应用场景

6.1 财务日期计算

# 财报日期计算
def earnings_date_calculation(report_date):
    """Derive earnings-related dates from a report date.

    Args:
        report_date: Timestamp inside the quarter in which results are reported.

    Returns:
        dict with three Timestamps:
        ``prev_quarter_end`` - latest calendar quarter end strictly before
        ``report_date``; ``earnings_release`` - the business day after that
        quarter end; ``post_earnings`` - 30 business days after the release.
    """
    # Subtracting QuarterEnd(0) does not step back: negating n=0 leaves n=0,
    # which rolls *forward* to the next quarter end. n=1 moves backwards.
    prev_quarter_end = report_date - QuarterEnd(1)
    # Next business day after the quarter end (release date).
    earnings_release = prev_quarter_end + BDay(1)
    # Thirtieth business day after the release.
    post_earnings = earnings_release + BDay(30)

    return {
        'prev_quarter_end': prev_quarter_end,
        'earnings_release': earnings_release,
        'post_earnings': post_earnings
    }

# Example: Q1 2023 earnings (usually published in mid-April).
q1_report = pd.Timestamp('2023-04-15')
dates = earnings_date_calculation(q1_report)
for key, date in dates.items():
    # Show only the calendar date of each entry.
    calendar_day = date.date()
    print(f"{key}: {calendar_day}")

6.2 期权到期日计算

def option_expiry_dates(base_date):
    """Compute monthly and weekly option expiry dates relative to a date.

    Args:
        base_date: Timestamp to measure from.

    Returns:
        dict with ``monthly_third_friday`` (first third-Friday-of-month on or
        after ``base_date``), ``weekly`` (the Friday reached by adding one
        Friday-anchored week) and ``type`` ("Quarterly" when the monthly
        expiry falls in March, June, September or December, else "Monthly").
    """
    # Week(weekday=4, n=3) would step three Fridays ahead of base_date rather
    # than pick the third Friday of a month. WeekOfMonth is month-anchored and
    # its week argument is zero-based, so week=2 is the third Friday.
    # rollforward keeps base_date when it already is a third Friday.
    third_friday_rule = pd.offsets.WeekOfMonth(week=2, weekday=4)
    third_friday = third_friday_rule.rollforward(base_date)

    # Quarterly expiries are the third Fridays of March, June, September, December.
    quarter_months = (3, 6, 9, 12)
    expiry_type = "Quarterly" if third_friday.month in quarter_months else "Monthly"

    # Weekly expiry: Friday-anchored week offset.
    weekly = base_date + pd.offsets.Week(weekday=4)

    return {
        'monthly_third_friday': third_friday,
        'weekly': weekly,
        'type': expiry_type
    }

# Print every entry computed for a mid-March base date.
base = pd.Timestamp('2023-03-15')
print("期权到期日:")
expiry_info = option_expiry_dates(base)
for key, value in expiry_info.items():
    print(f"{key}: {value}")

6.3 支付和结算日期

def payment_settlement_dates(invoice_date, terms_days=30):
    """Compute the due, settlement and T+2 dates for an invoice.

    The due date is ``terms_days`` calendar days after ``invoice_date``. A due
    date on Saturday or Sunday settles on the preceding business day; any
    other due date settles on the due date itself. The T+2 date is two
    business days after the settlement date.
    """
    due_date = invoice_date + DateOffset(days=terms_days)

    # weekday() is 5 for Saturday and 6 for Sunday.
    falls_on_weekend = due_date.weekday() in (5, 6)
    settlement_date = due_date - BDay(1) if falls_on_weekend else due_date

    t2_settlement = settlement_date + BDay(2)

    return dict(
        invoice_date=invoice_date,
        due_date=due_date,
        settlement_date=settlement_date,
        t2_settlement=t2_settlement,
    )

invoice = pd.Timestamp('2023-12-29')  # Friday
payment_dates = payment_settlement_dates(invoice)
for key, date in payment_dates.items():
    # Three-letter weekday abbreviation next to each calendar date.
    weekday_abbrev = date.day_name()[:3]
    print(f"{key}: {date.date()} ({weekday_abbrev})")

7. 批量操作和向量化

7.1 DateOffset 向量化

# Apply offsets to a whole array of dates.
dates = pd.date_range('2023-01-01', periods=5, freq='B')
print("批量偏移:")
print("原日期:", dates)

# A different offset for each date. DateOffset keyword arguments must be
# scalars (an array such as np.arange(5) is rejected), so one offset object is
# built per date and the pairs are applied element by element.
offsets = [DateOffset(days=i) for i in range(5)]
batch_results = pd.DatetimeIndex(
    [day + shift for day, shift in zip(dates, offsets)]
)
print("批量结果:", batch_results)

# A single offset applies to a whole Series/DataFrame column at once.
df = pd.DataFrame({'date': dates, 'value': range(5)})
df['shifted'] = df['date'] + BDay(1)
print("\nDataFrame批量偏移:")
print(df[['date', 'shifted']])

7.2 条件批量偏移

def apply_conditional_offset(dates, condition_func, offset_func):
    """Shift only the dates that satisfy a predicate.

    Each element of ``dates`` is replaced by ``offset_func(element)`` when
    ``condition_func(element)`` is truthy and kept unchanged otherwise. The
    results are returned as a ``pd.DatetimeIndex`` in the original order.
    """
    shifted = [
        offset_func(stamp) if condition_func(stamp) else stamp
        for stamp in dates
    ]
    return pd.DatetimeIndex(shifted)

# Example: weekdays move forward one day; weekend dates move to the next Monday.
dates_mixed = pd.date_range('2023-01-13', periods=5)  # Friday through Tuesday
def is_weekday(date):
    """Return True when ``date`` falls on Monday through Friday."""
    return date.weekday() < 5

def offset_func(date):
    """Shift a weekday by one day and a weekend date to the following Monday."""
    if is_weekday(date):
        return date + DateOffset(days=1)
    # weekday() is 5 for Saturday and 6 for Sunday, so 7 - weekday() is the
    # number of days left until Monday (a fixed +3 would overshoot).
    return date + DateOffset(days=7 - date.weekday())

# offset_func already branches on the kind of day, so every date has to reach
# it; filtering with is_weekday here would leave weekend dates untouched.
result = apply_conditional_offset(dates_mixed, lambda _date: True, offset_func)
print("条件批量偏移:")
for orig, new in zip(dates_mixed, result):
    print(f"{orig.date()} -> {new.date()}")

8. 与 Period 和频率的集成

8.1 DateOffset 与 PeriodIndex

# Using DateOffset together with Period data.
periods = pd.period_range('2023Q1', periods=4, freq='Q')
print("Period与DateOffset:")
print("Period:", periods)

# Periods are converted to timestamps first, then the offset is applied.
ts_periods = periods.to_timestamp()
quarter_end_roll = QuarterEnd(0)
offsetted = ts_periods + quarter_end_roll
print("偏移后Timestamp:", offsetted)
back_to_periods = offsetted.to_period('Q')
print("转换回Period:", back_to_periods)

8.2 频率字符串与 DateOffset

# Offset classes that correspond to common frequency aliases.
date = pd.Timestamp('2023-01-15')

print("频率字符串等价:")
alias_examples = (
    ("D (日):", pd.offsets.Day(5)),
    ("B (工作日):", pd.offsets.BDay(5)),
    ("M (月末):", pd.offsets.MonthEnd(2)),
    ("Q (季度末):", pd.offsets.QuarterEnd(1)),
)
for caption, alias_offset in alias_examples:
    print(caption, date + alias_offset)

# A custom step of one and a half days.
day_and_a_half = pd.DateOffset(days=1, hours=12)
custom_freq = date + day_and_a_half
print("自定义频率:", custom_freq)

9. 性能优化和最佳实践

9.1 性能对比

import time

n = 100000
dates = pd.date_range('2023-01-01', periods=n)

# DateOffset vs Timedelta性能
start = time.time()
for i in range(100):
    _ = dates + DateOffset(days=1)
dateoffset_time = time.time() - start

start = time.time()
for i in range(100):
    _ = dates + pd.Timedelta(days=1)
timedelta_time = time.time() - start

print("性能对比 (100k日期, 100次循环):")
print(f"DateOffset: {dateoffset_time:.3f}s")
print(f"Timedelta: {timedelta_time:.3f}s")
print(f"DateOffset较慢: {dateoffset_time/timedelta_time:.1%}")

9.2 内存优化

# 预计算常用偏移
class OffsetCache:
    """Memoise offset objects by their string specification.

    Recognised specifications are ``'business_day'``, ``'month_end'`` and the
    text of a dict literal holding DateOffset keyword arguments, for example
    ``"{'days': 3}"``.
    """

    def __init__(self):
        # Maps a specification string to the offset built for it.
        self._cache = {}

    def get_offset(self, offset_spec):
        """Return the offset for ``offset_spec``, building it on first use.

        Raises:
            ValueError: if ``offset_spec`` is not a known name and cannot be
                read as a dict literal.
        """
        if offset_spec not in self._cache:
            self._cache[offset_spec] = self._build_offset(offset_spec)
        return self._cache[offset_spec]

    @staticmethod
    def _build_offset(offset_spec):
        """Create the offset object described by ``offset_spec``."""
        import ast

        if offset_spec == 'business_day':
            return BDay()
        if offset_spec == 'month_end':
            return MonthEnd()
        # literal_eval only accepts Python literals, so a specification string
        # cannot run arbitrary code the way eval() would.
        try:
            kwargs = ast.literal_eval(offset_spec)
        except (ValueError, SyntaxError, TypeError) as err:
            raise ValueError(f"invalid offset specification: {offset_spec!r}") from err
        if not isinstance(kwargs, dict):
            raise ValueError(f"offset specification must be a dict literal: {offset_spec!r}")
        return DateOffset(**kwargs)

# Fetch the business-day offset from the cache and apply it to an index.
cache = OffsetCache()
cached_offset = cache.get_offset('business_day')
dates = pd.date_range('2023-01-01', periods=1000)
result = dates + cached_offset
print("使用偏移缓存避免重复创建")

9.3 最佳实践

def _parse_offset_kwargs(offset_spec):
    """Turn text such as ``"days=1, hours=2"`` into DateOffset keyword arguments."""
    import ast

    # The text is parsed as the argument list of a dict(...) call and only
    # literal keyword values are accepted, so nothing in it is ever executed.
    call = ast.parse(f"dict({offset_spec})", mode="eval").body
    if (
        not isinstance(call, ast.Call)
        or not isinstance(call.func, ast.Name)
        or call.args
    ):
        raise ValueError(f"invalid offset specification: {offset_spec!r}")
    kwargs = {}
    for keyword in call.keywords:
        if keyword.arg is None:  # "**something" is not a plain name=value pair
            raise ValueError(f"invalid offset specification: {offset_spec!r}")
        kwargs[keyword.arg] = ast.literal_eval(keyword.value)
    return kwargs


def robust_date_offset(date, offset_spec, holidays=None):
    """Apply an offset to ``date``, falling back to ``date`` on any failure.

    Args:
        date: Timestamp to shift.
        offset_spec: an offset object, or one of the strings ``'business'``
            (one business day), ``'monthend'`` (one MonthEnd), or DateOffset
            keyword text such as ``"days=1, hours=2"``.
        holidays: optional holidays for ``'business'``; either an iterable of
            dates or a calendar object exposing a ``holidays`` attribute.

    Returns:
        The shifted Timestamp, or ``date`` unchanged when the specification is
        invalid or the shift fails (the error is printed).
    """
    try:
        if isinstance(offset_spec, str):
            if offset_spec == 'business':
                # BDay takes no calendar/holiday argument; holiday-aware
                # business days need CustomBusinessDay.
                if holidays is None:
                    offset = BDay()
                elif hasattr(holidays, 'holidays'):
                    offset = pd.offsets.CustomBusinessDay(calendar=holidays)
                else:
                    offset = pd.offsets.CustomBusinessDay(holidays=list(holidays))
            elif offset_spec == 'monthend':
                offset = MonthEnd()
            else:
                offset = DateOffset(**_parse_offset_kwargs(offset_spec))
        else:
            offset = offset_spec

        # Apply the offset.
        result = date + offset

        # Reject a missing (NaT) result.
        if pd.isna(result):
            raise ValueError("偏移结果无效")

        return result

    except Exception as e:
        # Deliberate best-effort: report the problem and hand back the input.
        print(f"偏移失败: {e}")
        return date

# Sample inputs: (date string, specification name).
test_cases = [
    ('2023-01-27', 'business'),  # Friday
    ('2023-01-28', 'monthend'),  # Saturday
]

for date_str, spec in test_cases:
    parsed = pd.Timestamp(date_str)
    result = robust_date_offset(parsed, spec)
    print(f"{date_str} + {spec} = {result}")

10. 错误处理和边界情况

10.1 常见错误

# Error 1: unsupported keyword argument.
# NOTE(review): the exception type raised for an unknown keyword appears to
# depend on the pandas version (TypeError or ValueError) - confirm; both are
# handled here.
param_error_raised = False
try:
    offset = DateOffset(invalid_param=1)
except (TypeError, ValueError) as e:
    param_error_raised = True
    print("参数错误:", e)

# Error 2: timezone mismatch.
naive_date = pd.Timestamp('2023-01-01')
tz_date = naive_date.tz_localize('UTC')

tz_error_raised = False
try:
    # Adding an offset to either timestamp works on its own; the failure
    # appears when a tz-naive and a tz-aware timestamp meet in one expression.
    gap = naive_date - tz_date
except TypeError as e:
    tz_error_raised = True
    print("时区错误:", e)

# Fix: keep both operands either naive or aware before combining them.
result_safe = naive_date + BDay(1)
print("安全偏移:", result_safe)

10.2 边界日期处理

# 29 February in a leap year.
leap_day = pd.Timestamp('2024-02-29')
one_month = DateOffset(months=1)
print("闰年处理:")
print("加1月:", leap_day + one_month)  # 2024-03-29
print("到月末:", leap_day + MonthEnd(0))  # 2024-02-29 (already a month end)

# Last day of February in a non-leap year.
non_leap = pd.Timestamp('2023-02-28')
print("非闰年+1月:", non_leap + one_month)  # 2023-03-28
print("到下月末:", non_leap + MonthEnd(1))  # 2023-03-31

10.3 国际化日历

# 中国假期示例
from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday

class ChineseHolidayCalendar(AbstractHolidayCalendar):
    """Illustrative holiday calendar with two sample rules."""

    # The first rule is pinned to a single date in 2023 (example only); the
    # second has no year and therefore recurs on 1 May.
    rules = [
        Holiday(name='Chinese New Year', year=2023, month=1, day=22),
        Holiday(name='Labor Day', month=5, day=1),
    ]

# Build a business-day offset that skips the holidays of the custom calendar.
ch_cal = ChineseHolidayCalendar()
cbday_ch = CustomBusinessDay(calendar=ch_cal)

print("中国工作日偏移示例:")
date = pd.Timestamp('2023-01-22')  # Spring Festival date used in the calendar
# Offsets are applied with "+"; an offset object is not callable. Multiply the
# offset (cbday_ch * n) to move by more than one business day.
print("春节+1工作日:", date + cbday_ch)

Pandas DateOffset 提供了强大的日期时间算术功能,特别是日历感知偏移(BDay, MonthEnd等)在金融、商业场景中非常有用。关键是理解DateOffset与Timedelta的区别,选择合适的偏移类型,并结合自定义日历处理特殊假期。在批量操作中,注意性能优化和错误处理,确保日期计算的准确性和健壮性。

类似文章

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注