Pandas 数据可视化详解

1. Pandas 内置绘图基础

1.1 基本绘图方法

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Configure matplotlib so the Chinese titles and labels used below can be rendered
plt.rcParams['font.sans-serif'] = ['SimHei', 'DejaVu Sans']  # sans-serif font list; SimHei is a Chinese font
plt.rcParams['axes.unicode_minus'] = False  # draw the minus sign correctly with this font setup

# Build the sample data: 100 daily rows starting 2023-01-01, seeded so it is reproducible
np.random.seed(42)
dates = pd.date_range('2023-01-01', periods=100, freq='D')
df = pd.DataFrame({
    'date': dates,
    'sales': np.random.randn(100).cumsum() + 1000,
    'profit': np.random.randn(100).cumsum() + 500,
    'region': np.random.choice(['North', 'South', 'East', 'West'], 100),
    'category': np.random.choice(['A', 'B', 'C'], 100)
}, index=dates)  # the dates are stored both as the 'date' column and as the index

print("数据预览:")
print(df.head())

1.1.1 基本线图

# Simple line plot of the sales series (x axis is the DataFrame's date index)
df['sales'].plot()
plt.title('销售趋势')
plt.xlabel('日期')
plt.ylabel('销售额')
plt.show()

# Multi-line plot: one line per selected column
df[['sales', 'profit']].plot(figsize=(10, 6))
plt.title('销售与利润趋势')
plt.ylabel('数值')
plt.legend(title='指标')
plt.show()

# Custom styling: explicit x column, per-series line styles, grid and line width
df.plot(x='date', y=['sales', 'profit'], 
        figsize=(12, 6),
        title='销售与利润时间序列',
        grid=True,
        style=['-', '--'],
        linewidth=2)
plt.tight_layout()
plt.show()

1.1.2 柱状图

# Histogram of a single variable (20 bins)
df['sales'].hist(bins=20, figsize=(10, 6))
plt.title('销售分布直方图')
plt.xlabel('销售额')
plt.ylabel('频数')
plt.show()

# Bar chart per category: total sales for each region
regional_sales = df.groupby('region')['sales'].sum()
regional_sales.plot(kind='bar', figsize=(8, 6), color='skyblue')
plt.title('各地区总销售额')
plt.xlabel('地区')
plt.ylabel('销售额')
plt.xticks(rotation=45)
plt.show()

# Horizontal bar chart of the same totals
regional_sales.plot(kind='barh', figsize=(8, 6))
plt.title('各地区总销售额（水平）')
plt.xlabel('销售额')
plt.ylabel('地区')
plt.tight_layout()
plt.show()

1.2 高级内置图表

1.2.1 散点图

# Basic scatter plot of sales against profit
df.plot.scatter(x='sales', y='profit', figsize=(8, 6), alpha=0.6)
plt.title('销售与利润散点图')
plt.xlabel('销售额')
plt.ylabel('利润')
plt.show()

# Scatter plot with colour and marker-size encodings.
# pandas adds its own colorbar when `c` is numeric, so it is disabled here
# (colorbar=False); otherwise the explicit plt.colorbar call below would
# produce a second, duplicate colorbar.
scatter = df.plot.scatter(x='sales', y='profit',
                         c=df.index.dayofyear,  # colour by day of the year
                         s=df['sales']/10,      # marker size scales with sales
                         cmap='viridis',
                         alpha=0.7,
                         colorbar=False,
                         figsize=(10, 8))
# Attach the single colorbar explicitly to the scatter axes
plt.colorbar(scatter.collections[0], ax=scatter)
plt.title('销售-利润散点图（时间着色，大小表示销售额）')
plt.show()

1.2.2 饼图和面积图

# Pie chart: share of total sales per category, with percentage labels
category_sales = df.groupby('category')['sales'].sum()
category_sales.plot(kind='pie', figsize=(8, 8), autopct='%1.1f%%')
plt.title('各品类销售占比')
plt.ylabel('')  # remove the y-axis label
plt.show()

# Area chart of sales and profit
df[['sales', 'profit']].plot(kind='area', figsize=(12, 6), alpha=0.7)
plt.title('销售与利润面积图')
plt.ylabel('数值')
plt.legend(loc='upper left')
plt.show()

1.2.3 箱线图

# Box plot of a single column
df.boxplot(column='sales', figsize=(8, 6))
plt.title('销售数据箱线图')
plt.ylabel('销售额')
plt.show()

# Grouped box plot: one box per region
df.boxplot(column='sales', by='region', figsize=(10, 6))
plt.suptitle('各地区销售箱线图')  # replaces the figure title that pandas adds for grouped box plots
plt.title('')  # blank out the axes title
plt.ylabel('销售额')
plt.show()

# Box plots of several columns side by side
df[['sales', 'profit']].boxplot(figsize=(10, 6))
plt.title('销售与利润箱线图')
plt.ylabel('数值')
plt.show()

2. 分组数据可视化

2.1 透视表可视化

# Build a pivot table: summed sales with regions as rows and the index's month number as columns
pivot_sales = df.pivot_table(values='sales', 
                            index='region', 
                            columns=df.index.month, 
                            aggfunc='sum')

# Heat map of the pivot table, each cell annotated with its value (no decimals)
import seaborn as sns
plt.figure(figsize=(10, 6))
sns.heatmap(pivot_sales, annot=True, cmap='YlOrRd', fmt='.0f')
plt.title('月度地区销售热力图')
plt.xlabel('月份')
plt.ylabel('地区')
plt.show()

# Stacked bar chart: one bar per region, stacked by month
pivot_sales.plot(kind='bar', stacked=True, figsize=(12, 6))
plt.title('各地区月度销售堆叠图')
plt.xlabel('地区')
plt.ylabel('销售额')
plt.legend(title='月份')
plt.xticks(rotation=45)
plt.show()

2.2 分组统计图

# Per-region statistics: mean/sum/count of sales and mean of profit, rounded to 2 decimals
group_stats = df.groupby('region').agg({
    'sales': ['mean', 'sum', 'count'],
    'profit': 'mean'
}).round(2)

# Pick the two statistics to draw from the two-level column index
means = group_stats['sales']['mean']
sums = group_stats['sales']['sum']

# One x position per region; each region gets two bars of this width, side by side
x = np.arange(len(means))
width = 0.35

fig, ax = plt.subplots(figsize=(10, 6))
bars1 = ax.bar(x - width/2, means, width, label='平均销售额', alpha=0.8)
# Totals are divided by 1000 before plotting (the legend label says "thousands")
bars2 = ax.bar(x + width/2, sums/1000, width, label='总销售额(千)', alpha=0.8)

ax.set_xlabel('地区')
ax.set_ylabel('销售额')
ax.set_title('各地区销售统计对比')
ax.set_xticks(x)
ax.set_xticklabels(means.index)
ax.legend()
ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

3. 时间序列可视化

3.1 基础时间序列图

# Use the 'date' column as the index
ts_df = df.set_index('date')

# Basic time-series plot of sales and profit
ts_df[['sales', 'profit']].plot(figsize=(12, 6))
plt.title('时间序列分析')
plt.ylabel('数值')
plt.show()

# Rolling means of sales over 7-row and 30-row windows
ts_df['sales_ma7'] = ts_df['sales'].rolling(window=7).mean()
ts_df['sales_ma30'] = ts_df['sales'].rolling(window=30).mean()

ts_df[['sales', 'sales_ma7', 'sales_ma30']].plot(figsize=(12, 6))
plt.title('销售趋势与移动平均')
plt.ylabel('销售额')
plt.legend(['实际值', '7日均值', '30日均值'])  # legend entries in the same order as the plotted columns
plt.show()

3.2 季节性和趋势分解

from statsmodels.tsa.seasonal import seasonal_decompose
import warnings
warnings.filterwarnings('ignore')  # NOTE: this silences every warning for the rest of the session

# Monthly totals. The sample data covers only 100 days, so this series has about
# four points. That is far fewer than the two full cycles (24 points) that
# seasonal_decompose needs for period=12, so it cannot be decomposed.
# The name is kept so it stays available to readers of the tutorial.
monthly_sales = ts_df['sales'].resample('M').sum()

# Decompose the daily series instead, assuming a weekly (7-day) seasonal cycle;
# 100 daily observations comfortably cover the required two cycles.
decomposition = seasonal_decompose(ts_df['sales'], model='additive', period=7)
fig = decomposition.plot()
fig.set_size_inches(12, 8)
plt.suptitle('时间序列分解（趋势、季节性、残差）')
plt.show()

# Draw each component of the decomposition on its own axes
fig, axes = plt.subplots(4, 1, figsize=(12, 10))
decomposition.observed.plot(ax=axes[0], title='原始数据')
decomposition.trend.plot(ax=axes[1], title='趋势')
decomposition.seasonal.plot(ax=axes[2], title='季节性')
decomposition.resid.plot(ax=axes[3], title='残差')
plt.tight_layout()
plt.show()

3.3 相关性时间序列

# Rolling correlation between sales and profit over a 30-row window
rolling_corr = ts_df['sales'].rolling(window=30).corr(ts_df['profit'])

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))

# Top panel: the raw series, with profit drawn against a secondary y axis
ts_df[['sales', 'profit']].plot(ax=ax1, secondary_y='profit')
ax1.set_title('销售与利润时间序列')
ax1.set_ylabel('销售')

# Bottom panel: the rolling correlation, with a dashed reference line at zero
rolling_corr.plot(ax=ax2, color='red', linewidth=2)
ax2.axhline(y=0, color='black', linestyle='--', alpha=0.5)
ax2.set_title('销售与利润滚动相关性（30天窗口）')
ax2.set_ylabel('相关系数')

plt.tight_layout()
plt.show()

4. 统计图表

4.1 分布分析

# Density plot and histogram side by side
fig, axes = plt.subplots(1, 2, figsize=(15, 6))

# Density plot of sales (left panel)
df['sales'].plot(kind='density', ax=axes[0], color='blue', bw_method=0.3)
axes[0].set_title('销售密度分布')
axes[0].set_xlabel('销售额')

# Histogram of sales with 30 bins (right panel)
df['sales'].plot(kind='hist', bins=30, ax=axes[1], color='green', alpha=0.7)
axes[1].set_title('销售直方图')
axes[1].set_xlabel('销售额')

plt.tight_layout()
plt.show()

# Q-Q plot against the normal distribution (visual normality check)
from scipy import stats
stats.probplot(df['sales'], dist="norm", plot=plt)
plt.title('销售数据Q-Q图（正态性检验）')
plt.show()

4.2 相关性可视化

# Correlation matrix of sales and profit
corr_matrix = df[['sales', 'profit']].corr()

# Annotated heat map of the matrix, colour scale centred on zero
plt.figure(figsize=(8, 6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0,
            square=True, fmt='.3f')
plt.title('相关系数热力图')
plt.tight_layout()
plt.show()

# Pair plot: KDE on the diagonal, regression plots off the diagonal
sns.pairplot(df[['sales', 'profit']], diag_kind='kde', kind='reg')
plt.suptitle('销售与利润配对图', y=1.02)  # y > 1 places the title above the grid
plt.show()

5. 高级可视化技巧

5.1 自定义样式和主题

# Apply a global matplotlib style.
# Alternatives: plt.style.use('ggplot'), plt.style.use('seaborn-v0_8')
plt.style.use('seaborn-v0_8-darkgrid')
# The seaborn style sheets define their own 'font.sans-serif' list, which
# replaces the Chinese font configured earlier. Re-apply it so the Chinese
# titles below still render.
plt.rcParams['font.sans-serif'] = ['SimHei', 'DejaVu Sans']
plt.rcParams['axes.unicode_minus'] = False

# Custom colours shared by the panels of the 2x2 figure
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# 1. Custom line plot (top-left)
axes[0,0].plot(df.index, df['sales'], color=colors[0], linewidth=2, marker='o', markersize=4)
axes[0,0].set_title('自定义线图', fontsize=14, fontweight='bold')
axes[0,0].grid(True, alpha=0.3)

# 2. Custom bar chart of mean sales per region (top-right)
regional_sales = df.groupby('region')['sales'].mean()
bars = axes[0,1].bar(regional_sales.index, regional_sales.values,
                    color=colors[:len(regional_sales)], alpha=0.8, edgecolor='black')
axes[0,1].set_title('自定义柱状图')
axes[0,1].tick_params(axis='x', rotation=45)

# Write each bar's value just above its top edge, horizontally centred
for bar in bars:
    height = bar.get_height()
    axes[0,1].text(bar.get_x() + bar.get_width()/2., height,
                  f'{height:.0f}', ha='center', va='bottom')

# 3. Scatter plot coloured by day of year, with its own colorbar (bottom-left)
scatter = axes[1,0].scatter(df['sales'], df['profit'],
                           c=df.index.dayofyear, cmap='viridis', s=50, alpha=0.6)
plt.colorbar(scatter, ax=axes[1,0])
axes[1,0].set_title('自定义散点图')

# 4. Grouped box plot (bottom-right)
df.boxplot(column='sales', by='region', ax=axes[1,1], patch_artist=True)
axes[1,1].set_title('自定义箱线图')
# A grouped pandas box plot sets a figure-level "Boxplot grouped by ..." title,
# which would otherwise sit above the whole 2x2 grid; clear it.
fig.suptitle('')

plt.tight_layout()
plt.show()

5.2 子图布局

# Complex subplot layout on a single figure
fig = plt.figure(figsize=(16, 12))

# Five axes placed on a 3x3 grid: row 0 is split 2+1, row 1 spans all three columns, row 2 is split 2+1
ax1 = plt.subplot2grid((3, 3), (0, 0), colspan=2)
ax2 = plt.subplot2grid((3, 3), (0, 2))
ax3 = plt.subplot2grid((3, 3), (1, 0), colspan=3)
ax4 = plt.subplot2grid((3, 3), (2, 0), colspan=2)
ax5 = plt.subplot2grid((3, 3), (2, 2))

# Main sales time series
df['sales'].plot(ax=ax1, title='销售时间序列', linewidth=2)
ax1.set_ylabel('销售额')

# Total sales per region
regional_sales = df.groupby('region')['sales'].sum()
regional_sales.plot(kind='bar', ax=ax2, title='地区销售汇总', color=colors)
ax2.tick_params(axis='x', rotation=45)
ax2.set_ylabel('总销售额')

# 7-row rolling mean; NOTE: this adds a 'sales_ma' column to df itself
df['sales_ma'] = df['sales'].rolling(7).mean()
df[['sales', 'sales_ma']].plot(ax=ax3, title='销售与7日移动平均', secondary_y='sales_ma')
ax3.set_ylabel('销售额')

# Distribution of sales
df['sales'].hist(bins=20, ax=ax4, alpha=0.7, color='skyblue')
ax4.set_title('销售分布')
ax4.set_xlabel('销售额')
ax4.set_ylabel('频数')

# Pie chart of sales share per category
category_sales = df.groupby('category')['sales'].sum()
ax5.pie(category_sales.values, labels=category_sales.index, autopct='%1.1f%%', 
        colors=colors[:len(category_sales)])
ax5.set_title('品类销售占比')

plt.tight_layout()
plt.show()

6. 与Seaborn集成

6.1 高级统计可视化

import seaborn as sns

# Apply the seaborn style and palette.
# seaborn styles carry their own 'font.sans-serif' list, so set_style would
# replace the Chinese font configured for matplotlib. Passing the font list in
# the override dict keeps the Chinese titles below readable.
sns.set_style("whitegrid", {"font.sans-serif": ['SimHei', 'DejaVu Sans']})
sns.set_palette("husl")

# Scatter plot coloured by region, marker size scaled by sales
plt.figure(figsize=(12, 8))
sns.scatterplot(data=df, x='sales', y='profit', hue='region', size='sales',
                sizes=(20, 200), alpha=0.7)
plt.title('销售与利润散点图（按地区着色）')
plt.show()

# Box plot of sales per region
plt.figure(figsize=(10, 6))
sns.boxplot(data=df, x='region', y='sales')
plt.title('各地区销售箱线图')
plt.xticks(rotation=45)
plt.show()

# Violin plot of sales per region
plt.figure(figsize=(10, 6))
sns.violinplot(data=df, x='region', y='sales')
plt.title('各地区销售小提琴图')
plt.xticks(rotation=45)
plt.show()

6.2 相关性分析可视化

# Pair plot coloured by region, histograms on the diagonal
sns.pairplot(df[['sales', 'profit', 'region']], hue='region', diag_kind='hist')
plt.suptitle('多变量配对图', y=1.02)
plt.show()

# Regression plot, one fit per region.
# lmplot is a figure-level function: it always creates its own figure, so a
# preceding plt.figure() only leaves an empty extra window. The figure size is
# set through height/aspect instead (6 in tall, 10 in wide).
sns.lmplot(data=df, x='sales', y='profit', hue='region',
           scatter_kws={'alpha':0.6}, line_kws={'linewidth':2},
           height=6, aspect=10/6)
plt.title('销售与利润回归分析')
plt.show()

# Hexbin joint distribution with marginal histograms.
# jointplot is also figure-level, so no plt.figure() call is needed here either.
sns.jointplot(data=df, x='sales', y='profit', kind='hex', bins=30)
# y > 1 lifts the title above the top marginal axes, as in the pair plots above
plt.suptitle('销售与利润六边形联合分布', y=1.02)
plt.show()

7. 交互式可视化

7.1 Plotly集成

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Interactive line chart of sales and profit over time
fig = px.line(df, x='date', y=['sales', 'profit'], 
              title='交互式销售与利润趋势',
              labels={'value': '数值', 'date': '日期'})
fig.update_layout(hovermode='x unified')
fig.show()

# Interactive scatter plot: colour by region, marker size by sales, category shown on hover
fig = px.scatter(df, x='sales', y='profit', 
                color='region', size='sales',
                hover_data=['category'],
                title='交互式销售-利润散点图')
fig.show()

# 3D scatter plot with day of year on the z axis
fig = px.scatter_3d(df, x='sales', y='profit', z=df.index.dayofyear,
                   color='region', size='sales',
                   title='3D销售分析')
fig.show()

7.2 Bokeh可视化

from bokeh.plotting import figure, show
from bokeh.models import HoverTool, ColumnDataSource
from bokeh.io import output_notebook
# output_notebook()  # Jupyter环境

from bokeh.layouts import column
from bokeh.palettes import Category10
from bokeh.transform import factor_cmap

# Wrap the DataFrame so glyphs and tooltips can refer to its columns by name
source = ColumnDataSource(df)

# Map each region name to a palette colour. Passing color='region' directly
# would make Bokeh read the region names themselves ('North', ...) as colours,
# which are not valid colour values.
regions = sorted(df['region'].unique())
region_colors = factor_cmap('region',
                            palette=Category10[10][:len(regions)],
                            factors=regions)

# Main figure: sales against profit
p = figure(title="销售数据交互图", height=400, width=800,
           tools="pan,wheel_zoom,box_zoom,reset,hover,save")

# One point per row, coloured by region, with one legend entry per region
points = p.scatter('sales', 'profit', source=source, size=8,
                   color=region_colors, fill_alpha=0.6,
                   legend_group='region')

# Configure the hover tool that the `tools` string created
hover = p.select_one(HoverTool)
hover.renderers = [points]  # only the data points carry the tooltip fields
hover.tooltips = [
    ("日期", "@date{%Y-%m-%d}"),
    ("销售额", "@sales{0.0}"),   # numeral-style format: one decimal place
    ("利润", "@profit{0.0}"),
    ("地区", "@region")
]
# Formatter keys must name the field exactly as it appears in the tooltip, including '@'
hover.formatters = {"@date": "datetime"}

# The 7-day rolling mean is a function of time, so it gets its own figure with
# a datetime x axis; drawing it on the sales/profit axes above would mix
# timestamps with sales values on the same axis.
sales_trend = df['sales'].rolling(7).mean()
trend_fig = figure(title="销售额7日均值", height=250, width=800,
                   x_axis_type="datetime",
                   tools="pan,wheel_zoom,box_zoom,reset,save")
trend_fig.line(df.index, sales_trend, color='red', line_width=2, legend_label='7日均值')

# Show both figures stacked vertically
show(column(p, trend_fig))

8. 自定义可视化函数

8.1 自动化报告生成

class PandasVisualizer:
    """Helper that draws a set of standard charts for one DataFrame.

    The frame is stored as ``self.df`` and is only read by the plotting
    methods. ``self.fig_size`` is the default ``(width, height)`` in inches
    used by the methods that create their own figure.
    """

    def __init__(self, df):
        """Store the DataFrame to visualise and the default figure size."""
        self.df = df
        self.fig_size = (12, 8)

    def _numeric_columns(self):
        """Return the labels of the numeric columns of ``self.df``."""
        return self.df.select_dtypes(include=[np.number]).columns

    def _has_time_axis(self):
        """Return True if the frame has a 'date' column or a DatetimeIndex.

        ``isinstance`` is used rather than comparing the index dtype with
        'datetime64[ns]', so timezone-aware and non-nanosecond datetime
        indexes are recognised as well.
        """
        return 'date' in self.df.columns or isinstance(self.df.index, pd.DatetimeIndex)

    def plot_time_series(self, columns=None, rolling_windows=(7, 30)):
        """Plot each column over time together with its rolling means.

        Args:
            columns: Column label or iterable of labels to plot, one subplot
                per column. Defaults to every numeric column.
            rolling_windows: Window sizes (in rows) of the rolling means drawn
                on top of each series.

        Raises:
            ValueError: If there is no column to plot.
        """
        if columns is None:
            columns = self._numeric_columns()
        elif isinstance(columns, str):
            # A bare string would otherwise be iterated character by character
            columns = [columns]
        columns = list(columns)
        if not columns:
            raise ValueError("plot_time_series: no columns to plot")

        # Use the 'date' column as the time axis when there is one
        ts_df = self.df.set_index('date') if 'date' in self.df else self.df

        # squeeze=False keeps `axes` two-dimensional even for a single subplot
        fig, axes = plt.subplots(
            len(columns), 1,
            figsize=(self.fig_size[0], self.fig_size[1] * len(columns)),
            squeeze=False,
        )

        for ax, col in zip(axes[:, 0], columns):
            ts_df[col].plot(ax=ax, linewidth=2, label=col)

            # Rolling means, one line per window size
            for window in rolling_windows:
                ma = ts_df[col].rolling(window).mean()
                ma.plot(ax=ax, alpha=0.7, label=f'{window}日均值')

            ax.set_title(f'{col}时间序列分析')
            ax.legend()
            ax.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

    def plot_category_analysis(self, cat_col, value_col, plot_type='bar'):
        """Plot the mean and the sum of ``value_col`` per ``cat_col`` group.

        Args:
            cat_col: Column to group by.
            value_col: Column that is aggregated.
            plot_type: pandas plot ``kind`` used for both panels.
        """
        group_data = self.df.groupby(cat_col)[value_col].agg(['mean', 'sum', 'count'])

        fig, axes = plt.subplots(1, 2, figsize=self.fig_size)

        # Left panel: group means
        group_data['mean'].plot(kind=plot_type, ax=axes[0], color='skyblue')
        axes[0].set_title(f'{cat_col}平均{value_col}')
        axes[0].tick_params(axis='x', rotation=45)

        # Right panel: group totals
        group_data['sum'].plot(kind=plot_type, ax=axes[1], color='lightcoral')
        axes[1].set_title(f'{cat_col}总{value_col}')
        axes[1].tick_params(axis='x', rotation=45)

        plt.tight_layout()
        plt.show()

    def plot_correlation_matrix(self, numeric_cols=None):
        """Draw an annotated heat map of the pairwise correlations.

        Args:
            numeric_cols: Columns to correlate. Defaults to every numeric
                column.
        """
        if numeric_cols is None:
            numeric_cols = self._numeric_columns()

        corr_matrix = self.df[numeric_cols].corr()

        plt.figure(figsize=self.fig_size)
        sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0,
                    square=True, fmt='.3f')
        plt.title('相关系数热力图')
        plt.tight_layout()
        plt.show()

    def generate_summary_report(self):
        """Show the full sequence of report charts and print progress messages."""
        print("生成可视化报告...")

        # 1. Distributions: one histogram per column
        self.df.hist(figsize=(15, 10), bins=20, alpha=0.7)
        plt.suptitle('数据分布直方图')
        plt.tight_layout()
        plt.show()

        # 2. Box plots
        self.df.boxplot(figsize=(12, 8))
        plt.title('各变量箱线图')
        plt.xticks(rotation=45)
        plt.show()

        # 3. Correlations
        self.plot_correlation_matrix()

        # 4. Time series, only when the frame carries dates
        if self._has_time_axis():
            self.plot_time_series()

        print("报告生成完成!")

# Usage example: build the helper, show the full report, then one per-category analysis
visualizer = PandasVisualizer(df)
visualizer.generate_summary_report()
visualizer.plot_category_analysis('region', 'sales')

9. 输出和保存

9.1 保存静态图片

# Save the same figure in several formats (the file extension selects the format)
df['sales'].plot()
plt.title('销售趋势')
plt.savefig('sales_trend.png', dpi=300, bbox_inches='tight')
plt.savefig('sales_trend.pdf', bbox_inches='tight')
plt.savefig('sales_trend.svg', bbox_inches='tight')
plt.show()

# Save a multi-panel figure without displaying it
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
# ... plotting code goes here ...
plt.savefig('report_multi.png', dpi=300, bbox_inches='tight')
plt.close()  # close the figure so it does not stay open in memory

9.2 HTML报告

from pathlib import Path

def create_html_report(df, output_dir='reports'):
    """Write an HTML report with two charts into ``output_dir``.

    The report contains the frame's shape, its time range, a line chart of
    the 'sales' column and a bar chart of total sales per 'region'. The two
    charts are saved as PNG files next to ``report.html``, which refers to
    them by relative file name.

    Args:
        df: DataFrame with 'sales' and 'region' columns. The time range is
            read from the 'date' column when present, otherwise from the
            index.
        output_dir: Directory for the report files; it is created, including
            missing parent directories, if it does not exist.
    """
    out_dir = Path(output_dir)
    # parents=True lets nested paths such as 'out/reports' be created in one go
    out_dir.mkdir(parents=True, exist_ok=True)

    # HTML is collected in pieces and joined once at the end
    parts = ["""
    <!DOCTYPE html>
    <html>
    <head>
        <title>Pandas数据可视化报告</title>
        <style>
            body { font-family: Arial, sans-serif; margin: 40px; }
            .chart-container { margin: 20px 0; text-align: center; }
            h1, h2 { color: #2c3e50; }
            img { max-width: 100%; height: auto; }
        </style>
    </head>
    <body>
    """]

    # Basic statistics; fall back to the index when there is no 'date' column
    time_axis = df['date'] if 'date' in df.columns else df.index
    parts.append(f"<h1>数据概览 (形状: {df.shape})</h1>")
    parts.append(f"<p>时间范围: {time_axis.min()} 到 {time_axis.max()}</p>")

    # Sales trend chart. Saving and closing go through the figure object
    # itself rather than pyplot's implicit "current figure".
    fig, ax = plt.subplots(figsize=(12, 6))
    df['sales'].plot(ax=ax)
    ax.set_title('销售趋势')
    fig.savefig(out_dir / 'sales_trend.png', dpi=300, bbox_inches='tight')
    plt.close(fig)

    parts.append('<div class="chart-container"><h2>销售趋势</h2><img src="sales_trend.png"></div>')

    # Total sales per region
    regional_sales = df.groupby('region')['sales'].sum()
    fig, ax = plt.subplots(figsize=(10, 6))
    regional_sales.plot(kind='bar', ax=ax)
    ax.set_title('各地区总销售额')
    fig.savefig(out_dir / 'regional_sales.png', dpi=300, bbox_inches='tight')
    plt.close(fig)

    parts.append('<div class="chart-container"><h2>地区销售分析</h2><img src="regional_sales.png"></div>')

    parts.append("""
    </body>
    </html>
    """)

    with open(out_dir / 'report.html', 'w', encoding='utf-8') as f:
        f.write("".join(parts))

    print(f"HTML报告已保存到 {output_dir}/report.html")

# 生成报告
create_html_report(df)

10. 性能优化和最佳实践

10.1 大数据可视化优化

def optimize_large_plot(df, sample_frac=0.1, max_points=10000):
    """Return a random sample of ``df`` when it has more than ``max_points`` rows.

    Frames with at most ``max_points`` rows are returned unchanged (the same
    object). Larger frames are sampled with ``sample_frac`` using a fixed
    random seed, and a message with the sample size is printed.
    """
    # Small enough already: hand the frame back untouched
    if len(df) <= max_points:
        return df

    sampled = df.sample(frac=sample_frac, random_state=42)
    print(f"数据量过大，已采样 {len(sampled)} 条记录进行可视化")
    return sampled

# 下采样时间序列
def downsample_timeseries(df, freq='W', columns=None):
    """Resample the chosen columns to ``freq`` and return their mean and sum.

    The 'date' column is used as the time index when the frame has one;
    otherwise the existing index is used. ``columns`` defaults to every
    numeric column. The result has one (column, 'mean') and one
    (column, 'sum') column per input column.
    """
    indexed = df.set_index('date') if 'date' in df else df

    if columns is None:
        columns = df.select_dtypes(include=[np.number]).columns

    # Same pair of aggregations for every selected column
    aggregations = {}
    for name in columns:
        aggregations[name] = ['mean', 'sum']

    resampler = indexed[columns].resample(freq)
    return resampler.agg(aggregations)

# Example: sample the frame if it is large, then plot the weekly mean/sum aggregates
large_sample = optimize_large_plot(df, sample_frac=0.5)
weekly_sales = downsample_timeseries(df, freq='W')
weekly_sales.plot(figsize=(12, 6))
plt.title('周度销售汇总')
plt.show()

10.2 最佳实践

# 1. Set a sensible default figure size and font size
plt.rcParams['figure.figsize'] = (10, 6)
plt.rcParams['font.size'] = 12

# 2. Use one colour scheme throughout (note: this rebinds the module-level name `colors`)
colors = plt.cm.Set1(np.linspace(0, 1, 8))  # fixed palette: the Set1 colormap evaluated at 8 evenly spaced positions

# 3. 添加统计信息
def add_stats_annotation(ax, data, x_col, y_col):
    """Add a text box with the correlation r and R² of two columns to ``ax``.

    Args:
        ax: Matplotlib axes to annotate.
        data: DataFrame containing both columns.
        x_col: Name of the first column.
        y_col: Name of the second column.

    Returns:
        The same ``ax``, so calls can be chained.
    """
    corr = data[x_col].corr(data[y_col])
    r2 = corr**2
    # Position is given in axes coordinates (0-1). Anchoring the text by its
    # top edge keeps the two-line box inside the axes; with the default
    # alignment it would grow upward from y=0.95 and overflow the top border.
    ax.text(0.05, 0.95, f'r = {corr:.3f}\nR² = {r2:.3f}',
            transform=ax.transAxes, fontsize=10,
            verticalalignment='top',
            bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8))
    return ax

# 4. 动态图表调整
def adaptive_plot(df, max_cols=4):
    """Plot every plottable column of ``df`` on its own subplot in a grid.

    Only numeric, datetime and timedelta columns are drawn; other columns
    (for example strings) are skipped, because they cannot be drawn as a
    line plot. The grid has at most ``max_cols`` columns and as many rows as
    needed; unused cells of the last row are hidden.

    Args:
        df: DataFrame whose columns are plotted.
        max_cols: Maximum number of subplots per row; must be at least 1.

    Raises:
        ValueError: If ``max_cols`` is smaller than 1 or ``df`` has no
            plottable column.
    """
    if max_cols < 1:
        raise ValueError("adaptive_plot: max_cols must be at least 1")

    plottable = df.select_dtypes(include=['number', 'datetime', 'datetimetz', 'timedelta'])
    n_series = plottable.shape[1]
    if n_series == 0:
        # Without this guard the grid computation below divides by zero
        raise ValueError("adaptive_plot: no numeric or datetime columns to plot")

    n_cols = min(max_cols, n_series)
    n_rows = (n_series + n_cols - 1) // n_cols  # ceiling division

    # squeeze=False always yields a 2-D array of axes, even for a 1x1 grid
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(5*n_cols, 4*n_rows), squeeze=False)
    axes = axes.ravel()

    for ax, (label, series) in zip(axes, plottable.items()):
        series.plot(ax=ax)
        ax.set_title(label)

    # Hide the grid cells that did not receive a series
    for ax in axes[n_series:]:
        ax.set_visible(False)

    plt.tight_layout()
    plt.show()

# Putting the practices together: a scatter plot annotated with r and R²
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=df, x='sales', y='profit', ax=ax)
add_stats_annotation(ax, df, 'sales', 'profit')
plt.title('优化后的散点图')
plt.show()

Pandas内置的绘图功能结合Matplotlib和Seaborn提供了强大的数据可视化能力。通过掌握基础图表类型、分组可视化、时间序列分析和交互式图表，可以创建专业的数据可视化报告。关键是根据数据特点选择合适的图表类型，并通过样式优化和统计注释增强可读性和洞察力。

2025 年 12 月
一	二	三	四	五	六	日
1	2	3	4	5	6	7
8	9	10	11	12	13	14
15	16	17	18	19	20	21
22	23	24	25	26	27	28
29	30	31

Pandas 数据可视化详解

1. Pandas 内置绘图基础

1.1 基本绘图方法

1.1.1 基本线图

1.1.2 柱状图

1.2 高级内置图表

1.2.1 散点图

1.2.2 饼图和面积图

1.2.3 箱线图

2. 分组数据可视化

2.1 透视表可视化

2.2 分组统计图

3. 时间序列可视化

3.1 基础时间序列图

3.2 季节性和趋势分解

3.3 相关性时间序列

4. 统计图表

4.1 分布分析

4.2 相关性可视化

5. 高级可视化技巧

5.1 自定义样式和主题

5.2 子图布局

6. 与Seaborn集成

6.1 高级统计可视化

6.2 相关性分析可视化

7. 交互式可视化

7.1 Plotly集成

7.2 Bokeh可视化

8. 自定义可视化函数

8.1 自动化报告生成

9. 输出和保存

9.1 保存静态图片

9.2 HTML报告

10. 性能优化和最佳实践

10.1 大数据可视化优化

10.2 最佳实践

likuolei

发表回复 取消回复

相关文章

发表回复取消回复