PostgreSQL 连接(JOIN)
PostgreSQL JOIN 完全指南
JOIN是 SQL 的“连接器”,将多个表的数据横向合并,是复杂查询的基石。
本文涵盖 7 种 JOIN、语法、执行顺序、性能优化、索引策略、NULL 处理、JSON/数组 JOIN、递归 JOIN、常见陷阱、最佳实践 等全部内容。
一、7 种 JOIN 类型总览
| JOIN 类型 | 说明 | 匹配条件 | 
|---|---|---|
INNER JOIN | 仅返回两表匹配的行 | 默认 | 
LEFT JOIN | 左表全部 + 右表匹配 | 右表无匹配 → NULL | 
RIGHT JOIN | 右表全部 + 左表匹配 | 左表无匹配 → NULL | 
FULL OUTER JOIN | 两表全部 | 无匹配 → NULL | 
CROSS JOIN | 笛卡尔积(每行配对) | 无条件 | 
LATERAL JOIN | 子查询依赖左表(PG 独有) | 动态子查询 | 
SEMI/ANTI JOIN | 逻辑概念(用 EXISTS/NOT EXISTS) | 存在性 | 
二、核心语法
SELECT ...
FROM table1
[INNER] JOIN table2 ON condition
[LEFT|RIGHT|FULL] JOIN table3 ON condition
...
等价写法(老式):
FROM table1, table2 WHERE table1.id = table2.id不推荐:易混淆,性能差
三、准备测试数据
-- 员工表
CREATE TABLE employees (
    id BIGSERIAL PRIMARY KEY,
    name TEXT,
    dept_code VARCHAR(10),
    salary NUMERIC(10,2),
    manager_id BIGINT
);
-- 部门表
CREATE TABLE departments (
    code VARCHAR(10) PRIMARY KEY,
    name TEXT,
    location TEXT
);
-- 订单表
CREATE TABLE orders (
    id BIGSERIAL PRIMARY KEY,
    emp_id BIGINT,
    amount NUMERIC(10,2),
    order_date DATE
);
-- 插入数据
INSERT INTO departments VALUES 
('IT', 'IT Department', 'Beijing'),
('HR', 'HR Department', 'Shanghai'),
('FIN', 'Finance', 'Guangzhou');
INSERT INTO employees (name, dept_code, salary, manager_id) VALUES
('Alice', 'IT', 85000, NULL),
('Bob', 'HR', 52000, 1),
('Carol', 'IT', 92000, 1),
('Dave', 'IT', 78000, 3),
('Eve', 'HR', 48000, 2),
('Frank', NULL, 60000, NULL);  -- 无部门
INSERT INTO orders (emp_id, amount, order_date) VALUES
(1, 1200, '2025-01-15'),
(1, 800, '2025-01-20'),
(3, 2500, '2025-01-17'),
(5, 150, '2025-01-18'),
(999, 500, '2025-01-19');  -- 无效员工
四、1. INNER JOIN:最常用
-- 员工 + 部门(仅匹配的)
SELECT e.name, e.salary, d.name AS dept_name
FROM employees e
INNER JOIN departments d ON e.dept_code = d.code;
输出:
 name  | salary | dept_name
-------+--------+-----------
 Alice |  85000 | IT Department
 Bob   |  52000 | HR Department
 Carol |  92000 | IT Department
 Dave  |  78000 | IT Department
 Eve   |  48000 | HR Department
五、2. LEFT JOIN:保留左表
-- 所有员工,即使无部门
SELECT e.name, e.dept_code, d.name AS dept_name
FROM employees e
LEFT JOIN departments d ON e.dept_code = d.code;
输出:
 name  | dept_code | dept_name
-------+-----------+-----------
 Alice | IT        | IT Department
 Bob   | HR        | HR Department
 Carol | IT        | IT Department
 Dave  | IT        | IT Department
 Eve   | HR        | HR Department
 Frank | NULL      | NULL
六、3. RIGHT JOIN:保留右表
-- 所有部门,即使无员工
SELECT d.name, e.name
FROM employees e
RIGHT JOIN departments d ON e.dept_code = d.code;
七、4. FULL OUTER JOIN:两表全保留
-- 员工 + 部门,任意一边有数据
SELECT e.name, d.name AS dept_name
FROM employees e
FULL OUTER JOIN departments d ON e.dept_code = d.code;
八、5. CROSS JOIN:笛卡尔积
-- 每位员工配对每个部门(6 x 3 = 18 行)
SELECT e.name, d.name
FROM employees e
CROSS JOIN departments d;
用途:生成测试数据、组合分析
九、6. LATERAL JOIN:动态子查询(PG 独有神器)
-- 每个员工的最新 2 笔订单
SELECT e.name, o.amount, o.order_date
FROM employees e
LEFT JOIN LATERAL (
    SELECT amount, order_date
    FROM orders
    WHERE emp_id = e.id
    ORDER BY order_date DESC
    LIMIT 2
) o ON true;
ON true:LATERAL必须有,但条件总是真
十、多表 JOIN 链
-- 员工 → 部门 → 订单
SELECT e.name, d.name AS dept, o.amount
FROM employees e
JOIN departments d ON e.dept_code = d.code
LEFT JOIN orders o ON e.id = o.emp_id;
十一、JOIN 执行顺序(重要!)
FROM → WHERE → GROUP BY → HAVING → SELECT → ORDER BY → LIMIT
JOIN 顺序影响性能:
- 先 JOIN 小表
 - 先过滤行数(
WHERE) 
-- 推荐:先过滤
SELECT e.name, o.amount
FROM employees e
JOIN orders o ON e.id = o.emp_id
WHERE o.order_date >= '2025-01-01';
十二、性能优化:索引是命脉
必须建索引的列:
- 外键列:
employees.dept_code,orders.emp_id - JOIN 条件列
 - WHERE 过滤列
 
CREATE INDEX idx_emp_dept ON employees(dept_code);
CREATE INDEX idx_orders_emp ON orders(emp_id);
CREATE INDEX idx_orders_date ON orders(order_date);
复合索引(推荐)
-- 加速 JOIN + WHERE
CREATE INDEX idx_orders_emp_date ON orders(emp_id, order_date);
十三、NULL 处理:LEFT JOIN 的副作用
-- 错误:NULL 不参与计算
SELECT e.name, AVG(o.amount)
FROM employees e
LEFT JOIN orders o ON e.id = o.emp_id
GROUP BY e.name;
-- 正确:过滤 NULL
SELECT e.name, AVG(o.amount)
FROM employees e
LEFT JOIN orders o ON e.id = o.emp_id
WHERE o.id IS NOT NULL
GROUP BY e.name;
十四、JSON 与数组 JOIN
-- 假设 config 含部门代码数组
ALTER TABLE employees ADD COLUMN config JSONB;
-- JOIN JSON 数组
SELECT e.name, dept_item->>'code' AS dept_code
FROM employees e
JOIN LATERAL jsonb_array_elements(e.config->'depts') AS dept_item ON true;
十五、递归 JOIN(自连接)
-- 员工 → 经理(自连接)
SELECT 
    e.name AS employee,
    m.name AS manager
FROM employees e
LEFT JOIN employees m ON e.manager_id = m.id;
十六、常见陷阱与解决方案
| 陷阱 | 说明 | 解决方案 | 
|---|---|---|
JOIN 前未过滤 | 产生巨量中间结果 | 先 WHERE | 
| 缺少索引 | 全表扫描 | 建外键索引 | 
LEFT JOIN + WHERE right.col = value | 变成 INNER JOIN | 移到 ON | 
| 多表 JOIN 顺序混乱 | 性能崩 | 小表优先 | 
CROSS JOIN 爆炸 | 亿级行 | 避免 | 
-- 错误:LEFT → INNER
SELECT e.name, o.amount
FROM employees e
LEFT JOIN orders o ON e.id = o.emp_id
WHERE o.amount > 1000;  -- o.amount NULL 被过滤!
-- 正确
SELECT e.name, o.amount
FROM employees e
LEFT JOIN orders o ON e.id = o.emp_id AND o.amount > 1000;
十七、最佳实践 Checklist
| 项目 | 建议 | 
|---|---|
| 外键列必建索引 | 加速 JOIN | 
| 先过滤再 JOIN | 减少行数 | 
| 小表优先 JOIN | 优化计划 | 
ON 放 JOIN 条件,WHERE 放过滤 | 逻辑清晰 | 
LATERAL 处理动态子查询 | 强大灵活 | 
避免 CROSS JOIN | 除非必要 | 
EXPLAIN 验证计划 | 确保 Index Scan | 
十八、一键复杂报表(生产级)
-- 员工 + 部门 + 订单统计(最新订单 + 总金额)
WITH emp_orders AS (
    SELECT 
        emp_id,
        SUM(amount) AS total_amount,
        COUNT(*) AS order_count,
        MAX(order_date) AS last_order
    FROM orders
    GROUP BY emp_id
),
latest_order AS (
    SELECT DISTINCT ON (emp_id)
           emp_id, amount AS latest_amount, order_date
    FROM orders
    ORDER BY emp_id, order_date DESC
)
SELECT 
    e.name,
    d.name AS department,
    COALESCE(eo.total_amount, 0) AS total_sales,
    COALESCE(eo.order_count, 0) AS order_count,
    lo.latest_amount,
    lo.order_date AS last_order_date
FROM employees e
LEFT JOIN departments d ON e.dept_code = d.code
LEFT JOIN emp_orders eo ON e.id = eo.emp_id
LEFT JOIN latest_order lo ON e.id = lo.emp_id
ORDER BY total_sales DESC NULLS LAST;
十九、快速实战:5 分钟掌握 JOIN
-- 1. INNER JOIN
SELECT e.name, d.name FROM employees e JOIN departments d ON e.dept_code = d.code;
-- 2. LEFT JOIN
SELECT e.name, o.amount FROM employees e LEFT JOIN orders o ON e.id = o.emp_id;
-- 3. LATERAL JOIN(最新订单)
SELECT e.name, lo.amount FROM employees e
LEFT JOIN LATERAL (
    SELECT amount FROM orders WHERE emp_id = e.id ORDER BY order_date DESC LIMIT 1
) lo ON true;
-- 4. 建索引
CREATE INDEX idx_emp_dept ON employees(dept_code);
CREATE INDEX idx_orders_emp ON orders(emp_id);
现在就动手:
在测试表中:
- 查询 所有员工及其部门(
LEFT JOIN) - 查询 有订单的员工及其订单总额(
INNER JOIN+GROUP BY) - 用 
LATERAL查询 每个员工最新一笔订单 - 建索引,用 
EXPLAIN验证是否走 Index Scan 
二十、性能对比(EXPLAIN)
-- 慢:无索引
EXPLAIN ANALYZE 
SELECT e.name, d.name FROM employees e JOIN departments d ON e.dept_code = d.code;
-- 快:有索引
CREATE INDEX idx_emp_dept ON employees(dept_code);
EXPLAIN ANALYZE 
SELECT e.name, d.name FROM employees e JOIN departments d ON e.dept_code = d.code;
需要我生成:
- 10万条数据 + 多表 JOIN 压力测试?回复 
测试数据 - 动态 JOIN 生成(Python)?回复 
Python JOIN - 树形组织架构递归 JOIN?回复 
组织架构 - EXPLAIN 可视化分析?回复 
EXPLAIN 分析 
随时告诉我!