MySQL Optimization: Query Performance Tuning
Business scenarios
In the day-to-day operation of an e-commerce platform, we regularly run into performance problems such as:
- Product search: searches respond slowly, especially with multiple filter conditions
- Order history: loading a user's past orders takes too long
- Sales statistics: administrators wait minutes for sales reports
- Stock updates: lock waits appear when stock is updated under high concurrency
Query performance tuning is the key technique for solving these problems.
Execution plan analysis
EXPLAIN in detail
-- Complex query example: per-user order statistics
EXPLAIN FORMAT=JSON
SELECT
u.user_id,
u.username,
COUNT(o.order_id) as order_count,
SUM(o.total_amount) as total_spent,
AVG(o.total_amount) as avg_order_amount,
MAX(o.order_date) as last_order_date
FROM users u
LEFT JOIN orders o ON u.user_id = o.user_id
AND o.order_status IN ('PAID', 'SHIPPED', 'DELIVERED')
AND o.order_date >= '2024-01-01'
WHERE u.registration_date >= '2023-01-01'
AND u.is_active = TRUE
GROUP BY u.user_id, u.username
HAVING order_count > 0
ORDER BY total_spent DESC
LIMIT 100;
Key indicators in the execution plan:
-- Detailed runtime statistics (EXPLAIN ANALYZE, MySQL 8.0.18+)
EXPLAIN ANALYZE
SELECT product_name, category_name, price
FROM products p
INNER JOIN categories c ON p.category_id = c.category_id
WHERE p.price BETWEEN 100 AND 500
AND p.stock_quantity > 0
ORDER BY p.price DESC;
-- Key fields:
-- • type: access type (system > const > eq_ref > ref > range > index > ALL)
-- • rows: estimated number of rows to examine
-- • filtered: estimated percentage of rows remaining after the WHERE filter
-- • Extra: additional details (Using index, Using filesort, Using temporary)
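As a quick illustration of acting on these fields, the sketch below (using the orders table from the earlier examples; the index name is illustrative) shows the usual way to remove a "Using filesort" note: give the optimizer an index whose column order matches the WHERE plus ORDER BY columns.
EXPLAIN SELECT order_id, total_amount
FROM orders
WHERE user_id = 1001
ORDER BY order_date DESC
LIMIT 20;
-- If Extra shows "Using filesort", an index that delivers rows already in the required
-- order usually eliminates the sort step:
CREATE INDEX idx_orders_user_date ON orders (user_id, order_date);  -- illustrative name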
Performance analysis tools
-- Enable the slow query log
SET GLOBAL slow_query_log = 'ON';
SET GLOBAL long_query_time = 2; -- log statements slower than 2 seconds (applies to new connections)
SET GLOBAL log_queries_not_using_indexes = 'ON';
-- Inspect currently running queries
SELECT
ID,
USER,
HOST,
DB,
COMMAND,
TIME,
STATE,
LEFT(INFO, 100) as QUERY_SNIPPET
FROM information_schema.PROCESSLIST
WHERE COMMAND != 'Sleep'
ORDER BY TIME DESC;
-- Statement statistics from the Performance Schema (timers are in picoseconds)
SELECT
schema_name,
digest_text,
count_star as exec_count,
avg_timer_wait/1000000000 as avg_time_ms,
sum_timer_wait/1000000000 as total_time_ms
FROM performance_schema.events_statements_summary_by_digest
WHERE schema_name = 'ecommerce'
ORDER BY avg_timer_wait DESC
LIMIT 10;
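If the sys schema is installed (the default since MySQL 5.7), the same digest data is available in a friendlier form; the lines below are a sketch of how it is commonly combined with the query above.
SELECT * FROM sys.statement_analysis WHERE db = 'ecommerce' LIMIT 10;
-- Reset digest statistics before a measurement window so averages reflect only new traffic
TRUNCATE TABLE performance_schema.events_statements_summary_by_digest;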
Index optimization strategies
Composite index design
-- Design composite indexes around the actual query patterns
-- 1. Order lookups
-- Query pattern: one user's orders in a date range with a given status
SELECT * FROM orders
WHERE user_id = 1001
AND order_date >= '2024-01-01'
AND order_status = 'DELIVERED';
-- Index design: equality columns first, then the range column (leftmost-prefix rule)
CREATE INDEX idx_orders_user_status_date ON orders (user_id, order_status, order_date);
-- 2. Product search
-- Query pattern: filter by category, price range and stock status
SELECT product_id, product_name, price
FROM products
WHERE category_id = 5
AND price BETWEEN 100 AND 500
AND stock_quantity > 0
AND status = 'ACTIVE'
ORDER BY price DESC;
-- Optimized index: equality columns (category_id, status) first, then the range/sort column
CREATE INDEX idx_products_search ON products (category_id, status, price, stock_quantity);
-- 3. Covering index design
-- Avoid lookups back to the clustered index by covering every referenced column
-- (the primary key product_id is included in every secondary index automatically)
CREATE INDEX idx_products_cover ON products (category_id, status, price, stock_quantity, product_name);
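After creating an index it is worth confirming that the optimizer actually uses it; a minimal verification sketch for the search query above (FORCE INDEX is only a diagnostic here, not a permanent fix):
EXPLAIN SELECT product_id, product_name, price
FROM products
WHERE category_id = 5 AND status = 'ACTIVE'
    AND price BETWEEN 100 AND 500 AND stock_quantity > 0
ORDER BY price DESC;
-- Expect key = idx_products_search (or idx_products_cover with "Using index" in Extra);
-- if a different index is chosen, compare the plans explicitly:
EXPLAIN SELECT product_id, product_name, price
FROM products FORCE INDEX (idx_products_search)
WHERE category_id = 5 AND status = 'ACTIVE'
    AND price BETWEEN 100 AND 500 AND stock_quantity > 0
ORDER BY price DESC;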
Index usage analysis
-- Check how indexes are actually used
SELECT
object_schema,
object_name,
index_name,
count_fetch,
count_insert,
count_update,
count_delete
FROM performance_schema.table_io_waits_summary_by_index_usage
WHERE object_schema = 'ecommerce'
ORDER BY count_fetch DESC;
-- Find indexes that have never been used (count_star = 0 means no reads or writes ever hit the index)
SELECT
    object_schema,
    object_name,
    index_name
FROM performance_schema.table_io_waits_summary_by_index_usage
WHERE object_schema = 'ecommerce'
    AND index_name IS NOT NULL
    AND index_name != 'PRIMARY'
    AND count_star = 0
ORDER BY object_name, index_name;
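Assuming the sys schema is available, the same checks exist as ready-made views, and the redundant-index view additionally flags indexes that are just a left prefix of another one; a sketch:
SELECT * FROM sys.schema_unused_indexes WHERE object_schema = 'ecommerce';
SELECT * FROM sys.schema_redundant_indexes WHERE table_schema = 'ecommerce';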
Query rewriting
Subquery optimization
-- ❌ Inefficient IN subquery
SELECT u.username, u.email
FROM users u
WHERE u.user_id IN (
SELECT o.user_id
FROM orders o
WHERE o.order_date >= '2024-01-01'
);
-- ✅ Rewritten as a JOIN
SELECT DISTINCT u.username, u.email
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id
WHERE o.order_date >= '2024-01-01';
-- ❌ Correlated subquery (runs once per product row)
SELECT
p.product_name,
p.price,
(SELECT AVG(rating) FROM product_reviews pr WHERE pr.product_id = p.product_id) as avg_rating
FROM products p
WHERE p.category_id = 5;
-- ✅ Rewritten with a pre-aggregated derived table and LEFT JOIN
SELECT
p.product_name,
p.price,
COALESCE(pr.avg_rating, 0) as avg_rating
FROM products p
LEFT JOIN (
SELECT product_id, AVG(rating) as avg_rating
FROM product_reviews
GROUP BY product_id
) pr ON p.product_id = pr.product_id
WHERE p.category_id = 5;
Pagination optimization
-- ❌ Offset pagination (performance degrades badly for deep pages)
SELECT * FROM orders
ORDER BY order_date DESC
LIMIT 100000, 20;
-- ✅ Keyset (cursor) pagination
-- First page
SELECT * FROM orders
WHERE order_date >= '2024-01-01'
ORDER BY order_date DESC, order_id DESC
LIMIT 20;
-- Subsequent pages (use the last row of the previous page as the cursor)
SELECT * FROM orders
WHERE order_date >= '2024-01-01'
AND (order_date < '2024-03-15' OR (order_date = '2024-03-15' AND order_id < 12345))
ORDER BY order_date DESC, order_id DESC
LIMIT 20;
-- ✅ Deferred join for large offsets (the subquery scans only the index)
SELECT o.* FROM orders o
INNER JOIN (
SELECT order_id FROM orders
ORDER BY order_date DESC
LIMIT 100000, 20
) t ON o.order_id = t.order_id;
Aggregation optimization
-- ❌ Several separate aggregate scans
SELECT
(SELECT COUNT(*) FROM orders WHERE order_status = 'PENDING') as pending_count,
(SELECT COUNT(*) FROM orders WHERE order_status = 'PAID') as paid_count,
(SELECT COUNT(*) FROM orders WHERE order_status = 'SHIPPED') as shipped_count;
-- ✅ One pass with conditional aggregation
SELECT
SUM(CASE WHEN order_status = 'PENDING' THEN 1 ELSE 0 END) as pending_count,
SUM(CASE WHEN order_status = 'PAID' THEN 1 ELSE 0 END) as paid_count,
SUM(CASE WHEN order_status = 'SHIPPED' THEN 1 ELSE 0 END) as shipped_count
FROM orders;
-- Time-based aggregation
SELECT
DATE(order_date) as order_day,
COUNT(*) as order_count,
SUM(total_amount) as day_revenue
FROM orders
WHERE order_date >= DATE_SUB(CURDATE(), INTERVAL 30 DAY)
GROUP BY DATE(order_date)
ORDER BY order_day DESC;
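For reporting queries like the one above, a common further step is to maintain a small pre-aggregated table so that dashboards never scan orders directly. The sketch below assumes a hypothetical daily_sales_summary table refreshed periodically (for example by a scheduler or a MySQL EVENT).
-- Hypothetical summary table (name is illustrative)
CREATE TABLE IF NOT EXISTS daily_sales_summary (
    order_day DATE NOT NULL PRIMARY KEY,
    order_count INT UNSIGNED NOT NULL,
    day_revenue DECIMAL(14,2) NOT NULL
);
-- Refresh only the most recent day(s); re-running the statement is idempotent
INSERT INTO daily_sales_summary (order_day, order_count, day_revenue)
SELECT DATE(order_date), COUNT(*), SUM(total_amount)
FROM orders
WHERE order_date >= CURDATE() - INTERVAL 1 DAY
GROUP BY DATE(order_date)
ON DUPLICATE KEY UPDATE
    order_count = VALUES(order_count),
    day_revenue = VALUES(day_revenue);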
SQL rewriting techniques
Predicate pushdown
-- ❌ Filtering in the outer query
SELECT * FROM (
SELECT u.user_id, u.username, o.order_id, o.total_amount
FROM users u
LEFT JOIN orders o ON u.user_id = o.user_id
) t
WHERE t.total_amount > 1000;
-- ✅ Push the condition down into the join
SELECT u.user_id, u.username, o.order_id, o.total_amount
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id
WHERE o.total_amount > 1000;
EXISTS vs. IN
-- Scenario: find users who have placed at least one order
-- ❌ IN subquery (may build a large intermediate result set)
SELECT * FROM users
WHERE user_id IN (SELECT user_id FROM orders);
-- ✅ EXISTS (stops at the first matching order)
SELECT * FROM users u
WHERE EXISTS (SELECT 1 FROM orders o WHERE o.user_id = u.user_id);
-- ✅ Equivalent semi-join form
SELECT DISTINCT u.* FROM users u
INNER JOIN orders o ON u.user_id = o.user_id;
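The negated case deserves a note of its own: when looking for users without any orders, NOT IN and NOT EXISTS are not interchangeable, because a single NULL user_id in the subquery makes NOT IN return nothing. A short sketch:
-- ❌ Silently returns an empty result if orders.user_id can be NULL
SELECT * FROM users
WHERE user_id NOT IN (SELECT user_id FROM orders);
-- ✅ NULL-safe, and usually executed as an anti-join
SELECT * FROM users u
WHERE NOT EXISTS (SELECT 1 FROM orders o WHERE o.user_id = u.user_id);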
Temporary table optimization
-- Break a complex query into steps with a temporary table
-- Build the intermediate result
CREATE TEMPORARY TABLE temp_user_stats AS
SELECT
user_id,
COUNT(*) as order_count,
SUM(total_amount) as total_spent
FROM orders
WHERE order_date >= '2024-01-01'
GROUP BY user_id;
-- Add an index for the subsequent join
CREATE INDEX idx_temp_user ON temp_user_stats (user_id);
-- Join the intermediate result back to users
SELECT
u.username,
u.email,
ts.order_count,
ts.total_spent,
CASE
WHEN ts.total_spent >= 10000 THEN 'VIP'
WHEN ts.total_spent >= 1000 THEN 'GOLD'
ELSE 'REGULAR'
END as user_level
FROM users u
INNER JOIN temp_user_stats ts ON u.user_id = ts.user_id
ORDER BY ts.total_spent DESC;
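On MySQL 8.0+ the same decomposition can be written as a common table expression, which avoids managing a temporary table explicitly (though, unlike the temporary table, a CTE cannot be indexed by hand); a sketch:
WITH user_stats AS (
    SELECT user_id, COUNT(*) AS order_count, SUM(total_amount) AS total_spent
    FROM orders
    WHERE order_date >= '2024-01-01'
    GROUP BY user_id
)
SELECT u.username, u.email, s.order_count, s.total_spent
FROM users u
INNER JOIN user_stats s ON u.user_id = s.user_id
ORDER BY s.total_spent DESC;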
Data type optimization
Choosing column types
-- ❌ Inefficient column types
CREATE TABLE orders_bad (
    order_id VARCHAR(50),       -- should be BIGINT
    user_id VARCHAR(20),        -- should be BIGINT
    order_date DATETIME,        -- use DATE if the time part is not needed
    total_amount FLOAT,         -- monetary amounts should be DECIMAL
    order_status VARCHAR(100),  -- a fixed status set should be an ENUM
    description TEXT            -- short text should be VARCHAR
);
-- ✅ Optimized column types
CREATE TABLE orders_good (
order_id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
user_id BIGINT UNSIGNED NOT NULL,
order_date DATE NOT NULL,
total_amount DECIMAL(12,2) NOT NULL,
order_status ENUM('PENDING','PAID','SHIPPED','DELIVERED') NOT NULL,
description VARCHAR(500),
PRIMARY KEY (order_id),
KEY idx_user_date (user_id, order_date)
);
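A one-line illustration of why total_amount is DECIMAL rather than FLOAT/DOUBLE: decimal literals are exact, while binary floating point accumulates rounding error.
SELECT 0.1 + 0.2 = 0.3 AS decimal_math,    -- 1: exact decimal arithmetic
       0.1E0 + 0.2E0 = 0.3 AS float_math;  -- 0: floating-point rounding error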
JSON column optimization
-- Indexing a JSON attribute with a functional index (MySQL 8.0.13+)
-- Note: ->> / JSON_UNQUOTE() returns TEXT, which cannot be used as a functional key part directly,
-- so cast it to a fixed-length string (COLLATE utf8mb4_bin matches the ->> collation)
ALTER TABLE products
    ADD COLUMN attributes JSON,
    ADD INDEX idx_brand ((CAST(attributes->>'$.brand' AS CHAR(50)) COLLATE utf8mb4_bin));
-- JSON query: the WHERE expression must match the indexed expression for the index to be used
SELECT product_name, attributes->>'$.brand' AS brand
FROM products
WHERE CAST(attributes->>'$.brand' AS CHAR(50)) COLLATE utf8mb4_bin = 'Apple';
-- Alternative: a generated column plus a regular index (simpler to query, works on MySQL 5.7+)
ALTER TABLE products
ADD COLUMN brand VARCHAR(50) GENERATED ALWAYS AS (JSON_UNQUOTE(JSON_EXTRACT(attributes, '$.brand'))) STORED,
ADD INDEX idx_brand_generated (brand);
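With the generated column in place, application queries can filter on the plain brand column and the regular index applies directly:
SELECT product_id, product_name, price, brand
FROM products
WHERE brand = 'Apple';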
Bulk operation optimization
Bulk insert optimization
-- ❌ Row-by-row inserts
INSERT INTO products (product_name, price) VALUES ('Product1', 100);
INSERT INTO products (product_name, price) VALUES ('Product2', 200);
-- ... repeated many times
-- ✅ Multi-row insert
INSERT INTO products (product_name, price) VALUES
    ('Product1', 100),
    ('Product2', 200),
    ('Product3', 300);
-- ... many rows per statement; batches of roughly 1,000-5,000 rows work well
-- ✅ LOAD DATA for very large imports
LOAD DATA INFILE '/path/to/products.csv'
INTO TABLE products
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
(product_name, price, category_id);
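When batches are driven from application code, wrapping each batch in an explicit transaction avoids one commit (and log flush) per statement; a small sketch with illustrative product rows:
START TRANSACTION;
-- illustrative batch; in practice each INSERT carries a few thousand rows
INSERT INTO products (product_name, price) VALUES ('ProductA', 100), ('ProductB', 200);
INSERT INTO products (product_name, price) VALUES ('ProductC', 300), ('ProductD', 400);
COMMIT;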
Bulk update optimization
-- ❌ Row-by-row updates
UPDATE products SET price = price * 1.1 WHERE product_id = 1;
UPDATE products SET price = price * 1.1 WHERE product_id = 2;
-- ✅ Set-based update
UPDATE products SET price = price * 1.1
WHERE product_id IN (1, 2, 3, 4, 5);
-- ✅ Use a temporary table for complex bulk updates
CREATE TEMPORARY TABLE temp_price_updates (
product_id BIGINT,
new_price DECIMAL(10,2),
PRIMARY KEY (product_id)
);
INSERT INTO temp_price_updates VALUES
(1, 110.00), (2, 220.00), (3, 330.00);
UPDATE products p
INNER JOIN temp_price_updates t ON p.product_id = t.product_id
SET p.price = t.new_price;
Performance monitoring in practice
Query performance monitoring
-- Create a monitoring view (Performance Schema timers are in picoseconds, so divide by 10^12 for seconds)
CREATE VIEW query_performance_monitor AS
SELECT
    schema_name,
    LEFT(digest_text, 100) AS query_sample,
    count_star AS execution_count,
    ROUND(avg_timer_wait/1000000000000, 3) AS avg_time_seconds,
    ROUND(max_timer_wait/1000000000000, 3) AS max_time_seconds,
    ROUND(sum_timer_wait/1000000000000, 3) AS total_time_seconds,
    ROUND(sum_rows_examined/count_star) AS avg_rows_examined,
    ROUND(sum_rows_sent/count_star) AS avg_rows_sent
FROM performance_schema.events_statements_summary_by_digest
WHERE schema_name = 'ecommerce'
    AND avg_timer_wait > 1000000000000 -- queries averaging more than 1 second
ORDER BY avg_timer_wait DESC;
-- Show the top slow queries
SELECT * FROM query_performance_monitor LIMIT 10;
Automated optimization script
import mysql.connector
import logging


class QueryOptimizer:
    def __init__(self, db_config):
        self.db_config = db_config
        self.setup_logging()

    def setup_logging(self):
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s'
        )
        self.logger = logging.getLogger(__name__)

    def analyze_slow_queries(self):
        """Analyze slow queries and log optimization suggestions."""
        conn = mysql.connector.connect(**self.db_config)
        cursor = conn.cursor(dictionary=True)
        # Performance Schema timers are in picoseconds: divide by 10^12 for seconds.
        query = """
            SELECT
                digest_text,
                count_star AS exec_count,
                ROUND(avg_timer_wait/1000000000000, 3) AS avg_time_sec,
                ROUND(sum_timer_wait/1000000000000, 3) AS total_time_sec,
                ROUND(sum_rows_examined/count_star) AS avg_rows_examined,
                ROUND(sum_rows_sent/count_star) AS avg_rows_sent
            FROM performance_schema.events_statements_summary_by_digest
            WHERE schema_name = %s
              AND avg_timer_wait > 2000000000000  -- more than 2 seconds on average
            ORDER BY avg_timer_wait DESC
            LIMIT 20
        """
        cursor.execute(query, (self.db_config['database'],))
        slow_queries = cursor.fetchall()
        for query_info in slow_queries:
            self.logger.info("Slow query analysis:")
            self.logger.info(f"SQL: {query_info['digest_text'][:100]}...")
            self.logger.info(f"Average execution time: {query_info['avg_time_sec']}s")
            self.logger.info(f"Average rows examined: {query_info['avg_rows_examined']}")
            # Optimization suggestions
            self.suggest_optimization(query_info)
            self.logger.info("-" * 50)
        cursor.close()
        conn.close()

    def suggest_optimization(self, query_info):
        """Derive simple optimization hints from digest statistics."""
        suggestions = []
        if query_info['avg_rows_examined'] > 100000:
            suggestions.append("Consider adding an index to reduce the rows examined")
        if query_info['avg_time_sec'] > 5:
            suggestions.append("Query is very slow; consider splitting it into simpler steps")
        if (query_info['avg_rows_sent']
                and query_info['avg_rows_examined'] / query_info['avg_rows_sent'] > 10):
            suggestions.append("Poor filtering efficiency; tighten or index the WHERE conditions")
        for suggestion in suggestions:
            self.logger.info(f"Suggestion: {suggestion}")

    def check_missing_indexes(self):
        """Find statements that ran without using any index."""
        conn = mysql.connector.connect(**self.db_config)
        cursor = conn.cursor(dictionary=True)
        # sum_no_index_used counts executions that scanned without an index (full scans).
        query = """
            SELECT
                LEFT(digest_text, 100) AS query_sample,
                sum_no_index_used AS no_index_count,
                ROUND(sum_timer_wait/1000000000000, 3) AS total_time_sec
            FROM performance_schema.events_statements_summary_by_digest
            WHERE schema_name = %s
              AND sum_no_index_used > 0
            ORDER BY sum_timer_wait DESC
            LIMIT 10
        """
        cursor.execute(query, (self.db_config['database'],))
        results = cursor.fetchall()
        self.logger.info("Statements that may need an index:")
        for result in results:
            self.logger.info(f"Query: {result['query_sample']}")
            self.logger.info(f"Executions without an index: {result['no_index_count']}")
            self.logger.info(f"Total time: {result['total_time_sec']:.2f}s")
        cursor.close()
        conn.close()


# Usage example
db_config = {
    'host': 'localhost',
    'user': 'admin',
    'password': 'password',
    'database': 'ecommerce'
}
optimizer = QueryOptimizer(db_config)
optimizer.analyze_slow_queries()
optimizer.check_missing_indexes()
Summary and best practices
Performance tuning workflow
- Monitor and identify: use the slow query log and the Performance Schema to find problem queries
- Analyze the execution plan: use EXPLAIN to understand the access path
- Optimize indexes: design an appropriate indexing strategy
- Rewrite queries: restructure the SQL itself
- Verify the results: compare performance metrics before and after the change (see the sketch below)
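A minimal verification sketch (the snapshot table name is illustrative): capture the current digest statistics, apply the change and replay the workload, then compare; for a clean comparison, reset the digest table right after taking the snapshot, as shown earlier.
-- perf_baseline is an illustrative snapshot table
CREATE TABLE perf_baseline AS
SELECT digest, LEFT(digest_text, 80) AS query_sample,
       count_star, avg_timer_wait, sum_rows_examined
FROM performance_schema.events_statements_summary_by_digest
WHERE schema_name = 'ecommerce';
-- ... apply the index or rewrite, then run the workload again ...
SELECT b.query_sample,
       ROUND(b.avg_timer_wait/1000000000000, 3) AS before_avg_s,
       ROUND(d.avg_timer_wait/1000000000000, 3) AS after_avg_s
FROM perf_baseline b
JOIN performance_schema.events_statements_summary_by_digest d USING (digest)
WHERE d.schema_name = 'ecommerce'
ORDER BY b.avg_timer_wait DESC
LIMIT 10;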
Key optimization principles
- Indexing: follow the leftmost-prefix rule and avoid over-indexing
- Query design: touch as little data as possible and avoid unnecessary computation
- Data types: choose appropriate types and lengths
- Bulk operations: prefer multi-row inserts and set-based updates
- Monitoring: keep measuring and re-optimize regularly
Query performance tuning is an ongoing process; an effective strategy always has to be tailored to the specific workload and data characteristics.