MySQL问题排查-性能问题诊断
大约 5 分钟
MySQL问题排查-性能问题诊断
业务场景引入
性能问题是MySQL运维中最常见的挑战:
- 查询缓慢:用户反馈页面加载时间过长
- 连接超时:高并发时出现连接池耗尽
- CPU飙升:服务器CPU使用率持续100%
- 锁等待:业务操作频繁超时失败
性能问题分类
问题分析框架
关键性能指标
指标类别 | 关键指标 | 正常范围 | 告警阈值 |
---|---|---|---|
响应时间 | 平均查询时间 | <100ms | >500ms |
吞吐量 | QPS | 根据业务 | 下降50% |
连接 | 活跃连接数 | <80% | >90% |
缓存 | 缓冲池命中率 | >95% | <90% |
慢查询诊断
慢查询日志分析
#!/bin/bash
# 慢查询分析脚本
SLOW_LOG="/var/log/mysql/slow.log"
# 分析最慢的10个查询
echo "=== Top 10 慢查询 (按执行时间) ==="
mysqldumpslow -s t -t 10 $SLOW_LOG
# 分析最频繁的慢查询
echo "=== Top 10 慢查询 (按执行次数) ==="
mysqldumpslow -s c -t 10 $SLOW_LOG
# 实时监控慢查询
tail -f $SLOW_LOG | grep -E "(Query_time|User@Host|^use)"
Performance Schema分析
-- 查找最耗时的SQL语句
SELECT
digest_text,
count_star as exec_count,
ROUND(avg_timer_wait/1000000000, 3) as avg_time_sec,
ROUND(sum_timer_wait/1000000000, 3) as total_time_sec,
avg_rows_examined,
avg_rows_sent
FROM performance_schema.events_statements_summary_by_digest
WHERE schema_name = 'ecommerce'
ORDER BY avg_timer_wait DESC
LIMIT 10;
-- 查找扫描行数最多的查询
SELECT
digest_text,
count_star,
avg_rows_examined,
avg_rows_sent,
ROUND(avg_rows_examined/avg_rows_sent, 2) as scan_ratio
FROM performance_schema.events_statements_summary_by_digest
WHERE avg_rows_examined > 10000
ORDER BY avg_rows_examined DESC
LIMIT 10;
-- 查看当前执行中的查询
SELECT
processlist_id,
processlist_user,
processlist_host,
processlist_command,
processlist_time,
LEFT(processlist_info, 100) as query_snippet
FROM performance_schema.events_statements_current
WHERE processlist_command != 'Sleep'
ORDER BY processlist_time DESC;
索引使用分析
-- 未使用索引的表
SELECT
object_schema,
object_name,
count_read as table_reads,
count_fetch as index_reads
FROM performance_schema.table_io_waits_summary_by_table
WHERE object_schema = 'ecommerce'
AND count_fetch > 0
AND count_read/count_fetch > 10
ORDER BY count_read DESC;
-- 从未使用的索引
SELECT
object_schema,
object_name,
index_name
FROM performance_schema.table_io_waits_summary_by_index_usage
WHERE object_schema = 'ecommerce'
AND count_fetch = 0
AND count_insert = 0
AND count_update = 0
AND count_delete = 0;
锁等待诊断
死锁和锁等待分析
-- 当前锁等待情况
SELECT
r.trx_id as waiting_trx,
r.trx_mysql_thread_id as waiting_thread,
LEFT(r.trx_query, 100) as waiting_query,
b.trx_id as blocking_trx,
b.trx_mysql_thread_id as blocking_thread,
LEFT(b.trx_query, 100) as blocking_query,
TIMESTAMPDIFF(SECOND, r.trx_wait_started, NOW()) as wait_seconds
FROM information_schema.innodb_lock_waits w
INNER JOIN information_schema.innodb_trx b ON b.trx_id = w.blocking_trx_id
INNER JOIN information_schema.innodb_trx r ON r.trx_id = w.requesting_trx_id;
-- 长时间运行的事务
SELECT
trx_id,
trx_state,
trx_started,
TIMESTAMPDIFF(SECOND, trx_started, NOW()) as duration_sec,
trx_rows_locked,
trx_rows_modified,
LEFT(trx_query, 100) as query_snippet
FROM information_schema.innodb_trx
WHERE TIMESTAMPDIFF(SECOND, trx_started, NOW()) > 60
ORDER BY trx_started;
-- 查看死锁信息
SHOW ENGINE INNODB STATUS;
锁监控脚本
#!/usr/bin/env python3
import mysql.connector
import time
from datetime import datetime
class MySQLLockMonitor:
def __init__(self, db_config):
self.db_config = db_config
def check_lock_waits(self):
"""检查锁等待情况"""
conn = mysql.connector.connect(**self.db_config)
cursor = conn.cursor(dictionary=True)
cursor.execute("""
SELECT
r.trx_id as waiting_trx,
r.trx_mysql_thread_id as waiting_thread,
SUBSTRING(r.trx_query, 1, 100) as waiting_query,
b.trx_id as blocking_trx,
TIMESTAMPDIFF(SECOND, r.trx_wait_started, NOW()) as wait_seconds
FROM information_schema.innodb_lock_waits w
INNER JOIN information_schema.innodb_trx b ON b.trx_id = w.blocking_trx_id
INNER JOIN information_schema.innodb_trx r ON r.trx_id = w.requesting_trx_id
""")
lock_waits = cursor.fetchall()
if lock_waits:
print(f"发现 {len(lock_waits)} 个锁等待:")
for wait in lock_waits:
print(f"等待事务: {wait['waiting_trx']}, 等待时间: {wait['wait_seconds']}秒")
print(f"查询: {wait['waiting_query']}")
print("-" * 50)
cursor.close()
conn.close()
return lock_waits
def monitor_locks(self, interval=10):
"""持续监控锁情况"""
try:
while True:
print(f"\n=== 锁监控 - {datetime.now()} ===")
lock_waits = self.check_lock_waits()
if not lock_waits:
print("✓ 未发现锁等待")
time.sleep(interval)
except KeyboardInterrupt:
print("\n监控已停止")
# 使用示例
monitor = MySQLLockMonitor({
'host': 'localhost',
'user': 'monitor_user',
'password': 'password'
})
monitor.check_lock_waits()
资源瓶颈诊断
系统资源分析
#!/bin/bash
# MySQL资源分析脚本
analyze_mysql_performance() {
echo "=== MySQL性能分析 ==="
# MySQL进程资源使用
MYSQL_PID=$(pgrep mysqld | head -1)
echo "MySQL PID: $MYSQL_PID"
ps -p $MYSQL_PID -o pid,pcpu,pmem,time,cmd
# InnoDB缓冲池命中率
echo -e "\n缓冲池命中率:"
mysql -e "
SELECT ROUND(
(1 - (
SELECT VARIABLE_VALUE FROM performance_schema.global_status
WHERE VARIABLE_NAME = 'Innodb_buffer_pool_reads'
) / (
SELECT VARIABLE_VALUE FROM performance_schema.global_status
WHERE VARIABLE_NAME = 'Innodb_buffer_pool_read_requests'
)) * 100, 2
) as hit_ratio_percent;
"
# 连接使用情况
echo -e "\n连接使用情况:"
mysql -e "
SELECT
(SELECT VARIABLE_VALUE FROM performance_schema.global_status WHERE VARIABLE_NAME = 'Threads_connected') as current_connections,
(SELECT VARIABLE_VALUE FROM performance_schema.global_variables WHERE VARIABLE_NAME = 'max_connections') as max_connections;
"
# 磁盘IO统计
echo -e "\n磁盘IO:"
iostat -x 1 2 | tail -5
}
analyze_mysql_performance
自动诊断工具
#!/usr/bin/env python3
import mysql.connector
import psutil
class MySQLDiagnostic:
def __init__(self, db_config):
self.db_config = db_config
self.alerts = []
def check_buffer_pool(self):
"""检查缓冲池命中率"""
conn = mysql.connector.connect(**self.db_config)
cursor = conn.cursor()
cursor.execute("""
SELECT ROUND((1 - (
SELECT VARIABLE_VALUE FROM performance_schema.global_status
WHERE VARIABLE_NAME = 'Innodb_buffer_pool_reads'
) / (
SELECT VARIABLE_VALUE FROM performance_schema.global_status
WHERE VARIABLE_NAME = 'Innodb_buffer_pool_read_requests'
)) * 100, 2) as hit_ratio
""")
hit_ratio = cursor.fetchone()[0]
if hit_ratio < 95:
self.alerts.append(f"缓冲池命中率过低: {hit_ratio}%")
cursor.close()
conn.close()
def check_connections(self):
"""检查连接使用率"""
conn = mysql.connector.connect(**self.db_config)
cursor = conn.cursor()
cursor.execute("""
SELECT
(SELECT VARIABLE_VALUE FROM performance_schema.global_status WHERE VARIABLE_NAME = 'Threads_connected') as current,
(SELECT VARIABLE_VALUE FROM performance_schema.global_variables WHERE VARIABLE_NAME = 'max_connections') as max_conn
""")
result = cursor.fetchone()
usage_rate = int(result[0]) / int(result[1])
if usage_rate > 0.8:
self.alerts.append(f"连接使用率过高: {usage_rate:.1%}")
cursor.close()
conn.close()
def check_system_resources(self):
"""检查系统资源"""
cpu_percent = psutil.cpu_percent()
memory_percent = psutil.virtual_memory().percent
if cpu_percent > 80:
self.alerts.append(f"CPU使用率过高: {cpu_percent}%")
if memory_percent > 85:
self.alerts.append(f"内存使用率过高: {memory_percent}%")
def run_diagnostic(self):
"""运行诊断"""
print("开始MySQL性能诊断...")
self.alerts = []
try:
self.check_buffer_pool()
self.check_connections()
self.check_system_resources()
except Exception as e:
print(f"诊断过程出错: {e}")
if self.alerts:
print(f"\n发现 {len(self.alerts)} 个问题:")
for alert in self.alerts:
print(f"⚠ {alert}")
else:
print("✅ 未发现性能问题")
# 使用示例
diagnostic = MySQLDiagnostic({
'host': 'localhost',
'user': 'monitor_user',
'password': 'password'
})
diagnostic.run_diagnostic()
问题解决方案
常见性能问题及解决方案
1. 慢查询问题
-- 解决方案:添加索引
CREATE INDEX idx_user_date ON orders (user_id, order_date);
-- 优化查询语句
-- 原查询
SELECT * FROM orders WHERE user_id = 1001 AND DATE(order_date) = '2024-03-15';
-- 优化后
SELECT * FROM orders WHERE user_id = 1001 AND order_date >= '2024-03-15' AND order_date < '2024-03-16';
2. 锁等待问题
-- 减少事务持有时间
BEGIN;
SELECT * FROM products WHERE id = 1 FOR UPDATE;
-- 尽快处理业务逻辑
UPDATE products SET stock = stock - 1 WHERE id = 1;
COMMIT;
-- 使用乐观锁
UPDATE products SET stock = stock - 1, version = version + 1
WHERE id = 1 AND version = @old_version;
3. 连接数问题
-- 优化连接池配置
-- 应用层:设置合理的连接池大小
-- 数据库层:调整max_connections
SET GLOBAL max_connections = 2000;
总结与最佳实践
性能诊断流程
- 问题定义:明确性能问题的具体表现
- 数据收集:收集监控指标和日志
- 问题分析:使用工具分析根本原因
- 解决方案:制定优化方案
- 效果验证:验证优化效果
常用诊断工具
- 慢查询日志:识别性能问题SQL
- Performance Schema:详细性能分析
- SHOW PROCESSLIST:查看当前执行状态
- EXPLAIN:分析查询执行计划
性能问题诊断需要系统化的方法和工具,结合监控数据快速定位问题根源。