跳到主要内容

MySQL 事务性能优化与监控

性能优化与监控

监控关键指标

// 事务性能监控的实际实现
func monitorTransactionPerformance() {

// 1. 长事务监控 - 找出执行时间超过阈值的事务
longTransactionsQuery := `
SELECT
trx_id,
trx_started,
TIME_TO_SEC(TIMEDIFF(NOW(), trx_started)) as duration_seconds,
trx_tables_in_use,
trx_tables_locked,
trx_query
FROM information_schema.innodb_trx
WHERE TIME_TO_SEC(TIMEDIFF(NOW(), trx_started)) > 60
ORDER BY trx_started;
`

// 2. Undo Log 空间监控 - 检查 undo 表空间使用情况
undoSpaceQuery := `
SELECT
tablespace_name,
file_name,
ROUND(file_size/1024/1024/1024, 2) as size_gb,
ROUND(allocated_size/1024/1024/1024, 2) as allocated_gb
FROM information_schema.files
WHERE tablespace_name LIKE 'innodb_undo%'
ORDER BY file_size DESC;
`

// 3. Redo Log 写入压力监控
redoLogStatusQuery := `
SHOW ENGINE INNODB STATUS;
-- 关注以下指标:
-- Log sequence number: 当前 LSN
-- Log flushed up to: 已刷盘 LSN
-- Pages flushed up to: 已刷页 LSN
-- Last checkpoint at: 最后检查点 LSN
`

// 4. 锁等待监控 - 检查当前锁冲突情况
lockWaitQuery := `
SELECT
r.trx_id as requesting_trx_id,
r.trx_mysql_thread_id as requesting_thread,
r.trx_query as requesting_query,
b.trx_id as blocking_trx_id,
b.trx_mysql_thread_id as blocking_thread,
b.trx_query as blocking_query,
l.lock_table,
l.lock_index,
l.lock_mode
FROM information_schema.innodb_lock_waits w
INNER JOIN information_schema.innodb_trx r
ON w.requesting_trx_id = r.trx_id
INNER JOIN information_schema.innodb_trx b
ON w.blocking_trx_id = b.trx_id
INNER JOIN information_schema.innodb_locks l
ON w.blocking_lock_id = l.lock_id;
`

// 5. 主从延迟监控
replicationDelayQuery := `
SHOW SLAVE STATUS;
-- 关注指标:
-- Seconds_Behind_Master: 主从延迟秒数
-- Master_Log_File, Read_Master_Log_Pos: 主库位置
-- Relay_Log_File, Relay_Log_Pos: 中继日志位置
-- Exec_Master_Log_Pos: 从库执行位置
`

// 6. 性能指标计算
performanceMetrics := struct {
TransactionThroughput float64 `json:"tps"` // 每秒事务数
RedoLogWriteRate float64 `json:"redo_mb_per_s"` // Redo Log 写入速率
UndoLogSize float64 `json:"undo_size_gb"` // Undo Log 总大小
LockWaitTime float64 `json:"avg_lock_wait"` // 平均锁等待时间
ReplicationDelay int `json:"repl_delay_s"` // 主从延迟
}{
TransactionThroughput: calculateTPS(),
RedoLogWriteRate: calculateRedoWriteRate(),
UndoLogSize: calculateUndoSize(),
LockWaitTime: calculateAvgLockWait(),
ReplicationDelay: getReplicationDelay(),
}

// 7. 告警阈值设置
alerts := []Alert{
{
Metric: "long_transaction",
Threshold: 300, // 5分钟
Action: "发送告警,可能需要 kill 事务",
},
{
Metric: "undo_size",
Threshold: 10, // 10GB
Action: "检查是否有长事务阻止清理",
},
{
Metric: "replication_delay",
Threshold: 60, // 60秒
Action: "检查网络和从库性能",
},
{
Metric: "lock_wait_time",
Threshold: 30, // 30秒
Action: "分析锁冲突,优化查询",
},
}
}

// 工具函数示例
func calculateTPS() float64 {
// 通过 SHOW GLOBAL STATUS LIKE 'Com_commit' 计算
// TPS = (当前提交数 - 上次提交数) / 时间间隔
return 0.0
}

func calculateRedoWriteRate() float64 {
// 通过 SHOW ENGINE INNODB STATUS 中的 LSN 增长计算
// 写入速率 = LSN 增长量 / 时间间隔 / 1024 / 1024
return 0.0
}