数据库卡住了,最容易排查的就是慢SQL阻塞,processlist排序即可;次之,则是长事务未提交,导致锁等待,这种查询information_schema.innodb_trx按照trx_started排序即可找出根源;但往往有一些锁等待,前面两种方式很难定位出来。
1. Waiting for global read lock
触发:有线程执行flush table with read lock(FTWRL),mysqldump、xtrabackup都可能触发
# 获取全局锁来源
select OBJECT_TYPE,OBJECT_SCHEMA,OBJECT_NAME,LOCK_STATUS,
(SELECT PROCESSLIST_ID FROM performance_schema.threads WHERE THREAD_ID = t.OWNER_THREAD_ID) AS pid,
(SELECT PROCESSLIST_USER FROM performance_schema.threads WHERE THREAD_ID = t.OWNER_THREAD_ID) AS user
FROM performance_schema.metadata_locks AS t where OBJECT_TYPE='GLOBAL' AND LOCK_STATUS='GRANTED';
# KILL上面的pid
2. Waiting for backup lock
触发:物理冷备(xtrabackup、mysqlbackup)获取一致性快照,LOCK INSTANCE FOR BACKUP
# 查询BACKUP LOCK
select OBJECT_TYPE,OBJECT_SCHEMA,OBJECT_NAME,LOCK_STATUS,
(SELECT PROCESSLIST_ID FROM performance_schema.threads WHERE THREAD_ID = t.OWNER_THREAD_ID) AS pid,
(SELECT PROCESSLIST_USER FROM performance_schema.threads WHERE THREAD_ID = t.OWNER_THREAD_ID) AS user
FROM performance_schema.metadata_locks AS t where OBJECT_TYPE='BACKUP LOCK' AND LOCK_STATUS='GRANTED';
# KILL上面的pid
3. Waiting for table metadata lock
触发:普通查询、变更都会持有表的MDL,长事务、lock tables t read
# 找出被阻塞的block_processlist_id
SELECT
-- 锁对象公共信息
w.lock_object_type AS object_type,
w.table_schema AS table_schema,
w.table_name AS table_name,
w.metadata_lock_type AS lock_type,
-- 等待锁的线程(被阻塞)
w.lock_status AS status,
w.connection_id AS processlist_id,
w.client_ip AS client_ip,
concat(left(w.current_sql,20),'...') AS `sql`,
-- 持有锁的线程(阻塞源)
g.lock_status AS hold_status,
g.connection_id AS "block_processlist_id(*)",
g.client_ip AS block_client_ip
,concat(left(g.current_sql,20),'...') AS block_sql
FROM
(
SELECT
ml.OBJECT_TYPE AS lock_object_type,
ml.OBJECT_SCHEMA AS table_schema,
ml.OBJECT_NAME AS table_name,
ml.LOCK_TYPE AS metadata_lock_type,
ml.LOCK_STATUS AS lock_status,
ml.LOCK_DURATION AS lock_duration,
t.PROCESSLIST_ID AS connection_id,
t.PROCESSLIST_HOST AS client_ip,
COALESCE(esc.SQL_TEXT, t.PROCESSLIST_INFO) AS current_sql
FROM performance_schema.metadata_locks ml
JOIN performance_schema.threads t ON ml.OWNER_THREAD_ID = t.THREAD_ID
LEFT JOIN performance_schema.events_statements_current esc ON t.THREAD_ID = esc.THREAD_ID
WHERE ml.LOCK_STATUS = 'PENDING'
AND ml.OBJECT_TYPE IN ('TABLE','SCHEMA')
AND t.PROCESSLIST_ID IS NOT NULL
) w
JOIN
(
SELECT
ml.OBJECT_TYPE AS lock_object_type,
ml.OBJECT_SCHEMA AS table_schema,
ml.OBJECT_NAME AS table_name,
ml.LOCK_TYPE AS metadata_lock_type,
ml.LOCK_STATUS AS lock_status,
ml.LOCK_DURATION AS lock_duration,
t.PROCESSLIST_ID AS connection_id,
t.PROCESSLIST_HOST AS client_ip,
COALESCE(esc.SQL_TEXT, t.PROCESSLIST_INFO) AS current_sql
FROM performance_schema.metadata_locks ml
JOIN performance_schema.threads t ON ml.OWNER_THREAD_ID = t.THREAD_ID
LEFT JOIN performance_schema.events_statements_current esc ON t.THREAD_ID = esc.THREAD_ID
WHERE ml.LOCK_STATUS = 'GRANTED'
AND ml.OBJECT_TYPE IN ('TABLE','SCHEMA')
AND t.PROCESSLIST_ID IS NOT NULL
) g
ON w.lock_object_type = g.lock_object_type
AND w.table_schema = g.table_schema
AND w.table_name = g.table_name
AND w.lock_duration = g.lock_duration
AND w.connection_id != g.connection_id
ORDER BY w.connection_id;
# kill上一步的block_processlist_id

Comments