latin1,排序规则 latin1_swedish_ciutf8mb4,排序规则 utf8mb4_0900_ai_ci-- 旧表可能是混合编码
SHOW CREATE TABLE old_table;
-- 可能显示多种字符集:latin1, utf8, utf8mb3
陷阱3:utf8的含义变化
utf8 = UTF-8编码,最多3字节(实际是utf8mb3)utf8 是 utf8mb3 的别名,4字节表情符号无法存储-- 检查所有表的字符集
SELECT
TABLE_SCHEMA,
TABLE_NAME,
TABLE_COLLATION,
CREATE_OPTIONS
FROM information_schema.TABLES
WHERE TABLE_SCHEMA NOT IN ('mysql', 'information_schema', 'performance_schema', 'sys')
AND TABLE_COLLATION LIKE 'latin1%';
-- 检查列级字符集不一致
SELECT
TABLE_SCHEMA,
TABLE_NAME,
COLUMN_NAME,
COLLATION_NAME
FROM information_schema.COLUMNS
WHERE COLLATION_NAME IS NOT NULL
AND TABLE_SCHEMA = 'your_database'
GROUP BY TABLE_SCHEMA, TABLE_NAME, COLLATION_NAME;
检查2:存储过程/函数/触发器
-- 检查对象定义中的字符集
SELECT
ROUTINE_SCHEMA,
ROUTINE_NAME,
COLLATION_CONNECTION,
DATABASE_COLLATION
FROM information_schema.ROUTINES
WHERE ROUTINE_SCHEMA = 'your_database';
检查3:系统变量检查
-- 检查当前字符集设置
SHOW VARIABLES LIKE 'character_set%';
SHOW VARIABLES LIKE 'collation%';
-- 升级前统一字符集
ALTER DATABASE your_database
CHARACTER SET = utf8mb4
COLLATE = utf8mb4_0900_ai_ci;
-- 转换所有表
SELECT CONCAT(
'ALTER TABLE `', TABLE_SCHEMA, '`.`', TABLE_NAME,
'` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci;'
) AS alter_statement
FROM information_schema.TABLES
WHERE TABLE_SCHEMA = 'your_database'
AND TABLE_TYPE = 'BASE TABLE';
方案B:逐步迁移(推荐生产环境)
# 1. 使用mysqldump导出
mysqldump --default-character-set=utf8mb4 \
--skip-set-charset \
--routines \
--triggers \
--hex-blob \
your_database > dump.sql
# 2. 修改dump文件头部
sed -i 's/CHARSET=latin1/CHARSET=utf8mb4/g' dump.sql
sed -i 's/CHARSET=utf8/CHARSET=utf8mb4/g' dump.sql
# 3. 在MySQL 8.0中导入
mysql --default-character-set=utf8mb4 your_database < dump.sql
方案C:使用mysqlsh升级检查器(推荐)
# 安装MySQL Shell
wget https://dev.mysql.com/get/mysql-shell-8.0.34-linux-glibc2.12-x86-64bit.tar.gz
# 运行升级检查器
mysqlsh -- util checkForServerUpgrade \
--target-version=8.0.34 \
--output-format=JSON \
--config-path=/etc/my.cnf
-- utf8mb4字符占4字节,索引长度767字节限制
-- 需要调整innodb_large_prefix
SET GLOBAL innodb_large_prefix = ON;
SET GLOBAL innodb_default_row_format = DYNAMIC;
-- 或重建表
ALTER TABLE your_table
ROW_FORMAT=DYNAMIC
CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci;
问题2:排序规则冲突
-- 创建不同排序规则的表时
CREATE TABLE t1 (
c1 VARCHAR(100) COLLATE utf8mb4_0900_ai_ci,
c2 VARCHAR(100) COLLATE utf8mb4_bin
);
-- 明确指定排序规则
SELECT c1, c2
FROM t1
WHERE c1 = c2 COLLATE utf8mb4_0900_ai_ci;
问题3:存储过程中的字符集
-- 创建存储过程时明确指定
DELIMITER $$
CREATE PROCEDURE example()
CHARACTER SET utf8mb4
COLLATE utf8mb4_0900_ai_ci
BEGIN
-- 过程体
END$$
DELIMITER ;
-- 1. 验证字符集一致性
SELECT
TABLE_SCHEMA,
TABLE_NAME,
COLUMN_NAME,
CHARACTER_SET_NAME,
COLLATION_NAME
FROM information_schema.COLUMNS
WHERE TABLE_SCHEMA = 'your_database'
AND CHARACTER_SET_NAME != 'utf8mb4';
-- 2. 验证数据完整性
-- 随机抽样检查
SELECT * FROM your_table
WHERE HEX(column_name) REGEXP '^(..)*[89ABCDEF]'
LIMIT 10;
-- 3. 性能基准测试
EXPLAIN SELECT * FROM your_table
WHERE text_column LIKE '%测试%';
-- 4. 检查外键约束
SELECT
TABLE_NAME,
COLUMN_NAME,
CONSTRAINT_NAME,
REFERENCED_TABLE_NAME,
REFERENCED_COLUMN_NAME
FROM information_schema.KEY_COLUMN_USAGE
WHERE TABLE_SCHEMA = 'your_database'
AND REFERENCED_TABLE_NAME IS NOT NULL;
# 1. 备份新版本数据
mysqldump --default-character-set=utf8mb4 \
your_database > backup_after_upgrade.sql
# 2. 如果需要回滚
# 安装MySQL 5.7
# 导入升级前的备份
mysql --default-character-set=latin1 your_database < backup_before_upgrade.sql
?useUnicode=true&characterEncoding=UTF-8-- 如果升级后出现乱码
-- 1. 停止应用写入
-- 2. 检查字符集设置
SHOW VARIABLES WHERE Variable_name LIKE 'character_set_%'
OR Variable_name LIKE 'collation%';
-- 3. 临时恢复措施
SET NAMES latin1;
-- 或根据原字符集调整连接
-- 4. 使用convert函数修复
UPDATE your_table
SET column_name = CONVERT(
CAST(column_name AS BINARY)
USING utf8mb4
)
WHERE id = ?;
关键建议:在升级前务必备份数据,并在测试环境充分验证。字符集问题可能导致数据损坏,必须谨慎操作。