-- Repair script for mojibake in admin_chat_messages.
-- Case handled here: UTF-8 Polish text was previously decoded as cp1250 before being stored.
-- Example: "też" became "teĹĽ", "chmurkę" became "chmurkÄ™".
--
-- Usage:
-- 1. Run the preview SELECT (Step 1) first and verify that repaired_preview_cp1250 looks correct.
-- 2. Create the backup table (Step 2) and run the backup INSERT (Step 3).
-- 3. Run the UPDATE (Step 4), then the verification SELECT (Step 5).
-- 4. If your preview looks better in repaired_preview_latin1 than in repaired_preview_cp1250,
--    use the latin1 variant from the commented section at the bottom instead.

-- Make togethere_cloud the default schema; the statements below use
-- unqualified table names and therefore resolve against it.
USE togethere_cloud;

-- Step 1: preview suspicious rows before any update.
-- Read-only. Lists each row matching the mojibake filter together with both
-- repair variants (cp1250 and latin1 reinterpretation) side by side, newest
-- id first, so the better-looking variant can be chosen.
SELECT
    m.id,
    m.username,
    m.message AS current_message,
    CONVERT(CAST(CONVERT(m.message USING cp1250) AS BINARY) USING utf8mb4) AS repaired_preview_cp1250,
    CONVERT(CAST(CONVERT(m.message USING latin1) AS BINARY) USING utf8mb4) AS repaired_preview_latin1,
    m.created_at
FROM admin_chat_messages AS m
WHERE m.message REGEXP 'Ä|Å|Ã|â|Ĺ|Ć|Ł'
ORDER BY m.id DESC;

-- Step 2: create a backup table if it does not exist yet.
-- LIKE creates an empty table based on the definition of admin_chat_messages;
-- an already existing backup table is left untouched.
CREATE TABLE IF NOT EXISTS admin_chat_messages_encoding_backup
    LIKE admin_chat_messages;

-- Step 3: backup only suspicious rows before repair.
-- Copies every row matching the mojibake filter into the backup table and
-- skips ids that are already backed up, so the statement can be re-run
-- without duplicating rows or replacing the first (pre-repair) copy.
-- SELECT src.* relies on the backup table having the same column layout as
-- admin_chat_messages, which holds when it was created by the LIKE statement
-- in Step 2.
-- NOT EXISTS is used instead of NOT IN: a NULL id in the subquery result would
-- make NOT IN evaluate to UNKNOWN for every row and silently back up nothing.
INSERT INTO admin_chat_messages_encoding_backup
SELECT src.*
FROM admin_chat_messages AS src
WHERE src.message REGEXP 'Ä|Å|Ã|â|Ĺ|Ć|Ł'
  AND NOT EXISTS (
      SELECT 1
      FROM admin_chat_messages_encoding_backup AS bak
      WHERE bak.id = src.id
  );

-- Step 4: repair messages using cp1250 reinterpretation.
-- The stored text is converted to cp1250, the resulting bytes are taken as
-- raw binary, and that binary is then read as utf8mb4.
-- Guards:
--   * only rows matching the mojibake filter are considered;
--   * only rows that already have a copy in the backup table are modified, so
--     every change made here can be undone with the rollback at the bottom of
--     this script (rows added after Step 3 are left alone until backed up);
--   * the <> comparison skips rows the conversion would leave unchanged. It
--     also skips rows for which the conversion yields NULL, because a
--     comparison with NULL is never true, so message is not overwritten
--     with NULL.
UPDATE admin_chat_messages AS m
SET m.message = CONVERT(CAST(CONVERT(m.message USING cp1250) AS BINARY) USING utf8mb4)
WHERE m.message REGEXP 'Ä|Å|Ã|â|Ĺ|Ć|Ł'
  AND m.message <> CONVERT(CAST(CONVERT(m.message USING cp1250) AS BINARY) USING utf8mb4)
  AND EXISTS (
      SELECT 1
      FROM admin_chat_messages_encoding_backup AS bak
      WHERE bak.id = m.id
  );

-- Step 5: verify result after repair.
-- Shows the current state of every row that has a copy in the backup table,
-- newest id first.
SELECT
    m.id,
    m.username,
    m.message,
    m.created_at
FROM admin_chat_messages AS m
WHERE EXISTS (
    SELECT 1
    FROM admin_chat_messages_encoding_backup AS bak
    WHERE bak.id = m.id
)
ORDER BY m.id DESC;

-- Optional rollback if needed.
-- Copies the listed columns back from the backup table for every row that has a backup copy.
-- UPDATE admin_chat_messages m
-- JOIN admin_chat_messages_encoding_backup b ON b.id = m.id
-- SET m.user_id = b.user_id,
--     m.username = b.username,
--     m.message = b.message,
--     m.created_at = b.created_at,
--     m.reply_to_id = b.reply_to_id,
--     m.file_name = b.file_name,
--     m.file_mime = b.file_mime,
--     m.file_size = b.file_size,
--     m.file_data = b.file_data,
--     m.updated_at = b.updated_at,
--     m.is_hearted = b.is_hearted,
--     m.hearted_by_user_id = b.hearted_by_user_id,
--     m.hearted_by_username = b.hearted_by_username,
--     m.hearted_at = b.hearted_at;

-- Optional alternative for cases where the preview shows latin1/cp1252-style mojibake instead.
-- UPDATE admin_chat_messages
-- SET message = CONVERT(CAST(CONVERT(message USING latin1) AS BINARY) USING utf8mb4)
-- WHERE message REGEXP 'Ä|Å|Ã|â|Ĺ|Ć|Ł'
-- AND message <> CONVERT(CAST(CONVERT(message USING latin1) AS BINARY) USING utf8mb4);