When "utf8mb4" is available, use it as the default client charset when invoking standalone "mysql" commands
Summary: Fixes T13390. We have some old code which hard-codes the client charset instead of dynamically selecting between "utf8mb4" and "utf8". This can lead to dumping utf8mb4 data over a utf8 connection in `bin/storage dump`, which can corrupt some emoji/whales. Instead, prefer "utf8mb4" when it is available.
Test Plan: Ran `bin/storage dump` and `bin/storage shell`, and saw the sub-commands select utf8mb4 as the client charset.
Maniphest Tasks: T13390
Differential Revision: https://secure.phabricator.com/D20742
This commit is contained in:
@@ -298,6 +298,14 @@ final class PhabricatorStorageManagementAPI extends Phobject {
|
||||
return self::isCharacterSetAvailableOnConnection($character_set, $conn);
|
||||
}
|
||||
|
||||
/**
 * Pick the character set to hand to standalone MySQL client commands.
 *
 * @return string "utf8mb4" when isCharacterSetAvailable() reports it as
 *   available, otherwise "utf8".
 */
public function getClientCharset() {
  $preferred = 'utf8mb4';

  // Fall back to plain "utf8" when the preferred charset is not available.
  return $this->isCharacterSetAvailable($preferred) ? $preferred : 'utf8';
}
|
||||
|
||||
public static function isCharacterSetAvailableOnConnection(
|
||||
$character_set,
|
||||
AphrontDatabaseConnection $conn) {
|
||||
|
||||
@@ -179,7 +179,9 @@ final class PhabricatorStorageManagementDumpWorkflow
|
||||
$argv = array();
|
||||
$argv[] = '--hex-blob';
|
||||
$argv[] = '--single-transaction';
|
||||
$argv[] = '--default-character-set=utf8';
|
||||
|
||||
$argv[] = '--default-character-set';
|
||||
$argv[] = $api->getClientCharset();
|
||||
|
||||
if ($args->getArg('for-replica')) {
|
||||
$argv[] = '--master-data';
|
||||
|
||||
@@ -31,8 +31,8 @@ final class PhabricatorStorageManagementShellWorkflow
|
||||
}
|
||||
|
||||
return phutil_passthru(
|
||||
'mysql --protocol=TCP --default-character-set=utf8mb4 '.
|
||||
'-u %s %C -h %s %C',
|
||||
'mysql --protocol=TCP --default-character-set %R -u %s %C -h %s %C',
|
||||
$api->getClientCharset(),
|
||||
$api->getUser(),
|
||||
$flag_password,
|
||||
$host,
|
||||
|
||||
Reference in New Issue
Block a user