From 4da74166fe87a366a37b87129a6b8a4629ff4f66 Mon Sep 17 00:00:00 2001 From: epriestley Date: Mon, 21 Nov 2016 17:58:51 -0800 Subject: [PATCH] When storage is partitioned, refuse to serve requests unless web and databases agree on partitioning Summary: Ref T11044. One popular tool in a modern operations environment is Puppet. The primary purpose of this tool is to randomly revert hosts to older or different configurations. Introducing an element of chaotic unpredictability into operations trains staff to be on high alert at all times, rather than lulled into complacency by predictability or consistency. When Puppet reverts a Phabricator host's configuration to an older version, we might start writing data to a lot of crazy places where it shouldn't go. This will create a big sticky mess that is virtually impossible to undo, mostly because we'll get two files with ID 123 or two tasks with ID 456 or whatever else and good luck with that. Instead, after changing the partition layout, require `bin/storage partition` to be run. This writes a copy of the config everywhere. Then, when we start serving web requests, make sure every database has the exact same config. This will foil Puppet by refusing to run requests on hosts it has reverted. Test Plan: - Changed partition configuration. - Ran Phabricator. - FOILED! - Ran `bin/storage partition` to sync config. - Things worked again. 
Reviewers: chad Reviewed By: chad Maniphest Tasks: T11044 Differential Revision: https://secure.phabricator.com/D16910 --- .../20161121.cluster.01.hoststate.sql | 5 +++ src/__phutil_library_map__.php | 2 + .../check/PhabricatorDatabaseSetupCheck.php | 32 ++++++++++++++ .../user/cluster/cluster_partitioning.diviner | 17 +++++++ .../cluster/PhabricatorDatabaseRef.php | 14 ++++-- .../PhabricatorStorageManagementAPI.php | 1 + ...atorStorageManagementPartitionWorkflow.php | 44 +++++++++++++++++++ .../schema/PhabricatorStorageSchemaSpec.php | 14 ++++++ 8 files changed, 126 insertions(+), 3 deletions(-) create mode 100644 resources/sql/autopatches/20161121.cluster.01.hoststate.sql create mode 100644 src/infrastructure/storage/management/workflow/PhabricatorStorageManagementPartitionWorkflow.php diff --git a/resources/sql/autopatches/20161121.cluster.01.hoststate.sql b/resources/sql/autopatches/20161121.cluster.01.hoststate.sql new file mode 100644 index 0000000000..91fb5b4fe3 --- /dev/null +++ b/resources/sql/autopatches/20161121.cluster.01.hoststate.sql @@ -0,0 +1,5 @@ +CREATE TABLE {$NAMESPACE}_meta_data.hoststate ( + stateKey VARCHAR(128) NOT NULL COLLATE {$COLLATE_TEXT}, + stateValue LONGTEXT NOT NULL COLLATE {$COLLATE_TEXT}, + PRIMARY KEY (stateKey) +) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT}; diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php index 7a00b8196f..6f1dc8722b 100644 --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -3795,6 +3795,7 @@ phutil_register_library_map(array( 'PhabricatorStorageManagementDatabasesWorkflow' => 'infrastructure/storage/management/workflow/PhabricatorStorageManagementDatabasesWorkflow.php', 'PhabricatorStorageManagementDestroyWorkflow' => 'infrastructure/storage/management/workflow/PhabricatorStorageManagementDestroyWorkflow.php', 'PhabricatorStorageManagementDumpWorkflow' => 'infrastructure/storage/management/workflow/PhabricatorStorageManagementDumpWorkflow.php', + 
'PhabricatorStorageManagementPartitionWorkflow' => 'infrastructure/storage/management/workflow/PhabricatorStorageManagementPartitionWorkflow.php', 'PhabricatorStorageManagementProbeWorkflow' => 'infrastructure/storage/management/workflow/PhabricatorStorageManagementProbeWorkflow.php', 'PhabricatorStorageManagementQuickstartWorkflow' => 'infrastructure/storage/management/workflow/PhabricatorStorageManagementQuickstartWorkflow.php', 'PhabricatorStorageManagementRenamespaceWorkflow' => 'infrastructure/storage/management/workflow/PhabricatorStorageManagementRenamespaceWorkflow.php', @@ -8977,6 +8978,7 @@ phutil_register_library_map(array( 'PhabricatorStorageManagementDatabasesWorkflow' => 'PhabricatorStorageManagementWorkflow', 'PhabricatorStorageManagementDestroyWorkflow' => 'PhabricatorStorageManagementWorkflow', 'PhabricatorStorageManagementDumpWorkflow' => 'PhabricatorStorageManagementWorkflow', + 'PhabricatorStorageManagementPartitionWorkflow' => 'PhabricatorStorageManagementWorkflow', 'PhabricatorStorageManagementProbeWorkflow' => 'PhabricatorStorageManagementWorkflow', 'PhabricatorStorageManagementQuickstartWorkflow' => 'PhabricatorStorageManagementWorkflow', 'PhabricatorStorageManagementRenamespaceWorkflow' => 'PhabricatorStorageManagementWorkflow', diff --git a/src/applications/config/check/PhabricatorDatabaseSetupCheck.php b/src/applications/config/check/PhabricatorDatabaseSetupCheck.php index 7dbc00c9b2..e48e22196e 100644 --- a/src/applications/config/check/PhabricatorDatabaseSetupCheck.php +++ b/src/applications/config/check/PhabricatorDatabaseSetupCheck.php @@ -205,6 +205,38 @@ final class PhabricatorDatabaseSetupCheck extends PhabricatorSetupCheck { break; } + // If we have more than one master, we require that the cluster database + // configuration written to each database node is exactly the same as the + // one we are running with. 
+ $masters = PhabricatorDatabaseRef::getAllMasterDatabaseRefs(); + if (count($masters) > 1) { + $state_actual = queryfx_one( + $conn_meta, + 'SELECT stateValue FROM %T WHERE stateKey = %s', + PhabricatorStorageManagementAPI::TABLE_HOSTSTATE, + 'cluster.databases'); + if ($state_actual) { + $state_actual = $state_actual['stateValue']; + } + $state_expect = $ref->getPartitionStateForCommit(); + + if ($state_expect !== $state_actual) { + $message = pht( + 'Database host "%s" has a configured cluster state which disagrees '. + 'with the state on this host ("%s"). Run `bin/storage partition` '. + 'to commit local state to the cluster. This host may have started '. + 'with an out-of-date configuration.', + $ref->getRefKey(), + php_uname('n')); + + $this->newIssue('db.state.desync') + ->setName(pht('Cluster Configuration Out of Sync')) + ->setMessage($message) + ->setIsFatal(true); + return true; + } + } } + } diff --git a/src/docs/user/cluster/cluster_partitioning.diviner b/src/docs/user/cluster/cluster_partitioning.diviner index ed92457f69..1579d9d922 100644 --- a/src/docs/user/cluster/cluster_partitioning.diviner +++ b/src/docs/user/cluster/cluster_partitioning.diviner @@ -123,6 +123,21 @@ Not all of the database partition names are the same as the application names. You can get a list of databases with `bin/storage databases` to identify the correct database names. +After you have configured partitioning, it needs to be committed to the +databases. This writes a copy of the configuration to tables on the databases, +preventing errors if a webserver accidentally starts with an old or invalid +configuration. + +To commit the configuration, run this command: + +``` +phabricator/ $ ./bin/storage partition +``` + +Run this command after making any partition or clustering changes. Webservers +will not serve traffic if their configuration and the database configuration +differ. 
+ Launching a new Partition ========================= @@ -135,6 +150,7 @@ To add a new partition, follow these steps: are partitioning, you will need to configure your existing master as the new "default". This will let Phabricator interact with it, but won't send any traffic to it yet. + - Run `bin/storage partition`. - Run `bin/storage upgrade` to initialize the schemata on the new hosts. - Stop writes to the applications you want to move by putting Phabricator in read-only mode, or shutting down the webserver and daemons, or telling @@ -143,6 +159,7 @@ To add a new partition, follow these steps: - Load the data into the application databases on the new master. - Reconfigure the "partition" setup so that Phabricator knows the databases have moved. + - Run `bin/storage partition`. - While still in read-only mode, check that all the data appears to be intact. - Resume writes. diff --git a/src/infrastructure/cluster/PhabricatorDatabaseRef.php b/src/infrastructure/cluster/PhabricatorDatabaseRef.php index 461fe8f17c..1c9e2c922e 100644 --- a/src/infrastructure/cluster/PhabricatorDatabaseRef.php +++ b/src/infrastructure/cluster/PhabricatorDatabaseRef.php @@ -180,6 +180,17 @@ final class PhabricatorDatabaseRef return $this->applicationMap; } + public function getPartitionStateForCommit() { + $state = PhabricatorEnv::getEnvConfig('cluster.databases'); + foreach ($state as $key => $value) { + // Don't store passwords, since we don't care if they differ and + // users may find it surprising. 
+ unset($state[$key]['pass']); + } + + return phutil_json_encode($state); + } + public function setMasterRef(PhabricatorDatabaseRef $master_ref) { $this->masterRef = $master_ref; return $this; @@ -498,9 +509,6 @@ final class PhabricatorDatabaseRef $masters = array(); foreach ($refs as $ref) { - if ($ref->getDisabled()) { - continue; - } if ($ref->getIsMaster()) { $masters[] = $ref; } diff --git a/src/infrastructure/storage/management/PhabricatorStorageManagementAPI.php b/src/infrastructure/storage/management/PhabricatorStorageManagementAPI.php index d7104af323..19d7a98d42 100644 --- a/src/infrastructure/storage/management/PhabricatorStorageManagementAPI.php +++ b/src/infrastructure/storage/management/PhabricatorStorageManagementAPI.php @@ -19,6 +19,7 @@ final class PhabricatorStorageManagementAPI extends Phobject { const COLLATE_FULLTEXT = 'COLLATE_FULLTEXT'; const TABLE_STATUS = 'patch_status'; + const TABLE_HOSTSTATE = 'hoststate'; public function setDisableUTF8MB4($disable_utf8_mb4) { $this->disableUTF8MB4 = $disable_utf8_mb4; diff --git a/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementPartitionWorkflow.php b/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementPartitionWorkflow.php new file mode 100644 index 0000000000..6fae0c6160 --- /dev/null +++ b/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementPartitionWorkflow.php @@ -0,0 +1,44 @@ +<?php + +final class PhabricatorStorageManagementPartitionWorkflow + extends PhabricatorStorageManagementWorkflow { + + protected function didConstruct() { + $this + ->setName('partition') + ->setExamples('**partition** [__options__]') + ->setSynopsis(pht('Commit partition configuration to databases.')) + ->setArguments(array()); + } + + public function didExecute(PhutilArgumentParser $args) { + echo tsprintf( + "%s\n", + pht('Committing configured partition map to databases...')); + + foreach ($this->getMasterAPIs() as $api) { + $ref = $api->getRef(); + $conn = $ref->newManagementConnection(); + + $state = $ref->getPartitionStateForCommit(); + + queryfx( + $conn, + 'INSERT INTO %T.%T (stateKey, stateValue) VALUES 
(%s, %s) + ON DUPLICATE KEY UPDATE stateValue = VALUES(stateValue)', + $api->getDatabaseName('meta_data'), + PhabricatorStorageManagementAPI::TABLE_HOSTSTATE, + 'cluster.databases', + $state); + + echo tsprintf( + "%s\n", + pht( + 'Wrote configuration on database host "%s".', + $ref->getRefKey())); + } + + return 0; + } + +} diff --git a/src/infrastructure/storage/schema/PhabricatorStorageSchemaSpec.php b/src/infrastructure/storage/schema/PhabricatorStorageSchemaSpec.php index df48ce3812..378b49fd34 100644 --- a/src/infrastructure/storage/schema/PhabricatorStorageSchemaSpec.php +++ b/src/infrastructure/storage/schema/PhabricatorStorageSchemaSpec.php @@ -18,6 +18,20 @@ final class PhabricatorStorageSchemaSpec 'unique' => true, ), )); + + $this->buildRawSchema( + 'meta_data', + PhabricatorStorageManagementAPI::TABLE_HOSTSTATE, + array( + 'stateKey' => 'text128', + 'stateValue' => 'text', + ), + array( + 'PRIMARY' => array( + 'columns' => array('stateKey'), + 'unique' => true, + ), + )); } }