From 4ef918e213ed4c349b236e28bc371a16b5457d99 Mon Sep 17 00:00:00 2001 From: epriestley Date: Sun, 3 Jul 2011 09:47:31 -0700 Subject: [PATCH] Add a garbage collector daemon Summary: Phabricator generates a bunch of data that we don't need to keep around forever, add a GC daemon to get rid of it with some basic configuration options. This needs a couple more diffs to get some of the details but I think this is a reasonable start. I also fixed a couple of UI things related to this, e.g. the daemon logs page going crazy when a daemon gets stuck in a loop and dumps tons of data to stdout. Test Plan: - Ran gc daemon in 'phd debug' mode and saw it delete stuff, then sleep once it had cleaned everything up. - Mucked around with TTLs and verified they work correctly. - Viewed gc'd transcripts in the web interface and made sure they displayed okay. - Viewed daemon logs before/after garbage collection. - Running some run-at / run-for tests now, I'll update if the daemon doesn't shut off in ~10-15 minutes. :P Reviewed By: tuomaspelkonen Reviewers: jungejason, tuomaspelkonen, aran CC: aran, tuomaspelkonen, epriestley Differential Revision: 583 --- conf/default.conf.php | 31 ++++ scripts/install/update_phabricator.sh | 1 + src/__phutil_library_map__.php | 2 + .../PhabricatorDaemonLogEventsView.php | 37 ++++- .../daemon/view/daemonlogevents/__init__.php | 1 + .../transcript/HeraldTranscriptController.php | 81 ++++------ .../herald/controller/transcript/__init__.php | 1 + .../configuration/managing_daemons.diviner | 1 + .../PhabricatorGarbageCollectorDaemon.php | 139 ++++++++++++++++++ .../daemon/garbagecollector/__init__.php | 16 ++ 10 files changed, 259 insertions(+), 51 deletions(-) create mode 100644 src/infrastructure/daemon/garbagecollector/PhabricatorGarbageCollectorDaemon.php create mode 100644 src/infrastructure/daemon/garbagecollector/__init__.php diff --git a/conf/default.conf.php b/conf/default.conf.php index da5d7a0c2b..7472c1ecb0 100644 --- a/conf/default.conf.php +++ b/conf/default.conf.php @@ -375,6 +375,37 @@ return array( // silly (but sort of awesome). 'remarkup.enable-embedded-youtube' => false, + +// -- Garbage Collection ---------------------------------------------------- // + + // Phabricator generates various logs and caches in the database which can + // be garbage collected after a while to make the total data size more + // manageable. To run garbage collection, launch a + // PhabricatorGarbageCollector daemon. + + // Since the GC daemon can issue large writes and table scans, you may want to + // run it only during off hours or make sure it is scheduled so it doesn't + // overlap with backups. This determines when the daemon can start running + // each day. + 'gcdaemon.run-at' => '12 AM', + + // How many seconds after 'gcdaemon.run-at' the daemon may collect garbage + // for. By default it runs continuously, but you can set it to run for a + // limited period of time. For instance, if you do backups at 3 AM, you might + // run garbage collection for an hour beforehand. This is not a high-precision + // limit so you may want to leave some room for the GC to actually stop, and + // if you set it to something like 3 seconds you're on your own. + 'gcdaemon.run-for' => 24 * 60 * 60, + + // These 'ttl' keys configure how much old data the GC daemon keeps around. + // Objects older than the ttl will be collected. Set any value to 0 to store + // data indefinitely. + + 'gcdaemon.ttl.herald-transcripts' => 30 * (24 * 60 * 60), + 'gcdaemon.ttl.daemon-logs' => 7 * (24 * 60 * 60), + 'gcdaemon.ttl.differential-render-cache' => 7 * (24 * 60 * 60), + + // -- Customization --------------------------------------------------------- // // Paths to additional phutil libraries to load. diff --git a/scripts/install/update_phabricator.sh b/scripts/install/update_phabricator.sh index 207650d108..ebde400efe 100755 --- a/scripts/install/update_phabricator.sh +++ b/scripts/install/update_phabricator.sh @@ -58,5 +58,6 @@ sudo /etc/init.d/httpd start # $ROOT/phabricator/bin/phd repository-launch-master # $ROOT/phabricator/bin/phd launch metamta +# $ROOT/phabricator/bin/phd launch garbagecollector # $ROOT/phabricator/bin/phd launch 4 taskmaster # $ROOT/phabricator/bin/phd launch ircbot /config/bot.json diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php index af7dba56bd..f38452557b 100644 --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -352,6 +352,7 @@ phutil_register_library_map(array( 'PhabricatorFileURI' => 'applications/files/uri', 'PhabricatorFileUploadController' => 'applications/files/controller/upload', 'PhabricatorFileViewController' => 'applications/files/controller/view', + 'PhabricatorGarbageCollectorDaemon' => 'infrastructure/daemon/garbagecollector', 'PhabricatorGoodForNothingWorker' => 'infrastructure/daemon/workers/worker/goodfornothing', 'PhabricatorHandleObjectSelectorDataView' => 'applications/phid/handle/view/selector', 'PhabricatorHelpController' => 'applications/help/controller/base', @@ -844,6 +845,7 @@ phutil_register_library_map(array( 'PhabricatorFileTransformController' => 'PhabricatorFileController', 'PhabricatorFileUploadController' => 'PhabricatorFileController', 'PhabricatorFileViewController' => 'PhabricatorFileController', + 'PhabricatorGarbageCollectorDaemon' => 'PhabricatorDaemon', 'PhabricatorGoodForNothingWorker' => 'PhabricatorWorker', 'PhabricatorHelpController' => 'PhabricatorController', 'PhabricatorHelpKeyboardShortcutController' => 'PhabricatorHelpController', diff --git a/src/applications/daemon/view/daemonlogevents/PhabricatorDaemonLogEventsView.php b/src/applications/daemon/view/daemonlogevents/PhabricatorDaemonLogEventsView.php index dd09781683..aaebcbec0c 100644 --- a/src/applications/daemon/view/daemonlogevents/PhabricatorDaemonLogEventsView.php +++ b/src/applications/daemon/view/daemonlogevents/PhabricatorDaemonLogEventsView.php @@ -43,11 +43,46 @@ final class PhabricatorDaemonLogEventsView extends AphrontView { } foreach ($this->events as $event) { + + // Limit display log size. If a daemon gets stuck in an output loop this + // page can be like >100MB if we don't truncate stuff. Try to do cheap + // line-based truncation first, and fall back to expensive UTF-8 character + // truncation if that doesn't get things short enough. + + $message = $event->getMessage(); + + $more_lines = null; + $more_chars = null; + $line_limit = 12; + if (substr_count($message, "\n") > $line_limit) { + $message = explode("\n", $message); + $more_lines = count($message) - $line_limit; + $message = array_slice($message, 0, $line_limit); + $message = implode("\n", $message); + } + + $char_limit = 8192; + if (strlen($message) > $char_limit) { + $message = phutil_utf8v($message); + $more_chars = count($message) - $char_limit; + $message = array_slice($message, 0, $char_limit); + $message = implode('', $message); + } + + $more = null; + if ($more_chars) { + $more = number_format($more_chars); + $more = "\n<... {$more} more characters ...>"; + } else if ($more_lines) { + $more = number_format($more_lines); + $more = "\n<... {$more} more lines ...>"; + } + $row = array( phutil_escape_html($event->getLogType()), phabricator_date($event->getEpoch(), $this->user), phabricator_time($event->getEpoch(), $this->user), - str_replace("\n", '
', phutil_escape_html($event->getMessage())), + str_replace("\n", '
', phutil_escape_html($message.$more)), ); if ($this->combinedLog) { diff --git a/src/applications/daemon/view/daemonlogevents/__init__.php b/src/applications/daemon/view/daemonlogevents/__init__.php index dd4e977647..04be32163d 100644 --- a/src/applications/daemon/view/daemonlogevents/__init__.php +++ b/src/applications/daemon/view/daemonlogevents/__init__.php @@ -11,6 +11,7 @@ phutil_require_module('phabricator', 'view/control/table'); phutil_require_module('phabricator', 'view/utils'); phutil_require_module('phutil', 'markup'); +phutil_require_module('phutil', 'utils'); phutil_require_source('PhabricatorDaemonLogEventsView.php'); diff --git a/src/applications/herald/controller/transcript/HeraldTranscriptController.php b/src/applications/herald/controller/transcript/HeraldTranscriptController.php index ced383d98d..bf9f0fcd5d 100644 --- a/src/applications/herald/controller/transcript/HeraldTranscriptController.php +++ b/src/applications/herald/controller/transcript/HeraldTranscriptController.php @@ -42,40 +42,45 @@ class HeraldTranscriptController extends HeraldController { throw new Exception('Uknown transcript!'); } - $field_names = HeraldFieldConfig::getFieldMap(); - $condition_names = HeraldConditionConfig::getConditionMap(); - $action_names = HeraldActionConfig::getActionMap(); - require_celerity_resource('herald-test-css'); - $filter = $this->getFilterPHIDs(); - $this->filterTranscript($xscript, $filter); - $phids = array_merge($filter, $this->getTranscriptPHIDs($xscript)); - $phids = array_unique($phids); - $phids = array_filter($phids); - - $handles = id(new PhabricatorObjectHandleData($phids)) - ->loadHandles(); - $this->handles = $handles; - - $object_xscript = $xscript->getObjectTranscript(); - $nav = $this->buildSideNav(); - $apply_xscript_panel = $this->buildApplyTranscriptPanel( - $xscript); - $nav->appendChild($apply_xscript_panel); + $object_xscript = $xscript->getObjectTranscript(); + if (!$object_xscript) { + $notice = id(new AphrontErrorView()) + ->setSeverity(AphrontErrorView::SEVERITY_NOTICE) + ->setTitle('Old Transcript') + ->appendChild( + '

Details of this transcript have been garbage collected.

'); + $nav->appendChild($notice); + } else { + $filter = $this->getFilterPHIDs(); + $this->filterTranscript($xscript, $filter); + $phids = array_merge($filter, $this->getTranscriptPHIDs($xscript)); + $phids = array_unique($phids); + $phids = array_filter($phids); - $action_xscript_panel = $this->buildActionTranscriptPanel( - $xscript); - $nav->appendChild($action_xscript_panel); + $handles = id(new PhabricatorObjectHandleData($phids)) + ->loadHandles(); + $this->handles = $handles; - $object_xscript_panel = $this->buildObjectTranscriptPanel( - $xscript); - $nav->appendChild($object_xscript_panel); + $apply_xscript_panel = $this->buildApplyTranscriptPanel( + $xscript); + $nav->appendChild($apply_xscript_panel); + + $action_xscript_panel = $this->buildActionTranscriptPanel( + $xscript); + $nav->appendChild($action_xscript_panel); + + $object_xscript_panel = $this->buildObjectTranscriptPanel( + $xscript); + $nav->appendChild($object_xscript_panel); + } /* + TODO $notice = null; if ($xscript->getDryRun()) { @@ -84,30 +89,6 @@ class HeraldTranscriptController extends HeraldController { This was a dry run to test Herald rules, no actions were executed. ; } - - if (!$object_xscript) { - $notice = - - - Details of this transcript have been discarded. Full transcripts - are retained for 30 days. - - {$notice} - ; - } - - - return - -
- renderNavItems()}> - {$notice} - {$apply_xscript_markup} - {$rule_table} - {$object_xscript_table} - -
-
; */ return $this->buildStandardPageResponse( @@ -264,7 +245,7 @@ class HeraldTranscriptController extends HeraldController { foreach ($xscript->getApplyTranscripts() as $id => $apply_xscript) { $rule_id = $apply_xscript->getRuleID(); if ($filter_owned) { - if (!$rule_xscripts[$rule_id]) { + if (empty($rule_xscripts[$rule_id])) { // No associated rule so you can't own this effect. continue; } diff --git a/src/applications/herald/controller/transcript/__init__.php b/src/applications/herald/controller/transcript/__init__.php index 81bfe3385f..3208b3b1e0 100644 --- a/src/applications/herald/controller/transcript/__init__.php +++ b/src/applications/herald/controller/transcript/__init__.php @@ -14,6 +14,7 @@ phutil_require_module('phabricator', 'applications/herald/storage/transcript/bas phutil_require_module('phabricator', 'applications/phid/handle/data'); phutil_require_module('phabricator', 'infrastructure/celerity/api'); phutil_require_module('phabricator', 'view/control/table'); +phutil_require_module('phabricator', 'view/form/error'); phutil_require_module('phabricator', 'view/layout/panel'); phutil_require_module('phabricator', 'view/layout/sidenav'); diff --git a/src/docs/configuration/managing_daemons.diviner b/src/docs/configuration/managing_daemons.diviner index 92c7d2ae78..fdc7dfe9ed 100644 --- a/src/docs/configuration/managing_daemons.diviner +++ b/src/docs/configuration/managing_daemons.diviner @@ -70,3 +70,4 @@ You can get a list of launchable daemons with **phd list**: - **PhabricatorTaskmasterDaemon** runs a generic task queue; and - **PhabricatorRepository** daemons track repositories, descriptions are available in the @{article:Diffusion User Guide}. + - **PhabricatorGarbageCollectorDaemon** cleans up old logs and caches. diff --git a/src/infrastructure/daemon/garbagecollector/PhabricatorGarbageCollectorDaemon.php b/src/infrastructure/daemon/garbagecollector/PhabricatorGarbageCollectorDaemon.php new file mode 100644 index 0000000000..757d6084e9 --- /dev/null +++ b/src/infrastructure/daemon/garbagecollector/PhabricatorGarbageCollectorDaemon.php @@ -0,0 +1,139 @@ + ($start + $run_for)) { + if ($just_ran) { + echo "Stopped garbage collector.\n"; + $just_ran = false; + } + // The configuration says we can't collect garbage right now, so + // just sleep until we can. + $this->sleep(300); + continue; + } + + if (!$just_ran) { + echo "Started garbage collector.\n"; + $just_ran = true; + } + + $n_herald = $this->collectHeraldTranscripts(); + $n_daemon = $this->collectDaemonLogs(); + $n_render = $this->collectRenderCaches(); + + $collected = array( + 'Herald Transcript' => $n_herald, + 'Daemon Log' => $n_daemon, + 'Render Cache' => $n_render, + ); + $collected = array_filter($collected); + + foreach ($collected as $thing => $count) { + $count = number_format($count); + echo "Garbage collected {$count} '{$thing}' objects.\n"; + } + + $total = array_sum($collected); + if ($total < 100) { + // We didn't max out any of the GCs so we're basically caught up. Ease + // off the GC loop so we don't keep doing table scans just to delete + // a handful of rows. + $this->sleep(300); + } else { + $this->stillWorking(); + } + } while (true); + + } + + private function collectHeraldTranscripts() { + $ttl = PhabricatorEnv::getEnvConfig('gcdaemon.ttl.herald-transcripts'); + if ($ttl <= 0) { + return 0; + } + + $table = new HeraldTranscript(); + $conn_w = $table->establishConnection('w'); + + queryfx( + $conn_w, + 'UPDATE %T SET + objectTranscript = "", + ruleTranscripts = "", + conditionTranscripts = "", + applyTranscripts = "" + WHERE `time` < %d AND objectTranscript != "" + LIMIT 100', + $table->getTableName(), + time() - $ttl); + + return $conn_w->getAffectedRows(); + } + + private function collectDaemonLogs() { + $ttl = PhabricatorEnv::getEnvConfig('gcdaemon.ttl.daemon-logs'); + if ($ttl <= 0) { + return 0; + } + + $table = new PhabricatorDaemonLogEvent(); + $conn_w = $table->establishConnection('w'); + + queryfx( + $conn_w, + 'DELETE FROM %T WHERE epoch < %d LIMIT 100', + $table->getTableName(), + time() - $ttl); + + return $conn_w->getAffectedRows(); + } + + private function collectRenderCaches() { + // TODO: Implement this, no epoch column on the table right now. + return 0; + } + +} diff --git a/src/infrastructure/daemon/garbagecollector/__init__.php b/src/infrastructure/daemon/garbagecollector/__init__.php new file mode 100644 index 0000000000..f3de769a1d --- /dev/null +++ b/src/infrastructure/daemon/garbagecollector/__init__.php @@ -0,0 +1,16 @@ +