Implement a more compact, general database-backed key-value cache

Summary:
See discussion in D4204. Facebook currently has a 314MB remarkup cache with a 55MB index, which is slow to access. Under the theory that this is an index size/quality problem (the current index is on a potentially-384-byte field, with many keys sharing prefixes), provide a more general index with fancy new features:

  - It implements PhutilKeyValueCache, so it can be a component in cache stacks and supports TTL.
  - It has a 12-byte hash-based key.
  - It automatically compresses large blocks of data (most of what we store is highly-compressible HTML).

Test Plan:
  - Basics:
    - Loaded /paste/, saw caches generate and save.
    - Reloaded /paste/, saw the page hit cache.
  - GC:
    - Ran GC daemon, saw nothing.
    - Set maximum lifetime to 1 second, ran GC daemon, saw it collect the entire cache.
  - Deflate:
    - Selected row formats from the database, saw a mixture of 'raw' and 'deflate' storage.
    - Used profiler to verify that 'deflate' is fast (12 calls @ 220us on my paste list).
  - Ran unit tests

Reviewers: vrana, btrahan

Reviewed By: vrana

CC: aran

Differential Revision: https://secure.phabricator.com/D4259
This commit is contained in:
epriestley
2012-12-21 14:17:56 -08:00
parent 62bc3373e5
commit aae5f9efd3
9 changed files with 407 additions and 13 deletions

View File

@@ -0,0 +1,232 @@
<?php
/**
 * Database-backed key-value cache.
 *
 * Rows are addressed by a digest of the cache key (see @{method:digestKeys})
 * rather than by the key itself; the full key is stored alongside the digest
 * so that reads can confirm they got the row they asked for. Values must be
 * strings, and sufficiently large, compressible values are stored deflated
 * when `gzdeflate()` is available and `cache.enable-deflate` is set.
 *
 * @task raw    Raw Cache Access
 * @task impl   Implementation
 */
final class PhabricatorKeyValueDatabaseCache
  extends PhutilKeyValueCache {

  const CACHE_FORMAT_RAW      = 'raw';
  const CACHE_FORMAT_DEFLATE  = 'deflate';

  /**
   * Write a map of keys to string values into the cache.
   *
   * @param   map<string, string>   Map of cache keys to values.
   * @param   int|null              Optional lifetime in seconds. A falsy TTL
   *                                stores a NULL expiry, which reads treat as
   *                                "never expires".
   * @return  this
   */
  public function setKeys(array $keys, $ttl = null) {
    $call_id = null;
    if ($this->getProfiler()) {
      $call_id = $this->getProfiler()->beginServiceCall(
        array(
          'type' => 'kvcache-set',
          'name' => 'phabricator-db',
          'keys' => array_keys($keys),
          'ttl'  => $ttl,
        ));
    }

    if ($keys) {
      $map = $this->digestKeys(array_keys($keys));
      $conn_w = $this->establishConnection('w');

      // Read the clock once so every row in this batch shares one creation
      // time, and so the expiry is computed from that same instant.
      $now = time();
      $expires = $ttl ? ($now + $ttl) : null;

      $sql = array();
      foreach ($map as $key => $hash) {
        $value = $keys[$key];

        list($format, $storage_value) = $this->willWriteValue($key, $value);

        $sql[] = qsprintf(
          $conn_w,
          '(%s, %s, %s, %s, %d, %nd)',
          $hash,
          $key,
          $format,
          $storage_value,
          $now,
          $expires);
      }

      // The insert runs with write guarding suspended for the lifetime of
      // $guard; releasing the variable below ends the unguarded scope.
      $guard = AphrontWriteGuard::beginScopedUnguardedWrites();
        foreach (PhabricatorLiskDAO::chunkSQL($sql) as $chunk) {
          queryfx(
            $conn_w,
            'INSERT INTO %T
              (cacheKeyHash, cacheKey, cacheFormat, cacheData,
                cacheCreated, cacheExpires) VALUES %Q
              ON DUPLICATE KEY UPDATE
                cacheKey = VALUES(cacheKey),
                cacheFormat = VALUES(cacheFormat),
                cacheData = VALUES(cacheData),
                cacheCreated = VALUES(cacheCreated),
                cacheExpires = VALUES(cacheExpires)',
            $this->getTableName(),
            $chunk);
        }
      unset($guard);
    }

    if ($call_id) {
      $this->getProfiler()->endServiceCall($call_id, array());
    }

    return $this;
  }

  /**
   * Read values for a list of keys.
   *
   * Missing rows, expired rows, and rows whose stored data cannot be decoded
   * are all omitted from the result (decode failures are also logged).
   *
   * @param   list<string>          Cache keys to read.
   * @return  map<string, string>   Map of found keys to their values.
   */
  public function getKeys(array $keys) {
    $call_id = null;
    if ($this->getProfiler()) {
      $call_id = $this->getProfiler()->beginServiceCall(
        array(
          'type' => 'kvcache-get',
          'name' => 'phabricator-db',
          'keys' => $keys,
        ));
    }

    $results = array();
    if ($keys) {
      $map = $this->digestKeys($keys);

      $rows = queryfx_all(
        $this->establishConnection('r'),
        'SELECT * FROM %T WHERE cacheKeyHash IN (%Ls)',
        $this->getTableName(),
        $map);

      // Rows are selected by digest but indexed by the full key, so a row
      // belonging to a different key that shares a digest is never returned
      // for the requested key; it simply reads as a miss.
      $rows = ipull($rows, null, 'cacheKey');

      $now = time();
      foreach ($keys as $key) {
        if (empty($rows[$key])) {
          continue;
        }

        $row = $rows[$key];

        // A falsy expiry means the row has no TTL.
        if ($row['cacheExpires'] && ($row['cacheExpires'] < $now)) {
          continue;
        }

        try {
          $results[$key] = $this->didReadValue(
            $row['cacheFormat'],
            $row['cacheData']);
        } catch (Exception $ex) {
          // Treat this as a cache miss.
          phlog($ex);
        }
      }
    }

    if ($call_id) {
      $this->getProfiler()->endServiceCall(
        $call_id,
        array(
          'hits' => array_keys($results),
        ));
    }

    return $results;
  }

  /**
   * Delete a list of keys from the cache.
   *
   * @param   list<string>  Cache keys to delete.
   * @return  this
   */
  public function deleteKeys(array $keys) {
    $call_id = null;
    if ($this->getProfiler()) {
      $call_id = $this->getProfiler()->beginServiceCall(
        array(
          'type' => 'kvcache-del',
          'name' => 'phabricator-db',
          'keys' => $keys,
        ));
    }

    if ($keys) {
      $map = $this->digestKeys($keys);

      // The cacheKeyHash column holds digests, so the IN clause must be given
      // the digests in $map. Matching against the raw keys would never select
      // a row and the delete would silently do nothing.
      queryfx(
        $this->establishConnection('w'),
        'DELETE FROM %T WHERE cacheKeyHash IN (%Ls)',
        $this->getTableName(),
        $map);
    }

    if ($call_id) {
      $this->getProfiler()->endServiceCall($call_id, array());
    }

    return $this;
  }

  /**
   * Remove every row from the cache table.
   *
   * @return this
   */
  public function destroyCache() {
    queryfx(
      $this->establishConnection('w'),
      'DELETE FROM %T',
      $this->getTableName());
    return $this;
  }


/* -(  Raw Cache Access  )--------------------------------------------------- */


  /**
   * Get a connection to the database holding the cache table.
   *
   * @param   string  Connection mode, `'r'` or `'w'`.
   * @return  wild    Connection obtained from @{class:PhabricatorMarkupCache}.
   * @task raw
   */
  public function establishConnection($mode) {
    // TODO: This is the only concrete table we have on the database right
    // now.
    return id(new PhabricatorMarkupCache())->establishConnection($mode);
  }

  /**
   * Get the name of the table cache rows are stored in.
   *
   * @return string Table name.
   * @task raw
   */
  public function getTableName() {
    return 'cache_general';
  }


/* -(  Implementation  )----------------------------------------------------- */


  /**
   * Build a map from each cache key to the digest used to index it.
   *
   * @param   list<string>          Cache keys.
   * @return  map<string, string>   Map of cache key to index digest.
   * @task impl
   */
  private function digestKeys(array $keys) {
    $map = array();
    foreach ($keys as $key) {
      $map[$key] = PhabricatorHash::digestForIndex($key);
    }
    return $map;
  }

  /**
   * Choose a storage format for a value and encode it.
   *
   * @param   string  Cache key (currently unused by the encoding decision).
   * @param   string  Value to store.
   * @return  pair<string, string>  Storage format constant and the bytes to
   *                                store.
   * @throws  Exception  If the value is not a string.
   * @task impl
   */
  private function willWriteValue($key, $value) {
    if (!is_string($value)) {
      throw new Exception("Only strings may be written to the DB cache!");
    }

    // Resolve deflate support once per process; neither input is expected to
    // change between calls.
    static $can_deflate;
    if ($can_deflate === null) {
      $can_deflate = function_exists('gzdeflate') &&
                     PhabricatorEnv::getEnvConfig('cache.enable-deflate');
    }

    // If the value is larger than 1KB, we have gzdeflate(), we successfully
    // can deflate it, and it benefits from deflation, store it deflated.
    if ($can_deflate) {
      $len = strlen($value);
      if ($len > 1024) {
        $deflated = gzdeflate($value);
        if ($deflated !== false) {
          $deflated_len = strlen($deflated);
          // "Benefits" means the deflated form is under half the raw size.
          if ($deflated_len < ($len / 2)) {
            return array(self::CACHE_FORMAT_DEFLATE, $deflated);
          }
        }
      }
    }

    return array(self::CACHE_FORMAT_RAW, $value);
  }

  /**
   * Decode stored bytes back into the original value.
   *
   * @param   string  Storage format constant read from the row.
   * @param   string  Stored bytes.
   * @return  string  Decoded value.
   * @throws  Exception  If the format is unknown, `gzinflate()` is
   *                     unavailable, or the data fails to inflate.
   * @task impl
   */
  private function didReadValue($format, $value) {
    switch ($format) {
      case self::CACHE_FORMAT_RAW:
        return $value;
      case self::CACHE_FORMAT_DEFLATE:
        if (!function_exists('gzinflate')) {
          throw new Exception("No gzinflate() to read deflated cache.");
        }
        $value = gzinflate($value);
        if ($value === false) {
          throw new Exception("Failed to inflate cache.");
        }
        return $value;
      default:
        throw new Exception("Unknown cache format.");
    }
  }

}