Make i18n string extraction faster and more flexible
Summary: Ref T5267. Two general changes:

- Make string extraction use a cache, so that it doesn't take several minutes every time you change something. Minor updates now only take a few seconds (like `arc liberate` and similar).
- Instead of dumping a sort-of-template file out, write the results to a cache (`src/.cache/i18n_strings.json`). I'm planning to add more steps to read this cache and do interesting things with it (emit translatewiki strings, generate or update standalone translation files, etc).

Test Plan:

- Ran `bin/i18n extract`.
- Ran it again, saw it go a lot faster.
- Changed stuff, ran it, saw it only look at new stuff.
- Examined caches.

Reviewers: chad
Reviewed By: chad
Maniphest Tasks: T5267
Differential Revision: https://secure.phabricator.com/D16227
This commit is contained in:
		
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@@ -4,6 +4,7 @@
 | 
				
			|||||||
# Diviner
 | 
					# Diviner
 | 
				
			||||||
/docs/
 | 
					/docs/
 | 
				
			||||||
/.divinercache/
 | 
					/.divinercache/
 | 
				
			||||||
 | 
					/src/.cache/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# libphutil
 | 
					# libphutil
 | 
				
			||||||
/src/.phutil_module_cache
 | 
					/src/.phutil_module_cache
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -3,9 +3,13 @@
 | 
				
			|||||||
final class PhabricatorInternationalizationManagementExtractWorkflow
 | 
					final class PhabricatorInternationalizationManagementExtractWorkflow
 | 
				
			||||||
  extends PhabricatorInternationalizationManagementWorkflow {
 | 
					  extends PhabricatorInternationalizationManagementWorkflow {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const CACHE_VERSION = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  protected function didConstruct() {
 | 
					  protected function didConstruct() {
 | 
				
			||||||
    $this
 | 
					    $this
 | 
				
			||||||
      ->setName('extract')
 | 
					      ->setName('extract')
 | 
				
			||||||
 | 
					      ->setExamples(
 | 
				
			||||||
 | 
					        '**extract** [__options__] __library__')
 | 
				
			||||||
      ->setSynopsis(pht('Extract translatable strings.'))
 | 
					      ->setSynopsis(pht('Extract translatable strings.'))
 | 
				
			||||||
      ->setArguments(
 | 
					      ->setArguments(
 | 
				
			||||||
        array(
 | 
					        array(
 | 
				
			||||||
@@ -13,44 +17,138 @@ final class PhabricatorInternationalizationManagementExtractWorkflow
 | 
				
			|||||||
            'name' => 'paths',
 | 
					            'name' => 'paths',
 | 
				
			||||||
            'wildcard' => true,
 | 
					            'wildcard' => true,
 | 
				
			||||||
          ),
 | 
					          ),
 | 
				
			||||||
 | 
					          array(
 | 
				
			||||||
 | 
					            'name' => 'clean',
 | 
				
			||||||
 | 
					            'help' => pht('Drop caches before extracting strings. Slow!'),
 | 
				
			||||||
 | 
					          ),
 | 
				
			||||||
        ));
 | 
					        ));
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  public function execute(PhutilArgumentParser $args) {
 | 
					  public function execute(PhutilArgumentParser $args) {
 | 
				
			||||||
    $console = PhutilConsole::getConsole();
 | 
					    $console = PhutilConsole::getConsole();
 | 
				
			||||||
    $paths = $args->getArg('paths');
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    $futures = array();
 | 
					    $paths = $args->getArg('paths');
 | 
				
			||||||
 | 
					    if (!$paths) {
 | 
				
			||||||
 | 
					      $paths = array(getcwd());
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    $targets = array();
 | 
				
			||||||
    foreach ($paths as $path) {
 | 
					    foreach ($paths as $path) {
 | 
				
			||||||
      $root = Filesystem::resolvePath($path);
 | 
					      $root = Filesystem::resolvePath($path);
 | 
				
			||||||
      $path_files = id(new FileFinder($root))
 | 
					 | 
				
			||||||
        ->withType('f')
 | 
					 | 
				
			||||||
        ->withSuffix('php')
 | 
					 | 
				
			||||||
        ->find();
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
      foreach ($path_files as $file) {
 | 
					      if (!Filesystem::pathExists($root) || !is_dir($root)) {
 | 
				
			||||||
        $full_path = $root.DIRECTORY_SEPARATOR.$file;
 | 
					        throw new PhutilArgumentUsageException(
 | 
				
			||||||
        $data = Filesystem::readFile($full_path);
 | 
					          pht(
 | 
				
			||||||
        $futures[$full_path] = PhutilXHPASTBinary::getParserFuture($data);
 | 
					            'Path "%s" does not exist, or is not a directory.',
 | 
				
			||||||
 | 
					            $path));
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      $libraries = id(new FileFinder($path))
 | 
				
			||||||
 | 
					        ->withPath('*/__phutil_library_init__.php')
 | 
				
			||||||
 | 
					        ->find();
 | 
				
			||||||
 | 
					      if (!$libraries) {
 | 
				
			||||||
 | 
					        throw new PhutilArgumentUsageException(
 | 
				
			||||||
 | 
					          pht(
 | 
				
			||||||
 | 
					            'Path "%s" contains no libphutil libraries.',
 | 
				
			||||||
 | 
					            $path));
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      foreach ($libraries as $library) {
 | 
				
			||||||
 | 
					        $targets[] = Filesystem::resolvePath(dirname($library)).'/';
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    $console->writeErr(
 | 
					    $targets = array_unique($targets);
 | 
				
			||||||
      "%s\n",
 | 
					 | 
				
			||||||
      pht('Found %s file(s)...', phutil_count($futures)));
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    $results = array();
 | 
					    foreach ($targets as $library) {
 | 
				
			||||||
 | 
					      echo tsprintf(
 | 
				
			||||||
 | 
					        "**<bg:blue> %s </bg>** %s\n",
 | 
				
			||||||
 | 
					        pht('EXTRACT'),
 | 
				
			||||||
 | 
					        pht(
 | 
				
			||||||
 | 
					          'Extracting "%s"...',
 | 
				
			||||||
 | 
					          Filesystem::readablePath($library)));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      $this->extractLibrary($library);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return 0;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Extract translatable strings from a single library.
					   *
					   * Compares the library's current files against the cached file list,
					   * runs extraction only for files whose hash is not in the cache, merges
					   * those results with the cached results which are still valid, and then
					   * writes the string map and the updated cache back to disk.
					   *
					   * @param string Root directory of the library.
					   * @return void
					   */
					  private function extractLibrary($root) {
					    $files = $this->loadLibraryFiles($root);
					    $cache = $this->readCache($root);

					    $modified = $this->getModifiedFiles($files, $cache);
					    $cache['files'] = $files;

					    // Cached strings are keyed by file hash. Keep only the entries whose
					    // hash still belongs to a current file, so strings from files which
					    // were changed or removed do not linger. Default to an empty map so a
					    // cache without a "strings" key cannot pass null along.
					    $cached_strings = idx($cache, 'strings', array());
					    $old_strings = array_select_keys($cached_strings, $files);
					    $has_stale = (count($old_strings) != count($cached_strings));

					    $all_strings = null;
					    if ($modified) {
					      echo tsprintf(
					        "**<bg:blue> %s </bg>** %s\n",
					        pht('MODIFIED'),
					        pht(
					          'Found %s modified file(s) (of %s total).',
					          phutil_count($modified),
					          phutil_count($files)));

					      $new_strings = $this->extractFiles($root, $modified);
					      $all_strings = $new_strings + $old_strings;
					    } else if ($has_stale) {
					      // Nothing needs to be parsed, but some cached strings belong to
					      // files which are gone, so the output still has to be rewritten.
					      echo tsprintf(
					        "**<bg:blue> %s </bg>** %s\n",
					        pht('PRUNED'),
					        pht('Removed cached strings for files which no longer exist.'));

					      $all_strings = $old_strings;
					    } else {
					      echo tsprintf(
					        "**<bg:blue> %s </bg>** %s\n",
					        pht('NOT MODIFIED'),
					        pht('Strings for this library are already up to date.'));
					    }

					    if ($all_strings !== null) {
					      $cache['strings'] = $all_strings;
					      $this->writeStrings($root, $all_strings);
					    }

					    $cache = id(new PhutilJSON())->encodeFormatted($cache);
					    $this->writeCache($root, 'i18n_files.json', $cache);
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Select the files whose hash does not appear in the cached file list.
					   *
					   * @param map<string, string> Map of file paths to hashes.
					   * @param map<string, wild> Cache data; the "files" key is read if present.
					   * @return map<string, string> Subset of the input files which are not
					   *   known to the cache, with keys and values preserved.
					   */
					  private function getModifiedFiles(array $files, array $cache) {
					    // Index the cached hashes by value so each lookup is a key check.
					    $known_hashes = array_fuse(idx($cache, 'files', array()));

					    $changed = array();
					    foreach ($files as $path => $hash) {
					      if (!isset($known_hashes[$hash])) {
					        $changed[$path] = $hash;
					      }
					    }

					    return $changed;
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  private function extractFiles($root_path, array $files) {
 | 
				
			||||||
 | 
					    $hashes = array();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    $futures = array();
 | 
				
			||||||
 | 
					    foreach ($files as $file => $hash) {
 | 
				
			||||||
 | 
					      $full_path = $root_path.DIRECTORY_SEPARATOR.$file;
 | 
				
			||||||
 | 
					      $data = Filesystem::readFile($full_path);
 | 
				
			||||||
 | 
					      $futures[$full_path] = PhutilXHPASTBinary::getParserFuture($data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      $hashes[$full_path] = $hash;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    $bar = id(new PhutilConsoleProgressBar())
 | 
					    $bar = id(new PhutilConsoleProgressBar())
 | 
				
			||||||
      ->setTotal(count($futures));
 | 
					      ->setTotal(count($futures));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    $messages = array();
 | 
					    $messages = array();
 | 
				
			||||||
 | 
					    $results = array();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    $futures = id(new FutureIterator($futures))
 | 
					    $futures = id(new FutureIterator($futures))
 | 
				
			||||||
      ->limit(8);
 | 
					      ->limit(8);
 | 
				
			||||||
    foreach ($futures as $full_path => $future) {
 | 
					    foreach ($futures as $full_path => $future) {
 | 
				
			||||||
      $bar->update(1);
 | 
					      $bar->update(1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      $hash = $hashes[$full_path];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      try {
 | 
					      try {
 | 
				
			||||||
        $tree = XHPASTTree::newFromDataAndResolvedExecFuture(
 | 
					        $tree = XHPASTTree::newFromDataAndResolvedExecFuture(
 | 
				
			||||||
          Filesystem::readFile($full_path),
 | 
					          Filesystem::readFile($full_path),
 | 
				
			||||||
@@ -67,24 +165,27 @@ final class PhabricatorInternationalizationManagementExtractWorkflow
 | 
				
			|||||||
      $calls = $root->selectDescendantsOfType('n_FUNCTION_CALL');
 | 
					      $calls = $root->selectDescendantsOfType('n_FUNCTION_CALL');
 | 
				
			||||||
      foreach ($calls as $call) {
 | 
					      foreach ($calls as $call) {
 | 
				
			||||||
        $name = $call->getChildByIndex(0)->getConcreteString();
 | 
					        $name = $call->getChildByIndex(0)->getConcreteString();
 | 
				
			||||||
        if ($name == 'pht') {
 | 
					        if ($name != 'pht') {
 | 
				
			||||||
          $params = $call->getChildByIndex(1, 'n_CALL_PARAMETER_LIST');
 | 
					          continue;
 | 
				
			||||||
          $string_node = $params->getChildByIndex(0);
 | 
					        }
 | 
				
			||||||
          $string_line = $string_node->getLineNumber();
 | 
					 | 
				
			||||||
          try {
 | 
					 | 
				
			||||||
            $string_value = $string_node->evalStatic();
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
            $results[$string_value][] = array(
 | 
					        $params = $call->getChildByIndex(1, 'n_CALL_PARAMETER_LIST');
 | 
				
			||||||
              'file' => Filesystem::readablePath($full_path),
 | 
					        $string_node = $params->getChildByIndex(0);
 | 
				
			||||||
              'line' => $string_line,
 | 
					        $string_line = $string_node->getLineNumber();
 | 
				
			||||||
            );
 | 
					        try {
 | 
				
			||||||
          } catch (Exception $ex) {
 | 
					          $string_value = $string_node->evalStatic();
 | 
				
			||||||
            $messages[] = pht(
 | 
					
 | 
				
			||||||
              'WARNING: Failed to evaluate pht() call on line %d in "%s": %s',
 | 
					          $results[$hash][] = array(
 | 
				
			||||||
              $call->getLineNumber(),
 | 
					            'string' => $string_value,
 | 
				
			||||||
              $full_path,
 | 
					            'file' => Filesystem::readablePath($full_path, $root_path),
 | 
				
			||||||
              $ex->getMessage());
 | 
					            'line' => $string_line,
 | 
				
			||||||
          }
 | 
					          );
 | 
				
			||||||
 | 
					        } catch (Exception $ex) {
 | 
				
			||||||
 | 
					          $messages[] = pht(
 | 
				
			||||||
 | 
					            'WARNING: Failed to evaluate pht() call on line %d in "%s": %s',
 | 
				
			||||||
 | 
					            $call->getLineNumber(),
 | 
				
			||||||
 | 
					            $full_path,
 | 
				
			||||||
 | 
					            $ex->getMessage());
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -93,28 +194,109 @@ final class PhabricatorInternationalizationManagementExtractWorkflow
 | 
				
			|||||||
    $bar->done();
 | 
					    $bar->done();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    foreach ($messages as $message) {
 | 
					    foreach ($messages as $message) {
 | 
				
			||||||
      $console->writeErr("%s\n", $message);
 | 
					      echo tsprintf(
 | 
				
			||||||
 | 
					        "**<bg:yellow> %s </bg>** %s\n",
 | 
				
			||||||
 | 
					        pht('WARNING'),
 | 
				
			||||||
 | 
					        $message);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ksort($results);
 | 
					    return $results;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    $out = array();
 | 
					  private function writeStrings($root, array $strings) {
 | 
				
			||||||
    $out[] = '<?php';
 | 
					    $map = array();
 | 
				
			||||||
    $out[] = '// @no'.'lint';
 | 
					    foreach ($strings as $hash => $string_list) {
 | 
				
			||||||
    $out[] = 'return array(';
 | 
					      foreach ($string_list as $string_info) {
 | 
				
			||||||
    foreach ($results as $string => $locations) {
 | 
					        $map[$string_info['string']]['uses'][] = array(
 | 
				
			||||||
      foreach ($locations as $location) {
 | 
					          'file' => $string_info['file'],
 | 
				
			||||||
        $out[] = '  // '.$location['file'].':'.$location['line'];
 | 
					          'line' => $string_info['line'],
 | 
				
			||||||
 | 
					        );
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
      $out[] = "  '".addcslashes($string, "\0..\37\\'\177..\377")."' => null,";
 | 
					 | 
				
			||||||
      $out[] = null;
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    $out[] = ');';
 | 
					 | 
				
			||||||
    $out[] = null;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    echo implode("\n", $out);
 | 
					    ksort($map);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return 0;
 | 
					    $json = id(new PhutilJSON())->encodeFormatted($map);
 | 
				
			||||||
 | 
					    $this->writeCache($root, 'i18n_strings.json', $json);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Build a map of the PHP files in a library to per-file hashes.
					   *
					   * Files are found below the root, excluding paths matching "*/.*".
					   * Files whose directory (relative to the root) is "." or "extensions"
					   * are left out of the result.
					   *
					   * @param string Root directory of the library.
					   * @return map<string, string> Map of root-relative paths to an MD5 of
					   *   the file checksum concatenated with the relative path.
					   */
					  private function loadLibraryFiles($root) {
					    $finder = id(new FileFinder($root))
					      ->withType('f')
					      ->withSuffix('php')
					      ->excludePath('*/.*')
					      ->setGenerateChecksums(true);

					    $result = array();
					    foreach ($finder->find() as $found_path => $checksum) {
					      $relative = ltrim(Filesystem::readablePath($found_path, $root), '/');

					      $directory = dirname($relative);
					      if ($directory == '.' || $directory == 'extensions') {
					        continue;
					      }

					      // Mixing the path into the hash keeps identical files at different
					      // paths from sharing one hash.
					      $result[$relative] = md5($checksum.$relative);
					    }

					    return $result;
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Read the extraction cache for a library.
					   *
					   * Returns an empty default cache when the "clean" flag is set, when the
					   * cache file is missing or unreadable, when it is not valid JSON, when
					   * its version does not match CACHE_VERSION, or when its "files" or
					   * "strings" entries are missing or are not maps.
					   *
					   * @param string Root directory of the library.
					   * @return map<string, wild> Cache with "version", "files" and "strings"
					   *   keys.
					   */
					  private function readCache($root) {
					    $path = $this->getCachePath($root, 'i18n_files.json');

					    $default = array(
					      'version' => self::CACHE_VERSION,
					      'files' => array(),
					      'strings' => array(),
					    );

					    if ($this->getArgv()->getArg('clean')) {
					      return $default;
					    }

					    if (!Filesystem::pathExists($path)) {
					      return $default;
					    }

					    try {
					      $data = Filesystem::readFile($path);
					    } catch (Exception $ex) {
					      return $default;
					    }

					    try {
					      $cache = phutil_json_decode($data);
					    } catch (PhutilJSONParserException $e) {
					      return $default;
					    }

					    $version = idx($cache, 'version');
					    if ($version !== self::CACHE_VERSION) {
					      return $default;
					    }

					    // A cache with the right version but a damaged structure would make
					    // callers operate on null or scalar values, so treat it as missing.
					    foreach (array('files', 'strings') as $key) {
					      if (!is_array(idx($cache, $key))) {
					        return $default;
					      }
					    }

					    return $cache;
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Write data to a file in the library's cache directory, creating the
					   * directory first if it does not exist.
					   *
					   * @param string Root directory of the library.
					   * @param string Name of the cache file to write.
					   * @param string Data to write.
					   * @return void
					   */
					  private function writeCache($root, $file, $data) {
					    $target = $this->getCachePath($root, $file);
					    $directory = dirname($target);

					    $directory_exists = Filesystem::pathExists($directory);
					    if (!$directory_exists) {
					      Filesystem::createDirectory($directory, 0755, true);
					    }

					    Filesystem::writeFile($target, $data);
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Build the path to a file inside a library's ".cache" directory.
					   *
					   * Trailing slashes on the root are dropped before joining, so a root
					   * like "/lib/src/" does not produce a path with "//" in it.
					   *
					   * @param string Root directory of the library.
					   * @param string Name of the file inside the cache directory.
					   * @return string Path to the cache file.
					   */
					  private function getCachePath($root, $to_file) {
					    return rtrim($root, '/').'/.cache/'.$to_file;
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user