Summary:
- Currently, the atomizers don't emit atoms with the right file in all cases. Make them always emit it correctly.
- Currently, we use absolute paths in some cases and relative paths in other cases. Use them consistently: relative when storing/presenting, absolute when accessing data.
- Don't preserve linebreaks when marking up documentation (documentation is generally wrapped at 80col, but should not be wrapped in this way when displayed).
- Markup Diviner link rules (albeit uselessly).
Test Plan:
Before:
{F33044}
After:
{F33045}
Reviewers: chad
Reviewed By: chad
CC: aran
Maniphest Tasks: T988
Differential Revision: https://secure.phabricator.com/D4992
459 lines
15 KiB
PHP
459 lines
15 KiB
PHP
<?php
|
|
|
|
final class DivinerGenerateWorkflow extends DivinerWorkflow {
|
|
|
|
private $atomCache;
|
|
|
|
public function didConstruct() {
  // Declare the "generate" workflow: its name, synopsis, and the
  // command-line flags it accepts.
  $arguments = array(
    array(
      'name' => 'clean',
      'help' => 'Clear the caches before generating documentation.',
    ),
    array(
      'name' => 'book',
      'param' => 'path',
      'help' => 'Path to a Diviner book configuration.',
    ),
  );

  $this->setName('generate');
  $this->setSynopsis(pht('Generate documentation.'));
  $this->setArguments($arguments);
}
|
|
|
|
protected function getAtomCache() {
  // Lazily construct the atom cache. It lives in a hidden
  // ".divinercache" directory under the book root, in a subdirectory
  // named after the book.
  if ($this->atomCache === null) {
    $root = $this->getConfig('root');
    $name = $this->getConfig('name');
    $this->atomCache = new DivinerAtomCache(
      $root.'/.divinercache/'.$name);
  }

  return $this->atomCache;
}
|
|
|
|
protected function log($message) {
  // Route progress output through the console's log channel (enabling
  // logging first, since it may be off by default).
  $console = PhutilConsole::getConsole();
  $console->getServer()->setEnableLog(true);
  $console->writeLog("{$message}\n");
}
|
|
|
|
/**
 * Run the "generate" workflow: optionally clear caches, then rebuild the
 * atom cache, rebuild the graph cache, and publish documentation.
 */
public function execute(PhutilArgumentParser $args) {
  $this->readBookConfiguration($args);

  if ($args->getArg('clean')) {
    $this->log(pht('CLEARING CACHES'));
    $this->getAtomCache()->delete();
    $this->log(pht('Done.')."\n");
  }

  // The major challenge of documentation generation is one of dependency
  // management. When regenerating documentation, we want to do the smallest
  // amount of work we can, so that regenerating documentation after minor
  // changes is quick.
  //
  // ATOM CACHE
  //
  // In the first stage, we find all the direct changes to source code since
  // the last run. This stage relies on two data structures:
  //
  //   - File Hash Map: map<file_hash, node_hash>
  //   - Atom Map: map<node_hash, true>
  //
  // First, we hash all the source files in the project to detect any which
  // have changed since the previous run (i.e., their hash is not present in
  // the File Hash Map). If a file's content hash appears in the map, it has
  // not changed, so we don't need to reparse it.
  //
  // We break the contents of each file into "atoms", which represent a unit
  // of source code (like a function, method, class or file). Each atom has a
  // "node hash" based on the content of the atom: if a function definition
  // changes, the node hash of the atom changes too. The primary output of
  // the atom cache is a list of node hashes which exist in the project. This
  // is the Atom Map. The node hash depends only on the definition of the atom
  // and the atomizer implementation. It ends with an "N", for "node".
  //
  // (We need the Atom Map in addition to the File Hash Map because each file
  // may have several atoms in it (e.g., multiple functions, or a class and
  // its methods). The File Hash Map contains an exhaustive list of all atoms
  // with type "file", but not child atoms of those top-level atoms.)
  //
  // GRAPH CACHE
  //
  // We now know which atoms exist, and can compare the Atom Map to some
  // existing cache to figure out what has changed. However, this isn't
  // sufficient to figure out which documentation actually needs to be
  // regenerated, because atoms depend on other atoms. For example, if "B
  // extends A" and the definition for A changes, we need to regenerate the
  // documentation in B. Similarly, if X links to Y and Y changes, we should
  // regenerate X. (In both these cases, the documentation for the connected
  // atom may not actually change, but in some cases it will, and the extra
  // work we need to do is generally very small compared to the size of the
  // project.)
  //
  // To figure out which other nodes have changed, we compute a "graph hash"
  // for each node. This hash combines the "node hash" with the node hashes
  // of connected nodes. Our primary output is a list of graph hashes, which
  // a documentation generator can use to easily determine what work needs
  // to be done by comparing the list with a list of cached graph hashes,
  // then generating documentation for new hashes and deleting documentation
  // for missing hashes. The graph hash ends with a "G", for "graph".
  //
  // In this stage, we rely on three data structures:
  //
  //   - Symbol Map: map<node_hash, symbol_hash>
  //   - Edge Map: map<node_hash, list<symbol_hash>>
  //   - Graph Map: map<node_hash, graph_hash>
  //
  // Calculating the graph hash requires several steps, because we need to
  // figure out which nodes an atom is attached to. The atom contains symbolic
  // references to other nodes by name (e.g., "extends SomeClass") in the form
  // of DivinerAtomRefs. We can also build a symbolic reference for any atom
  // from the atom itself. Each DivinerAtomRef generates a symbol hash,
  // which ends with an "S", for "symbol".
  //
  // First, we update the symbol map. We remove (and mark dirty) any symbols
  // associated with node hashes which no longer exist (e.g., old/dead nodes).
  // Second, we add (and mark dirty) any symbols associated with new nodes.
  // We also add edges defined by new nodes to the graph.
  //
  // We initialize a list of dirty nodes to the list of new nodes, then
  // find all nodes connected to dirty symbols and add them to the dirty
  // node list. This list now contains every node with a new or changed
  // graph hash.
  //
  // We walk the dirty list and compute the new graph hashes, adding them
  // to the graph hash map. This Graph Map can then be passed to an actual
  // documentation generator, which can compare the graph hashes to a list
  // of already-generated graph hashes and easily assess which documents need
  // to be regenerated and which can be deleted.

  $this->buildAtomCache();
  $this->buildGraphCache();

  $this->publishDocumentation();
}
|
|
|
|
/* -( Atom Cache )--------------------------------------------------------- */
|
|
|
|
private function buildAtomCache() {
  // Stage one: hash every file in the project, evict cache entries for
  // files which no longer exist, then atomize any file whose hash is not
  // already cached, and finally persist the results.
  $this->log(pht('BUILDING ATOM CACHE'));

  $file_hashes = $this->findFilesInProject();
  $this->log(pht('Found %d file(s) in project.', count($file_hashes)));

  $this->deleteDeadAtoms($file_hashes);

  $atomize = $this->getFilesToAtomize($file_hashes);
  $this->log(pht('Found %d unatomized, uncached file(s).', count($atomize)));

  $file_atomizers = $this->getAtomizersForFiles($atomize);
  $this->log(pht('Found %d file(s) to atomize.', count($file_atomizers)));

  $futures = $this->buildAtomizerFutures($file_atomizers);
  if (!$futures) {
    $this->log(pht('Atom cache is up to date, no files to atomize.'));
  } else {
    $this->resolveAtomizerFutures($futures, $file_hashes);
    $this->log(pht('Atomization complete.'));
  }

  $this->log(pht('Writing atom cache.'));
  $this->getAtomCache()->saveAtoms();
  $this->log(pht('Done.')."\n");
}
|
|
|
|
/**
 * Map each file to the atomizer class selected by the first rule (regular
 * expression) which matches its path. Files matching no rule are omitted
 * from the result.
 *
 * @param   array Relative file paths to examine.
 * @return  array Map of file path to atomizer class name.
 */
private function getAtomizersForFiles(array $files) {
  $rules = $this->getRules();

  $atomizers = array();

  foreach ($files as $file) {
    foreach ($rules as $rule => $atomizer) {
      $ok = preg_match($rule, $file);
      if ($ok === false) {
        // preg_match() returns FALSE only on pattern compile error;
        // 0 just means "no match".
        throw new Exception(
          "Rule '{$rule}' is not a valid regular expression.");
      }
      if ($ok) {
        $atomizers[$file] = $atomizer;
        // First matching rule wins. (The previous `continue;` here was a
        // no-op -- it was the last statement of this loop -- so later
        // rules, including the built-in fallback, could silently
        // overwrite an earlier user-configured rule.)
        break;
      }
    }
  }

  return $atomizers;
}
|
|
|
|
private function getRules() {
  // User-configured rules take precedence; a built-in rule routing
  // ".diviner" files to the article atomizer is always available as a
  // fallback (array union keeps the first occurrence of each key).
  $default_rules = array(
    '/\\.diviner$/' => 'DivinerArticleAtomizer',
  );

  return $this->getConfig('rules', array()) + $default_rules;
}
|
|
|
|
|
|
private function findFilesInProject() {
  // Walk the book root (skipping dot-directories and dotfiles) and build
  // a map from project-relative path to content hash.
  $root = $this->getConfig('root');

  $raw_hashes = id(new FileFinder($root))
    ->excludePath('*/.*')
    ->withType('f')
    ->setGenerateChecksums(true)
    ->find();

  $version = $this->getDivinerAtomWorldVersion();

  $file_hashes = array();
  foreach ($raw_hashes as $file => $md5_hash) {
    $rel_file = Filesystem::readablePath($file, $root);

    // We want the hash to change if the file moves or Diviner gets
    // updated, not just if the file content changes -- so fold the
    // relative path and the world version into the hash as well. The
    // trailing "F" marks this as a file hash.
    $hash_input = $rel_file."\0".$md5_hash."\0".$version;
    $file_hashes[$rel_file] = md5($hash_input).'F';
  }

  return $file_hashes;
}
|
|
|
|
private function deleteDeadAtoms(array $file_hashes) {
  // Drop cached file-hash entries which no longer correspond to any file
  // currently in the project (deleted, moved, or modified files), so
  // stale atoms do not linger in the cache.
  $atom_cache = $this->getAtomCache();

  $live = array_flip($file_hashes);
  foreach ($atom_cache->getFileHashMap() as $hash => $atom) {
    if (empty($live[$hash])) {
      $atom_cache->deleteFileHash($hash);
    }
  }
}
|
|
|
|
private function getFilesToAtomize(array $file_hashes) {
  // Collect the files whose hashes are missing from the cache; these are
  // the files which need to be (re)atomized.
  $atom_cache = $this->getAtomCache();

  $atomize = array();
  foreach ($file_hashes as $file => $hash) {
    if ($atom_cache->fileHashExists($hash)) {
      continue;
    }
    $atomize[] = $file;
  }

  return $atomize;
}
|
|
|
|
private function buildAtomizerFutures(array $file_atomizers) {
  // Group files by atomizer class, then spawn "diviner atomize"
  // subprocesses over chunks of 32 files so command lines stay short and
  // atomization can proceed in parallel.
  $by_atomizer = array();
  foreach ($file_atomizers as $file => $atomizer) {
    $by_atomizer[$atomizer][] = $file;
  }

  $diviner_bin =
    dirname(phutil_get_library_root('phabricator')).'/bin/diviner';

  $futures = array();
  foreach ($by_atomizer as $class => $files) {
    $chunks = array_chunk($files, 32);
    foreach ($chunks as $chunk) {
      $future = new ExecFuture(
        '%s atomize --ugly --book %s --atomizer %s -- %Ls',
        $diviner_bin,
        $this->getBookConfigPath(),
        $class,
        $chunk);
      // Run the subprocess from the book root, since the file lists are
      // relative paths.
      $future->setCWD($this->getConfig('root'));

      $futures[] = $future;
    }
  }

  return $futures;
}
|
|
|
|
private function resolveAtomizerFutures(array $futures, array $file_hashes) {
  assert_instances_of($futures, 'Future');

  // Resolve at most four atomizer subprocesses at a time. Each one emits
  // a JSON list of atoms; file-type atoms are recorded in the file hash
  // map, and every atom is added to the cache.
  $atom_cache = $this->getAtomCache();
  foreach (Futures($futures)->limit(4) as $idx => $future) {
    $emitted_atoms = $future->resolveJSON();

    foreach ($emitted_atoms as $atom) {
      if ($atom['type'] == DivinerAtom::TYPE_FILE) {
        $file_hash = $file_hashes[$atom['file']];
        $atom_cache->addFileHash($file_hash, $atom['hash']);
      }
      $atom_cache->addAtom($atom);
    }
  }
}
|
|
|
|
|
|
/**
 * Get a global version number, which changes whenever any atom or atomizer
 * implementation changes in a way which is not backward-compatible.
 */
private function getDivinerAtomWorldVersion() {
  $version = array(
    'atom' => DivinerAtom::getAtomSerializationVersion(),
    'rules' => $this->getRules(),
  );

  // Fold in the version of every concrete atomizer, so that upgrading
  // any atomizer invalidates previously cached atoms.
  $atomizers = id(new PhutilSymbolLoader())
    ->setAncestorClass('DivinerAtomizer')
    ->setConcreteOnly(true)
    ->selectAndLoadSymbols();

  $atomizer_versions = array();
  foreach ($atomizers as $atomizer) {
    $class = $atomizer['name'];
    $atomizer_versions[$class] = call_user_func(
      array($class, 'getAtomizerVersion'));
  }

  // Sort by class name so the hash does not depend on discovery order.
  ksort($atomizer_versions);
  $version['atomizers'] = $atomizer_versions;

  return md5(serialize($version));
}
|
|
|
|
|
|
/* -( Graph Cache )-------------------------------------------------------- */
|
|
|
|
|
|
/**
 * Stage two: compute graph hashes. A node's graph hash depends on its own
 * node hash and on the node hashes of everything it references, so changing
 * one atom dirties every atom connected to it. See the narrative comment in
 * execute() for the full design.
 */
private function buildGraphCache() {
  $this->log(pht('BUILDING GRAPH CACHE'));

  $atom_cache = $this->getAtomCache();
  $symbol_map = $atom_cache->getSymbolMap();
  $atoms = $atom_cache->getAtomMap();

  // Symbol hashes and node hashes which need their graph data recomputed.
  $dirty_symbols = array();
  $dirty_nhashes = array();

  // Purge nodes which are in the symbol map but no longer exist in the
  // atom map, and mark their symbols dirty. This must happen before new
  // nodes are added below.
  $del_atoms = array_diff_key($symbol_map, $atoms);
  $this->log(pht('Found %d obsolete atom(s) in graph.', count($del_atoms)));
  foreach ($del_atoms as $nhash => $shash) {
    $atom_cache->deleteSymbol($nhash);
    $dirty_symbols[$shash] = true;

    $atom_cache->deleteEdges($nhash);
    $atom_cache->deleteGraph($nhash);
  }

  // Register brand-new nodes: record their symbols and outgoing edges,
  // and mark both the nodes and their symbols dirty.
  $new_atoms = array_diff_key($atoms, $symbol_map);
  $this->log(pht('Found %d new atom(s) in graph.', count($new_atoms)));
  foreach ($new_atoms as $nhash => $ignored) {
    $shash = $this->computeSymbolHash($nhash);
    $atom_cache->addSymbol($nhash, $shash);
    $dirty_symbols[$shash] = true;

    $atom_cache->addEdges(
      $nhash,
      $this->getEdges($nhash));

    $dirty_nhashes[$nhash] = true;
  }

  $this->log(pht('Propagating changes through the graph.'));

  // Find all the nodes which point at a dirty node, and dirty them. Then
  // find all the nodes which point at those nodes and dirty them, and so
  // on. (This is slightly overkill since we probably don't need to propagate
  // dirtiness across documentation "links" between symbols, but we do want
  // to propagate it across "extends", and we suffer only a little bit of
  // collateral damage by over-dirtying as long as the documentation isn't
  // too well-connected.)

  $symbol_stack = array_keys($dirty_symbols);
  while ($symbol_stack) {
    $symbol_hash = array_pop($symbol_stack);

    foreach ($atom_cache->getEdgesWithDestination($symbol_hash) as $edge) {
      $dirty_nhashes[$edge] = true;
      $src_hash = $this->computeSymbolHash($edge);
      if (empty($dirty_symbols[$src_hash])) {
        // Not yet visited; queue it so its own referrers get dirtied too.
        $dirty_symbols[$src_hash] = true;
        $symbol_stack[] = $src_hash;
      }
    }
  }

  $this->log(pht('Found %d affected atoms.', count($dirty_nhashes)));

  // Recompute and store the graph hash for every dirty node.
  foreach ($dirty_nhashes as $nhash => $ignored) {
    $atom_cache->addGraph($nhash, $this->computeGraphHash($nhash));
  }

  $this->log(pht('Writing graph cache.'));

  $atom_cache->saveGraph();
  $atom_cache->saveEdges();
  $atom_cache->saveSymbols();

  $this->log(pht('Done.')."\n");
}
|
|
|
|
private function computeSymbolHash($node_hash) {
  // Derive the symbol hash (suffix "S") for a node from the symbolic
  // reference stored on its atom.
  $atom = $this->getAtomCache()->getAtom($node_hash);

  if (!$atom) {
    throw new Exception("No such atom with node hash '{$node_hash}'!");
  }

  $ref = DivinerAtomRef::newFromDictionary($atom['ref']);

  return $ref->toHash();
}
|
|
|
|
private function getEdges($node_hash) {
  // Collect the symbol hashes this node points at, deduplicated via
  // array keys.
  $atom = $this->getAtomCache()->getAtom($node_hash);

  $refs = array();

  // Make the atom depend on its own symbol, so that all atoms with the
  // same symbol are dirtied together (e.g., if a codebase defines the
  // function "f()" several times, all of them should be dirtied when one
  // is dirtied).
  $self_ref = DivinerAtomRef::newFromDictionary($atom);
  $refs[$self_ref->toHash()] = true;

  // Follow "extends" and "links" references, but only those which stay
  // within this book.
  $ref_dicts = array_merge($atom['extends'], $atom['links']);
  foreach ($ref_dicts as $ref_dict) {
    $ref = DivinerAtomRef::newFromDictionary($ref_dict);
    if ($ref->getBook() == $atom['book']) {
      $refs[$ref->toHash()] = true;
    }
  }

  return array_keys($refs);
}
|
|
|
|
private function computeGraphHash($node_hash) {
  // The graph hash (suffix "G") combines the node's own content hash
  // with a sorted list of its edges, so it changes when either the atom
  // itself or its connections change.
  $atom = $this->getAtomCache()->getAtom($node_hash);

  $edges = $this->getEdges($node_hash);
  // Sort so the hash does not depend on edge discovery order.
  sort($edges);

  $inputs = array(
    'atomHash' => $atom['hash'],
    'edges' => $edges,
  );

  return md5(serialize($inputs)).'G';
}
|
|
|
|
|
|
private function publishDocumentation() {
  // Hand the finished graph map to a static publisher, which compares
  // graph hashes against what it has already published and regenerates
  // only the documents that changed.
  $atom_cache = $this->getAtomCache();
  $graph_map = $atom_cache->getGraphMap();

  $this->log(pht('PUBLISHING DOCUMENTATION'));

  $publisher = new DivinerStaticPublisher();
  $publisher->setConfig($this->getAllConfig());
  $publisher->setAtomCache($atom_cache);
  $publisher->setRenderer(new DivinerDefaultRenderer());
  $publisher->publishAtoms(array_values($graph_map));

  $this->log(pht('Done.'));
}
|
|
|
|
|
|
}
|