Files
phabricator/src/applications/files/diff/PhabricatorDocumentEngineBlocks.php
epriestley 884cd74cc4 In prose diffs, use hash-and-diff for coarse "level 0" diffing to scale better
Summary: Depends on D20838. Fixes T13414. Instead of doing coarse diffing with "PhutilEditDistanceMatrix", use hash-and-diff with "DocumentEngine".

Test Plan:
  - On a large document (~3K top level blocks), saw a more sensible diff, instead of the whole thing falling back to "everything changed" mode.
  - On a small document, still saw a sensible granular diff.

{F6888249}

Maniphest Tasks: T13414

Differential Revision: https://secure.phabricator.com/D20839
2019-09-25 16:50:49 -07:00

146 lines
3.1 KiB
PHP

<?php
final class PhabricatorDocumentEngineBlocks
extends Phobject {
private $lists = array();
public function addBlockList(PhabricatorDocumentRef $ref, array $blocks) {
assert_instances_of($blocks, 'PhabricatorDocumentEngineBlock');
$this->lists[] = array(
'ref' => $ref,
'blocks' => array_values($blocks),
);
return $this;
}
public function newTwoUpLayout() {
$rows = array();
$lists = $this->lists;
$specs = array();
foreach ($this->lists as $list) {
$specs[] = $this->newDiffSpec($list['blocks']);
}
$old_map = $specs[0]['map'];
$new_map = $specs[1]['map'];
$old_list = $specs[0]['list'];
$new_list = $specs[1]['list'];
$changeset = id(new PhabricatorDifferenceEngine())
->generateChangesetFromFileContent($old_list, $new_list);
$hunk_parser = id(new DifferentialHunkParser())
->parseHunksForLineData($changeset->getHunks())
->reparseHunksForSpecialAttributes();
$old_lines = $hunk_parser->getOldLines();
$new_lines = $hunk_parser->getNewLines();
$rows = array();
$count = count($old_lines);
for ($ii = 0; $ii < $count; $ii++) {
$old_line = idx($old_lines, $ii);
$new_line = idx($new_lines, $ii);
if ($old_line) {
$old_hash = rtrim($old_line['text'], "\n");
if (!strlen($old_hash)) {
// This can happen when one of the sources has no blocks.
$old_block = null;
} else {
$old_block = array_shift($old_map[$old_hash]);
$old_block->setDifferenceType($old_line['type']);
}
} else {
$old_block = null;
}
if ($new_line) {
$new_hash = rtrim($new_line['text'], "\n");
if (!strlen($new_hash)) {
$new_block = null;
} else {
$new_block = array_shift($new_map[$new_hash]);
$new_block->setDifferenceType($new_line['type']);
}
} else {
$new_block = null;
}
// If both lists are empty, we may generate a row which has two empty
// blocks.
if (!$old_block && !$new_block) {
continue;
}
$rows[] = array(
$old_block,
$new_block,
);
}
return $rows;
}
public function newOneUpLayout() {
$rows = array();
$lists = $this->lists;
$idx = 0;
while (true) {
$found_any = false;
$row = array();
foreach ($lists as $list) {
$blocks = $list['blocks'];
$cell = idx($blocks, $idx);
if ($cell !== null) {
$found_any = true;
}
if ($cell) {
$rows[] = $cell;
}
}
if (!$found_any) {
break;
}
$idx++;
}
return $rows;
}
private function newDiffSpec(array $blocks) {
$map = array();
$list = array();
foreach ($blocks as $block) {
$hash = $block->getDifferenceHash();
if (!isset($map[$hash])) {
$map[$hash] = array();
}
$map[$hash][] = $block;
$list[] = $hash;
}
return array(
'map' => $map,
'list' => implode("\n", $list)."\n",
);
}
}