Allow parsing of rare extra-broken non-UTF8 messages.
This commit is contained in:
		| @@ -192,13 +192,20 @@ switch (isset($argv[1]) ? $argv[1] : 'help') { | ||||
|       throw new Exception('Unknown commit.'); | ||||
|     } | ||||
|  | ||||
|     $workers = array(); | ||||
|  | ||||
|  | ||||
|     switch ($repo->getVersionControlSystem()) { | ||||
|       case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: | ||||
|         $worker = new PhabricatorRepositoryGitCommitChangeParserWorker( | ||||
|         $workers[] = new PhabricatorRepositoryGitCommitMessageParserWorker( | ||||
|           $commit->getID()); | ||||
|         $workers[] = new PhabricatorRepositoryGitCommitChangeParserWorker( | ||||
|           $commit->getID()); | ||||
|         break; | ||||
|       case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: | ||||
|         $worker = new PhabricatorRepositorySvnCommitChangeParserWorker( | ||||
|         $workers[] = new PhabricatorRepositorySvnCommitMessageParserWorker( | ||||
|           $commit->getID()); | ||||
|         $workers[] = new PhabricatorRepositorySvnCommitChangeParserWorker( | ||||
|           $commit->getID()); | ||||
|         break; | ||||
|       default: | ||||
| @@ -207,7 +214,10 @@ switch (isset($argv[1]) ? $argv[1] : 'help') { | ||||
|  | ||||
|     ExecFuture::pushEchoMode(true); | ||||
|  | ||||
|     $worker->doWork(); | ||||
|     foreach ($workers as $worker) { | ||||
|       echo "Running ".get_class($worker)."...\n"; | ||||
|       $worker->doWork(); | ||||
|     } | ||||
|  | ||||
|     echo "Done.\n"; | ||||
|  | ||||
|   | ||||
| @@ -50,4 +50,38 @@ abstract class PhabricatorRepositoryCommitParserWorker | ||||
|     PhabricatorRepository $repository, | ||||
|     PhabricatorRepositoryCommit $commit); | ||||
|  | ||||
|   /** | ||||
|    * Fetch the "svn log" entry for a single revision and parse it as XML. | ||||
|    * | ||||
|    * This method is kind of awkward here but both the SVN message and | ||||
|    * change parsers use it. | ||||
|    * | ||||
|    * The log is requested from $uri first. If that command fails and $uri | ||||
|    * uses the "https" protocol, the request is retried once over "svn+ssh". | ||||
|    * For any other protocol the original failure is rethrown. | ||||
|    * | ||||
|    * @param  string  Repository URI to query. | ||||
|    * @param  int     Revision to fetch; formatted into the command with %d. | ||||
|    * @param  bool    Pass true to add "--verbose", which asks Subversion to | ||||
|    *                 list the changed paths in the log entry. Callers that | ||||
|    *                 read $entry->paths need this. Defaults to false, which | ||||
|    *                 runs the same command as before. | ||||
|    * @return SimpleXMLElement  Parsed "svn log --xml" document. | ||||
|    * @throws CommandException  If "svn log" fails and no fallback applies, | ||||
|    *                           or if the fallback attempt fails as well. | ||||
|    */ | ||||
|   protected function getSVNLogXMLObject($uri, $revision, $verbose = false) { | ||||
|  | ||||
|     // Build the command once so the primary attempt and the fallback can | ||||
|     // never drift apart. | ||||
|     if ($verbose) { | ||||
|       $command = 'svn log --verbose --xml --limit 1 --non-interactive %s@%d'; | ||||
|     } else { | ||||
|       $command = 'svn log --xml --limit 1 --non-interactive %s@%d'; | ||||
|     } | ||||
|  | ||||
|     try { | ||||
|       list($xml) = execx($command, $uri, $revision); | ||||
|     } catch (CommandException $ex) { | ||||
|       // HTTPS is generally faster and more reliable than svn+ssh, but some | ||||
|       // commit messages with non-UTF8 text can't be retrieved over HTTPS, see | ||||
|       // Facebook rE197184 for one example. Make an attempt to fall back to | ||||
|       // svn+ssh if we've failed outright to retrieve the message. | ||||
|       $fallback_uri = new PhutilURI($uri); | ||||
|       if ($fallback_uri->getProtocol() !== 'https') { | ||||
|         throw $ex; | ||||
|       } | ||||
|       $fallback_uri->setProtocol('svn+ssh'); | ||||
|       list($xml) = execx($command, $fallback_uri, $revision); | ||||
|     } | ||||
|  | ||||
|     // Subversion may send us back commit messages which won't parse because | ||||
|     // they have non UTF-8 garbage in them. Slam them into valid UTF-8. | ||||
|     $xml = phutil_utf8ize($xml); | ||||
|  | ||||
|     return new SimpleXMLElement($xml); | ||||
|   } | ||||
|  | ||||
| } | ||||
|   | ||||
| @@ -10,6 +10,8 @@ phutil_require_module('phabricator', 'applications/repository/storage/commit'); | ||||
| phutil_require_module('phabricator', 'applications/repository/storage/repository'); | ||||
| phutil_require_module('phabricator', 'infrastructure/daemon/workers/worker'); | ||||
|  | ||||
| phutil_require_module('phutil', 'future/exec'); | ||||
| phutil_require_module('phutil', 'parser/uri'); | ||||
| phutil_require_module('phutil', 'utils'); | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -48,12 +48,8 @@ class PhabricatorRepositorySvnCommitChangeParserWorker | ||||
|  | ||||
|     // Pull the top-level path changes out of "svn log". This is pretty | ||||
|     // straightforward; just parse the XML log. | ||||
|     list($xml) = execx( | ||||
|       'svn log --verbose --xml --limit 1 --non-interactive %s@%d', | ||||
|       $uri, | ||||
|       $svn_commit); | ||||
|     $log = $this->getSVNLogXMLObject($uri, $svn_commit); | ||||
|  | ||||
|     $log = new SimpleXMLElement($xml); | ||||
|     $entry = $log->logentry[0]; | ||||
|  | ||||
|     if (!$entry->paths) { | ||||
|   | ||||
| @@ -25,16 +25,8 @@ class PhabricatorRepositorySvnCommitMessageParserWorker | ||||
|  | ||||
|     $uri = $repository->getDetail('remote-uri'); | ||||
|  | ||||
|     list($xml) = execx( | ||||
|       'svn log --xml --limit 1 --non-interactive %s@%d', | ||||
|       $uri, | ||||
|       $commit->getCommitIdentifier()); | ||||
|     $log = $this->getSVNLogXMLObject($uri, $commit->getCommitIdentifier()); | ||||
|  | ||||
|     // Subversion may send us back commit messages which won't parse because | ||||
|     // they have non UTF-8 garbage in them. Slam them into valid UTF-8. | ||||
|     $xml = phutil_utf8ize($xml); | ||||
|  | ||||
|     $log = new SimpleXMLElement($xml); | ||||
|     $entry = $log->logentry[0]; | ||||
|  | ||||
|     $author = (string)$entry->author; | ||||
|   | ||||
| @@ -9,8 +9,5 @@ | ||||
| phutil_require_module('phabricator', 'applications/repository/worker/commitmessageparser/base'); | ||||
| phutil_require_module('phabricator', 'infrastructure/daemon/workers/storage/task'); | ||||
|  | ||||
| phutil_require_module('phutil', 'future/exec'); | ||||
| phutil_require_module('phutil', 'utils'); | ||||
|  | ||||
|  | ||||
| phutil_require_source('PhabricatorRepositorySvnCommitMessageParserWorker.php'); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 epriestley
					epriestley