Add a robots.txt file to disallow /diffusion/

Summary:
Fixes T4610. Open to suggestions, etc., if there's anything I'm missing.

Also:

  - Moves these "system" endpoints into a real application.
  - Makes `isUnlisted()` work a little more consistently.

Test Plan: Accessed `/robots.txt`, `/status/` and `/debug/`.

Reviewers: chad, btrahan

Reviewed By: btrahan

Subscribers: aran, epriestley

Maniphest Tasks: T4610

Differential Revision: https://secure.phabricator.com/D8532
This commit is contained in:
epriestley
2014-03-14 11:53:17 -07:00
parent efcca310c2
commit 838f781285
8 changed files with 69 additions and 19 deletions

View File

@@ -0,0 +1,39 @@
<?php
/**
 * Debugging endpoint for application problems which can not be reproduced
 * locally. It lets an install run arbitrary diagnostic code with little
 * setup. Typical workflow:
 *
 *   - Write a diagnostic script.
 *   - Have the user save it as `support/debug.php`, in the directory which
 *     contains the `phabricator` library root.
 *   - Have the user visit `/debug/`.
 *
 * Whatever the script prints is captured and rendered inside a `<pre>` tag.
 */
final class PhabricatorDebugController extends PhabricatorController {

  /**
   * Report that this endpoint does not require a logged-in user.
   *
   * @return bool Always `false`.
   */
  public function shouldRequireLogin() {
    return false;
  }

  /**
   * Run the debug script, if one is installed, and return its output.
   *
   * @return AphrontWebpageResponse|Aphront404Response The captured script
   *   output wrapped in a `<pre>` tag, or a 404 when no script exists.
   */
  public function processRequest() {
    if (Filesystem::pathExists($this->getDebugFilePath())) {
      // An included file shares the local scope of the including function,
      // so `$request` and `$user` are visible to the debug script even
      // though nothing below reads them. NOTE(review): presumably that is
      // why they are defined here; do not remove or rename them.
      $request = $this->getRequest();
      $user = $request->getUser();

      // Buffer everything the script prints so it can be placed into the
      // response body instead of being emitted directly.
      ob_start();
      require_once $this->getDebugFilePath();
      $captured = ob_get_clean();

      $page = new AphrontWebpageResponse();
      $page->setContent(phutil_tag('pre', array(), $captured));
      return $page;
    }

    // No debug script is installed.
    return new Aphront404Response();
  }

  /**
   * Build the path of the debug script: `support/debug.php` inside the
   * parent directory of the `phabricator` library root.
   *
   * @return string Path to the debug script.
   */
  private function getDebugFilePath() {
    return dirname(phutil_get_library_root('phabricator')).'/support/debug.php';
  }

}

View File

@@ -0,0 +1,37 @@
<?php
/**
 * Serves a `robots.txt` body which asks web spiders to stay out of
 * `/diffusion/` and to pace their requests.
 */
final class PhabricatorRobotsController extends PhabricatorController {

  /**
   * Report that this endpoint does not require a logged-in user.
   *
   * @return bool Always `false`.
   */
  public function shouldRequireLogin() {
    return false;
  }

  /**
   * Build the robots rules and return them as a cacheable plain text
   * response.
   *
   * @return AphrontPlainTextResponse Response carrying the rules, with a
   *   cache duration of two hours.
   */
  public function processRequest() {
    $rules = array(
      'User-Agent: *',

      // Keep spiders out of '/diffusion/'. The content there is not
      // generally useful to index, spiders get stuck scraping the history of
      // every file, and much of it is loaded via Ajax so spiders would not
      // see it anyway. These pages are also relatively expensive to
      // generate.
      //
      // Commits (at '/rPxxxxx') remain indexable. They are probably not
      // hugely useful, but they suffer fewer of these problems and are hard
      // to exclude with 'robots.txt'.
      'Disallow: /diffusion/',

      // Ask spiders which respect it to wait this many seconds between
      // requests, so crawling does not degrade performance for users. The
      // possible cost is slower indexing, which seems like a reasonable
      // tradeoff: most installs are probably not hugely concerned about
      // cutting-edge SEO.
      'Crawl-delay: 1',
    );

    // One rule per line, with a trailing newline after the last rule.
    $body = implode("\n", $rules)."\n";
    $cache_duration = phutil_units('2 hours in seconds');

    $response = new AphrontPlainTextResponse();
    return $response
      ->setContent($body)
      ->setCacheDurationInSeconds($cache_duration);
  }

}

View File

@@ -0,0 +1,14 @@
<?php
/**
 * Minimal status endpoint: always answers with the fixed body "ALIVE".
 */
final class PhabricatorStatusController extends PhabricatorController {

  /**
   * Report that this endpoint does not require a logged-in user.
   *
   * @return bool Always `false`.
   */
  public function shouldRequireLogin() {
    return false;
  }

  /**
   * Return a response whose content is the literal string "ALIVE" followed
   * by a newline.
   *
   * @return AphrontWebpageResponse Response with the fixed status body.
   */
  public function processRequest() {
    $alive = new AphrontWebpageResponse();
    $alive->setContent("ALIVE\n");

    return $alive;
  }

}