Summary: Ref T13395. Moves some Aphront classes from libphutil to Phabricator. Test Plan: Grepped for symbols in libphutil and Arcanist. Maniphest Tasks: T13395 Differential Revision: https://secure.phabricator.com/D20975
250 lines
7.7 KiB
PHP
250 lines
7.7 KiB
PHP
<?php
|
|
|
|
/**
 * Incremental (streaming) parser for "multipart/form-data" request bodies.
 *
 * Typical use:
 *
 *   - call setContentType() with the request's "Content-Type" header value;
 *   - call beginParse() to validate the header and reset parser state;
 *   - call continueParse() any number of times with consecutive chunks of
 *     the raw body (chunks may be split at arbitrary byte offsets);
 *   - call endParse() to verify the body was complete and get the parts.
 *
 * The parser is a small state machine. The states are:
 *
 *   - "bodynewline": at the start of input, or just after a "\r\n" inside a
 *     body; looking for "--" followed by the boundary.
 *   - "endboundary": just after a boundary; expecting "--" (end of parts) or
 *     "\r\n" (start of a new part).
 *   - "header": reading the "\r\n"-terminated header lines of a part.
 *   - "body": reading part data (or the discarded preamble).
 *   - "epilogue": after the final boundary; remaining bytes are discarded.
 */
final class AphrontMultipartParser extends Phobject {

  private $contentType;
  private $boundary;

  // Unconsumed bytes carried over between continueParse() calls.
  private $buffer;
  // NOTE(review): not read or written by any method in this class.
  private $body;
  // Name of the current state machine state (see class comment).
  private $state;

  // The part currently being read, or null while in preamble/epilogue.
  private $part;
  // All parts created so far, in the order they appeared in the body.
  private $parts;

  /**
   * Set the raw "Content-Type" header value to parse the boundary from.
   *
   * @param string $content_type Header value, like
   *   "multipart/form-data; boundary=...".
   * @return $this
   */
  public function setContentType($content_type) {
    $this->contentType = $content_type;
    return $this;
  }

  /**
   * Get the "Content-Type" header value previously set, or null.
   *
   * @return string|null
   */
  public function getContentType() {
    return $this->contentType;
  }

  /**
   * Validate the content type, extract the boundary, and reset the parser
   * so it is ready to receive body bytes via continueParse().
   *
   * @return void
   * @throws PhutilInvalidStateException If no content type has been set.
   * @throws Exception If the content type is not "multipart/form-data" or
   *   carries no "boundary" parameter.
   */
  public function beginParse() {
    $content_type = $this->getContentType();
    if ($content_type === null) {
      throw new PhutilInvalidStateException('setContentType');
    }

    if (!preg_match('(^multipart/form-data)', $content_type)) {
      throw new Exception(
        pht(
          'Expected "multipart/form-data" content type when executing a '.
          'multipart body read.'));
    }

    $type_parts = preg_split('(\s*;\s*)', $content_type);
    $boundary = null;
    foreach ($type_parts as $type_part) {
      $matches = null;
      if (preg_match('(^boundary=(.*))', $type_part, $matches)) {
        $boundary = $matches[1];

        // The boundary parameter may legally be sent as a quoted string
        // (boundary="..."). The quotes are not part of the delimiter which
        // appears in the body, so strip one surrounding pair.
        if (strlen($boundary) >= 2 &&
            $boundary[0] === '"' &&
            substr($boundary, -1) === '"') {
          $boundary = (string)substr($boundary, 1, -1);
        }

        break;
      }
    }

    if ($boundary === null) {
      throw new Exception(
        pht('Received "multipart/form-data" request with no "boundary".'));
    }

    $this->parts = array();
    $this->part = null;

    $this->buffer = '';
    $this->boundary = $boundary;

    // We're looking for a (usually empty) body before the first boundary.
    $this->state = 'bodynewline';
  }

  /**
   * Feed the next chunk of raw body bytes to the parser.
   *
   * Bytes which cannot be interpreted yet (for example, a partial boundary
   * or a partial header line) are kept in the internal buffer until a later
   * call supplies the rest.
   *
   * @param string $bytes Next consecutive chunk of the request body.
   * @return void
   * @throws Exception If a boundary is followed by unexpected bytes, or if
   *   the parser is in an unknown state (for example, because beginParse()
   *   was never called).
   */
  public function continueParse($bytes) {
    $this->buffer .= $bytes;

    // Note that "break" below only leaves the "switch"; the loop keeps
    // running until a state sets "$continue = false" to wait for more bytes.
    $continue = true;
    while ($continue) {
      switch ($this->state) {
        case 'endboundary':
          // We've just parsed a boundary. Next, we expect either "--" (which
          // indicates we've reached the end of the parts) or "\r\n" (which
          // indicates we should read the headers for the next part).

          if (strlen($this->buffer) < 2) {
            // We don't have enough bytes yet, so wait for more.
            $continue = false;
            break;
          }

          if (!strncmp($this->buffer, '--', 2)) {
            // This is "--" after a boundary, so we're done. We'll read the
            // rest of the body (the "epilogue") and discard it.
            $this->buffer = substr($this->buffer, 2);
            $this->state = 'epilogue';

            $this->part = null;
            break;
          }

          if (!strncmp($this->buffer, "\r\n", 2)) {
            // This is "\r\n" after a boundary, so we're going to read the
            // headers for a part.
            $this->buffer = substr($this->buffer, 2);
            $this->state = 'header';

            // Create the object to hold the part we're about to read.
            $part = new AphrontMultipartPart();
            $this->parts[] = $part;
            $this->part = $part;
            break;
          }

          throw new Exception(
            pht('Expected "\r\n" or "--" after multipart data boundary.'));
        case 'header':
          // We've just parsed a boundary, followed by "\r\n". We are going
          // to read the headers for this part. They are in the form of HTTP
          // headers and terminated by "\r\n". The section is terminated by
          // a line with no header on it.

          if (strlen($this->buffer) < 2) {
            // We don't have enough data to find a "\r\n", so wait for more.
            $continue = false;
            break;
          }

          if (!strncmp("\r\n", $this->buffer, 2)) {
            // This line immediately began "\r\n", so we're done with parsing
            // headers. Start parsing the body.
            $this->buffer = substr($this->buffer, 2);
            $this->state = 'body';
            break;
          }

          // This is an actual header, so look for the end of it.
          $header_len = strpos($this->buffer, "\r\n");
          if ($header_len === false) {
            // We don't have a full header yet, so wait for more data.
            $continue = false;
            break;
          }

          $header_buf = substr($this->buffer, 0, $header_len);
          $this->part->appendRawHeader($header_buf);

          // Drop the header line and its "\r\n" terminator.
          $this->buffer = substr($this->buffer, $header_len + 2);
          break;
        case 'body':
          // We've parsed a boundary and headers, and are parsing the data for
          // this part. The data is terminated by "\r\n--", then the boundary.

          // We'll look for "\r\n", then switch to the "bodynewline" state if
          // we find it.

          $marker_pos = strpos($this->buffer, "\r");

          if ($marker_pos === false) {
            // There's no "\r" anywhere in the buffer, so we can just read it
            // as provided. Then, since we read all the data, we're done until
            // we get more.

            // Note that if we're in the preamble, we won't have a "part"
            // object and will just discard the data.
            if ($this->part) {
              $this->part->appendData($this->buffer);
            }
            $this->buffer = '';
            $continue = false;
            break;
          }

          if ($marker_pos > 0) {
            // If there are bytes before the "\r", they can not be part of a
            // delimiter, so they are plain data: emit them and leave the
            // buffer starting at the "\r".
            if ($this->part) {
              $this->part->appendData(substr($this->buffer, 0, $marker_pos));
            }
            $this->buffer = substr($this->buffer, $marker_pos);
          }

          if (strlen($this->buffer) < 2) {
            // We don't have enough bytes yet to know if this is "\r\n"
            // or not.
            $continue = false;
            break;
          }

          if ($this->buffer[1] !== "\n") {
            // This "\r" isn't followed by "\n", so it is plain data. Consume
            // only the "\r" itself: the byte after it may be another "\r"
            // which begins the real "\r\n--<boundary>" delimiter, and must
            // be examined on the next pass rather than skipped.
            if ($this->part) {
              $this->part->appendData("\r");
            }
            $this->buffer = substr($this->buffer, 1);
            break;
          }

          // Eat the "\r\n".
          $this->buffer = substr($this->buffer, 2);
          $this->state = 'bodynewline';
          break;
        case 'bodynewline':
          // We've parsed a newline in a body, or we just started parsing the
          // request. In either case, we're looking for "--", then the boundary.
          // If we find it, this section is done. If we don't, we consume the
          // bytes and move on.

          $expect = '--'.$this->boundary;
          $expect_len = strlen($expect);

          if (strlen($this->buffer) < $expect_len) {
            // We don't have enough bytes yet, so wait for more.
            $continue = false;
            break;
          }

          if (strncmp($this->buffer, $expect, $expect_len)) {
            // This wasn't the boundary, so return to the "body" state and
            // consume it. (But first, we need to append the "\r\n" which we
            // ate earlier.)
            if ($this->part) {
              $this->part->appendData("\r\n");
            }
            $this->state = 'body';
            break;
          }

          // This is the boundary, so toss it and move on.
          $this->buffer = substr($this->buffer, $expect_len);
          $this->state = 'endboundary';
          break;
        case 'epilogue':
          // We just discard any epilogue.
          $this->buffer = '';
          $continue = false;
          break;
        default:
          throw new Exception(
            pht(
              'Unknown parser state "%s".',
              $this->state));
      }
    }
  }

  /**
   * Finish parsing and return the parts which were read.
   *
   * @return list<AphrontMultipartPart> Parts in the order they appeared.
   * @throws Exception If the terminating boundary ("--<boundary>--") was
   *   never seen, so the parser is not in the "epilogue" state.
   */
  public function endParse() {
    if ($this->state !== 'epilogue') {
      throw new Exception(
        pht(
          'Expected "multipart/form-data" parse to end '.
          'in state "epilogue".'));
    }

    return $this->parts;
  }

}
|