mirror of
https://github.com/klzgrad/naiveproxy.git
synced 2024-11-28 08:16:09 +03:00
222 lines
7.0 KiB
C++
222 lines
7.0 KiB
C++
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file.
|
|
|
|
// Derived from:
|
|
// mozilla/netwerk/protocol/http/src/nsHttpChunkedDecoder.cpp
|
|
// The license block is:
|
|
/* ***** BEGIN LICENSE BLOCK *****
|
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
*
|
|
* The contents of this file are subject to the Mozilla Public License Version
|
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
* the License. You may obtain a copy of the License at
|
|
* http://www.mozilla.org/MPL/
|
|
*
|
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
* for the specific language governing rights and limitations under the
|
|
* License.
|
|
*
|
|
* The Original Code is Mozilla.
|
|
*
|
|
* The Initial Developer of the Original Code is
|
|
* Netscape Communications.
|
|
* Portions created by the Initial Developer are Copyright (C) 2001
|
|
* the Initial Developer. All Rights Reserved.
|
|
*
|
|
* Contributor(s):
|
|
* Darin Fisher <darin@netscape.com> (original author)
|
|
*
|
|
* Alternatively, the contents of this file may be used under the terms of
|
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
* of those above. If you wish to allow use of your version of this file only
|
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
* use your version of this file under the terms of the MPL, indicate your
|
|
* decision by deleting the provisions above and replace them with the notice
|
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
* the provisions above, a recipient may use your version of this file under
|
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
*
|
|
* ***** END LICENSE BLOCK ***** */
|
|
|
|
#include "net/http/http_chunked_decoder.h"
|
|
|
|
#include <algorithm>
|
|
|
|
#include "base/logging.h"
|
|
#include "base/strings/string_number_conversions.h"
|
|
#include "base/strings/string_piece.h"
|
|
#include "base/strings/string_util.h"
|
|
#include "net/base/net_errors.h"
|
|
|
|
namespace net {
|
|
|
|
// Absurdly long size to avoid imposing a constraint on chunked encoding
|
|
// extensions.
|
|
const size_t HttpChunkedDecoder::kMaxLineBufLen = 16384;
|
|
|
|
HttpChunkedDecoder::HttpChunkedDecoder()
|
|
: chunk_remaining_(0),
|
|
chunk_terminator_remaining_(false),
|
|
reached_last_chunk_(false),
|
|
reached_eof_(false),
|
|
bytes_after_eof_(0) {
|
|
}
|
|
|
|
int HttpChunkedDecoder::FilterBuf(char* buf, int buf_len) {
|
|
int result = 0;
|
|
|
|
while (buf_len > 0) {
|
|
if (chunk_remaining_ > 0) {
|
|
// Since |chunk_remaining_| is positive and |buf_len| an int, the minimum
|
|
// of the two must be an int.
|
|
int num = static_cast<int>(
|
|
std::min(chunk_remaining_, static_cast<int64_t>(buf_len)));
|
|
|
|
buf_len -= num;
|
|
chunk_remaining_ -= num;
|
|
|
|
result += num;
|
|
buf += num;
|
|
|
|
// After each chunk's data there should be a CRLF.
|
|
if (chunk_remaining_ == 0)
|
|
chunk_terminator_remaining_ = true;
|
|
continue;
|
|
} else if (reached_eof_) {
|
|
bytes_after_eof_ += buf_len;
|
|
break; // Done!
|
|
}
|
|
|
|
int bytes_consumed = ScanForChunkRemaining(buf, buf_len);
|
|
if (bytes_consumed < 0)
|
|
return bytes_consumed; // Error
|
|
|
|
buf_len -= bytes_consumed;
|
|
if (buf_len > 0)
|
|
memmove(buf, buf + bytes_consumed, buf_len);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
int HttpChunkedDecoder::ScanForChunkRemaining(const char* buf, int buf_len) {
|
|
DCHECK_EQ(0, chunk_remaining_);
|
|
DCHECK_GT(buf_len, 0);
|
|
|
|
int bytes_consumed = 0;
|
|
|
|
size_t index_of_lf = base::StringPiece(buf, buf_len).find('\n');
|
|
if (index_of_lf != base::StringPiece::npos) {
|
|
buf_len = static_cast<int>(index_of_lf);
|
|
if (buf_len && buf[buf_len - 1] == '\r') // Eliminate a preceding CR.
|
|
buf_len--;
|
|
bytes_consumed = static_cast<int>(index_of_lf) + 1;
|
|
|
|
// Make buf point to the full line buffer to parse.
|
|
if (!line_buf_.empty()) {
|
|
line_buf_.append(buf, buf_len);
|
|
buf = line_buf_.data();
|
|
buf_len = static_cast<int>(line_buf_.size());
|
|
}
|
|
|
|
if (reached_last_chunk_) {
|
|
if (buf_len > 0)
|
|
DVLOG(1) << "ignoring http trailer";
|
|
else
|
|
reached_eof_ = true;
|
|
} else if (chunk_terminator_remaining_) {
|
|
if (buf_len > 0) {
|
|
DLOG(ERROR) << "chunk data not terminated properly";
|
|
return ERR_INVALID_CHUNKED_ENCODING;
|
|
}
|
|
chunk_terminator_remaining_ = false;
|
|
} else if (buf_len > 0) {
|
|
// Ignore any chunk-extensions.
|
|
size_t index_of_semicolon = base::StringPiece(buf, buf_len).find(';');
|
|
if (index_of_semicolon != base::StringPiece::npos)
|
|
buf_len = static_cast<int>(index_of_semicolon);
|
|
|
|
if (!ParseChunkSize(buf, buf_len, &chunk_remaining_)) {
|
|
DLOG(ERROR) << "Failed parsing HEX from: " <<
|
|
std::string(buf, buf_len);
|
|
return ERR_INVALID_CHUNKED_ENCODING;
|
|
}
|
|
|
|
if (chunk_remaining_ == 0)
|
|
reached_last_chunk_ = true;
|
|
} else {
|
|
DLOG(ERROR) << "missing chunk-size";
|
|
return ERR_INVALID_CHUNKED_ENCODING;
|
|
}
|
|
line_buf_.clear();
|
|
} else {
|
|
// Save the partial line; wait for more data.
|
|
bytes_consumed = buf_len;
|
|
|
|
// Ignore a trailing CR
|
|
if (buf[buf_len - 1] == '\r')
|
|
buf_len--;
|
|
|
|
if (line_buf_.length() + buf_len > kMaxLineBufLen) {
|
|
DLOG(ERROR) << "Chunked line length too long";
|
|
return ERR_INVALID_CHUNKED_ENCODING;
|
|
}
|
|
|
|
line_buf_.append(buf, buf_len);
|
|
}
|
|
return bytes_consumed;
|
|
}
|
|
|
|
|
|
// While the HTTP 1.1 specification defines chunk-size as 1*HEX
|
|
// some sites rely on more lenient parsing.
|
|
// http://www.yahoo.com/, for example, pads chunk-size with trailing spaces
|
|
// (0x20) to be 7 characters long, such as "819b ".
|
|
//
|
|
// A comparison of browsers running on WindowsXP shows that
|
|
// they will parse the following inputs (egrep syntax):
|
|
//
|
|
// Let \X be the character class for a hex digit: [0-9a-fA-F]
|
|
//
|
|
// RFC 7230: ^\X+$
|
|
// IE7: ^\X+[^\X]*$
|
|
// Safari 3.1: ^[\t\r ]*\X+[\t ]*$
|
|
// Firefox 3: ^[\t\f\v\r ]*[+]?(0x)?\X+[^\X]*$
|
|
// Opera 9.51: ^[\t\f\v ]*[+]?(0x)?\X+[^\X]*$
|
|
//
|
|
// Our strategy is to be as strict as possible, while not breaking
|
|
// known sites.
|
|
//
|
|
// Us: ^\X+[ ]*$
|
|
bool HttpChunkedDecoder::ParseChunkSize(const char* start,
|
|
int len,
|
|
int64_t* out) {
|
|
DCHECK_GE(len, 0);
|
|
|
|
// Strip trailing spaces
|
|
while (len > 0 && start[len - 1] == ' ')
|
|
len--;
|
|
|
|
// Be more restrictive than HexStringToInt64;
|
|
// don't allow inputs with leading "-", "+", "0x", "0X"
|
|
base::StringPiece chunk_size(start, len);
|
|
if (chunk_size.find_first_not_of("0123456789abcdefABCDEF")
|
|
!= base::StringPiece::npos) {
|
|
return false;
|
|
}
|
|
|
|
int64_t parsed_number;
|
|
bool ok = base::HexStringToInt64(chunk_size, &parsed_number);
|
|
if (ok && parsed_number >= 0) {
|
|
*out = parsed_number;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
} // namespace net
|