00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #ifndef __PION_HTTPPARSER_HEADER__
00011 #define __PION_HTTPPARSER_HEADER__
00012
00013 #include <string>
00014 #include <boost/noncopyable.hpp>
00015 #include <boost/logic/tribool.hpp>
00016 #include <pion/PionConfig.hpp>
00017 #include <pion/PionLogger.hpp>
00018 #include <pion/net/HTTPMessage.hpp>
00019
00020
00021 namespace pion {
00022 namespace net {
00023
00024
00025 class HTTPRequest;
00026 class HTTPResponse;
00027
00031 class PION_NET_API HTTPParser :
00032 private boost::noncopyable
00033 {
00034
00035 public:
00036
00038 static const std::size_t DEFAULT_CONTENT_MAX;
00039
00047 HTTPParser(const bool is_request, std::size_t max_content_length = DEFAULT_CONTENT_MAX)
00048 : m_logger(PION_GET_LOGGER("pion.net.HTTPParser")), m_is_request(is_request),
00049 m_read_ptr(NULL), m_read_end_ptr(NULL), m_message_parse_state(PARSE_START),
00050 m_headers_parse_state(is_request ? PARSE_METHOD_START : PARSE_HTTP_VERSION_H),
00051 m_chunked_content_parse_state(PARSE_CHUNK_SIZE_START), m_status_code(0),
00052 m_bytes_content_remaining(0), m_bytes_content_read(0),
00053 m_bytes_last_read(0), m_bytes_total_read(0),
00054 m_max_content_length(max_content_length)
00055 {}
00056
00058 virtual ~HTTPParser() {}
00059
00070 boost::tribool parse(HTTPMessage& http_msg);
00071
00083 boost::tribool parseMissingData(HTTPMessage& http_msg, std::size_t len);
00084
00090 void finish(HTTPMessage& http_msg) const;
00091
00098 inline void setReadBuffer(const char *ptr, size_t len) {
00099 m_read_ptr = ptr;
00100 m_read_end_ptr = ptr + len;
00101 }
00102
00109 inline void loadReadPosition(const char *&read_ptr, const char *&read_end_ptr) const {
00110 read_ptr = m_read_ptr;
00111 read_end_ptr = m_read_end_ptr;
00112 }
00113
00123 inline bool checkPrematureEOF(HTTPMessage& http_msg) {
00124 if (m_message_parse_state != PARSE_CONTENT_NO_LENGTH)
00125 return true;
00126 m_message_parse_state = PARSE_END;
00127 http_msg.concatenateChunks();
00128 finish(http_msg);
00129 return false;
00130 }
00131
00133 inline void reset(void) {
00134 m_message_parse_state = PARSE_START;
00135 m_headers_parse_state = (m_is_request ? PARSE_METHOD_START : PARSE_HTTP_VERSION_H);
00136 m_chunked_content_parse_state = PARSE_CHUNK_SIZE_START;
00137 m_status_code = 0;
00138 m_status_message.erase();
00139 m_method.erase();
00140 m_resource.erase();
00141 m_query_string.erase();
00142 m_bytes_content_read = m_bytes_last_read = m_bytes_total_read = 0;
00143 }
00144
00146 inline bool eof(void) const { return m_read_ptr == NULL || m_read_ptr >= m_read_end_ptr; }
00147
00149 inline std::size_t bytes_available(void) const { return (eof() ? 0 : (std::size_t)(m_read_end_ptr - m_read_ptr)); }
00150
00152 inline std::size_t gcount(void) const { return m_bytes_last_read; }
00153
00155 inline std::size_t getTotalBytesRead(void) const { return m_bytes_total_read; }
00156
00158 inline std::size_t getContentBytesRead(void) const { return m_bytes_content_read; }
00159
00161 inline std::size_t getMaxContentLength(void) const { return m_max_content_length; }
00162
00164 inline bool isParsingRequest(void) const { return m_is_request; }
00165
00167 inline bool isParsingResponse(void) const { return ! m_is_request; }
00168
00170 inline void setMaxContentLength(std::size_t n) { m_max_content_length = n; }
00171
00173 inline void resetMaxContentLength(void) { m_max_content_length = DEFAULT_CONTENT_MAX; }
00174
00176 inline void setLogger(PionLogger log_ptr) { m_logger = log_ptr; }
00177
00179 inline PionLogger getLogger(void) { return m_logger; }
00180
00181
00190 static bool contentTypeIsUrlEncoded(HTTPRequest& http_request);
00191
00202 static bool parseURLEncoded(HTTPTypes::StringDictionary& dict,
00203 const char *ptr, const std::size_t len);
00204
00215 static bool parseCookieHeader(HTTPTypes::CookieParams& dict,
00216 const char *ptr, const std::size_t len);
00217
00227 static inline bool parseCookieHeader(HTTPTypes::CookieParams& dict,
00228 const std::string& cookie_header)
00229 {
00230 return parseCookieHeader(dict, cookie_header.c_str(), cookie_header.size());
00231 }
00232
00242 static inline bool parseURLEncoded(HTTPTypes::StringDictionary& dict,
00243 const std::string& query)
00244 {
00245 return parseURLEncoded(dict, query.c_str(), query.size());
00246 }
00247
00248
00249 protected:
00250
00262 boost::tribool parseHeaders(HTTPMessage& http_msg);
00263
00269 void updateMessageWithHeaderData(HTTPMessage& http_msg) const;
00270
00282 boost::tribool finishHeaderParsing(HTTPMessage& http_msg);
00283
00294 boost::tribool parseChunks(HTTPMessage::ChunkCache& chunk_buffers);
00295
00306 boost::tribool consumeContent(HTTPMessage& http_msg);
00307
00315 std::size_t consumeContentAsNextChunk(HTTPMessage::ChunkCache& chunk_buffers);
00316
00317
00318 inline static bool isChar(int c);
00319 inline static bool isControl(int c);
00320 inline static bool isSpecial(int c);
00321 inline static bool isDigit(int c);
00322 inline static bool isHexDigit(int c);
00323
00324
00326 static const boost::uint32_t STATUS_MESSAGE_MAX;
00327
00329 static const boost::uint32_t METHOD_MAX;
00330
00332 static const boost::uint32_t RESOURCE_MAX;
00333
00335 static const boost::uint32_t QUERY_STRING_MAX;
00336
00338 static const boost::uint32_t HEADER_NAME_MAX;
00339
00341 static const boost::uint32_t HEADER_VALUE_MAX;
00342
00344 static const boost::uint32_t QUERY_NAME_MAX;
00345
00347 static const boost::uint32_t QUERY_VALUE_MAX;
00348
00350 static const boost::uint32_t COOKIE_NAME_MAX;
00351
00353 static const boost::uint32_t COOKIE_VALUE_MAX;
00354
00355
00357 mutable PionLogger m_logger;
00358
00360 const bool m_is_request;
00361
00363 const char * m_read_ptr;
00364
00366 const char * m_read_end_ptr;
00367
00368
00369 private:
00370
00372 enum MessageParseState {
00373 PARSE_START, PARSE_HEADERS, PARSE_CONTENT,
00374 PARSE_CONTENT_NO_LENGTH, PARSE_CHUNKS, PARSE_END
00375 };
00376
00379 enum HeadersParseState {
00380 PARSE_METHOD_START, PARSE_METHOD, PARSE_URI_STEM, PARSE_URI_QUERY,
00381 PARSE_HTTP_VERSION_H, PARSE_HTTP_VERSION_T_1, PARSE_HTTP_VERSION_T_2,
00382 PARSE_HTTP_VERSION_P, PARSE_HTTP_VERSION_SLASH,
00383 PARSE_HTTP_VERSION_MAJOR_START, PARSE_HTTP_VERSION_MAJOR,
00384 PARSE_HTTP_VERSION_MINOR_START, PARSE_HTTP_VERSION_MINOR,
00385 PARSE_STATUS_CODE_START, PARSE_STATUS_CODE, PARSE_STATUS_MESSAGE,
00386 PARSE_EXPECTING_NEWLINE, PARSE_EXPECTING_CR,
00387 PARSE_HEADER_WHITESPACE, PARSE_HEADER_START, PARSE_HEADER_NAME,
00388 PARSE_SPACE_BEFORE_HEADER_VALUE, PARSE_HEADER_VALUE,
00389 PARSE_EXPECTING_FINAL_NEWLINE, PARSE_EXPECTING_FINAL_CR
00390 };
00391
00394 enum ChunkedContentParseState {
00395 PARSE_CHUNK_SIZE_START, PARSE_CHUNK_SIZE,
00396 PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE,
00397 PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE, PARSE_CHUNK,
00398 PARSE_EXPECTING_CR_AFTER_CHUNK, PARSE_EXPECTING_LF_AFTER_CHUNK,
00399 PARSE_EXPECTING_FINAL_CR_AFTER_LAST_CHUNK,
00400 PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK
00401 };
00402
00403
00405 MessageParseState m_message_parse_state;
00406
00408 HeadersParseState m_headers_parse_state;
00409
00411 ChunkedContentParseState m_chunked_content_parse_state;
00412
00414 boost::uint16_t m_status_code;
00415
00417 std::string m_status_message;
00418
00420 std::string m_method;
00421
00423 std::string m_resource;
00424
00426 std::string m_query_string;
00427
00429 std::string m_header_name;
00430
00432 std::string m_header_value;
00433
00435 std::string m_chunk_size_str;
00436
00438 std::size_t m_size_of_current_chunk;
00439
00441 std::size_t m_bytes_read_in_current_chunk;
00442
00444 std::size_t m_bytes_content_remaining;
00445
00447 std::size_t m_bytes_content_read;
00448
00450 std::size_t m_bytes_last_read;
00451
00453 std::size_t m_bytes_total_read;
00454
00456 std::size_t m_max_content_length;
00457 };
00458
00459
00460
00461
00462 inline bool HTTPParser::isChar(int c)
00463 {
00464 return(c >= 0 && c <= 127);
00465 }
00466
00467 inline bool HTTPParser::isControl(int c)
00468 {
00469 return( (c >= 0 && c <= 31) || c == 127);
00470 }
00471
00472 inline bool HTTPParser::isSpecial(int c)
00473 {
00474 switch (c) {
00475 case '(': case ')': case '<': case '>': case '@':
00476 case ',': case ';': case ':': case '\\': case '"':
00477 case '/': case '[': case ']': case '?': case '=':
00478 case '{': case '}': case ' ': case '\t':
00479 return true;
00480 default:
00481 return false;
00482 }
00483 }
00484
00485 inline bool HTTPParser::isDigit(int c)
00486 {
00487 return(c >= '0' && c <= '9');
00488 }
00489
00490 inline bool HTTPParser::isHexDigit(int c)
00491 {
00492 return((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
00493 }
00494
00495 }
00496 }
00497
00498 #endif