net/src/HTTPParser.cpp

00001 // ------------------------------------------------------------------
00002 // pion-net: a C++ framework for building lightweight HTTP interfaces
00003 // ------------------------------------------------------------------
00004 // Copyright (C) 2007-2008 Atomic Labs, Inc.  (http://www.atomiclabs.com)
00005 //
00006 // Distributed under the Boost Software License, Version 1.0.
00007 // See http://www.boost.org/LICENSE_1_0.txt
00008 //
00009 
00010 #include <cstdlib>
00011 #include <boost/logic/tribool.hpp>
00012 #include <pion/net/HTTPParser.hpp>
00013 #include <pion/net/HTTPRequest.hpp>
00014 #include <pion/net/HTTPResponse.hpp>
00015 #include <pion/net/HTTPMessage.hpp>
00016 
00017 
00018 namespace pion {    // begin namespace pion
00019 namespace net {     // begin namespace net (Pion Network Library)
00020 
00021 
00022 // static members of HTTPParser
00023 
00024 const boost::uint32_t   HTTPParser::STATUS_MESSAGE_MAX = 1024;  // 1 KB
00025 const boost::uint32_t   HTTPParser::METHOD_MAX = 1024;  // 1 KB
00026 const boost::uint32_t   HTTPParser::RESOURCE_MAX = 256 * 1024;  // 256 KB
00027 const boost::uint32_t   HTTPParser::QUERY_STRING_MAX = 1024 * 1024; // 1 MB
00028 const boost::uint32_t   HTTPParser::HEADER_NAME_MAX = 1024; // 1 KB
00029 const boost::uint32_t   HTTPParser::HEADER_VALUE_MAX = 1024 * 1024; // 1 MB
00030 const boost::uint32_t   HTTPParser::QUERY_NAME_MAX = 1024;  // 1 KB
00031 const boost::uint32_t   HTTPParser::QUERY_VALUE_MAX = 1024 * 1024;  // 1 MB
00032 const boost::uint32_t   HTTPParser::COOKIE_NAME_MAX = 1024; // 1 KB
00033 const boost::uint32_t   HTTPParser::COOKIE_VALUE_MAX = 1024 * 1024; // 1 MB
00034 const std::size_t       HTTPParser::DEFAULT_CONTENT_MAX = 1024 * 1024;  // 1 MB
00035 
00036 
00037 // HTTPParser member functions
00038 
00039 boost::tribool HTTPParser::parse(HTTPMessage& http_msg)
00040 {
00041     PION_ASSERT(! eof() );
00042 
00043     boost::tribool rc = boost::indeterminate;
00044     std::size_t total_bytes_parsed = 0;
00045 
00046     do {
00047         switch (m_message_parse_state) {
00048             // just started parsing the HTTP message
00049             case PARSE_START:
00050                 m_message_parse_state = PARSE_HEADERS;
00051                 // step through to PARSE_HEADERS
00052 
00053             // parsing the HTTP headers
00054             case PARSE_HEADERS:
00055                 rc = parseHeaders(http_msg);
00056                 total_bytes_parsed += m_bytes_last_read;
00057                 // check if we have finished parsing HTTP headers
00058                 if (rc == true) {
00059                     // finishHeaderParsing() updates m_message_parse_state
00060                     rc = finishHeaderParsing(http_msg);
00061                 }
00062                 break;
00063 
00064             // parsing chunked payload content
00065             case PARSE_CHUNKS:
00066                 rc = parseChunks(http_msg.getChunkCache());
00067                 total_bytes_parsed += m_bytes_last_read;
00068                 // check if we have finished parsing all chunks
00069                 if (rc == true) {
00070                     http_msg.concatenateChunks();
00071                 }
00072                 break;
00073 
00074             // parsing regular payload content with a known length
00075             case PARSE_CONTENT:
00076                 rc = consumeContent(http_msg);
00077                 total_bytes_parsed += m_bytes_last_read;
00078                 break;
00079 
00080             // parsing payload content with no length (until EOF)
00081             case PARSE_CONTENT_NO_LENGTH:
00082                 consumeContentAsNextChunk(http_msg.getChunkCache());
00083                 total_bytes_parsed += m_bytes_last_read;
00084                 break;
00085 
00086             // finished parsing the HTTP message
00087             case PARSE_END:
00088                 rc = true;
00089                 break;
00090         }
00091     } while ( boost::indeterminate(rc) && ! eof() );
00092 
00093     // check if we've finished parsing the HTTP message
00094     if (rc == true) {
00095         m_message_parse_state = PARSE_END;
00096         finish(http_msg);
00097     }
00098 
00099     // update bytes last read (aggregate individual operations for caller)
00100     m_bytes_last_read = total_bytes_parsed;
00101 
00102     return rc;
00103 }
00104 
00105 boost::tribool HTTPParser::parseMissingData(HTTPMessage& http_msg, std::size_t len)
00106 {
00107     static const char MISSING_DATA_CHAR = 'X';
00108     boost::tribool rc = boost::indeterminate;
00109 
00110     switch (m_message_parse_state) {
00111 
00112         // cannot recover from missing data while parsing HTTP headers
00113         case PARSE_START:
00114         case PARSE_HEADERS:
00115             rc = false;
00116             break;
00117 
00118         // parsing chunked payload content
00119         case PARSE_CHUNKS:
00120             // parsing chunk data -> we can only recover if data fits into current chunk
00121             if (m_chunked_content_parse_state == PARSE_CHUNK
00122                 && m_bytes_read_in_current_chunk < m_size_of_current_chunk
00123                 && (m_size_of_current_chunk - m_bytes_read_in_current_chunk) >= len)
00124             {
00125                 // use dummy content for missing data
00126                 for (std::size_t n = 0; n < len && http_msg.getChunkCache().size() < m_max_content_length; ++n) 
00127                     http_msg.getChunkCache().push_back(MISSING_DATA_CHAR);
00128 
00129                 m_bytes_read_in_current_chunk += len;
00130                 m_bytes_last_read = len;
00131                 m_bytes_total_read += len;
00132                 m_bytes_content_read += len;
00133 
00134                 if (m_bytes_read_in_current_chunk == m_size_of_current_chunk) {
00135                     m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK;
00136                 }
00137             } else {
00138                 // cannot recover from missing data
00139                 rc = false;
00140             }
00141             break;
00142 
00143         // parsing regular payload content with a known length
00144         case PARSE_CONTENT:
00145             // parsing content (with length) -> we can only recover if data fits into content
00146             if (m_bytes_content_remaining == 0) {
00147                 // we have all of the remaining payload content
00148                 rc = true;
00149             } else if (m_bytes_content_remaining < len) {
00150                 // cannot recover from missing data
00151                 rc = false;
00152             } else {
00153 
00154                 // make sure content buffer is not already full
00155                 if ( (m_bytes_content_read+len) <= m_max_content_length) {
00156                     // use dummy content for missing data
00157                     for (std::size_t n = 0; n < len; ++n)
00158                         http_msg.getContent()[m_bytes_content_read++] = MISSING_DATA_CHAR;
00159                 } else {
00160                     m_bytes_content_read += len;
00161                 }
00162 
00163                 m_bytes_content_remaining -= len;
00164                 m_bytes_total_read += len;
00165                 m_bytes_last_read = len;
00166 
00167                 if (m_bytes_content_remaining == 0)
00168                     rc = true;
00169             }
00170             break;
00171 
00172         // parsing payload content with no length (until EOF)
00173         case PARSE_CONTENT_NO_LENGTH:
00174             // use dummy content for missing data
00175             for (std::size_t n = 0; n < len && http_msg.getChunkCache().size() < m_max_content_length; ++n) 
00176                 http_msg.getChunkCache().push_back(MISSING_DATA_CHAR);
00177             m_bytes_last_read = len;
00178             m_bytes_total_read += len;
00179             m_bytes_content_read += len;
00180             break;
00181 
00182         // finished parsing the HTTP message
00183         case PARSE_END:
00184             rc = true;
00185             break;
00186     }
00187 
00188     // check if we've finished parsing the HTTP message
00189     if (rc == true) {
00190         m_message_parse_state = PARSE_END;
00191         finish(http_msg);
00192     }
00193 
00194     return rc;
00195 }
00196 
00197 boost::tribool HTTPParser::parseHeaders(HTTPMessage& http_msg)
00198 {
00199     //
00200     // note that boost::tribool may have one of THREE states:
00201     //
00202     // false: encountered an error while parsing HTTP headers
00203     // true: finished successfully parsing the HTTP headers
00204     // indeterminate: parsed bytes, but the HTTP headers are not yet finished
00205     //
00206     const char *read_start_ptr = m_read_ptr;
00207     m_bytes_last_read = 0;
00208     while (m_read_ptr < m_read_end_ptr) {
00209 
00210         switch (m_headers_parse_state) {
00211         case PARSE_METHOD_START:
00212             // we have not yet started parsing the HTTP method string
00213             if (*m_read_ptr != ' ' && *m_read_ptr!='\r' && *m_read_ptr!='\n') { // ignore leading whitespace
00214                 if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr))
00215                     return false;
00216                 m_headers_parse_state = PARSE_METHOD;
00217                 m_method.erase();
00218                 m_method.push_back(*m_read_ptr);
00219             }
00220             break;
00221 
00222         case PARSE_METHOD:
00223             // we have started parsing the HTTP method string
00224             if (*m_read_ptr == ' ') {
00225                 m_resource.erase();
00226                 m_headers_parse_state = PARSE_URI_STEM;
00227             } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
00228                 return false;
00229             } else if (m_method.size() >= METHOD_MAX) {
00230                 return false;
00231             } else {
00232                 m_method.push_back(*m_read_ptr);
00233             }
00234             break;
00235 
00236         case PARSE_URI_STEM:
00237             // we have started parsing the URI stem (or resource name)
00238             if (*m_read_ptr == ' ') {
00239                 m_headers_parse_state = PARSE_HTTP_VERSION_H;
00240             } else if (*m_read_ptr == '?') {
00241                 m_query_string.erase();
00242                 m_headers_parse_state = PARSE_URI_QUERY;
00243             } else if (isControl(*m_read_ptr)) {
00244                 return false;
00245             } else if (m_resource.size() >= RESOURCE_MAX) {
00246                 return false;
00247             } else {
00248                 m_resource.push_back(*m_read_ptr);
00249             }
00250             break;
00251 
00252         case PARSE_URI_QUERY:
00253             // we have started parsing the URI query string
00254             if (*m_read_ptr == ' ') {
00255                 m_headers_parse_state = PARSE_HTTP_VERSION_H;
00256             } else if (isControl(*m_read_ptr)) {
00257                 return false;
00258             } else if (m_query_string.size() >= QUERY_STRING_MAX) {
00259                 return false;
00260             } else {
00261                 m_query_string.push_back(*m_read_ptr);
00262             }
00263             break;
00264 
00265         case PARSE_HTTP_VERSION_H:
00266             // parsing "HTTP"
00267             if (*m_read_ptr != 'H') return false;
00268             m_headers_parse_state = PARSE_HTTP_VERSION_T_1;
00269             break;
00270 
00271         case PARSE_HTTP_VERSION_T_1:
00272             // parsing "HTTP"
00273             if (*m_read_ptr != 'T') return false;
00274             m_headers_parse_state = PARSE_HTTP_VERSION_T_2;
00275             break;
00276 
00277         case PARSE_HTTP_VERSION_T_2:
00278             // parsing "HTTP"
00279             if (*m_read_ptr != 'T') return false;
00280             m_headers_parse_state = PARSE_HTTP_VERSION_P;
00281             break;
00282 
00283         case PARSE_HTTP_VERSION_P:
00284             // parsing "HTTP"
00285             if (*m_read_ptr != 'P') return false;
00286             m_headers_parse_state = PARSE_HTTP_VERSION_SLASH;
00287             break;
00288 
00289         case PARSE_HTTP_VERSION_SLASH:
00290             // parsing slash after "HTTP"
00291             if (*m_read_ptr != '/') return false;
00292             m_headers_parse_state = PARSE_HTTP_VERSION_MAJOR_START;
00293             break;
00294 
00295         case PARSE_HTTP_VERSION_MAJOR_START:
00296             // parsing the first digit of the major version number
00297             if (!isDigit(*m_read_ptr)) return false;
00298             http_msg.setVersionMajor(*m_read_ptr - '0');
00299             m_headers_parse_state = PARSE_HTTP_VERSION_MAJOR;
00300             break;
00301 
00302         case PARSE_HTTP_VERSION_MAJOR:
00303             // parsing the major version number (not first digit)
00304             if (*m_read_ptr == '.') {
00305                 m_headers_parse_state = PARSE_HTTP_VERSION_MINOR_START;
00306             } else if (isDigit(*m_read_ptr)) {
00307                 http_msg.setVersionMajor( (http_msg.getVersionMajor() * 10)
00308                                           + (*m_read_ptr - '0') );
00309             } else {
00310                 return false;
00311             }
00312             break;
00313 
00314         case PARSE_HTTP_VERSION_MINOR_START:
00315             // parsing the first digit of the minor version number
00316             if (!isDigit(*m_read_ptr)) return false;
00317             http_msg.setVersionMinor(*m_read_ptr - '0');
00318             m_headers_parse_state = PARSE_HTTP_VERSION_MINOR;
00319             break;
00320 
00321         case PARSE_HTTP_VERSION_MINOR:
00322             // parsing the major version number (not first digit)
00323             if (*m_read_ptr == ' ') {
00324                 // should only happen for responses
00325                 if (m_is_request) return false;
00326                 m_headers_parse_state = PARSE_STATUS_CODE_START;
00327             } else if (*m_read_ptr == '\r') {
00328                 // should only happen for requests
00329                 if (! m_is_request) return false;
00330                 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00331             } else if (*m_read_ptr == '\n') {
00332                 // should only happen for requests
00333                 if (! m_is_request) return false;
00334                 m_headers_parse_state = PARSE_EXPECTING_CR;
00335             } else if (isDigit(*m_read_ptr)) {
00336                 http_msg.setVersionMinor( (http_msg.getVersionMinor() * 10)
00337                                           + (*m_read_ptr - '0') );
00338             } else {
00339                 return false;
00340             }
00341             break;
00342 
00343         case PARSE_STATUS_CODE_START:
00344             // parsing the first digit of the response status code
00345             if (!isDigit(*m_read_ptr)) return false;
00346             m_status_code = (*m_read_ptr - '0');
00347             m_headers_parse_state = PARSE_STATUS_CODE;
00348             break;
00349 
00350         case PARSE_STATUS_CODE:
00351             // parsing the response status code (not first digit)
00352             if (*m_read_ptr == ' ') {
00353                 m_status_message.erase();
00354                 m_headers_parse_state = PARSE_STATUS_MESSAGE;
00355             } else if (isDigit(*m_read_ptr)) {
00356                 m_status_code = ( (m_status_code * 10) + (*m_read_ptr - '0') );
00357             } else {
00358                 return false;
00359             }
00360             break;
00361 
00362         case PARSE_STATUS_MESSAGE:
00363             // parsing the response status message
00364             if (*m_read_ptr == '\r') {
00365                 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00366             } else if (*m_read_ptr == '\n') {
00367                 m_headers_parse_state = PARSE_EXPECTING_CR;
00368             } else if (isControl(*m_read_ptr)) {
00369                 return false;
00370             } else if (m_status_message.size() >= STATUS_MESSAGE_MAX) {
00371                 return false;
00372             } else {
00373                 m_status_message.push_back(*m_read_ptr);
00374             }
00375             break;
00376 
00377         case PARSE_EXPECTING_NEWLINE:
00378             // we received a CR; expecting a newline to follow
00379             if (*m_read_ptr == '\n') {
00380                 m_headers_parse_state = PARSE_HEADER_START;
00381             } else if (*m_read_ptr == '\r') {
00382                 // we received two CR's in a row
00383                 // assume CR only is (incorrectly) being used for line termination
00384                 // therefore, the message is finished
00385                 ++m_read_ptr;
00386                 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00387                 m_bytes_total_read += m_bytes_last_read;
00388                 return true;
00389             } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
00390                 m_headers_parse_state = PARSE_HEADER_WHITESPACE;
00391             } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
00392                 return false;
00393             } else {
00394                 // assume it is the first character for the name of a header
00395                 m_header_name.erase();
00396                 m_header_name.push_back(*m_read_ptr);
00397                 m_headers_parse_state = PARSE_HEADER_NAME;
00398             }
00399             break;
00400 
00401         case PARSE_EXPECTING_CR:
00402             // we received a newline without a CR
00403             if (*m_read_ptr == '\r') {
00404                 m_headers_parse_state = PARSE_HEADER_START;
00405             } else if (*m_read_ptr == '\n') {
00406                 // we received two newlines in a row
00407                 // assume newline only is (incorrectly) being used for line termination
00408                 // therefore, the message is finished
00409                 ++m_read_ptr;
00410                 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00411                 m_bytes_total_read += m_bytes_last_read;
00412                 return true;
00413             } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
00414                 m_headers_parse_state = PARSE_HEADER_WHITESPACE;
00415             } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
00416                 return false;
00417             } else {
00418                 // assume it is the first character for the name of a header
00419                 m_header_name.erase();
00420                 m_header_name.push_back(*m_read_ptr);
00421                 m_headers_parse_state = PARSE_HEADER_NAME;
00422             }
00423             break;
00424 
00425         case PARSE_HEADER_WHITESPACE:
00426             // parsing whitespace before a header name
00427             if (*m_read_ptr == '\r') {
00428                 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00429             } else if (*m_read_ptr == '\n') {
00430                 m_headers_parse_state = PARSE_EXPECTING_CR;
00431             } else if (*m_read_ptr != '\t' && *m_read_ptr != ' ') {
00432                 if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr))
00433                     return false;
00434                 // assume it is the first character for the name of a header
00435                 m_header_name.erase();
00436                 m_header_name.push_back(*m_read_ptr);
00437                 m_headers_parse_state = PARSE_HEADER_NAME;
00438             }
00439             break;
00440 
00441         case PARSE_HEADER_START:
00442             // parsing the start of a new header
00443             if (*m_read_ptr == '\r') {
00444                 m_headers_parse_state = PARSE_EXPECTING_FINAL_NEWLINE;
00445             } else if (*m_read_ptr == '\n') {
00446                 m_headers_parse_state = PARSE_EXPECTING_FINAL_CR;
00447             } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
00448                 m_headers_parse_state = PARSE_HEADER_WHITESPACE;
00449             } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
00450                 return false;
00451             } else {
00452                 // first character for the name of a header
00453                 m_header_name.erase();
00454                 m_header_name.push_back(*m_read_ptr);
00455                 m_headers_parse_state = PARSE_HEADER_NAME;
00456             }
00457             break;
00458 
00459         case PARSE_HEADER_NAME:
00460             // parsing the name of a header
00461             if (*m_read_ptr == ':') {
00462                 m_header_value.erase();
00463                 m_headers_parse_state = PARSE_SPACE_BEFORE_HEADER_VALUE;
00464             } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
00465                 return false;
00466             } else if (m_header_name.size() >= HEADER_NAME_MAX) {
00467                 return false;
00468             } else {
00469                 // character (not first) for the name of a header
00470                 m_header_name.push_back(*m_read_ptr);
00471             }
00472             break;
00473 
00474         case PARSE_SPACE_BEFORE_HEADER_VALUE:
00475             // parsing space character before a header's value
00476             if (*m_read_ptr == ' ') {
00477                 m_headers_parse_state = PARSE_HEADER_VALUE;
00478             } else if (*m_read_ptr == '\r') {
00479                 http_msg.addHeader(m_header_name, m_header_value);
00480                 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00481             } else if (*m_read_ptr == '\n') {
00482                 http_msg.addHeader(m_header_name, m_header_value);
00483                 m_headers_parse_state = PARSE_EXPECTING_CR;
00484             } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
00485                 return false;
00486             } else {
00487                 // assume it is the first character for the value of a header
00488                 m_header_value.push_back(*m_read_ptr);
00489                 m_headers_parse_state = PARSE_HEADER_VALUE;
00490             }
00491             break;
00492 
00493         case PARSE_HEADER_VALUE:
00494             // parsing the value of a header
00495             if (*m_read_ptr == '\r') {
00496                 http_msg.addHeader(m_header_name, m_header_value);
00497                 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00498             } else if (*m_read_ptr == '\n') {
00499                 http_msg.addHeader(m_header_name, m_header_value);
00500                 m_headers_parse_state = PARSE_EXPECTING_CR;
00501             } else if (isControl(*m_read_ptr)) {
00502                 return false;
00503             } else if (m_header_value.size() >= HEADER_VALUE_MAX) {
00504                 return false;
00505             } else {
00506                 // character (not first) for the value of a header
00507                 m_header_value.push_back(*m_read_ptr);
00508             }
00509             break;
00510 
00511         case PARSE_EXPECTING_FINAL_NEWLINE:
00512             if (*m_read_ptr == '\n') ++m_read_ptr;
00513             m_bytes_last_read = (m_read_ptr - read_start_ptr);
00514             m_bytes_total_read += m_bytes_last_read;
00515             return true;
00516 
00517         case PARSE_EXPECTING_FINAL_CR:
00518             if (*m_read_ptr == '\r') ++m_read_ptr;
00519             m_bytes_last_read = (m_read_ptr - read_start_ptr);
00520             m_bytes_total_read += m_bytes_last_read;
00521             return true;
00522         }
00523 
00524         ++m_read_ptr;
00525     }
00526 
00527     m_bytes_last_read = (m_read_ptr - read_start_ptr);
00528     m_bytes_total_read += m_bytes_last_read;
00529     return boost::indeterminate;
00530 }
00531 
00532 void HTTPParser::updateMessageWithHeaderData(HTTPMessage& http_msg) const
00533 {
00534     if (isParsingRequest()) {
00535 
00536         // finish an HTTP request message
00537 
00538         HTTPRequest& http_request(dynamic_cast<HTTPRequest&>(http_msg));
00539         http_request.setMethod(m_method);
00540         http_request.setResource(m_resource);
00541         http_request.setQueryString(m_query_string);
00542 
00543         // parse query pairs from the URI query string
00544         if (! m_query_string.empty()) {
00545             if (! parseURLEncoded(http_request.getQueryParams(),
00546                                   m_query_string.c_str(),
00547                                   m_query_string.size())) 
00548                 PION_LOG_WARN(m_logger, "Request query string parsing failed (URI): \""
00549                     << m_query_string << "\"");
00550         }
00551 
00552         // parse "Cookie" headers
00553         std::pair<HTTPTypes::Headers::const_iterator, HTTPTypes::Headers::const_iterator>
00554         cookie_pair = http_request.getHeaders().equal_range(HTTPTypes::HEADER_COOKIE);
00555         for (HTTPTypes::Headers::const_iterator cookie_iterator = cookie_pair.first;
00556              cookie_iterator != http_request.getHeaders().end()
00557              && cookie_iterator != cookie_pair.second; ++cookie_iterator)
00558         {
00559             if (! parseCookieHeader(http_request.getCookieParams(),
00560                                     cookie_iterator->second) )
00561                 PION_LOG_WARN(m_logger, "Cookie header parsing failed");
00562         }
00563 
00564     } else {
00565 
00566         // finish an HTTP response message
00567 
00568         HTTPResponse& http_response(dynamic_cast<HTTPResponse&>(http_msg));
00569         http_response.setStatusCode(m_status_code);
00570         http_response.setStatusMessage(m_status_message);
00571     }
00572 }
00573 
00574 boost::tribool HTTPParser::finishHeaderParsing(HTTPMessage& http_msg)
00575 {
00576     boost::tribool rc = boost::indeterminate;
00577 
00578     m_bytes_content_remaining = m_bytes_content_read = 0;
00579     http_msg.setContentLength(0);
00580     http_msg.updateTransferCodingUsingHeader();
00581     updateMessageWithHeaderData(http_msg);
00582 
00583     if (http_msg.isChunked()) {
00584 
00585         // content is encoded using chunks
00586         m_message_parse_state = PARSE_CHUNKS;
00587 
00588     } else if (http_msg.isContentLengthImplied()) {
00589 
00590         // content length is implied to be zero
00591         m_message_parse_state = PARSE_END;
00592         rc = true;
00593 
00594     } else {
00595         // content length should be specified in the headers
00596 
00597         if (http_msg.hasHeader(HTTPTypes::HEADER_CONTENT_LENGTH)) {
00598 
00599             // message has a content-length header
00600             try {
00601                 http_msg.updateContentLengthUsingHeader();
00602             } catch (...) {
00603                 PION_LOG_ERROR(m_logger, "Unable to update content length");
00604                 return false;
00605             }
00606 
00607             // check if content-length header == 0
00608             if (http_msg.getContentLength() == 0) {
00609                 m_message_parse_state = PARSE_END;
00610                 rc = true;
00611             } else {
00612                 m_message_parse_state = PARSE_CONTENT;
00613                 m_bytes_content_remaining = http_msg.getContentLength();
00614 
00615                 // check if content-length exceeds maximum allowed
00616                 if (m_bytes_content_remaining > m_max_content_length)
00617                     http_msg.setContentLength(m_max_content_length);
00618             }
00619 
00620         } else {
00621             // no content-length specified, and the content length cannot 
00622             // otherwise be determined
00623 
00624             // only if not a request, read through the close of the connection
00625             if (! m_is_request) {
00626                 // clear the chunk buffers before we start
00627                 http_msg.getChunkCache().clear();
00628 
00629                 // continue reading content until there is no more data
00630                 m_message_parse_state = PARSE_CONTENT_NO_LENGTH;
00631             } else {
00632                 m_message_parse_state = PARSE_END;
00633                 rc = true;
00634             }
00635         }
00636     }
00637 
00638     // allocate a buffer for payload content (may be zero-size)
00639     http_msg.createContentBuffer();
00640 
00641     return rc;
00642 }
00643 
00644 bool HTTPParser::parseURLEncoded(HTTPTypes::StringDictionary& dict,
00645                                  const char *ptr, const size_t len)
00646 {
00647     // used to track whether we are parsing the name or value
00648     enum QueryParseState {
00649         QUERY_PARSE_NAME, QUERY_PARSE_VALUE
00650     } parse_state = QUERY_PARSE_NAME;
00651 
00652     // misc other variables used for parsing
00653     const char * const end = ptr + len;
00654     std::string query_name;
00655     std::string query_value;
00656 
00657     // iterate through each encoded character
00658     while (ptr < end) {
00659         switch (parse_state) {
00660 
00661         case QUERY_PARSE_NAME:
00662             // parsing query name
00663             if (*ptr == '=') {
00664                 // end of name found
00665                 if (query_name.empty()) return false;
00666                 parse_state = QUERY_PARSE_VALUE;
00667             } else if (*ptr == '&') {
00668                 // if query name is empty, just skip it (i.e. "&&")
00669                 if (! query_name.empty()) {
00670                     // assume that "=" is missing -- it's OK if the value is empty
00671                     dict.insert( std::make_pair(query_name, query_value) );
00672                     query_name.erase();
00673                 }
00674             } else if (*ptr == '\r' || *ptr == '\n') {
00675                 // ignore linefeeds and carriage returns (normally within POST content)
00676             } else if (isControl(*ptr) || query_name.size() >= QUERY_NAME_MAX) {
00677                 // control character detected, or max sized exceeded
00678                 return false;
00679             } else {
00680                 // character is part of the name
00681                 query_name.push_back(*ptr);
00682             }
00683             break;
00684 
00685         case QUERY_PARSE_VALUE:
00686             // parsing query value
00687             if (*ptr == '&') {
00688                 // end of value found (OK if empty)
00689                 dict.insert( std::make_pair(query_name, query_value) );
00690                 query_name.erase();
00691                 query_value.erase();
00692                 parse_state = QUERY_PARSE_NAME;
00693             } else if (*ptr == '\r' || *ptr == '\n') {
00694                 // ignore linefeeds and carriage returns (normally within POST content)
00695             } else if (isControl(*ptr) || query_value.size() >= QUERY_VALUE_MAX) {
00696                 // control character detected, or max sized exceeded
00697                 return false;
00698             } else {
00699                 // character is part of the value
00700                 query_value.push_back(*ptr);
00701             }
00702             break;
00703         }
00704 
00705         ++ptr;
00706     }
00707 
00708     // handle last pair in string
00709     if (! query_name.empty())
00710         dict.insert( std::make_pair(query_name, query_value) );
00711 
00712     return true;
00713 }
00714 
00715 bool HTTPParser::parseCookieHeader(HTTPTypes::CookieParams& dict,
00716                                    const char *ptr, const size_t len)
00717 {
00718     // BASED ON RFC 2109
00719     // 
00720     // The current implementation ignores cookie attributes which begin with '$'
00721     // (i.e. $Path=/, $Domain=, etc.)
00722 
00723     // used to track what we are parsing
00724     enum CookieParseState {
00725         COOKIE_PARSE_NAME, COOKIE_PARSE_VALUE, COOKIE_PARSE_IGNORE
00726     } parse_state = COOKIE_PARSE_NAME;
00727 
00728     // misc other variables used for parsing
00729     const char * const end = ptr + len;
00730     std::string cookie_name;
00731     std::string cookie_value;
00732     char value_quote_character = '\0';
00733 
00734     // iterate through each character
00735     while (ptr < end) {
00736         switch (parse_state) {
00737 
00738         case COOKIE_PARSE_NAME:
00739             // parsing cookie name
00740             if (*ptr == '=') {
00741                 // end of name found
00742                 if (cookie_name.empty()) return false;
00743                 value_quote_character = '\0';
00744                 parse_state = COOKIE_PARSE_VALUE;
00745             } else if (*ptr == ';' || *ptr == ',') {
00746                 // ignore empty cookie names since this may occur naturally
00747                 // when quoted values are encountered
00748                 if (! cookie_name.empty()) {
00749                     // value is empty (OK)
00750                     if (cookie_name[0] != '$')
00751                         dict.insert( std::make_pair(cookie_name, cookie_value) );
00752                     cookie_name.erase();
00753                 }
00754             } else if (*ptr != ' ') {   // ignore whitespace
00755                 // check if control character detected, or max sized exceeded
00756                 if (isControl(*ptr) || cookie_name.size() >= COOKIE_NAME_MAX)
00757                     return false;
00758                 // character is part of the name
00759                 cookie_name.push_back(*ptr);
00760             }
00761             break;
00762 
00763         case COOKIE_PARSE_VALUE:
00764             // parsing cookie value
00765             if (value_quote_character == '\0') {
00766                 // value is not (yet) quoted
00767                 if (*ptr == ';' || *ptr == ',') {
00768                     // end of value found (OK if empty)
00769                     if (cookie_name[0] != '$') 
00770                         dict.insert( std::make_pair(cookie_name, cookie_value) );
00771                     cookie_name.erase();
00772                     cookie_value.erase();
00773                     parse_state = COOKIE_PARSE_NAME;
00774                 } else if (*ptr == '\'' || *ptr == '"') {
00775                     if (cookie_value.empty()) {
00776                         // begin quoted value
00777                         value_quote_character = *ptr;
00778                     } else if (cookie_value.size() >= COOKIE_VALUE_MAX) {
00779                         // max size exceeded
00780                         return false;
00781                     } else {
00782                         // assume character is part of the (unquoted) value
00783                         cookie_value.push_back(*ptr);
00784                     }
00785                 } else if (*ptr != ' ') {   // ignore unquoted whitespace
00786                     // check if control character detected, or max sized exceeded
00787                     if (isControl(*ptr) || cookie_value.size() >= COOKIE_VALUE_MAX)
00788                         return false;
00789                     // character is part of the (unquoted) value
00790                     cookie_value.push_back(*ptr);
00791                 }
00792             } else {
00793                 // value is quoted
00794                 if (*ptr == value_quote_character) {
00795                     // end of value found (OK if empty)
00796                     if (cookie_name[0] != '$') 
00797                         dict.insert( std::make_pair(cookie_name, cookie_value) );
00798                     cookie_name.erase();
00799                     cookie_value.erase();
00800                     parse_state = COOKIE_PARSE_IGNORE;
00801                 } else if (cookie_value.size() >= COOKIE_VALUE_MAX) {
00802                     // max size exceeded
00803                     return false;
00804                 } else {
00805                     // character is part of the (quoted) value
00806                     cookie_value.push_back(*ptr);
00807                 }
00808             }
00809             break;
00810 
00811         case COOKIE_PARSE_IGNORE:
00812             // ignore everything until we reach a comma "," or semicolon ";"
00813             if (*ptr == ';' || *ptr == ',')
00814                 parse_state = COOKIE_PARSE_NAME;
00815             break;
00816         }
00817 
00818         ++ptr;
00819     }
00820 
00821     // handle last cookie in string
00822     if (! cookie_name.empty() && cookie_name[0] != '$')
00823         dict.insert( std::make_pair(cookie_name, cookie_value) );
00824 
00825     return true;
00826 }
00827 
00828 boost::tribool HTTPParser::parseChunks(HTTPMessage::ChunkCache& chunk_cache)
00829 {
00830     //
00831     // note that boost::tribool may have one of THREE states:
00832     //
00833     // false: encountered an error while parsing message
00834     // true: finished successfully parsing the message
00835     // indeterminate: parsed bytes, but the message is not yet finished
00836     //
00837     const char *read_start_ptr = m_read_ptr;
00838     m_bytes_last_read = 0;
00839     while (m_read_ptr < m_read_end_ptr) {
00840 
00841         switch (m_chunked_content_parse_state) {
00842         case PARSE_CHUNK_SIZE_START:
00843             // we have not yet started parsing the next chunk size
00844             if (isHexDigit(*m_read_ptr)) {
00845                 m_chunk_size_str.erase();
00846                 m_chunk_size_str.push_back(*m_read_ptr);
00847                 m_chunked_content_parse_state = PARSE_CHUNK_SIZE;
00848             } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09' || *m_read_ptr == '\x0D' || *m_read_ptr == '\x0A') {
00849                 // Ignore leading whitespace.  Technically, the standard probably doesn't allow white space here, 
00850                 // but we'll be flexible, since there's no ambiguity.
00851                 break;
00852             } else {
00853                 return false;
00854             }
00855             break;
00856 
00857         case PARSE_CHUNK_SIZE:
00858             if (isHexDigit(*m_read_ptr)) {
00859                 m_chunk_size_str.push_back(*m_read_ptr);
00860             } else if (*m_read_ptr == '\x0D') {
00861                 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
00862             } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09') {
00863                 // Ignore trailing tabs or spaces.  Technically, the standard probably doesn't allow this, 
00864                 // but we'll be flexible, since there's no ambiguity.
00865                 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE;
00866             } else {
00867                 return false;
00868             }
00869             break;
00870 
00871         case PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE:
00872             if (*m_read_ptr == '\x0D') {
00873                 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
00874             } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09') {
00875                 // Ignore trailing tabs or spaces.  Technically, the standard probably doesn't allow this, 
00876                 // but we'll be flexible, since there's no ambiguity.
00877                 break;
00878             } else {
00879                 return false;
00880             }
00881             break;
00882 
00883         case PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE:
00884             // We received a CR; expecting LF to follow.  We can't be flexible here because 
00885             // if we see anything other than LF, we can't be certain where the chunk starts.
00886             if (*m_read_ptr == '\x0A') {
00887                 m_bytes_read_in_current_chunk = 0;
00888                 m_size_of_current_chunk = strtol(m_chunk_size_str.c_str(), 0, 16);
00889                 if (m_size_of_current_chunk == 0) {
00890                     m_chunked_content_parse_state = PARSE_EXPECTING_FINAL_CR_AFTER_LAST_CHUNK;
00891                 } else {
00892                     m_chunked_content_parse_state = PARSE_CHUNK;
00893                 }
00894             } else {
00895                 return false;
00896             }
00897             break;
00898 
00899         case PARSE_CHUNK:
00900             if (m_bytes_read_in_current_chunk < m_size_of_current_chunk) {
00901                 if (chunk_cache.size() < m_max_content_length)
00902                     chunk_cache.push_back(*m_read_ptr);
00903                 m_bytes_read_in_current_chunk++;
00904             }
00905             if (m_bytes_read_in_current_chunk == m_size_of_current_chunk) {
00906                 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK;
00907             }
00908             break;
00909 
00910         case PARSE_EXPECTING_CR_AFTER_CHUNK:
00911             // we've read exactly m_size_of_current_chunk bytes since starting the current chunk
00912             if (*m_read_ptr == '\x0D') {
00913                 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK;
00914             } else {
00915                 return false;
00916             }
00917             break;
00918 
00919         case PARSE_EXPECTING_LF_AFTER_CHUNK:
00920             // we received a CR; expecting LF to follow
00921             if (*m_read_ptr == '\x0A') {
00922                 m_chunked_content_parse_state = PARSE_CHUNK_SIZE_START;
00923             } else {
00924                 return false;
00925             }
00926             break;
00927 
00928         case PARSE_EXPECTING_FINAL_CR_AFTER_LAST_CHUNK:
00929             // we've read the final chunk; expecting final CRLF
00930             if (*m_read_ptr == '\x0D') {
00931                 m_chunked_content_parse_state = PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK;
00932             } else {
00933                 return false;
00934             }
00935             break;
00936 
00937         case PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK:
00938             // we received the final CR; expecting LF to follow
00939             if (*m_read_ptr == '\x0A') {
00940                 ++m_read_ptr;
00941                 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00942                 m_bytes_total_read += m_bytes_last_read;
00943                 PION_LOG_DEBUG(m_logger, "Parsed " << m_bytes_last_read << " chunked payload content bytes; chunked content complete.");
00944                 return true;
00945             } else {
00946                 return false;
00947             }
00948         }
00949 
00950         ++m_read_ptr;
00951     }
00952 
00953     m_bytes_last_read = (m_read_ptr - read_start_ptr);
00954     m_bytes_total_read += m_bytes_last_read;
00955     m_bytes_content_read += m_bytes_last_read;
00956     return boost::indeterminate;
00957 }
00958 
00959 boost::tribool HTTPParser::consumeContent(HTTPMessage& http_msg)
00960 {
00961     size_t content_bytes_to_read;
00962     size_t content_bytes_available = bytes_available();
00963     boost::tribool rc = boost::indeterminate;
00964 
00965     if (m_bytes_content_remaining == 0) {
00966         // we have all of the remaining payload content
00967         return true;
00968     } else {
00969         if (content_bytes_available >= m_bytes_content_remaining) {
00970             // we have all of the remaining payload content
00971             rc = true;
00972             content_bytes_to_read = m_bytes_content_remaining;
00973         } else {
00974             // only some of the payload content is available
00975             content_bytes_to_read = content_bytes_available;
00976         }
00977         m_bytes_content_remaining -= content_bytes_to_read;
00978     }
00979 
00980     // make sure content buffer is not already full
00981     if (m_bytes_content_read < m_max_content_length) {
00982         if (m_bytes_content_read + content_bytes_to_read > m_max_content_length) {
00983             // read would exceed maximum size for content buffer
00984             // copy only enough bytes to fill up the content buffer
00985             memcpy(http_msg.getContent() + m_bytes_content_read, m_read_ptr, 
00986                 m_max_content_length - m_bytes_content_read);
00987         } else {
00988             // copy all bytes available
00989             memcpy(http_msg.getContent() + m_bytes_content_read, m_read_ptr, content_bytes_to_read);
00990         }
00991     }
00992 
00993     m_read_ptr += content_bytes_to_read;
00994     m_bytes_content_read += content_bytes_to_read;
00995     m_bytes_total_read += content_bytes_to_read;
00996     m_bytes_last_read = content_bytes_to_read;
00997 
00998     return rc;
00999 }
01000 
01001 std::size_t HTTPParser::consumeContentAsNextChunk(HTTPMessage::ChunkCache& chunk_cache)
01002 {
01003     if (bytes_available() == 0) {
01004         m_bytes_last_read = 0;
01005     } else {
01006         m_bytes_last_read = (m_read_end_ptr - m_read_ptr);
01007         while (m_read_ptr < m_read_end_ptr) {
01008             if (chunk_cache.size() < m_max_content_length)
01009                 chunk_cache.push_back(*m_read_ptr);
01010             ++m_read_ptr;
01011         }
01012         m_bytes_total_read += m_bytes_last_read;
01013         m_bytes_content_read += m_bytes_last_read;
01014     }
01015     return m_bytes_last_read;
01016 }
01017 
01018 void HTTPParser::finish(HTTPMessage& http_msg) const
01019 {
01020     switch (m_message_parse_state) {
01021     case PARSE_START:
01022         http_msg.setIsValid(false);
01023         http_msg.setContentLength(0);
01024         http_msg.createContentBuffer();
01025         return;
01026     case PARSE_END:
01027         http_msg.setIsValid(true);
01028         break;
01029     case PARSE_HEADERS:
01030         http_msg.setIsValid(false);
01031         updateMessageWithHeaderData(http_msg);
01032         http_msg.setContentLength(0);
01033         http_msg.createContentBuffer();
01034         break;
01035     case PARSE_CONTENT:
01036         http_msg.setIsValid(false);
01037         http_msg.setContentLength(getContentBytesRead());
01038         break;
01039     case PARSE_CHUNKS:
01040         http_msg.setIsValid(false);
01041         http_msg.concatenateChunks();
01042         break;
01043     case PARSE_CONTENT_NO_LENGTH:
01044         http_msg.setIsValid(true);
01045         http_msg.concatenateChunks();
01046         break;
01047     }
01048 
01049     if (isParsingRequest()) {
01050         // Parse query pairs from post content if content type is x-www-form-urlencoded.
01051         // Type could be followed by parameters (as defined in section 3.6 of RFC 2616)
01052         // e.g. Content-Type: application/x-www-form-urlencoded; charset=UTF-8
01053         HTTPRequest& http_request(dynamic_cast<HTTPRequest&>(http_msg));
01054         const std::string& content_type_header = http_request.getHeader(HTTPTypes::HEADER_CONTENT_TYPE);
01055         if (content_type_header.compare(0, HTTPTypes::CONTENT_TYPE_URLENCODED.length(),
01056                                         HTTPTypes::CONTENT_TYPE_URLENCODED) == 0)
01057         {
01058             if (! parseURLEncoded(http_request.getQueryParams(),
01059                                   http_request.getContent(),
01060                                   http_request.getContentLength())) 
01061                 PION_LOG_WARN(m_logger, "Request query string parsing failed (POST content): \""
01062                     << http_request.getContent() << "\"");
01063         }
01064     }
01065 }
01066 
01067 }   // end namespace net
01068 }   // end namespace pion
01069 

Generated on Fri Dec 4 08:54:29 2009 for pion-net by  doxygen 1.4.7