1/* 2 * nghttp2 - HTTP/2 C Library 3 * 4 * Copyright (c) 2012 Tatsuhiro Tsujikawa 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining 7 * a copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sublicense, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be 15 * included in all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 */ 25#ifndef HTML_PARSER_H 26#define HTML_PARSER_H 27 28#include "nghttp2_config.h" 29 30#include <vector> 31#include <string> 32 33#ifdef HAVE_LIBXML2 34 35# include <libxml/HTMLparser.h> 36 37#endif // HAVE_LIBXML2 38 39namespace nghttp2 { 40 41enum ResourceType { 42 REQ_CSS = 1, 43 REQ_JS, 44 REQ_UNBLOCK_JS, 45 REQ_IMG, 46 REQ_OTHERS, 47}; 48 49struct ParserData { 50 std::string base_uri; 51 std::vector<std::pair<std::string, ResourceType>> links; 52 // > 0 if we are inside "head" element. 53 int inside_head; 54 ParserData(const std::string &base_uri); 55}; 56 57#ifdef HAVE_LIBXML2 58 59class HtmlParser { 60public: 61 HtmlParser(const std::string &base_uri); 62 ~HtmlParser(); 63 int parse_chunk(const char *chunk, size_t size, int fin); 64 const std::vector<std::pair<std::string, ResourceType>> &get_links() const; 65 void clear_links(); 66 67private: 68 int parse_chunk_internal(const char *chunk, size_t size, int fin); 69 70 std::string base_uri_; 71 htmlParserCtxtPtr parser_ctx_; 72 ParserData parser_data_; 73}; 74 75#else // !HAVE_LIBXML2 76 77class HtmlParser { 78public: 79 HtmlParser(const std::string &base_uri) {} 80 int parse_chunk(const char *chunk, size_t size, int fin) { return 0; } 81 const std::vector<std::pair<std::string, ResourceType>> &get_links() const { 82 return links_; 83 } 84 void clear_links() {} 85 86private: 87 std::vector<std::pair<std::string, ResourceType>> links_; 88}; 89 90#endif // !HAVE_LIBXML2 91 92} // namespace nghttp2 93 94#endif // HTML_PARSER_H 95