xref: /third_party/nghttp2/src/HtmlParser.h (revision 2c593315)
1/*
2 * nghttp2 - HTTP/2 C Library
3 *
4 * Copyright (c) 2012 Tatsuhiro Tsujikawa
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be
15 * included in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25#ifndef HTML_PARSER_H
26#define HTML_PARSER_H
27
28#include "nghttp2_config.h"
29
30#include <vector>
31#include <string>
32
33#ifdef HAVE_LIBXML2
34
35#  include <libxml/HTMLparser.h>
36
37#endif // HAVE_LIBXML2
38
39namespace nghttp2 {
40
// Category tag stored next to each link string in ParserData::links and in
// the list returned by HtmlParser::get_links().  Enumerators are numbered
// consecutively, starting at 1.
// NOTE(review): the exact rule that sorts a link into each category lives in
// the parser implementation, which is not visible from this header.
enum ResourceType {
  REQ_CSS = 1,
  REQ_JS = 2,
  REQ_UNBLOCK_JS = 3,
  REQ_IMG = 4,
  REQ_OTHERS = 5
};
48
49struct ParserData {
50  std::string base_uri;
51  std::vector<std::pair<std::string, ResourceType>> links;
52  // > 0 if we are inside "head" element.
53  int inside_head;
54  ParserData(const std::string &base_uri);
55};
56
57#ifdef HAVE_LIBXML2
58
59class HtmlParser {
60public:
61  HtmlParser(const std::string &base_uri);
62  ~HtmlParser();
63  int parse_chunk(const char *chunk, size_t size, int fin);
64  const std::vector<std::pair<std::string, ResourceType>> &get_links() const;
65  void clear_links();
66
67private:
68  int parse_chunk_internal(const char *chunk, size_t size, int fin);
69
70  std::string base_uri_;
71  htmlParserCtxtPtr parser_ctx_;
72  ParserData parser_data_;
73};
74
75#else // !HAVE_LIBXML2
76
77class HtmlParser {
78public:
79  HtmlParser(const std::string &base_uri) {}
80  int parse_chunk(const char *chunk, size_t size, int fin) { return 0; }
81  const std::vector<std::pair<std::string, ResourceType>> &get_links() const {
82    return links_;
83  }
84  void clear_links() {}
85
86private:
87  std::vector<std::pair<std::string, ResourceType>> links_;
88};
89
90#endif // !HAVE_LIBXML2
91
92} // namespace nghttp2
93
94#endif // HTML_PARSER_H
95