# Lines matching "self" in CPython's Lib/test/test_robotparser.py,
# reconstructed into a runnable module: lines elided by the original grep
# are restored from context, and anything guessed is flagged inline.

import io
import os
import threading
import unittest
import urllib.robotparser
import urllib.request
from http.server import BaseHTTPRequestHandler, HTTPServer

# Helpers from CPython's own test-support package (only importable when the
# `test` package is installed alongside the stdlib).
from test import support
from test.support import socket_helper

class BaseRobotTest:
    robots_txt = ''
    agent = 'test_robotparser'
    good = []
    bad = []
    site_maps = None

    def setUp(self):
        lines = io.StringIO(self.robots_txt).readlines()
        self.parser = urllib.robotparser.RobotFileParser()
        self.parser.parse(lines)

    def get_agent_and_url(self, url):
        # Entries in `good`/`bad` may be plain URLs or (agent, url) tuples.
        if isinstance(url, tuple):
            agent, url = url
            return agent, url
        return self.agent, url

    def test_good_urls(self):
        for url in self.good:
            agent, url = self.get_agent_and_url(url)
            with self.subTest(url=url, agent=agent):
                self.assertTrue(self.parser.can_fetch(agent, url))

    def test_bad_urls(self):
        for url in self.bad:
            agent, url = self.get_agent_and_url(url)
            with self.subTest(url=url, agent=agent):
                self.assertFalse(self.parser.can_fetch(agent, url))

    def test_site_maps(self):
        self.assertEqual(self.parser.site_maps(), self.site_maps)
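
# The harness above is fixture-driven: concrete subclasses only supply a
# robots_txt body plus good/bad URL lists. A minimal sketch of such a
# subclass (illustrative fixture values, not restored from the grep):
class WildcardAgentSketch(BaseRobotTest, unittest.TestCase):
    robots_txt = """\
User-agent: *
Disallow: /cyberworld/map/
"""
    good = ['/', '/test.html']
    bad = ['/cyberworld/map/index.html']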

class BaseRequestRateTest(BaseRobotTest):
    request_rate = None
    crawl_delay = None

    def test_request_rate(self):
        parser = self.parser
        for url in self.good + self.bad:
            agent, url = self.get_agent_and_url(url)
            with self.subTest(url=url, agent=agent):
                self.assertEqual(parser.crawl_delay(agent), self.crawl_delay)

                parsed_request_rate = parser.request_rate(agent)
                self.assertEqual(parsed_request_rate, self.request_rate)
                if self.request_rate is not None:
                    self.assertIsInstance(
                        parsed_request_rate,
                        urllib.robotparser.RequestRate)
                    self.assertEqual(
                        parsed_request_rate.requests,
                        self.request_rate.requests)
                    self.assertEqual(
                        parsed_request_rate.seconds,
                        self.request_rate.seconds)
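
# Quick sketch of what those assertions exercise: Crawl-delay parses to a
# number and Request-rate to the RequestRate named tuple (requests, seconds).
# The fixture values below are illustrative, not from the original file.
_rate_demo = urllib.robotparser.RobotFileParser()
_rate_demo.parse(io.StringIO(
    "User-agent: *\n"
    "Crawl-delay: 1\n"
    "Request-rate: 3/15\n"
    "Disallow: /cgi-bin\n"
).readlines())
assert _rate_demo.crawl_delay('*') == 1
assert _rate_demo.request_rate('*') == urllib.robotparser.RequestRate(3, 15)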

class StringFormattingTest(BaseRobotTest, unittest.TestCase):
    # The robots_txt and expected_output fixtures did not match the grep
    # and are omitted here.

    def test_string_formatting(self):
        self.assertEqual(str(self.parser), self.expected_output)
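
# str() on a RobotFileParser reproduces the rules it retained; a small
# sketch (the commented result is what the current stdlib produces):
_fmt_demo = urllib.robotparser.RobotFileParser()
_fmt_demo.parse(io.StringIO(
    "User-agent: figtree\n"
    "Disallow: /tmp\n"
).readlines())
# str(_fmt_demo) == 'User-agent: figtree\nDisallow: /tmp'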

class RobotHandler(BaseHTTPRequestHandler):
    """Answer every request with 403 so robots.txt is never readable."""

    def do_GET(self):
        self.send_error(403, "Forbidden access")

    def log_message(self, format, *args):
        pass  # keep the test run quiet

class PasswordProtectedSiteTestCase(unittest.TestCase):

    def setUp(self):
        # Reset urllib.request's global opener once the test finishes.
        self.addCleanup(urllib.request.urlcleanup)

        self.server = HTTPServer((socket_helper.HOST, 0), RobotHandler)

        self.t = threading.Thread(
            name='HTTPServer serving',
            target=self.server.serve_forever,
            # A short poll interval lets shutdown() in tearDown return quickly.
            kwargs={'poll_interval': 0.01})
        self.t.daemon = True  # In case this function raises.
        self.t.start()

    def tearDown(self):
        self.server.shutdown()
        self.t.join()
        self.server.server_close()

    def testPasswordProtectedSite(self):
        addr = self.server.server_address
        url = 'http://' + socket_helper.HOST + ':' + str(addr[1])
        robots_url = url + "/robots.txt"
        parser = urllib.robotparser.RobotFileParser()
        parser.set_url(url)
        parser.read()
        self.assertFalse(parser.can_fetch("*", robots_url))
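        # Extra check, not in the original test: read() maps an HTTP 401/403
        # on robots.txt to disallow_all, which is why can_fetch() refuses
        # every URL above.
        self.assertTrue(parser.disallow_all)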

class NetworkTestCase(unittest.TestCase):

    base_url = 'http://www.pythontest.net/'
    robots_txt = '{}elsewhere/robots.txt'.format(base_url)

    @classmethod
    def setUpClass(cls):
        support.requires('network')
        with socket_helper.transient_internet(cls.base_url):
            cls.parser = urllib.robotparser.RobotFileParser(cls.robots_txt)
            cls.parser.read()

    def url(self, path):
        # Append a trailing slash unless the path already has an extension.
        return '{}{}{}'.format(
            self.base_url, path, '/' if not os.path.splitext(path)[1] else ''
        )
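
    # What the helper yields with the base_url above, for reference:
    #   url('elsewhere')   -> 'http://www.pythontest.net/elsewhere/'
    #   url('i-robot.txt') -> 'http://www.pythontest.net/i-robot.txt'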

    def test_basic(self):
        self.assertFalse(self.parser.disallow_all)
        self.assertFalse(self.parser.allow_all)
        self.assertGreater(self.parser.mtime(), 0)
        self.assertFalse(self.parser.crawl_delay('*'))
        self.assertFalse(self.parser.request_rate('*'))

    def test_can_fetch(self):
        self.assertTrue(self.parser.can_fetch('*', self.url('elsewhere')))
        self.assertFalse(self.parser.can_fetch('Nutch', self.base_url))
        self.assertFalse(self.parser.can_fetch('Nutch', self.url('brian')))
        self.assertFalse(self.parser.can_fetch('Nutch', self.url('webstats')))
        self.assertFalse(self.parser.can_fetch('*', self.url('webstats')))
        self.assertTrue(self.parser.can_fetch('*', self.base_url))

    def test_read_404(self):
        parser = urllib.robotparser.RobotFileParser(self.url('i-robot.txt'))
        parser.read()  # a 404 response leaves the parser with allow_all set
        self.assertTrue(parser.allow_all)
        self.assertFalse(parser.disallow_all)
        self.assertEqual(parser.mtime(), 0)
        self.assertIsNone(parser.crawl_delay('*'))
        self.assertIsNone(parser.request_rate('*'))
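
# Standard stdlib test-module entry point.
if __name__ == '__main__':
    unittest.main()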