|
1 |
| -import unittest |
| 1 | +import pytest |
| 2 | + |
2 | 3 | from flathunter.crawl_immowelt import CrawlImmowelt
|
3 | 4 | from test_util import count
|
4 | 5 |
|
5 |
| -class ImmoweltCrawlerTest(unittest.TestCase): |
# Live Immowelt search for Berlin rental flats, sorted newest-first.
# NOTE(review): query params presumably mean min. 2 rooms (roomi), max. 1500 EUR
# (prima) and min. 70 m2 (wflmi) — TODO confirm against immowelt.de URL scheme.
TEST_URL = 'https://www.immowelt.de/liste/berlin/wohnungen/mieten?roomi=2&prima=1500&wflmi=70&sort=createdate%2Bdesc'
6 | 7 |
|
7 |
| - TEST_URL = 'https://www.immowelt.de/liste/berlin/wohnungen/mieten?roomi=2&prima=1500&wflmi=70&sort=createdate%2Bdesc' |
@pytest.fixture
def crawler():
    """Yield a fresh CrawlImmowelt crawler for each test."""
    instance = CrawlImmowelt()
    return instance
8 | 11 |
|
9 |
| - def setUp(self): |
10 |
| - self.crawler = CrawlImmowelt() |
11 | 12 |
|
12 |
def test_crawler(crawler):
    """Crawl the live Immowelt search results and verify entries are parsed.

    NOTE(review): depends on immowelt.de being reachable and returning results.
    """
    soup = crawler.get_page(TEST_URL)
    # The unittest version carried explanatory messages on every assertion;
    # keep them so a failure says *what* broke, not just which line.
    assert soup is not None, "Should get a soup from the URL"
    entries = crawler.extract_data(soup)
    assert entries is not None, "Should parse entries from search URL"
    assert len(entries) > 0, "Should have at least one entry"
    assert entries[0]['id'] > 0, "Id should be parsed"
    assert entries[0]['url'].startswith("https://www.immowelt.de/expose"), \
        "URL should be an exposé link"
    for attr in [ 'title', 'price', 'size', 'rooms', 'address', 'image' ]:
        assert entries[0][attr] is not None, attr + " should be set"
22 | 23 |
|
23 |
def test_dont_crawl_other_urls(crawler):
    """A URL outside immowelt.de must produce zero exposes."""
    found = crawler.crawl("https://www.example.com")
    assert count(found) == 0
|
def test_process_expose_fetches_details(crawler):
    """Fetching details for each parsed expose should populate all key fields.

    NOTE(review): depends on immowelt.de being reachable and returning results.
    """
    soup = crawler.get_page(TEST_URL)
    assert soup is not None, "Should get a soup from the URL"
    entries = crawler.extract_data(soup)
    assert entries is not None, "Should parse entries from search URL"
    assert len(entries) > 0, "Should have at least one entry"
    updated_entries = [ crawler.get_expose_details(expose) for expose in entries ]
    # Dropped the leftover debug print(expose); assertion messages now identify
    # the failing expose attribute instead.
    for expose in updated_entries:
        for attr in [ 'title', 'price', 'size', 'rooms', 'address', 'from' ]:
            assert expose[attr] is not None, attr + " should be set"
0 commit comments