|
| 1 | +from datetime import datetime |
| 2 | +from time import sleep |
| 3 | + |
| 4 | +from selenium.common import InvalidArgumentException, NoSuchElementException, TimeoutException |
| 5 | +from selenium.webdriver import ActionChains, Keys |
| 6 | + |
| 7 | +from logger import get_current_logger, log_function |
| 8 | +from scrapers.scraper_component.utils.driver_consts import BrowserConsts, MainConsts |
| 9 | +from scrapers.scraper_component.interfaces.base_driver_interface import BaseDriverInterface |
| 10 | +from scrapers.scraper_component.utils.driver_utils import get_driver_path, get_temp_browser_profile_path, \ |
| 11 | + create_path_if_needed, kill_browser_childes |
| 12 | +from selenium import webdriver |
| 13 | + |
| 14 | + |
class ChromeDriver(BaseDriverInterface):
    """
    Thin wrapper around a selenium Chrome web driver.

    Builds the driver from a web driver executable path and a browser profile path,
    and exposes helpers for navigation, element lookup, waiting, typing and clicking.
    Can be used as a context manager: on exit the browser is closed when
    `quit_at_end` was True.
    """

    def __init__(self, browser_type: str = BrowserConsts.CHROME, browser_profile_path: str = None,
                 webdriver_path: str = None, headless: bool = False, quit_at_end: bool = True):
        """
        Constructor
        :param browser_type: browser name, used to resolve default paths and in log messages
        :param browser_profile_path: a browser profile path, a temporary one is used when not given
        :param webdriver_path: selenium web driver executable path, resolved by browser type when not given
        :param headless: open the browser headless
        :param quit_at_end: exit browser after done
        """

        # Logger
        self.logger = get_current_logger()

        # Web driver path
        self.webdriver_path = webdriver_path if webdriver_path else get_driver_path(browser_type=browser_type)
        self.logger.debug(f"WebDriver path is: '{self.webdriver_path}'")

        # Browser profile path
        if browser_profile_path:
            self.browser_profile_path = browser_profile_path
        else:
            self.browser_profile_path = get_temp_browser_profile_path(browser_type=browser_type)
        # NOTE(review): applied to whichever path was chosen - confirm it was not meant for the temp path only
        create_path_if_needed(path=self.browser_profile_path)
        self.logger.debug(f"Browser profile path is: '{self.browser_profile_path}'")

        # Exit the window after the bot done
        self.teardown = quit_at_end

        # Headless
        self.headless = headless

        # Browser type
        self.browser_type = browser_type

        # Raises when the driver cannot be created, so `self._driver` always exists past this line
        self.__init_chrome_driver__()

        # Implicitly wait time
        self._driver.implicitly_wait(MainConsts.IMPLICITLY_WAIT_TIME)

        # Maximize the page window
        self._driver.maximize_window()

        self.logger.debug(f"Initialized {self.browser_type} web driver, headless: {self.headless}")

    def __enter__(self):
        """
        Allow `with ChromeDriver(...) as driver:` usage, paired with `__exit__`
        :return: this driver instance
        """
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Called when the bot is done running
        If teardown is True, exit the browser
        Else, leave the browser open
        :param exc_type: exception type raised inside the `with` block, if any
        :param exc_val: exception instance raised inside the `with` block, if any
        :param exc_tb: traceback of that exception, if any
        :return: None, so an exception raised inside the `with` block is not suppressed
        """
        if self.teardown:
            self.exit()

    @log_function
    def __init_chrome_driver__(self):
        """
        Create the selenium Chrome driver and store it in `self._driver`
        :raises Exception: any error raised while creating the driver, after it was logged
        """
        # Local import: `executable_path=` on webdriver.Chrome is deprecated in selenium 4,
        # the driver path is passed through a Service object instead
        from selenium.webdriver.chrome.service import Service

        try:
            options = webdriver.ChromeOptions()
            if self.headless:
                options.add_argument('--headless')
            # NOTE(review): kept unconditional - confirm these two flags were not meant for headless only
            options.add_argument('--no-sandbox')
            options.add_argument('--disable-dev-shm-usage')
            options.add_argument(argument=f"user-data-dir={self.browser_profile_path}")
            service = Service(executable_path=self.webdriver_path)
            self._driver = webdriver.Chrome(service=service, options=options)
        except Exception as e:
            error_text = str(e).lower()
            if "executable needs to be in path" in error_text:
                self.logger.error("PATH Error")
            self.logger.error(f"Error initialize chrome driver - {str(e)}")
            if "chromedriver is assuming that chrome has crashed" in error_text:
                # Leftover browser processes block a new session, clean them before the next run
                kill_browser_childes(process_name=self.browser_type)
                self.logger.warning(f"Killed {self.browser_type} childes, run again")
            # Always propagate: returning here would leave the instance without `_driver`
            raise

    @log_function
    def exit(self):
        """
        Quit the underlying web driver
        """
        self._driver.quit()
        self.logger.info("ChromeDriver exit")

    @log_function
    def get_url(self, url: str):
        """
        Navigate the browser to the given url
        On an invalid url format the error is logged and the driver is closed
        :param url: full url to open
        """
        try:
            self._driver.get(url)
        except InvalidArgumentException:
            self.logger.error(f"Error getting url: '{url}' - invalid url input format, please give full correct format")
            self.exit()

    @log_function
    def get_current_url(self) -> str:
        """
        :return: the url of the current page
        """
        return self._driver.current_url

    @log_function
    def get_title(self) -> str:
        """
        :return: the title of the current page
        """
        return self._driver.title

    @log_function
    def find_element(self, by, value):
        """
        Find a single element, delegating to the selenium driver
        :param by: locator strategy
        :param value: locator value
        :return: the element returned by the driver
        """
        return self._driver.find_element(by=by, value=value)

    @log_function
    def find_elements(self, by, value):
        """
        Find all matching elements, delegating to the selenium driver
        :param by: locator strategy
        :param value: locator value
        :return: the elements returned by the driver
        """
        return self._driver.find_elements(by=by, value=value)

    @log_function
    def wait_until_object_appears(self, by, value, timeout: int = MainConsts.DEFAULT_ELEMENT_TIMEOUT):
        """
        Poll the page until at least one matching element exists
        The page is always checked at least once, even when timeout is 0
        :param by: locator strategy
        :param value: locator value
        :param timeout: maximum number of seconds to keep polling
        :raises TimeoutException: when no matching element appeared within the timeout
        """
        # Monotonic clock: the measured wait is not affected by system clock changes
        from time import monotonic

        start_time = monotonic()
        while True:
            seconds_pass = monotonic() - start_time
            self.logger.debug(f"Waiting for element {value} to appears TIMEOUT: ({seconds_pass}/{timeout})")
            if self._driver.find_elements(by=by, value=value):
                self.logger.info(f"Element {value} found")
                return
            if monotonic() - start_time >= timeout:
                break
            sleep(MainConsts.ELEMENT_SLEEPING_TIME)
        raise TimeoutException(f"Element {value} did not appear within {timeout} seconds")

    @log_function
    def insert_text(self, by, value, text: str, press_enter_needed: bool = True):
        """
        Click on an element and type text into it, one character at a time
        :param by: locator strategy
        :param value: locator value
        :param text: the text to type
        :param press_enter_needed: press the Enter key after the text was typed
        :raises Exception: any error raised while clicking or typing, after it was logged
        """
        try:
            self.click_on_element(by=by, value=value)
            for char in text:
                # send_keys presses and releases the key; key_down/key_up are meant for modifier keys
                ActionChains(self._driver).send_keys(char).perform()
                # Pause between characters
                sleep(MainConsts.INSERT_TEXT_SLEEPING_TIME)
            self.logger.info(f"Text: '{text}' inserted to element")
            if press_enter_needed:
                ActionChains(self._driver).send_keys(Keys.ENTER).perform()
                self.logger.info("Enter key pressed")
        except Exception as e:
            self.logger.error(f"Error while trying to insert text: '{text}' to element: '{value}' - {str(e)}")
            raise

    def _hover_over_element(self, by, value):
        """
        Locate the element and move the mouse pointer over it, errors are propagated to the caller
        """
        element = self._driver.find_element(by=by, value=value)
        webdriver.ActionChains(self._driver).move_to_element(element).perform()
        self.logger.info("Moved to element")

    @log_function
    def move_to_element(self, by, value):
        """
        Move the mouse pointer over an element
        Best effort: errors are logged and not raised
        :param by: locator strategy
        :param value: locator value
        """
        try:
            self._hover_over_element(by=by, value=value)
        except Exception as e:
            self.logger.error(f"Error while trying to move to element - {str(e)}")

    @log_function
    def click_on_element(self, by, value):
        """
        Move the mouse pointer over an element and click on it
        :param by: locator strategy
        :param value: locator value
        :raises Exception: any error raised while locating, moving to or clicking the element, after it was logged
        """
        try:
            # Use the raising helper: a failed move must not be followed by a click
            # at whatever position the pointer currently is
            self._hover_over_element(by=by, value=value)
            webdriver.ActionChains(self._driver).click().perform()
            self.logger.info("Element clicked")
        except Exception as e:
            self.logger.error(f"Error while trying to click element - {str(e)}")
            raise
0 commit comments