1
1
from datetime import datetime
2
2
from time import sleep
3
+ from typing import List
3
4
4
- from selenium .common import InvalidArgumentException , NoSuchElementException , TimeoutException
5
+ from selenium .common import InvalidArgumentException , NoSuchElementException , TimeoutException , WebDriverException
5
6
from selenium .webdriver import ActionChains , Keys
7
+ from selenium .webdriver .chrome .options import Options
6
8
7
9
from logger import get_current_logger , log_function
8
10
from scrapers .scraper_component .utils .driver_consts import BrowserConsts , MainConsts
11
13
create_path_if_needed , kill_browser_childes
12
14
from selenium import webdriver
13
15
16
+ from scrapers .scraper_component .utils .element import Element
17
+
14
18
15
19
class ChromeDriver (BaseDriverInterface ):
16
20
def __init__ (self , browser_type : str = BrowserConsts .CHROME , browser_profile_path : str = None ,
@@ -73,13 +77,19 @@ def __exit__(self, exc_type, exc_val, exc_tb):
73
77
@log_function
74
78
def __init_chrome_driver__ (self ):
75
79
try :
76
- options = webdriver .ChromeOptions ()
80
+ chrome_options = Options ()
81
+ chrome_options .add_argument ('--no-sandbox' )
82
+ chrome_options .add_argument ('--disable-dev-shm-usage' )
83
+ chrome_options .add_argument (argument = f"user-data-dir={ self .browser_profile_path } " )
77
84
if self .headless :
78
- options .add_argument ('--headless' )
79
- options .add_argument ('--no-sandbox' )
80
- options .add_argument ('--disable-dev-shm-usage' )
81
- options .add_argument (argument = f"user-data-dir={ self .browser_profile_path } " )
82
- self ._driver = webdriver .Chrome (executable_path = self .webdriver_path , options = options )
85
+ chrome_options .add_argument ("--headless" )
86
+ chrome_options .add_argument ('start-maximized' )
87
+ chrome_options .add_argument ('disable-infobars' )
88
+ chrome_options .add_argument ("--disable-extensions" )
89
+ start_time = datetime .now ()
90
+ self ._driver = webdriver .Chrome (executable_path = self .webdriver_path , options = chrome_options )
91
+ end_time = datetime .now ()
92
+ self .logger .info (f"Init chrome driver in { (end_time - start_time ).total_seconds ()} seconds" )
83
93
except Exception as e :
84
94
if "executable needs to be in path" in str (e ).lower ():
85
95
self .logger .error (f"PATH Error" )
@@ -93,31 +103,49 @@ def __init_chrome_driver__(self):
93
103
@log_function
94
104
def exit(self):
    """Quit the wrapped WebDriver instance and log that the driver was closed."""
    driver = self._driver
    driver.quit()
    exit_message = "Exit Chrome Driver "
    self.logger.info(exit_message)
97
107
98
108
@log_function
99
109
def get_url(self, url: str):
    """Navigate the driver to ``url``, retrying on connection resets.

    At most ``MainConsts.GET_URL_TRIES`` attempts are made. Only a
    ``WebDriverException`` whose message contains ``ERR_CONNECTION_RESET``
    triggers another attempt. Any other failure is logged and the driver is
    shut down through ``__error_and_exit``, after which the method returns
    immediately instead of retrying on the already-closed driver.

    :param url: address to load; an ``InvalidArgumentException`` from the
        driver is reported as an invalid url format.
    :return: None in every case (success and failure alike).
    """
    total_tries = MainConsts.GET_URL_TRIES
    for attempt in range(1, total_tries + 1):
        try:
            self.logger.debug(f"Trying to get page url: `{url}` NO. {attempt}/{total_tries}")
            self._driver.get(url)
            self.logger.info(f"Get to page url: `{url}`")
            return
        except InvalidArgumentException:
            # Must be handled before WebDriverException so the dedicated message is used.
            desc = f"Error getting url: '{url}' - invalid url input format, please give full correct format"
            self.__error_and_exit(desc)
            # The driver has been quit; another attempt could only fail again.
            return
        except WebDriverException as e:
            if "ERR_CONNECTION_RESET" in str(e):
                # Connection resets are the only failure worth another attempt.
                continue
            desc = f"Error getting to page url: `{url}` - {str(e)}"
            self.__error_and_exit(desc)
            return
        except Exception as e:
            desc = f"Error getting to page url: `{url}` - {str(e)}"
            self.__error_and_exit(desc)
            return
    # Reached only when every attempt ended in a connection reset.
    self.__error_and_exit(f"Error getting to page url: `{url}` after {total_tries} tries")
128
+
129
+ @log_function
130
def __error_and_exit(self, desc):
    """Log ``desc`` at error level, then shut the driver down via ``exit``.

    :param desc: error description passed to the logger.
    """
    log_error = self.logger.error
    log_error(desc)
    self.exit()
105
133
106
134
@log_function
107
135
def get_current_url(self) -> str:
    """Return the driver's current url, or None for a new-tab page.

    The url is read from the driver exactly once, so the value that is
    checked against ``BrowserConsts.NEW_TAB_URLS`` is the same value that is
    returned.

    :return: the current url, or None when it is one of
        ``BrowserConsts.NEW_TAB_URLS``.
    """
    current_url = self._driver.current_url
    if current_url in BrowserConsts.NEW_TAB_URLS:
        return None
    return current_url
109
137
110
138
@log_function
111
139
def get_title(self) -> str:
    """Return the driver's page title, or None when there is no real title.

    The title is read from the driver exactly once, so the value that is
    checked is the same value that is returned.

    :return: the page title, or None when it is empty/falsy or equal to
        ``BrowserConsts.NEW_TAB_TITLE``.
    """
    title = self._driver.title
    if not title or title == BrowserConsts.NEW_TAB_TITLE:
        return None
    return title
113
141
114
142
@log_function
115
def find_element(self, by, value) -> Element:
    """Locate a single element and wrap it in an ``Element``.

    :param by: locator strategy, forwarded to the driver's ``find_element``.
    :param value: locator value, forwarded to the driver's ``find_element``.
    :return: an ``Element`` built around the element the driver returned.
    """
    raw_element = self._driver.find_element(by=by, value=value)
    wrapped = Element(read_element=raw_element)
    return wrapped
117
145
118
146
@log_function
119
def find_elements(self, by, value) -> List[Element]:
    """Locate all matching elements and wrap each one in an ``Element``.

    :param by: locator strategy, forwarded to the driver's ``find_elements``.
    :param value: locator value, forwarded to the driver's ``find_elements``.
    :return: a list of ``Element`` wrappers, in the order the driver returned them.
    """
    raw_elements = self._driver.find_elements(by=by, value=value)
    wrapped = []
    for raw_element in raw_elements:
        wrapped.append(Element(read_element=raw_element))
    return wrapped
121
149
122
150
@log_function
123
151
def wait_until_object_appears (self , by , value , timeout : int = MainConsts .DEFAULT_ELEMENT_TIMEOUT ):
0 commit comments