Skip to content

Commit

Permalink
Merge pull request #37 from ClericPy/dev
Browse files Browse the repository at this point in the history
2.3.7
  • Loading branch information
ClericPy authored Aug 27, 2020
2 parents eb55bb5 + 4805d64 commit d27fbb4
Show file tree
Hide file tree
Showing 6 changed files with 258 additions and 66 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
- event-driven programming is not always advisable.
- Selenium is slow
- webdrivers often come with memory leaks.
- In desperate need of a stable toolkit to communicate with Chrome browser
- In desperate need of a stable toolkit to communicate with Chrome browser (or other Blink-based browsers like Chromium)
- fast http & websocket connections (based on aiohttp) for **asyncio** environment
- **ichrome.debugger** is a sync tool and depends on the `ichrome.async_utils`
- a choice for debugging interactively.
Expand Down
2 changes: 1 addition & 1 deletion ichrome/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from .logs import logger
from .sync_utils import Chrome, Tab

__version__ = "2.3.6"
__version__ = "2.3.7"
__tips__ = "[github]: https://github.com/ClericPy/ichrome\n[cdp]: https://chromedevtools.github.io/devtools-protocol/\n[cmd args]: https://peter.sh/experiments/chromium-command-line-switches/"
__all__ = [
'Chrome', 'ChromeDaemon', 'Tab', 'Tag', 'AsyncChrome', 'AsyncTab', 'logger',
Expand Down
16 changes: 16 additions & 0 deletions ichrome/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pathlib import Path

from ichrome import ChromeDaemon, ChromeWorkers, __version__, logger
from ichrome.base import get_readable_dir_size, install_chromium


def main():
Expand Down Expand Up @@ -156,10 +157,16 @@ def main():
help="killall chrome launched local with --remote-debugging-port",
default=False,
action="store_true")
parser.add_argument("--install",
help="download chromium and unzip it to given path",
default="")
args, extra_config = parser.parse_known_args()

if args.version:
print(__version__)
return
if args.install:
return install_chromium(args.install)
if args.config:
path = Path(args.config)
if not (path.is_file() and path.exists()):
Expand Down Expand Up @@ -233,7 +240,16 @@ def main():
elif args.clear_cache:
from .debugger import clear_cache_handler
kwargs['headless'] = getattr(args, 'headless', True)
port = kwargs.get('port') or 9222
main_user_dir = ChromeDaemon._ensure_user_dir(kwargs['user_data_dir'])
port_user_dir = main_user_dir / f"chrome_{port}"
print(
f'Clearing cache(port={port}): {get_readable_dir_size(port_user_dir)}'
)
asyncio.run(clear_cache_handler(**kwargs))
print(
f'Cleared cache(port={port}): {get_readable_dir_size(port_user_dir)}'
)
else:
start_port = getattr(args, 'port', 9222)
asyncio.run(
Expand Down
7 changes: 2 additions & 5 deletions ichrome/async_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,6 @@ def __init__(self,
:type default_recv_callback: Callable, optional
:param _recv_daemon_break_callback: like the tab_close_callback. sync/async function only accept 1 arg of self while _recv_daemon break, defaults to None
:type _recv_daemon_break_callback: Callable, optional
:raises ValueError: [description]
"""
tab_id = tab_id or kwargs.pop('id')
if not tab_id:
Expand Down Expand Up @@ -561,8 +560,7 @@ async def delete_cookies(self,
timeout=NotSet):
"""deleteCookies by name, with url / domain / path."""
if not any((url, domain)):
raise ValueError(
'At least one of the url and domain needs to be specified')
raise ValueError('URL and domain should not be null at the same time.')
return await self.send("Network.deleteCookies",
name=name,
url=url,
Expand Down Expand Up @@ -606,8 +604,7 @@ async def set_cookie(self,
sameSite [CookieSameSite] Cookie SameSite type.
expires [TimeSinceEpoch] Cookie expiration date, session cookie if not set"""
if not any((url, domain)):
raise ValueError(
'At least one of the url and domain needs to be specified')
raise ValueError('URL and domain should not be null at the same time.')
kwargs: Dict[str, Any] = dict(name=name,
value=value,
url=url,
Expand Down
121 changes: 118 additions & 3 deletions ichrome/base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
import re
import time
from asyncio import get_running_loop
from pathlib import Path
from typing import List

Expand Down Expand Up @@ -79,7 +80,7 @@ def get_proc_by_regex(regex, proc_names=None):
if (not proc_names or proc.name() in proc_names) and re.search(
regex, ' '.join(proc.cmdline())):
procs.append(proc)
except psutil.Error:
except (psutil.Error, OSError):
pass
return procs

Expand Down Expand Up @@ -114,9 +115,10 @@ def clear_chrome_process(port=None, timeout=None, max_deaths=1, interval=0.5):
while 1:
procs = get_proc(port)
for proc in procs:
logger.debug(
f"[Killing] {proc}, port: {port}. {' '.join(proc.cmdline())}")
try:
logger.debug(
f"[Killing] {proc}, port: {port}. {' '.join(proc.cmdline())}"
)
proc.kill()
except (psutil._exceptions.NoSuchProcess, ProcessLookupError):
continue
Expand All @@ -139,3 +141,116 @@ def get_dir_size(path):

def get_readable_dir_size(path):
    """Return the result of get_dir_size(path) formatted by get_readable_size.

    :param path: directory path forwarded unchanged to get_dir_size.
    :return: the value produced by get_readable_size with rounded=1.
    """
    dir_size = get_dir_size(path)
    return get_readable_size(dir_size, rounded=1)


def install_chromium(path, platform_name=None, x64=True, max_threads=5):
    """Download the latest Chromium snapshot zip and extract it into `path`.

    The snapshot revision is read from the LAST_CHANGE object of the
    chromium-browser-snapshots bucket, then the zip is fetched in ranged
    chunks through a torequests thread pool, buffered in memory and unzipped.
    A proxy is taken from the HTTPS_PROXY / https_proxy / http_proxy /
    HTTP_PROXY environment variables (first non-empty one wins).

    :param path: directory the zip archive is extracted into.
    :param platform_name: 'Linux', 'Windows', 'Darwin' or the aliases 'Mac' / 'Win', defaults to platform.system()
    :type platform_name: str, optional
    :param x64: use the `_x64` snapshot for Linux / Windows, defaults to True
    :type x64: bool, optional
    :param max_threads: size of the torequests thread pool, defaults to 5
    :type max_threads: int, optional
    :raises KeyError: platform_name is not one of the known platforms.
    :raises ValueError: the size probe or one of the chunk requests failed.
    """
    import os
    import platform
    import time
    import zipfile
    from io import BytesIO
    from pathlib import Path
    from torequests import tPool
    from torequests.utils import get_readable_size

    def slice_content_length(total, chunk=1 * 1024 * 1024):
        # Yield inclusive (first_byte, last_byte) pairs for HTTP Range headers.
        # The last pair is clamped to total - 1 and nothing is yielded once
        # start reaches total, so no unsatisfiable range is ever requested.
        start = 0
        while start < total:
            yield (start, min(start + chunk, total - 1))
            # Ranges are inclusive, so the next one begins after `end`.
            start += chunk + 1

    # https://commondatastorage.googleapis.com/chromium-browser-snapshots/index.html
    # https://storage.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Linux_x64%2FLAST_CHANGE?alt=media
    # https://storage.googleapis.com/chromium-browser-snapshots/Linux_x64/798492/chrome-linux.zip
    req = tPool(max_threads)
    # os.environ['http_proxy'] = 'https://localhost:1080'
    proxy = os.getenv('HTTPS_PROXY') or os.getenv('https_proxy') or os.getenv(
        'http_proxy') or os.getenv('HTTP_PROXY')
    # One mapping for both schemes (the original repeated the 'https' key).
    proxies = {'http': proxy, 'https': proxy}
    platform_name = platform_name or platform.system()
    platform_map = {
        'Linux': ['Linux', '_x64' if x64 else '', 'chrome-linux', 'chrome'],
        'Windows': ['Win', '_x64' if x64 else '', 'chrome-win', 'chrome.exe'],
        'Darwin': ['Mac', '', 'chrome-mac', 'chrome.app'],
    }
    # alias names
    platform_map['Mac'] = platform_map['Darwin']
    platform_map['Win'] = platform_map['Windows']
    _platform_name, _x64, zip_file_name, chrome_runner_name = platform_map[
        platform_name]
    version_api = f'https://storage.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/{_platform_name}{_x64}%2FLAST_CHANGE?alt=media'
    r = req.get(version_api, timeout=3, retry=1, proxies=proxies)
    if not r.text.isdigit():
        print(f'check your network connect to {version_api}')
        return
    download_url = f'https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/{_platform_name}{_x64}%2F{r.text}%2F{zip_file_name}.zip?alt=media'
    print('Downloading zip file from:', download_url)
    with BytesIO() as f:
        # Probe the archive size first so it can be split into ranges.
        r = req.head(download_url, retry=1, proxies=proxies)
        if not r.ok:
            raise ValueError(f'Bad request {r!r}')
        total = int(r.headers['Content-Length'])
        start_time = time.time()
        # All ranged requests are submitted to the pool up front; they are
        # consumed below in submission order so the buffer is written
        # sequentially.
        responses = [
            req.get(
                download_url,
                proxies=proxies,
                retry=3,
                headers={'Range': f'bytes={range_start}-{range_end}'},
            ) for range_start, range_end in slice_content_length(
                total, 1 * 1024 * 1024)
        ]
        total_mb = round(total / 1024 / 1024, 2)
        downloaded = 0
        for r in responses:
            if not r.ok:
                raise ValueError(f'Bad request {r!r}')
            content = r.content
            f.write(content)
            downloaded += len(content)
            print(
                f'{round(downloaded / total * 100): >3}% | {round(downloaded / 1024 / 1024, 2)}mb / {total_mb}mb | {get_readable_size(downloaded/(time.time()-start_time+0.001), rounded=0)}/s'
            )
        print('Downloading is finished, will unzip it to:', path)
        # Close the archive handle deterministically after extraction.
        with zipfile.ZipFile(f) as zf:
            zf.extractall(path)
    install_folder_path = Path(path) / zip_file_name
    if _platform_name == 'Mac' and install_folder_path.is_dir():
        print('Install succeeded, check your folder:',
              install_folder_path.absolute())
        return
    chrome_path = install_folder_path / chrome_runner_name
    if chrome_path.is_file():
        chrome_abs_path = chrome_path.absolute()
        print('chrome_path:', chrome_abs_path)
        if _platform_name == 'Linux':
            print(f'chmod 755 {chrome_abs_path}')
            # The mode must be octal: decimal 755 is 0o1363, not rwxr-xr-x.
            os.chmod(chrome_path, 0o755)
        print(f'check chromium version:\n{chrome_abs_path} --version')
        print('Install succeeded.')
    else:
        print('Mission failed.')


def async_run(func, *args, **kwargs):
    """Schedule func(*args, **kwargs) on the running loop's default executor.

    Must be called while an event loop is running, because the loop is looked
    up with get_running_loop().

    :param func: callable to run in the executor.
    :return: the awaitable returned by loop.run_in_executor.
    """
    loop = get_running_loop()
    # run_in_executor takes positional args only, so bind kwargs in a closure.
    return loop.run_in_executor(None, lambda: func(*args, **kwargs))
Loading

0 comments on commit d27fbb4

Please sign in to comment.