Skip to content

Commit

Permalink
Refactor code into two classes
Browse files Browse the repository at this point in the history
BinderBot class contained two bits of functionality:

1. Starting and stopping binders on a BinderHub
2. Interacting with the notebook API (start kernels, contentsmanager,
etc)

This commit splits them into two different classes. This has
the big advantage that we can run unit tests against a local instance
of jupyter notebooks, rather than starting an instance on mybinder.org
all the time. Tests are much faster, and everything works as
normal since it is a straight up split

We added more unit tests, but lost the CLI test. Most of its
functionality is covered by the other unit tests, but it would
be nice to have an integration test
  • Loading branch information
yuvipanda committed Apr 25, 2020
1 parent 486e06c commit 73140db
Show file tree
Hide file tree
Showing 3 changed files with 156 additions and 78 deletions.
47 changes: 25 additions & 22 deletions binderbot/binderbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,29 +97,41 @@ async def shutdown_binder(self):
"""
Shut down running binder instance.
"""
if self.state != BinderUser.States.KERNEL_STARTED:
await self.start_kernel()
# Ideally, we will talk to the hub API to shut this down
# However, the token we get is just a notebook auth token, *not* a hub auth otken
# So we can't make requests to the hub API.
# FIXME: Provide hub auth tokens from binderhub API
await self.run_code("""
nbclient = NotebookClient(self.session, self.notebook_url, self.token, self.log)
await nbclient.start_kernel()
await nbclient.run_code("""
import os
import signal
# FIXME: Wait a bit, and send SIGKILL otherwise
os.kill(1, signal.SIGTERM)
""")
self.state = BinderUser.States.CLEAR

async def start_kernel(self):
assert self.state == BinderUser.States.BINDER_STARTED

class NotebookClient:
"""
Client for doing operations against a notebook server
"""
def __init__(self, session: aiohttp.ClientSession, url: URL, token: str, log: structlog.BoundLogger):
# FIXME: If we get this from BinderBot, will it close properly?
self.session = session
self.url = url
self.token = token
self.log = log

self.auth_headers = {
'Authorization': f'token {self.token}'
}

async def start_kernel(self):
self.log.msg('Kernel: Starting', action='kernel-start', phase='start')
start_time = time.monotonic()

try:
headers = {'Authorization': f'token {self.token}'}
resp = await self.session.post(self.notebook_url / 'api/kernels', headers=headers)
resp = await self.session.post(self.url / 'api/kernels', headers=self.auth_headers)
except Exception as e:
self.log.msg('Kernel: Start failed {}'.format(str(e)), action='kernel-start', phase='failed', duration=time.monotonic() - start_time)
raise OperationError()
Expand All @@ -133,13 +145,10 @@ async def start_kernel(self):


async def stop_kernel(self):
assert self.state == BinderUser.States.KERNEL_STARTED

self.log.msg('Kernel: Stopping', action='kernel-stop', phase='start')
start_time = time.monotonic()
try:
headers = {'Authorization': f'token {self.token}'}
resp = await self.session.delete(self.notebook_url / 'api/kernels' / self.kernel_id, headers=headers)
resp = await self.session.delete(self.url / 'api/kernels' / self.kernel_id, headers=self.auth_headers)
except Exception as e:
self.log.msg('Kernel:Failed Stopped {}'.format(str(e)), action='kernel-stop', phase='failed')
raise OperationError()
Expand All @@ -149,21 +158,17 @@ async def stop_kernel(self):
raise OperationError()

self.log.msg('Kernel: Stopped', action='kernel-stop', phase='complete')
self.state = BinderUser.States.BINDER_STARTED

# https://github.com/jupyter/jupyter/wiki/Jupyter-Notebook-Server-API#notebook-and-file-contents-api
async def get_contents(self, path):
headers = {'Authorization': f'token {self.token}'}
resp = await self.session.get(self.notebook_url / 'api/contents' / path, headers=headers)
resp = await self.session.get(self.url / 'api/contents' / path, headers=self.auth_headers)
resp_json = await resp.json()
return resp_json['content']


async def put_contents(self, path, nb_data):
headers = {'Authorization': f'token {self.token}'}
data = {'content': nb_data, "type": "notebook"}
resp = await self.session.put(self.notebook_url / 'api/contents' / path,
json=data, headers=headers)
resp = await self.session.put(self.url / 'api/contents' / path,
json=data, headers=self.auth_headers)
resp.raise_for_status()

def request_execute_code(self, msg_id, code):
Expand Down Expand Up @@ -191,9 +196,7 @@ def request_execute_code(self, msg_id, code):

async def run_code(self, code):
"""Run code and return stdout, stderr."""
assert self.state == BinderUser.States.KERNEL_STARTED

channel_url = self.notebook_url / 'api/kernels' / self.kernel_id / 'channels'
channel_url = self.url / 'api/kernels' / self.kernel_id / 'channels'
self.log.msg('WS: Connecting', action='kernel-connect', phase='start')
is_connected = False
try:
Expand Down
13 changes: 7 additions & 6 deletions binderbot/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import click
import nbformat

from .binderbot import BinderUser
from .binderbot import BinderUser, NotebookClient

# https://github.com/pallets/click/issues/85#issuecomment-43378930
def coro(f):
Expand Down Expand Up @@ -56,22 +56,23 @@ async def main(binder_url, repo, ref, output_dir, nb_timeout,
# inputs look good, start up binder
async with BinderUser(binder_url, repo, ref) as jovyan:
await jovyan.start_binder(timeout=binder_start_timeout)
await jovyan.start_kernel()
nb = NotebookClient(jovyan.session, jovyan.notebook_url, jovyan.token, jovyan.log)
await nb.start_kernel()
click.echo(f"✅ Binder and kernel started successfully.")
# could think about asyncifying this whole loop
# for now, we run one notebook at a time to avoid overloading the binder
errors = {}
for fname in filenames:
try:
click.echo(f"⌛️ Uploading {fname}...", nl=False)
await jovyan.upload_local_notebook(fname)
await nb.upload_local_notebook(fname)
click.echo("✅")
click.echo(f"⌛️ Executing {fname}...", nl=False)
await jovyan.execute_notebook(fname, timeout=nb_timeout,
await nb.execute_notebook(fname, timeout=nb_timeout,
env_vars=extra_env_vars)
click.echo("✅")
click.echo(f"⌛️ Downloading and saving {fname}...", nl=False)
nb_data = await jovyan.get_contents(fname)
nb_data = await nb.get_contents(fname)
nb = nbformat.from_dict(nb_data)
output_fname = os.path.join(output_dir, fname) if output_dir else fname
with open(output_fname, 'w', encoding='utf-8') as f:
Expand All @@ -81,7 +82,7 @@ async def main(binder_url, repo, ref, output_dir, nb_timeout,
errors[fname] = e
click.echo(f'❌ error running {fname}: {e}')

await jovyan.stop_kernel()
await nb.stop_kernel()

if len(errors) > 0:
raise RuntimeError(str(errors))
Expand Down
174 changes: 124 additions & 50 deletions tests/test_binderbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,38 +4,96 @@

import os

import typing
import asyncio
import pytest
from click.testing import CliRunner
import nbformat
from pathlib import Path
import subprocess
import secrets
from tempfile import TemporaryDirectory
import socket
import time
import yarl
import structlog
import aiohttp

from binderbot import binderbot
from binderbot import cli


@pytest.fixture()
def example_nb_data():
nbdata = {'cells': [{'cell_type': 'code',
'execution_count': None,
'metadata': {},
'outputs': [],
'source': 'import socket\nprint(socket.gethostname())'},
{'cell_type': 'code',
'execution_count': None,
'metadata': {},
'outputs': [],
'source': 'import os\nprint(os.environ["MY_VAR"])'}],
'metadata': {'kernelspec': {'display_name': 'Python 3',
'language': 'python',
'name': 'python3'},
'language_info': {'codemirror_mode': {'name': 'ipython', 'version': 3},
'file_extension': '.py',
'mimetype': 'text/x-python',
'name': 'python',
'nbconvert_exporter': 'python',
'pygments_lexer': 'ipython3',
'version': '3.8.1'}},
'nbformat': 4,
'nbformat_minor': 4}
class LocalNotebookServer:
def __init__(self, url: yarl.URL, token: str, cwd: Path):
self.url = url
self.token = token
self.cwd = cwd


@pytest.fixture
def free_port() -> int:
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
s.bind(("0.0.0.0", 0))
portnum = s.getsockname()[1]
s.close()
return portnum

@pytest.fixture
async def local_notebook(free_port):
token = secrets.token_hex(16)
with TemporaryDirectory() as tmpdir:
#free_port = 8888
#token = 'fdbdeca87bd6cf39d366519f5cc0c9c7c994647f6696e8bb'
cwd = Path(tmpdir)
cmd = [
'jupyter', 'notebook',
f'--NotebookApp.token={token}',
'--port', str(free_port),
'--no-browser'
]
proc = await asyncio.create_subprocess_exec(*cmd, cwd=tmpdir)
# FIXME: Use a http health-check, not a timed wait
await asyncio.sleep(2)
# localhost is important, aiohttp doesn't really like to authenticate against IPs
yield LocalNotebookServer(yarl.URL.build(scheme='http', host='localhost', port=free_port), token=token, cwd=cwd)

proc.terminate()
await proc.wait()


def make_code_notebook(cells: typing.List[str]):
nbdata = {
'cells': [
{
'cell_type': 'code',
'execution_count': None,
'metadata': {},
'outputs': [],
'source': cell
} for cell in cells
],
'metadata': {
'kernelspec': {
'display_name': 'Python 3',
'language': 'python',
'name': 'python3'
},
'language_info': {
'codemirror_mode': {
'name': 'ipython',
'version': 3
},
'file_extension': '.py',
'mimetype': 'text/x-python',
'name': 'python',
'nbconvert_exporter': 'python',
'pygments_lexer': 'ipython3',
'version': '3.8.1'
}
},
'nbformat': 4,
'nbformat_minor': 4
}
return nbformat.from_dict(nbdata)


Expand Down Expand Up @@ -65,28 +123,44 @@ async def test_binder_start_stop(binder_url):

assert resp.status == 503

def test_cli_upload_execute_download(tmp_path, example_nb_data):
"""Test the CLI."""

os.chdir(tmp_path)
fname = "example_notebook.ipynb"
with open(fname, 'w', encoding='utf-8') as f:
nbformat.write(example_nb_data, f)

env = {"MY_VAR": "SECRET"}
runner = CliRunner(env=env)
args = ["--binder-url", "http://mybinder.org",
"--repo", "binder-examples/requirements",
"--ref", "master", "--nb-timeout", "10",
"--pass-env-var", "MY_VAR",
fname]
result = runner.invoke(cli.main, args)
assert result.exit_code == 0, result.output

with open(fname) as f:
nb = nbformat.read(f, as_version=4)

hostname = nb['cells'][0]['outputs'][0]['text']
assert hostname.startswith('jupyter-binder-')
remote_env_var_value = nb['cells'][1]['outputs'][0]['text']
assert remote_env_var_value.rstrip() == env['MY_VAR']

@pytest.mark.asyncio
async def test_nbclient_run_code(local_notebook: LocalNotebookServer):
log = structlog.get_logger().bind()
async with aiohttp.ClientSession() as session:
nbclient = binderbot.NotebookClient(session, local_notebook.url, local_notebook.token, log)

await nbclient.start_kernel()
stdout, stderr = await nbclient.run_code(f"""
print('hi')
""")

assert stderr.strip() == ""
assert stdout.strip() == 'hi'

await nbclient.stop_kernel()

@pytest.mark.asyncio
async def test_upload(local_notebook: LocalNotebookServer):
log = structlog.get_logger().bind()
async with aiohttp.ClientSession() as session:
nbclient = binderbot.NotebookClient(session, local_notebook.url, local_notebook.token, log)

await nbclient.start_kernel()
fname = "example-notebook.ipynb"
filepath = local_notebook.cwd / fname
input_notebook = make_code_notebook(["print('hello')"])
with open(filepath, 'w', encoding='utf-8') as f:
nbformat.write(input_notebook, f)

await nbclient.execute_notebook(
fname,
timeout=60,
)
nb_data = await nbclient.get_contents(fname)
nb = nbformat.from_dict(nb_data)

cell1 = nb['cells'][0]['outputs'][0]['text']
assert cell1 == "hello\n"

await nbclient.stop_kernel()

0 comments on commit 73140db

Please sign in to comment.